aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2011-02-11 15:37:37 -0400
committerGravatar Joey Hess <joey@kitenet.net>2011-02-11 15:37:37 -0400
commit5a50a7cf137997a9d940b9a89a0968452a1ac411 (patch)
tree64ac76dc4a4327a2b3eafbee28c45670af71340f
parent285fb2bb08c7da534c111ebfeee5911e850570cc (diff)
update unicode FilePath handling
Based on http://hackage.haskell.org/trac/ghc/ticket/3307 , whether FilePath contains decoded unicode varies by OS. So, add a configure check for it. Also, renamed showFile to filePathToString
-rw-r--r--Backend/File.hs6
-rw-r--r--Backend/SHA1.hs2
-rw-r--r--Backend/WORM.hs2
-rw-r--r--Command/Find.hs2
-rw-r--r--Command/PreCommit.hs2
-rw-r--r--Command/Unused.hs2
-rw-r--r--Content.hs2
-rw-r--r--Messages.hs14
-rw-r--r--configure.hs15
-rw-r--r--doc/bugs/problems_with_utf8_names.mdwn2
-rw-r--r--testdata/unicode-test-ö1
11 files changed, 34 insertions, 16 deletions
diff --git a/Backend/File.hs b/Backend/File.hs
index fca385a1e..d5691595a 100644
--- a/Backend/File.hs
+++ b/Backend/File.hs
@@ -193,14 +193,14 @@ checkKeyNumCopies key file numcopies = do
missingNote :: String -> Int -> Int -> String -> String
missingNote file 0 _ [] =
- "** No known copies of " ++ showFile file ++ " exist!"
+ "** No known copies of " ++ filePathToString file ++ " exist!"
missingNote file 0 _ untrusted =
- "Only these untrusted locations may have copies of " ++ showFile file ++
+ "Only these untrusted locations may have copies of " ++ filePathToString file ++
"\n" ++ untrusted ++
"Back it up to trusted locations with git-annex copy."
missingNote file present needed [] =
"Only " ++ show present ++ " of " ++ show needed ++
- " trustworthy copies of " ++ showFile file ++ " exist." ++
+ " trustworthy copies of " ++ filePathToString file ++ " exist." ++
"\nBack it up with git-annex copy."
missingNote file present needed untrusted =
missingNote file present needed [] ++
diff --git a/Backend/SHA1.hs b/Backend/SHA1.hs
index f1092492e..9636787f0 100644
--- a/Backend/SHA1.hs
+++ b/Backend/SHA1.hs
@@ -58,5 +58,5 @@ checkKeySHA1 key = do
then return True
else do
dest <- moveBad key
- warning $ "Bad file content; moved to " ++ showFile dest
+ warning $ "Bad file content; moved to " ++ filePathToString dest
return False
diff --git a/Backend/WORM.hs b/Backend/WORM.hs
index 7f40a2acb..92fe5a2d4 100644
--- a/Backend/WORM.hs
+++ b/Backend/WORM.hs
@@ -67,5 +67,5 @@ checkKeySize key = do
then return True
else do
dest <- moveBad key
- warning $ "Bad file size; moved to " ++ showFile dest
+ warning $ "Bad file size; moved to " ++ filePathToString dest
return False
diff --git a/Command/Find.hs b/Command/Find.hs
index 45156af05..3e9125b9a 100644
--- a/Command/Find.hs
+++ b/Command/Find.hs
@@ -25,5 +25,5 @@ seek = [withFilesInGit start]
start :: CommandStartString
start file = isAnnexed file $ \(key, _) -> do
exists <- inAnnex key
- when exists $ liftIO $ putStrLn $ showFile file
+ when exists $ liftIO $ putStrLn $ filePathToString file
return Nothing
diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs
index 76aab3855..750997f54 100644
--- a/Command/PreCommit.hs
+++ b/Command/PreCommit.hs
@@ -33,7 +33,7 @@ perform pair@(file, _) = do
ok <- doCommand $ Command.Add.start pair
if ok
then return $ Just $ cleanup file
- else error $ "failed to add " ++ showFile file ++ "; canceling commit"
+ else error $ "failed to add " ++ filePathToString file ++ "; canceling commit"
cleanup :: FilePath -> CommandCleanup
cleanup file = do
diff --git a/Command/Unused.hs b/Command/Unused.hs
index 2b390b956..67a227237 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -68,7 +68,7 @@ checkUnused = do
dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"]
table l = [" NUMBER KEY"] ++ map cols l
- cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ (showFile . show) k
+ cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ (filePathToString . show) k
pad n s = s ++ replicate (n - length s) ' '
number :: Int -> [a] -> [(Int, a)]
diff --git a/Content.hs b/Content.hs
index 188a38787..345599dba 100644
--- a/Content.hs
+++ b/Content.hs
@@ -50,7 +50,7 @@ calcGitLink file key = do
cwd <- liftIO $ getCurrentDirectory
let absfile = case absNormPath cwd file of
Just f -> f
- Nothing -> error $ "unable to normalize " ++ showFile file
+ Nothing -> error $ "unable to normalize " ++ filePathToString file
return $ relPathDirToDir (parentDir absfile) (Git.workTree g) ++
annexLocation key
diff --git a/Messages.hs b/Messages.hs
index ab5992a6c..80b53e5cd 100644
--- a/Messages.hs
+++ b/Messages.hs
@@ -11,10 +11,11 @@ import Control.Monad.State (liftIO)
import System.IO
import Control.Monad (unless)
import Data.String.Utils
-import Codec.Binary.UTF8.String as UTF8
+import qualified Codec.Binary.UTF8.String as UTF8
import Types
import qualified Annex
+import SysConfig
verbose :: Annex () -> Annex ()
verbose a = do
@@ -26,7 +27,7 @@ showSideAction s = verbose $ liftIO $ putStrLn $ "(" ++ s ++ ")"
showStart :: String -> String -> Annex ()
showStart command file = verbose $ do
- liftIO $ putStr $ command ++ " " ++ showFile file ++ " "
+ liftIO $ putStr $ command ++ " " ++ filePathToString file ++ " "
liftIO $ hFlush stdout
showNote :: String -> Annex ()
@@ -58,7 +59,8 @@ warning w = do
indent :: String -> String
indent s = join "\n" $ map (\l -> " " ++ l) $ lines s
-{- Prepares a filename for display. This is needed because strings are
- - internally represented in git-annex is non-decoded form. -}
-showFile :: FilePath -> String
-showFile = decodeString
+{- Prepares a filename for display. This is needed because on many
+ - platforms (eg, unix), FilePaths are internally stored in
+ - non-decoded form. -}
+filePathToString :: FilePath -> String
+filePathToString = if unicodefilepath then id else UTF8.decodeString
diff --git a/configure.hs b/configure.hs
index 1451d7eaa..b5437ec1a 100644
--- a/configure.hs
+++ b/configure.hs
@@ -1,6 +1,7 @@
{- Checks system configuration and generates SysConfig.hs. -}
import System.Directory
+import Data.List
import TestConfig
@@ -13,6 +14,7 @@ tests = [
, TestCase "sha1sum" $ requireCmd "sha1sum" "sha1sum </dev/null"
, TestCase "xargs -0" $ requireCmd "xargs_0" "xargs -0 </dev/null"
, TestCase "rsync" $ requireCmd "rsync" "rsync --version >/dev/null"
+ , TestCase "unicode FilePath support" $ unicodeFilePath
]
tmpDir :: String
@@ -27,6 +29,19 @@ testCp k option = TestCase cmd $ testCmd k run
cmd = "cp " ++ option
run = cmd ++ " " ++ testFile ++ " " ++ testFile ++ ".new"
+{- Checks if FilePaths contain decoded unicode, or not. The testdata
+ - directory contains a "unicode-test-ü" file; try to find the file,
+ - and see if the "ü" is encoded correctly.
+ -
+ - Note that the file is shipped with git-annex, rather than created,
+ - to avoid other potential unicode issues.
+ -}
+unicodeFilePath :: Test
+unicodeFilePath = do
+ fs <- getDirectoryContents "testdata"
+ let file = head $ filter (isInfixOf "unicode-test") fs
+ return $ Config "unicodefilepath" (BoolConfig $ isInfixOf "ü" file)
+
setup :: IO ()
setup = do
createDirectoryIfMissing True tmpDir
diff --git a/doc/bugs/problems_with_utf8_names.mdwn b/doc/bugs/problems_with_utf8_names.mdwn
index 29213e1ec..efde1c9a3 100644
--- a/doc/bugs/problems_with_utf8_names.mdwn
+++ b/doc/bugs/problems_with_utf8_names.mdwn
@@ -46,7 +46,7 @@ It looks like the common latin1-to-UTF8 encoding. Functionality other than otupu
> user's configured encoding), and allow haskell's output encoding to then
> encode it according to the user's locale configuration.
> > This is now [[implemented|done]]. I'm not very happy that I have to watch
-> > out for any place that a filename is output and call `showFile`
+> > out for any place that a filename is output and call `filePathToString`
> > on it, but there are really not too many such places in git-annex.
> >
> > Note that this only affects filenames apparently.
diff --git a/testdata/unicode-test-ö b/testdata/unicode-test-ö
new file mode 100644
index 000000000..45b983be3
--- /dev/null
+++ b/testdata/unicode-test-ö
@@ -0,0 +1 @@
+hi