From 5a50a7cf137997a9d940b9a89a0968452a1ac411 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 11 Feb 2011 15:37:37 -0400 Subject: update unicode FilePath handling Based on http://hackage.haskell.org/trac/ghc/ticket/3307 , whether FilePath contains decoded unicode varies by OS. So, add a configure check for it. Also, renamed showFile to filePathToString --- Backend/File.hs | 6 +++--- Backend/SHA1.hs | 2 +- Backend/WORM.hs | 2 +- Command/Find.hs | 2 +- Command/PreCommit.hs | 2 +- Command/Unused.hs | 2 +- Content.hs | 2 +- Messages.hs | 14 ++++++++------ configure.hs | 15 +++++++++++++++ doc/bugs/problems_with_utf8_names.mdwn | 2 +- "testdata/unicode-test-\303\266" | 1 + 11 files changed, 34 insertions(+), 16 deletions(-) create mode 100644 "testdata/unicode-test-\303\266" diff --git a/Backend/File.hs b/Backend/File.hs index fca385a1e..d5691595a 100644 --- a/Backend/File.hs +++ b/Backend/File.hs @@ -193,14 +193,14 @@ checkKeyNumCopies key file numcopies = do missingNote :: String -> Int -> Int -> String -> String missingNote file 0 _ [] = - "** No known copies of " ++ showFile file ++ " exist!" + "** No known copies of " ++ filePathToString file ++ " exist!" missingNote file 0 _ untrusted = - "Only these untrusted locations may have copies of " ++ showFile file ++ + "Only these untrusted locations may have copies of " ++ filePathToString file ++ "\n" ++ untrusted ++ "Back it up to trusted locations with git-annex copy." missingNote file present needed [] = "Only " ++ show present ++ " of " ++ show needed ++ - " trustworthy copies of " ++ showFile file ++ " exist." ++ + " trustworthy copies of " ++ filePathToString file ++ " exist." ++ "\nBack it up with git-annex copy." missingNote file present needed untrusted = missingNote file present needed [] ++ diff --git a/Backend/SHA1.hs b/Backend/SHA1.hs index f1092492e..9636787f0 100644 --- a/Backend/SHA1.hs +++ b/Backend/SHA1.hs @@ -58,5 +58,5 @@ checkKeySHA1 key = do then return True else do dest <- moveBad key - warning $ "Bad file content; moved to " ++ showFile dest + warning $ "Bad file content; moved to " ++ filePathToString dest return False diff --git a/Backend/WORM.hs b/Backend/WORM.hs index 7f40a2acb..92fe5a2d4 100644 --- a/Backend/WORM.hs +++ b/Backend/WORM.hs @@ -67,5 +67,5 @@ checkKeySize key = do then return True else do dest <- moveBad key - warning $ "Bad file size; moved to " ++ showFile dest + warning $ "Bad file size; moved to " ++ filePathToString dest return False diff --git a/Command/Find.hs b/Command/Find.hs index 45156af05..3e9125b9a 100644 --- a/Command/Find.hs +++ b/Command/Find.hs @@ -25,5 +25,5 @@ seek = [withFilesInGit start] start :: CommandStartString start file = isAnnexed file $ \(key, _) -> do exists <- inAnnex key - when exists $ liftIO $ putStrLn $ showFile file + when exists $ liftIO $ putStrLn $ filePathToString file return Nothing diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs index 76aab3855..750997f54 100644 --- a/Command/PreCommit.hs +++ b/Command/PreCommit.hs @@ -33,7 +33,7 @@ perform pair@(file, _) = do ok <- doCommand $ Command.Add.start pair if ok then return $ Just $ cleanup file - else error $ "failed to add " ++ showFile file ++ "; canceling commit" + else error $ "failed to add " ++ filePathToString file ++ "; canceling commit" cleanup :: FilePath -> CommandCleanup cleanup file = do diff --git a/Command/Unused.hs b/Command/Unused.hs index 2b390b956..67a227237 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -68,7 +68,7 @@ checkUnused = do dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"] table l = [" NUMBER KEY"] ++ map cols l - cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ (showFile . show) k + cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ (filePathToString . show) k pad n s = s ++ replicate (n - length s) ' ' number :: Int -> [a] -> [(Int, a)] diff --git a/Content.hs b/Content.hs index 188a38787..345599dba 100644 --- a/Content.hs +++ b/Content.hs @@ -50,7 +50,7 @@ calcGitLink file key = do cwd <- liftIO $ getCurrentDirectory let absfile = case absNormPath cwd file of Just f -> f - Nothing -> error $ "unable to normalize " ++ showFile file + Nothing -> error $ "unable to normalize " ++ filePathToString file return $ relPathDirToDir (parentDir absfile) (Git.workTree g) ++ annexLocation key diff --git a/Messages.hs b/Messages.hs index ab5992a6c..80b53e5cd 100644 --- a/Messages.hs +++ b/Messages.hs @@ -11,10 +11,11 @@ import Control.Monad.State (liftIO) import System.IO import Control.Monad (unless) import Data.String.Utils -import Codec.Binary.UTF8.String as UTF8 +import qualified Codec.Binary.UTF8.String as UTF8 import Types import qualified Annex +import SysConfig verbose :: Annex () -> Annex () verbose a = do @@ -26,7 +27,7 @@ showSideAction s = verbose $ liftIO $ putStrLn $ "(" ++ s ++ ")" showStart :: String -> String -> Annex () showStart command file = verbose $ do - liftIO $ putStr $ command ++ " " ++ showFile file ++ " " + liftIO $ putStr $ command ++ " " ++ filePathToString file ++ " " liftIO $ hFlush stdout showNote :: String -> Annex () @@ -58,7 +59,8 @@ warning w = do indent :: String -> String indent s = join "\n" $ map (\l -> " " ++ l) $ lines s -{- Prepares a filename for display. This is needed because strings are - - internally represented in git-annex is non-decoded form. -} -showFile :: FilePath -> String -showFile = decodeString +{- Prepares a filename for display. This is needed because on many + - platforms (eg, unix), FilePaths are internally stored in + - non-decoded form. -} +filePathToString :: FilePath -> String +filePathToString = if unicodefilepath then id else UTF8.decodeString diff --git a/configure.hs b/configure.hs index 1451d7eaa..b5437ec1a 100644 --- a/configure.hs +++ b/configure.hs @@ -1,6 +1,7 @@ {- Checks system configuration and generates SysConfig.hs. -} import System.Directory +import Data.List import TestConfig @@ -13,6 +14,7 @@ tests = [ , TestCase "sha1sum" $ requireCmd "sha1sum" "sha1sum /dev/null" + , TestCase "unicode FilePath support" $ unicodeFilePath ] tmpDir :: String @@ -27,6 +29,19 @@ testCp k option = TestCase cmd $ testCmd k run cmd = "cp " ++ option run = cmd ++ " " ++ testFile ++ " " ++ testFile ++ ".new" +{- Checks if FilePaths contain decoded unicode, or not. The testdata + - directory contains a "unicode-test-ü" file; try to find the file, + - and see if the "ü" is encoded correctly. + - + - Note that the file is shipped with git-annex, rather than created, + - to avoid other potential unicode issues. + -} +unicodeFilePath :: Test +unicodeFilePath = do + fs <- getDirectoryContents "testdata" + let file = head $ filter (isInfixOf "unicode-test") fs + return $ Config "unicodefilepath" (BoolConfig $ isInfixOf "ü" file) + setup :: IO () setup = do createDirectoryIfMissing True tmpDir diff --git a/doc/bugs/problems_with_utf8_names.mdwn b/doc/bugs/problems_with_utf8_names.mdwn index 29213e1ec..efde1c9a3 100644 --- a/doc/bugs/problems_with_utf8_names.mdwn +++ b/doc/bugs/problems_with_utf8_names.mdwn @@ -46,7 +46,7 @@ It looks like the common latin1-to-UTF8 encoding. Functionality other than otupu > user's configured encoding), and allow haskell's output encoding to then > encode it according to the user's locale configuration. > > This is now [[implemented|done]]. I'm not very happy that I have to watch -> > out for any place that a filename is output and call `showFile` +> > out for any place that a filename is output and call `filePathToString` > > on it, but there are really not too many such places in git-annex. > > > > Note that this only affects filenames apparently. diff --git "a/testdata/unicode-test-\303\266" "b/testdata/unicode-test-\303\266" new file mode 100644 index 000000000..45b983be3 --- /dev/null +++ "b/testdata/unicode-test-\303\266" @@ -0,0 +1 @@ +hi -- cgit v1.2.3