diff options
-rw-r--r-- | Backend/File.hs | 6 | ||||
-rw-r--r-- | Backend/SHA1.hs | 2 | ||||
-rw-r--r-- | Backend/WORM.hs | 2 | ||||
-rw-r--r-- | Command/Find.hs | 3 | ||||
-rw-r--r-- | Command/PreCommit.hs | 2 | ||||
-rw-r--r-- | Command/Unused.hs | 2 | ||||
-rw-r--r-- | Content.hs | 2 | ||||
-rw-r--r-- | Messages.hs | 9 | ||||
-rw-r--r-- | debian/changelog | 1 | ||||
-rw-r--r-- | doc/bugs/problems_with_utf8_names.mdwn | 22 | ||||
-rw-r--r-- | doc/bugs/unhappy_without_UTF8_locale.mdwn | 33 |
11 files changed, 63 insertions, 21 deletions
diff --git a/Backend/File.hs b/Backend/File.hs index d76cd2939..fca385a1e 100644 --- a/Backend/File.hs +++ b/Backend/File.hs @@ -193,14 +193,14 @@ checkKeyNumCopies key file numcopies = do missingNote :: String -> Int -> Int -> String -> String missingNote file 0 _ [] = - "** No known copies of " ++ file ++ " exist!" + "** No known copies of " ++ showFile file ++ " exist!" missingNote file 0 _ untrusted = - "Only these untrusted locations may have copies of " ++ file ++ + "Only these untrusted locations may have copies of " ++ showFile file ++ "\n" ++ untrusted ++ "Back it up to trusted locations with git-annex copy." missingNote file present needed [] = "Only " ++ show present ++ " of " ++ show needed ++ - " trustworthy copies of " ++ file ++ " exist." ++ + " trustworthy copies of " ++ showFile file ++ " exist." ++ "\nBack it up with git-annex copy." missingNote file present needed untrusted = missingNote file present needed [] ++ diff --git a/Backend/SHA1.hs b/Backend/SHA1.hs index 3d868dbd1..f1092492e 100644 --- a/Backend/SHA1.hs +++ b/Backend/SHA1.hs @@ -58,5 +58,5 @@ checkKeySHA1 key = do then return True else do dest <- moveBad key - warning $ "Bad file content; moved to "++dest + warning $ "Bad file content; moved to " ++ showFile dest return False diff --git a/Backend/WORM.hs b/Backend/WORM.hs index 20a81d841..7f40a2acb 100644 --- a/Backend/WORM.hs +++ b/Backend/WORM.hs @@ -67,5 +67,5 @@ checkKeySize key = do then return True else do dest <- moveBad key - warning $ "Bad file size; moved to "++dest + warning $ "Bad file size; moved to " ++ showFile dest return False diff --git a/Command/Find.hs b/Command/Find.hs index 3ed15c153..45156af05 100644 --- a/Command/Find.hs +++ b/Command/Find.hs @@ -12,6 +12,7 @@ import Control.Monad.State (liftIO) import Command import Content +import Messages command :: [Command] command = [Command "find" (paramOptional $ paramRepeating paramPath) seek @@ -24,5 +25,5 @@ seek = [withFilesInGit start] start :: CommandStartString 
start file = isAnnexed file $ \(key, _) -> do exists <- inAnnex key - when exists $ liftIO $ putStrLn file + when exists $ liftIO $ putStrLn $ showFile file return Nothing diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs index 12e5ed806..f22300a03 100644 --- a/Command/PreCommit.hs +++ b/Command/PreCommit.hs @@ -32,7 +32,7 @@ perform pair@(file, _) = do ok <- doCommand $ Command.Add.start pair if ok then return $ Just $ cleanup file - else error $ "failed to add " ++ file ++ "; canceling commit" + else error $ "failed to add " ++ showFile file ++ "; canceling commit" cleanup :: FilePath -> CommandCleanup cleanup file = do diff --git a/Command/Unused.hs b/Command/Unused.hs index d9f4e3978..2b390b956 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -68,7 +68,7 @@ checkUnused = do dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"] table l = [" NUMBER KEY"] ++ map cols l - cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k + cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ (showFile . 
show) k pad n s = s ++ replicate (n - length s) ' ' number :: Int -> [a] -> [(Int, a)] diff --git a/Content.hs b/Content.hs index e16ad883c..fae980bae 100644 --- a/Content.hs +++ b/Content.hs @@ -49,7 +49,7 @@ calcGitLink file key = do cwd <- liftIO $ getCurrentDirectory let absfile = case absNormPath cwd file of Just f -> f - Nothing -> error $ "unable to normalize " ++ file + Nothing -> error $ "unable to normalize " ++ showFile file return $ relPathDirToDir (parentDir absfile) (Git.workTree g) ++ annexLocation key diff --git a/Messages.hs b/Messages.hs index 6f4ec1e62..12a836d3c 100644 --- a/Messages.hs +++ b/Messages.hs @@ -11,6 +11,7 @@ import Control.Monad.State (liftIO) import System.IO import Control.Monad (unless) import Data.String.Utils +import Codec.Binary.UTF8.String as UTF8 import Types import qualified Annex @@ -25,7 +26,7 @@ showSideAction s = verbose $ liftIO $ putStrLn $ "(" ++ s ++ ")" showStart :: String -> String -> Annex () showStart command file = verbose $ do - liftIO $ putStr $ command ++ " " ++ file ++ " " + liftIO $ putStr $ command ++ " " ++ showFile file ++ " " liftIO $ hFlush stdout showNote :: String -> Annex () @@ -45,7 +46,6 @@ showEndOk = verbose $ liftIO $ putStrLn "ok" showEndFail :: Annex () showEndFail = verbose $ liftIO $ putStrLn "\nfailed" -{- Exception pretty-printing. -} showErr :: (Show a) => a -> Annex () showErr e = warning $ "git-annex: " ++ show e @@ -57,3 +57,8 @@ warning w = do indent :: String -> String indent s = join "\n" $ map (\l -> " " ++ l) $ lines s + +{- Prepares a filename for display. This is needed because strings are + - internally represented in git-annex in non-decoded form. 
-} +showFile :: String -> String +showFile = decodeString diff --git a/debian/changelog b/debian/changelog index 2e5b97c0f..277869b02 100644 --- a/debian/changelog +++ b/debian/changelog @@ -4,6 +4,7 @@ git-annex (0.21) UNRELEASED; urgency=low * unannex: Fix recently introduced bug when attempting to unannex more than one file at a time. * test: Set git user name and email in case git can't guess values. + * Fix display of unicode filenames. -- Joey Hess <joeyh@debian.org> Wed, 09 Feb 2011 00:12:11 -0400 diff --git a/doc/bugs/problems_with_utf8_names.mdwn b/doc/bugs/problems_with_utf8_names.mdwn index 30f3495f4..257f8dff2 100644 --- a/doc/bugs/problems_with_utf8_names.mdwn +++ b/doc/bugs/problems_with_utf8_names.mdwn @@ -37,10 +37,22 @@ It looks like the common latin1-to-UTF8 encoding. Functionality other than otupu > encoded in utf-8 (an archive could have historical filenames using > varying encodings), and you don't want which files are accessed to > depend on locale settings. +> > I tried to do this by making parts of GitRepo call +> > Codec.Binary.UTF8.String.decodeString when reading filenames from +> > git. This seemed to break attempts to operate on the files, +> > weirdly encoded strings were seen in syscalls in strace. > 1. Keep input and internal data un-decoded, but decode it when > outputting a filename (assuming the filename is encoded using the > user's configured encoding), and allow haskell's output encoding to then > encode it according to the user's locale configuration. +> > This is now [[implemented|done]]. I'm not very happy that I have to watch +> > out for any place that a filename is output and call `showFile` +> > on it, but there are really not too many such places in git-annex. +> > +> > Note that this only affects filenames apparently. +> > (Names of files in the annex, and also some places where names +> > of keys are displayed.) Utf-8 in the uuid.map file etc seems +> > to be handled cleanly. > 1. Avoid encodings entirely. 
Mostly what I'm doing now; probably > could find a way to disable encoding of console output. Then the raw > filename would be displayed, which should work ok. git-annex does @@ -50,13 +62,3 @@ It looks like the common latin1-to-UTF8 encoding. Functionality other than otupu > One other possible > issue would be that this could cause problems if git-annex were > translated. -> -> BTW, for more fun, try unsetting LANG, and then you can see -> stuff like this: - - joey@gnu:~/tmp/aa>git annex add ./Üa - add add add add git-annex: <stdout>: commitAndReleaseBuffer: invalid - argument (Invalid or incomplete multibyte or wide character) - -> (Add -q to work around this; once it doesn't need to print the filename, -> it can act on it ok!) diff --git a/doc/bugs/unhappy_without_UTF8_locale.mdwn b/doc/bugs/unhappy_without_UTF8_locale.mdwn new file mode 100644 index 000000000..6f1df4fab --- /dev/null +++ b/doc/bugs/unhappy_without_UTF8_locale.mdwn @@ -0,0 +1,33 @@ +Try unsetting LANG and passing git-annex unicode filenames. + + joey@gnu:~/tmp/aa>git annex add ./Üa + add add add add git-annex: <stdout>: commitAndReleaseBuffer: invalid + argument (Invalid or incomplete multibyte or wide character) + +The same problem can be seen with a simple haskell program: + + import System.Environment + import Codec.Binary.UTF8.String + main = do + args <- getArgs + putStrLn $ decodeString $ args !! 0 + + joey@gnu:~/src/git-annex>LANG= runghc ~/foo.hs Ü + foo.hs: <stdout>: hPutChar: invalid argument (Invalid or incomplete multibyte or wide character) + +(The call to `decodeString` is necessary to make the input +unicode string be displayed properly in a utf8 locale, but +does not contribute to this problem.) + +I guess that haskell is setting the IO encoding to latin1, which +is [documented](http://haskell.org/ghc/docs/latest/html/libraries/base/System-IO.html#v:latin1) +to error out on characters > 255. 
+ +So this program doesn't have the problem -- but may output garbage +on non-utf-8 capable terminals: + + import System.IO + main = do + hSetEncoding stdout utf8 + args <- getArgs + putStrLn $ decodeString $ args !! 0 |