summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2011-03-12 15:30:17 -0400
committerGravatar Joey Hess <joey@kitenet.net>2011-03-12 15:30:17 -0400
commit72d268401604fbac93ca4701ab53d32880483686 (patch)
treea1ab880d30ece5ed1720d6db13ab5d9c5c1ca560
parent9229d182d32570f6829ced655aa673ceddfe7693 (diff)
Rethink filename encoding handling for display. Since filename encoding may or may not match locale settings, any attempt to decode filenames will fail for some files. So instead, do all output in binary mode.0.23
-rw-r--r--Backend/File.hs6
-rw-r--r--Backend/SHA.hs2
-rw-r--r--Backend/WORM.hs2
-rw-r--r--CmdLine.hs3
-rw-r--r--Command/Find.hs2
-rw-r--r--Command/PreCommit.hs2
-rw-r--r--Command/Unused.hs2
-rw-r--r--Content.hs2
-rw-r--r--Messages.hs30
-rw-r--r--debian/changelog15
-rw-r--r--doc/bugs/problems_with_utf8_names.mdwn3
-rw-r--r--doc/todo/support-non-utf8-locales.mdwn6
12 files changed, 38 insertions, 37 deletions
diff --git a/Backend/File.hs b/Backend/File.hs
index d5691595a..d76cd2939 100644
--- a/Backend/File.hs
+++ b/Backend/File.hs
@@ -193,14 +193,14 @@ checkKeyNumCopies key file numcopies = do
missingNote :: String -> Int -> Int -> String -> String
missingNote file 0 _ [] =
- "** No known copies of " ++ filePathToString file ++ " exist!"
+ "** No known copies of " ++ file ++ " exist!"
missingNote file 0 _ untrusted =
- "Only these untrusted locations may have copies of " ++ filePathToString file ++
+ "Only these untrusted locations may have copies of " ++ file ++
"\n" ++ untrusted ++
"Back it up to trusted locations with git-annex copy."
missingNote file present needed [] =
"Only " ++ show present ++ " of " ++ show needed ++
- " trustworthy copies of " ++ filePathToString file ++ " exist." ++
+ " trustworthy copies of " ++ file ++ " exist." ++
"\nBack it up with git-annex copy."
missingNote file present needed untrusted =
missingNote file present needed [] ++
diff --git a/Backend/SHA.hs b/Backend/SHA.hs
index c074ab48a..4eea890ce 100644
--- a/Backend/SHA.hs
+++ b/Backend/SHA.hs
@@ -83,5 +83,5 @@ checkKeyChecksum size key = do
then return True
else do
dest <- moveBad key
- warning $ "Bad file content; moved to " ++ filePathToString dest
+ warning $ "Bad file content; moved to " ++ dest
return False
diff --git a/Backend/WORM.hs b/Backend/WORM.hs
index 8a6412eb1..a0d814aa0 100644
--- a/Backend/WORM.hs
+++ b/Backend/WORM.hs
@@ -70,5 +70,5 @@ checkKeySize key = do
then return True
else do
dest <- moveBad key
- warning $ "Bad file size; moved to " ++ filePathToString dest
+ warning $ "Bad file size; moved to " ++ dest
return False
diff --git a/CmdLine.hs b/CmdLine.hs
index 1c01aa75f..b8fd6af7c 100644
--- a/CmdLine.hs
+++ b/CmdLine.hs
@@ -11,7 +11,6 @@ module CmdLine (
shutdown
) where
-import System.IO
import System.IO.Error (try)
import System.Console.GetOpt
import Control.Monad.State (liftIO)
@@ -31,7 +30,7 @@ import UUID
{- Runs the passed command line. -}
dispatch :: Git.Repo -> [String] -> [Command] -> [Option] -> String -> IO ()
dispatch gitrepo args cmds options header = do
- forceUtf8
+ setupConsole
state <- Annex.new gitrepo allBackends
(actions, state') <- Annex.run state $ parseCmd args header cmds options
tryRun state' $ [startup, upgrade] ++ actions ++ [shutdown]
diff --git a/Command/Find.hs b/Command/Find.hs
index 3e9125b9a..1ca6ff1e7 100644
--- a/Command/Find.hs
+++ b/Command/Find.hs
@@ -25,5 +25,5 @@ seek = [withFilesInGit start]
start :: CommandStartString
start file = isAnnexed file $ \(key, _) -> do
exists <- inAnnex key
- when exists $ liftIO $ putStrLn $ filePathToString file
+ when exists $ liftIO $ putStrLn file
return Nothing
diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs
index d2f696434..6f9adb79a 100644
--- a/Command/PreCommit.hs
+++ b/Command/PreCommit.hs
@@ -34,7 +34,7 @@ perform pair@(file, _) = do
ok <- doCommand $ Command.Add.start pair
if ok
then return $ Just $ cleanup file
- else error $ "failed to add " ++ filePathToString file ++ "; canceling commit"
+ else error $ "failed to add " ++ file ++ "; canceling commit"
cleanup :: FilePath -> CommandCleanup
cleanup file = do
diff --git a/Command/Unused.hs b/Command/Unused.hs
index 9f3881d59..a614ce5d9 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -68,7 +68,7 @@ checkUnused = do
dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"]
table l = [" NUMBER KEY"] ++ map cols l
- cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ (filePathToString . show) k
+ cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k
pad n s = s ++ replicate (n - length s) ' '
number :: Int -> [a] -> [(Int, a)]
diff --git a/Content.hs b/Content.hs
index bcd4ac0e1..895a8812c 100644
--- a/Content.hs
+++ b/Content.hs
@@ -50,7 +50,7 @@ calcGitLink file key = do
cwd <- liftIO $ getCurrentDirectory
let absfile = case absNormPath cwd file of
Just f -> f
- Nothing -> error $ "unable to normalize " ++ filePathToString file
+ Nothing -> error $ "unable to normalize " ++ file
return $ relPathDirToDir (parentDir absfile)
(Git.workTree g) </> ".git" </> annexLocation key
diff --git a/Messages.hs b/Messages.hs
index 83b3ecf23..733638ce1 100644
--- a/Messages.hs
+++ b/Messages.hs
@@ -11,11 +11,9 @@ import Control.Monad.State (liftIO)
import System.IO
import Control.Monad (unless)
import Data.String.Utils
-import qualified Codec.Binary.UTF8.String as UTF8
import Types
import qualified Annex
-import qualified SysConfig
verbose :: Annex () -> Annex ()
verbose a = do
@@ -27,7 +25,7 @@ showSideAction s = verbose $ liftIO $ putStrLn $ "(" ++ s ++ ")"
showStart :: String -> String -> Annex ()
showStart command file = verbose $ do
- liftIO $ putStr $ command ++ " " ++ filePathToString file ++ " "
+ liftIO $ putStr $ command ++ " " ++ file ++ " "
liftIO $ hFlush stdout
showNote :: String -> Annex ()
@@ -59,17 +57,15 @@ warning w = do
indent :: String -> String
indent s = join "\n" $ map (\l -> " " ++ l) $ lines s
-{- Prepares a filename for display. This is needed because on many
- - platforms (eg, unix), FilePaths are internally stored in
- - non-decoded form. -}
-filePathToString :: FilePath -> String
-filePathToString = if SysConfig.unicodefilepath then id else UTF8.decodeString
-
-{- Workaround to avoid crashes displaying filenames containing
- - characters > 255 in non-utf8 locales. Force encodings to utf-8,
- - even though this may mean some characters in the encoding
- - are mangled. -}
-forceUtf8 :: IO ()
-forceUtf8 = do
- hSetEncoding stdout utf8
- hSetEncoding stderr utf8
+{- By default, haskell honors the user's locale in its output to stdout
+ - and stderr. While that's great for proper unicode support, for git-annex
+ - all that's really needed is the ability to display simple messages
+ - (currently untranslated), and importantly, to display filenames exactly
+ - as they are written on disk, no matter what their encoding. So, force
+ - raw mode.
+ -
+ - NB: Once git-annex gets localized, this will need a rethink. -}
+setupConsole :: IO ()
+setupConsole = do
+ hSetBinaryMode stdout True
+ hSetBinaryMode stderr True
diff --git a/debian/changelog b/debian/changelog
index a414b3bef..90b4cf6b2 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,15 +1,12 @@
-git-annex (0.23) UNRELEASED; urgency=low
+git-annex (0.23) unstable; urgency=low
* Support ssh remotes with a port specified.
* whereis: New subcommand to show where a file's content has gotten to.
- * Haskell's IO layer crashes on characters > 255 when in a non-unicode
- locale. Until Haskell gets better behavior, put in an admittedly
- ugly workaround for that: git-annex forces utf8 output mode no matter
- what locale is selected. So if you use a non-utf8 locale, your
- filenames with characters > 127 will not be displayed as you'd expect.
- But at least it won't crash.
-
- -- Joey Hess <joeyh@debian.org> Sat, 05 Mar 2011 15:39:13 -0400
+ * Rethink filename encoding handling for display. Since filename encoding
+ may or may not match locale settings, any attempt to decode filenames
+ will fail for some files. So instead, do all output in binary mode.
+
+ -- Joey Hess <joeyh@debian.org> Sat, 12 Mar 2011 15:02:49 -0400
git-annex (0.22) unstable; urgency=low
diff --git a/doc/bugs/problems_with_utf8_names.mdwn b/doc/bugs/problems_with_utf8_names.mdwn
index efde1c9a3..d6dc6ca3c 100644
--- a/doc/bugs/problems_with_utf8_names.mdwn
+++ b/doc/bugs/problems_with_utf8_names.mdwn
@@ -63,6 +63,9 @@ It looks like the common latin1-to-UTF8 encoding. Functionality other than otupu
> One other possible
> issue would be that this could cause problems if git-annex were
> translated.
+> > On second thought, I switched to this. Any decoding of a filename
+> > is going to make someone unhappy; the previous approach broke
+> > non-utf8 filenames.
----
diff --git a/doc/todo/support-non-utf8-locales.mdwn b/doc/todo/support-non-utf8-locales.mdwn
index 60f35eec8..da40118d5 100644
--- a/doc/todo/support-non-utf8-locales.mdwn
+++ b/doc/todo/support-non-utf8-locales.mdwn
@@ -18,3 +18,9 @@ of filename encodings. In particular,
git-annex's behavior is unlikely to improve much until haskell's
support for utf8 filenames improves. --[[Joey]]
+
+> [[done]] -- I just turned off all encoding handling on stdout and stderr,
+> which avoids these problems nicely. Git-annex now displays just what it
+> input, at least on platforms where haskell does not decode unicode in
+> FilePaths. This will later be a problem when it gets localized, but for
+> now works great. --[[Joey]]