summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Joey Hess <joey@kitenet.net> 2011-03-08 18:05:20 -0400
committer: Joey Hess <joey@kitenet.net> 2011-03-08 18:05:20 -0400
commit: 26544de9463291b8185fdd1a7c1b33710ef7db3c (patch)
tree: ab7d626afd1908f6f089e154f87e2585dff703d6
parent: b85c91743c73597f41cae73b042ffa86be929ed3 (diff)
put in utf8 forcing workaround
Haskell's IO layer crashes on characters > 255 when in a non-unicode (latin1) locale. Until Haskell gets better behavior, put in an admittedly ugly workaround for that: git-annex forces utf8 output mode no matter what locale is selected. So if you use a non-utf8 locale, your filenames with characters > 127 will not be displayed as you'd expect. But at least it won't crash.
-rw-r--r-- CmdLine.hs 2
-rw-r--r-- Messages.hs 9
-rw-r--r-- debian/changelog 6
-rw-r--r-- doc/bugs/unhappy_without_UTF8_locale.mdwn 2
-rw-r--r-- git-annex.hs 1
5 files changed, 20 insertions, 0 deletions
diff --git a/CmdLine.hs b/CmdLine.hs
index 475ca99e7..1c01aa75f 100644
--- a/CmdLine.hs
+++ b/CmdLine.hs
@@ -11,6 +11,7 @@ module CmdLine (
shutdown
) where
+import System.IO
import System.IO.Error (try)
import System.Console.GetOpt
import Control.Monad.State (liftIO)
@@ -30,6 +31,7 @@ import UUID
{- Runs the passed command line. -}
dispatch :: Git.Repo -> [String] -> [Command] -> [Option] -> String -> IO ()
dispatch gitrepo args cmds options header = do
+ forceUtf8
state <- Annex.new gitrepo allBackends
(actions, state') <- Annex.run state $ parseCmd args header cmds options
tryRun state' $ [startup, upgrade] ++ actions ++ [shutdown]
diff --git a/Messages.hs b/Messages.hs
index 90857280a..83b3ecf23 100644
--- a/Messages.hs
+++ b/Messages.hs
@@ -64,3 +64,12 @@ indent s = join "\n" $ map (\l -> " " ++ l) $ lines s
- non-decoded form. -}
filePathToString :: FilePath -> String
filePathToString = if SysConfig.unicodefilepath then id else UTF8.decodeString
+
+{- Workaround to avoid crashes displaying filenames containing
+ - characters > 255 in non-utf8 locales. Force encodings to utf-8,
+ - even though this may mean some characters in the encoding
+ - are mangled. -}
+forceUtf8 :: IO ()
+forceUtf8 = do
+ hSetEncoding stdout utf8
+ hSetEncoding stderr utf8
diff --git a/debian/changelog b/debian/changelog
index e8b094607..a414b3bef 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -2,6 +2,12 @@ git-annex (0.23) UNRELEASED; urgency=low
* Support ssh remotes with a port specified.
* whereis: New subcommand to show where a file's content has gotten to.
+ * Haskell's IO layer crashes on characters > 255 when in a non-unicode
+ locale. Until Haskell gets better behavior, put in an admittedly
+ ugly workaround for that: git-annex forces utf8 output mode no matter
+ what locale is selected. So if you use a non-utf8 locale, your
+ filenames with characters > 127 will not be displayed as you'd expect.
+ But at least it won't crash.
-- Joey Hess <joeyh@debian.org> Sat, 05 Mar 2011 15:39:13 -0400
diff --git a/doc/bugs/unhappy_without_UTF8_locale.mdwn b/doc/bugs/unhappy_without_UTF8_locale.mdwn
index 7b0e5be66..8d22b9ee4 100644
--- a/doc/bugs/unhappy_without_UTF8_locale.mdwn
+++ b/doc/bugs/unhappy_without_UTF8_locale.mdwn
@@ -8,6 +8,8 @@ Try unsetting LANG and passing git-annex unicode filenames.
> with certain input filenames, while in en_US.UTF-8, it's ok.
> The workaround below avoided the problem in de_DE.UTF-8. --[[Joey]]
+> Put in the utf-8 forcing workaround for now. [[done]] --[[Joey]]
+
## underlying haskell problem and workaround
The same problem can be seen with a simple haskell program:
diff --git a/git-annex.hs b/git-annex.hs
index 878d8bdbb..9d6012f2c 100644
--- a/git-annex.hs
+++ b/git-annex.hs
@@ -8,6 +8,7 @@
import System.Environment
import GitAnnex
+
main :: IO ()
main = do
args <- getArgs