summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Annex/Branch.hs 10
-rw-r--r-- Annex/Journal.hs 6
-rw-r--r-- debian/changelog 4
-rw-r--r-- doc/bugs/forget_corrupts_non-ascii_chars.mdwn 4
-rw-r--r-- doc/bugs/unicode_tags.mdwn 3
5 files changed, 19 insertions, 8 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs
index 94c4c029c..7a75d8acf 100644
--- a/Annex/Branch.hs
+++ b/Annex/Branch.hs
@@ -25,9 +25,10 @@ module Annex.Branch (
performTransitions,
) where
-import qualified Data.ByteString.Lazy.Char8 as L
+import qualified Data.ByteString.Lazy as L
import qualified Data.Set as S
import qualified Data.Map as M
+import Data.Bits.Utils
import Common.Annex
import Annex.BranchState
@@ -199,7 +200,7 @@ getHistorical :: RefDate -> FilePath -> Annex String
getHistorical date = getRef (Git.Ref.dateRef fullname date)
getRef :: Ref -> FilePath -> Annex String
-getRef ref file = withIndex $ L.unpack <$> catFile ref file
+getRef ref file = withIndex $ decodeBS <$> catFile ref file
{- Applies a function to modifiy the content of a file.
-
@@ -259,7 +260,8 @@ commitIndex' jl branchref message parents = do
where
-- look for "parent ref" lines and return the refs
commitparents = map (Git.Ref . snd) . filter isparent .
- map (toassoc . L.unpack) . L.lines
+ map (toassoc . decodeBS) . L.split newline
+ newline = c2w8 '\n'
toassoc = separate (== ' ')
isparent (k,_) = k == "parent"
@@ -432,7 +434,7 @@ handleTransitions jl localts refs = do
return True
where
getreftransition ref = do
- ts <- parseTransitionsStrictly "remote" . L.unpack
+ ts <- parseTransitionsStrictly "remote" . decodeBS
<$> catFile ref transitionsLog
return (ref, ts)
diff --git a/Annex/Journal.hs b/Annex/Journal.hs
index 395e81d29..dcd3779de 100644
--- a/Annex/Journal.hs
+++ b/Annex/Journal.hs
@@ -13,8 +13,6 @@
module Annex.Journal where
-import System.IO.Binary
-
import Common.Annex
import Annex.Exception
import qualified Git
@@ -42,7 +40,7 @@ setJournalFile _jl file content = do
jfile <- fromRepo $ journalFile file
let tmpfile = tmp </> takeFileName jfile
liftIO $ do
- writeBinaryFile tmpfile content
+ writeFileAnyEncoding tmpfile content
moveFile tmpfile jfile
{- Gets any journalled content for a file in the branch. -}
@@ -54,7 +52,7 @@ getJournalFile _jl = getJournalFileStale
- changes. -}
getJournalFileStale :: FilePath -> Annex (Maybe String)
getJournalFileStale file = inRepo $ \g -> catchMaybeIO $
- readFileStrict $ journalFile file g
+ readFileStrictAnyEncoding $ journalFile file g
{- List of files that have updated content in the journal. -}
getJournalledFiles :: JournalLocked -> Annex [FilePath]
diff --git a/debian/changelog b/debian/changelog
index 8603adf17..68678ed21 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -13,6 +13,10 @@ git-annex (5.20140518) UNRELEASED; urgency=medium
so that it can be easily enabled elsewhere.
* android: Run busybox install with -s, since some versions of Android
prohibit making hard links.
+ * Fix encoding of data written to git-annex branch. Avoid truncating
+ unicode characters to 8 bits. Allow any encoding to be used, as with
+ filenames (but utf8 is the sane choice). Affects metadata and repository
+ descriptions, and preferred content expressions.
-- Joey Hess <joeyh@debian.org> Mon, 19 May 2014 15:59:25 -0400
diff --git a/doc/bugs/forget_corrupts_non-ascii_chars.mdwn b/doc/bugs/forget_corrupts_non-ascii_chars.mdwn
index daee6f63a..f4506c228 100644
--- a/doc/bugs/forget_corrupts_non-ascii_chars.mdwn
+++ b/doc/bugs/forget_corrupts_non-ascii_chars.mdwn
@@ -72,3 +72,7 @@ backend usage:
"""]]
well that's interesting - the above paste is broken by ikiwiki as well... in the text area where i paste it, "rachel@topcrapn:~/Vidéos/anarcat" shows up as "rachel@topcrapn:~/Vidéos/anarcat" but when i preview, the character gets corrupted. and obviously, the second instance then gets *double* corrupted - wheepee. the original paste has "rachel@topcrapn:~/Vidéos/anarcat". --[[anarcat]]
+
+> [[fixed|done]]; writes to git-annex branch now preserve the original
+> encoding, and I've tested that lots of interesting unicode is preserved
+> across a forget run. --[[Joey]]
diff --git a/doc/bugs/unicode_tags.mdwn b/doc/bugs/unicode_tags.mdwn
index 6ecfad95a..ff3c78666 100644
--- a/doc/bugs/unicode_tags.mdwn
+++ b/doc/bugs/unicode_tags.mdwn
@@ -44,3 +44,6 @@ OS X 10.9
LC_TIME="en_US.UTF-8"
LC_ALL=
+> All strings written to the git-annex branch were truncated to 8 bits.
+> I've fixed this, and this example works now (of course data written with
+> an old git-annex remains truncated). [[done]] --[[Joey]]