diff options
-rw-r--r-- Annex/Branch.hs | 10
-rw-r--r-- Annex/Journal.hs | 6
-rw-r--r-- debian/changelog | 4
-rw-r--r-- doc/bugs/forget_corrupts_non-ascii_chars.mdwn | 4
-rw-r--r-- doc/bugs/unicode_tags.mdwn | 3
5 files changed, 19 insertions, 8 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 94c4c029c..7a75d8acf 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -25,9 +25,10 @@ module Annex.Branch ( performTransitions, ) where -import qualified Data.ByteString.Lazy.Char8 as L +import qualified Data.ByteString.Lazy as L import qualified Data.Set as S import qualified Data.Map as M +import Data.Bits.Utils import Common.Annex import Annex.BranchState @@ -199,7 +200,7 @@ getHistorical :: RefDate -> FilePath -> Annex String getHistorical date = getRef (Git.Ref.dateRef fullname date) getRef :: Ref -> FilePath -> Annex String -getRef ref file = withIndex $ L.unpack <$> catFile ref file +getRef ref file = withIndex $ decodeBS <$> catFile ref file {- Applies a function to modifiy the content of a file. - @@ -259,7 +260,8 @@ commitIndex' jl branchref message parents = do where -- look for "parent ref" lines and return the refs commitparents = map (Git.Ref . snd) . filter isparent . - map (toassoc . L.unpack) . L.lines + map (toassoc . decodeBS) . L.split newline + newline = c2w8 '\n' toassoc = separate (== ' ') isparent (k,_) = k == "parent" @@ -432,7 +434,7 @@ handleTransitions jl localts refs = do return True where getreftransition ref = do - ts <- parseTransitionsStrictly "remote" . L.unpack + ts <- parseTransitionsStrictly "remote" . decodeBS <$> catFile ref transitionsLog return (ref, ts) diff --git a/Annex/Journal.hs b/Annex/Journal.hs index 395e81d29..dcd3779de 100644 --- a/Annex/Journal.hs +++ b/Annex/Journal.hs @@ -13,8 +13,6 @@ module Annex.Journal where -import System.IO.Binary - import Common.Annex import Annex.Exception import qualified Git @@ -42,7 +40,7 @@ setJournalFile _jl file content = do jfile <- fromRepo $ journalFile file let tmpfile = tmp </> takeFileName jfile liftIO $ do - writeBinaryFile tmpfile content + writeFileAnyEncoding tmpfile content moveFile tmpfile jfile {- Gets any journalled content for a file in the branch. 
-} @@ -54,7 +52,7 @@ getJournalFile _jl = getJournalFileStale - changes. -} getJournalFileStale :: FilePath -> Annex (Maybe String) getJournalFileStale file = inRepo $ \g -> catchMaybeIO $ - readFileStrict $ journalFile file g + readFileStrictAnyEncoding $ journalFile file g {- List of files that have updated content in the journal. -} getJournalledFiles :: JournalLocked -> Annex [FilePath] diff --git a/debian/changelog b/debian/changelog index 8603adf17..68678ed21 100644 --- a/debian/changelog +++ b/debian/changelog @@ -13,6 +13,10 @@ git-annex (5.20140518) UNRELEASED; urgency=medium so that it can be easily enabled elsewhere. * android: Run busybox install with -s, since some versions of Android prohibit making hard links. + * Fix encoding of data written to git-annex branch. Avoid truncating + unicode characters to 8 bits. Allow any encoding to be used, as with + filenames (but utf8 is the sane choice). Affects metadata and repository + descriptions, and preferred content expressions. -- Joey Hess <joeyh@debian.org> Mon, 19 May 2014 15:59:25 -0400 diff --git a/doc/bugs/forget_corrupts_non-ascii_chars.mdwn b/doc/bugs/forget_corrupts_non-ascii_chars.mdwn index daee6f63a..f4506c228 100644 --- a/doc/bugs/forget_corrupts_non-ascii_chars.mdwn +++ b/doc/bugs/forget_corrupts_non-ascii_chars.mdwn @@ -72,3 +72,7 @@ backend usage: """]] well that's interesting - the above paste is broken by ikiwiki as well... in the text area where i paste it, "rachel@topcrapn:~/Vidéos/anarcat" shows up as "rachel@topcrapn:~/Vidéos/anarcat" but when i preview, the character gets corrupted. and obviously, the second instance then gets *double* corrupted - wheepee. the original paste has "rachel@topcrapn:~/Vidéos/anarcat". --[[anarcat]] + +> [[fixed|done]]; writes to git-annex branch now preserve the original +> encoding, and I've tested that lots of interesting unicode is preserved +> across a forget run. 
--[[Joey]] diff --git a/doc/bugs/unicode_tags.mdwn b/doc/bugs/unicode_tags.mdwn index 6ecfad95a..ff3c78666 100644 --- a/doc/bugs/unicode_tags.mdwn +++ b/doc/bugs/unicode_tags.mdwn @@ -44,3 +44,6 @@ OS X 10.9 LC_TIME="en_US.UTF-8" LC_ALL= +> All strings written to the git-annex branch were truncated to 8 bits. +> I've fixed this, and this example works now (of course data written with +> an old git-annex remains truncated). [[done]] --[[Joey]] |