diff options
-rw-r--r-- | Annex/TaggedPush.hs | 1 | ||||
-rw-r--r-- | Git/UnionMerge.hs | 2 | ||||
-rw-r--r-- | Utility/Base64.hs | 9 | ||||
-rw-r--r-- | Utility/FileSystemEncoding.hs | 11 | ||||
-rw-r--r-- | debian/changelog | 6 | ||||
-rw-r--r-- | doc/bugs/cannot_change_locale___40__en__95__US.UTF-8__41__.mdwn | 4 | ||||
-rw-r--r-- | doc/bugs/view_fails_with___34__invalid_character__34__.mdwn | 26 | ||||
-rw-r--r-- | doc/bugs/weird_entry_in_process_list/comment_3_84ef8c257fd7bbd1db4ac124af241929._comment | 15 | ||||
-rw-r--r-- | doc/devblog/day_312__release_prep.mdwn | 12 |
9 files changed, 78 insertions, 8 deletions
diff --git a/Annex/TaggedPush.hs b/Annex/TaggedPush.hs index 642d4db0b..eff8d6e0e 100644 --- a/Annex/TaggedPush.hs +++ b/Annex/TaggedPush.hs @@ -45,7 +45,6 @@ fromTaggedBranch b = case split "/" $ Git.fromRef b of ("refs":"synced":u:_base) -> Just (toUUID u, Nothing) _ -> Nothing - where taggedPush :: UUID -> Maybe String -> Git.Ref -> Remote -> Git.Repo -> IO Bool taggedPush u info branch remote = Git.Command.runBool diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs index 26bb3d39f..bb42e7cc5 100644 --- a/Git/UnionMerge.hs +++ b/Git/UnionMerge.hs @@ -93,7 +93,7 @@ mergeFile info file h repo = case filter (/= nullSha) [Ref asha, Ref bsha] of -- FileSystemEncoding for this is a hack, but ensures there -- are no decoding errors. Note that this works because -- hashObject sets fileEncoding on its write handle. - getcontents s = lines . encodeW8 . L.unpack <$> catObject h s + getcontents s = lines . encodeW8NUL . L.unpack <$> catObject h s {- Calculates a union merge between a list of refs, with contents. - diff --git a/Utility/Base64.hs b/Utility/Base64.hs index 6ab3c984f..0e3087276 100644 --- a/Utility/Base64.hs +++ b/Utility/Base64.hs @@ -1,5 +1,8 @@ {- Simple Base64 encoding of Strings - + - Note that this uses the FileSystemEncoding, so it can be used on Strings + - that represent filepaths containing arbitrarily encoded characters. + - - Copyright 2011, 2015 Joey Hess <id@joeyh.name> - - License: BSD-2-clause @@ -9,13 +12,15 @@ module Utility.Base64 (toB64, fromB64Maybe, fromB64, prop_b64_roundtrips) where import qualified "sandi" Codec.Binary.Base64 as B64 import Data.Maybe +import qualified Data.ByteString.Lazy as L import Data.ByteString.UTF8 (fromString, toString) +import Utility.FileSystemEncoding toB64 :: String -> String -toB64 = toString . B64.encode . fromString +toB64 = toString . B64.encode . L.toStrict . encodeBS fromB64Maybe :: String -> Maybe String -fromB64Maybe s = either (const Nothing) (Just . 
toString) +fromB64Maybe s = either (const Nothing) (Just . decodeBS . L.fromStrict) (B64.decode $ fromString s) fromB64 :: String -> String diff --git a/Utility/FileSystemEncoding.hs b/Utility/FileSystemEncoding.hs index 41c5972a0..25a09ecc0 100644 --- a/Utility/FileSystemEncoding.hs +++ b/Utility/FileSystemEncoding.hs @@ -13,6 +13,7 @@ module Utility.FileSystemEncoding ( withFilePath, md5FilePath, decodeBS, + encodeBS, decodeW8, encodeW8, encodeW8NUL, @@ -81,13 +82,21 @@ md5FilePath = MD5.Str . _encodeFilePath {- Decodes a ByteString into a FilePath, applying the filesystem encoding. -} decodeBS :: L.ByteString -> FilePath #ifndef mingw32_HOST_OS -decodeBS = encodeW8 . L.unpack +decodeBS = encodeW8NUL . L.unpack #else {- On Windows, we assume that the ByteString is utf-8, since Windows - only uses unicode for filenames. -} decodeBS = L8.toString #endif +{- Encodes a FilePath into a ByteString, applying the filesystem encoding. -} +encodeBS :: FilePath -> L.ByteString +#ifndef mingw32_HOST_OS +encodeBS = L.pack . decodeW8NUL +#else +encodeBS = L8.fromString +#endif + {- Converts a [Word8] to a FilePath, encoding using the filesystem encoding. - - w82c produces a String, which may contain Chars that are invalid diff --git a/debian/changelog b/debian/changelog index 789d0f8cc..3100d0125 100644 --- a/debian/changelog +++ b/debian/changelog @@ -34,8 +34,10 @@ git-annex (5.20150732) UNRELEASED; urgency=medium built using the cryptonite library. * Improve Setup.hs file so that cabal copy --destdir works. Thanks, Magnus Therning. - * metadata: Fix reversion introduced in 5.20150727 that caused display - of metadata to not work. + * metadata: Fix reversion introduced in 5.20150727 that caused recursive + display of metadata to not work. + * Fix setting/getting/viewing metadata that contains unicode or other + special characters, when in a non-unicode locale. 
-- Joey Hess <id@joeyh.name> Fri, 31 Jul 2015 12:31:39 -0400 diff --git a/doc/bugs/cannot_change_locale___40__en__95__US.UTF-8__41__.mdwn b/doc/bugs/cannot_change_locale___40__en__95__US.UTF-8__41__.mdwn index dd0a2b742..bdf288a41 100644 --- a/doc/bugs/cannot_change_locale___40__en__95__US.UTF-8__41__.mdwn +++ b/doc/bugs/cannot_change_locale___40__en__95__US.UTF-8__41__.mdwn @@ -27,4 +27,6 @@ he_IL.iso88598 he_IL.utf8 POSIX - +> Closing since I've seen nothing to indicate this is anything other than a +> misconfigured OS that doesn't have the requested locale available. +> [[done]] --[[Joey]] diff --git a/doc/bugs/view_fails_with___34__invalid_character__34__.mdwn b/doc/bugs/view_fails_with___34__invalid_character__34__.mdwn index 4b6e97764..f77f5013f 100644 --- a/doc/bugs/view_fails_with___34__invalid_character__34__.mdwn +++ b/doc/bugs/view_fails_with___34__invalid_character__34__.mdwn @@ -28,3 +28,29 @@ local repository version: 5 supported repository version: 5 upgrade supported from repository versions: 0 1 2 4 """]] + +> I'm assuming the setlocale part of this is a misconfigured system locale; +> as also seen by an arch linux user in +> <http://git-annex.branchable.com/bugs/cannot_change_locale___40__en__95__US.UTF-8__41__/> +> +> So, disregarding that part of the bug report, we still have the actual +> failure. +> +> With LANG=C, setting and getting metadata like "Rondò Veneziano" fails, +> as does generating views of that metadata. +> +> In all cases, it's an IO encoding failure, "commitBuffer: invalid argument (invalid character)" +> +> This only occurs when there's a space in the metadata; in this case the + +> value is base64ed. While the 'ò' comes back out as "\242", which is the right +> character, it's not encoded using the filesystem encoding. This means that +> the IO layer can't handle it, when not in a unicode locale. Instead, it +> needs to come back out as "\56515\56498". 
+> +> Apparently this is a reversion; it worked in an earlier version of +> git-annex. Commits such as 9b93278e8abe1163d53fbf56909d0fe6d7de69e9 +> or the conversion to Sandi may have caused the reversion, unsure. +> +> Fix is to apply the filesystem encoding when decoding base64ed values. +> [[done]] --[[Joey]] diff --git a/doc/bugs/weird_entry_in_process_list/comment_3_84ef8c257fd7bbd1db4ac124af241929._comment b/doc/bugs/weird_entry_in_process_list/comment_3_84ef8c257fd7bbd1db4ac124af241929._comment new file mode 100644 index 000000000..c7a7e1a28 --- /dev/null +++ b/doc/bugs/weird_entry_in_process_list/comment_3_84ef8c257fd7bbd1db4ac124af241929._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2015-08-11T22:54:48Z" + content=""" +I made it "more intuitive", to the extent reasonably possible. + +I don't see why the standalone build is a must. Builds of current versions of +git-annex are available from many distributions. + +That C hack would work, but I'm not at all convinced that it's worth adding +such a potentially fragile hack for such a minor inconvenience. + +The GHC bug report is <https://ghc.haskell.org/trac/ghc/ticket/9143>. +"""]] diff --git a/doc/devblog/day_312__release_prep.mdwn b/doc/devblog/day_312__release_prep.mdwn new file mode 100644 index 000000000..4e0db7de2 --- /dev/null +++ b/doc/devblog/day_312__release_prep.mdwn @@ -0,0 +1,12 @@ +Catching up on weekend's traffic, and preparing for a release tomorrow. + +Found another place where the optparse-applicative conversion broke some +command-line parsing; using git-annex metadata to dump metadata recursively +got broken. This is the second known bug caused by that transition, which +is not too surprising given how large it was. + +Tracked down and fixed a [very tricky encoding problem with metadata values](http://git-annex.branchable.com/bugs/view_fails_with___34__invalid_character__34__/). 
+ +The arm autobuilder broke so it won't boot; got a serial console hooked up +to it and looks like a botched upgrade resulting in a udev/systemd/linux +version mismatch. |