diff options
author | Joey Hess <joeyh@joeyh.name> | 2015-03-04 11:16:03 -0400 |
---|---|---|
committer | Joey Hess <joeyh@joeyh.name> | 2015-03-04 12:54:30 -0400 |
commit | 05697fe62116181511084a2eba28c5220e8a0363 (patch) | |
tree | 6965f56f5648d6dfa6c5e7d6e31e32eb3975b073 /Utility | |
parent | 0c3570844cf60428808d01a73c808e4f7232f082 (diff) |
metadata: Fix encoding problem that led to mojibake when storing metadata strings that contained both unicode characters and a space (or '!') character.
The fix is to stop using w82s, which does not properly reconstitute unicode
strings. Instead, use a utf8 bytestring to get the [Word8] to base64. This
passes unicode through perfectly, including any invalid filesystem encoded
characters.
Note that toB64 / fromB64 are also used for creds and cipher
embedding. It would be unfortunate if this change broke those uses.
For cipher embedding, note that ciphers can contain arbitrary bytes (should
really be using ByteString.Char8 there). Testing indicated it's not safe to
use the new fromB64 there; I think that characters were incorrectly
combined.
For credpair embedding, the username or password could contain unicode.
Before, that unicode would fail to round-trip through the b64.
So, I guess this is not going to break any embedded creds that worked
before.
This bug may have affected some creds before, and if so,
this change will not fix old ones, but should fix new ones at least.
Diffstat (limited to 'Utility')
-rw-r--r-- | Utility/Base64.hs | 18 | ||||
-rw-r--r-- | Utility/FileSystemEncoding.hs | 17 |
2 files changed, 28 insertions, 7 deletions
diff --git a/Utility/Base64.hs b/Utility/Base64.hs index 56637a117..80cc122a1 100644 --- a/Utility/Base64.hs +++ b/Utility/Base64.hs @@ -1,24 +1,28 @@ -{- Simple Base64 access +{- Simple Base64 encoding of Strings - - Copyright 2011 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} -module Utility.Base64 (toB64, fromB64Maybe, fromB64) where +module Utility.Base64 (toB64, fromB64Maybe, fromB64, prop_b64_roundtrips) where -import "dataenc" Codec.Binary.Base64 -import Data.Bits.Utils +import qualified "dataenc" Codec.Binary.Base64 as B64 import Control.Applicative import Data.Maybe +import qualified Data.ByteString.Lazy as L +import Data.ByteString.Lazy.UTF8 (fromString, toString) -toB64 :: String -> String -toB64 = encode . s2w8 +toB64 :: String -> String +toB64 = B64.encode . L.unpack . fromString fromB64Maybe :: String -> Maybe String -fromB64Maybe s = w82s <$> decode s +fromB64Maybe s = toString . L.pack <$> B64.decode s fromB64 :: String -> String fromB64 = fromMaybe bad . fromB64Maybe where bad = error "bad base64 encoded data" + +prop_b64_roundtrips :: String -> Bool +prop_b64_roundtrips s = s == fromB64 (toB64 s) diff --git a/Utility/FileSystemEncoding.hs b/Utility/FileSystemEncoding.hs index 844e81e59..139b74fe4 100644 --- a/Utility/FileSystemEncoding.hs +++ b/Utility/FileSystemEncoding.hs @@ -14,6 +14,8 @@ module Utility.FileSystemEncoding ( decodeBS, decodeW8, encodeW8, + encodeW8NUL, + decodeW8NUL, truncateFilePath, ) where @@ -25,6 +27,7 @@ import System.IO.Unsafe import qualified Data.Hash.MD5 as MD5 import Data.Word import Data.Bits.Utils +import Data.List.Utils import qualified Data.ByteString.Lazy as L #ifdef mingw32_HOST_OS import qualified Data.ByteString.Lazy.UTF8 as L8 @@ -89,6 +92,9 @@ decodeBS = L8.toString - w82c produces a String, which may contain Chars that are invalid - unicode. From there, this is really a simple matter of applying the - file system encoding, only complicated by GHC's interface to doing so. 
+ - + - Note that the encoding stops at any NUL in the input. FilePaths + - do not normally contain embedded NUL, but Haskell Strings may. -} {-# NOINLINE encodeW8 #-} encodeW8 :: [Word8] -> FilePath @@ -101,6 +107,17 @@ encodeW8 w8 = unsafePerformIO $ do decodeW8 :: FilePath -> [Word8] decodeW8 = s2w8 . _encodeFilePath +{- Like encodeW8 and decodeW8, but NULs are passed through unchanged. -} +encodeW8NUL :: [Word8] -> FilePath +encodeW8NUL = join nul . map encodeW8 . split (s2w8 nul) + where + nul = ['\NUL'] + +decodeW8NUL :: FilePath -> [Word8] +decodeW8NUL = join (s2w8 nul) . map decodeW8 . split nul + where + nul = ['\NUL'] + {- Truncates a FilePath to the given number of bytes (or less), - as represented on disk. - |