aboutsummaryrefslogtreecommitdiff
path: root/Utility
diff options
context:
space:
mode:
author: Joey Hess <joeyh@joeyh.name> 2017-06-17 16:17:09 -0400
committer: Joey Hess <joeyh@joeyh.name> 2017-06-17 16:48:00 -0400
commit: 3adf1ea63d9b5feccd4e6cb457fbbb52fdfc9fe0 (patch)
tree: bb68382446f50f6687dde96de1816d881bac655a /Utility
parent: 9f375315d220169240e0e99a667601136b3e06e9 (diff)
fix failing quickcheck properties
QuickCheck 2.10 found a counterexample; eg, "\929184" broke the property.

As far as I can tell, Git.Filename is matching how git handles encoding of strange high unicode characters in filenames for display. Git does not display high unicode characters, and instead displays the C-style escaped form of each byte. This is ambiguous, but since git is not unicode aware, it doesn't need to roundtrip parse it. So, making Git.Filename's roundtrip test only chars < 256 seems fine.

Utility.Format.format uses encode_c, in order to mimic git, so that's ok.

Utility.Format.gen uses decode_c, but only so that stuff like "\n" in the format string is handled. If the format string contains C-style octal escapes, they will be converted to ascii characters, and not combined into unicode characters, but that should not be a problem. If the user wants unicode characters, they can include them in the format string, without escaping them.

Finally, decode_c is used by Utility.Gpg.secretKeys, because gpg --with-colons hex-escapes some characters, in particular ':' and '\\'. gpg passes unicode through, so this use of decode_c is not a problem.

This commit was sponsored by Henrik Riomar on Patreon.
Diffstat (limited to 'Utility')
-rw-r--r-- Utility/Format.hs | 21
-rw-r--r-- Utility/Gpg.hs | 3
2 files changed, 18 insertions, 6 deletions
diff --git a/Utility/Format.hs b/Utility/Format.hs
index 1ebf68d6c..3670cd717 100644
--- a/Utility/Format.hs
+++ b/Utility/Format.hs
@@ -11,7 +11,7 @@ module Utility.Format (
format,
decode_c,
encode_c,
- prop_isomorphic_deencode
+ prop_encode_c_decode_c_roundtrip
) where
import Text.Printf (printf)
@@ -100,8 +100,8 @@ empty :: Frag -> Bool
empty (Const "") = True
empty _ = False
-{- Decodes a C-style encoding, where \n is a newline, \NNN is an octal
- - encoded character, and \xNN is a hex encoded character.
+{- Decodes a C-style encoding, where \n is a newline (etc),
+ - \NNN is an octal encoded character, and \xNN is a hex encoded character.
-}
decode_c :: FormatString -> String
decode_c [] = []
@@ -173,6 +173,15 @@ encode_c' p = concatMap echar
e_asc c = showoctal $ ord c
showoctal i = '\\' : printf "%03o" i
-{- for quickcheck -}
-prop_isomorphic_deencode :: String -> Bool
-prop_isomorphic_deencode s = s == decode_c (encode_c s)
+{- For quickcheck.
+ -
+ - Encoding and then decoding roundtrips only when
+ - the string does not contain high unicode, because eg,
+ - both "\12345" and "\227\128\185" are encoded to "\343\200\271".
+ -
+ - This property papers over the problem, by only testing chars < 256.
+ -}
+prop_encode_c_decode_c_roundtrip :: String -> Bool
+prop_encode_c_decode_c_roundtrip s = s' == decode_c (encode_c s')
+ where
+ s' = filter (\c -> ord c < 256) s
diff --git a/Utility/Gpg.hs b/Utility/Gpg.hs
index dae254854..94d588cd7 100644
--- a/Utility/Gpg.hs
+++ b/Utility/Gpg.hs
@@ -184,6 +184,9 @@ secretKeys cmd = catchDefaultIO M.empty makemap
params = [Param "--with-colons", Param "--list-secret-keys", Param "--fixed-list-mode"]
parse = extract [] Nothing . map (splitc ':')
extract c (Just keyid) (("uid":_:_:_:_:_:_:_:_:userid:_):rest) =
+ -- If the userid contains a ":" or a few other special
+ -- characters, gpg will hex-escape it. Use decode_c to
+ -- undo.
extract ((keyid, decode_c userid):c) Nothing rest
extract c (Just keyid) rest@(("sec":_):_) =
extract ((keyid, ""):c) Nothing rest