diff options
author | Joey Hess <joey@kitenet.net> | 2011-12-22 20:14:35 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2011-12-23 01:05:16 -0400 |
commit | cba3ce08dfaa3318aa80b414e4d6d4f40d843d15 (patch) | |
tree | cdb54dd767cb1530c88b92277b1150f961d06012 /Utility/Format.hs | |
parent | a0872a8ec34894689489dd0cf47887d8609b9f47 (diff) |
handle C-style escapes in Format
I was happily able to repurpose some code from Git.Filename to handle this.
I remember writing that code... a whole afternoon at a coffee shop, after
which I felt I'd struggled with Haskell and git, and sorta lost, in needing
to write this nasty peice of code. But was also pleased at the use of a
pair of functions and quickcheck that allowed me to get it 100% right.
So, turns out I not only got it right, but the code wasn't as special-purpose
as I'd feared. Yay!
Diffstat (limited to 'Utility/Format.hs')
-rw-r--r-- | Utility/Format.hs | 93 |
1 files changed, 84 insertions, 9 deletions
diff --git a/Utility/Format.hs b/Utility/Format.hs index 5a74da96b..804dbff4c 100644 --- a/Utility/Format.hs +++ b/Utility/Format.hs @@ -1,17 +1,25 @@ {- Formatted string handling. - - - Copyright 2011 Joey Hess <joey@kitenet.net> + - Copyright 2010, 2011 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} -module Utility.Format (Format, gen, format) where +module Utility.Format ( + Format, + gen, + format, + decode_c, + encode_c, + prop_idempotent_deencode +) where import Text.Printf (printf) -import Data.String.Utils (replace) -import Data.Char (isAlphaNum) +import Data.Char (isAlphaNum, isOctDigit, chr, ord) +import Data.Maybe (fromMaybe) +import Data.Word (Word8) +import qualified Codec.Binary.UTF8.String import qualified Data.Map as M -import Data.Maybe import Utility.PartialPrelude @@ -52,12 +60,11 @@ format f vars = concatMap expand f - (This is the same type of format string used by dpkg-query.) -} gen :: FormatString -> Format -gen = finalize . scan [] +gen = filter (not . empty) . fuse [] . scan [] . decode_c where -- The Format is built up in reverse, for efficiency, - -- To finalize it, fix the reversing and do some - -- optimisations, including fusing adjacent Consts. - finalize = filter (not . empty) . fuse [] + -- and can have adjacent Consts. Fusing it fixes both + -- problems. fuse f [] = f fuse f (Const c1:Const c2:vs) = fuse f $ Const (c2++c1) : vs fuse f (v:vs) = fuse (v:f) vs @@ -82,3 +89,71 @@ gen = finalize . scan [] novar v = "${" ++ reverse v foundvar f v p cs = scan (Var (reverse v) p : f) cs + + +{- Decodes a C-style encoding, where \n is a newline, \NNN is an octal + - encoded character, etc. + -} +decode_c :: FormatString -> FormatString +decode_c [] = [] +decode_c s = unescape ("", s) + where + e = '\\' + unescape (b, []) = b + -- look for escapes starting with '\' + unescape (b, v) = b ++ fst pair ++ unescape (handle $ snd pair) + where + pair = span (/= e) v + isescape x = x == e + -- \NNN is an octal encoded character + handle (x:n1:n2:n3:rest) + | isescape x && alloctal = (fromoctal, rest) + where + alloctal = isOctDigit n1 && + isOctDigit n2 && + isOctDigit n3 + fromoctal = [chr $ readoctal [n1, n2, n3]] + readoctal o = Prelude.read $ "0o" ++ o :: Int + -- \C is used for a few special characters + handle (x:nc:rest) + | isescape x = ([echar nc], rest) + where + echar 'a' = '\a' + echar 'b' = '\b' + echar 'f' = '\f' + echar 'n' = '\n' + echar 'r' = '\r' + echar 't' = '\t' + echar 'v' = '\v' + echar a = a + handle n = ("", n) + +{- Should not need to use this, except for testing decode_c. -} +encode_c :: FormatString -> FormatString +encode_c s = concatMap echar s + where + e c = '\\' : [c] + echar '\a' = e 'a' + echar '\b' = e 'b' + echar '\f' = e 'f' + echar '\n' = e 'n' + echar '\r' = e 'r' + echar '\t' = e 't' + echar '\v' = e 'v' + echar '\\' = e '\\' + echar '"' = e '"' + echar x + | ord x < 0x20 = e_num x -- low ascii + | ord x >= 256 = e_utf x + | ord x > 0x7E = e_num x -- high ascii + | otherwise = [x] -- printable ascii + where + showoctal i = '\\' : printf "%03o" i + e_num c = showoctal $ ord c + -- unicode character is decomposed to + -- Word8s and each is shown in octal + e_utf c = showoctal =<< (Codec.Binary.UTF8.String.encode [c] :: [Word8]) + +{- for quickcheck -} +prop_idempotent_deencode :: String -> Bool +prop_idempotent_deencode s = s == decode_c (encode_c s) |