{- Some git commands output encoded filenames, in a rather annoyingly complex - C-style encoding. - - Copyright 2010, 2011 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} module Git.Filename where import qualified Codec.Binary.UTF8.String import Data.Char import Data.Word (Word8) import Text.Printf decode :: String -> FilePath decode [] = [] decode f@(c:s) -- encoded strings will be inside double quotes | c == '"' = unescape ("", middle) | otherwise = f where e = '\\' middle = init s unescape (b, []) = b -- look for escapes starting with '\' unescape (b, v) = b ++ beginning ++ unescape (handle rest) where pair = span (/= e) v beginning = fst pair rest = snd pair isescape x = x == e -- \NNN is an octal encoded character handle (x:n1:n2:n3:rest) | isescape x && alloctal = (fromoctal, rest) where alloctal = isOctDigit n1 && isOctDigit n2 && isOctDigit n3 fromoctal = [chr $ readoctal [n1, n2, n3]] readoctal o = read $ "0o" ++ o :: Int -- \C is used for a few special characters handle (x:nc:rest) | isescape x = ([echar nc], rest) where echar 'a' = '\a' echar 'b' = '\b' echar 'f' = '\f' echar 'n' = '\n' echar 'r' = '\r' echar 't' = '\t' echar 'v' = '\v' echar a = a handle n = ("", n) {- Should not need to use this, except for testing decode. -} encode :: FilePath -> String encode s = foldl (++) "\"" (map echar s) ++ "\"" where e c = '\\' : [c] echar '\a' = e 'a' echar '\b' = e 'b' echar '\f' = e 'f' echar '\n' = e 'n' echar '\r' = e 'r' echar '\t' = e 't' echar '\v' = e 'v' echar '\\' = e '\\' echar '"' = e '"' echar x | ord x < 0x20 = e_num x -- low ascii | ord x >= 256 = e_utf x | ord x > 0x7E = e_num x -- high ascii | otherwise = [x] -- printable ascii where showoctal i = '\\' : printf "%03o" i e_num c = showoctal $ ord c -- unicode character is decomposed to -- Word8s and each is shown in octal e_utf c = showoctal =<< (Codec.Binary.UTF8.String.encode [c] :: [Word8]) {- for quickcheck -} prop_idempotent_deencode :: String -> Bool prop_idempotent_deencode s = s == decode (encode s)