summaryrefslogtreecommitdiff
path: root/Utility
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2017-01-31 18:40:42 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2017-01-31 19:06:22 -0400
commit7fd21be7f967bdc21530b730f595379b23fe1174 (patch)
treed2af9101541d8166b2035271967bb3ac01751e36 /Utility
parent164466c987a7607a5f598b36e5b3111a68bd101f (diff)
Some optimisations to string splitting code.
Turns out that Data.List.Utils.split is slow and makes a lot of allocations. Here's a much simpler single character splitter that behaves the same (even in wacky corner cases) while running in half the time and 75% the allocations. As well as being an optimisation, this helps move toward eliminating use of missingh. (Data.List.Split.splitOn is nearly as slow as Data.List.Utils.split and allocates even more.) I have not benchmarked the effect on git-annex, but would not be surprised to see some parsing of eg, large streams from git commands run twice as fast, and possibly in less memory. This commit was sponsored by Boyd Stephen Smith Jr. on Patreon.
Diffstat (limited to 'Utility')
-rw-r--r--Utility/DottedVersion.hs2
-rw-r--r--Utility/Gpg.hs4
-rw-r--r--Utility/Lsof.hs2
-rw-r--r--Utility/Misc.hs8
-rw-r--r--Utility/Quvi.hs4
-rw-r--r--Utility/Rsync.hs4
-rw-r--r--Utility/SafeCommand.hs4
7 files changed, 18 insertions, 10 deletions
diff --git a/Utility/DottedVersion.hs b/Utility/DottedVersion.hs
index ebf4c0bd1..3198b1ce2 100644
--- a/Utility/DottedVersion.hs
+++ b/Utility/DottedVersion.hs
@@ -25,7 +25,7 @@ instance Show DottedVersion where
normalize :: String -> DottedVersion
normalize v = DottedVersion v $
sum $ mult 1 $ reverse $ extend precision $ take precision $
- map readi $ split "." v
+ map readi $ splitc '.' v
where
extend n l = l ++ replicate (n - length l) 0
mult _ [] = []
diff --git a/Utility/Gpg.hs b/Utility/Gpg.hs
index 118515222..a5d382083 100644
--- a/Utility/Gpg.hs
+++ b/Utility/Gpg.hs
@@ -162,7 +162,7 @@ findPubKeys :: GpgCmd -> String -> IO KeyIds
findPubKeys cmd for = KeyIds . parse . lines <$> readStrict cmd params
where
params = [Param "--with-colons", Param "--list-public-keys", Param for]
- parse = mapMaybe (keyIdField . split ":")
+ parse = mapMaybe (keyIdField . splitc ':')
keyIdField ("pub":_:_:_:f:_) = Just f
keyIdField _ = Nothing
@@ -175,7 +175,7 @@ secretKeys cmd = catchDefaultIO M.empty makemap
where
makemap = M.fromList . parse . lines <$> readStrict cmd params
params = [Param "--with-colons", Param "--list-secret-keys", Param "--fixed-list-mode"]
- parse = extract [] Nothing . map (split ":")
+ parse = extract [] Nothing . map (splitc ':')
extract c (Just keyid) (("uid":_:_:_:_:_:_:_:_:userid:_):rest) =
extract ((keyid, decode_c userid):c) Nothing rest
extract c (Just keyid) rest@(("sec":_):_) =
diff --git a/Utility/Lsof.hs b/Utility/Lsof.hs
index 27d34b592..e3ed709ec 100644
--- a/Utility/Lsof.hs
+++ b/Utility/Lsof.hs
@@ -107,7 +107,7 @@ parseFormatted s = bundle $ go [] $ lines s
parsemode ('u':_) = OpenReadWrite
parsemode _ = OpenUnknown
- splitnull = split "\0"
+ splitnull = splitc '\0'
parsefail = error $ "failed to parse lsof output: " ++ show s
diff --git a/Utility/Misc.hs b/Utility/Misc.hs
index 4498c0a03..564935ddb 100644
--- a/Utility/Misc.hs
+++ b/Utility/Misc.hs
@@ -45,6 +45,14 @@ separate c l = unbreak $ break c l
| null b = r
| otherwise = (a, tail b)
+{- Split on a single character. This is over twice as fast as using
+ - Data.List.Utils.split on a list of length 1, while producing
+ - identical results. -}
+splitc :: Char -> String -> [String]
+splitc c s = case break (== c) s of
+ (i, _c:rest) -> i : splitc c rest
+ (i, []) -> i : []
+
{- Breaks out the first line. -}
firstLine :: String -> String
firstLine = takeWhile (/= '\n')
diff --git a/Utility/Quvi.hs b/Utility/Quvi.hs
index d33d79bb8..ff1ad854c 100644
--- a/Utility/Quvi.hs
+++ b/Utility/Quvi.hs
@@ -124,14 +124,14 @@ supported Quvi09 url = (firstlevel <&&> secondlevel)
Nothing -> return False
Just auth -> do
let domain = map toLower $ uriRegName auth
- let basedomain = intercalate "." $ reverse $ take 2 $ reverse $ split "." domain
+ let basedomain = intercalate "." $ reverse $ take 2 $ reverse $ splitc '.' domain
any (\h -> domain `isSuffixOf` h || basedomain `isSuffixOf` h)
. map (map toLower) <$> listdomains Quvi09
secondlevel = snd <$> processTranscript "quvi"
(toCommand [Param "dump", Param "-o", Param url]) Nothing
listdomains :: QuviVersion -> IO [String]
-listdomains Quvi09 = concatMap (split ",")
+listdomains Quvi09 = concatMap (splitc ',')
. concatMap (drop 1 . words)
. filter ("domains: " `isPrefixOf`) . lines
<$> readQuvi (toCommand [Param "info", Param "-p", Param "domains"])
diff --git a/Utility/Rsync.hs b/Utility/Rsync.hs
index d3fe98120..d3823a528 100644
--- a/Utility/Rsync.hs
+++ b/Utility/Rsync.hs
@@ -24,7 +24,7 @@ rsyncShell command = [Param "-e", Param $ unwords $ map escape (toCommand comman
{- rsync requires some weird, non-shell like quoting in
- here. A doubled single quote inside the single quoted
- string is a single quote. -}
- escape s = "'" ++ intercalate "''" (split "'" s) ++ "'"
+ escape s = "'" ++ intercalate "''" (splitc '\'' s) ++ "'"
{- Runs rsync in server mode to send a file. -}
rsyncServerSend :: [CommandParam] -> FilePath -> IO Bool
@@ -123,7 +123,7 @@ parseRsyncProgress = go [] . reverse . progresschunks
{- Find chunks that each start with delim.
- The first chunk doesn't start with it
- (it's empty when delim is at the start of the string). -}
- progresschunks = drop 1 . split [delim]
+ progresschunks = drop 1 . splitc delim
findbytesstart s = dropWhile isSpace s
parsebytes :: String -> Maybe Integer
diff --git a/Utility/SafeCommand.hs b/Utility/SafeCommand.hs
index 5ce17a845..bef0a619d 100644
--- a/Utility/SafeCommand.hs
+++ b/Utility/SafeCommand.hs
@@ -11,7 +11,7 @@ module Utility.SafeCommand where
import System.Exit
import Utility.Process
-import Data.String.Utils
+import Utility.Misc
import System.FilePath
import Data.Char
import Data.List
@@ -86,7 +86,7 @@ shellEscape :: String -> String
shellEscape f = "'" ++ escaped ++ "'"
where
-- replace ' with '"'"'
- escaped = intercalate "'\"'\"'" $ split "'" f
+ escaped = intercalate "'\"'\"'" $ splitc '\'' f
-- | Unescapes a set of shellEscaped words or filenames.
shellUnEscape :: String -> [String]