aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2017-01-31 18:40:42 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2017-01-31 19:06:22 -0400
commit7fd21be7f967bdc21530b730f595379b23fe1174 (patch)
treed2af9101541d8166b2035271967bb3ac01751e36
parent164466c987a7607a5f598b36e5b3111a68bd101f (diff)
Some optimisations to string splitting code.
Turns out that Data.List.Utils.split is slow and makes a lot of allocations. Here's a much simpler single character splitter that behaves the same (even in wacky corner cases) while running in half the time and 75% the allocations. As well as being an optimisation, this helps move toward eliminating use of missingh. (Data.List.Split.splitOn is nearly as slow as Data.List.Utils.split and allocates even more.) I have not benchmarked the effect on git-annex, but would not be surprised to see some parsing of eg, large streams from git commands run twice as fast, and possibly in less memory. This commit was sponsored by Boyd Stephen Smith Jr. on Patreon.
-rw-r--r--Annex/Direct.hs4
-rw-r--r--Annex/TaggedPush.hs2
-rw-r--r--Assistant/Ssh.hs2
-rw-r--r--Backend/Hash.hs2
-rw-r--r--Build/OSXMkLibs.hs2
-rw-r--r--CHANGELOG1
-rw-r--r--Command/AddUrl.hs2
-rw-r--r--Command/Map.hs2
-rw-r--r--Command/Unused.hs2
-rw-r--r--Crypto.hs4
-rw-r--r--Git/Command.hs8
-rw-r--r--Git/Config.hs2
-rw-r--r--Git/Construct.hs4
-rw-r--r--Git/Ref.hs2
-rw-r--r--Limit.hs2
-rw-r--r--Logs/Transfer.hs2
-rw-r--r--Logs/UUID.hs2
-rw-r--r--Remote.hs2
-rw-r--r--Remote/BitTorrent.hs2
-rw-r--r--Remote/Bup.hs2
-rw-r--r--Remote/Helper/Encryptable.hs2
-rw-r--r--Types/RefSpec.hs2
-rw-r--r--Upgrade/V1.hs2
-rw-r--r--Utility/DottedVersion.hs2
-rw-r--r--Utility/Gpg.hs4
-rw-r--r--Utility/Lsof.hs2
-rw-r--r--Utility/Misc.hs8
-rw-r--r--Utility/Quvi.hs4
-rw-r--r--Utility/Rsync.hs4
-rw-r--r--Utility/SafeCommand.hs4
30 files changed, 47 insertions, 38 deletions
diff --git a/Annex/Direct.hs b/Annex/Direct.hs
index 5724d1162..e5c1c47c8 100644
--- a/Annex/Direct.hs
+++ b/Annex/Direct.hs
@@ -441,7 +441,7 @@ setDirect wantdirect = do
- this way things that show HEAD (eg shell prompts) will
- hopefully show just "master". -}
directBranch :: Ref -> Ref
-directBranch orighead = case split "/" $ fromRef orighead of
+directBranch orighead = case splitc '/' $ fromRef orighead of
("refs":"heads":"annex":"direct":_) -> orighead
("refs":"heads":rest) ->
Ref $ "refs/heads/annex/direct/" ++ intercalate "/" rest
@@ -452,7 +452,7 @@ directBranch orighead = case split "/" $ fromRef orighead of
- Any other ref is left unchanged.
-}
fromDirectBranch :: Ref -> Ref
-fromDirectBranch directhead = case split "/" $ fromRef directhead of
+fromDirectBranch directhead = case splitc '/' $ fromRef directhead of
("refs":"heads":"annex":"direct":rest) ->
Ref $ "refs/heads/" ++ intercalate "/" rest
_ -> directhead
diff --git a/Annex/TaggedPush.hs b/Annex/TaggedPush.hs
index ef1aeeea2..3b06170b3 100644
--- a/Annex/TaggedPush.hs
+++ b/Annex/TaggedPush.hs
@@ -39,7 +39,7 @@ toTaggedBranch u info b = Git.Ref $ intercalate "/" $ catMaybes
]
fromTaggedBranch :: Git.Branch -> Maybe (UUID, Maybe String)
-fromTaggedBranch b = case split "/" $ Git.fromRef b of
+fromTaggedBranch b = case splitc '/' $ Git.fromRef b of
("refs":"synced":u:info:_base) ->
Just (toUUID u, fromB64Maybe info)
("refs":"synced":u:_base) ->
diff --git a/Assistant/Ssh.hs b/Assistant/Ssh.hs
index 66ed54257..e439ecd23 100644
--- a/Assistant/Ssh.hs
+++ b/Assistant/Ssh.hs
@@ -383,7 +383,7 @@ mangleSshHostName sshdata = intercalate "-"
{- Extracts the real hostname from a mangled ssh hostname. -}
unMangleSshHostName :: String -> String
-unMangleSshHostName h = case split "-" h of
+unMangleSshHostName h = case splitc '-' h of
("git":"annex":rest) -> unescape (intercalate "-" (beginning rest))
_ -> h
where
diff --git a/Backend/Hash.hs b/Backend/Hash.hs
index ba8d4bc64..c85047d51 100644
--- a/Backend/Hash.hs
+++ b/Backend/Hash.hs
@@ -103,7 +103,7 @@ selectExtension f
es = filter (not . null) $ reverse $
take 2 $ map (filter validInExtension) $
takeWhile shortenough $
- reverse $ split "." $ takeExtensions f
+ reverse $ splitc '.' $ takeExtensions f
shortenough e = length e <= 4 -- long enough for "jpeg"
{- A key's checksum is checked during fsck. -}
diff --git a/Build/OSXMkLibs.hs b/Build/OSXMkLibs.hs
index 948b0d5a3..2243c6b86 100644
--- a/Build/OSXMkLibs.hs
+++ b/Build/OSXMkLibs.hs
@@ -95,7 +95,7 @@ findLibPath l = go =<< getEnv "DYLD_LIBRARY_PATH"
where
go Nothing = return l
go (Just p) = fromMaybe l
- <$> firstM doesFileExist (map (</> f) (split ":" p))
+ <$> firstM doesFileExist (map (</> f) (splitc ':' p))
f = takeFileName l
{- Expands any @rpath in the list of libraries.
diff --git a/CHANGELOG b/CHANGELOG
index 30b7b147b..634b22082 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -8,6 +8,7 @@ git-annex (6.20170102) UNRELEASED; urgency=medium
* vicfg: Include the numcopies configuation.
* config: New command for storing configuration in the git-annex branch.
* stack.yaml: Update to lts-7.18.
+ * Some optimisations to string splitting code.
-- Joey Hess <id@joeyh.name> Fri, 06 Jan 2017 15:22:06 -0400
diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs
index 8cc148440..169875f4b 100644
--- a/Command/AddUrl.hs
+++ b/Command/AddUrl.hs
@@ -387,7 +387,7 @@ url2file url pathdepth pathmax = case pathdepth of
]
frombits a = intercalate "/" $ a urlbits
urlbits = map (truncateFilePath pathmax . sanitizeFilePath) $
- filter (not . null) $ split "/" fullurl
+ filter (not . null) $ splitc '/' fullurl
urlString2file :: URLString -> Maybe Int -> Int -> FilePath
urlString2file s pathdepth pathmax = case Url.parseURIRelaxed s of
diff --git a/Command/Map.hs b/Command/Map.hs
index 43c00d257..b04beb477 100644
--- a/Command/Map.hs
+++ b/Command/Map.hs
@@ -92,7 +92,7 @@ hostname r
| otherwise = "localhost"
basehostname :: Git.Repo -> String
-basehostname r = fromMaybe "" $ headMaybe $ split "." $ hostname r
+basehostname r = fromMaybe "" $ headMaybe $ splitc '.' $ hostname r
{- A name to display for a repo. Uses the name from uuid.log if available,
- or the remote name if not. -}
diff --git a/Command/Unused.hs b/Command/Unused.hs
index 1711fe047..3953f4486 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -269,7 +269,7 @@ withKeysReferencedDiff a getdiff extractsha = do
forM_ ds go
liftIO $ void clean
where
- go d = do
+ go d = do
let sha = extractsha d
unless (sha == nullSha) $
(parseLinkOrPointer <$> catObject sha)
diff --git a/Crypto.hs b/Crypto.hs
index d3cbfa2f7..dc1d2e6d2 100644
--- a/Crypto.hs
+++ b/Crypto.hs
@@ -231,8 +231,8 @@ instance LensGpgEncParams (RemoteConfig, RemoteGitConfig) where
{- When the remote is configured to use public-key encryption,
- look up the recipient keys and add them to the option list. -}
case M.lookup "encryption" c of
- Just "pubkey" -> Gpg.pkEncTo $ maybe [] (split ",") $ M.lookup "cipherkeys" c
- Just "sharedpubkey" -> Gpg.pkEncTo $ maybe [] (split ",") $ M.lookup "pubkeys" c
+ Just "pubkey" -> Gpg.pkEncTo $ maybe [] (splitc ',') $ M.lookup "cipherkeys" c
+ Just "sharedpubkey" -> Gpg.pkEncTo $ maybe [] (splitc ',') $ M.lookup "pubkeys" c
_ -> []
getGpgDecParams (_c,gc) = map Param (remoteAnnexGnupgDecryptOptions gc)
diff --git a/Git/Command.hs b/Git/Command.hs
index adea7622e..f40dfabcd 100644
--- a/Git/Command.hs
+++ b/Git/Command.hs
@@ -91,16 +91,16 @@ pipeWrite params repo = withHandle StdinHandle createProcessSuccess $
pipeNullSplit :: [CommandParam] -> Repo -> IO ([String], IO Bool)
pipeNullSplit params repo = do
(s, cleanup) <- pipeReadLazy params repo
- return (filter (not . null) $ split sep s, cleanup)
+ return (filter (not . null) $ splitc sep s, cleanup)
where
- sep = "\0"
+ sep = '\0'
pipeNullSplitStrict :: [CommandParam] -> Repo -> IO [String]
pipeNullSplitStrict params repo = do
s <- pipeReadStrict params repo
- return $ filter (not . null) $ split sep s
+ return $ filter (not . null) $ splitc sep s
where
- sep = "\0"
+ sep = '\0'
pipeNullSplitZombie :: [CommandParam] -> Repo -> IO [String]
pipeNullSplitZombie params repo = leaveZombie <$> pipeNullSplit params repo
diff --git a/Git/Config.hs b/Git/Config.hs
index 65bd9b7ba..9b4c342a4 100644
--- a/Git/Config.hs
+++ b/Git/Config.hs
@@ -132,7 +132,7 @@ parse s
-- --list output will have an = in the first line
| all ('=' `elem`) (take 1 ls) = sep '=' ls
-- --null --list output separates keys from values with newlines
- | otherwise = sep '\n' $ split "\0" s
+ | otherwise = sep '\n' $ splitc '\0' s
where
ls = lines s
sep c = M.fromListWith (++) . map (\(k,v) -> (k, [v])) .
diff --git a/Git/Construct.hs b/Git/Construct.hs
index 765562212..489927880 100644
--- a/Git/Construct.hs
+++ b/Git/Construct.hs
@@ -26,7 +26,7 @@ module Git.Construct (
#ifndef mingw32_HOST_OS
import System.Posix.User
#endif
-import qualified Data.Map as M hiding (map, split)
+import qualified Data.Map as M
import Network.URI
import Common
@@ -143,7 +143,7 @@ remoteNamedFromKey :: String -> IO Repo -> IO Repo
remoteNamedFromKey k = remoteNamed basename
where
basename = intercalate "." $
- reverse $ drop 1 $ reverse $ drop 1 $ split "." k
+ reverse $ drop 1 $ reverse $ drop 1 $ splitc '.' k
{- Constructs a new Repo for one of a Repo's remotes using a given
- location (ie, an url). -}
diff --git a/Git/Ref.hs b/Git/Ref.hs
index 5b3b85324..2d8013738 100644
--- a/Git/Ref.hs
+++ b/Git/Ref.hs
@@ -144,6 +144,6 @@ legal allowonelevel s = all (== False) illegal
ends v = v `isSuffixOf` s
begins v = v `isPrefixOf` s
- pathbits = split "/" s
+ pathbits = splitc '/' s
illegalchars = " ~^:?*[\\" ++ controlchars
controlchars = chr 0o177 : [chr 0 .. chr (0o40-1)]
diff --git a/Limit.hs b/Limit.hs
index efe4fea85..7b26f9e58 100644
--- a/Limit.hs
+++ b/Limit.hs
@@ -161,7 +161,7 @@ addCopies :: String -> Annex ()
addCopies = addLimit . limitCopies
limitCopies :: MkLimit Annex
-limitCopies want = case split ":" want of
+limitCopies want = case splitc ':' want of
[v, n] -> case parsetrustspec v of
Just checker -> go n $ checktrust checker
Nothing -> go n $ checkgroup v
diff --git a/Logs/Transfer.hs b/Logs/Transfer.hs
index 28f7b0a26..903db96fe 100644
--- a/Logs/Transfer.hs
+++ b/Logs/Transfer.hs
@@ -268,7 +268,7 @@ readTransferInfo mpid s = TransferInfo
filename
| end rest == "\n" = beginning rest
| otherwise = rest
- bits = split " " firstline
+ bits = splitc ' ' firstline
numbits = length bits
time = if numbits > 0
then Just <$> parsePOSIXTime =<< headMaybe bits
diff --git a/Logs/UUID.hs b/Logs/UUID.hs
index 60c8a2ef9..4c84d10bd 100644
--- a/Logs/UUID.hs
+++ b/Logs/UUID.hs
@@ -66,7 +66,7 @@ fixBadUUID = M.fromList . map fixup . M.toList
newertime (LogEntry (Date d) _) = d + minimumPOSIXTimeSlice
newertime (LogEntry Unknown _) = minimumPOSIXTimeSlice
minimumPOSIXTimeSlice = 0.000001
- isuuid s = length s == 36 && length (split "-" s) == 5
+ isuuid s = length s == 36 && length (splitc '-' s) == 5
{- Records the uuid in the log, if it's not already there. -}
recordUUID :: UUID -> Annex ()
diff --git a/Remote.hs b/Remote.hs
index 9479e72d1..8c774915a 100644
--- a/Remote.hs
+++ b/Remote.hs
@@ -140,7 +140,7 @@ byName' n = go . filter matching <$> remoteList
byNameOrGroup :: RemoteName -> Annex [Remote]
byNameOrGroup n = go =<< getConfigMaybe (ConfigKey ("remotes." ++ n))
where
- go (Just l) = catMaybes <$> mapM (byName . Just) (split " " l)
+ go (Just l) = catMaybes <$> mapM (byName . Just) (splitc ' ' l)
go Nothing = maybeToList <$> byName (Just n)
{- Only matches remote name, not UUID -}
diff --git a/Remote/BitTorrent.hs b/Remote/BitTorrent.hs
index 0ec78aa64..2f29f5baa 100644
--- a/Remote/BitTorrent.hs
+++ b/Remote/BitTorrent.hs
@@ -302,7 +302,7 @@ ariaProgress (Just sz) meter ps = do
=<< ariaParams ps
parseAriaProgress :: Integer -> ProgressParser
-parseAriaProgress totalsize = go [] . reverse . split ['\r']
+parseAriaProgress totalsize = go [] . reverse . splitc '\r'
where
go remainder [] = (Nothing, remainder)
go remainder (x:xs) = case readish (findpercent x) of
diff --git a/Remote/Bup.hs b/Remote/Bup.hs
index 332e8d5dc..75b379558 100644
--- a/Remote/Bup.hs
+++ b/Remote/Bup.hs
@@ -254,7 +254,7 @@ bup2GitRemote r
else giveup "please specify an absolute path"
| otherwise = Git.Construct.fromUrl $ "ssh://" ++ host ++ slash dir
where
- bits = split ":" r
+ bits = splitc ':' r
host = Prelude.head bits
dir = intercalate ":" $ drop 1 bits
-- "host:~user/dir" is not supported specially by bup;
diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs
index 45ceae068..029ac4b09 100644
--- a/Remote/Helper/Encryptable.hs
+++ b/Remote/Helper/Encryptable.hs
@@ -165,7 +165,7 @@ extractCipher c = case (M.lookup "cipher" c,
Just $ SharedCipher (fromB64bs t)
_ -> Nothing
where
- readkeys = KeyIds . split ","
+ readkeys = KeyIds . splitc ','
describeEncryption :: RemoteConfig -> String
describeEncryption c = case extractCipher c of
diff --git a/Types/RefSpec.hs b/Types/RefSpec.hs
index 091631abd..c71e57d92 100644
--- a/Types/RefSpec.hs
+++ b/Types/RefSpec.hs
@@ -25,7 +25,7 @@ allRefSpec :: RefSpec
allRefSpec = [AddMatching $ compileGlob "*" CaseSensative]
parseRefSpec :: String -> Either String RefSpec
-parseRefSpec v = case partitionEithers (map mk $ split ":" v) of
+parseRefSpec v = case partitionEithers (map mk $ splitc ':' v) of
([],refspec) -> Right refspec
(e:_,_) -> Left e
where
diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs
index c82cf92f5..725bb4089 100644
--- a/Upgrade/V1.hs
+++ b/Upgrade/V1.hs
@@ -148,7 +148,7 @@ readKey1 v
, keyMtime = t
}
where
- bits = split ":" v
+ bits = splitc ':' v
b = Prelude.head bits
n = intercalate ":" $ drop (if wormy then 3 else 1) bits
t = if wormy
diff --git a/Utility/DottedVersion.hs b/Utility/DottedVersion.hs
index ebf4c0bd1..3198b1ce2 100644
--- a/Utility/DottedVersion.hs
+++ b/Utility/DottedVersion.hs
@@ -25,7 +25,7 @@ instance Show DottedVersion where
normalize :: String -> DottedVersion
normalize v = DottedVersion v $
sum $ mult 1 $ reverse $ extend precision $ take precision $
- map readi $ split "." v
+ map readi $ splitc '.' v
where
extend n l = l ++ replicate (n - length l) 0
mult _ [] = []
diff --git a/Utility/Gpg.hs b/Utility/Gpg.hs
index 118515222..a5d382083 100644
--- a/Utility/Gpg.hs
+++ b/Utility/Gpg.hs
@@ -162,7 +162,7 @@ findPubKeys :: GpgCmd -> String -> IO KeyIds
findPubKeys cmd for = KeyIds . parse . lines <$> readStrict cmd params
where
params = [Param "--with-colons", Param "--list-public-keys", Param for]
- parse = mapMaybe (keyIdField . split ":")
+ parse = mapMaybe (keyIdField . splitc ':')
keyIdField ("pub":_:_:_:f:_) = Just f
keyIdField _ = Nothing
@@ -175,7 +175,7 @@ secretKeys cmd = catchDefaultIO M.empty makemap
where
makemap = M.fromList . parse . lines <$> readStrict cmd params
params = [Param "--with-colons", Param "--list-secret-keys", Param "--fixed-list-mode"]
- parse = extract [] Nothing . map (split ":")
+ parse = extract [] Nothing . map (splitc ':')
extract c (Just keyid) (("uid":_:_:_:_:_:_:_:_:userid:_):rest) =
extract ((keyid, decode_c userid):c) Nothing rest
extract c (Just keyid) rest@(("sec":_):_) =
diff --git a/Utility/Lsof.hs b/Utility/Lsof.hs
index 27d34b592..e3ed709ec 100644
--- a/Utility/Lsof.hs
+++ b/Utility/Lsof.hs
@@ -107,7 +107,7 @@ parseFormatted s = bundle $ go [] $ lines s
parsemode ('u':_) = OpenReadWrite
parsemode _ = OpenUnknown
- splitnull = split "\0"
+ splitnull = splitc '\0'
parsefail = error $ "failed to parse lsof output: " ++ show s
diff --git a/Utility/Misc.hs b/Utility/Misc.hs
index 4498c0a03..564935ddb 100644
--- a/Utility/Misc.hs
+++ b/Utility/Misc.hs
@@ -45,6 +45,14 @@ separate c l = unbreak $ break c l
| null b = r
| otherwise = (a, tail b)
+{- Split on a single character. This is over twice as fast as using
+ - Data.List.Utils.split on a list of length 1, while producing
+ - identical results. -}
+splitc :: Char -> String -> [String]
+splitc c s = case break (== c) s of
+ (i, _c:rest) -> i : splitc c rest
+ (i, []) -> i : []
+
{- Breaks out the first line. -}
firstLine :: String -> String
firstLine = takeWhile (/= '\n')
diff --git a/Utility/Quvi.hs b/Utility/Quvi.hs
index d33d79bb8..ff1ad854c 100644
--- a/Utility/Quvi.hs
+++ b/Utility/Quvi.hs
@@ -124,14 +124,14 @@ supported Quvi09 url = (firstlevel <&&> secondlevel)
Nothing -> return False
Just auth -> do
let domain = map toLower $ uriRegName auth
- let basedomain = intercalate "." $ reverse $ take 2 $ reverse $ split "." domain
+ let basedomain = intercalate "." $ reverse $ take 2 $ reverse $ splitc '.' domain
any (\h -> domain `isSuffixOf` h || basedomain `isSuffixOf` h)
. map (map toLower) <$> listdomains Quvi09
secondlevel = snd <$> processTranscript "quvi"
(toCommand [Param "dump", Param "-o", Param url]) Nothing
listdomains :: QuviVersion -> IO [String]
-listdomains Quvi09 = concatMap (split ",")
+listdomains Quvi09 = concatMap (splitc ',')
. concatMap (drop 1 . words)
. filter ("domains: " `isPrefixOf`) . lines
<$> readQuvi (toCommand [Param "info", Param "-p", Param "domains"])
diff --git a/Utility/Rsync.hs b/Utility/Rsync.hs
index d3fe98120..d3823a528 100644
--- a/Utility/Rsync.hs
+++ b/Utility/Rsync.hs
@@ -24,7 +24,7 @@ rsyncShell command = [Param "-e", Param $ unwords $ map escape (toCommand comman
{- rsync requires some weird, non-shell like quoting in
- here. A doubled single quote inside the single quoted
- string is a single quote. -}
- escape s = "'" ++ intercalate "''" (split "'" s) ++ "'"
+ escape s = "'" ++ intercalate "''" (splitc '\'' s) ++ "'"
{- Runs rsync in server mode to send a file. -}
rsyncServerSend :: [CommandParam] -> FilePath -> IO Bool
@@ -123,7 +123,7 @@ parseRsyncProgress = go [] . reverse . progresschunks
{- Find chunks that each start with delim.
- The first chunk doesn't start with it
- (it's empty when delim is at the start of the string). -}
- progresschunks = drop 1 . split [delim]
+ progresschunks = drop 1 . splitc delim
findbytesstart s = dropWhile isSpace s
parsebytes :: String -> Maybe Integer
diff --git a/Utility/SafeCommand.hs b/Utility/SafeCommand.hs
index 5ce17a845..bef0a619d 100644
--- a/Utility/SafeCommand.hs
+++ b/Utility/SafeCommand.hs
@@ -11,7 +11,7 @@ module Utility.SafeCommand where
import System.Exit
import Utility.Process
-import Data.String.Utils
+import Utility.Misc
import System.FilePath
import Data.Char
import Data.List
@@ -86,7 +86,7 @@ shellEscape :: String -> String
shellEscape f = "'" ++ escaped ++ "'"
where
-- replace ' with '"'"'
- escaped = intercalate "'\"'\"'" $ split "'" f
+ escaped = intercalate "'\"'\"'" $ splitc '\'' f
-- | Unescapes a set of shellEscaped words or filenames.
shellUnEscape :: String -> [String]