diff options
author | Joey Hess <joeyh@joeyh.name> | 2017-01-31 18:40:42 -0400 |
---|---|---|
committer | Joey Hess <joeyh@joeyh.name> | 2017-01-31 19:06:22 -0400 |
commit | 7fd21be7f967bdc21530b730f595379b23fe1174 (patch) | |
tree | d2af9101541d8166b2035271967bb3ac01751e36 | |
parent | 164466c987a7607a5f598b36e5b3111a68bd101f (diff) |
Some optimisations to string splitting code.
Turns out that Data.List.Utils.split is slow and makes a lot of
allocations. Here's a much simpler single character splitter that behaves
the same (even in wacky corner cases) while running in half the time and
75% the allocations.
As well as being an optimisation, this helps move toward eliminating use of
missingh.
(Data.List.Split.splitOn is nearly as slow as Data.List.Utils.split and
allocates even more.)
I have not benchmarked the effect on git-annex, but would not be surprised
to see some parsing of eg, large streams from git commands run twice as
fast, and possibly in less memory.
This commit was sponsored by Boyd Stephen Smith Jr. on Patreon.
-rw-r--r-- | Annex/Direct.hs | 4 | ||||
-rw-r--r-- | Annex/TaggedPush.hs | 2 | ||||
-rw-r--r-- | Assistant/Ssh.hs | 2 | ||||
-rw-r--r-- | Backend/Hash.hs | 2 | ||||
-rw-r--r-- | Build/OSXMkLibs.hs | 2 | ||||
-rw-r--r-- | CHANGELOG | 1 | ||||
-rw-r--r-- | Command/AddUrl.hs | 2 | ||||
-rw-r--r-- | Command/Map.hs | 2 | ||||
-rw-r--r-- | Command/Unused.hs | 2 | ||||
-rw-r--r-- | Crypto.hs | 4 | ||||
-rw-r--r-- | Git/Command.hs | 8 | ||||
-rw-r--r-- | Git/Config.hs | 2 | ||||
-rw-r--r-- | Git/Construct.hs | 4 | ||||
-rw-r--r-- | Git/Ref.hs | 2 | ||||
-rw-r--r-- | Limit.hs | 2 | ||||
-rw-r--r-- | Logs/Transfer.hs | 2 | ||||
-rw-r--r-- | Logs/UUID.hs | 2 | ||||
-rw-r--r-- | Remote.hs | 2 | ||||
-rw-r--r-- | Remote/BitTorrent.hs | 2 | ||||
-rw-r--r-- | Remote/Bup.hs | 2 | ||||
-rw-r--r-- | Remote/Helper/Encryptable.hs | 2 | ||||
-rw-r--r-- | Types/RefSpec.hs | 2 | ||||
-rw-r--r-- | Upgrade/V1.hs | 2 | ||||
-rw-r--r-- | Utility/DottedVersion.hs | 2 | ||||
-rw-r--r-- | Utility/Gpg.hs | 4 | ||||
-rw-r--r-- | Utility/Lsof.hs | 2 | ||||
-rw-r--r-- | Utility/Misc.hs | 8 | ||||
-rw-r--r-- | Utility/Quvi.hs | 4 | ||||
-rw-r--r-- | Utility/Rsync.hs | 4 | ||||
-rw-r--r-- | Utility/SafeCommand.hs | 4 |
30 files changed, 47 insertions, 38 deletions
diff --git a/Annex/Direct.hs b/Annex/Direct.hs index 5724d1162..e5c1c47c8 100644 --- a/Annex/Direct.hs +++ b/Annex/Direct.hs @@ -441,7 +441,7 @@ setDirect wantdirect = do - this way things that show HEAD (eg shell prompts) will - hopefully show just "master". -} directBranch :: Ref -> Ref -directBranch orighead = case split "/" $ fromRef orighead of +directBranch orighead = case splitc '/' $ fromRef orighead of ("refs":"heads":"annex":"direct":_) -> orighead ("refs":"heads":rest) -> Ref $ "refs/heads/annex/direct/" ++ intercalate "/" rest @@ -452,7 +452,7 @@ directBranch orighead = case split "/" $ fromRef orighead of - Any other ref is left unchanged. -} fromDirectBranch :: Ref -> Ref -fromDirectBranch directhead = case split "/" $ fromRef directhead of +fromDirectBranch directhead = case splitc '/' $ fromRef directhead of ("refs":"heads":"annex":"direct":rest) -> Ref $ "refs/heads/" ++ intercalate "/" rest _ -> directhead diff --git a/Annex/TaggedPush.hs b/Annex/TaggedPush.hs index ef1aeeea2..3b06170b3 100644 --- a/Annex/TaggedPush.hs +++ b/Annex/TaggedPush.hs @@ -39,7 +39,7 @@ toTaggedBranch u info b = Git.Ref $ intercalate "/" $ catMaybes ] fromTaggedBranch :: Git.Branch -> Maybe (UUID, Maybe String) -fromTaggedBranch b = case split "/" $ Git.fromRef b of +fromTaggedBranch b = case splitc '/' $ Git.fromRef b of ("refs":"synced":u:info:_base) -> Just (toUUID u, fromB64Maybe info) ("refs":"synced":u:_base) -> diff --git a/Assistant/Ssh.hs b/Assistant/Ssh.hs index 66ed54257..e439ecd23 100644 --- a/Assistant/Ssh.hs +++ b/Assistant/Ssh.hs @@ -383,7 +383,7 @@ mangleSshHostName sshdata = intercalate "-" {- Extracts the real hostname from a mangled ssh hostname. -} unMangleSshHostName :: String -> String -unMangleSshHostName h = case split "-" h of +unMangleSshHostName h = case splitc '-' h of ("git":"annex":rest) -> unescape (intercalate "-" (beginning rest)) _ -> h where diff --git a/Backend/Hash.hs b/Backend/Hash.hs index ba8d4bc64..c85047d51 100644 --- a/Backend/Hash.hs +++ b/Backend/Hash.hs @@ -103,7 +103,7 @@ selectExtension f es = filter (not . null) $ reverse $ take 2 $ map (filter validInExtension) $ takeWhile shortenough $ - reverse $ split "." $ takeExtensions f + reverse $ splitc '.' $ takeExtensions f shortenough e = length e <= 4 -- long enough for "jpeg" {- A key's checksum is checked during fsck. -} diff --git a/Build/OSXMkLibs.hs b/Build/OSXMkLibs.hs index 948b0d5a3..2243c6b86 100644 --- a/Build/OSXMkLibs.hs +++ b/Build/OSXMkLibs.hs @@ -95,7 +95,7 @@ findLibPath l = go =<< getEnv "DYLD_LIBRARY_PATH" where go Nothing = return l go (Just p) = fromMaybe l - <$> firstM doesFileExist (map (</> f) (split ":" p)) + <$> firstM doesFileExist (map (</> f) (splitc ':' p)) f = takeFileName l {- Expands any @rpath in the list of libraries. @@ -8,6 +8,7 @@ git-annex (6.20170102) UNRELEASED; urgency=medium * vicfg: Include the numcopies configuation. * config: New command for storing configuration in the git-annex branch. * stack.yaml: Update to lts-7.18. + * Some optimisations to string splitting code. -- Joey Hess <id@joeyh.name> Fri, 06 Jan 2017 15:22:06 -0400 diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index 8cc148440..169875f4b 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -387,7 +387,7 @@ url2file url pathdepth pathmax = case pathdepth of ] frombits a = intercalate "/" $ a urlbits urlbits = map (truncateFilePath pathmax . sanitizeFilePath) $ - filter (not . null) $ split "/" fullurl + filter (not . null) $ splitc '/' fullurl urlString2file :: URLString -> Maybe Int -> Int -> FilePath urlString2file s pathdepth pathmax = case Url.parseURIRelaxed s of diff --git a/Command/Map.hs b/Command/Map.hs index 43c00d257..b04beb477 100644 --- a/Command/Map.hs +++ b/Command/Map.hs @@ -92,7 +92,7 @@ hostname r | otherwise = "localhost" basehostname :: Git.Repo -> String -basehostname r = fromMaybe "" $ headMaybe $ split "." $ hostname r +basehostname r = fromMaybe "" $ headMaybe $ splitc '.' $ hostname r {- A name to display for a repo. Uses the name from uuid.log if available, - or the remote name if not. -} diff --git a/Command/Unused.hs b/Command/Unused.hs index 1711fe047..3953f4486 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -269,7 +269,7 @@ withKeysReferencedDiff a getdiff extractsha = do forM_ ds go liftIO $ void clean where - go d = do + go d = do let sha = extractsha d unless (sha == nullSha) $ (parseLinkOrPointer <$> catObject sha) @@ -231,8 +231,8 @@ instance LensGpgEncParams (RemoteConfig, RemoteGitConfig) where {- When the remote is configured to use public-key encryption, - look up the recipient keys and add them to the option list. -} case M.lookup "encryption" c of - Just "pubkey" -> Gpg.pkEncTo $ maybe [] (split ",") $ M.lookup "cipherkeys" c - Just "sharedpubkey" -> Gpg.pkEncTo $ maybe [] (split ",") $ M.lookup "pubkeys" c + Just "pubkey" -> Gpg.pkEncTo $ maybe [] (splitc ',') $ M.lookup "cipherkeys" c + Just "sharedpubkey" -> Gpg.pkEncTo $ maybe [] (splitc ',') $ M.lookup "pubkeys" c _ -> [] getGpgDecParams (_c,gc) = map Param (remoteAnnexGnupgDecryptOptions gc) diff --git a/Git/Command.hs b/Git/Command.hs index adea7622e..f40dfabcd 100644 --- a/Git/Command.hs +++ b/Git/Command.hs @@ -91,16 +91,16 @@ pipeWrite params repo = withHandle StdinHandle createProcessSuccess $ pipeNullSplit :: [CommandParam] -> Repo -> IO ([String], IO Bool) pipeNullSplit params repo = do (s, cleanup) <- pipeReadLazy params repo - return (filter (not . null) $ split sep s, cleanup) + return (filter (not . null) $ splitc sep s, cleanup) where - sep = "\0" + sep = '\0' pipeNullSplitStrict :: [CommandParam] -> Repo -> IO [String] pipeNullSplitStrict params repo = do s <- pipeReadStrict params repo - return $ filter (not . null) $ split sep s + return $ filter (not . null) $ splitc sep s where - sep = "\0" + sep = '\0' pipeNullSplitZombie :: [CommandParam] -> Repo -> IO [String] pipeNullSplitZombie params repo = leaveZombie <$> pipeNullSplit params repo diff --git a/Git/Config.hs b/Git/Config.hs index 65bd9b7ba..9b4c342a4 100644 --- a/Git/Config.hs +++ b/Git/Config.hs @@ -132,7 +132,7 @@ parse s -- --list output will have an = in the first line | all ('=' `elem`) (take 1 ls) = sep '=' ls -- --null --list output separates keys from values with newlines - | otherwise = sep '\n' $ split "\0" s + | otherwise = sep '\n' $ splitc '\0' s where ls = lines s sep c = M.fromListWith (++) . map (\(k,v) -> (k, [v])) . diff --git a/Git/Construct.hs b/Git/Construct.hs index 765562212..489927880 100644 --- a/Git/Construct.hs +++ b/Git/Construct.hs @@ -26,7 +26,7 @@ module Git.Construct ( #ifndef mingw32_HOST_OS import System.Posix.User #endif -import qualified Data.Map as M hiding (map, split) +import qualified Data.Map as M import Network.URI import Common @@ -143,7 +143,7 @@ remoteNamedFromKey :: String -> IO Repo -> IO Repo remoteNamedFromKey k = remoteNamed basename where basename = intercalate "." $ - reverse $ drop 1 $ reverse $ drop 1 $ split "." k + reverse $ drop 1 $ reverse $ drop 1 $ splitc '.' k {- Constructs a new Repo for one of a Repo's remotes using a given - location (ie, an url). -} diff --git a/Git/Ref.hs b/Git/Ref.hs index 5b3b85324..2d8013738 100644 --- a/Git/Ref.hs +++ b/Git/Ref.hs @@ -144,6 +144,6 @@ legal allowonelevel s = all (== False) illegal ends v = v `isSuffixOf` s begins v = v `isPrefixOf` s - pathbits = split "/" s + pathbits = splitc '/' s illegalchars = " ~^:?*[\\" ++ controlchars controlchars = chr 0o177 : [chr 0 .. chr (0o40-1)] @@ -161,7 +161,7 @@ addCopies :: String -> Annex () addCopies = addLimit . limitCopies limitCopies :: MkLimit Annex -limitCopies want = case split ":" want of +limitCopies want = case splitc ':' want of [v, n] -> case parsetrustspec v of Just checker -> go n $ checktrust checker Nothing -> go n $ checkgroup v diff --git a/Logs/Transfer.hs b/Logs/Transfer.hs index 28f7b0a26..903db96fe 100644 --- a/Logs/Transfer.hs +++ b/Logs/Transfer.hs @@ -268,7 +268,7 @@ readTransferInfo mpid s = TransferInfo filename | end rest == "\n" = beginning rest | otherwise = rest - bits = split " " firstline + bits = splitc ' ' firstline numbits = length bits time = if numbits > 0 then Just <$> parsePOSIXTime =<< headMaybe bits diff --git a/Logs/UUID.hs b/Logs/UUID.hs index 60c8a2ef9..4c84d10bd 100644 --- a/Logs/UUID.hs +++ b/Logs/UUID.hs @@ -66,7 +66,7 @@ fixBadUUID = M.fromList . map fixup . M.toList newertime (LogEntry (Date d) _) = d + minimumPOSIXTimeSlice newertime (LogEntry Unknown _) = minimumPOSIXTimeSlice minimumPOSIXTimeSlice = 0.000001 - isuuid s = length s == 36 && length (split "-" s) == 5 + isuuid s = length s == 36 && length (splitc '-' s) == 5 {- Records the uuid in the log, if it's not already there. -} recordUUID :: UUID -> Annex () @@ -140,7 +140,7 @@ byName' n = go . filter matching <$> remoteList byNameOrGroup :: RemoteName -> Annex [Remote] byNameOrGroup n = go =<< getConfigMaybe (ConfigKey ("remotes." ++ n)) where - go (Just l) = catMaybes <$> mapM (byName . Just) (split " " l) + go (Just l) = catMaybes <$> mapM (byName . Just) (splitc ' ' l) go Nothing = maybeToList <$> byName (Just n) {- Only matches remote name, not UUID -} diff --git a/Remote/BitTorrent.hs b/Remote/BitTorrent.hs index 0ec78aa64..2f29f5baa 100644 --- a/Remote/BitTorrent.hs +++ b/Remote/BitTorrent.hs @@ -302,7 +302,7 @@ ariaProgress (Just sz) meter ps = do =<< ariaParams ps parseAriaProgress :: Integer -> ProgressParser -parseAriaProgress totalsize = go [] . reverse . split ['\r'] +parseAriaProgress totalsize = go [] . reverse . splitc '\r' where go remainder [] = (Nothing, remainder) go remainder (x:xs) = case readish (findpercent x) of diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 332e8d5dc..75b379558 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -254,7 +254,7 @@ bup2GitRemote r else giveup "please specify an absolute path" | otherwise = Git.Construct.fromUrl $ "ssh://" ++ host ++ slash dir where - bits = split ":" r + bits = splitc ':' r host = Prelude.head bits dir = intercalate ":" $ drop 1 bits -- "host:~user/dir" is not supported specially by bup; diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs index 45ceae068..029ac4b09 100644 --- a/Remote/Helper/Encryptable.hs +++ b/Remote/Helper/Encryptable.hs @@ -165,7 +165,7 @@ extractCipher c = case (M.lookup "cipher" c, Just $ SharedCipher (fromB64bs t) _ -> Nothing where - readkeys = KeyIds . split "," + readkeys = KeyIds . splitc ',' describeEncryption :: RemoteConfig -> String describeEncryption c = case extractCipher c of diff --git a/Types/RefSpec.hs b/Types/RefSpec.hs index 091631abd..c71e57d92 100644 --- a/Types/RefSpec.hs +++ b/Types/RefSpec.hs @@ -25,7 +25,7 @@ allRefSpec :: RefSpec allRefSpec = [AddMatching $ compileGlob "*" CaseSensative] parseRefSpec :: String -> Either String RefSpec -parseRefSpec v = case partitionEithers (map mk $ split ":" v) of +parseRefSpec v = case partitionEithers (map mk $ splitc ':' v) of ([],refspec) -> Right refspec (e:_,_) -> Left e where diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs index c82cf92f5..725bb4089 100644 --- a/Upgrade/V1.hs +++ b/Upgrade/V1.hs @@ -148,7 +148,7 @@ readKey1 v , keyMtime = t } where - bits = split ":" v + bits = splitc ':' v b = Prelude.head bits n = intercalate ":" $ drop (if wormy then 3 else 1) bits t = if wormy diff --git a/Utility/DottedVersion.hs b/Utility/DottedVersion.hs index ebf4c0bd1..3198b1ce2 100644 --- a/Utility/DottedVersion.hs +++ b/Utility/DottedVersion.hs @@ -25,7 +25,7 @@ instance Show DottedVersion where normalize :: String -> DottedVersion normalize v = DottedVersion v $ sum $ mult 1 $ reverse $ extend precision $ take precision $ - map readi $ split "." v + map readi $ splitc '.' v where extend n l = l ++ replicate (n - length l) 0 mult _ [] = [] diff --git a/Utility/Gpg.hs b/Utility/Gpg.hs index 118515222..a5d382083 100644 --- a/Utility/Gpg.hs +++ b/Utility/Gpg.hs @@ -162,7 +162,7 @@ findPubKeys :: GpgCmd -> String -> IO KeyIds findPubKeys cmd for = KeyIds . parse . lines <$> readStrict cmd params where params = [Param "--with-colons", Param "--list-public-keys", Param for] - parse = mapMaybe (keyIdField . split ":") + parse = mapMaybe (keyIdField . splitc ':') keyIdField ("pub":_:_:_:f:_) = Just f keyIdField _ = Nothing @@ -175,7 +175,7 @@ secretKeys cmd = catchDefaultIO M.empty makemap where makemap = M.fromList . parse . lines <$> readStrict cmd params params = [Param "--with-colons", Param "--list-secret-keys", Param "--fixed-list-mode"] - parse = extract [] Nothing . map (split ":") + parse = extract [] Nothing . map (splitc ':') extract c (Just keyid) (("uid":_:_:_:_:_:_:_:_:userid:_):rest) = extract ((keyid, decode_c userid):c) Nothing rest extract c (Just keyid) rest@(("sec":_):_) = diff --git a/Utility/Lsof.hs b/Utility/Lsof.hs index 27d34b592..e3ed709ec 100644 --- a/Utility/Lsof.hs +++ b/Utility/Lsof.hs @@ -107,7 +107,7 @@ parseFormatted s = bundle $ go [] $ lines s parsemode ('u':_) = OpenReadWrite parsemode _ = OpenUnknown - splitnull = split "\0" + splitnull = splitc '\0' parsefail = error $ "failed to parse lsof output: " ++ show s diff --git a/Utility/Misc.hs b/Utility/Misc.hs index 4498c0a03..564935ddb 100644 --- a/Utility/Misc.hs +++ b/Utility/Misc.hs @@ -45,6 +45,14 @@ separate c l = unbreak $ break c l | null b = r | otherwise = (a, tail b) +{- Split on a single character. This is over twice as fast as using + - Data.List.Utils.split on a list of length 1, while producing + - identical results. -} +splitc :: Char -> String -> [String] +splitc c s = case break (== c) s of + (i, _c:rest) -> i : splitc c rest + (i, []) -> i : [] + {- Breaks out the first line. -} firstLine :: String -> String firstLine = takeWhile (/= '\n') diff --git a/Utility/Quvi.hs b/Utility/Quvi.hs index d33d79bb8..ff1ad854c 100644 --- a/Utility/Quvi.hs +++ b/Utility/Quvi.hs @@ -124,14 +124,14 @@ supported Quvi09 url = (firstlevel <&&> secondlevel) Nothing -> return False Just auth -> do let domain = map toLower $ uriRegName auth - let basedomain = intercalate "." $ reverse $ take 2 $ reverse $ split "." domain + let basedomain = intercalate "." $ reverse $ take 2 $ reverse $ splitc '.' domain any (\h -> domain `isSuffixOf` h || basedomain `isSuffixOf` h) . map (map toLower) <$> listdomains Quvi09 secondlevel = snd <$> processTranscript "quvi" (toCommand [Param "dump", Param "-o", Param url]) Nothing listdomains :: QuviVersion -> IO [String] -listdomains Quvi09 = concatMap (split ",") +listdomains Quvi09 = concatMap (splitc ',') . concatMap (drop 1 . words) . filter ("domains: " `isPrefixOf`) . lines <$> readQuvi (toCommand [Param "info", Param "-p", Param "domains"]) diff --git a/Utility/Rsync.hs b/Utility/Rsync.hs index d3fe98120..d3823a528 100644 --- a/Utility/Rsync.hs +++ b/Utility/Rsync.hs @@ -24,7 +24,7 @@ rsyncShell command = [Param "-e", Param $ unwords $ map escape (toCommand comman {- rsync requires some weird, non-shell like quoting in - here. A doubled single quote inside the single quoted - string is a single quote. -} - escape s = "'" ++ intercalate "''" (split "'" s) ++ "'" + escape s = "'" ++ intercalate "''" (splitc '\'' s) ++ "'" {- Runs rsync in server mode to send a file. -} rsyncServerSend :: [CommandParam] -> FilePath -> IO Bool @@ -123,7 +123,7 @@ parseRsyncProgress = go [] . reverse . progresschunks {- Find chunks that each start with delim. - The first chunk doesn't start with it - (it's empty when delim is at the start of the string). -} - progresschunks = drop 1 . split [delim] + progresschunks = drop 1 . splitc delim findbytesstart s = dropWhile isSpace s parsebytes :: String -> Maybe Integer diff --git a/Utility/SafeCommand.hs b/Utility/SafeCommand.hs index 5ce17a845..bef0a619d 100644 --- a/Utility/SafeCommand.hs +++ b/Utility/SafeCommand.hs @@ -11,7 +11,7 @@ module Utility.SafeCommand where import System.Exit import Utility.Process -import Data.String.Utils +import Utility.Misc import System.FilePath import Data.Char import Data.List @@ -86,7 +86,7 @@ shellEscape :: String -> String shellEscape f = "'" ++ escaped ++ "'" where -- replace ' with '"'"' - escaped = intercalate "'\"'\"'" $ split "'" f + escaped = intercalate "'\"'\"'" $ splitc '\'' f -- | Unescapes a set of shellEscaped words or filenames. shellUnEscape :: String -> [String] |