summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2011-09-18 22:40:31 -0400
committerGravatar Joey Hess <joey@kitenet.net>2011-09-18 22:40:31 -0400
commit4f1fea1a856a2c82ed200e805bb18e9f9aaaa67b (patch)
tree2fa27f0507949ea8d381b18120ef109e70d304b9
parent8d1e8c0760e0d9935223523f35f5b8ea954730ac (diff)
fix memory leak
filterM is not a good idea if you were streaming in a large list of files. Fixing this memory leak that I introduced earlier today was a PITA because to avoid the filterM, it's necessary to do the filtering only after building up the data structures like BackendFile, and that means each separate data structure needs it own function to apply the filter, at least in this naive implementation. There is also a minor performance regression, when using copy/drop/get/fsck with a filter, git is now asked to look up attributes for all files, since that now comes before the filter is applied. This is only a very minor thing, since getting the attributes is very fast and --exclude was probably not typically used to speed it up.
-rw-r--r--Command.hs53
-rw-r--r--Limit.hs15
2 files changed, 40 insertions, 28 deletions
diff --git a/Command.hs b/Command.hs
index fd58ca801..3cfff268c 100644
--- a/Command.hs
+++ b/Command.hs
@@ -107,17 +107,22 @@ notBareRepo a = do
{- These functions find appropriate files or other things based on a
user's parameters, and run a specified action on them. -}
-withFilesInGit :: (String -> CommandStart) -> CommandSeek
+withFilesInGit :: (FilePath -> CommandStart) -> CommandSeek
withFilesInGit a params = do
repo <- Annex.gitRepo
- files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params
- liftM (map a) $ filterFiles files
+ runFiltered a $ liftIO $ runPreserveOrder (LsFiles.inRepo repo) params
withAttrFilesInGit :: String -> ((FilePath, String) -> CommandStart) -> CommandSeek
withAttrFilesInGit attr a params = do
repo <- Annex.gitRepo
files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params
- files' <- filterFiles files
- liftM (map a) $ liftIO $ Git.checkAttr repo attr files'
+ run $ liftIO $ Git.checkAttr repo attr files
+ where
+ run fs = do
+ matcher <- Limit.getMatcher
+ liftM (map $ proc matcher) fs
+ proc matcher p@(f, _) = do
+ ok <- matcher f
+ if ok then a p else stop
withNumCopies :: (FilePath -> Maybe Int -> CommandStart) -> CommandSeek
withNumCopies a params = withAttrFilesInGit "annex.numcopies" go params
where
@@ -128,23 +133,17 @@ withBackendFilesInGit :: (BackendFile -> CommandStart) -> CommandSeek
withBackendFilesInGit a params = do
repo <- Annex.gitRepo
files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params
- files' <- filterFiles files
- backendPairs a files'
+ backendPairs a files
withFilesMissing :: (String -> CommandStart) -> CommandSeek
-withFilesMissing a params = do
- files <- liftIO $ filterM missing params
- liftM (map a) $ filterFiles files
+withFilesMissing a params = runFiltered a $ liftIO $ filterM missing params
where
- missing f = do
- e <- doesFileExist f
- return $ not e
+ missing = liftM not . doesFileExist
withFilesNotInGit :: (BackendFile -> CommandStart) -> CommandSeek
withFilesNotInGit a params = do
repo <- Annex.gitRepo
force <- Annex.getState Annex.force
newfiles <- liftIO $ runPreserveOrder (LsFiles.notInRepo repo force) params
- newfiles' <- filterFiles newfiles
- backendPairs a newfiles'
+ backendPairs a newfiles
withWords :: ([String] -> CommandStart) -> CommandSeek
withWords a params = return [a params]
withStrings :: (String -> CommandStart) -> CommandSeek
@@ -152,8 +151,8 @@ withStrings a params = return $ map a params
withFilesToBeCommitted :: (String -> CommandStart) -> CommandSeek
withFilesToBeCommitted a params = do
repo <- Annex.gitRepo
- tocommit <- liftIO $ runPreserveOrder (LsFiles.stagedNotDeleted repo) params
- liftM (map a) $ filterFiles tocommit
+ runFiltered a $
+ liftIO $ runPreserveOrder (LsFiles.stagedNotDeleted repo) params
withFilesUnlocked :: (BackendFile -> CommandStart) -> CommandSeek
withFilesUnlocked = withFilesUnlocked' LsFiles.typeChanged
withFilesUnlockedToBeCommitted :: (BackendFile -> CommandStart) -> CommandSeek
@@ -165,8 +164,7 @@ withFilesUnlocked' typechanged a params = do
typechangedfiles <- liftIO $ runPreserveOrder (typechanged repo) params
unlockedfiles <- liftIO $ filterM notSymlink $
map (\f -> Git.workTree repo ++ "/" ++ f) typechangedfiles
- unlockedfiles' <- filterFiles unlockedfiles
- backendPairs a unlockedfiles'
+ backendPairs a unlockedfiles
withKeys :: (Key -> CommandStart) -> CommandSeek
withKeys a params = return $ map (a . parse) params
where
@@ -175,8 +173,23 @@ withNothing :: CommandStart -> CommandSeek
withNothing a [] = return [a]
withNothing _ _ = error "This command takes no parameters."
+runFiltered :: (FilePath -> Annex (Maybe a)) -> Annex [FilePath] -> Annex [Annex (Maybe a)]
+runFiltered a fs = do
+ matcher <- Limit.getMatcher
+ liftM (map $ proc matcher) fs
+ where
+ proc matcher f = do
+ ok <- matcher f
+ if ok then a f else stop
+
backendPairs :: (BackendFile -> CommandStart) -> CommandSeek
-backendPairs a files = map a <$> Backend.chooseBackends files
+backendPairs a fs = do
+ matcher <- Limit.getMatcher
+ liftM (map $ proc matcher) (Backend.chooseBackends fs)
+ where
+ proc matcher p@(_, f) = do
+ ok <- matcher f
+ if ok then a p else stop
{- filter out symlinks -}
notSymlink :: FilePath -> IO Bool
diff --git a/Limit.hs b/Limit.hs
index 51f3fc950..602da7001 100644
--- a/Limit.hs
+++ b/Limit.hs
@@ -22,20 +22,19 @@ import Utility
type Limit = Utility.Matcher.Token (FilePath -> Annex Bool)
-{- Filter out files not matching user-specified limits. -}
-filterFiles :: [FilePath] -> Annex [FilePath]
-filterFiles l = do
- matcher <- getMatcher
- filterM (Utility.Matcher.matchM matcher) l
-
{- Checks if there are user-specified limits. -}
limited :: Annex Bool
-limited = (not . Utility.Matcher.matchesAny) <$> getMatcher
+limited = (not . Utility.Matcher.matchesAny) <$> getMatcher'
{- Gets a matcher for the user-specified limits. The matcher is cached for
- speed; once it's obtained the user-specified limits can't change. -}
-getMatcher :: Annex (Utility.Matcher.Matcher (FilePath -> Annex Bool))
+getMatcher :: Annex (FilePath -> Annex Bool)
getMatcher = do
+ m <- getMatcher'
+ return $ Utility.Matcher.matchM m
+
+getMatcher' :: Annex (Utility.Matcher.Matcher (FilePath -> Annex Bool))
+getMatcher' = do
m <- Annex.getState Annex.limit
case m of
Right r -> return r