summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2013-09-19 14:48:42 -0400
committerGravatar Joey Hess <joey@kitenet.net>2013-09-19 14:48:42 -0400
commit2ec103cb68110deee36e8445d7320d0297ed4342 (patch)
treea57c1f474343773854f726b3ca3531af2d4cfbcd
parent5f90fe2a343ba513682513d1b1b8019f85b1ac9f (diff)
sync, pre-commit, indirect: Avoid unnecessarily catting non-symlink files from git, which can be so large it runs out of memory.
-rw-r--r--Annex/Direct.hs8
-rw-r--r--Command/Indirect.hs5
-rw-r--r--Command/PreCommit.hs7
-rw-r--r--Git/LsFiles.hs15
-rw-r--r--Logs/Web.hs3
-rw-r--r--debian/changelog2
6 files changed, 26 insertions, 14 deletions
diff --git a/Annex/Direct.hs b/Annex/Direct.hs
index b66b2fdfd..a6c30ad08 100644
--- a/Annex/Direct.hs
+++ b/Annex/Direct.hs
@@ -14,8 +14,8 @@ import qualified Git.Merge
import qualified Git.DiffTree as DiffTree
import Git.Sha
import Git.Types
-import Annex.CatFile
import Git.FileMode
+import Annex.CatFile
import qualified Annex.Queue
import Logs.Location
import Backend
@@ -45,8 +45,10 @@ stageDirect = do
{- Determine what kind of modified or deleted file this is, as
- efficiently as we can, by getting any key that's associated
- with it in git, as well as its stat info. -}
- go (file, Just sha) = do
- shakey <- catKey sha
+ go (file, Just sha, Just mode) = do
+ shakey <- if isSymLink mode
+ then catKey sha
+ else return Nothing
mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file
filekey <- isAnnexLink file
case (shakey, filekey, mstat, toInodeCache =<< mstat) of
diff --git a/Command/Indirect.hs b/Command/Indirect.hs
index e63c4cb8a..79e736d11 100644
--- a/Command/Indirect.hs
+++ b/Command/Indirect.hs
@@ -14,6 +14,7 @@ import Command
import qualified Git
import qualified Git.Command
import qualified Git.LsFiles
+import Git.FileMode
import Config
import qualified Annex
import Annex.Direct
@@ -67,8 +68,7 @@ perform = do
{- Walk tree from top and move all present direct mode files into
- the annex, replacing with symlinks. Also delete direct mode
- caches and mappings. -}
- go (_, Nothing) = noop
- go (f, Just sha) = do
+ go (f, Just sha, Just mode) | isSymLink mode = do
r <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus f
case r of
Just s
@@ -80,6 +80,7 @@ perform = do
maybe noop (fromdirect f)
=<< catKey sha
_ -> noop
+ go _ = noop
fromdirect f k = do
showStart "indirect" f
diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs
index c6d9dd278..c35cf61e1 100644
--- a/Command/PreCommit.hs
+++ b/Command/PreCommit.hs
@@ -16,6 +16,7 @@ import qualified Git.Ref
import Annex.CatFile
import Annex.Content.Direct
import Git.Sha
+import Git.FileMode
def :: [Command]
def = [command "pre-commit" paramPaths seek SectionPlumbing
@@ -44,10 +45,10 @@ startDirect _ = next $ do
next $ liftIO clean
where
go diff = do
- withkey (Git.DiffTree.srcsha diff) removeAssociatedFile
- withkey (Git.DiffTree.dstsha diff) addAssociatedFile
+ withkey (Git.DiffTree.srcsha diff) (Git.DiffTree.srcmode diff) removeAssociatedFile
+ withkey (Git.DiffTree.dstsha diff) (Git.DiffTree.dstmode diff) addAssociatedFile
where
- withkey sha a = when (sha /= nullSha) $ do
+ withkey sha mode a = when (sha /= nullSha && isSymLink mode) $ do
k <- catKey sha
case k of
Nothing -> noop
diff --git a/Git/LsFiles.hs b/Git/LsFiles.hs
index e2e29ea36..8a5d4bd6a 100644
--- a/Git/LsFiles.hs
+++ b/Git/LsFiles.hs
@@ -28,6 +28,9 @@ import Git.Command
import Git.Types
import Git.Sha
+import Numeric
+import System.Posix.Types
+
{- Scans for files that are checked into git at the specified locations. -}
inRepo :: [FilePath] -> Repo -> IO ([FilePath], IO Bool)
inRepo l = pipeNullSplit $ Params "ls-files --cached -z --" : map File l
@@ -78,16 +81,16 @@ staged' ps l = pipeNullSplit $ prefix ++ ps ++ suffix
{- Returns details about files that are staged in the index,
- as well as files not yet in git. Skips ignored files. -}
-stagedOthersDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha)], IO Bool)
+stagedOthersDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha, Maybe FileMode)], IO Bool)
stagedOthersDetails = stagedDetails' [Params "--others --exclude-standard"]
{- Returns details about all files that are staged in the index. -}
-stagedDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha)], IO Bool)
+stagedDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha, Maybe FileMode)], IO Bool)
stagedDetails = stagedDetails' []
{- Gets details about staged files, including the Sha of their staged
- contents. -}
-stagedDetails' :: [CommandParam] -> [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha)], IO Bool)
+stagedDetails' :: [CommandParam] -> [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha, Maybe FileMode)], IO Bool)
stagedDetails' ps l repo = do
(ls, cleanup) <- pipeNullSplit params repo
return (map parse ls, cleanup)
@@ -95,10 +98,12 @@ stagedDetails' ps l repo = do
params = Params "ls-files --stage -z" : ps ++
Param "--" : map File l
parse s
- | null file = (s, Nothing)
- | otherwise = (file, extractSha $ take shaSize $ drop 7 metadata)
+ | null file = (s, Nothing, Nothing)
+ | otherwise = (file, extractSha $ take shaSize rest, readmode mode)
where
(metadata, file) = separate (== '\t') s
+ (mode, rest) = separate (== ' ') metadata
+ readmode = headMaybe . readOct >=*> fst
{- Returns a list of the files in the specified locations that are staged
- for commit, and whose type has changed. -}
diff --git a/Logs/Web.hs b/Logs/Web.hs
index 0239f2335..ede600ec2 100644
--- a/Logs/Web.hs
+++ b/Logs/Web.hs
@@ -18,6 +18,7 @@ module Logs.Web (
) where
import qualified Data.ByteString.Lazy.Char8 as L
+import Data.Tuple.Utils
import Common.Annex
import Logs
@@ -70,7 +71,7 @@ knownUrls = do
Annex.Branch.withIndex $ do
top <- fromRepo Git.repoPath
(l, cleanup) <- inRepo $ Git.LsFiles.stagedDetails [top]
- r <- mapM (geturls . snd) $ filter (isUrlLog . fst) l
+ r <- mapM (geturls . snd3) $ filter (isUrlLog . fst3) l
void $ liftIO cleanup
return $ concat r
where
diff --git a/debian/changelog b/debian/changelog
index 1fd137518..6cc20e412 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -18,6 +18,8 @@ git-annex (4.20130912) UNRELEASED; urgency=low
numcopies levels. (--fast avoids calculating these)
* gcrypt: Ensure that signing key is set to one of the participants keys.
* webapp: Show encryption information when editing a remote.
+ * sync, pre-commit, indirect: Avoid unnecessarily catting non-symlink
+ files from git, which can be so large it runs out of memory.
-- Joey Hess <joeyh@debian.org> Thu, 12 Sep 2013 12:14:46 -0400