From b93d5c83cb1b9089589dd08a854315b92a2533d0 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 1 Aug 2013 17:30:47 -0400 Subject: Slow and ugly work around for bug #718517 in git, which broke git-cat-file --batch for filenames containing spaces. This runs git-cat-file in non-batch mode for all files with spaces. If a directory tree has a lot of them, and is in direct mode, even "git annex add" when there are few new files will need a *lot* of forks! The only reason buffering the whole file content to get the sha is not a memory leak is that git-annex only ever uses this on symlinks. This needs to be reverted as soon as a fix is available in git! --- Git/CatFile.hs | 51 +++++++++++++++++++++++++++++++++++++++++---------- Utility/Process.hs | 6 +++++- debian/changelog | 2 ++ 3 files changed, 48 insertions(+), 11 deletions(-) diff --git a/Git/CatFile.hs b/Git/CatFile.hs index d95972393..f779e99c6 100644 --- a/Git/CatFile.hs +++ b/Git/CatFile.hs @@ -1,6 +1,6 @@ {- git cat-file interface - - - Copyright 2011 Joey Hess + - Copyright 2011, 2013 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -17,6 +17,9 @@ module Git.CatFile ( import System.IO import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L +import Data.Digest.Pure.SHA +import Data.Char +import System.Process (std_out, std_err) import Common import Git @@ -26,16 +29,18 @@ import Git.Types import Git.FilePath import qualified Utility.CoProcess as CoProcess -type CatFileHandle = CoProcess.CoProcessHandle +data CatFileHandle = CatFileHandle CoProcess.CoProcessHandle Repo catFileStart :: Repo -> IO CatFileHandle -catFileStart = CoProcess.rawMode <=< gitCoProcessStart True - [ Param "cat-file" - , Param "--batch" - ] +catFileStart repo = do + coprocess <- CoProcess.rawMode =<< gitCoProcessStart True + [ Param "cat-file" + , Param "--batch" + ] repo + return $ CatFileHandle coprocess repo catFileStop :: CatFileHandle -> IO () -catFileStop = CoProcess.stop +catFileStop (CatFileHandle p _) = CoProcess.stop p {- Reads a file from a specified branch. -} catFile :: CatFileHandle -> Branch -> FilePath -> IO L.ByteString @@ -49,9 +54,10 @@ catObject h object = maybe L.empty fst <$> catObjectDetails h object {- Gets both the content of an object, and its Sha. -} catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha)) -catObjectDetails h object = CoProcess.query h send receive +catObjectDetails (CatFileHandle hdl repo) object = CoProcess.query hdl send receive where - send to = hPutStrLn to $ show object + query = show object + send to = hPutStrLn to query receive from = do header <- hGetLine from case words header of @@ -64,7 +70,10 @@ catObjectDetails h object = CoProcess.query h send receive | otherwise -> dne _ | header == show object ++ " missing" -> dne - | otherwise -> error $ "unknown response from git cat-file " ++ show (header, object) + | otherwise -> + if any isSpace query + then fallback + else error $ "unknown response from git cat-file " ++ show (header, object) readcontent bytes from sha = do content <- S.hGet from bytes eatchar '\n' from @@ -74,3 +83,25 @@ catObjectDetails h object = CoProcess.query h send receive c <- hGetChar from when (c /= expected) $ error $ "missing " ++ (show expected) ++ " from git cat-file" + + {- Work around a bug in git 1.8.4 rc0 which broke it for filenames + - containing spaces. http://bugs.debian.org/718517 + - Slow! Also can use a lot of memory, if the object is large. -} + fallback = do + let p = gitCreateProcess + [ Param "cat-file" + , Param "-p" + , Param query + ] repo + (_, Just h, _, pid) <- withNullHandle $ \null -> + createProcess p + { std_out = CreatePipe + , std_err = UseHandle null + } + fileEncoding h + content <- L.hGetContents h + let sha = (\s -> length s `seq` s) (showDigest $ sha1 content) + ok <- checkSuccessProcess pid + return $ if ok + then Just (content, Ref sha) + else Nothing diff --git a/Utility/Process.hs b/Utility/Process.hs index 3a8677fa3..590124289 100644 --- a/Utility/Process.hs +++ b/Utility/Process.hs @@ -25,6 +25,7 @@ module Utility.Process ( withHandle, withBothHandles, withQuietOutput, + withNullHandle, createProcess, startInteractiveProcess, stdinHandle, @@ -241,12 +242,15 @@ withQuietOutput :: CreateProcessRunner -> CreateProcess -> IO () -withQuietOutput creator p = withFile devnull WriteMode $ \nullh -> do +withQuietOutput creator p = withNullHandle $ \nullh -> do let p' = p { std_out = UseHandle nullh , std_err = UseHandle nullh } creator p' $ const $ return () + +withNullHandle :: (Handle -> IO a) -> IO a +withNullHandle = withFile devnull WriteMode where #ifndef mingw32_HOST_OS devnull = "/dev/null" diff --git a/debian/changelog b/debian/changelog index b5d1211b7..193f0c4e6 100644 --- a/debian/changelog +++ b/debian/changelog @@ -39,6 +39,8 @@ git-annex (4.20130724) UNRELEASED; urgency=low * find: Avoid polluting stdout with progress messages. Closes: #718186 * Escape ':' in file/directory names to avoid it being treated as a pathspec by some git commands. Closes: #718185 + * Slow and ugly work around for bug #718517 in git, which broke + git-cat-file --batch for filenames containing spaces. -- Joey Hess Tue, 23 Jul 2013 12:39:48 -0400 -- cgit v1.2.3