diff options
76 files changed, 2395 insertions, 897 deletions
@@ -60,6 +60,7 @@ import Types.NumCopies import Types.LockCache import Types.DesktopNotify import Types.CleanupActions +import qualified Database.Keys.Handle as Keys #ifdef WITH_QUVI import Utility.Quvi (QuviVersion) #endif @@ -134,6 +135,7 @@ data AnnexState = AnnexState , desktopnotify :: DesktopNotify , workers :: [Either AnnexState (Async AnnexState)] , concurrentjobs :: Maybe Int + , keysdbhandle :: Maybe Keys.DbHandle } newState :: GitConfig -> Git.Repo -> AnnexState @@ -179,6 +181,7 @@ newState c r = AnnexState , desktopnotify = mempty , workers = [] , concurrentjobs = Nothing + , keysdbhandle = Nothing } {- Makes an Annex state object for the specified git repo. @@ -193,25 +196,32 @@ new r = do {- Performs an action in the Annex monad from a starting state, - returning a new state. -} run :: AnnexState -> Annex a -> IO (a, AnnexState) -run s a = do - mvar <- newMVar s +run s a = flip run' a =<< newMVar s + +run' :: MVar AnnexState -> Annex a -> IO (a, AnnexState) +run' mvar a = do r <- runReaderT (runAnnex a) mvar + `onException` (flush =<< readMVar mvar) s' <- takeMVar mvar + flush s' return (r, s') + where + flush = maybe noop Keys.flushDbQueue . keysdbhandle {- Performs an action in the Annex monad from a starting state, - and throws away the new state. -} eval :: AnnexState -> Annex a -> IO a -eval s a = do - mvar <- newMVar s - runReaderT (runAnnex a) mvar +eval s a = fst <$> run s a {- Makes a runner action, that allows diving into IO and from inside - the IO action, running an Annex action. -} makeRunner :: Annex (Annex a -> IO a) makeRunner = do mvar <- ask - return $ \a -> runReaderT (runAnnex a) mvar + return $ \a -> do + (r, s) <- run' mvar a + putMVar mvar s + return r getState :: (AnnexState -> v) -> Annex v getState selector = do diff --git a/Annex/AutoMerge.hs b/Annex/AutoMerge.hs index bfbe71dc2..c32c3f66a 100644 --- a/Annex/AutoMerge.hs +++ b/Annex/AutoMerge.hs @@ -25,7 +25,6 @@ import qualified Git.Branch import Git.Types (BlobType(..)) import Config import Annex.ReplaceFile -import Git.FileMode import Annex.VariantFile import qualified Data.Set as S @@ -135,7 +134,7 @@ resolveMerge' (Just us) them u = do | select (LsFiles.unmergedBlobType u) == Just SymlinkBlob = case select' (LsFiles.unmergedSha u) of Nothing -> return Nothing - Just sha -> catKey sha symLinkMode + Just sha -> catKey sha | otherwise = return Nothing makelink key = do @@ -174,7 +173,7 @@ resolveMerge' (Just us) them u = do case select' (LsFiles.unmergedSha u) of Nothing -> noop Just sha -> do - link <- catLink True sha + link <- catSymLinkTarget sha replacewithlink item link resolveby a = do diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs index 179149844..aefccd424 100644 --- a/Annex/CatFile.hs +++ b/Annex/CatFile.hs @@ -1,6 +1,6 @@ {- git cat-file interface, with handle automatically stored in the Annex monad - - - Copyright 2011-2013 Joey Hess <id@joeyh.name> + - Copyright 2011-2015 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -16,7 +16,7 @@ module Annex.CatFile ( catKey, catKeyFile, catKeyFileHEAD, - catLink, + catSymLinkTarget, ) where import qualified Data.ByteString.Lazy as L @@ -29,8 +29,8 @@ import qualified Git.CatFile import qualified Annex import Git.Types import Git.FilePath -import Git.FileMode import qualified Git.Ref +import Annex.Link catFile :: Git.Branch -> FilePath -> Annex L.ByteString catFile branch file = do @@ -80,52 +80,17 @@ catFileStop = do (s { Annex.catfilehandles = M.empty }, Annex.catfilehandles s) liftIO $ mapM_ Git.CatFile.catFileStop (M.elems m) -{- From the Sha or Ref of a symlink back to the key. - - - - Requires a mode witness, to guarantee that the file is a symlink. - -} -catKey :: Ref -> FileMode -> Annex (Maybe Key) -catKey = catKey' True - -catKey' :: Bool -> Sha -> FileMode -> Annex (Maybe Key) -catKey' modeguaranteed sha mode - | isSymLink mode = do - l <- catLink modeguaranteed sha - return $ if isLinkToAnnex l - then fileKey $ takeFileName l - else Nothing - | otherwise = return Nothing +{- From ref to a symlink or a pointer file, get the key. -} +catKey :: Ref -> Annex (Maybe Key) +catKey ref = parseLinkOrPointer <$> catObject ref {- Gets a symlink target. -} -catLink :: Bool -> Sha -> Annex String -catLink modeguaranteed sha = fromInternalGitPath . decodeBS <$> get - where - -- If the mode is not guaranteed to be correct, avoid - -- buffering the whole file content, which might be large. - -- 8192 is enough if it really is a symlink. - get - | modeguaranteed = catObject sha - | otherwise = L.take 8192 <$> catObject sha - -{- Looks up the key corresponding to the Ref using the running cat-file. - - - - Currently this always has to look in HEAD, because cat-file --batch - - does not offer a way to specify that we want to look up a tree object - - in the index. So if the index has a file staged not as a symlink, - - and it is a symlink in head, the wrong mode is gotten. - - Also, we have to assume the file is a symlink if it's not yet committed - - to HEAD. For these reasons, modeguaranteed is not set. - -} -catKeyChecked :: Bool -> Ref -> Annex (Maybe Key) -catKeyChecked needhead ref@(Ref r) = - catKey' False ref =<< findmode <$> catTree treeref +catSymLinkTarget :: Sha -> Annex String +catSymLinkTarget sha = fromInternalGitPath . decodeBS <$> get where - pathparts = split "/" r - dir = intercalate "/" $ take (length pathparts - 1) pathparts - file = fromMaybe "" $ lastMaybe pathparts - treeref = Ref $ if needhead then "HEAD" ++ dir ++ "/" else dir ++ "/" - findmode = fromMaybe symLinkMode . headMaybe . - map snd . filter (\p -> fst p == file) + -- Avoid buffering the whole file content, which might be large. + -- 8192 is enough if it really is a symlink or pointer file. + get = L.take 8192 <$> catObject sha {- From a file in the repository back to the key. - @@ -151,8 +116,8 @@ catKeyChecked needhead ref@(Ref r) = catKeyFile :: FilePath -> Annex (Maybe Key) catKeyFile f = ifM (Annex.getState Annex.daemon) ( catKeyFileHEAD f - , catKeyChecked True $ Git.Ref.fileRef f + , catKey $ Git.Ref.fileRef f ) catKeyFileHEAD :: FilePath -> Annex (Maybe Key) -catKeyFileHEAD f = catKeyChecked False $ Git.Ref.fileFromRef Git.Ref.headRef f +catKeyFileHEAD f = catKey $ Git.Ref.fileFromRef Git.Ref.headRef f diff --git a/Annex/Content.hs b/Annex/Content.hs index 60ffb8141..e501df072 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -1,6 +1,6 @@ {- git-annex file content managing - - - Copyright 2010-2014 Joey Hess <id@joeyh.name> + - Copyright 2010-2015 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -24,6 +24,12 @@ module Annex.Content ( withTmp, checkDiskSpace, moveAnnex, + populatePointerFile, + linkAnnex, + linkAnnex', + LinkAnnexResult(..), + unlinkAnnex, + checkedCopyFile, sendAnnex, prepSendAnnex, removeAnnex, @@ -38,6 +44,7 @@ module Annex.Content ( dirKeys, withObjectLoc, staleKeysPrune, + isUnmodified, ) where import System.IO.Unsafe (unsafeInterleaveIO) @@ -61,15 +68,19 @@ import Config import Git.SharedRepository import Annex.Perms import Annex.Link -import Annex.Content.Direct +import qualified Annex.Content.Direct as Direct import Annex.ReplaceFile import Annex.LockPool import Messages.Progress import qualified Types.Remote import qualified Types.Backend import qualified Backend +import qualified Database.Keys import Types.NumCopies import Annex.UUID +import Annex.InodeSentinal +import Utility.InodeCache +import Utility.PosixFiles {- Checks if a given key's content is currently present. -} inAnnex :: Key -> Annex Bool @@ -79,7 +90,10 @@ inAnnex key = inAnnexCheck key $ liftIO . doesFileExist inAnnexCheck :: Key -> (FilePath -> Annex Bool) -> Annex Bool inAnnexCheck key check = inAnnex' id False check key -{- Generic inAnnex, handling both indirect and direct mode. +{- inAnnex that performs an arbitrary check of the key's content. + - + - When the content is unlocked, it must also be unmodified, or the bad + - value will be returned. - - In direct mode, at least one of the associated files must pass the - check. Additionally, the file must be unmodified. @@ -88,14 +102,22 @@ inAnnex' :: (a -> Bool) -> a -> (FilePath -> Annex a) -> Key -> Annex a inAnnex' isgood bad check key = withObjectLoc key checkindirect checkdirect where checkindirect loc = do - whenM (fromRepo Git.repoIsUrl) $ - error "inAnnex cannot check remote repo" - check loc + r <- check loc + if isgood r + then do + cache <- Database.Keys.getInodeCaches key + if null cache + then return r + else ifM (sameInodeCache loc cache) + ( return r + , return bad + ) + else return bad checkdirect [] = return bad checkdirect (loc:locs) = do r <- check loc if isgood r - then ifM (goodContent key loc) + then ifM (Direct.goodContent key loc) ( return r , checkdirect locs ) @@ -371,7 +393,7 @@ withTmp key action = do return res {- Checks that there is disk space available to store a given key, - - in a destination (or the annex) printing a warning if not. + - in a destination directory (or the annex) printing a warning if not. - - If the destination is on the same filesystem as the annex, - checks for any other running downloads, removing the amount of data still @@ -379,7 +401,12 @@ withTmp key action = do - when doing concurrent downloads. -} checkDiskSpace :: Maybe FilePath -> Key -> Integer -> Bool -> Annex Bool -checkDiskSpace destdir key alreadythere samefilesystem = ifM (Annex.getState Annex.force) +checkDiskSpace destdir key = checkDiskSpace' (fromMaybe 1 (keySize key)) destdir key + +{- Allows specifying the size of the key, if it's known, which is useful + - as not all keys know their size. -} +checkDiskSpace' :: Integer -> Maybe FilePath -> Key -> Integer -> Bool -> Annex Bool +checkDiskSpace' need destdir key alreadythere samefilesystem = ifM (Annex.getState Annex.force) ( return True , do -- We can't get inprogress and free at the same @@ -392,8 +419,8 @@ checkDiskSpace destdir key alreadythere samefilesystem = ifM (Annex.getState Ann then sizeOfDownloadsInProgress (/= key) else pure 0 free <- liftIO . getDiskFree =<< dir - case (free, fromMaybe 1 (keySize key)) of - (Just have, need) -> do + case free of + Just have -> do reserve <- annexDiskReserve <$> Annex.getGitConfig let delta = need + reserve - have - alreadythere + inprogress let ok = delta <= 0 @@ -412,7 +439,10 @@ checkDiskSpace destdir key alreadythere samefilesystem = ifM (Annex.getState Ann {- Moves a key's content into .git/annex/objects/ - - - In direct mode, moves it to the associated file, or files. + - When a key has associated pointer files, the object is hard + - linked (or copied) to the files, and the object file is left thawed. + + - In direct mode, moves the object file to the associated file, or files. - - What if the key there already has content? This could happen for - various reasons; perhaps the same content is being annexed again. @@ -440,7 +470,12 @@ moveAnnex key src = withObjectLoc key storeobject storedirect ( alreadyhave , modifyContent dest $ do liftIO $ moveFile src dest - freezeContent dest + fs <- Database.Keys.getAssociatedFiles key + if null fs + then freezeContent dest + else do + mapM_ (populatePointerFile key dest) fs + Database.Keys.storeInodeCaches key (dest:fs) ) storeindirect = storeobject =<< calcRepo (gitAnnexLocation key) @@ -458,21 +493,116 @@ moveAnnex key src = withObjectLoc key storeobject storedirect v <- isAnnexLink f if Just key == v then do - updateInodeCache key src + Direct.updateInodeCache key src replaceFile f $ liftIO . moveFile src chmodContent f forM_ fs $ - addContentWhenNotPresent key f - else ifM (goodContent key f) + Direct.addContentWhenNotPresent key f + else ifM (Direct.goodContent key f) ( storedirect' alreadyhave fs , storedirect' fallback fs ) alreadyhave = liftIO $ removeFile src +populatePointerFile :: Key -> FilePath -> FilePath -> Annex () +populatePointerFile k obj f = go =<< isPointerFile f + where + go (Just k') | k == k' = do + liftIO $ nukeFile f + unlessM (linkAnnex'' k obj f) $ + liftIO $ writeFile f (formatPointer k) + go _ = return () + +{- Hard links a file into .git/annex/objects/, falling back to a copy + - if necessary. Does nothing if the object file already exists. + - + - Does not lock down the hard linked object, so that the user can modify + - the source file. So, adding an object to the annex this way can + - prevent losing the content if the source file is deleted, but does not + - guard against modifications. + -} +linkAnnex :: Key -> FilePath -> Maybe InodeCache -> Annex LinkAnnexResult +linkAnnex key src srcic = do + dest <- calcRepo (gitAnnexLocation key) + modifyContent dest $ linkAnnex' key src srcic dest + +{- Hard links (or copies) src to dest, one of which should be the + - annex object. Updates inode cache for src and for dest when it's + - changed. -} +linkAnnex' :: Key -> FilePath -> Maybe InodeCache -> FilePath -> Annex LinkAnnexResult +linkAnnex' _ _ Nothing _ = return LinkAnnexFailed +linkAnnex' key src (Just srcic) dest = + ifM (liftIO $ doesFileExist dest) + ( do + Database.Keys.addInodeCaches key [srcic] + return LinkAnnexNoop + , ifM (linkAnnex'' key src dest) + ( do + thawContent dest + -- src could have changed while being copied + -- to dest + mcache <- withTSDelta (liftIO . genInodeCache src) + case mcache of + Just srcic' | compareStrong srcic srcic' -> do + destic <- withTSDelta (liftIO . genInodeCache dest) + Database.Keys.addInodeCaches key $ + catMaybes [destic, Just srcic] + return LinkAnnexOk + _ -> do + liftIO $ nukeFile dest + failed + , failed + ) + ) + where + failed = do + Database.Keys.addInodeCaches key [srcic] + return LinkAnnexFailed + +data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop + +{- Hard links or copies src to dest. Only uses a hard link if src + - is not already hardlinked to elsewhere. Checks disk reserve before + - copying, and will fail if not enough space, or if the dest file + - already exists. -} +linkAnnex'' :: Key -> FilePath -> FilePath -> Annex Bool +linkAnnex'' key src dest = catchBoolIO $ do + s <- liftIO $ getFileStatus src + let copy = checkedCopyFile' key src dest s +#ifndef mingw32_HOST_OS + if linkCount s > 1 + then copy + else liftIO (createLink src dest >> return True) + `catchIO` const copy +#else + copy +#endif + +{- Removes the annex object file for a key. Lowlevel. -} +unlinkAnnex :: Key -> Annex () +unlinkAnnex key = do + obj <- calcRepo $ gitAnnexLocation key + modifyContent obj $ do + secureErase obj + liftIO $ nukeFile obj + +{- Checks disk space before copying. -} +checkedCopyFile :: Key -> FilePath -> FilePath -> Annex Bool +checkedCopyFile key src dest = catchBoolIO $ + checkedCopyFile' key src dest + =<< liftIO (getFileStatus src) + +checkedCopyFile' :: Key -> FilePath -> FilePath -> FileStatus -> Annex Bool +checkedCopyFile' key src dest s = catchBoolIO $ + ifM (checkDiskSpace' (fromIntegral $ fileSize s) (Just $ takeDirectory dest) key 0 True) + ( liftIO $ copyFileExternal CopyAllMetaData src dest + , return False + ) + {- Runs an action to transfer an object's content. - - - In direct mode, it's possible for the file to change as it's being sent. + - In some cases, it's possible for the file to change as it's being sent. - If this happens, runs the rollback action and returns False. The - rollback action should remove the data that was transferred. -} @@ -492,8 +622,9 @@ sendAnnex key rollback sendobject = go =<< prepSendAnnex key {- Returns a file that contains an object's content, - and a check to run after the transfer is complete. - - - In direct mode, it's possible for the file to change as it's being sent, - - and the check detects this case and returns False. + - When a file is unlocked (or in direct mode), it's possble for its + - content to change as it's being sent. The check detects this case + - and returns False. - - Note that the returned check action is, in some cases, run in the - Annex monad of the remote that is receiving the object, rather than @@ -502,10 +633,23 @@ sendAnnex key rollback sendobject = go =<< prepSendAnnex key prepSendAnnex :: Key -> Annex (Maybe (FilePath, Annex Bool)) prepSendAnnex key = withObjectLoc key indirect direct where - indirect f = return $ Just (f, return True) + indirect f = do + cache <- Database.Keys.getInodeCaches key + cache' <- if null cache + -- Since no inode cache is in the database, this + -- object is not currently unlocked. But that could + -- change while the transfer is in progress, so + -- generate an inode cache for the starting + -- content. + then maybeToList <$> + withTSDelta (liftIO . genInodeCache f) + else pure cache + return $ if null cache' + then Nothing + else Just (f, sameInodeCache f cache') direct [] = return Nothing direct (f:fs) = do - cache <- recordedInodeCache key + cache <- Direct.recordedInodeCache key -- check that we have a good file ifM (sameInodeCache f cache) ( return $ Just (f, sameInodeCache f cache) @@ -520,7 +664,7 @@ prepSendAnnex key = withObjectLoc key indirect direct withObjectLoc :: Key -> (FilePath -> Annex a) -> ([FilePath] -> Annex a) -> Annex a withObjectLoc key indirect direct = ifM isDirect ( do - fs <- associatedFiles key + fs <- Direct.associatedFiles key if null fs then goindirect else direct fs @@ -544,6 +688,9 @@ cleanObjectLoc key cleaner = do {- Removes a key's file from .git/annex/objects/ - + - When a key has associated pointer files, they are checked for + - modifications, and if unmodified, are reset. + - - In direct mode, deletes the associated files or files, and replaces - them with symlinks. -} @@ -553,16 +700,50 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect remove file = cleanObjectLoc key $ do secureErase file liftIO $ nukeFile file - removeInodeCache key + mapM_ (void . tryIO . resetpointer) + =<< Database.Keys.getAssociatedFiles key + Database.Keys.removeInodeCaches key + Direct.removeInodeCache key + resetpointer file = ifM (isUnmodified key file) + ( do + secureErase file + liftIO $ nukeFile file + liftIO $ writeFile file (formatPointer key) + -- Can't delete the pointer file. + -- If it was a hard link to the annex object, + -- that object might have been frozen as part of the + -- removal process, so thaw it. + , void $ tryIO $ thawContent file + ) removedirect fs = do - cache <- recordedInodeCache key - removeInodeCache key + cache <- Direct.recordedInodeCache key + Direct.removeInodeCache key mapM_ (resetfile cache) fs - resetfile cache f = whenM (sameInodeCache f cache) $ do + resetfile cache f = whenM (Direct.sameInodeCache f cache) $ do l <- calcRepo $ gitAnnexLink f key secureErase f replaceFile f $ makeAnnexLink l +{- Check if a file contains the unmodified content of the key. + - + - The expensive way to tell is to do a verification of its content. + - The cheaper way is to see if the InodeCache for the key matches the + - file. -} +isUnmodified :: Key -> FilePath -> Annex Bool +isUnmodified key f = go =<< geti + where + go Nothing = return False + go (Just fc) = cheapcheck fc <||> expensivecheck fc + cheapcheck fc = anyM (compareInodeCaches fc) + =<< Database.Keys.getInodeCaches key + expensivecheck fc = ifM (verifyKeyContent AlwaysVerify Types.Remote.UnVerified key f) + -- The file could have been modified while it was + -- being verified. Detect that. + ( geti >>= maybe (return False) (compareInodeCaches fc) + , return False + ) + geti = withTSDelta (liftIO . genInodeCache f) + {- Runs the secure erase command if set, otherwise does nothing. - File may or may not be deleted at the end; caller is responsible for - making sure it's deleted. -} @@ -586,13 +767,14 @@ moveBad key = do logStatus key InfoMissing return dest -data KeyLocation = InAnnex | InRepository +data KeyLocation = InAnnex | InRepository | InAnywhere {- List of keys whose content exists in the specified location. - - InAnnex only lists keys under .git/annex/objects, - - while InRepository, in direct mode, also finds keys located in the - - work tree. + - InAnnex only lists keys with content in .git/annex/objects, + - while InRepository, in direct mode, also finds keys with content + - in the work tree. InAnywhere lists all keys that have directories + - in .git/annex/objects, whether or not the content is present. - - Note that InRepository has to check whether direct mode files - have goodContent. @@ -621,6 +803,11 @@ getKeysPresent keyloc = do morekeys <- unsafeInterleaveIO a continue (morekeys++keys) as + inanywhere = case keyloc of + InAnywhere -> True + _ -> False + + present _ _ _ | inanywhere = pure True present _ False d = presentInAnnex d present s True d = presentDirect s d <||> presentInAnnex d @@ -632,7 +819,8 @@ getKeysPresent keyloc = do InRepository -> case fileKey (takeFileName d) of Nothing -> return False Just k -> Annex.eval s $ - anyM (goodContent k) =<< associatedFiles k + anyM (Direct.goodContent k) =<< Direct.associatedFiles k + InAnywhere -> return True {- In order to run Annex monad actions within unsafeInterleaveIO, - the current state is taken and reused. No changes made to this diff --git a/Annex/Content/Direct.hs b/Annex/Content/Direct.hs index 86e053d7f..3b9d1aea2 100644 --- a/Annex/Content/Direct.hs +++ b/Annex/Content/Direct.hs @@ -1,12 +1,13 @@ {- git-annex file content managing for direct mode - + - This is deprecated, and will be removed when direct mode gets removed + - from git-annex. + - - Copyright 2012-2014 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} -{-# LANGUAGE CPP #-} - module Annex.Content.Direct ( associatedFiles, associatedFilesRelative, @@ -20,21 +21,15 @@ module Annex.Content.Direct ( addInodeCache, writeInodeCache, compareInodeCaches, - compareInodeCachesWith, sameInodeCache, elemInodeCaches, sameFileStatus, removeInodeCache, toInodeCache, - inodesChanged, - createInodeSentinalFile, addContentWhenNotPresent, - withTSDelta, - getTSDelta, ) where import Common.Annex -import qualified Annex import Annex.Perms import qualified Git import Utility.Tmp @@ -43,6 +38,7 @@ import Utility.InodeCache import Utility.CopyFile import Annex.ReplaceFile import Annex.Link +import Annex.InodeSentinal {- Absolute FilePaths of Files in the tree that are associated with a key. -} associatedFiles :: Key -> Annex [FilePath] @@ -165,14 +161,6 @@ removeInodeCache key = withInodeCacheFile key $ \f -> withInodeCacheFile :: Key -> (FilePath -> Annex a) -> Annex a withInodeCacheFile key a = a =<< calcRepo (gitAnnexInodeCache key) -{- Checks if a InodeCache matches the current version of a file. -} -sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool -sameInodeCache _ [] = return False -sameInodeCache file old = go =<< withTSDelta (liftIO . genInodeCache file) - where - go Nothing = return False - go (Just curr) = elemInodeCaches curr old - {- Checks if a FileStatus matches the recorded InodeCache of a file. -} sameFileStatus :: Key -> FilePath -> FileStatus -> Annex Bool sameFileStatus key f status = do @@ -183,25 +171,6 @@ sameFileStatus key f status = do ([], Nothing) -> return True _ -> return False -{- If the inodes have changed, only the size and mtime are compared. -} -compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool -compareInodeCaches x y - | compareStrong x y = return True - | otherwise = ifM inodesChanged - ( return $ compareWeak x y - , return False - ) - -elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool -elemInodeCaches _ [] = return False -elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l) - ( return True - , elemInodeCaches c ls - ) - -compareInodeCachesWith :: Annex InodeComparisonType -compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly ) - {- Copies the contentfile to the associated file, if the associated - file has no content. If the associated file does have content, - even if the content differs, it's left unchanged. -} @@ -212,52 +181,3 @@ addContentWhenNotPresent key contentfile associatedfile = do replaceFile associatedfile $ liftIO . void . copyFileExternal CopyAllMetaData contentfile updateInodeCache key associatedfile - -{- Some filesystems get new inodes each time they are mounted. - - In order to work on such a filesystem, a sentinal file is used to detect - - when the inodes have changed. - - - - If the sentinal file does not exist, we have to assume that the - - inodes have changed. - -} -inodesChanged :: Annex Bool -inodesChanged = sentinalInodesChanged <$> sentinalStatus - -withTSDelta :: (TSDelta -> Annex a) -> Annex a -withTSDelta a = a =<< getTSDelta - -getTSDelta :: Annex TSDelta -#ifdef mingw32_HOST_OS -getTSDelta = sentinalTSDelta <$> sentinalStatus -#else -getTSDelta = pure noTSDelta -- optimisation -#endif - -sentinalStatus :: Annex SentinalStatus -sentinalStatus = maybe check return =<< Annex.getState Annex.sentinalstatus - where - check = do - sc <- liftIO . checkSentinalFile =<< annexSentinalFile - Annex.changeState $ \s -> s { Annex.sentinalstatus = Just sc } - return sc - -{- The sentinal file is only created when first initializing a repository. - - If there are any annexed objects in the repository already, creating - - the file would invalidate their inode caches. -} -createInodeSentinalFile :: Annex () -createInodeSentinalFile = unlessM (alreadyexists <||> hasobjects) $ do - s <- annexSentinalFile - createAnnexDirectory (parentDir (sentinalFile s)) - liftIO $ writeSentinalFile s - where - alreadyexists = liftIO. sentinalFileExists =<< annexSentinalFile - hasobjects = liftIO . doesDirectoryExist =<< fromRepo gitAnnexObjectDir - -annexSentinalFile :: Annex SentinalFile -annexSentinalFile = do - sentinalfile <- fromRepo gitAnnexInodeSentinal - sentinalcachefile <- fromRepo gitAnnexInodeSentinalCache - return SentinalFile - { sentinalFile = sentinalfile - , sentinalCacheFile = sentinalcachefile - } diff --git a/Annex/Direct.hs b/Annex/Direct.hs index 495ff5e75..8c3d5bb56 100644 --- a/Annex/Direct.hs +++ b/Annex/Direct.hs @@ -1,5 +1,8 @@ {- git-annex direct mode - + - This is deprecated, and will be removed when direct mode gets removed + - from git-annex. + - - Copyright 2012-2014 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. @@ -36,6 +39,7 @@ import Annex.VariantFile import Git.Index import Annex.Index import Annex.LockFile +import Annex.InodeSentinal {- Uses git ls-files to find files that need to be committed, and stages - them into the index. Returns True if some changes were staged. -} @@ -53,8 +57,8 @@ stageDirect = do {- Determine what kind of modified or deleted file this is, as - efficiently as we can, by getting any key that's associated - with it in git, as well as its stat info. -} - go (file, Just sha, Just mode) = withTSDelta $ \delta -> do - shakey <- catKey sha mode + go (file, Just sha, Just _mode) = withTSDelta $ \delta -> do + shakey <- catKey sha mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file mcache <- liftIO $ maybe (pure Nothing) (toInodeCache delta file) mstat filekey <- isAnnexLink file @@ -107,8 +111,8 @@ preCommitDirect = do withkey (DiffTree.srcsha diff) (DiffTree.srcmode diff) removeAssociatedFile withkey (DiffTree.dstsha diff) (DiffTree.dstmode diff) addAssociatedFile where - withkey sha mode a = when (sha /= nullSha) $ do - k <- catKey sha mode + withkey sha _mode a = when (sha /= nullSha) $ do + k <- catKey sha case k of Nothing -> noop Just key -> void $ a key $ @@ -256,16 +260,16 @@ updateWorkTree d oldref force = do makeabs <- flip fromTopFilePath <$> gitRepo let fsitems = zip (map (makeabs . DiffTree.file) items) items forM_ fsitems $ - go makeabs DiffTree.srcsha DiffTree.srcmode moveout moveout_raw + go makeabs DiffTree.srcsha moveout moveout_raw forM_ fsitems $ - go makeabs DiffTree.dstsha DiffTree.dstmode movein movein_raw + go makeabs DiffTree.dstsha movein movein_raw void $ liftIO cleanup where - go makeabs getsha getmode a araw (f, item) + go makeabs getsha a araw (f, item) | getsha item == nullSha = noop | otherwise = void $ tryNonAsync . maybe (araw item makeabs f) (\k -> void $ a item makeabs k f) - =<< catKey (getsha item) (getmode item) + =<< catKey (getsha item) moveout _ _ = removeDirect @@ -395,7 +399,7 @@ changedDirect oldk f = do whenM (pure (null locs) <&&> not <$> inAnnex oldk) $ logStatus oldk InfoMissing -{- Enable/disable direct mode. -} +{- Git config settings to enable/disable direct mode. -} setDirect :: Bool -> Annex () setDirect wantdirect = do if wantdirect diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs index 8b0db60ad..a008198f3 100644 --- a/Annex/FileMatcher.hs +++ b/Annex/FileMatcher.hs @@ -14,7 +14,6 @@ import Limit import Utility.Matcher import Types.Group import Logs.Group -import Logs.Remote import Annex.UUID import qualified Annex import Types.FileMatcher @@ -53,8 +52,8 @@ parsedToMatcher parsed = case partitionEithers parsed of ([], vs) -> Right $ generate vs (es, _) -> Left $ unwords $ map ("Parse failure: " ++) es -exprParser :: FileMatcher Annex -> FileMatcher Annex -> GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token (MatchFiles Annex))] -exprParser matchstandard matchgroupwanted groupmap configmap mu expr = +exprParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token (MatchFiles Annex))] +exprParser matchstandard matchgroupwanted getgroupmap configmap mu expr = map parse $ tokenizeMatcher expr where parse = parseToken @@ -62,12 +61,12 @@ exprParser matchstandard matchgroupwanted groupmap configmap mu expr = matchgroupwanted (limitPresent mu) (limitInDir preferreddir) - groupmap + getgroupmap preferreddir = fromMaybe "public" $ M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu -parseToken :: FileMatcher Annex -> FileMatcher Annex -> MkLimit Annex -> MkLimit Annex -> GroupMap -> String -> Either String (Token (MatchFiles Annex)) -parseToken matchstandard matchgroupwanted checkpresent checkpreferreddir groupmap t +parseToken :: FileMatcher Annex -> FileMatcher Annex -> MkLimit Annex -> MkLimit Annex -> Annex GroupMap -> String -> Either String (Token (MatchFiles Annex)) +parseToken matchstandard matchgroupwanted checkpresent checkpreferreddir getgroupmap t | t `elem` tokens = Right $ token t | t == "standard" = call matchstandard | t == "groupwanted" = call matchgroupwanted @@ -86,7 +85,7 @@ parseToken matchstandard matchgroupwanted checkpresent checkpreferreddir groupma , ("largerthan", limitSize (>)) , ("smallerthan", limitSize (<)) , ("metadata", limitMetaData) - , ("inallgroup", limitInAllGroup groupmap) + , ("inallgroup", limitInAllGroup getgroupmap) ] where (k, v) = separate (== '=') t @@ -109,9 +108,12 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig where go Nothing = return matchAll go (Just expr) = do - gm <- groupMap - rc <- readRemoteLog u <- getUUID + -- No need to read remote configs, that's only needed for + -- inpreferreddir, which is used in preferred content + -- expressions but does not make sense in the + -- annex.largefiles expression. + let emptyconfig = M.empty either badexpr return $ - parsedToMatcher $ exprParser matchAll matchAll gm rc (Just u) expr + parsedToMatcher $ exprParser matchAll matchAll groupMap emptyconfig (Just u) expr badexpr e = error $ "bad annex.largefiles configuration: " ++ e diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs new file mode 100644 index 000000000..b2eb27616 --- /dev/null +++ b/Annex/Ingest.hs @@ -0,0 +1,289 @@ +{- git-annex content ingestion + - + - Copyright 2010-2015 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.Ingest ( + LockedDown(..), + lockDown, + ingest, + finishIngestDirect, + finishIngestUnlocked, + cleanOldKeys, + addLink, + makeLink, + restoreFile, + forceParams, +) where + +import Common.Annex +import Types.KeySource +import Backend +import Annex.Content +import Annex.Content.Direct +import Annex.Perms +import Annex.Link +import Annex.MetaData +import Logs.Location +import qualified Annex +import qualified Annex.Queue +import qualified Database.Keys +import Config +import Utility.InodeCache +import Annex.ReplaceFile +import Utility.Tmp +import Utility.CopyFile +import Annex.InodeSentinal +#ifdef WITH_CLIBS +#ifndef __ANDROID__ +import Utility.Touch +#endif +#endif + +import Control.Exception (IOException) + +data LockedDown = LockedDown + { lockingFile :: Bool + , keySource :: KeySource + } + deriving (Show) + +{- The file that's being ingested is locked down before a key is generated, + - to prevent it from being modified in between. This lock down is not + - perfect at best (and pretty weak at worst). For example, it does not + - guard against files that are already opened for write by another process. + - So, the InodeCache can be used to detect any changes that might be made + - to the file after it was locked down. + - + - When possible, the file is hard linked to a temp directory. This guards + - against some changes, like deletion or overwrite of the file, and + - allows lsof checks to be done more efficiently when adding a lot of files. + - + - If lockingfile is True, the file is going to be added in locked mode. + - So, its write bit is removed as part of the lock down. + - + - Lockdown can fail if a file gets deleted, and Nothing will be returned. + -} +lockDown :: Bool -> FilePath -> Annex (Maybe LockedDown) +lockDown lockingfile file = either + (\e -> warning (show e) >> return Nothing) + (return . Just) + =<< lockDown' lockingfile file + +lockDown' :: Bool -> FilePath -> Annex (Either IOException LockedDown) +lockDown' lockingfile file = ifM crippledFileSystem + ( withTSDelta $ liftIO . tryIO . nohardlink + , tryIO $ do + tmp <- fromRepo gitAnnexTmpMiscDir + createAnnexDirectory tmp + when lockingfile $ + freezeContent file + withTSDelta $ \delta -> liftIO $ do + (tmpfile, h) <- openTempFile tmp $ + relatedTemplate $ takeFileName file + hClose h + nukeFile tmpfile + withhardlink delta tmpfile `catchIO` const (nohardlink delta) + ) + where + nohardlink delta = do + cache <- genInodeCache file delta + return $ LockedDown lockingfile $ KeySource + { keyFilename = file + , contentLocation = file + , inodeCache = cache + } + withhardlink delta tmpfile = do + createLink file tmpfile + cache <- genInodeCache tmpfile delta + return $ LockedDown lockingfile $ KeySource + { keyFilename = file + , contentLocation = tmpfile + , inodeCache = cache + } + +{- Ingests a locked down file into the annex. + - + - The file may be added to the git repository as a locked or an unlocked + - file. When unlocked, the work tree file is left alone. When locked, + - the work tree file is deleted, in preparation for adding the symlink. + -} +ingest :: Maybe LockedDown -> Annex (Maybe Key, Maybe InodeCache) +ingest Nothing = return (Nothing, Nothing) +ingest (Just (LockedDown lockingfile source)) = withTSDelta $ \delta -> do + backend <- chooseBackend $ keyFilename source + k <- genKey source backend + let src = contentLocation source + ms <- liftIO $ catchMaybeIO $ getFileStatus src + mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms + case (mcache, inodeCache source) of + (_, Nothing) -> go k mcache ms + (Just newc, Just c) | compareStrong c newc -> go k mcache ms + _ -> failure "changed while it was being added" + where + go (Just (key, _)) mcache (Just s) + | lockingfile = golocked key mcache s + | otherwise = ifM isDirect + ( godirect key mcache s + , gounlocked key mcache s + ) + go _ _ _ = failure "failed to generate a key" + + golocked key mcache s = do + catchNonAsync (moveAnnex key $ contentLocation source) + (restoreFile (keyFilename source) key) + liftIO $ nukeFile $ keyFilename source + populateAssociatedFiles key source + success key mcache s + + gounlocked key (Just cache) s = do + -- Remove temp directory hard link first because + -- linkAnnex falls back to copying if a file + -- already has a hard link. + cleanCruft source + cleanOldKeys (keyFilename source) key + r <- linkAnnex key (keyFilename source) (Just cache) + case r of + LinkAnnexFailed -> failure "failed to link to annex" + _ -> do + finishIngestUnlocked' key source + success key (Just cache) s + gounlocked _ _ _ = failure "failed statting file" + + godirect key (Just cache) s = do + addInodeCache key cache + finishIngestDirect key source + success key (Just cache) s + godirect _ _ _ = failure "failed statting file" + + success k mcache s = do + genMetaData k (keyFilename source) s + return (Just k, mcache) + + failure msg = do + warning $ keyFilename source ++ " " ++ msg + cleanCruft source + return (Nothing, Nothing) + +finishIngestDirect :: Key -> KeySource -> Annex () +finishIngestDirect key source = do + void $ addAssociatedFile key $ keyFilename source + cleanCruft source + + {- Copy to any other locations using the same key. -} + otherfs <- filter (/= keyFilename source) <$> associatedFiles key + forM_ otherfs $ + addContentWhenNotPresent key (keyFilename source) + +finishIngestUnlocked :: Key -> KeySource -> Annex () +finishIngestUnlocked key source = do + cleanCruft source + finishIngestUnlocked' key source + +finishIngestUnlocked' :: Key -> KeySource -> Annex () +finishIngestUnlocked' key source = do + Database.Keys.addAssociatedFile key (keyFilename source) + populateAssociatedFiles key source + +{- Copy to any other locations using the same key. -} +populateAssociatedFiles :: Key -> KeySource -> Annex () +populateAssociatedFiles key source = do + otherfs <- filter (/= keyFilename source) <$> Database.Keys.getAssociatedFiles key + obj <- calcRepo (gitAnnexLocation key) + forM_ otherfs $ + populatePointerFile key obj + +cleanCruft :: KeySource -> Annex () +cleanCruft source = when (contentLocation source /= keyFilename source) $ + liftIO $ nukeFile $ contentLocation source + +-- If a worktree file was was hard linked to an annex object before, +-- modifying the file would have caused the object to have the wrong +-- content. Clean up from that. +cleanOldKeys :: FilePath -> Key -> Annex () +cleanOldKeys file newkey = do + oldkeys <- filter (/= newkey) + <$> Database.Keys.getAssociatedKey file + mapM_ go oldkeys + where + go key = do + obj <- calcRepo (gitAnnexLocation key) + caches <- Database.Keys.getInodeCaches key + unlessM (sameInodeCache obj caches) $ do + unlinkAnnex key + fs <- filter (/= file) + <$> Database.Keys.getAssociatedFiles key + fs' <- filterM (`sameInodeCache` caches) fs + case fs' of + -- If linkAnnex fails, the associated + -- file with the content is still present, + -- so no need for any recovery. + (f:_) -> do + ic <- withTSDelta (liftIO . genInodeCache f) + void $ linkAnnex key f ic + _ -> lostcontent + where + lostcontent = logStatus key InfoMissing + +{- On error, put the file back so it doesn't seem to have vanished. + - This can be called before or after the symlink is in place. -} +restoreFile :: FilePath -> Key -> SomeException -> Annex a +restoreFile file key e = do + whenM (inAnnex key) $ do + liftIO $ nukeFile file + -- The key could be used by other files too, so leave the + -- content in the annex, and make a copy back to the file. + obj <- calcRepo $ gitAnnexLocation key + unlessM (liftIO $ copyFileExternal CopyTimeStamps obj file) $ + warning $ "Unable to restore content of " ++ file ++ "; it should be located in " ++ obj + thawContent file + throwM e + +{- Creates the symlink to the annexed content, returns the link target. -} +makeLink :: FilePath -> Key -> Maybe InodeCache -> Annex String +makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do + l <- calcRepo $ gitAnnexLink file key + replaceFile file $ makeAnnexLink l + + -- touch symlink to have same time as the original file, + -- as provided in the InodeCache + case mcache of +#if defined(WITH_CLIBS) && ! defined(__ANDROID__) + Just c -> liftIO $ touch file (TimeSpec $ inodeCacheToMtime c) False +#else + Just _ -> noop +#endif + Nothing -> noop + + return l + +{- Creates the symlink to the annexed content, and stages it in git. + - + - As long as the filesystem supports symlinks, we use + - git add, rather than directly staging the symlink to git. + - Using git add is best because it allows the queuing to work + - and is faster (staging the symlink runs hash-object commands each time). + - Also, using git add allows it to skip gitignored files, unless forced + - to include them. + -} +addLink :: FilePath -> Key -> Maybe InodeCache -> Annex () +addLink file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig) + ( do + _ <- makeLink file key mcache + ps <- forceParams + Annex.Queue.addCommand "add" (ps++[Param "--"]) [file] + , do + l <- makeLink file key mcache + addAnnexLink l file + ) + +{- Parameters to pass to git add, forcing addition of ignored files. -} +forceParams :: Annex [CommandParam] +forceParams = ifM (Annex.getState Annex.force) + ( return [Param "-f"] + , return [] + ) diff --git a/Annex/Init.hs b/Annex/Init.hs index 65e9aa474..99bb03e92 100644 --- a/Annex/Init.hs +++ b/Annex/Init.hs @@ -29,12 +29,12 @@ import Types.TrustLevel import Annex.Version import Annex.Difference import Annex.UUID +import Annex.Link import Config import Annex.Direct -import Annex.Content.Direct import Annex.Environment -import Backend import Annex.Hook +import Annex.InodeSentinal import Upgrade #ifndef mingw32_HOST_OS import Utility.UserInfo @@ -57,8 +57,8 @@ genDescription Nothing = do return $ concat [hostname, ":", reldir] #endif -initialize :: Maybe String -> Annex () -initialize mdescription = do +initialize :: Maybe String -> Maybe Version -> Annex () +initialize mdescription mversion = do {- Has to come before any commits are made as the shared - clone heuristic expects no local objects. -} sharedclone <- checkSharedClone @@ -68,7 +68,7 @@ initialize mdescription = do ensureCommit $ Annex.Branch.create prepUUID - initialize' + initialize' mversion initSharedClone sharedclone @@ -77,15 +77,18 @@ initialize mdescription = do -- Everything except for uuid setup, shared clone setup, and initial -- description. -initialize' :: Annex () -initialize' = do +initialize' :: Maybe Version -> Annex () +initialize' mversion = do checkLockSupport checkFifoSupport checkCrippledFileSystem unlessM isBare $ hookWrite preCommitHook setDifferences - setVersion supportedVersion + unlessM (isJust <$> getVersion) $ + setVersion (fromMaybe defaultVersion mversion) + whenM versionSupportsUnlockedPointers + configureSmudgeFilter ifM (crippledFileSystem <&&> not <$> isBare) ( do enableDirectMode @@ -95,7 +98,7 @@ initialize' = do , unlessM isBare switchHEADBack ) - createInodeSentinalFile + createInodeSentinalFile False uninitialize :: Annex () uninitialize = do @@ -114,7 +117,7 @@ ensureInitialized :: Annex () ensureInitialized = getVersion >>= maybe needsinit checkUpgrade where needsinit = ifM Annex.Branch.hasSibling - ( initialize Nothing + ( initialize Nothing Nothing , error "First run: git-annex init" ) diff --git a/Annex/InodeSentinal.hs b/Annex/InodeSentinal.hs new file mode 100644 index 000000000..412a7accc --- /dev/null +++ b/Annex/InodeSentinal.hs @@ -0,0 +1,96 @@ +{- git-annex inode sentinal file + - + - Copyright 2012-2015 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.InodeSentinal where + +import Common.Annex +import qualified Annex +import Utility.InodeCache +import Annex.Perms + +{- If the sendinal shows the inodes have changed, only the size and mtime + - are compared. -} +compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool +compareInodeCaches x y + | compareStrong x y = return True + | otherwise = ifM inodesChanged + ( return $ compareWeak x y + , return False + ) + +compareInodeCachesWith :: Annex InodeComparisonType +compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly ) + +{- Checks if one of the provided old InodeCache matches the current + - version of a file. -} +sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool +sameInodeCache _ [] = return False +sameInodeCache file old = go =<< withTSDelta (liftIO . genInodeCache file) + where + go Nothing = return False + go (Just curr) = elemInodeCaches curr old + +elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool +elemInodeCaches _ [] = return False +elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l) + ( return True + , elemInodeCaches c ls + ) + +{- Some filesystems get new inodes each time they are mounted. + - In order to work on such a filesystem, a sentinal file is used to detect + - when the inodes have changed. + - + - If the sentinal file does not exist, we have to assume that the + - inodes have changed. + -} +inodesChanged :: Annex Bool +inodesChanged = sentinalInodesChanged <$> sentinalStatus + +withTSDelta :: (TSDelta -> Annex a) -> Annex a +withTSDelta a = a =<< getTSDelta + +getTSDelta :: Annex TSDelta +#ifdef mingw32_HOST_OS +getTSDelta = sentinalTSDelta <$> sentinalStatus +#else +getTSDelta = pure noTSDelta -- optimisation +#endif + +sentinalStatus :: Annex SentinalStatus +sentinalStatus = maybe check return =<< Annex.getState Annex.sentinalstatus + where + check = do + sc <- liftIO . checkSentinalFile =<< annexSentinalFile + Annex.changeState $ \s -> s { Annex.sentinalstatus = Just sc } + return sc + +{- The sentinal file is only created when first initializing a repository. + - If there are any annexed objects in the repository already, creating + - the file would invalidate their inode caches. -} +createInodeSentinalFile :: Bool -> Annex () +createInodeSentinalFile evenwithobjects = + unlessM (alreadyexists <||> hasobjects) $ do + s <- annexSentinalFile + createAnnexDirectory (parentDir (sentinalFile s)) + liftIO $ writeSentinalFile s + where + alreadyexists = liftIO. sentinalFileExists =<< annexSentinalFile + hasobjects + | evenwithobjects = pure False + | otherwise = liftIO . doesDirectoryExist =<< fromRepo gitAnnexObjectDir + +annexSentinalFile :: Annex SentinalFile +annexSentinalFile = do + sentinalfile <- fromRepo gitAnnexInodeSentinal + sentinalcachefile <- fromRepo gitAnnexInodeSentinalCache + return SentinalFile + { sentinalFile = sentinalfile + , sentinalCacheFile = sentinalcachefile + } diff --git a/Annex/Link.hs b/Annex/Link.hs index 98b200f0a..61c61b561 100644 --- a/Annex/Link.hs +++ b/Annex/Link.hs @@ -5,7 +5,9 @@ - On other filesystems, git instead stores the symlink target in a regular - file. - - - Copyright 2013 Joey Hess <id@joeyh.name> + - Pointer files are used instead of symlinks for unlocked files. + - + - Copyright 2013-2015 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -19,6 +21,9 @@ import qualified Git.UpdateIndex import qualified Annex.Queue import Git.Types import Git.FilePath +import Types.Key + +import qualified Data.ByteString.Lazy as L type LinkTarget = String @@ -105,8 +110,49 @@ hashSymlink' :: Git.HashObject.HashObjectHandle -> LinkTarget -> Annex Sha hashSymlink' h linktarget = liftIO $ Git.HashObject.hashBlob h $ toInternalGitPath linktarget -{- Stages a symlink to the annex, using a Sha of its target. -} +{- Stages a symlink to an annexed object, using a Sha of its target. -} stageSymlink :: FilePath -> Sha -> Annex () stageSymlink file sha = Annex.Queue.addUpdateIndex =<< inRepo (Git.UpdateIndex.stageSymlink file sha) + +{- Injects a pointer file content into git, returning its Sha. -} +hashPointerFile :: Key -> Annex Sha +hashPointerFile key = inRepo $ Git.HashObject.hashObject BlobObject $ + formatPointer key + +{- Stages a pointer file, using a Sha of its content -} +stagePointerFile :: FilePath -> Sha -> Annex () +stagePointerFile file sha = + Annex.Queue.addUpdateIndex =<< + inRepo (Git.UpdateIndex.stageFile sha FileBlob file) + +{- Parses a symlink target or a pointer file to a Key. + - Only looks at the first line, as pointer files can have subsequent + - lines. -} +parseLinkOrPointer :: L.ByteString -> Maybe Key +parseLinkOrPointer = parseLinkOrPointer' . decodeBS . L.take maxsz + where + {- Want to avoid buffering really big files in git into + - memory when reading files that may be pointers. + - + - 8192 bytes is plenty for a pointer to a key. + - Pad some more to allow for any pointer files that might have + - lines after the key explaining what the file is used for. -} + maxsz = 81920 + +parseLinkOrPointer' :: String -> Maybe Key +parseLinkOrPointer' s = headMaybe (lines (fromInternalGitPath s)) >>= go + where + go l + | isLinkToAnnex l = file2key $ takeFileName l + | otherwise = Nothing + +formatPointer :: Key -> String +formatPointer k = + toInternalGitPath (pathSeparator:objectDir </> key2file k) ++ "\n" + +{- Checks if a file is a pointer to a key. -} +isPointerFile :: FilePath -> Annex (Maybe Key) +isPointerFile f = liftIO $ catchDefaultIO Nothing $ + parseLinkOrPointer <$> L.readFile f diff --git a/Annex/MakeRepo.hs b/Annex/MakeRepo.hs index 73443c43d..adf49ed2c 100644 --- a/Annex/MakeRepo.hs +++ b/Annex/MakeRepo.hs @@ -75,7 +75,7 @@ initRepo False _ dir desc mgroup = inDir dir $ do initRepo' :: Maybe String -> Maybe StandardGroup -> Annex () initRepo' desc mgroup = unlessM isInitialized $ do - initialize desc + initialize desc Nothing u <- getUUID maybe noop (defaultStandardGroup u) mgroup {- Ensure branch gets committed right away so it is diff --git a/Annex/Version.hs b/Annex/Version.hs index d08f994e9..b54fb68e0 100644 --- a/Annex/Version.hs +++ b/Annex/Version.hs @@ -15,14 +15,20 @@ import qualified Annex type Version = String -supportedVersion :: Version -supportedVersion = "5" +defaultVersion :: Version +defaultVersion = "5" + +latestVersion :: Version +latestVersion = "6" + +supportedVersions :: [Version] +supportedVersions = ["5", "6"] upgradableVersions :: [Version] #ifndef mingw32_HOST_OS -upgradableVersions = ["0", "1", "2", "4"] +upgradableVersions = ["0", "1", "2", "4", "5"] #else -upgradableVersions = ["2", "3", "4"] +upgradableVersions = ["2", "3", "4", "5"] #endif autoUpgradeableVersions :: [Version] @@ -34,6 +40,18 @@ versionField = annexConfig "version" getVersion :: Annex (Maybe Version) getVersion = annexVersion <$> Annex.getGitConfig +versionSupportsDirectMode :: Annex Bool +versionSupportsDirectMode = go <$> getVersion + where + go (Just "6") = False + go _ = True + +versionSupportsUnlockedPointers :: Annex Bool +versionSupportsUnlockedPointers = go <$> getVersion + where + go (Just "6") = True + go _ = False + setVersion :: Version -> Annex () setVersion = setConfig versionField diff --git a/Annex/View.hs b/Annex/View.hs index 2b8a80e5f..8ddbb9c63 100644 --- a/Annex/View.hs +++ b/Annex/View.hs @@ -22,7 +22,7 @@ import Git.Sha import Git.HashObject import Git.Types import Git.FilePath -import qualified Backend +import Annex.WorkTree import Annex.Index import Annex.Link import Annex.CatFile @@ -342,7 +342,7 @@ applyView' mkviewedfile getfilemetadata view = do hasher <- inRepo hashObjectStart forM_ l $ \f -> do relf <- getTopFilePath <$> inRepo (toTopFilePath f) - go uh hasher relf =<< Backend.lookupFile f + go uh hasher relf =<< lookupFile f liftIO $ do hashObjectStop hasher void $ stopUpdateIndex uh @@ -413,13 +413,13 @@ withViewChanges addmeta removemeta = do handleremovals item | DiffTree.srcsha item /= nullSha = handlechange item removemeta - =<< catKey (DiffTree.srcsha item) (DiffTree.srcmode item) + =<< catKey (DiffTree.srcsha item) | otherwise = noop handleadds makeabs item | DiffTree.dstsha item /= nullSha = handlechange item addmeta =<< ifM isDirect - ( catKey (DiffTree.dstsha item) (DiffTree.dstmode item) + ( catKey (DiffTree.dstsha item) -- optimisation , isAnnexLink $ makeabs $ DiffTree.file item ) diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs new file mode 100644 index 000000000..c824e7fc5 --- /dev/null +++ b/Annex/WorkTree.hs @@ -0,0 +1,40 @@ +{- git-annex worktree files + - + - Copyright 2013-2015 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.WorkTree where + +import Common.Annex +import Annex.Link +import Annex.CatFile +import Annex.Version +import Config + +{- Looks up the key corresponding to an annexed file, + - by examining what the file links to. + - + - An unlocked file will not have a link on disk, so fall back to + - looking for a pointer to a key in git. + -} +lookupFile :: FilePath -> Annex (Maybe Key) +lookupFile file = do + mkey <- isAnnexLink file + case mkey of + Just key -> makeret key + Nothing -> ifM (versionSupportsUnlockedPointers <||> isDirect) + ( maybe (return Nothing) makeret =<< catKeyFile file + , return Nothing + ) + where + makeret = return . Just + +{- Modifies an action to only act on files that are already annexed, + - and passes the key on to it. -} +whenAnnexed :: (FilePath -> Key -> Annex (Maybe a)) -> FilePath -> Annex (Maybe a) +whenAnnexed a file = ifAnnexed file (a file) (return Nothing) + +ifAnnexed :: FilePath -> (Key -> Annex a) -> Annex a -> Annex a +ifAnnexed file yes no = maybe no yes =<< lookupFile file diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index 59ca69e88..0bdbb0378 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -21,18 +21,21 @@ import Logs.Transfer import Logs.Location import qualified Annex.Queue import qualified Git.LsFiles -import qualified Command.Add import Utility.ThreadScheduler import qualified Utility.Lsof as Lsof import qualified Utility.DirWatcher as DirWatcher import Types.KeySource import Config import Annex.Content +import Annex.Ingest import Annex.Link import Annex.CatFile +import Annex.InodeSentinal +import Annex.Version import qualified Annex import Utility.InodeCache import Annex.Content.Direct +import qualified Database.Keys import qualified Command.Sync import qualified Git.Branch @@ -52,7 +55,8 @@ commitThread = namedThread "Committer" $ do =<< annexDelayAdd <$> Annex.getGitConfig msg <- liftAnnex Command.Sync.commitMsg waitChangeTime $ \(changes, time) -> do - readychanges <- handleAdds havelsof delayadd changes + readychanges <- handleAdds havelsof delayadd $ + simplifyChanges changes if shouldCommit False time (length readychanges) readychanges then do debug @@ -227,12 +231,11 @@ commitStaged msg = do return ok {- OSX needs a short delay after a file is added before locking it down, - - when using a non-direct mode repository, as pasting a file seems to - - try to set file permissions or otherwise access the file after closing - - it. -} + - as pasting a file seems to try to set file permissions or otherwise + - access the file after closing it. -} delayaddDefault :: Annex (Maybe Seconds) #ifdef darwin_HOST_OS -delayaddDefault = ifM isDirect +delayaddDefault = ifM (isDirect || versionSupportsUnlockedPointers) ( return Nothing , return $ Just $ Seconds 1 ) @@ -249,12 +252,11 @@ delayaddDefault = return Nothing - for write by some other process, and faster checking with git-ls-files - that the files are not already checked into git. - - - When a file is added, Inotify will notice the new symlink. So this waits - - for additional Changes to arrive, so that the symlink has hopefully been - - staged before returning, and will be committed immediately. - - - - OTOH, for kqueue, eventsCoalesce, so instead the symlink is directly - - created and staged. + - When a file is added in locked mode, Inotify will notice the new symlink. + - So this waits for additional Changes to arrive, so that the symlink has + - hopefully been staged before returning, and will be committed immediately. + - (OTOH, for kqueue, eventsCoalesce, so instead the symlink is directly + - created and staged.) - - Returns a list of all changes that are ready to be committed. - Any pending adds that are not ready yet are put back into the ChangeChan, @@ -264,10 +266,13 @@ handleAdds :: Bool -> Maybe Seconds -> [Change] -> Assistant [Change] handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do let (pending, inprocess) = partition isPendingAddChange incomplete direct <- liftAnnex isDirect - (pending', cleanup) <- if direct + unlocked <- liftAnnex versionSupportsUnlockedPointers + let lockingfiles = not (unlocked || direct) + (pending', cleanup) <- if unlocked || direct then return (pending, noop) else findnew pending - (postponed, toadd) <- partitionEithers <$> safeToAdd havelsof delayadd pending' inprocess + (postponed, toadd) <- partitionEithers + <$> safeToAdd lockingfiles havelsof delayadd pending' inprocess cleanup unless (null postponed) $ @@ -275,10 +280,11 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do returnWhen (null toadd) $ do added <- addaction toadd $ - catMaybes <$> if direct - then adddirect toadd - else forM toadd add - if DirWatcher.eventsCoalesce || null added || direct + catMaybes <$> + if not lockingfiles + then addunlocked direct toadd + else forM toadd (add lockingfiles) + if DirWatcher.eventsCoalesce || null added || unlocked || direct then return $ added ++ otherchanges else do r <- handleAdds havelsof delayadd =<< getChanges @@ -304,52 +310,57 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do | c = return otherchanges | otherwise = a - add :: Change -> Assistant (Maybe Change) - add change@(InProcessAddChange { keySource = ks }) = + add :: Bool -> Change -> Assistant (Maybe Change) + add lockingfile change@(InProcessAddChange { lockedDown = ld }) = catchDefaultIO Nothing <~> doadd where + ks = keySource ld doadd = sanitycheck ks $ do (mkey, mcache) <- liftAnnex $ do showStart "add" $ keyFilename ks - Command.Add.ingest $ Just ks + ingest $ Just $ LockedDown lockingfile ks maybe (failedingest change) (done change mcache $ keyFilename ks) mkey - add _ = return Nothing + add _ _ = return Nothing - {- In direct mode, avoid overhead of re-injesting a renamed - - file, by examining the other Changes to see if a removed - - file has the same InodeCache as the new file. If so, - - we can just update bookkeeping, and stage the file in git. + {- Avoid overhead of re-injesting a renamed unlocked file, by + - examining the other Changes to see if a removed file has the + - same InodeCache as the new file. If so, we can just update + - bookkeeping, and stage the file in git. -} - adddirect :: [Change] -> Assistant [Maybe Change] - adddirect toadd = do + addunlocked :: Bool -> [Change] -> Assistant [Maybe Change] + addunlocked isdirect toadd = do ct <- liftAnnex compareInodeCachesWith - m <- liftAnnex $ removedKeysMap ct cs + m <- liftAnnex $ removedKeysMap isdirect ct cs delta <- liftAnnex getTSDelta if M.null m - then forM toadd add + then forM toadd (add False) else forM toadd $ \c -> do mcache <- liftIO $ genInodeCache (changeFile c) delta case mcache of - Nothing -> add c + Nothing -> add False c Just cache -> case M.lookup (inodeCacheToKey ct cache) m of - Nothing -> add c - Just k -> fastadd c k - - fastadd :: Change -> Key -> Assistant (Maybe Change) - fastadd change key = do - let source = keySource change - liftAnnex $ Command.Add.finishIngestDirect key source + Nothing -> add False c + Just k -> fastadd isdirect c k + + fastadd :: Bool -> Change -> Key -> Assistant (Maybe Change) + fastadd isdirect change key = do + let source = keySource $ lockedDown change + liftAnnex $ if isdirect + then finishIngestDirect key source + else finishIngestUnlocked key source done change Nothing (keyFilename source) key - removedKeysMap :: InodeComparisonType -> [Change] -> Annex (M.Map InodeCacheKey Key) - removedKeysMap ct l = do + removedKeysMap :: Bool -> InodeComparisonType -> [Change] -> Annex (M.Map InodeCacheKey Key) + removedKeysMap isdirect ct l = do mks <- forM (filter isRmChange l) $ \c -> catKeyFile $ changeFile c M.fromList . concat <$> mapM mkpairs (catMaybes mks) where mkpairs k = map (\c -> (inodeCacheToKey ct c, k)) <$> - recordedInodeCache k + if isdirect + then recordedInodeCache k + else Database.Keys.getInodeCaches k failedingest change = do refill [retryChange change] @@ -358,12 +369,16 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do done change mcache file key = liftAnnex $ do logStatus key InfoPresent - link <- ifM isDirect - ( calcRepo $ gitAnnexLink file key - , Command.Add.link file key mcache + ifM versionSupportsUnlockedPointers + ( stagePointerFile file =<< hashPointerFile key + , do + link <- ifM isDirect + ( calcRepo $ gitAnnexLink file key + , makeLink file key mcache + ) + whenM (pure DirWatcher.eventsCoalesce <||> isDirect) $ + stageSymlink file =<< hashSymlink link ) - whenM (pure DirWatcher.eventsCoalesce <||> isDirect) $ - stageSymlink file =<< hashSymlink link showEndOk return $ Just $ finishedChange change key @@ -401,16 +416,16 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do - - Check by running lsof on the repository. -} -safeToAdd :: Bool -> Maybe Seconds -> [Change] -> [Change] -> Assistant [Either Change Change] -safeToAdd _ _ [] [] = return [] -safeToAdd havelsof delayadd pending inprocess = do +safeToAdd :: Bool -> Bool -> Maybe Seconds -> [Change] -> [Change] -> Assistant [Either Change Change] +safeToAdd _ _ _ [] [] = return [] +safeToAdd lockingfiles havelsof delayadd pending inprocess = do maybe noop (liftIO . threadDelaySeconds) delayadd liftAnnex $ do - keysources <- forM pending $ Command.Add.lockDown . changeFile - let inprocess' = inprocess ++ mapMaybe mkinprocess (zip pending keysources) + lockeddown <- forM pending $ lockDown lockingfiles . changeFile + let inprocess' = inprocess ++ mapMaybe mkinprocess (zip pending lockeddown) openfiles <- if havelsof then S.fromList . map fst3 . filter openwrite <$> - findopenfiles (map keySource inprocess') + findopenfiles (map (keySource . lockedDown) inprocess') else pure S.empty let checked = map (check openfiles) inprocess' @@ -423,17 +438,18 @@ safeToAdd havelsof delayadd pending inprocess = do allRight $ rights checked else return checked where - check openfiles change@(InProcessAddChange { keySource = ks }) - | S.member (contentLocation ks) openfiles = Left change + check openfiles change@(InProcessAddChange { lockedDown = ld }) + | S.member (contentLocation (keySource ld)) openfiles = Left change check _ change = Right change - mkinprocess (c, Just ks) = Just InProcessAddChange + mkinprocess (c, Just ld) = Just InProcessAddChange { changeTime = changeTime c - , keySource = ks + , lockedDown = ld } mkinprocess (_, Nothing) = Nothing - canceladd (InProcessAddChange { keySource = ks }) = do + canceladd (InProcessAddChange { lockedDown = ld }) = do + let ks = keySource ld warning $ keyFilename ks ++ " still has writers, not adding" -- remove the hard link diff --git a/Assistant/Threads/TransferScanner.hs b/Assistant/Threads/TransferScanner.hs index f35c1f1f5..7386d5528 100644 --- a/Assistant/Threads/TransferScanner.hs +++ b/Assistant/Threads/TransferScanner.hs @@ -25,7 +25,7 @@ import Utility.ThreadScheduler import Utility.NotificationBroadcaster import Utility.Batch import qualified Git.LsFiles as LsFiles -import qualified Backend +import Annex.WorkTree import Annex.Content import Annex.Wanted import CmdLine.Action @@ -142,7 +142,7 @@ expensiveScan urlrenderer rs = batch <~> do (unwanted', ts) <- maybe (return (unwanted, [])) (findtransfers f unwanted) - =<< liftAnnex (Backend.lookupFile f) + =<< liftAnnex (lookupFile f) mapM_ (enqueue f) ts scan unwanted' fs diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs index 8c6ff378d..bb9659b7c 100644 --- a/Assistant/Threads/Watcher.hs +++ b/Assistant/Threads/Watcher.hs @@ -1,6 +1,6 @@ {- git-annex assistant tree watcher - - - Copyright 2012-2013 Joey Hess <id@joeyh.name> + - Copyright 2012-2015 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -28,7 +28,7 @@ import qualified Annex.Queue import qualified Git import qualified Git.UpdateIndex import qualified Git.LsFiles as LsFiles -import qualified Backend +import Annex.WorkTree import Annex.Direct import Annex.Content.Direct import Annex.CatFile @@ -36,10 +36,15 @@ import Annex.CheckIgnore import Annex.Link import Annex.FileMatcher import Types.FileMatcher +import Annex.Content import Annex.ReplaceFile +import Annex.Version +import Annex.InodeSentinal import Git.Types import Config import Utility.ThreadScheduler +import Logs.Location +import qualified Database.Keys #ifndef mingw32_HOST_OS import qualified Utility.Lsof as Lsof #endif @@ -88,10 +93,13 @@ runWatcher = do startup <- asIO1 startupScan matcher <- liftAnnex largeFilesMatcher direct <- liftAnnex isDirect + unlocked <- liftAnnex versionSupportsUnlockedPointers symlinkssupported <- liftAnnex $ coreSymlinks <$> Annex.getGitConfig - addhook <- hook $ if direct - then onAddDirect symlinkssupported matcher - else onAdd matcher + addhook <- hook $ if unlocked + then onAddUnlocked symlinkssupported matcher + else if direct + then onAddDirect symlinkssupported matcher + else onAdd matcher delhook <- hook onDel addsymlinkhook <- hook $ onAddSymlink direct deldirhook <- hook onDelDir @@ -216,15 +224,33 @@ onAdd matcher file filestatus shouldRestage :: DaemonStatus -> Bool shouldRestage ds = scanComplete ds || forceRestage ds +onAddUnlocked :: Bool -> FileMatcher Annex -> Handler +onAddUnlocked = onAddUnlocked' False contentchanged Database.Keys.addAssociatedFile samefilestatus + where + samefilestatus key file status = do + cache <- Database.Keys.getInodeCaches key + curr <- withTSDelta $ \delta -> liftIO $ toInodeCache delta file status + case (cache, curr) of + (_, Just c) -> elemInodeCaches c cache + ([], Nothing) -> return True + _ -> return False + contentchanged oldkey file = do + Database.Keys.removeAssociatedFile oldkey file + unlessM (inAnnex oldkey) $ + logStatus oldkey InfoMissing + {- In direct mode, add events are received for both new files, and - modified existing files. -} onAddDirect :: Bool -> FileMatcher Annex -> Handler -onAddDirect symlinkssupported matcher file fs = do +onAddDirect = onAddUnlocked' True changedDirect (\k f -> void $ addAssociatedFile k f) sameFileStatus + +onAddUnlocked' :: Bool -> (Key -> FilePath -> Annex ()) -> (Key -> FilePath -> Annex ()) -> (Key -> FilePath -> FileStatus -> Annex Bool) -> Bool -> FileMatcher Annex -> Handler +onAddUnlocked' isdirect contentchanged addassociatedfile samefilestatus symlinkssupported matcher file fs = do v <- liftAnnex $ catKeyFile file case (v, fs) of (Just key, Just filestatus) -> - ifM (liftAnnex $ sameFileStatus key file filestatus) + ifM (liftAnnex $ samefilestatus key file filestatus) {- It's possible to get an add event for - an existing file that is not - really modified, but it might have @@ -237,13 +263,13 @@ onAddDirect symlinkssupported matcher file fs = do , noChange ) , guardSymlinkStandin (Just key) $ do - debug ["changed direct", file] - liftAnnex $ changedDirect key file + debug ["changed", file] + liftAnnex $ contentchanged key file add matcher file ) _ -> unlessIgnored file $ guardSymlinkStandin Nothing $ do - debug ["add direct", file] + debug ["add", file] add matcher file where {- On a filesystem without symlinks, we'll get changes for regular @@ -259,9 +285,9 @@ onAddDirect symlinkssupported matcher file fs = do Just lt -> do case fileKey $ takeFileName lt of Nothing -> noop - Just key -> void $ liftAnnex $ - addAssociatedFile key file - onAddSymlink' linktarget mk True file fs + Just key -> liftAnnex $ + addassociatedfile key file + onAddSymlink' linktarget mk isdirect file fs {- A symlink might be an arbitrary symlink, which is just added. - Or, if it is a git-annex symlink, ensure it points to the content @@ -270,7 +296,7 @@ onAddDirect symlinkssupported matcher file fs = do onAddSymlink :: Bool -> Handler onAddSymlink isdirect file filestatus = unlessIgnored file $ do linktarget <- liftIO (catchMaybeIO $ readSymbolicLink file) - kv <- liftAnnex (Backend.lookupFile file) + kv <- liftAnnex (lookupFile file) onAddSymlink' linktarget kv isdirect file filestatus onAddSymlink' :: Maybe String -> Maybe Key -> Bool -> Handler @@ -330,13 +356,15 @@ onDel file _ = do onDel' :: FilePath -> Annex () onDel' file = do - whenM isDirect $ do - mkey <- catKeyFile file - case mkey of - Nothing -> noop - Just key -> void $ removeAssociatedFile key file + ifM versionSupportsUnlockedPointers + ( withkey $ flip Database.Keys.removeAssociatedFile file + , whenM isDirect $ + withkey $ \key -> void $ removeAssociatedFile key file + ) Annex.Queue.addUpdateIndex =<< inRepo (Git.UpdateIndex.unstageFile file) + where + withkey a = maybe noop a =<< catKeyFile file {- A directory has been deleted, or moved, so tell git to remove anything - that was inside it from its cache. Since it could reappear at any time, diff --git a/Assistant/Types/Changes.hs b/Assistant/Types/Changes.hs index 1d8b51775..70c40523a 100644 --- a/Assistant/Types/Changes.hs +++ b/Assistant/Types/Changes.hs @@ -1,18 +1,22 @@ {- git-annex assistant change tracking - - - Copyright 2012-2013 Joey Hess <id@joeyh.name> + - Copyright 2012-2015 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} +{-# LANGUAGE BangPatterns #-} + module Assistant.Types.Changes where import Types.KeySource import Types.Key import Utility.TList +import Annex.Ingest import Control.Concurrent.STM import Data.Time.Clock +import qualified Data.Set as S {- An un-ordered pool of Changes that have been noticed and should be - staged and committed. Changes will typically be in order, but ordering @@ -38,7 +42,7 @@ data Change } | InProcessAddChange { changeTime ::UTCTime - , keySource :: KeySource + , lockedDown :: LockedDown } deriving (Show) @@ -53,7 +57,7 @@ changeInfoKey _ = Nothing changeFile :: Change -> FilePath changeFile (Change _ f _) = f changeFile (PendingAddChange _ f) = f -changeFile (InProcessAddChange _ ks) = keyFilename ks +changeFile (InProcessAddChange _ ld) = keyFilename $ keySource ld isPendingAddChange :: Change -> Bool isPendingAddChange (PendingAddChange {}) = True @@ -64,14 +68,33 @@ isInProcessAddChange (InProcessAddChange {}) = True isInProcessAddChange _ = False retryChange :: Change -> Change -retryChange (InProcessAddChange time ks) = - PendingAddChange time (keyFilename ks) +retryChange c@(InProcessAddChange time _) = + PendingAddChange time $ changeFile c retryChange c = c finishedChange :: Change -> Key -> Change -finishedChange c@(InProcessAddChange { keySource = ks }) k = Change +finishedChange c@(InProcessAddChange {}) k = Change { changeTime = changeTime c - , _changeFile = keyFilename ks + , _changeFile = changeFile c , changeInfo = AddKeyChange k } finishedChange c _ = c + +{- Combine PendingAddChanges that are for the same file. + - Multiple such often get noticed when eg, a file is opened and then + - closed in quick succession. -} +simplifyChanges :: [Change] -> [Change] +simplifyChanges [c] = [c] +simplifyChanges cl = go cl S.empty [] + where + go [] _ l = reverse l + go (c:cs) seen l + | isPendingAddChange c = + if S.member f seen + then go cs seen l + else + let !seen' = S.insert f seen + in go cs seen' (c:l) + | otherwise = go cs seen (c:l) + where + f = changeFile c diff --git a/Backend.hs b/Backend.hs index 922d0c2a7..c2f3d28d4 100644 --- a/Backend.hs +++ b/Backend.hs @@ -9,9 +9,7 @@ module Backend ( list, orderedList, genKey, - lookupFile, getBackend, - isAnnexLink, chooseBackend, lookupBackendName, maybeLookupBackendName, @@ -21,12 +19,9 @@ module Backend ( import Common.Annex import qualified Annex import Annex.CheckAttr -import Annex.CatFile -import Annex.Link import Types.Key import Types.KeySource import qualified Types.Backend as B -import Config -- When adding a new backend, import it here and add it to the list. import qualified Backend.Hash @@ -78,26 +73,6 @@ genKey' (b:bs) source = do | c == '\n' = '_' | otherwise = c -{- Looks up the key corresponding to an annexed file, - - by examining what the file links to. - - - - In direct mode, there is often no link on disk, in which case - - the symlink is looked up in git instead. However, a real link - - on disk still takes precedence over what was committed to git in direct - - mode. - -} -lookupFile :: FilePath -> Annex (Maybe Key) -lookupFile file = do - mkey <- isAnnexLink file - case mkey of - Just key -> makeret key - Nothing -> ifM isDirect - ( maybe (return Nothing) makeret =<< catKeyFile file - , return Nothing - ) - where - makeret k = return $ Just k - getBackend :: FilePath -> Key -> Annex (Maybe Backend) getBackend file k = let bname = keyBackendName k in case maybeLookupBackendName bname of diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index f585bff3e..ba7689f70 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -96,6 +96,7 @@ import qualified Command.Upgrade import qualified Command.Forget import qualified Command.Proxy import qualified Command.DiffDriver +import qualified Command.Smudge import qualified Command.Undo import qualified Command.Version #ifdef WITH_ASSISTANT @@ -201,6 +202,7 @@ cmds testoptparser testrunner = , Command.Forget.cmd , Command.Proxy.cmd , Command.DiffDriver.cmd + , Command.Smudge.cmd , Command.Undo.cmd , Command.Version.cmd #ifdef WITH_ASSISTANT diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 8d253e47d..e6ee6f3fe 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -80,7 +80,7 @@ withFilesInRefs a = mapM_ go l <- inRepo $ LsTree.lsTree (Git.Ref r) forM_ l $ \i -> do let f = getTopFilePath $ LsTree.file i - v <- catKey (Git.Ref $ LsTree.sha i) (LsTree.mode i) + v <- catKey (Git.Ref $ LsTree.sha i) case v of Nothing -> noop Just k -> whenM (matcher $ MatchingKey k) $ @@ -115,29 +115,29 @@ withPairs a params = seekActions $ return $ map a $ pairs [] params pairs c (x:y:xs) = pairs ((x,y):c) xs pairs _ _ = error "expected pairs" -withFilesToBeCommitted :: (String -> CommandStart) -> CmdParams -> CommandSeek +withFilesToBeCommitted :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek withFilesToBeCommitted a params = seekActions $ prepFiltered a $ seekHelper LsFiles.stagedNotDeleted params -withFilesUnlocked :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek -withFilesUnlocked = withFilesUnlocked' LsFiles.typeChanged +withFilesOldUnlocked :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek +withFilesOldUnlocked = withFilesOldUnlocked' LsFiles.typeChanged -withFilesUnlockedToBeCommitted :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek -withFilesUnlockedToBeCommitted = withFilesUnlocked' LsFiles.typeChangedStaged +withFilesOldUnlockedToBeCommitted :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek +withFilesOldUnlockedToBeCommitted = withFilesOldUnlocked' LsFiles.typeChangedStaged -{- Unlocked files have changed type from a symlink to a regular file. +{- Unlocked files before v6 have changed type from a symlink to a regular file. - - Furthermore, unlocked files used to be a git-annex symlink, - not some other sort of symlink. -} -withFilesUnlocked' :: ([FilePath] -> Git.Repo -> IO ([FilePath], IO Bool)) -> (FilePath -> CommandStart) -> CmdParams -> CommandSeek -withFilesUnlocked' typechanged a params = seekActions $ +withFilesOldUnlocked' :: ([FilePath] -> Git.Repo -> IO ([FilePath], IO Bool)) -> (FilePath -> CommandStart) -> CmdParams -> CommandSeek +withFilesOldUnlocked' typechanged a params = seekActions $ prepFiltered a unlockedfiles where - unlockedfiles = filterM isUnlocked =<< seekHelper typechanged params + unlockedfiles = filterM isOldUnlocked =<< seekHelper typechanged params -isUnlocked :: FilePath -> Annex Bool -isUnlocked f = liftIO (notSymlink f) <&&> +isOldUnlocked :: FilePath -> Annex Bool +isOldUnlocked f = liftIO (notSymlink f) <&&> (isJust <$> catKeyFile f <||> isJust <$> catKeyFileHEAD f) {- Finds files that may be modified. -} diff --git a/Command.hs b/Command.hs index bee63bb74..387f7b8b5 100644 --- a/Command.hs +++ b/Command.hs @@ -18,12 +18,13 @@ module Command ( stopUnless, whenAnnexed, ifAnnexed, + lookupFile, isBareRepo, module ReExported ) where import Common.Annex -import qualified Backend +import Annex.WorkTree import qualified Git import Types.Command as ReExported import Types.Option as ReExported @@ -100,13 +101,5 @@ stop = return Nothing stopUnless :: Annex Bool -> Annex (Maybe a) -> Annex (Maybe a) stopUnless c a = ifM c ( a , stop ) -{- Modifies an action to only act on files that are already annexed, - - and passes the key on to it. -} -whenAnnexed :: (FilePath -> Key -> Annex (Maybe a)) -> FilePath -> Annex (Maybe a) -whenAnnexed a file = ifAnnexed file (a file) (return Nothing) - -ifAnnexed :: FilePath -> (Key -> Annex a) -> Annex a -> Annex a -ifAnnexed file yes no = maybe no yes =<< Backend.lookupFile file - isBareRepo :: Annex Bool isBareRepo = fromRepo Git.repoIsLocalBare diff --git a/Command/Add.hs b/Command/Add.hs index 27c11eab4..8a7db0a91 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -5,35 +5,22 @@ - Licensed under the GNU GPL version 3 or higher. -} -{-# LANGUAGE CPP #-} - module Command.Add where import Common.Annex import Command -import Types.KeySource -import Backend +import Annex.Ingest import Logs.Location import Annex.Content import Annex.Content.Direct -import Annex.Perms import Annex.Link -import Annex.MetaData import qualified Annex import qualified Annex.Queue -#ifdef WITH_CLIBS -#ifndef __ANDROID__ -import Utility.Touch -#endif -#endif import Config import Utility.InodeCache import Annex.FileMatcher -import Annex.ReplaceFile -import Utility.Tmp -import Utility.CopyFile - -import Control.Exception (IOException) +import Annex.Version +import qualified Database.Keys cmd :: Command cmd = notBareRepo $ withGlobalOptions (jobsOption : fileMatchingOptions) $ @@ -64,9 +51,9 @@ seek o = allowConcurrentOutput $ do , startSmall file ) go $ withFilesNotInGit (not $ includeDotFiles o) - ifM isDirect + ifM (versionSupportsUnlockedPointers <||> isDirect) ( go withFilesMaybeModified - , go withFilesUnlocked + , go withFilesOldUnlocked ) {- Pass file off to git-add. -} @@ -86,9 +73,6 @@ addFile file = do Annex.Queue.addCommand "add" (ps++[Param "--"]) [file] return True -{- The add subcommand annexes a file, generating a key for it using a - - backend, and then moving it into the annex directory and setting up - - the symlink pointing to its content. -} start :: FilePath -> CommandStart start file = ifAnnexed file addpresent add where @@ -103,13 +87,22 @@ start file = ifAnnexed file addpresent add next $ if isSymbolicLink s then next $ addFile file else perform file - addpresent key = ifM isDirect + addpresent key = ifM versionSupportsUnlockedPointers ( do ms <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file case ms of Just s | isSymbolicLink s -> fixup key - _ -> ifM (goodContent key file) ( stop , add ) - , fixup key + _ -> ifM (sameInodeCache file =<< Database.Keys.getInodeCaches key) + ( stop, add ) + , ifM isDirect + ( do + ms <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file + case ms of + Just s | isSymbolicLink s -> fixup key + _ -> ifM (goodContent key file) + ( stop , add ) + , fixup key + ) ) fixup key = do -- the annexed symlink is present but not yet added to git @@ -119,188 +112,14 @@ start file = ifAnnexed file addpresent add void $ addAssociatedFile key file next $ next $ cleanup file key Nothing =<< inAnnex key -{- The file that's being added is locked down before a key is generated, - - to prevent it from being modified in between. This lock down is not - - perfect at best (and pretty weak at worst). For example, it does not - - guard against files that are already opened for write by another process. - - So a KeySource is returned. Its inodeCache can be used to detect any - - changes that might be made to the file after it was locked down. - - - - When possible, the file is hard linked to a temp directory. This guards - - against some changes, like deletion or overwrite of the file, and - - allows lsof checks to be done more efficiently when adding a lot of files. - - - - Lockdown can fail if a file gets deleted, and Nothing will be returned. - -} -lockDown :: FilePath -> Annex (Maybe KeySource) -lockDown = either - (\e -> warning (show e) >> return Nothing) - (return . Just) - <=< lockDown' - -lockDown' :: FilePath -> Annex (Either IOException KeySource) -lockDown' file = ifM crippledFileSystem - ( withTSDelta $ liftIO . tryIO . nohardlink - , tryIO $ do - tmp <- fromRepo gitAnnexTmpMiscDir - createAnnexDirectory tmp - go tmp - ) - where - {- In indirect mode, the write bit is removed from the file as part - - of lock down to guard against further writes, and because objects - - in the annex have their write bit disabled anyway. - - - - Freezing the content early also lets us fail early when - - someone else owns the file. - - - - This is not done in direct mode, because files there need to - - remain writable at all times. - -} - go tmp = do - unlessM isDirect $ - freezeContent file - withTSDelta $ \delta -> liftIO $ do - (tmpfile, h) <- openTempFile tmp $ - relatedTemplate $ takeFileName file - hClose h - nukeFile tmpfile - withhardlink delta tmpfile `catchIO` const (nohardlink delta) - nohardlink delta = do - cache <- genInodeCache file delta - return KeySource - { keyFilename = file - , contentLocation = file - , inodeCache = cache - } - withhardlink delta tmpfile = do - createLink file tmpfile - cache <- genInodeCache tmpfile delta - return KeySource - { keyFilename = file - , contentLocation = tmpfile - , inodeCache = cache - } - -{- Ingests a locked down file into the annex. - - - - In direct mode, leaves the file alone, and just updates bookkeeping - - information. - -} -ingest :: Maybe KeySource -> Annex (Maybe Key, Maybe InodeCache) -ingest Nothing = return (Nothing, Nothing) -ingest (Just source) = withTSDelta $ \delta -> do - backend <- chooseBackend $ keyFilename source - k <- genKey source backend - let src = contentLocation source - ms <- liftIO $ catchMaybeIO $ getFileStatus src - mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms - case (mcache, inodeCache source) of - (_, Nothing) -> go k mcache ms - (Just newc, Just c) | compareStrong c newc -> go k mcache ms - _ -> failure "changed while it was being added" - where - go k mcache ms = ifM isDirect - ( godirect k mcache ms - , goindirect k mcache ms - ) - - goindirect (Just (key, _)) mcache ms = do - catchNonAsync (moveAnnex key $ contentLocation source) - (undo (keyFilename source) key) - maybe noop (genMetaData key (keyFilename source)) ms - liftIO $ nukeFile $ keyFilename source - return (Just key, mcache) - goindirect _ _ _ = failure "failed to generate a key" - - godirect (Just (key, _)) (Just cache) ms = do - addInodeCache key cache - maybe noop (genMetaData key (keyFilename source)) ms - finishIngestDirect key source - return (Just key, Just cache) - godirect _ _ _ = failure "failed to generate a key" - - failure msg = do - warning $ keyFilename source ++ " " ++ msg - when (contentLocation source /= keyFilename source) $ - liftIO $ nukeFile $ contentLocation source - return (Nothing, Nothing) - -finishIngestDirect :: Key -> KeySource -> Annex () -finishIngestDirect key source = do - void $ addAssociatedFile key $ keyFilename source - when (contentLocation source /= keyFilename source) $ - liftIO $ nukeFile $ contentLocation source - - {- Copy to any other locations using the same key. -} - otherfs <- filter (/= keyFilename source) <$> associatedFiles key - forM_ otherfs $ - addContentWhenNotPresent key (keyFilename source) - perform :: FilePath -> CommandPerform -perform file = lockDown file >>= ingest >>= go +perform file = do + lockingfile <- not <$> isDirect + lockDown lockingfile file >>= ingest >>= go where go (Just key, cache) = next $ cleanup file key cache True go (Nothing, _) = stop -{- On error, put the file back so it doesn't seem to have vanished. - - This can be called before or after the symlink is in place. -} -undo :: FilePath -> Key -> SomeException -> Annex a -undo file key e = do - whenM (inAnnex key) $ do - liftIO $ nukeFile file - -- The key could be used by other files too, so leave the - -- content in the annex, and make a copy back to the file. - obj <- calcRepo $ gitAnnexLocation key - unlessM (liftIO $ copyFileExternal CopyTimeStamps obj file) $ - warning $ "Unable to restore content of " ++ file ++ "; it should be located in " ++ obj - thawContent file - throwM e - -{- Creates the symlink to the annexed content, returns the link target. -} -link :: FilePath -> Key -> Maybe InodeCache -> Annex String -link file key mcache = flip catchNonAsync (undo file key) $ do - l <- calcRepo $ gitAnnexLink file key - replaceFile file $ makeAnnexLink l - - -- touch symlink to have same time as the original file, - -- as provided in the InodeCache - case mcache of -#if defined(WITH_CLIBS) && ! defined(__ANDROID__) - Just c -> liftIO $ touch file (TimeSpec $ inodeCacheToMtime c) False -#else - Just _ -> noop -#endif - Nothing -> noop - - return l - -{- Creates the symlink to the annexed content, and stages it in git. - - - - As long as the filesystem supports symlinks, we use - - git add, rather than directly staging the symlink to git. - - Using git add is best because it allows the queuing to work - - and is faster (staging the symlink runs hash-object commands each time). - - Also, using git add allows it to skip gitignored files, unless forced - - to include them. - -} -addLink :: FilePath -> Key -> Maybe InodeCache -> Annex () -addLink file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig) - ( do - _ <- link file key mcache - ps <- forceParams - Annex.Queue.addCommand "add" (ps++[Param "--"]) [file] - , do - l <- link file key mcache - addAnnexLink l file - ) - -forceParams :: Annex [CommandParam] -forceParams = ifM (Annex.getState Annex.force) - ( return [Param "-f"] - , return [] - ) - cleanup :: FilePath -> Key -> Maybe InodeCache -> Bool -> CommandCleanup cleanup file key mcache hascontent = do ifM (isDirect <&&> pure hascontent) diff --git a/Command/AddUnused.hs b/Command/AddUnused.hs index 2b315eada..57fd0cf38 100644 --- a/Command/AddUnused.hs +++ b/Command/AddUnused.hs @@ -10,7 +10,7 @@ module Command.AddUnused where import Common.Annex import Logs.Location import Command -import qualified Command.Add +import Annex.Ingest import Command.Unused (withUnusedMaps, UnusedMaps(..), startUnused) import Types.Key @@ -31,7 +31,7 @@ start = startUnused "addunused" perform perform :: Key -> CommandPerform perform key = next $ do logStatus key InfoPresent - Command.Add.addLink file key Nothing + addLink file key Nothing return True where file = "unused." ++ key2file key diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index 2989c5830..746a6725c 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -14,14 +14,15 @@ import Network.URI import Common.Annex import Command import Backend -import qualified Command.Add import qualified Annex import qualified Annex.Queue import qualified Annex.Url as Url import qualified Backend.URL import qualified Remote import qualified Types.Remote as Remote +import qualified Command.Add import Annex.Content +import Annex.Ingest import Annex.UUID import Logs.Web import Types.Key @@ -373,7 +374,7 @@ cleanup u url file key mtmp = case mtmp of when (isJust mtmp) $ logStatus key InfoPresent setUrlPresent u key url - Command.Add.addLink file key Nothing + addLink file key Nothing whenM isDirect $ do void $ addAssociatedFile key file {- For moveAnnex to work in direct mode, the symlink diff --git a/Command/ConfigList.hs b/Command/ConfigList.hs index 46c909107..997016e8e 100644 --- a/Command/ConfigList.hs +++ b/Command/ConfigList.hs @@ -46,7 +46,7 @@ findOrGenUUID = do else ifM (Annex.Branch.hasSibling <||> (isJust <$> Fields.getField Fields.autoInit)) ( do liftIO checkNotReadOnly - initialize Nothing + initialize Nothing Nothing getUUID , return NoUUID ) diff --git a/Command/Direct.hs b/Command/Direct.hs index 162780dd5..9cfd258eb 100644 --- a/Command/Direct.hs +++ b/Command/Direct.hs @@ -14,6 +14,7 @@ import qualified Git.LsFiles import qualified Git.Branch import Config import Annex.Direct +import Annex.Version cmd :: Command cmd = notBareRepo $ noDaemonRunning $ @@ -24,7 +25,10 @@ seek :: CmdParams -> CommandSeek seek = withNothing start start :: CommandStart -start = ifM isDirect ( stop , next perform ) +start = ifM versionSupportsDirectMode + ( ifM isDirect ( stop , next perform ) + , error "Direct mode is not suppported by this repository version. Use git-annex unlock instead." + ) perform :: CommandPerform perform = do diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 1531d2ab7..46de4ac96 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -34,6 +34,7 @@ import Utility.HumanTime import Utility.CopyFile import Git.FilePath import Utility.PID +import qualified Database.Keys #ifdef WITH_DATABASE import qualified Database.Fsck as FsckDb @@ -118,16 +119,18 @@ start from inc file key = do go = runFsck inc file key perform :: Key -> FilePath -> Backend -> NumCopies -> Annex Bool -perform key file backend numcopies = check - -- order matters - [ fixLink key file - , verifyLocationLog key file - , verifyDirectMapping key file - , verifyDirectMode key file - , checkKeySize key - , checkBackend backend key (Just file) - , checkKeyNumCopies key (Just file) numcopies - ] +perform key file backend numcopies = do + keystatus <- getKeyStatus key + check + -- order matters + [ fixLink key file + , verifyLocationLog key keystatus file + , verifyDirectMapping key file + , verifyDirectMode key file + , checkKeySize key keystatus + , checkBackend backend key keystatus (Just file) + , checkKeyNumCopies key (Just file) numcopies + ] {- To fsck a remote, the content is retrieved to a tmp file, - and checked locally. -} @@ -183,19 +186,19 @@ startKey inc key numcopies = performKey key backend numcopies performKey :: Key -> Backend -> NumCopies -> Annex Bool -performKey key backend numcopies = check - [ verifyLocationLog key (key2file key) - , checkKeySize key - , checkBackend backend key Nothing - , checkKeyNumCopies key Nothing numcopies - ] +performKey key backend numcopies = do + keystatus <- getKeyStatus key + check + [ verifyLocationLog key keystatus (key2file key) + , checkKeySize key keystatus + , checkBackend backend key keystatus Nothing + , checkKeyNumCopies key Nothing numcopies + ] check :: [Annex Bool] -> Annex Bool check cs = and <$> sequence cs -{- Checks that the file's link points correctly to the content. - - - - In direct mode, there is only a link when the content is not present. +{- Checks that symlinks points correctly to the annexed content. -} fixLink :: Key -> FilePath -> Annex Bool fixLink key file = do @@ -214,19 +217,23 @@ fixLink key file = do {- Checks that the location log reflects the current status of the key, - in this repository only. -} -verifyLocationLog :: Key -> String -> Annex Bool -verifyLocationLog key desc = do - present <- inAnnex key +verifyLocationLog :: Key -> KeyStatus -> String -> Annex Bool +verifyLocationLog key keystatus desc = do + obj <- calcRepo $ gitAnnexLocation key + present <- if isKeyUnlocked keystatus + then liftIO (doesFileExist obj) + else inAnnex key direct <- isDirect u <- getUUID - {- Since we're checking that a key's file is present, throw + {- Since we're checking that a key's object file is present, throw - in a permission fixup here too. -} - file <- calcRepo $ gitAnnexLocation key - when (present && not direct) $ - freezeContent file - whenM (liftIO $ doesDirectoryExist $ parentDir file) $ - freezeContentDir file + when (present && not direct) $ void $ tryIO $ + if isKeyUnlocked keystatus + then thawContent obj + else freezeContent obj + whenM (liftIO $ doesDirectoryExist $ parentDir obj) $ + freezeContentDir obj {- In direct mode, modified files will show up as not present, - but that is expected and not something to do anything about. -} @@ -288,18 +295,16 @@ verifyDirectMode key file = do {- The size of the data for a key is checked against the size encoded in - the key's metadata, if available. - - - Not checked in direct mode, because files can be changed directly. + - Not checked when a file is unlocked, or in direct mode. -} -checkKeySize :: Key -> Annex Bool -checkKeySize key = ifM isDirect - ( return True - , do - file <- calcRepo $ gitAnnexLocation key - ifM (liftIO $ doesFileExist file) - ( checkKeySizeOr badContent key file - , return True - ) - ) +checkKeySize :: Key -> KeyStatus -> Annex Bool +checkKeySize _ KeyUnlocked = return True +checkKeySize key _ = do + file <- calcRepo $ gitAnnexLocation key + ifM (liftIO $ doesFileExist file) + ( checkKeySizeOr badContent key file + , return True + ) checkKeySizeRemote :: Key -> Remote -> Maybe FilePath -> Annex Bool checkKeySizeRemote _ _ Nothing = return True @@ -326,18 +331,26 @@ checkKeySizeOr bad key file = case Types.Key.keySize key of , msg ] -{- Runs the backend specific check on a key's content. +{- Runs the backend specific check on a key's content object. + - + - When a file is unlocked, it may be a hard link to the object, + - thus when the user modifies the file, the object will be modified and + - not pass the check, and we don't want to find an error in this case. + - So, skip the check if the key is unlocked and modified. - - In direct mode this is not done if the file has clearly been modified, - because modification of direct mode files is allowed. It's still done - if the file does not appear modified, to catch disk corruption, etc. -} -checkBackend :: Backend -> Key -> Maybe FilePath -> Annex Bool -checkBackend backend key mfile = go =<< isDirect +checkBackend :: Backend -> Key -> KeyStatus -> Maybe FilePath -> Annex Bool +checkBackend backend key keystatus mfile = go =<< isDirect where go False = do content <- calcRepo $ gitAnnexLocation key - checkBackendOr badContent backend key content + ifM (pure (isKeyUnlocked keystatus) <&&> (not <$> isUnmodified key content)) + ( nocheck + , checkBackendOr badContent backend key content + ) go True = maybe nocheck checkdirect mfile checkdirect file = ifM (goodContent key file) ( checkBackendOr' (badContentDirect file) backend key file @@ -582,3 +595,20 @@ withFsckDb (StartIncremental h) a = a h withFsckDb NonIncremental _ = noop withFsckDb (ScheduleIncremental _ _ i) a = withFsckDb i a #endif + +data KeyStatus = KeyLocked | KeyUnlocked | KeyMissing + +isKeyUnlocked :: KeyStatus -> Bool +isKeyUnlocked KeyUnlocked = True +isKeyUnlocked KeyLocked = False +isKeyUnlocked KeyMissing = False + +getKeyStatus :: Key -> Annex KeyStatus +getKeyStatus key = ifM isDirect + ( return KeyUnlocked + , catchDefaultIO KeyMissing $ do + obj <- calcRepo $ gitAnnexLocation key + unlocked <- ((> 1) . linkCount <$> liftIO (getFileStatus obj)) + <&&> (not . null <$> Database.Keys.getAssociatedFiles key) + return $ if unlocked then KeyUnlocked else KeyLocked + ) diff --git a/Command/Indirect.hs b/Command/Indirect.hs index c12c91a48..06897e292 100644 --- a/Command/Indirect.hs +++ b/Command/Indirect.hs @@ -20,7 +20,7 @@ import Annex.Content import Annex.Content.Direct import Annex.CatFile import Annex.Init -import qualified Command.Add +import Annex.Ingest cmd :: Command cmd = notBareRepo $ noDaemonRunning $ @@ -76,7 +76,7 @@ perform = do return Nothing | otherwise -> maybe noop (fromdirect f) - =<< catKey sha mode + =<< catKey sha _ -> noop go _ = noop @@ -90,7 +90,7 @@ perform = do Right _ -> do l <- calcRepo $ gitAnnexLink f k liftIO $ createSymbolicLink l f - Left e -> catchNonAsync (Command.Add.undo f k e) + Left e -> catchNonAsync (restoreFile f k e) warnlocked showEndOk diff --git a/Command/Init.hs b/Command/Init.hs index d969669f8..94d8168a6 100644 --- a/Command/Init.hs +++ b/Command/Init.hs @@ -10,25 +10,44 @@ module Command.Init where import Common.Annex import Command import Annex.Init +import Annex.Version import qualified Annex.SpecialRemote cmd :: Command cmd = dontCheck repoExists $ command "init" SectionSetup "initialize git-annex" - paramDesc (withParams seek) + paramDesc (seek <$$> optParser) -seek :: CmdParams -> CommandSeek -seek = withWords start +data InitOptions = InitOptions + { initDesc :: String + , initVersion :: Maybe Version + } -start :: [String] -> CommandStart -start ws = do - showStart "init" description - next $ perform description - where - description = unwords ws +optParser :: CmdParamsDesc -> Parser InitOptions +optParser desc = InitOptions + <$> (unwords <$> cmdParams desc) + <*> optional (option (str >>= parseVersion) + ( long "version" <> metavar paramValue + <> help "Override default annex.version" + )) -perform :: String -> CommandPerform -perform description = do - initialize $ if null description then Nothing else Just description +parseVersion :: Monad m => String -> m Version +parseVersion v + | v `elem` supportedVersions = return v + | otherwise = fail $ v ++ " is not a currently supported repository version" + +seek :: InitOptions -> CommandSeek +seek = commandAction . start + +start :: InitOptions -> CommandStart +start os = do + showStart "init" (initDesc os) + next $ perform os + +perform :: InitOptions -> CommandPerform +perform os = do + initialize + (if null (initDesc os) then Nothing else Just (initDesc os)) + (initVersion os) Annex.SpecialRemote.autoEnable next $ return True diff --git a/Command/Lock.hs b/Command/Lock.hs index 7711ec3b8..1be6e9c76 100644 --- a/Command/Lock.hs +++ b/Command/Lock.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010 Joey Hess <id@joeyh.name> + - Copyright 2010,2015 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -11,6 +11,16 @@ import Common.Annex import Command import qualified Annex.Queue import qualified Annex +import Annex.Version +import Annex.Content +import Annex.Link +import Annex.InodeSentinal +import Annex.Perms +import Annex.ReplaceFile +import Utility.InodeCache +import qualified Database.Keys +import Annex.Ingest +import Logs.Location cmd :: Command cmd = notDirect $ withGlobalOptions annexedMatchingOptions $ @@ -19,18 +29,90 @@ cmd = notDirect $ withGlobalOptions annexedMatchingOptions $ paramPaths (withParams seek) seek :: CmdParams -> CommandSeek -seek ps = do - withFilesUnlocked start ps - withFilesUnlockedToBeCommitted start ps +seek ps = ifM versionSupportsUnlockedPointers + ( withFilesInGit (whenAnnexed startNew) ps + , do + withFilesOldUnlocked startOld ps + withFilesOldUnlockedToBeCommitted startOld ps + ) -start :: FilePath -> CommandStart -start file = do +startNew :: FilePath -> Key -> CommandStart +startNew file key = ifM (isJust <$> isAnnexLink file) + ( stop + , do + showStart "lock" file + go =<< isPointerFile file + ) + where + go (Just key') + | key' == key = cont False + | otherwise = errorModified + go Nothing = + ifM (isUnmodified key file) + ( cont False + , ifM (Annex.getState Annex.force) + ( cont True + , errorModified + ) + ) + cont = next . performNew file key + +performNew :: FilePath -> Key -> Bool -> CommandPerform +performNew file key filemodified = do + lockdown =<< calcRepo (gitAnnexLocation key) + addLink file key + =<< withTSDelta (liftIO . genInodeCache file) + next $ cleanupNew file key + where + lockdown obj = do + ifM (catchBoolIO $ sameInodeCache obj =<< Database.Keys.getInodeCaches key) + ( breakhardlink obj + , repopulate obj + ) + whenM (liftIO $ doesFileExist obj) $ + freezeContent obj + + -- It's ok if the file is hard linked to obj, but if some other + -- associated file is, we need to break that link to lock down obj. + breakhardlink obj = whenM (catchBoolIO $ (> 1) . linkCount <$> liftIO (getFileStatus obj)) $ do + mfc <- withTSDelta (liftIO . genInodeCache file) + unlessM (sameInodeCache obj (maybeToList mfc)) $ do + modifyContent obj $ replaceFile obj $ \tmp -> do + unlessM (checkedCopyFile key obj tmp) $ + error "unable to lock file; need more free disk space" + Database.Keys.storeInodeCaches key [obj] + + -- Try to repopulate obj from an unmodified associated file. + repopulate obj + | filemodified = modifyContent obj $ do + fs <- Database.Keys.getAssociatedFiles key + mfile <- firstM (isUnmodified key) fs + liftIO $ nukeFile obj + case mfile of + Just unmodified -> + unlessM (checkedCopyFile key unmodified obj) + lostcontent + Nothing -> lostcontent + | otherwise = modifyContent obj $ + liftIO $ renameFile file obj + lostcontent = logStatus key InfoMissing + +cleanupNew :: FilePath -> Key -> CommandCleanup +cleanupNew file key = do + Database.Keys.removeAssociatedFile key file + return True + +startOld :: FilePath -> CommandStart +startOld file = do showStart "lock" file - unlessM (Annex.getState Annex.force) $ - error "Locking this file would discard any changes you have made to it. Use 'git annex add' to stage your changes. (Or, use --force to override)" - next $ perform file + unlessM (Annex.getState Annex.force) + errorModified + next $ performOld file -perform :: FilePath -> CommandPerform -perform file = do +performOld :: FilePath -> CommandPerform +performOld file = do Annex.Queue.addCommand "checkout" [Param "--"] [file] - next $ return True -- no cleanup needed + next $ return True + +errorModified :: a +errorModified = error "Locking this file would discard any changes you have made to it. Use 'git annex add' to stage your changes. (Or, use --force to override)" diff --git a/Command/Migrate.hs b/Command/Migrate.hs index d1c7902d7..b8d2eea87 100644 --- a/Command/Migrate.hs +++ b/Command/Migrate.hs @@ -72,7 +72,7 @@ perform file oldkey oldbackend newbackend = go =<< genkey go (Just (newkey, knowngoodcontent)) | knowngoodcontent = finish newkey | otherwise = stopUnless checkcontent $ finish newkey - checkcontent = Command.Fsck.checkBackend oldbackend oldkey $ Just file + checkcontent = Command.Fsck.checkBackend oldbackend oldkey Command.Fsck.KeyLocked $ Just file finish newkey = stopUnless (Command.ReKey.linkKey oldkey newkey) $ next $ Command.ReKey.cleanup file oldkey newkey genkey = case maybe Nothing (\fm -> fm oldkey newbackend (Just file)) (fastMigrate oldbackend) of diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs index 2d62b51f3..cbf7f6e3d 100644 --- a/Command/PreCommit.hs +++ b/Command/PreCommit.hs @@ -16,7 +16,9 @@ import qualified Command.Add import qualified Command.Fix import Annex.Direct import Annex.Hook +import Annex.Link import Annex.View +import Annex.Version import Annex.View.ViewedFile import Annex.LockFile import Logs.View @@ -41,17 +43,22 @@ seek ps = lockPreCommitHook $ ifM isDirect withWords startDirect ps runAnnexHook preCommitAnnexHook , do - ifM (liftIO Git.haveFalseIndex) + ifM (not <$> versionSupportsUnlockedPointers <&&> liftIO Git.haveFalseIndex) ( do (fs, cleanup) <- inRepo $ Git.typeChangedStaged ps - whenM (anyM isUnlocked fs) $ + whenM (anyM isOldUnlocked fs) $ error "Cannot make a partial commit with unlocked annexed files. You should `git annex add` the files you want to commit, and then run git commit." void $ liftIO cleanup , do -- fix symlinks to files being committed - withFilesToBeCommitted (whenAnnexed Command.Fix.start) ps + flip withFilesToBeCommitted ps $ \f -> + maybe stop (Command.Fix.start f) + =<< isAnnexLink f -- inject unlocked files into the annex - withFilesUnlockedToBeCommitted startIndirect ps + -- (not needed when repo version uses + -- unlocked pointer files) + unlessM versionSupportsUnlockedPointers $ + withFilesOldUnlockedToBeCommitted startInjectUnlocked ps ) runAnnexHook preCommitAnnexHook -- committing changes to a view updates metadata @@ -64,8 +71,8 @@ seek ps = lockPreCommitHook $ ifM isDirect ) -startIndirect :: FilePath -> CommandStart -startIndirect f = next $ do +startInjectUnlocked :: FilePath -> CommandStart +startInjectUnlocked f = next $ do unlessM (callCommandAction $ Command.Add.start f) $ error $ "failed to add " ++ f ++ "; canceling commit" next $ return True diff --git a/Command/ReKey.hs b/Command/ReKey.hs index fe13d4dd4..9fb8515c0 100644 --- a/Command/ReKey.hs +++ b/Command/ReKey.hs @@ -12,7 +12,7 @@ import Command import qualified Annex import Types.Key import Annex.Content -import qualified Command.Add +import Annex.Ingest import Logs.Web import Logs.Location import Utility.CopyFile @@ -70,6 +70,6 @@ cleanup file oldkey newkey = do -- Update symlink to use the new key. liftIO $ removeFile file - Command.Add.addLink file newkey Nothing + addLink file newkey Nothing logStatus newkey InfoPresent return True diff --git a/Command/Reinit.hs b/Command/Reinit.hs index 1be692871..e2c00a3d2 100644 --- a/Command/Reinit.hs +++ b/Command/Reinit.hs @@ -38,6 +38,6 @@ perform s = do then return $ toUUID s else Remote.nameToUUID s storeUUID u - initialize' + initialize' Nothing Annex.SpecialRemote.autoEnable next $ return True diff --git a/Command/Smudge.hs b/Command/Smudge.hs new file mode 100644 index 000000000..bde440f7e --- /dev/null +++ b/Command/Smudge.hs @@ -0,0 +1,115 @@ +{- git-annex command + - + - Copyright 2015 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Smudge where + +import Common.Annex +import Command +import Annex.Content +import Annex.Link +import Annex.MetaData +import Annex.FileMatcher +import Annex.InodeSentinal +import Annex.Ingest +import Utility.InodeCache +import Types.KeySource +import Backend +import Logs.Location +import qualified Database.Keys + +import qualified Data.ByteString.Lazy as B + +cmd :: Command +cmd = noCommit $ noMessages $ + command "smudge" SectionPlumbing + "git smudge filter" + paramFile (seek <$$> optParser) + +data SmudgeOptions = SmudgeOptions + { smudgeFile :: FilePath + , cleanOption :: Bool + } + +optParser :: CmdParamsDesc -> Parser SmudgeOptions +optParser desc = SmudgeOptions + <$> argument str ( metavar desc ) + <*> switch ( long "clean" <> help "clean filter" ) + +seek :: SmudgeOptions -> CommandSeek +seek o = commandAction $ + (if cleanOption o then clean else smudge) (smudgeFile o) + +-- Smudge filter is fed git file content, and if it's a pointer to an +-- available annex object, should output its content. +smudge :: FilePath -> CommandStart +smudge file = do + b <- liftIO $ B.hGetContents stdin + case parseLinkOrPointer b of + Nothing -> liftIO $ B.putStr b + Just k -> do + -- A previous unlocked checkout of the file may have + -- led to the annex object getting modified; + -- don't provide such modified content as it + -- will be confusing. inAnnex will detect such + -- modifications. + ifM (inAnnex k) + ( do + content <- calcRepo (gitAnnexLocation k) + liftIO $ B.putStr . fromMaybe b + =<< catchMaybeIO (B.readFile content) + , liftIO $ B.putStr b + ) + Database.Keys.addAssociatedFile k file + stop + +-- Clean filter is fed file content on stdin, decides if a file +-- should be stored in the annex, and outputs a pointer to its +-- injested content. +clean :: FilePath -> CommandStart +clean file = do + b <- liftIO $ B.hGetContents stdin + if isJust (parseLinkOrPointer b) + then liftIO $ B.hPut stdout b + else ifM (shouldAnnex file) + ( liftIO . emitPointer =<< ingestLocal file + , liftIO $ B.hPut stdout b + ) + stop + +shouldAnnex :: FilePath -> Annex Bool +shouldAnnex file = do + matcher <- largeFilesMatcher + checkFileMatcher matcher file + +-- TODO: Use main ingest code instead? +ingestLocal :: FilePath -> Annex Key +ingestLocal file = do + backend <- chooseBackend file + ic <- withTSDelta (liftIO . genInodeCache file) + let source = KeySource + { keyFilename = file + , contentLocation = file + , inodeCache = ic + } + k <- fst . fromMaybe (error "failed to generate a key") + <$> genKey source backend + -- Hard link (or copy) file content to annex object + -- to prevent it from being lost when git checks out + -- a branch not containing this file. + r <- linkAnnex k file ic + case r of + LinkAnnexFailed -> error "Problem adding file to the annex" + LinkAnnexOk -> logStatus k InfoPresent + LinkAnnexNoop -> noop + genMetaData k file + =<< liftIO (getFileStatus file) + cleanOldKeys file k + Database.Keys.addAssociatedFile k file + return k + +emitPointer :: Key -> IO () +emitPointer = putStr . formatPointer diff --git a/Command/Unannex.hs b/Command/Unannex.hs index fdf976d3e..9bde19106 100644 --- a/Command/Unannex.hs +++ b/Command/Unannex.hs @@ -15,12 +15,14 @@ import Config import qualified Annex import Annex.Content import Annex.Content.Direct +import Annex.Version import qualified Git.Command import qualified Git.Branch import qualified Git.Ref import qualified Git.DiffTree as DiffTree import Utility.CopyFile import Command.PreCommit (lockPreCommitHook) +import qualified Database.Keys cmd :: Command cmd = withGlobalOptions annexedMatchingOptions $ @@ -32,7 +34,7 @@ seek :: CmdParams -> CommandSeek seek = wrapUnannex . (withFilesInGit $ whenAnnexed start) wrapUnannex :: Annex a -> Annex a -wrapUnannex a = ifM isDirect +wrapUnannex a = ifM (versionSupportsUnlockedPointers <||> isDirect) ( a {- Run with the pre-commit hook disabled, to avoid confusing - behavior if an unannexed file is added back to git as @@ -85,6 +87,7 @@ performIndirect file key = do cleanupIndirect :: FilePath -> Key -> CommandCleanup cleanupIndirect file key = do + Database.Keys.removeAssociatedFile key file src <- calcRepo $ gitAnnexLocation key ifM (Annex.getState Annex.fast) ( do diff --git a/Command/Undo.hs b/Command/Undo.hs index c647dfba4..0692dce34 100644 --- a/Command/Undo.hs +++ b/Command/Undo.hs @@ -72,7 +72,7 @@ perform p = do f <- mkrel di whenM isDirect $ maybe noop (`removeDirect` f) - =<< catKey (srcsha di) (srcmode di) + =<< catKey (srcsha di) liftIO $ nukeFile f forM_ adds $ \di -> do @@ -80,6 +80,6 @@ perform p = do inRepo $ Git.run [Param "checkout", Param "--", File f] whenM isDirect $ maybe noop (`toDirect` f) - =<< catKey (dstsha di) (dstmode di) + =<< catKey (dstsha di) next $ liftIO cleanup diff --git a/Command/Unlock.hs b/Command/Unlock.hs index d1b1d0e90..b82f78096 100644 --- a/Command/Unlock.hs +++ b/Command/Unlock.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010 Joey Hess <id@joeyh.name> + - Copyright 2010,2015 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -11,6 +11,11 @@ import Common.Annex import Command import Annex.Content import Annex.CatFile +import Annex.Version +import Annex.Link +import Annex.ReplaceFile +import Annex.InodeSentinal +import Utility.InodeCache import Utility.CopyFile cmd :: Command @@ -26,14 +31,46 @@ mkcmd n d = notDirect $ withGlobalOptions annexedMatchingOptions $ seek :: CmdParams -> CommandSeek seek = withFilesInGit $ whenAnnexed start -{- The unlock subcommand replaces the symlink with a copy of the file's - - content. -} +{- Before v6, the unlock subcommand replaces the symlink with a copy of + - the file's content. In v6 and above, it converts the file from a symlink + - to a pointer. -} start :: FilePath -> Key -> CommandStart -start file key = do - showStart "unlock" file +start file key = ifM (isJust <$> isAnnexLink file) + ( do + showStart "unlock" file + ifM (inAnnex key) + ( ifM versionSupportsUnlockedPointers + ( next $ performNew file key + , startOld file key + ) + , do + warning "content not present; cannot unlock" + next $ next $ return False + ) + , stop + ) + +performNew :: FilePath -> Key -> CommandPerform +performNew dest key = do + src <- calcRepo (gitAnnexLocation key) + srcic <- withTSDelta (liftIO . genInodeCache src) + replaceFile dest $ \tmp -> do + r <- linkAnnex' key src srcic tmp + case r of + LinkAnnexOk -> return () + _ -> error "linkAnnex failed" + next $ cleanupNew dest key + +cleanupNew :: FilePath -> Key -> CommandCleanup +cleanupNew dest key = do + stagePointerFile dest =<< hashPointerFile key + return True + +startOld :: FilePath -> Key -> CommandStart +startOld file key = ifM (inAnnex key) ( ifM (isJust <$> catKeyFileHEAD file) - ( next $ perform file key + ( next $ performOld file key , do warning "this has not yet been committed to git; cannot unlock it" next $ next $ return False @@ -43,8 +80,8 @@ start file key = do next $ next $ return False ) -perform :: FilePath -> Key -> CommandPerform -perform dest key = ifM (checkDiskSpace Nothing key 0 True) +performOld :: FilePath -> Key -> CommandPerform +performOld dest key = ifM (checkDiskSpace Nothing key 0 True) ( do src <- calcRepo $ gitAnnexLocation key tmpdest <- fromRepo $ gitAnnexTmpObjectLocation key diff --git a/Command/Unused.hs b/Command/Unused.hs index 4756cda5d..4353bd075 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -24,7 +24,6 @@ import qualified Git.Branch import qualified Git.RefLog import qualified Git.LsFiles as LsFiles import qualified Git.DiffTree as DiffTree -import qualified Backend import qualified Remote import qualified Annex.Branch import Annex.CatFile @@ -215,7 +214,7 @@ withKeysReferenced' mdir initial a = do Just dir -> inRepo $ LsFiles.inRepo [dir] go v [] = return v go v (f:fs) = do - x <- Backend.lookupFile f + x <- lookupFile f case x of Nothing -> go v fs Just k -> do @@ -266,7 +265,7 @@ withKeysReferencedInGitRef a ref = do forM_ ts $ tKey lookAtWorkingTree >=> maybe noop a liftIO $ void clean where - tKey True = Backend.lookupFile . getTopFilePath . DiffTree.file + tKey True = lookupFile . getTopFilePath . DiffTree.file tKey False = fileKey . takeFileName . decodeBS <$$> catFile ref . getTopFilePath . DiffTree.file diff --git a/Command/Upgrade.hs b/Command/Upgrade.hs index c02a6709f..8a34022e3 100644 --- a/Command/Upgrade.hs +++ b/Command/Upgrade.hs @@ -13,6 +13,7 @@ import Upgrade cmd :: Command cmd = dontCheck repoExists $ -- because an old version may not seem to exist + noDaemonRunning $ -- avoid upgrading repo out from under daemon command "upgrade" SectionMaintenance "upgrade repository layout" paramNothing (withParams seek) diff --git a/Command/Version.hs b/Command/Version.hs index 72bbe4064..c5a9fcef2 100644 --- a/Command/Version.hs +++ b/Command/Version.hs @@ -50,7 +50,8 @@ showVersion = do liftIO $ do showPackageVersion vinfo "local repository version" $ fromMaybe "unknown" v - vinfo "supported repository version" supportedVersion + vinfo "supported repository versions" $ + unwords supportedVersions vinfo "upgrade supported from repository versions" $ unwords upgradableVersions @@ -90,3 +90,21 @@ setCrippledFileSystem :: Bool -> Annex () setCrippledFileSystem b = do setConfig (annexConfig "crippledfilesystem") (Git.Config.boolConfig b) Annex.changeGitConfig $ \c -> c { annexCrippledFileSystem = b } + +configureSmudgeFilter :: Annex () +configureSmudgeFilter = do + setConfig (ConfigKey "filter.annex.smudge") "git-annex smudge %f" + setConfig (ConfigKey "filter.annex.clean") "git-annex smudge --clean %f" + lf <- Annex.fromRepo Git.attributesLocal + gf <- Annex.fromRepo Git.attributes + lfs <- readattr lf + gfs <- readattr gf + liftIO $ unless ("filter=annex" `isInfixOf` (lfs ++ gfs)) $ do + createDirectoryIfMissing True (takeDirectory lf) + writeFile lf (lfs ++ "\n" ++ stdattr) + where + readattr = liftIO . catchDefaultIO "" . readFileStrictAnyEncoding + stdattr = unlines + [ "* filter=annex" + , ".* !filter" + ] diff --git a/Database/Fsck.hs b/Database/Fsck.hs index ed00e62d8..e7ece34ed 100644 --- a/Database/Fsck.hs +++ b/Database/Fsck.hs @@ -21,7 +21,7 @@ module Database.Fsck ( ) where import Database.Types -import qualified Database.Handle as H +import qualified Database.Queue as H import Locations import Utility.PosixFiles import Utility.Exception @@ -31,13 +31,12 @@ import Types.Key import Types.UUID import Annex.Perms import Annex.LockFile -import Messages import Database.Persist.TH import Database.Esqueleto hiding (Key) import Data.Time.Clock -data FsckHandle = FsckHandle H.DbHandle UUID +data FsckHandle = FsckHandle H.DbQueue UUID {- Each key stored in the database has already been fscked as part - of the latest incremental fsck pass. -} @@ -59,7 +58,7 @@ newPass u = isJust <$> tryExclusiveLock (gitAnnexFsckDbLock u) go go = liftIO . void . tryIO . removeDirectoryRecursive =<< fromRepo (gitAnnexFsckDbDir u) -{- Opens the database, creating it atomically if it doesn't exist yet. -} +{- Opens the database, creating it if it doesn't exist yet. -} openDb :: UUID -> Annex FsckHandle openDb u = do dbdir <- fromRepo (gitAnnexFsckDbDir u) @@ -77,16 +76,12 @@ openDb u = do void $ tryIO $ removeDirectoryRecursive dbdir rename tmpdbdir dbdir lockFileCached =<< fromRepo (gitAnnexFsckDbLock u) - h <- liftIO $ H.openDb db "fscked" - - -- work around https://github.com/yesodweb/persistent/issues/474 - liftIO setConsoleEncoding - + h <- liftIO $ H.openDbQueue db "fscked" return $ FsckHandle h u closeDb :: FsckHandle -> Annex () closeDb (FsckHandle h u) = do - liftIO $ H.closeDb h + liftIO $ H.closeDbQueue h unlockFile =<< fromRepo (gitAnnexFsckDbLock u) addDb :: FsckHandle -> Key -> IO () @@ -102,8 +97,9 @@ addDb (FsckHandle h _) k = H.queueDb h checkcommit $ now <- getCurrentTime return $ diffUTCTime lastcommittime now > 300 +{- Doesn't know about keys that were just added with addDb. -} inDb :: FsckHandle -> Key -> IO Bool -inDb (FsckHandle h _) = H.queryDb h . inDb' . toSKey +inDb (FsckHandle h _) = H.queryDbQueue h . inDb' . toSKey inDb' :: SKey -> SqlPersistM Bool inDb' sk = do diff --git a/Database/Handle.hs b/Database/Handle.hs index 439e7c18b..748feaa97 100644 --- a/Database/Handle.hs +++ b/Database/Handle.hs @@ -11,17 +11,15 @@ module Database.Handle ( DbHandle, initDb, openDb, + TableName, queryDb, closeDb, - Size, - queueDb, - flushQueueDb, commitDb, + commitDb', ) where import Utility.Exception -import Utility.Monad -import Messages +import Utility.FileSystemEncoding import Database.Persist.Sqlite import qualified Database.Sqlite as Sqlite @@ -29,22 +27,22 @@ import Control.Monad import Control.Monad.IO.Class (liftIO) import Control.Concurrent import Control.Concurrent.Async -import Control.Exception (throwIO) +import Control.Exception (throwIO, BlockedIndefinitelyOnMVar(..)) import qualified Data.Text as T import Control.Monad.Trans.Resource (runResourceT) import Control.Monad.Logger (runNoLoggingT) import Data.List -import Data.Time.Clock +import System.IO {- A DbHandle is a reference to a worker thread that communicates with - the database. It has a MVar which Jobs are submitted to. -} -data DbHandle = DbHandle (Async ()) (MVar Job) (MVar DbQueue) +data DbHandle = DbHandle (Async ()) (MVar Job) {- Ensures that the database is initialized. Pass the migration action for - the database. - - - The database is put into WAL mode, to prevent readers from blocking - - writers, and prevent a writer from blocking readers. + - The database is initialized using WAL mode, to prevent readers + - from blocking writers, and prevent a writer from blocking readers. -} initDb :: FilePath -> SqlPersistM () -> IO () initDb f migration = do @@ -60,67 +58,27 @@ enableWAL db = do void $ Sqlite.finalize stmt Sqlite.close conn +{- Name of a table that should exist once the database is initialized. -} +type TableName = String + {- Opens the database, but does not perform any migrations. Only use - if the database is known to exist and have the right tables. -} openDb :: FilePath -> TableName -> IO DbHandle openDb db tablename = do jobs <- newEmptyMVar worker <- async (workerThread (T.pack db) tablename jobs) - q <- newMVar =<< emptyDbQueue - return $ DbHandle worker jobs q - -data Job - = QueryJob (SqlPersistM ()) - | ChangeJob ((SqlPersistM () -> IO ()) -> IO ()) - | CloseJob - -type TableName = String - -workerThread :: T.Text -> TableName -> MVar Job -> IO () -workerThread db tablename jobs = catchNonAsync (run loop) showerr - where - showerr e = liftIO $ warningIO $ - "sqlite worker thread crashed: " ++ show e - - loop = do - job <- liftIO $ takeMVar jobs - case job of - QueryJob a -> a >> loop - -- change is run in a separate database connection - -- since sqlite only supports a single writer at a - -- time, and it may crash the database connection - ChangeJob a -> liftIO (a run) >> loop - CloseJob -> return () - -- like runSqlite, but calls settle on the raw sql Connection. - run a = do - conn <- Sqlite.open db - settle conn - runResourceT $ runNoLoggingT $ - withSqlConn (wrapConnection conn) $ - runSqlConn a + -- work around https://github.com/yesodweb/persistent/issues/474 + liftIO setConsoleEncoding - -- Work around a bug in sqlite: New database connections can - -- sometimes take a while to become usable; select statements will - -- fail with ErrorBusy for some time. So, loop until a select - -- succeeds; once one succeeds the connection will stay usable. - -- <http://thread.gmane.org/gmane.comp.db.sqlite.general/93116> - settle conn = do - r <- tryNonAsync $ do - stmt <- Sqlite.prepare conn nullselect - void $ Sqlite.step stmt - void $ Sqlite.finalize stmt - case r of - Right _ -> return () - Left e -> do - if "ErrorBusy" `isInfixOf` show e - then do - threadDelay 1000 -- 1/1000th second - settle conn - else throwIO e - - -- This should succeed for any table. - nullselect = T.pack $ "SELECT null from " ++ tablename ++ " limit 1" + return $ DbHandle worker jobs + +{- This is optional; when the DbHandle gets garbage collected it will + - auto-close. -} +closeDb :: DbHandle -> IO () +closeDb (DbHandle worker jobs) = do + putMVar jobs CloseJob + wait worker {- Makes a query using the DbHandle. This should not be used to make - changes to the database! @@ -133,71 +91,21 @@ workerThread db tablename jobs = catchNonAsync (run loop) showerr - it is able to run. -} queryDb :: DbHandle -> SqlPersistM a -> IO a -queryDb (DbHandle _ jobs _) a = do +queryDb (DbHandle _ jobs) a = do res <- newEmptyMVar putMVar jobs $ QueryJob $ liftIO . putMVar res =<< tryNonAsync a (either throwIO return =<< takeMVar res) `catchNonAsync` (const $ error "sqlite query crashed") -closeDb :: DbHandle -> IO () -closeDb h@(DbHandle worker jobs _) = do - flushQueueDb h - putMVar jobs CloseJob - wait worker - -type Size = Int - -type LastCommitTime = UTCTime - -{- A queue of actions to perform, with a count of the number of actions - - queued, and a last commit time. -} -data DbQueue = DbQueue Size LastCommitTime (SqlPersistM ()) - -emptyDbQueue :: IO DbQueue -emptyDbQueue = do - now <- getCurrentTime - return $ DbQueue 0 now (return ()) - -{- Queues a change to be made to the database. It will be buffered - - to be committed later, unless the commitchecker action returns true. - - - - (Be sure to call closeDb or flushQueueDb to ensure the change - - gets committed.) +{- Writes a change to the database. - - - Transactions built up by queueDb are sent to sqlite all at once. - - If sqlite fails due to another change being made concurrently by another - - process, the transaction is put back in the queue. This solves - - the sqlite multiple writer problem. + - If a database is opened multiple times and there's a concurrent writer, + - the write could fail. Retries repeatedly for up to 10 seconds, + - which should avoid all but the most exceptional problems. -} -queueDb - :: DbHandle - -> (Size -> LastCommitTime -> IO Bool) - -> SqlPersistM () - -> IO () -queueDb h@(DbHandle _ _ qvar) commitchecker a = do - DbQueue sz lastcommittime qa <- takeMVar qvar - let !sz' = sz + 1 - let qa' = qa >> a - let enqueue = putMVar qvar - ifM (commitchecker sz' lastcommittime) - ( do - r <- commitDb h qa' - case r of - Left _ -> enqueue $ DbQueue sz' lastcommittime qa' - Right _ -> do - now <- getCurrentTime - enqueue $ DbQueue 0 now (return ()) - , enqueue $ DbQueue sz' lastcommittime qa' - ) - -{- If flushing the queue fails, this could be because there is another - - writer to the database. Retry repeatedly for up to 10 seconds. -} -flushQueueDb :: DbHandle -> IO () -flushQueueDb h@(DbHandle _ _ qvar) = do - DbQueue sz _ qa <- takeMVar qvar - when (sz > 0) $ - robustly Nothing 100 (commitDb h qa) +commitDb :: DbHandle -> SqlPersistM () -> IO () +commitDb h wa = robustly Nothing 100 (commitDb' h wa) where robustly :: Maybe SomeException -> Int -> IO (Either SomeException ()) -> IO () robustly e 0 _ = error $ "failed to commit changes to sqlite database: " ++ show e @@ -209,9 +117,69 @@ flushQueueDb h@(DbHandle _ _ qvar) = do threadDelay 100000 -- 1/10th second robustly (Just e) (n-1) a -commitDb :: DbHandle -> SqlPersistM () -> IO (Either SomeException ()) -commitDb (DbHandle _ jobs _) a = do +commitDb' :: DbHandle -> SqlPersistM () -> IO (Either SomeException ()) +commitDb' (DbHandle _ jobs) a = do res <- newEmptyMVar putMVar jobs $ ChangeJob $ \runner -> liftIO $ putMVar res =<< tryNonAsync (runner a) takeMVar res + +data Job + = QueryJob (SqlPersistM ()) + | ChangeJob ((SqlPersistM () -> IO ()) -> IO ()) + | CloseJob + +workerThread :: T.Text -> TableName -> MVar Job -> IO () +workerThread db tablename jobs = + catchNonAsync (runSqliteRobustly tablename db loop) showerr + where + showerr e = hPutStrLn stderr $ + "sqlite worker thread crashed: " ++ show e + + getjob :: IO (Either BlockedIndefinitelyOnMVar Job) + getjob = try $ takeMVar jobs + + loop = do + job <- liftIO getjob + case job of + -- Exception is thrown when the MVar is garbage + -- collected, which means the whole DbHandle + -- is not used any longer. Shutdown cleanly. + Left BlockedIndefinitelyOnMVar -> return () + Right CloseJob -> return () + Right (QueryJob a) -> a >> loop + -- change is run in a separate database connection + -- since sqlite only supports a single writer at a + -- time, and it may crash the database connection + Right (ChangeJob a) -> liftIO (a (runSqliteRobustly tablename db)) >> loop + +-- like runSqlite, but calls settle on the raw sql Connection. +runSqliteRobustly :: TableName -> T.Text -> (SqlPersistM a) -> IO a +runSqliteRobustly tablename db a = do + conn <- Sqlite.open db + settle conn + runResourceT $ runNoLoggingT $ + withSqlConn (wrapConnection conn) $ + runSqlConn a + where + -- Work around a bug in sqlite: New database connections can + -- sometimes take a while to become usable; select statements will + -- fail with ErrorBusy for some time. So, loop until a select + -- succeeds; once one succeeds the connection will stay usable. + -- <http://thread.gmane.org/gmane.comp.db.sqlite.general/93116> + settle conn = do + r <- tryNonAsync $ do + stmt <- Sqlite.prepare conn nullselect + void $ Sqlite.step stmt + void $ Sqlite.finalize stmt + case r of + Right _ -> return () + Left e -> do + if "ErrorBusy" `isInfixOf` show e + then do + threadDelay 1000 -- 1/1000th second + settle conn + else throwIO e + + -- This should succeed for any table. + nullselect = T.pack $ "SELECT null from " ++ tablename ++ " limit 1" diff --git a/Database/Keys.hs b/Database/Keys.hs new file mode 100644 index 000000000..f5a28c704 --- /dev/null +++ b/Database/Keys.hs @@ -0,0 +1,237 @@ +{- Sqlite database of information about Keys + - + - Copyright 2015 Joey Hess <id@joeyh.name> + -: + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-} +{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-} +{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-} +{-# LANGUAGE RankNTypes #-} + +module Database.Keys ( + DbHandle, + addAssociatedFile, + getAssociatedFiles, + getAssociatedKey, + removeAssociatedFile, + storeInodeCaches, + addInodeCaches, + getInodeCaches, + removeInodeCaches, + AssociatedId, + ContentId, +) where + +import Database.Types +import Database.Keys.Handle +import qualified Database.Queue as H +import Locations +import Common hiding (delete) +import Annex +import Types.Key +import Annex.Perms +import Annex.LockFile +import Utility.InodeCache +import Annex.InodeSentinal + +import Database.Persist.TH +import Database.Esqueleto hiding (Key) +import Data.Time.Clock + +share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase| +Associated + key SKey + file FilePath + KeyFileIndex key file +Content + key SKey + cache SInodeCache + KeyCacheIndex key cache +|] + +newtype ReadHandle = ReadHandle H.DbQueue + +type Reader v = ReadHandle -> Annex v + +{- Runs an action that reads from the database. + - + - If the database doesn't already exist, it's not created; mempty is + - returned instead. This way, when the keys database is not in use, + - there's minimal overhead in checking it. + - + - If the database is already open, any writes are flushed to it, to ensure + - consistency. + - + - Any queued writes will be flushed before the read. + -} +runReader :: Monoid v => Reader v -> Annex v +runReader a = do + h <- getDbHandle + withDbState h go + where + go DbEmpty = return (mempty, DbEmpty) + go st@(DbOpen qh) = do + liftIO $ H.flushDbQueue qh + v <- a (ReadHandle qh) + return (v, st) + go DbClosed = do + st' <- openDb False DbClosed + v <- case st' of + (DbOpen qh) -> a (ReadHandle qh) + _ -> return mempty + return (v, st') + +readDb :: SqlPersistM a -> ReadHandle -> Annex a +readDb a (ReadHandle h) = liftIO $ H.queryDbQueue h a + +newtype WriteHandle = WriteHandle H.DbQueue + +type Writer = WriteHandle -> Annex () + +{- Runs an action that writes to the database. Typically this is used to + - queue changes, which will be flushed at a later point. + - + - The database is created if it doesn't exist yet. -} +runWriter :: Writer -> Annex () +runWriter a = do + h <- getDbHandle + withDbState h go + where + go st@(DbOpen qh) = do + v <- a (WriteHandle qh) + return (v, st) + go st = do + st' <- openDb True st + v <- case st' of + DbOpen qh -> a (WriteHandle qh) + _ -> error "internal" + return (v, st') + +queueDb :: SqlPersistM () -> WriteHandle -> Annex () +queueDb a (WriteHandle h) = liftIO $ H.queueDb h checkcommit a + where + -- commit queue after 1000 changes or 5 minutes, whichever comes first + checkcommit sz lastcommittime + | sz > 1000 = return True + | otherwise = do + now <- getCurrentTime + return $ diffUTCTime lastcommittime now > 300 + +{- Gets the handle cached in Annex state; creates a new one if it's not yet + - available, but doesn't open the database. -} +getDbHandle :: Annex DbHandle +getDbHandle = go =<< getState keysdbhandle + where + go (Just h) = pure h + go Nothing = do + h <- liftIO newDbHandle + changeState $ \s -> s { keysdbhandle = Just h } + return h + +{- Opens the database, perhaps creating it if it doesn't exist yet. + - + - Multiple readers and writers can have the database open at the same + - time. Database.Handle deals with the concurrency issues. + - The lock is held while opening the database, so that when + - the database doesn't exist yet, one caller wins the lock and + - can create it undisturbed. + -} +openDb :: Bool -> DbState -> Annex DbState +openDb _ st@(DbOpen _) = return st +openDb False DbEmpty = return DbEmpty +openDb createdb _ = withExclusiveLock gitAnnexKeysDbLock $ do + dbdir <- fromRepo gitAnnexKeysDb + let db = dbdir </> "db" + dbexists <- liftIO $ doesFileExist db + case (dbexists, createdb) of + (True, _) -> open db + (False, True) -> do + liftIO $ do + createDirectoryIfMissing True dbdir + H.initDb db $ void $ + runMigrationSilent migrateKeysDb + setAnnexDirPerm dbdir + setAnnexFilePerm db + open db + (False, False) -> return DbEmpty + where + open db = liftIO $ DbOpen <$> H.openDbQueue db "content" + +addAssociatedFile :: Key -> FilePath -> Annex () +addAssociatedFile k f = runWriter $ addAssociatedFile' k f + +addAssociatedFile' :: Key -> FilePath -> Writer +addAssociatedFile' k f = queueDb $ do + -- If the same file was associated with a different key before, + -- remove that. + delete $ from $ \r -> do + where_ (r ^. AssociatedFile ==. val f &&. r ^. AssociatedKey ==. val sk) + void $ insertUnique $ Associated sk f + where + sk = toSKey k + +{- Note that the files returned were once associated with the key, but + - some of them may not be any longer. -} +getAssociatedFiles :: Key -> Annex [FilePath] +getAssociatedFiles = runReader . getAssociatedFiles' . toSKey + +getAssociatedFiles' :: SKey -> Reader [FilePath] +getAssociatedFiles' sk = readDb $ do + l <- select $ from $ \r -> do + where_ (r ^. AssociatedKey ==. val sk) + return (r ^. AssociatedFile) + return $ map unValue l + +{- Gets any keys that are on record as having a particular associated file. + - (Should be one or none but the database doesn't enforce that.) -} +getAssociatedKey :: FilePath -> Annex [Key] +getAssociatedKey = runReader . getAssociatedKey' + +getAssociatedKey' :: FilePath -> Reader [Key] +getAssociatedKey' f = readDb $ do + l <- select $ from $ \r -> do + where_ (r ^. AssociatedFile ==. val f) + return (r ^. AssociatedKey) + return $ map (fromSKey . unValue) l + +removeAssociatedFile :: Key -> FilePath -> Annex () +removeAssociatedFile k = runWriter . removeAssociatedFile' (toSKey k) + +removeAssociatedFile' :: SKey -> FilePath -> Writer +removeAssociatedFile' sk f = queueDb $ + delete $ from $ \r -> do + where_ (r ^. AssociatedKey ==. val sk &&. r ^. AssociatedFile ==. val f) + +{- Stats the files, and stores their InodeCaches. -} +storeInodeCaches :: Key -> [FilePath] -> Annex () +storeInodeCaches k fs = withTSDelta $ \d -> + addInodeCaches k . catMaybes =<< liftIO (mapM (`genInodeCache` d) fs) + +addInodeCaches :: Key -> [InodeCache] -> Annex () +addInodeCaches k is = runWriter $ addInodeCaches' (toSKey k) is + +addInodeCaches' :: SKey -> [InodeCache] -> Writer +addInodeCaches' sk is = queueDb $ + forM_ is $ \i -> insertUnique $ Content sk (toSInodeCache i) + +{- A key may have multiple InodeCaches; one for the annex object, and one + - for each pointer file that is a copy of it. -} +getInodeCaches :: Key -> Annex [InodeCache] +getInodeCaches = runReader . getInodeCaches' . toSKey + +getInodeCaches' :: SKey -> Reader [InodeCache] +getInodeCaches' sk = readDb $ do + l <- select $ from $ \r -> do + where_ (r ^. ContentKey ==. val sk) + return (r ^. ContentCache) + return $ map (fromSInodeCache . unValue) l + +removeInodeCaches :: Key -> Annex () +removeInodeCaches = runWriter . removeInodeCaches' . toSKey + +removeInodeCaches' :: SKey -> Writer +removeInodeCaches' sk = queueDb $ + delete $ from $ \r -> do + where_ (r ^. ContentKey ==. val sk) diff --git a/Database/Keys/Handle.hs b/Database/Keys/Handle.hs new file mode 100644 index 000000000..5a5912b0b --- /dev/null +++ b/Database/Keys/Handle.hs @@ -0,0 +1,55 @@ +{- Handle for the Keys database. + - + - Copyright 2015 Joey Hess <id@joeyh.name> + -: + - Licensed under the GNU GPL version 3 or higher. + -} + +module Database.Keys.Handle ( + DbHandle, + newDbHandle, + DbState(..), + withDbState, + flushDbQueue, +) where + +import qualified Database.Queue as H +import Utility.Exception + +import Control.Concurrent +import Control.Monad.IO.Class (liftIO, MonadIO) + +-- The MVar is always left full except when actions are run +-- that access the database. +newtype DbHandle = DbHandle (MVar DbState) + +-- The database can be closed or open, but it also may have been +-- tried to open (for read) and didn't exist yet. +data DbState = DbClosed | DbOpen H.DbQueue | DbEmpty + +newDbHandle :: IO DbHandle +newDbHandle = DbHandle <$> newMVar DbClosed + +-- Runs an action on the state of the handle, which can change its state. +-- The MVar is empty while the action runs, which blocks other users +-- of the handle from running. +withDbState + :: (MonadIO m, MonadCatch m) + => DbHandle + -> (DbState + -> m (v, DbState)) + -> m v +withDbState (DbHandle mvar) a = do + st <- liftIO $ takeMVar mvar + go st `onException` (liftIO $ putMVar mvar st) + where + go st = do + (v, st') <- a st + liftIO $ putMVar mvar st' + return v + +flushDbQueue :: DbHandle -> IO () +flushDbQueue (DbHandle mvar) = go =<< readMVar mvar + where + go (DbOpen qh) = H.flushDbQueue qh + go _ = return () diff --git a/Database/Queue.hs b/Database/Queue.hs new file mode 100644 index 000000000..99fbacb9b --- /dev/null +++ b/Database/Queue.hs @@ -0,0 +1,107 @@ +{- Persistent sqlite database queues + - + - Copyright 2015 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE BangPatterns #-} + +module Database.Queue ( + DbQueue, + initDb, + openDbQueue, + queryDbQueue, + closeDbQueue, + flushDbQueue, + QueueSize, + queueDb, +) where + +import Utility.Monad +import Database.Handle + +import Database.Persist.Sqlite +import Control.Concurrent +import Data.Time.Clock + +{- A DbQueue wraps a DbHandle, adding a queue of writes to perform. + - + - This is efficient when there are frequent writes, but + - reads will not immediately have access to queued writes. -} +data DbQueue = DQ DbHandle (MVar Queue) + +{- Opens the database queue, but does not perform any migrations. Only use + - if the database is known to exist and have the right tables; ie after + - running initDb. -} +openDbQueue :: FilePath -> TableName -> IO DbQueue +openDbQueue db tablename = DQ + <$> openDb db tablename + <*> (newMVar =<< emptyQueue) + +{- This or flushDbQueue must be called, eg at program exit to ensure + - queued changes get written to the database. -} +closeDbQueue :: DbQueue -> IO () +closeDbQueue h@(DQ hdl _) = do + flushDbQueue h + closeDb hdl + +{- Blocks until all queued changes have been written to the database. -} +flushDbQueue :: DbQueue -> IO () +flushDbQueue (DQ hdl qvar) = do + q@(Queue sz _ qa) <- takeMVar qvar + if sz > 0 + then do + commitDb hdl qa + putMVar qvar =<< emptyQueue + else putMVar qvar q + +{- Makes a query using the DbQueue's database connection. + - This should not be used to make changes to the database! + - + - Queries will not return changes that have been recently queued, + - so use with care. + -} +queryDbQueue :: DbQueue -> SqlPersistM a -> IO a +queryDbQueue (DQ hdl _) = queryDb hdl + +{- A queue of actions to perform, with a count of the number of actions + - queued, and a last commit time. -} +data Queue = Queue QueueSize LastCommitTime (SqlPersistM ()) + +type QueueSize = Int + +type LastCommitTime = UTCTime + +emptyQueue :: IO Queue +emptyQueue = do + now <- getCurrentTime + return $ Queue 0 now (return ()) + +{- Queues a change to be made to the database. It will be queued + - to be committed later, unless the commitchecker action returns true, + - in which case any previously queued changes are also committed. + - + - Transactions built up by queueDb are sent to sqlite all at once. + - If sqlite fails due to another change being made concurrently by another + - process, the transaction is put back in the queue. This avoids + - the sqlite multiple writer problem. + -} +queueDb + :: DbQueue + -> (QueueSize -> LastCommitTime -> IO Bool) + -> SqlPersistM () + -> IO () +queueDb (DQ hdl qvar) commitchecker a = do + Queue sz lastcommittime qa <- takeMVar qvar + let !sz' = sz + 1 + let qa' = qa >> a + let enqueue = putMVar qvar + ifM (commitchecker sz' lastcommittime) + ( do + r <- commitDb' hdl qa' + case r of + Left _ -> enqueue $ Queue sz' lastcommittime qa' + Right _ -> enqueue =<< emptyQueue + , enqueue $ Queue sz' lastcommittime qa' + ) diff --git a/Database/Types.hs b/Database/Types.hs index dee56832b..1476a693a 100644 --- a/Database/Types.hs +++ b/Database/Types.hs @@ -13,6 +13,7 @@ import Database.Persist.TH import Data.Maybe import Types.Key +import Utility.InodeCache -- A serialized Key newtype SKey = SKey String @@ -22,6 +23,18 @@ toSKey :: Key -> SKey toSKey = SKey . key2file fromSKey :: SKey -> Key -fromSKey (SKey s) = fromMaybe (error $ "bad serialied key " ++ s) (file2key s) +fromSKey (SKey s) = fromMaybe (error $ "bad serialied Key " ++ s) (file2key s) derivePersistField "SKey" + +-- A serialized InodeCache +newtype SInodeCache = I String + deriving (Show, Read) + +toSInodeCache :: InodeCache -> SInodeCache +toSInodeCache = I . showInodeCache + +fromSInodeCache :: SInodeCache -> InodeCache +fromSInodeCache (I s) = fromMaybe (error $ "bad serialied InodeCache " ++ s) (readInodeCache s) + +derivePersistField "SInodeCache" @@ -28,6 +28,7 @@ module Git ( repoPath, localGitDir, attributes, + attributesLocal, hookPath, assertLocal, adjustPath, @@ -125,8 +126,11 @@ assertLocal repo action {- Path to a repository's gitattributes file. -} attributes :: Repo -> FilePath attributes repo - | repoIsLocalBare repo = repoPath repo ++ "/info/.gitattributes" - | otherwise = repoPath repo ++ "/.gitattributes" + | repoIsLocalBare repo = attributesLocal repo + | otherwise = repoPath repo </> ".gitattributes" + +attributesLocal :: Repo -> FilePath +attributesLocal repo = localGitDir repo </> "info" </> "attributes" {- Path to a given hook script in a repository, only if the hook exists - and is executable. -} @@ -11,8 +11,8 @@ import Common.Annex import qualified Annex import qualified Utility.Matcher import qualified Remote -import qualified Backend import Annex.Content +import Annex.WorkTree import Annex.Action import Annex.UUID import Logs.Trust @@ -201,22 +201,22 @@ limitAnything _ _ = return True {- Adds a limit to skip files not believed to be present in all - repositories in the specified group. -} addInAllGroup :: String -> Annex () -addInAllGroup groupname = do - m <- groupMap - addLimit $ limitInAllGroup m groupname - -limitInAllGroup :: GroupMap -> MkLimit Annex -limitInAllGroup m groupname - | S.null want = Right $ const $ const $ return True - | otherwise = Right $ \notpresent -> checkKey $ check notpresent - where - want = fromMaybe S.empty $ M.lookup groupname $ uuidsByGroup m - check notpresent key +addInAllGroup groupname = addLimit $ limitInAllGroup groupMap groupname + +limitInAllGroup :: Annex GroupMap -> MkLimit Annex +limitInAllGroup getgroupmap groupname = Right $ \notpresent mi -> do + m <- getgroupmap + let want = fromMaybe S.empty $ M.lookup groupname $ uuidsByGroup m + if S.null want + then return True -- optimisation: Check if a wanted uuid is notpresent. - | not (S.null (S.intersection want notpresent)) = return False - | otherwise = do - present <- S.fromList <$> Remote.keyLocations key - return $ S.null $ want `S.difference` present + else if not (S.null (S.intersection want notpresent)) + then return False + else checkKey (check want) mi + where + check want key = do + present <- S.fromList <$> Remote.keyLocations key + return $ S.null $ want `S.difference` present {- Adds a limit to skip files not using a specified key-value backend. -} addInBackend :: String -> Annex () @@ -277,7 +277,7 @@ addTimeLimit s = do else return True lookupFileKey :: FileInfo -> Annex (Maybe Key) -lookupFileKey = Backend.lookupFile . currFile +lookupFileKey = lookupFile . currFile checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a diff --git a/Locations.hs b/Locations.hs index ba6115155..200297321 100644 --- a/Locations.hs +++ b/Locations.hs @@ -29,6 +29,8 @@ module Locations ( gitAnnexBadDir, gitAnnexBadLocation, gitAnnexUnusedLog, + gitAnnexKeysDb, + gitAnnexKeysDbLock, gitAnnexFsckState, gitAnnexFsckDbDir, gitAnnexFsckDbLock, @@ -237,6 +239,14 @@ gitAnnexBadLocation key r = gitAnnexBadDir r </> keyFile key gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath gitAnnexUnusedLog prefix r = gitAnnexDir r </> (prefix ++ "unused") +{- .git/annex/keys/ contains a database of information about keys. -} +gitAnnexKeysDb :: Git.Repo -> FilePath +gitAnnexKeysDb r = gitAnnexDir r </> "keys" + +{- Lock file for the keys database. -} +gitAnnexKeysDbLock :: Git.Repo -> FilePath +gitAnnexKeysDbLock r = gitAnnexKeysDb r ++ "lck" + {- .git/annex/fsck/uuid/ is used to store information about incremental - fscks. -} gitAnnexFsckDir :: UUID -> Git.Repo -> FilePath diff --git a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs index c21d67010..035c098f6 100644 --- a/Logs/PreferredContent.hs +++ b/Logs/PreferredContent.hs @@ -102,7 +102,7 @@ makeMatcher groupmap configmap groupwantedmap u = go True True | null (lefts tokens) = generate $ rights tokens | otherwise = unknownMatcher u where - tokens = exprParser matchstandard matchgroupwanted groupmap configmap (Just u) expr + tokens = exprParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr matchstandard | expandstandard = maybe (unknownMatcher u) (go False False) (standardPreferredContent <$> getStandardGroup mygroups) @@ -133,7 +133,7 @@ checkPreferredContentExpression expr = case parsedToMatcher tokens of Left e -> Just e Right _ -> Nothing where - tokens = exprParser matchAll matchAll emptyGroupMap M.empty Nothing expr + tokens = exprParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr {- Puts a UUID in a standard group, and sets its preferred content to use - the standard expression for that group (unless preferred content is diff --git a/Messages.hs b/Messages.hs index a49f20711..b62e6d2a7 100644 --- a/Messages.hs +++ b/Messages.hs @@ -31,7 +31,6 @@ module Messages ( showHeader, showRaw, setupConsole, - setConsoleEncoding, enableDebugOutput, disableDebugOutput, debugEnabled, @@ -183,13 +182,6 @@ setupConsole = do updateGlobalLogger rootLoggerName (setLevel NOTICE . setHandlers [s]) setConsoleEncoding -{- This avoids ghc's output layer crashing on invalid encoded characters in - - filenames when printing them out. -} -setConsoleEncoding :: IO () -setConsoleEncoding = do - fileEncoding stdout - fileEncoding stderr - {- Log formatter with precision into fractions of a second. -} preciseLogFormatter :: LogFormatter a preciseLogFormatter = tfLogFormatter "%F %X%Q" "[$time] $msg" diff --git a/Remote/Git.hs b/Remote/Git.hs index 890e40b51..6dc5345c9 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -672,7 +672,7 @@ wantHardLink :: Annex Bool wantHardLink = (annexHardLink <$> Annex.getGitConfig) <&&> (not <$> isDirect) -- Copies from src to dest, updating a meter. If the copy finishes --- successfully, calls a final check action, which must also success, or +-- successfully, calls a final check action, which must also succeed, or -- returns false. -- -- If either the remote or local repository wants to use hard links, @@ -38,6 +38,7 @@ import Common import qualified Utility.SafeCommand import qualified Annex import qualified Annex.UUID +import qualified Annex.Version import qualified Backend import qualified Git.CurrentRepo import qualified Git.Filename @@ -65,6 +66,7 @@ import qualified Types.Messages import qualified Config import qualified Config.Cost import qualified Crypto +import qualified Annex.WorkTree import qualified Annex.Init import qualified Annex.CatFile import qualified Annex.View @@ -117,18 +119,17 @@ ingredients = ] tests :: TestTree -tests = testGroup "Tests" - -- Test both direct and indirect mode. - -- Windows is only going to use direct mode, so don't test twice. - [ properties +tests = testGroup "Tests" $ properties : + map (\(d, te) -> withTestMode te (unitTests d)) testmodes + where + testmodes = + [ ("v6", TestMode { forceDirect = False, annexVersion = "6" }) + , ("v5", TestMode { forceDirect = False, annexVersion = "5" }) + -- Windows will only use direct mode, so don't test twice. #ifndef mingw32_HOST_OS - , withTestEnv True $ unitTests "(direct)" - , withTestEnv False $ unitTests "(indirect)" -#else - , withTestEnv False $ unitTests "" + , ("v5 direct", TestMode { forceDirect = True, annexVersion = "5" }) + ] #endif - ] - properties :: TestTree properties = localOption (QuickCheckTests 1000) $ testGroup "QuickCheck" @@ -242,8 +243,11 @@ unitTests note = testGroup ("Unit Tests " ++ note) -- this test case create the main repo test_init :: Assertion test_init = innewrepo $ do - git_annex "init" [reponame] @? "init failed" - handleforcedirect + ver <- annexVersion <$> getTestMode + if ver == Annex.Version.defaultVersion + then git_annex "init" [reponame] @? "init failed" + else git_annex "init" [reponame, "--version", ver] @? "init failed" + setupTestMode where reponame = "test repo" @@ -294,7 +298,6 @@ test_shared_clone = intmpsharedclonerepo $ do , "--get" , "annex.hardlink" ] - print v v == Just "true\n" @? "shared clone of repo did not get annex.hardlink set" @@ -534,10 +537,13 @@ test_lock = intmpclonerepoInDirect $ do annexed_notpresent annexedfile -- regression test: unlock of newly added, not committed file - -- should fail + -- should fail in v5 mode. In v6 mode, this is allowed. writeFile "newfile" "foo" git_annex "add" ["newfile"] @? "add new file failed" - not <$> git_annex "unlock" ["newfile"] @? "unlock failed to fail on newly added, never committed file" + ifM (annexeval Annex.Version.versionSupportsUnlockedPointers) + ( git_annex "unlock" ["newfile"] @? "unlock failed on newly added, never committed file in v6 repository" + , not <$> git_annex "unlock" ["newfile"] @? "unlock failed to fail on newly added, never committed file in v5 repository" + ) git_annex "get" [annexedfile] @? "get of file failed" annexed_present annexedfile @@ -549,12 +555,21 @@ test_lock = intmpclonerepoInDirect $ do writeFile annexedfile $ content annexedfile ++ "foo" not <$> git_annex "lock" [annexedfile] @? "lock failed to fail without --force" git_annex "lock" ["--force", annexedfile] @? "lock --force failed" + -- In v6 mode, the original content of the file is not always + -- preserved after modification, so re-get it. + git_annex "get" [annexedfile] @? "get of file failed after lock --force" annexed_present annexedfile git_annex "unlock" [annexedfile] @? "unlock failed" unannexed annexedfile changecontent annexedfile - git_annex "add" [annexedfile] @? "add of modified file failed" - runchecks [checklink, checkunwritable] annexedfile + ifM (annexeval Annex.Version.versionSupportsUnlockedPointers) + ( do + boolSystem "git" [Param "add", Param annexedfile] @? "add of modified file failed" + runchecks [checkregularfile, checkwritable] annexedfile + , do + git_annex "add" [annexedfile] @? "add of modified file failed" + runchecks [checklink, checkunwritable] annexedfile + ) c <- readFile annexedfile assertEqual "content of modified file" c (changedcontent annexedfile) r' <- git_annex "drop" [annexedfile] @@ -580,7 +595,10 @@ test_edit' precommit = intmpclonerepoInDirect $ do @? "pre-commit failed" else boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "contentchanged"] @? "git commit of edited file failed" - runchecks [checklink, checkunwritable] annexedfile + ifM (annexeval Annex.Version.versionSupportsUnlockedPointers) + ( runchecks [checkregularfile, checkwritable] annexedfile + , runchecks [checklink, checkunwritable] annexedfile + ) c <- readFile annexedfile assertEqual "content of modified file" c (changedcontent annexedfile) not <$> git_annex "drop" [annexedfile] @? "drop wrongly succeeded with no known copy of modified file" @@ -590,8 +608,12 @@ test_partial_commit = intmpclonerepoInDirect $ do git_annex "get" [annexedfile] @? "get of file failed" annexed_present annexedfile git_annex "unlock" [annexedfile] @? "unlock failed" - not <$> boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "test", File annexedfile] - @? "partial commit of unlocked file not blocked by pre-commit hook" + ifM (annexeval Annex.Version.versionSupportsUnlockedPointers) + ( boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "test", File annexedfile] + @? "partial commit of unlocked file should be allowed in v6 repository" + , not <$> boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "test", File annexedfile] + @? "partial commit of unlocked file not blocked by pre-commit hook" + ) test_fix :: Assertion test_fix = intmpclonerepoInDirect $ do @@ -617,9 +639,13 @@ test_direct :: Assertion test_direct = intmpclonerepoInDirect $ do git_annex "get" [annexedfile] @? "get of file failed" annexed_present annexedfile - git_annex "direct" [] @? "switch to direct mode failed" - annexed_present annexedfile - git_annex "indirect" [] @? "switch to indirect mode failed" + ifM (annexeval Annex.Version.versionSupportsUnlockedPointers) + ( not <$> git_annex "direct" [] @? "switch to direct mode failed to fail in v6 repository" + , do + git_annex "direct" [] @? "switch to direct mode failed" + annexed_present annexedfile + git_annex "indirect" [] @? "switch to indirect mode failed" + ) test_trust :: Assertion test_trust = intmpclonerepo $ do @@ -810,7 +836,7 @@ test_unused = intmpclonerepoInDirect $ do assertEqual ("unused keys differ " ++ desc) (sort expectedkeys) (sort unusedkeys) findkey f = do - r <- Backend.lookupFile f + r <- Annex.WorkTree.lookupFile f return $ fromJust r test_describe :: Assertion @@ -1056,8 +1082,9 @@ test_nonannexed_file_conflict_resolution :: Assertion test_nonannexed_file_conflict_resolution = do check True False check False False - check True True - check False True + whenM (annexeval Annex.Version.versionSupportsDirectMode) $ do + check True True + check False True where check inr1 switchdirect = withtmpclonerepo $ \r1 -> withtmpclonerepo $ \r2 -> @@ -1106,8 +1133,9 @@ test_nonannexed_symlink_conflict_resolution :: Assertion test_nonannexed_symlink_conflict_resolution = do check True False check False False - check True True - check False True + whenM (annexeval Annex.Version.versionSupportsDirectMode) $ do + check True True + check False True where check inr1 switchdirect = withtmpclonerepo $ \r1 -> withtmpclonerepo $ \r2 -> @@ -1380,7 +1408,7 @@ test_crypto = do (c,k) <- annexeval $ do uuid <- Remote.nameToUUID "foo" rs <- Logs.Remote.readRemoteLog - Just k <- Backend.lookupFile annexedfile + Just k <- Annex.WorkTree.lookupFile annexedfile return (fromJust $ M.lookup uuid rs, k) let key = if scheme `elem` ["hybrid","pubkey"] then Just $ Utility.Gpg.KeyIds [Utility.Gpg.testKeyId] @@ -1505,7 +1533,7 @@ intmpclonerepoInDirect a = intmpclonerepo $ ) where isdirect = annexeval $ do - Annex.Init.initialize Nothing + Annex.Init.initialize Nothing Nothing Config.isDirect checkRepo :: Types.Annex a -> FilePath -> IO a @@ -1584,11 +1612,14 @@ clonerepo old new cfg = do ] boolSystem "git" cloneparams @? "git clone failed" configrepo new - indir new $ - git_annex "init" ["-q", new] @? "git annex init failed" + indir new $ do + ver <- annexVersion <$> getTestMode + if ver == Annex.Version.defaultVersion + then git_annex "init" ["-q", new] @? "git annex init failed" + else git_annex "init" ["-q", new, "--version", ver] @? "git annex init failed" unless (bareClone cfg) $ indir new $ - handleforcedirect + setupTestMode return new configrepo :: FilePath -> IO () @@ -1599,10 +1630,6 @@ configrepo dir = indir dir $ do -- avoid signed commits by test suite boolSystem "git" [Param "config", Param "commit.gpgsign", Param "false"] @? "git config failed" -handleforcedirect :: IO () -handleforcedirect = whenM ((==) "1" <$> Utility.Env.getEnvDefault "FORCEDIRECT" "") $ - git_annex "direct" ["-q"] @? "git annex direct failed" - ensuretmpdir :: IO () ensuretmpdir = do e <- doesDirectoryExist tmpdir @@ -1666,10 +1693,10 @@ checkunwritable f = unlessM (annexeval Config.isDirect) $ do checkwritable :: FilePath -> Assertion checkwritable f = do - r <- tryIO $ writeFile f $ content f - case r of - Left _ -> assertFailure $ "unable to modify " ++ f - Right _ -> return () + s <- getFileStatus f + let mode = fileMode s + unless (mode == mode `unionFileModes` ownerWriteMode) $ + assertFailure $ "unable to modify " ++ f checkdangling :: FilePath -> Assertion checkdangling f = ifM (annexeval Config.crippledFileSystem) @@ -1684,7 +1711,7 @@ checkdangling f = ifM (annexeval Config.crippledFileSystem) checklocationlog :: FilePath -> Bool -> Assertion checklocationlog f expected = do thisuuid <- annexeval Annex.UUID.getUUID - r <- annexeval $ Backend.lookupFile f + r <- annexeval $ Annex.WorkTree.lookupFile f case r of Just k -> do uuids <- annexeval $ Remote.keyLocations k @@ -1695,7 +1722,7 @@ checklocationlog f expected = do checkbackend :: FilePath -> Types.Backend -> Assertion checkbackend file expected = do b <- annexeval $ maybe (return Nothing) (Backend.getBackend file) - =<< Backend.lookupFile file + =<< Annex.WorkTree.lookupFile file assertEqual ("backend for " ++ file) (Just expected) b inlocationlog :: FilePath -> Assertion @@ -1721,11 +1748,16 @@ annexed_present = runchecks unannexed :: FilePath -> Assertion unannexed = runchecks [checkregularfile, checkcontent, checkwritable] -withTestEnv :: Bool -> TestTree -> TestTree -withTestEnv forcedirect = withResource prepare release . const +data TestMode = TestMode + { forceDirect :: Bool + , annexVersion :: String + } deriving (Read, Show) + +withTestMode :: TestMode -> TestTree -> TestTree +withTestMode testmode = withResource prepare release . const where prepare = do - setTestEnv forcedirect + setTestMode testmode case tryIngredients [consoleTestReporter] mempty initTests of Nothing -> error "No tests found!?" Just act -> unlessM act $ @@ -1733,8 +1765,8 @@ withTestEnv forcedirect = withResource prepare release . const return () release _ = cleanup' True tmpdir -setTestEnv :: Bool -> IO () -setTestEnv forcedirect = do +setTestMode :: TestMode -> IO () +setTestMode testmode = do whenM (doesDirectoryExist tmpdir) $ error $ "The temporary directory " ++ tmpdir ++ " already exists; cannot run test suite." @@ -1754,9 +1786,24 @@ setTestEnv forcedirect = do , ("GIT_COMMITTER_NAME", "git-annex test") -- force gpg into batch mode for the tests , ("GPG_BATCH", "1") - , ("FORCEDIRECT", if forcedirect then "1" else "") + , ("TESTMODE", show testmode) ] +getTestMode :: IO TestMode +getTestMode = Prelude.read <$> Utility.Env.getEnvDefault "TESTMODE" "" + +setupTestMode :: IO () +setupTestMode = do + testmode <- getTestMode + when (forceDirect testmode) $ + git_annex "direct" ["-q"] @? "git annex direct failed" + whenM (annexeval Annex.Version.versionSupportsUnlockedPointers) $ + boolSystem "git" + [ Param "config" + , Param "annex.largefiles" + , Param ("exclude=" ++ ingitfile) + ] @? "git config annex.largefiles failed" + changeToTmpDir :: FilePath -> IO () changeToTmpDir t = do topdir <- Utility.Env.getEnvDefault "TOPDIR" (error "TOPDIR not set") @@ -1791,7 +1838,7 @@ sha1annexedfiledup :: String sha1annexedfiledup = "sha1foodup" ingitfile :: String -ingitfile = "bar" +ingitfile = "bar.c" content :: FilePath -> String content f diff --git a/Types/KeySource.hs b/Types/KeySource.hs index 7c2fd13d5..25774588a 100644 --- a/Types/KeySource.hs +++ b/Types/KeySource.hs @@ -9,7 +9,7 @@ module Types.KeySource where import Utility.InodeCache -{- When content is in the process of being added to the annex, +{- When content is in the process of being ingested into the annex, - and a Key generated from it, this data type is used. - - The contentLocation may be different from the filename @@ -19,7 +19,7 @@ import Utility.InodeCache - of a different Key. - - The inodeCache can be used to detect some types of modifications to - - files that may be made while they're in the process of being added. + - files that may be made while they're in the process of being ingested. -} data KeySource = KeySource { keyFilename :: FilePath diff --git a/Upgrade.hs b/Upgrade.hs index 8d205a874..f9dfb7258 100644 --- a/Upgrade.hs +++ b/Upgrade.hs @@ -18,13 +18,14 @@ import qualified Upgrade.V1 import qualified Upgrade.V2 import qualified Upgrade.V3 import qualified Upgrade.V4 +import qualified Upgrade.V5 checkUpgrade :: Version -> Annex () checkUpgrade = maybe noop error <=< needsUpgrade needsUpgrade :: Version -> Annex (Maybe String) needsUpgrade v - | v == supportedVersion = ok + | v `elem` supportedVersions = ok | v `elem` autoUpgradeableVersions = ifM (upgrade True) ( ok , err "Automatic upgrade failed!" @@ -40,7 +41,7 @@ upgrade :: Bool -> Annex Bool upgrade automatic = do upgraded <- go =<< getVersion when upgraded $ - setVersion supportedVersion + setVersion latestVersion return upgraded where #ifndef mingw32_HOST_OS @@ -53,4 +54,5 @@ upgrade automatic = do go (Just "2") = Upgrade.V2.upgrade go (Just "3") = Upgrade.V3.upgrade automatic go (Just "4") = Upgrade.V4.upgrade automatic + go (Just "5") = Upgrade.V5.upgrade automatic go _ = return True diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs index 801cdafa0..507af9e3b 100644 --- a/Upgrade/V1.hs +++ b/Upgrade/V1.hs @@ -54,14 +54,14 @@ upgrade = do ifM (fromRepo Git.repoIsLocalBare) ( do moveContent - setVersion supportedVersion + setVersion latestVersion , do moveContent updateSymlinks moveLocationLogs Annex.Queue.flush - setVersion supportedVersion + setVersion latestVersion ) Upgrade.V2.upgrade diff --git a/Upgrade/V5.hs b/Upgrade/V5.hs new file mode 100644 index 000000000..f6d18df43 --- /dev/null +++ b/Upgrade/V5.hs @@ -0,0 +1,104 @@ +{- git-annex v5 -> v6 upgrade support + - + - Copyright 2015 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Upgrade.V5 where + +import Common.Annex +import Config +import Annex.InodeSentinal +import Annex.Link +import Annex.Direct +import Annex.Content +import Annex.WorkTree +import qualified Database.Keys +import qualified Annex.Content.Direct as Direct +import qualified Git +import qualified Git.LsFiles +import qualified Git.Branch +import Git.FileMode +import Utility.InodeCache + +upgrade :: Bool -> Annex Bool +upgrade automatic = do + unless automatic $ + showAction "v5 to v6" + whenM isDirect $ do + {- Since upgrade from direct mode changes how files + - are represented in git, commit any changes in the + - work tree first. -} + whenM stageDirect $ do + unless automatic $ + showAction "committing first" + upgradeDirectCommit automatic + "commit before upgrade to annex.version 6" + setDirect False + upgradeDirectWorkTree + removeDirectCruft + showLongNote "Upgraded repository out of direct mode." + showLongNote "Changes have been staged for all annexed files in this repository; you should run `git commit` to commit these changes." + showLongNote "Any other clones of this repository that use direct mode need to be upgraded now, too." + configureSmudgeFilter + -- Inode sentinal file was only used in direct mode and when + -- locking down files as they were added. In v6, it's used more + -- extensively, so make sure it exists, since old repos that didn't + -- use direct mode may not have created it. + unlessM (isDirect) $ + createInodeSentinalFile True + return True + +upgradeDirectCommit :: Bool -> String -> Annex () +upgradeDirectCommit automatic msg = + void $ inRepo $ Git.Branch.commitCommand commitmode + [ Param "-m" + , Param msg + ] + where + commitmode = if automatic then Git.Branch.AutomaticCommit else Git.Branch.ManualCommit + +{- Walk work tree from top and convert all annex symlinks to pointer files, + - staging them in the index, and updating the work tree files with + - either the content of the object, or the pointer file content. -} +upgradeDirectWorkTree :: Annex () +upgradeDirectWorkTree = do + top <- fromRepo Git.repoPath + (l, clean) <- inRepo $ Git.LsFiles.stagedDetails [top] + forM_ l go + void $ liftIO clean + where + go (f, Just _sha, Just mode) | isSymLink mode = do + mk <- lookupFile f + case mk of + Nothing -> noop + Just k -> do + ifM (isJust <$> getAnnexLinkTarget f) + ( writepointer f k + , fromdirect f k + ) + stagePointerFile f =<< hashPointerFile k + Database.Keys.addAssociatedFile k f + return () + go _ = noop + + fromdirect f k = do + -- If linkAnnex fails for some reason, the work tree file + -- still has the content; the annex object file is just + -- not populated with it. Since the work tree file + -- is recorded as an associated file, things will still + -- work that way, it's just not ideal. + ic <- withTSDelta (liftIO . genInodeCache f) + void $ linkAnnex k f ic + writepointer f k = liftIO $ do + nukeFile f + writeFile f (formatPointer k) + +{- Remove all direct mode bookkeeping files. -} +removeDirectCruft :: Annex () +removeDirectCruft = mapM_ go =<< getKeysPresent InAnywhere + where + go k = do + Direct.removeInodeCache k + Direct.removeAssociatedFiles k diff --git a/Utility/FileSystemEncoding.hs b/Utility/FileSystemEncoding.hs index 67341d371..eab98337a 100644 --- a/Utility/FileSystemEncoding.hs +++ b/Utility/FileSystemEncoding.hs @@ -19,6 +19,7 @@ module Utility.FileSystemEncoding ( encodeW8NUL, decodeW8NUL, truncateFilePath, + setConsoleEncoding, ) where import qualified GHC.Foreign as GHC @@ -164,3 +165,10 @@ truncateFilePath n = reverse . go [] n . L8.fromString else go (c:coll) (cnt - x') (L8.drop 1 bs) _ -> coll #endif + +{- This avoids ghc's output layer crashing on invalid encoded characters in + - filenames when printing them out. -} +setConsoleEncoding :: IO () +setConsoleEncoding = do + fileEncoding stdout + fileEncoding stderr diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs index b5fe9034e..8bd7ae0cd 100644 --- a/Utility/InodeCache.hs +++ b/Utility/InodeCache.hs @@ -1,7 +1,7 @@ {- Caching a file's inode, size, and modification time - to see when it's changed. - - - Copyright 2013, 2014 Joey Hess <id@joeyh.name> + - Copyright 2013-2014 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} diff --git a/debian/changelog b/debian/changelog index 73035a683..1ce79936e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,24 @@ +git-annex (6.20151225) unstable; urgency=medium + + * Added v6 repository mode, but v5 is still the default for now. + * The upgrade to version 6 is not done fully automatically, because + upgrading a direct mode repository to version 6 will prevent old + versions of git-annex from working in other clones of that repository. + * init: --version parameter added to control which supported repository + version to use. + * smudge: New command, used for git smudge filter. + This will replace direct mode. + * init, upgrade: Configure .git/info/attributes to use git-annex as a smudge + filter. Note that this changes the default behavior of git add in a + newly initialized repository; it will add files to the annex. + * unlock, lock: In v6 mode, unlocking a file changes it from a symlink to a + pointer file, and this change can be committed to the git repository. + * add: In v6 mode, adds modified files to the annex. + * assistant: In v6 mode, adds files in unlocked mode, so they can + continue to be modified. + + -- Joey Hess <id@joeyh.name> Tue, 08 Dec 2015 11:14:03 -0400 + git-annex (5.20151219) UNRELEASED; urgency=medium * status: On crippled filesystems, was displaying M for all annexed files diff --git a/doc/direct_mode.mdwn b/doc/direct_mode.mdwn index 4c2cb2dd7..d3e1067f9 100644 --- a/doc/direct_mode.mdwn +++ b/doc/direct_mode.mdwn @@ -9,6 +9,13 @@ understand how to update its working tree. [[!toc]] +## deprecated + +Direct mode is deprecated! Intead, git-annex v6 repositories can simply +have files that are unlocked and thus can be directly accessed and +modified. See [[upgrades]] for details about the transition to v6 +repositories. + ## enabling (and disabling) direct mode Normally, git-annex repositories start off in indirect mode. With some diff --git a/doc/git-annex-add.mdwn b/doc/git-annex-add.mdwn index cfeb8a98e..7f796fec1 100644 --- a/doc/git-annex-add.mdwn +++ b/doc/git-annex-add.mdwn @@ -11,12 +11,18 @@ git annex add `[path ...]` Adds files in the path to the annex. If no path is specified, adds files from the current directory and below. -Normally, files that are already checked into git, or that git has been -configured to ignore will be silently skipped. - -If annex.largefiles is configured, and does not match a file that is being -added, `git annex add` will behave the same as `git add` and add the -non-large file directly to the git repository, instead of to the annex. +Files that are already checked into git and are unmodified, or that +git has been configured to ignore will be silently skipped. + +If annex.largefiles is configured, and does not match a file, `git annex +add` will behave the same as `git add` and add the non-large file directly +to the git repository, instead of to the annex. + +Large files are added to the annex in locked form, which prevents further +modification of their content unless unlocked by [[git-annex-unlock]](1). +(This is not the case however when a repository is in direct mode.) +To add a file to the annex in unlocked form, `git add` can be used instead +(that only works when the repository has annex.version 6 or higher). This command can also be used to add symbolic links, both symlinks to annexed content, and other symlinks. diff --git a/doc/git-annex-direct.mdwn b/doc/git-annex-direct.mdwn index 457ae3116..3cade1a8c 100644 --- a/doc/git-annex-direct.mdwn +++ b/doc/git-annex-direct.mdwn @@ -17,12 +17,18 @@ Note that git commands that operate on the work tree will refuse to run in direct mode repositories. Use `git annex proxy` to safely run such commands. +Note that the direct mode/indirect mode distinction is removed in v6 +git-annex repositories. In such a repository, you can +use [[git-annex-unlock]](1) to make a file's content be directly present. + # SEE ALSO [[git-annex]](1) [[git-annex-indirect]](1) +[[git-annex-unlock]](1) + # AUTHOR Joey Hess <id@joeyh.name> diff --git a/doc/git-annex-indirect.mdwn b/doc/git-annex-indirect.mdwn index 99def6144..321e0fb36 100644 --- a/doc/git-annex-indirect.mdwn +++ b/doc/git-annex-indirect.mdwn @@ -11,9 +11,8 @@ git annex indirect Switches a repository back from direct mode to the default, indirect mode. -Some systems cannot support git-annex in indirect mode, because they -do not support symbolic links. Repositories on such systems instead -default to using direct mode. +Note that the direct mode/indirect mode distinction is removed in v6 +git-annex repositories. # SEE ALSO diff --git a/doc/git-annex-init.mdwn b/doc/git-annex-init.mdwn index 145705105..29522181d 100644 --- a/doc/git-annex-init.mdwn +++ b/doc/git-annex-init.mdwn @@ -24,6 +24,13 @@ mark it as dead (see [[git-annex-dead]](1)). This command is entirely safe, although usually pointless, to run inside an already initialized git-annex repository. +# OPTIONS + +* `--version=N` + + Force the repository to be initialized using a different annex.version + than the current default. + # SEE ALSO [[git-annex]](1) diff --git a/doc/git-annex-lock.mdwn b/doc/git-annex-lock.mdwn index 4bf279fb2..b9e5d3450 100644 --- a/doc/git-annex-lock.mdwn +++ b/doc/git-annex-lock.mdwn @@ -9,7 +9,7 @@ git annex lock `[path ...]` # DESCRIPTION Use this to undo an unlock command if you don't want to modify -the files, or have made modifications you want to discard. +the files any longer, or have made modifications you want to discard. # OPTIONS diff --git a/doc/git-annex-pre-commit.mdwn b/doc/git-annex-pre-commit.mdwn index bc1e86e18..21e5aef68 100644 --- a/doc/git-annex-pre-commit.mdwn +++ b/doc/git-annex-pre-commit.mdwn @@ -12,10 +12,14 @@ This is meant to be called from git's pre-commit hook. `git annex init` automatically creates a pre-commit hook using this. Fixes up symlinks that are staged as part of a commit, to ensure they -point to annexed content. Also handles injecting changes to unlocked -files into the annex. When in a view, updates metadata to reflect changes +point to annexed content. + +When in a view, updates metadata to reflect changes made to files in the view. +When in a repository that has not been upgraded to annex.version 6, +also handles injecting changes to unlocked files into the annex. + # SEE ALSO [[git-annex]](1) diff --git a/doc/git-annex-smudge.mdwn b/doc/git-annex-smudge.mdwn new file mode 100644 index 000000000..7439c8784 --- /dev/null +++ b/doc/git-annex-smudge.mdwn @@ -0,0 +1,43 @@ +# NAME + +git-annex smudge - git filter driver for git-annex + +# SYNOPSIS + +git annex smudge [--clean] file + +# DESCRIPTION + +This command lets git-annex be used as a git filter driver which lets +annexed files in the git repository to be unlocked at all times, instead +of being symlinks. + +When adding a file with `git add`, the annex.largefiles config is +consulted to decide if a given file should be added to git as-is, +or if its content are large enough to need to use git-annex. + +The git configuration to use this command as a filter driver is as follows. +This is normally set up for you by git-annex init, so you should +not need to configure it manually. + + [filter "annex"] + smudge = git-annex smudge %f + clean = git-annex smudge --clean %f + +To make git use that filter driver, it needs to be configured in +the .gitattributes file or in `.git/config/attributes`. The latter +is normally configured when a repository is initialized, with the following +contents: + + * filter=annex + .* !filter + +# SEE ALSO + +[[git-annex]](1) + +# AUTHOR + +Joey Hess <id@joeyh.name> + +Warning: Automatically converted into a man page by mdwn2man. Edit with care. diff --git a/doc/git-annex-unlock.mdwn b/doc/git-annex-unlock.mdwn index ac8c21185..123146836 100644 --- a/doc/git-annex-unlock.mdwn +++ b/doc/git-annex-unlock.mdwn @@ -11,8 +11,16 @@ git annex unlock `[path ...]` Normally, the content of annexed files is protected from being changed. Unlocking an annexed file allows it to be modified. This replaces the symlink for each specified file with a copy of the file's content. -You can then modify it and `git annex add` (or `git commit`) to inject -it back into the annex. +You can then modify it and `git annex add` (or `git commit`) to save your +changes. + +In repositories with annex.version 5 or earlier, unlocking a file is local +to the repository, and is temporary. With version 6, unlocking a file +changes how it is stored in the git repository (from a symlink to a pointer +file), so you can commit it like any other change. Also in version 6, you +can use `git add` to add a fie to the annex in unlocked form. This allows +workflows where a file starts out unlocked, is modified as necessary, and +is locked once it reaches its final version. # OPTIONS diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 2020ccf3f..1a2fd6e67 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -626,6 +626,14 @@ subdirectories). See [[git-annex-diffdriver]](1) for details. +* `smudge` + + This command lets git-annex be used as a git filter driver, allowing + annexed files in the git repository to be unlocked at all times, instead + of being symlinks. + + See [[git-annex-smudge]](1) for details. + * `remotedaemon` Detects when network remotes have received git pushes and fetches from them. diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index aea0c9b98..a62e19f68 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -158,7 +158,8 @@ Using git-annex on a crippled filesystem that does not support symlinks. Data: * An annex pointer file has as its first line the git-annex key - that it's standing in for. Subsequent lines of the file might + that it's standing in for (prefixed with "annex/objects/", similar to + an annex symlink target). Subsequent lines of the file might be a message saying that the file's content is not currently available. An annex pointer file is checked into the git repository the same way that an annex symlink is checked in. @@ -177,8 +178,8 @@ Configuration: the annex. Other files are passed through the smudge/clean as-is and have their contents stored in git. -* annex.direct is repurposed to configure how the assistant adds files. - When set to true, they're added unlocked. +* annex.direct is repurposed to configure how git-annex adds files. + When set to false, it adds symlinks and when true it adds pointer files. git-annex clean: @@ -232,15 +233,11 @@ git annex lock/unlock: transition repositories to using pointers, and a cleaner unlock/lock for repos using symlinks. - unlock will stage a pointer file, and will copy the content of the object - out of .git/annex/objects to the work tree file. (Might want a --hardlink - switch.) + unlock will stage a pointer file, and will link the content of the object + from .git/annex/objects to the work tree file. - lock will replace the current work tree file with the symlink, and stage it. - Note that multiple work tree files could point to the same object. - So, if the link count is > 1, replace the annex object with a copy of - itself to break such a hard link. Always finish by locking down the - permissions of the annex object. + lock will replace the current work tree file with the symlink, and stage it, + and lock down the permissions of the annex object. #### file map @@ -248,7 +245,8 @@ The file map needs to map from `Key -> [File]`. `File -> Key` seems useful to have, but in practice is not worthwhile. Drop and get operations need to know what files in the work tree use a -given key in order to update the work tree. +given key in order to update the work tree. And, we don't want to +overwrite a work tree file if it's been modified when dropping or getting. git-annex commands that look at annex symlinks to get keys to act on will need fall back to either consulting the file map, or looking at the staged @@ -275,13 +273,14 @@ In particular: * Is the smudge filter called at any other time? Seems unlikely but then there could be situations with a detached work tree or such. * Does git call any useful hooks when removing a file from the work tree, - or converting it to not be annexed? + or converting it to not be annexed, or for `git mv` of an annexed file? No! From this analysis, any file map generated by the smudge/clean filters is necessary potentially innaccurate. It may list deleted files. It may or may not reflect current unstaged changes from the work tree. + Follows that any use of the file map needs to verify the info from it, and throw out bad cached info (updating the map to match reality). @@ -306,17 +305,71 @@ just look at the repo content in the first place.. annex.version changes to 6 -Upgrade should be handled automatically. +git config for filter.annex.smudge and filter.annex.clean is set up. -On upgrade, update .gitattributes with a stock configuration, unless -it already mentions "filter=annex". +.gitattributes is updated with a stock configuration, +unless it already mentions "filter=annex". Upgrading a direct mode repo needs to switch it out of bare mode, and needs to run `git annex unlock` on all files (or reach the same result). So will need to stage changes to all annexed files. When a repo has some clones indirect and some direct, the upgraded repo -will have all files unlocked, necessarily in all clones. +will have all files unlocked, necessarily in all clones. This happens +automatically, because when the direct repos are upgraded that causes the +files to be unlocked, while the indirect upgrades don't touch the files. + +#### implementation todo list + +* Still a few test suite failues for v6 with locked files. +* Test suite should make pass for v6 with unlocked files. +* Reconcile staged changes into the associated files database, whenever + the database is queried. This is needed to handle eg: + git add largefile + git mv largefile othername + git annex move othername --to foo + # fails to drop content from associated file othername, + # because it doesn't know it has that name + # git commit clears up this mess +* Interaction with shared clones. Should avoid hard linking from/to a + object in a shared clone if either repository has the object unlocked. + (And should avoid unlocking an object if it's hard linked to a shared clone, + but that's already accomplished because it avoids unlocking an object if + it's hard linked at all) +* Make automatic merge conflict resolution work for pointer files. + - Should probably automatically handle merge conflicts between annex + symlinks and pointer files too. Maybe by always resulting in a pointer + file, since the symlinks don't work everwhere. +* Crippled filesystem should cause all files to be transparently unlocked. + Note that this presents problems when dealing with merge conflicts and + when pushing changes committed in such a repo. Ideally, should avoid + committing implicit unlocks, or should prevent such commits leaking out + in pushes. +* Dropping a smudged file causes git status (and git annex status) + to show it as modified, because the timestamp has changed. + Getting a smudged file can also cause this. + Upgrading a direct mode repo also leaves files in this state. + User can use `git add` to clear it up, but better to avoid this, + by updating stat info in the index. + (May need to use libgit2 to do this, cannot find + any plumbing except git-update-index, which is very inneficient for + smudged files.) +* Audit code for all uses of isDirect. These places almost always need + adjusting to support v6, if they haven't already. +* Optimisation: See if the database schema can be improved to speed things + up. Are there enough indexes? getAssociatedKey in particular does a + reverse lookup and might benefit from an index. +* Optimisation: Reads from the Keys database avoid doing anything if the + database doesn't exist. This makes v5 repos, or v6 with all locked files + faster. However, if a v6 repo unlocks and then re-locks a file, its + database will exist, and so this optimisation will no longer apply. + Could try to detect when the database is empty, and remove it or avoid reads. + +* Eventually (but not yet), make v6 the default for new repositories. + Note that the assistant forces repos into direct mode; that will need to + be changed then. +* Later still, remove support for direct mode, and enable automatic + v5 to v6 upgrades. ---- diff --git a/doc/upgrades.mdwn b/doc/upgrades.mdwn index f5e9cbc3a..9d30c2f14 100644 --- a/doc/upgrades.mdwn +++ b/doc/upgrades.mdwn @@ -43,6 +43,46 @@ conflicts first before upgrading git-annex. The upgrade events, so far: +## v5 -> v6 (git-annex version 6.x) + +The upgrade from v5 to v6 is handled manually. Run `git-annex upgrade` +perform the upgrade. + +Warning: All places that a direct mode repository is cloned to should be +running git-annex version 6.x before you upgrade the repository. +This is necessary because the contents of the repository are changed +in the upgrade, and the old version of git-annex won't be able to +access files after the repo is upgraded. + +This upgrade does away with the direct mode/indirect mode distinction. +A v6 git-annex repository can have some files locked and other files +unlocked, and all git and git-annex commands can be used on both locked and +unlocked files. (Although for locked files to work, the filesystem +must support symbolic links..) + +The behavior of some commands changes in an upgraded repository: + +* `git add` will add files to the annex, in unlocked mode, rather than + adding them directly to the git repository. To cause some files to be + added directly to git, you can configure `annex.largefiles`. For + example: + + git config annex.largefiles "largerthan=100kb and not (include=*.c or include=*.h)" + +* `git annex unlock` and `git annex lock` change how the pointer to + the annexed content is stored in git. + +If a repository is only used in indirect mode, you can use git-annex +v5 and v6 in different clones of the same indirect mode repository without +problems. + +On upgrade, all files in a direct mode repository will be converted to +unlocked files. The upgrade will stage changes to all annexed files in +the git repository, which you can then commit. + +If a repository has some clones using direct mode and some using indirect +mode, all the files will end up unlocked in all clones after the upgrade. + ## v4 -> v5 (git-annex version 5.x) The upgrade from v4 to v5 is handled |