diff options
66 files changed, 1593 insertions, 463 deletions
diff --git a/Annex/AutoMerge.hs b/Annex/AutoMerge.hs index bfbe71dc2..c32c3f66a 100644 --- a/Annex/AutoMerge.hs +++ b/Annex/AutoMerge.hs @@ -25,7 +25,6 @@ import qualified Git.Branch import Git.Types (BlobType(..)) import Config import Annex.ReplaceFile -import Git.FileMode import Annex.VariantFile import qualified Data.Set as S @@ -135,7 +134,7 @@ resolveMerge' (Just us) them u = do | select (LsFiles.unmergedBlobType u) == Just SymlinkBlob = case select' (LsFiles.unmergedSha u) of Nothing -> return Nothing - Just sha -> catKey sha symLinkMode + Just sha -> catKey sha | otherwise = return Nothing makelink key = do @@ -174,7 +173,7 @@ resolveMerge' (Just us) them u = do case select' (LsFiles.unmergedSha u) of Nothing -> noop Just sha -> do - link <- catLink True sha + link <- catSymLinkTarget sha replacewithlink item link resolveby a = do diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs index 179149844..aefccd424 100644 --- a/Annex/CatFile.hs +++ b/Annex/CatFile.hs @@ -1,6 +1,6 @@ {- git cat-file interface, with handle automatically stored in the Annex monad - - - Copyright 2011-2013 Joey Hess <id@joeyh.name> + - Copyright 2011-2015 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -16,7 +16,7 @@ module Annex.CatFile ( catKey, catKeyFile, catKeyFileHEAD, - catLink, + catSymLinkTarget, ) where import qualified Data.ByteString.Lazy as L @@ -29,8 +29,8 @@ import qualified Git.CatFile import qualified Annex import Git.Types import Git.FilePath -import Git.FileMode import qualified Git.Ref +import Annex.Link catFile :: Git.Branch -> FilePath -> Annex L.ByteString catFile branch file = do @@ -80,52 +80,17 @@ catFileStop = do (s { Annex.catfilehandles = M.empty }, Annex.catfilehandles s) liftIO $ mapM_ Git.CatFile.catFileStop (M.elems m) -{- From the Sha or Ref of a symlink back to the key. - - - - Requires a mode witness, to guarantee that the file is a symlink. - -} -catKey :: Ref -> FileMode -> Annex (Maybe Key) -catKey = catKey' True - -catKey' :: Bool -> Sha -> FileMode -> Annex (Maybe Key) -catKey' modeguaranteed sha mode - | isSymLink mode = do - l <- catLink modeguaranteed sha - return $ if isLinkToAnnex l - then fileKey $ takeFileName l - else Nothing - | otherwise = return Nothing +{- From ref to a symlink or a pointer file, get the key. -} +catKey :: Ref -> Annex (Maybe Key) +catKey ref = parseLinkOrPointer <$> catObject ref {- Gets a symlink target. -} -catLink :: Bool -> Sha -> Annex String -catLink modeguaranteed sha = fromInternalGitPath . decodeBS <$> get - where - -- If the mode is not guaranteed to be correct, avoid - -- buffering the whole file content, which might be large. - -- 8192 is enough if it really is a symlink. - get - | modeguaranteed = catObject sha - | otherwise = L.take 8192 <$> catObject sha - -{- Looks up the key corresponding to the Ref using the running cat-file. - - - - Currently this always has to look in HEAD, because cat-file --batch - - does not offer a way to specify that we want to look up a tree object - - in the index. So if the index has a file staged not as a symlink, - - and it is a symlink in head, the wrong mode is gotten. - - Also, we have to assume the file is a symlink if it's not yet committed - - to HEAD. For these reasons, modeguaranteed is not set. - -} -catKeyChecked :: Bool -> Ref -> Annex (Maybe Key) -catKeyChecked needhead ref@(Ref r) = - catKey' False ref =<< findmode <$> catTree treeref +catSymLinkTarget :: Sha -> Annex String +catSymLinkTarget sha = fromInternalGitPath . decodeBS <$> get where - pathparts = split "/" r - dir = intercalate "/" $ take (length pathparts - 1) pathparts - file = fromMaybe "" $ lastMaybe pathparts - treeref = Ref $ if needhead then "HEAD" ++ dir ++ "/" else dir ++ "/" - findmode = fromMaybe symLinkMode . headMaybe . - map snd . filter (\p -> fst p == file) + -- Avoid buffering the whole file content, which might be large. + -- 8192 is enough if it really is a symlink or pointer file. + get = L.take 8192 <$> catObject sha {- From a file in the repository back to the key. - @@ -151,8 +116,8 @@ catKeyChecked needhead ref@(Ref r) = catKeyFile :: FilePath -> Annex (Maybe Key) catKeyFile f = ifM (Annex.getState Annex.daemon) ( catKeyFileHEAD f - , catKeyChecked True $ Git.Ref.fileRef f + , catKey $ Git.Ref.fileRef f ) catKeyFileHEAD :: FilePath -> Annex (Maybe Key) -catKeyFileHEAD f = catKeyChecked False $ Git.Ref.fileFromRef Git.Ref.headRef f +catKeyFileHEAD f = catKey $ Git.Ref.fileFromRef Git.Ref.headRef f diff --git a/Annex/Content.hs b/Annex/Content.hs index 60ffb8141..4cd2b0259 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -1,6 +1,6 @@ {- git-annex file content managing - - - Copyright 2010-2014 Joey Hess <id@joeyh.name> + - Copyright 2010-2015 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -24,6 +24,11 @@ module Annex.Content ( withTmp, checkDiskSpace, moveAnnex, + linkAnnex, + linkAnnex', + LinkAnnexResult(..), + unlinkAnnex, + checkedCopyFile, sendAnnex, prepSendAnnex, removeAnnex, @@ -38,6 +43,7 @@ module Annex.Content ( dirKeys, withObjectLoc, staleKeysPrune, + isUnmodified, ) where import System.IO.Unsafe (unsafeInterleaveIO) @@ -61,15 +67,19 @@ import Config import Git.SharedRepository import Annex.Perms import Annex.Link -import Annex.Content.Direct +import qualified Annex.Content.Direct as Direct import Annex.ReplaceFile import Annex.LockPool import Messages.Progress import qualified Types.Remote import qualified Types.Backend import qualified Backend +import qualified Database.Keys import Types.NumCopies import Annex.UUID +import Annex.InodeSentinal +import Utility.InodeCache +import Utility.PosixFiles {- Checks if a given key's content is currently present. -} inAnnex :: Key -> Annex Bool @@ -79,7 +89,10 @@ inAnnex key = inAnnexCheck key $ liftIO . doesFileExist inAnnexCheck :: Key -> (FilePath -> Annex Bool) -> Annex Bool inAnnexCheck key check = inAnnex' id False check key -{- Generic inAnnex, handling both indirect and direct mode. +{- inAnnex that performs an arbitrary check of the key's content. + - + - When the content is unlocked, it must also be unmodified, or the bad + - value will be returned. - - In direct mode, at least one of the associated files must pass the - check. Additionally, the file must be unmodified. @@ -88,14 +101,22 @@ inAnnex' :: (a -> Bool) -> a -> (FilePath -> Annex a) -> Key -> Annex a inAnnex' isgood bad check key = withObjectLoc key checkindirect checkdirect where checkindirect loc = do - whenM (fromRepo Git.repoIsUrl) $ - error "inAnnex cannot check remote repo" - check loc + r <- check loc + if isgood r + then do + cache <- Database.Keys.getInodeCaches key + if null cache + then return r + else ifM (sameInodeCache loc cache) + ( return r + , return bad + ) + else return bad checkdirect [] = return bad checkdirect (loc:locs) = do r <- check loc if isgood r - then ifM (goodContent key loc) + then ifM (Direct.goodContent key loc) ( return r , checkdirect locs ) @@ -371,7 +392,7 @@ withTmp key action = do return res {- Checks that there is disk space available to store a given key, - - in a destination (or the annex) printing a warning if not. + - in a destination directory (or the annex) printing a warning if not. - - If the destination is on the same filesystem as the annex, - checks for any other running downloads, removing the amount of data still @@ -379,7 +400,12 @@ withTmp key action = do - when doing concurrent downloads. -} checkDiskSpace :: Maybe FilePath -> Key -> Integer -> Bool -> Annex Bool -checkDiskSpace destdir key alreadythere samefilesystem = ifM (Annex.getState Annex.force) +checkDiskSpace destdir key = checkDiskSpace' (fromMaybe 1 (keySize key)) destdir key + +{- Allows specifying the size of the key, if it's known, which is useful + - as not all keys know their size. -} +checkDiskSpace' :: Integer -> Maybe FilePath -> Key -> Integer -> Bool -> Annex Bool +checkDiskSpace' need destdir key alreadythere samefilesystem = ifM (Annex.getState Annex.force) ( return True , do -- We can't get inprogress and free at the same @@ -392,8 +418,8 @@ checkDiskSpace destdir key alreadythere samefilesystem = ifM (Annex.getState Ann then sizeOfDownloadsInProgress (/= key) else pure 0 free <- liftIO . getDiskFree =<< dir - case (free, fromMaybe 1 (keySize key)) of - (Just have, need) -> do + case free of + Just have -> do reserve <- annexDiskReserve <$> Annex.getGitConfig let delta = need + reserve - have - alreadythere + inprogress let ok = delta <= 0 @@ -412,7 +438,10 @@ checkDiskSpace destdir key alreadythere samefilesystem = ifM (Annex.getState Ann {- Moves a key's content into .git/annex/objects/ - - - In direct mode, moves it to the associated file, or files. + - When a key has associated pointer files, the object is hard + - linked (or copied) to the files, and the object file is left thawed. + + - In direct mode, moves the object file to the associated file, or files. - - What if the key there already has content? This could happen for - various reasons; perhaps the same content is being annexed again. @@ -440,7 +469,12 @@ moveAnnex key src = withObjectLoc key storeobject storedirect ( alreadyhave , modifyContent dest $ do liftIO $ moveFile src dest - freezeContent dest + fs <- Database.Keys.getAssociatedFiles key + if null fs + then freezeContent dest + else do + mapM_ (populatePointerFile key dest) fs + Database.Keys.storeInodeCaches key (dest:fs) ) storeindirect = storeobject =<< calcRepo (gitAnnexLocation key) @@ -458,21 +492,103 @@ moveAnnex key src = withObjectLoc key storeobject storedirect v <- isAnnexLink f if Just key == v then do - updateInodeCache key src + Direct.updateInodeCache key src replaceFile f $ liftIO . moveFile src chmodContent f forM_ fs $ - addContentWhenNotPresent key f - else ifM (goodContent key f) + Direct.addContentWhenNotPresent key f + else ifM (Direct.goodContent key f) ( storedirect' alreadyhave fs , storedirect' fallback fs ) alreadyhave = liftIO $ removeFile src +populatePointerFile :: Key -> FilePath -> FilePath -> Annex () +populatePointerFile k obj f = go =<< isPointerFile f + where + go (Just k') | k == k' = do + liftIO $ nukeFile f + unlessM (linkAnnex'' k obj f) $ + liftIO $ writeFile f (formatPointer k) + go _ = return () + +{- Hard links a file into .git/annex/objects/, falling back to a copy + - if necessary. Does nothing if the object file already exists. + - + - Does not lock down the hard linked object, so that the user can modify + - the source file. So, adding an object to the annex this way can + - prevent losing the content if the source file is deleted, but does not + - guard against modifications. + -} +linkAnnex :: Key -> FilePath -> Annex LinkAnnexResult +linkAnnex key src = do + dest <- calcRepo (gitAnnexLocation key) + modifyContent dest $ linkAnnex' key src dest + +{- Hard links (or copies) src to dest, one of which should be the + - annex object. Updates inode cache for src and for dest when it's + - changed. -} +linkAnnex' :: Key -> FilePath -> FilePath -> Annex LinkAnnexResult +linkAnnex' key src dest = + ifM (liftIO $ doesFileExist dest) + ( do + Database.Keys.storeInodeCaches key [src] + return LinkAnnexNoop + , ifM (linkAnnex'' key src dest) + ( do + thawContent dest + Database.Keys.storeInodeCaches key [dest, src] + return LinkAnnexOk + , do + Database.Keys.storeInodeCaches key [src] + return LinkAnnexFailed + ) + ) + +data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop + +{- Hard links or copies src to dest. Only uses a hard link if src + - is not already hardlinked to elsewhere. Checks disk reserve before + - copying, and will fail if not enough space, or if the dest file + - already exists. -} +linkAnnex'' :: Key -> FilePath -> FilePath -> Annex Bool +linkAnnex'' key src dest = catchBoolIO $ do + s <- liftIO $ getFileStatus src + let copy = checkedCopyFile' key src dest s +#ifndef mingw32_HOST_OS + if linkCount s > 1 + then copy + else liftIO (createLink src dest >> return True) + `catchIO` const copy +#else + copy +#endif + +{- Removes the annex object file for a key. Lowlevel. -} +unlinkAnnex :: Key -> Annex () +unlinkAnnex key = do + obj <- calcRepo $ gitAnnexLocation key + modifyContent obj $ do + secureErase obj + liftIO $ nukeFile obj + +{- Checks disk space before copying. -} +checkedCopyFile :: Key -> FilePath -> FilePath -> Annex Bool +checkedCopyFile key src dest = catchBoolIO $ + checkedCopyFile' key src dest + =<< liftIO (getFileStatus src) + +checkedCopyFile' :: Key -> FilePath -> FilePath -> FileStatus -> Annex Bool +checkedCopyFile' key src dest s = catchBoolIO $ + ifM (checkDiskSpace' (fromIntegral $ fileSize s) (Just $ takeDirectory dest) key 0 True) + ( liftIO $ copyFileExternal CopyAllMetaData src dest + , return False + ) + {- Runs an action to transfer an object's content. - - - In direct mode, it's possible for the file to change as it's being sent. + - In some cases, it's possible for the file to change as it's being sent. - If this happens, runs the rollback action and returns False. The - rollback action should remove the data that was transferred. -} @@ -492,8 +608,9 @@ sendAnnex key rollback sendobject = go =<< prepSendAnnex key {- Returns a file that contains an object's content, - and a check to run after the transfer is complete. - - - In direct mode, it's possible for the file to change as it's being sent, - - and the check detects this case and returns False. + - When a file is unlocked (or in direct mode), it's possble for its + - content to change as it's being sent. The check detects this case + - and returns False. - - Note that the returned check action is, in some cases, run in the - Annex monad of the remote that is receiving the object, rather than @@ -502,10 +619,23 @@ sendAnnex key rollback sendobject = go =<< prepSendAnnex key prepSendAnnex :: Key -> Annex (Maybe (FilePath, Annex Bool)) prepSendAnnex key = withObjectLoc key indirect direct where - indirect f = return $ Just (f, return True) + indirect f = do + cache <- Database.Keys.getInodeCaches key + cache' <- if null cache + -- Since no inode cache is in the database, this + -- object is not currently unlocked. But that could + -- change while the transfer is in progress, so + -- generate an inode cache for the starting + -- content. + then maybeToList <$> + withTSDelta (liftIO . genInodeCache f) + else pure cache + return $ if null cache' + then Nothing + else Just (f, sameInodeCache f cache') direct [] = return Nothing direct (f:fs) = do - cache <- recordedInodeCache key + cache <- Direct.recordedInodeCache key -- check that we have a good file ifM (sameInodeCache f cache) ( return $ Just (f, sameInodeCache f cache) @@ -520,7 +650,7 @@ prepSendAnnex key = withObjectLoc key indirect direct withObjectLoc :: Key -> (FilePath -> Annex a) -> ([FilePath] -> Annex a) -> Annex a withObjectLoc key indirect direct = ifM isDirect ( do - fs <- associatedFiles key + fs <- Direct.associatedFiles key if null fs then goindirect else direct fs @@ -544,6 +674,9 @@ cleanObjectLoc key cleaner = do {- Removes a key's file from .git/annex/objects/ - + - When a key has associated pointer files, they are checked for + - modifications, and if unmodified, are reset. + - - In direct mode, deletes the associated files or files, and replaces - them with symlinks. -} @@ -553,16 +686,50 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect remove file = cleanObjectLoc key $ do secureErase file liftIO $ nukeFile file - removeInodeCache key + mapM_ (void . tryIO . resetpointer) + =<< Database.Keys.getAssociatedFiles key + Database.Keys.removeInodeCaches key + Direct.removeInodeCache key + resetpointer file = ifM (isUnmodified key file) + ( do + secureErase file + liftIO $ nukeFile file + liftIO $ writeFile file (formatPointer key) + -- Can't delete the pointer file. + -- If it was a hard link to the annex object, + -- that object might have been frozen as part of the + -- removal process, so thaw it. + , void $ tryIO $ thawContent file + ) removedirect fs = do - cache <- recordedInodeCache key - removeInodeCache key + cache <- Direct.recordedInodeCache key + Direct.removeInodeCache key mapM_ (resetfile cache) fs - resetfile cache f = whenM (sameInodeCache f cache) $ do + resetfile cache f = whenM (Direct.sameInodeCache f cache) $ do l <- calcRepo $ gitAnnexLink f key secureErase f replaceFile f $ makeAnnexLink l +{- Check if a file contains the unmodified content of the key. + - + - The expensive way to tell is to do a verification of its content. + - The cheaper way is to see if the InodeCache for the key matches the + - file. -} +isUnmodified :: Key -> FilePath -> Annex Bool +isUnmodified key f = go =<< geti + where + go Nothing = return False + go (Just fc) = cheapcheck fc <||> expensivecheck fc + cheapcheck fc = anyM (compareInodeCaches fc) + =<< Database.Keys.getInodeCaches key + expensivecheck fc = ifM (verifyKeyContent AlwaysVerify Types.Remote.UnVerified key f) + -- The file could have been modified while it was + -- being verified. Detect that. + ( geti >>= maybe (return False) (compareInodeCaches fc) + , return False + ) + geti = withTSDelta (liftIO . genInodeCache f) + {- Runs the secure erase command if set, otherwise does nothing. - File may or may not be deleted at the end; caller is responsible for - making sure it's deleted. -} @@ -586,13 +753,14 @@ moveBad key = do logStatus key InfoMissing return dest -data KeyLocation = InAnnex | InRepository +data KeyLocation = InAnnex | InRepository | InAnywhere {- List of keys whose content exists in the specified location. - - InAnnex only lists keys under .git/annex/objects, - - while InRepository, in direct mode, also finds keys located in the - - work tree. + - InAnnex only lists keys with content in .git/annex/objects, + - while InRepository, in direct mode, also finds keys with content + - in the work tree. InAnywhere lists all keys that have directories + - in .git/annex/objects, whether or not the content is present. - - Note that InRepository has to check whether direct mode files - have goodContent. @@ -621,6 +789,11 @@ getKeysPresent keyloc = do morekeys <- unsafeInterleaveIO a continue (morekeys++keys) as + inanywhere = case keyloc of + InAnywhere -> True + _ -> False + + present _ _ _ | inanywhere = pure True present _ False d = presentInAnnex d present s True d = presentDirect s d <||> presentInAnnex d @@ -632,7 +805,8 @@ getKeysPresent keyloc = do InRepository -> case fileKey (takeFileName d) of Nothing -> return False Just k -> Annex.eval s $ - anyM (goodContent k) =<< associatedFiles k + anyM (Direct.goodContent k) =<< Direct.associatedFiles k + InAnywhere -> return True {- In order to run Annex monad actions within unsafeInterleaveIO, - the current state is taken and reused. No changes made to this diff --git a/Annex/Content/Direct.hs b/Annex/Content/Direct.hs index 86e053d7f..59bea8f99 100644 --- a/Annex/Content/Direct.hs +++ b/Annex/Content/Direct.hs @@ -1,12 +1,13 @@ {- git-annex file content managing for direct mode - + - This is deprecated, and will be removed when direct mode gets removed + - from git-annex. + - - Copyright 2012-2014 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} -{-# LANGUAGE CPP #-} - module Annex.Content.Direct ( associatedFiles, associatedFilesRelative, @@ -26,15 +27,10 @@ module Annex.Content.Direct ( sameFileStatus, removeInodeCache, toInodeCache, - inodesChanged, - createInodeSentinalFile, addContentWhenNotPresent, - withTSDelta, - getTSDelta, ) where import Common.Annex -import qualified Annex import Annex.Perms import qualified Git import Utility.Tmp @@ -43,6 +39,7 @@ import Utility.InodeCache import Utility.CopyFile import Annex.ReplaceFile import Annex.Link +import Annex.InodeSentinal {- Absolute FilePaths of Files in the tree that are associated with a key. -} associatedFiles :: Key -> Annex [FilePath] @@ -165,14 +162,6 @@ removeInodeCache key = withInodeCacheFile key $ \f -> withInodeCacheFile :: Key -> (FilePath -> Annex a) -> Annex a withInodeCacheFile key a = a =<< calcRepo (gitAnnexInodeCache key) -{- Checks if a InodeCache matches the current version of a file. -} -sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool -sameInodeCache _ [] = return False -sameInodeCache file old = go =<< withTSDelta (liftIO . genInodeCache file) - where - go Nothing = return False - go (Just curr) = elemInodeCaches curr old - {- Checks if a FileStatus matches the recorded InodeCache of a file. -} sameFileStatus :: Key -> FilePath -> FileStatus -> Annex Bool sameFileStatus key f status = do @@ -183,22 +172,6 @@ sameFileStatus key f status = do ([], Nothing) -> return True _ -> return False -{- If the inodes have changed, only the size and mtime are compared. -} -compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool -compareInodeCaches x y - | compareStrong x y = return True - | otherwise = ifM inodesChanged - ( return $ compareWeak x y - , return False - ) - -elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool -elemInodeCaches _ [] = return False -elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l) - ( return True - , elemInodeCaches c ls - ) - compareInodeCachesWith :: Annex InodeComparisonType compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly ) @@ -212,52 +185,3 @@ addContentWhenNotPresent key contentfile associatedfile = do replaceFile associatedfile $ liftIO . void . copyFileExternal CopyAllMetaData contentfile updateInodeCache key associatedfile - -{- Some filesystems get new inodes each time they are mounted. - - In order to work on such a filesystem, a sentinal file is used to detect - - when the inodes have changed. - - - - If the sentinal file does not exist, we have to assume that the - - inodes have changed. - -} -inodesChanged :: Annex Bool -inodesChanged = sentinalInodesChanged <$> sentinalStatus - -withTSDelta :: (TSDelta -> Annex a) -> Annex a -withTSDelta a = a =<< getTSDelta - -getTSDelta :: Annex TSDelta -#ifdef mingw32_HOST_OS -getTSDelta = sentinalTSDelta <$> sentinalStatus -#else -getTSDelta = pure noTSDelta -- optimisation -#endif - -sentinalStatus :: Annex SentinalStatus -sentinalStatus = maybe check return =<< Annex.getState Annex.sentinalstatus - where - check = do - sc <- liftIO . checkSentinalFile =<< annexSentinalFile - Annex.changeState $ \s -> s { Annex.sentinalstatus = Just sc } - return sc - -{- The sentinal file is only created when first initializing a repository. - - If there are any annexed objects in the repository already, creating - - the file would invalidate their inode caches. -} -createInodeSentinalFile :: Annex () -createInodeSentinalFile = unlessM (alreadyexists <||> hasobjects) $ do - s <- annexSentinalFile - createAnnexDirectory (parentDir (sentinalFile s)) - liftIO $ writeSentinalFile s - where - alreadyexists = liftIO. sentinalFileExists =<< annexSentinalFile - hasobjects = liftIO . doesDirectoryExist =<< fromRepo gitAnnexObjectDir - -annexSentinalFile :: Annex SentinalFile -annexSentinalFile = do - sentinalfile <- fromRepo gitAnnexInodeSentinal - sentinalcachefile <- fromRepo gitAnnexInodeSentinalCache - return SentinalFile - { sentinalFile = sentinalfile - , sentinalCacheFile = sentinalcachefile - } diff --git a/Annex/Direct.hs b/Annex/Direct.hs index 495ff5e75..8c3d5bb56 100644 --- a/Annex/Direct.hs +++ b/Annex/Direct.hs @@ -1,5 +1,8 @@ {- git-annex direct mode - + - This is deprecated, and will be removed when direct mode gets removed + - from git-annex. + - - Copyright 2012-2014 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. @@ -36,6 +39,7 @@ import Annex.VariantFile import Git.Index import Annex.Index import Annex.LockFile +import Annex.InodeSentinal {- Uses git ls-files to find files that need to be committed, and stages - them into the index. Returns True if some changes were staged. -} @@ -53,8 +57,8 @@ stageDirect = do {- Determine what kind of modified or deleted file this is, as - efficiently as we can, by getting any key that's associated - with it in git, as well as its stat info. -} - go (file, Just sha, Just mode) = withTSDelta $ \delta -> do - shakey <- catKey sha mode + go (file, Just sha, Just _mode) = withTSDelta $ \delta -> do + shakey <- catKey sha mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file mcache <- liftIO $ maybe (pure Nothing) (toInodeCache delta file) mstat filekey <- isAnnexLink file @@ -107,8 +111,8 @@ preCommitDirect = do withkey (DiffTree.srcsha diff) (DiffTree.srcmode diff) removeAssociatedFile withkey (DiffTree.dstsha diff) (DiffTree.dstmode diff) addAssociatedFile where - withkey sha mode a = when (sha /= nullSha) $ do - k <- catKey sha mode + withkey sha _mode a = when (sha /= nullSha) $ do + k <- catKey sha case k of Nothing -> noop Just key -> void $ a key $ @@ -256,16 +260,16 @@ updateWorkTree d oldref force = do makeabs <- flip fromTopFilePath <$> gitRepo let fsitems = zip (map (makeabs . DiffTree.file) items) items forM_ fsitems $ - go makeabs DiffTree.srcsha DiffTree.srcmode moveout moveout_raw + go makeabs DiffTree.srcsha moveout moveout_raw forM_ fsitems $ - go makeabs DiffTree.dstsha DiffTree.dstmode movein movein_raw + go makeabs DiffTree.dstsha movein movein_raw void $ liftIO cleanup where - go makeabs getsha getmode a araw (f, item) + go makeabs getsha a araw (f, item) | getsha item == nullSha = noop | otherwise = void $ tryNonAsync . maybe (araw item makeabs f) (\k -> void $ a item makeabs k f) - =<< catKey (getsha item) (getmode item) + =<< catKey (getsha item) moveout _ _ = removeDirect @@ -395,7 +399,7 @@ changedDirect oldk f = do whenM (pure (null locs) <&&> not <$> inAnnex oldk) $ logStatus oldk InfoMissing -{- Enable/disable direct mode. -} +{- Git config settings to enable/disable direct mode. -} setDirect :: Bool -> Annex () setDirect wantdirect = do if wantdirect diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs index 8b0db60ad..a008198f3 100644 --- a/Annex/FileMatcher.hs +++ b/Annex/FileMatcher.hs @@ -14,7 +14,6 @@ import Limit import Utility.Matcher import Types.Group import Logs.Group -import Logs.Remote import Annex.UUID import qualified Annex import Types.FileMatcher @@ -53,8 +52,8 @@ parsedToMatcher parsed = case partitionEithers parsed of ([], vs) -> Right $ generate vs (es, _) -> Left $ unwords $ map ("Parse failure: " ++) es -exprParser :: FileMatcher Annex -> FileMatcher Annex -> GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token (MatchFiles Annex))] -exprParser matchstandard matchgroupwanted groupmap configmap mu expr = +exprParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token (MatchFiles Annex))] +exprParser matchstandard matchgroupwanted getgroupmap configmap mu expr = map parse $ tokenizeMatcher expr where parse = parseToken @@ -62,12 +61,12 @@ exprParser matchstandard matchgroupwanted groupmap configmap mu expr = matchgroupwanted (limitPresent mu) (limitInDir preferreddir) - groupmap + getgroupmap preferreddir = fromMaybe "public" $ M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu -parseToken :: FileMatcher Annex -> FileMatcher Annex -> MkLimit Annex -> MkLimit Annex -> GroupMap -> String -> Either String (Token (MatchFiles Annex)) -parseToken matchstandard matchgroupwanted checkpresent checkpreferreddir groupmap t +parseToken :: FileMatcher Annex -> FileMatcher Annex -> MkLimit Annex -> MkLimit Annex -> Annex GroupMap -> String -> Either String (Token (MatchFiles Annex)) +parseToken matchstandard matchgroupwanted checkpresent checkpreferreddir getgroupmap t | t `elem` tokens = Right $ token t | t == "standard" = call matchstandard | t == "groupwanted" = call matchgroupwanted @@ -86,7 +85,7 @@ parseToken matchstandard matchgroupwanted checkpresent checkpreferreddir groupma , ("largerthan", limitSize (>)) , ("smallerthan", limitSize (<)) , ("metadata", limitMetaData) - , ("inallgroup", limitInAllGroup groupmap) + , ("inallgroup", limitInAllGroup getgroupmap) ] where (k, v) = separate (== '=') t @@ -109,9 +108,12 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig where go Nothing = return matchAll go (Just expr) = do - gm <- groupMap - rc <- readRemoteLog u <- getUUID + -- No need to read remote configs, that's only needed for + -- inpreferreddir, which is used in preferred content + -- expressions but does not make sense in the + -- annex.largefiles expression. + let emptyconfig = M.empty either badexpr return $ - parsedToMatcher $ exprParser matchAll matchAll gm rc (Just u) expr + parsedToMatcher $ exprParser matchAll matchAll groupMap emptyconfig (Just u) expr badexpr e = error $ "bad annex.largefiles configuration: " ++ e diff --git a/Annex/Init.hs b/Annex/Init.hs index 65e9aa474..99bb03e92 100644 --- a/Annex/Init.hs +++ b/Annex/Init.hs @@ -29,12 +29,12 @@ import Types.TrustLevel import Annex.Version import Annex.Difference import Annex.UUID +import Annex.Link import Config import Annex.Direct -import Annex.Content.Direct import Annex.Environment -import Backend import Annex.Hook +import Annex.InodeSentinal import Upgrade #ifndef mingw32_HOST_OS import Utility.UserInfo @@ -57,8 +57,8 @@ genDescription Nothing = do return $ concat [hostname, ":", reldir] #endif -initialize :: Maybe String -> Annex () -initialize mdescription = do +initialize :: Maybe String -> Maybe Version -> Annex () +initialize mdescription mversion = do {- Has to come before any commits are made as the shared - clone heuristic expects no local objects. -} sharedclone <- checkSharedClone @@ -68,7 +68,7 @@ initialize mdescription = do ensureCommit $ Annex.Branch.create prepUUID - initialize' + initialize' mversion initSharedClone sharedclone @@ -77,15 +77,18 @@ initialize mdescription = do -- Everything except for uuid setup, shared clone setup, and initial -- description. -initialize' :: Annex () -initialize' = do +initialize' :: Maybe Version -> Annex () +initialize' mversion = do checkLockSupport checkFifoSupport checkCrippledFileSystem unlessM isBare $ hookWrite preCommitHook setDifferences - setVersion supportedVersion + unlessM (isJust <$> getVersion) $ + setVersion (fromMaybe defaultVersion mversion) + whenM versionSupportsUnlockedPointers + configureSmudgeFilter ifM (crippledFileSystem <&&> not <$> isBare) ( do enableDirectMode @@ -95,7 +98,7 @@ initialize' = do , unlessM isBare switchHEADBack ) - createInodeSentinalFile + createInodeSentinalFile False uninitialize :: Annex () uninitialize = do @@ -114,7 +117,7 @@ ensureInitialized :: Annex () ensureInitialized = getVersion >>= maybe needsinit checkUpgrade where needsinit = ifM Annex.Branch.hasSibling - ( initialize Nothing + ( initialize Nothing Nothing , error "First run: git-annex init" ) diff --git a/Annex/InodeSentinal.hs b/Annex/InodeSentinal.hs new file mode 100644 index 000000000..8b48094df --- /dev/null +++ b/Annex/InodeSentinal.hs @@ -0,0 +1,93 @@ +{- git-annex inode sentinal file + - + - Copyright 2012-2015 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.InodeSentinal where + +import Common.Annex +import qualified Annex +import Utility.InodeCache +import Annex.Perms + +{- If the sendinal shows the inodes have changed, only the size and mtime + - are compared. -} +compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool +compareInodeCaches x y + | compareStrong x y = return True + | otherwise = ifM inodesChanged + ( return $ compareWeak x y + , return False + ) + +{- Checks if one of the provided old InodeCache matches the current + - version of a file. -} +sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool +sameInodeCache _ [] = return False +sameInodeCache file old = go =<< withTSDelta (liftIO . genInodeCache file) + where + go Nothing = return False + go (Just curr) = elemInodeCaches curr old + +elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool +elemInodeCaches _ [] = return False +elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l) + ( return True + , elemInodeCaches c ls + ) + +{- Some filesystems get new inodes each time they are mounted. + - In order to work on such a filesystem, a sentinal file is used to detect + - when the inodes have changed. + - + - If the sentinal file does not exist, we have to assume that the + - inodes have changed. + -} +inodesChanged :: Annex Bool +inodesChanged = sentinalInodesChanged <$> sentinalStatus + +withTSDelta :: (TSDelta -> Annex a) -> Annex a +withTSDelta a = a =<< getTSDelta + +getTSDelta :: Annex TSDelta +#ifdef mingw32_HOST_OS +getTSDelta = sentinalTSDelta <$> sentinalStatus +#else +getTSDelta = pure noTSDelta -- optimisation +#endif + +sentinalStatus :: Annex SentinalStatus +sentinalStatus = maybe check return =<< Annex.getState Annex.sentinalstatus + where + check = do + sc <- liftIO . checkSentinalFile =<< annexSentinalFile + Annex.changeState $ \s -> s { Annex.sentinalstatus = Just sc } + return sc + +{- The sentinal file is only created when first initializing a repository. + - If there are any annexed objects in the repository already, creating + - the file would invalidate their inode caches. -} +createInodeSentinalFile :: Bool -> Annex () +createInodeSentinalFile evenwithobjects = + unlessM (alreadyexists <||> hasobjects) $ do + s <- annexSentinalFile + createAnnexDirectory (parentDir (sentinalFile s)) + liftIO $ writeSentinalFile s + where + alreadyexists = liftIO. sentinalFileExists =<< annexSentinalFile + hasobjects + | evenwithobjects = pure False + | otherwise = liftIO . doesDirectoryExist =<< fromRepo gitAnnexObjectDir + +annexSentinalFile :: Annex SentinalFile +annexSentinalFile = do + sentinalfile <- fromRepo gitAnnexInodeSentinal + sentinalcachefile <- fromRepo gitAnnexInodeSentinalCache + return SentinalFile + { sentinalFile = sentinalfile + , sentinalCacheFile = sentinalcachefile + } diff --git a/Annex/Link.hs b/Annex/Link.hs index 98b200f0a..61c61b561 100644 --- a/Annex/Link.hs +++ b/Annex/Link.hs @@ -5,7 +5,9 @@ - On other filesystems, git instead stores the symlink target in a regular - file. - - - Copyright 2013 Joey Hess <id@joeyh.name> + - Pointer files are used instead of symlinks for unlocked files. + - + - Copyright 2013-2015 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -19,6 +21,9 @@ import qualified Git.UpdateIndex import qualified Annex.Queue import Git.Types import Git.FilePath +import Types.Key + +import qualified Data.ByteString.Lazy as L type LinkTarget = String @@ -105,8 +110,49 @@ hashSymlink' :: Git.HashObject.HashObjectHandle -> LinkTarget -> Annex Sha hashSymlink' h linktarget = liftIO $ Git.HashObject.hashBlob h $ toInternalGitPath linktarget -{- Stages a symlink to the annex, using a Sha of its target. -} +{- Stages a symlink to an annexed object, using a Sha of its target. -} stageSymlink :: FilePath -> Sha -> Annex () stageSymlink file sha = Annex.Queue.addUpdateIndex =<< inRepo (Git.UpdateIndex.stageSymlink file sha) + +{- Injects a pointer file content into git, returning its Sha. -} +hashPointerFile :: Key -> Annex Sha +hashPointerFile key = inRepo $ Git.HashObject.hashObject BlobObject $ + formatPointer key + +{- Stages a pointer file, using a Sha of its content -} +stagePointerFile :: FilePath -> Sha -> Annex () +stagePointerFile file sha = + Annex.Queue.addUpdateIndex =<< + inRepo (Git.UpdateIndex.stageFile sha FileBlob file) + +{- Parses a symlink target or a pointer file to a Key. + - Only looks at the first line, as pointer files can have subsequent + - lines. -} +parseLinkOrPointer :: L.ByteString -> Maybe Key +parseLinkOrPointer = parseLinkOrPointer' . decodeBS . L.take maxsz + where + {- Want to avoid buffering really big files in git into + - memory when reading files that may be pointers. + - + - 8192 bytes is plenty for a pointer to a key. + - Pad some more to allow for any pointer files that might have + - lines after the key explaining what the file is used for. -} + maxsz = 81920 + +parseLinkOrPointer' :: String -> Maybe Key +parseLinkOrPointer' s = headMaybe (lines (fromInternalGitPath s)) >>= go + where + go l + | isLinkToAnnex l = file2key $ takeFileName l + | otherwise = Nothing + +formatPointer :: Key -> String +formatPointer k = + toInternalGitPath (pathSeparator:objectDir </> key2file k) ++ "\n" + +{- Checks if a file is a pointer to a key. -} +isPointerFile :: FilePath -> Annex (Maybe Key) +isPointerFile f = liftIO $ catchDefaultIO Nothing $ + parseLinkOrPointer <$> L.readFile f diff --git a/Annex/MakeRepo.hs b/Annex/MakeRepo.hs index 73443c43d..adf49ed2c 100644 --- a/Annex/MakeRepo.hs +++ b/Annex/MakeRepo.hs @@ -75,7 +75,7 @@ initRepo False _ dir desc mgroup = inDir dir $ do initRepo' :: Maybe String -> Maybe StandardGroup -> Annex () initRepo' desc mgroup = unlessM isInitialized $ do - initialize desc + initialize desc Nothing u <- getUUID maybe noop (defaultStandardGroup u) mgroup {- Ensure branch gets committed right away so it is diff --git a/Annex/Version.hs b/Annex/Version.hs index d08f994e9..b54fb68e0 100644 --- a/Annex/Version.hs +++ b/Annex/Version.hs @@ -15,14 +15,20 @@ import qualified Annex type Version = String -supportedVersion :: Version -supportedVersion = "5" +defaultVersion :: Version +defaultVersion = "5" + +latestVersion :: Version +latestVersion = "6" + +supportedVersions :: [Version] +supportedVersions = ["5", "6"] upgradableVersions :: [Version] #ifndef mingw32_HOST_OS -upgradableVersions = ["0", "1", "2", "4"] +upgradableVersions = ["0", "1", "2", "4", "5"] #else -upgradableVersions = ["2", "3", "4"] +upgradableVersions = ["2", "3", "4", "5"] #endif autoUpgradeableVersions :: [Version] @@ -34,6 +40,18 @@ versionField = annexConfig "version" getVersion :: Annex (Maybe Version) getVersion = annexVersion <$> Annex.getGitConfig +versionSupportsDirectMode :: Annex Bool +versionSupportsDirectMode = go <$> getVersion + where + go (Just "6") = False + go _ = True + +versionSupportsUnlockedPointers :: Annex Bool +versionSupportsUnlockedPointers = go <$> getVersion + where + go (Just "6") = True + go _ = False + setVersion :: Version -> Annex () setVersion = setConfig versionField diff --git a/Annex/View.hs b/Annex/View.hs index 2b8a80e5f..8ddbb9c63 100644 --- a/Annex/View.hs +++ b/Annex/View.hs @@ -22,7 +22,7 @@ import Git.Sha import Git.HashObject import Git.Types import Git.FilePath -import qualified Backend +import Annex.WorkTree import Annex.Index import Annex.Link import Annex.CatFile @@ -342,7 +342,7 @@ applyView' mkviewedfile getfilemetadata view = do hasher <- inRepo hashObjectStart forM_ l $ \f -> do relf <- getTopFilePath <$> inRepo (toTopFilePath f) - go uh hasher relf =<< Backend.lookupFile f + go uh hasher relf =<< lookupFile f liftIO $ do hashObjectStop hasher void $ stopUpdateIndex uh @@ -413,13 +413,13 @@ withViewChanges addmeta removemeta = do handleremovals item | DiffTree.srcsha item /= nullSha = handlechange item removemeta - =<< catKey (DiffTree.srcsha item) (DiffTree.srcmode item) + =<< catKey (DiffTree.srcsha item) | otherwise = noop handleadds makeabs item | DiffTree.dstsha item /= nullSha = handlechange item addmeta =<< ifM isDirect - ( catKey (DiffTree.dstsha item) (DiffTree.dstmode item) + ( catKey (DiffTree.dstsha item) -- optimisation , isAnnexLink $ makeabs $ DiffTree.file item ) diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs new file mode 100644 index 000000000..c824e7fc5 --- /dev/null +++ b/Annex/WorkTree.hs @@ -0,0 +1,40 @@ +{- git-annex worktree files + - + - Copyright 2013-2015 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.WorkTree where + +import Common.Annex +import Annex.Link +import Annex.CatFile +import Annex.Version +import Config + +{- Looks up the key corresponding to an annexed file, + - by examining what the file links to. + - + - An unlocked file will not have a link on disk, so fall back to + - looking for a pointer to a key in git. + -} +lookupFile :: FilePath -> Annex (Maybe Key) +lookupFile file = do + mkey <- isAnnexLink file + case mkey of + Just key -> makeret key + Nothing -> ifM (versionSupportsUnlockedPointers <||> isDirect) + ( maybe (return Nothing) makeret =<< catKeyFile file + , return Nothing + ) + where + makeret = return . Just + +{- Modifies an action to only act on files that are already annexed, + - and passes the key on to it. -} +whenAnnexed :: (FilePath -> Key -> Annex (Maybe a)) -> FilePath -> Annex (Maybe a) +whenAnnexed a file = ifAnnexed file (a file) (return Nothing) + +ifAnnexed :: FilePath -> (Key -> Annex a) -> Annex a -> Annex a +ifAnnexed file yes no = maybe no yes =<< lookupFile file diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index 59ca69e88..745047d9d 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -30,6 +30,7 @@ import Config import Annex.Content import Annex.Link import Annex.CatFile +import Annex.InodeSentinal import qualified Annex import Utility.InodeCache import Annex.Content.Direct diff --git a/Assistant/Threads/TransferScanner.hs b/Assistant/Threads/TransferScanner.hs index f35c1f1f5..7386d5528 100644 --- a/Assistant/Threads/TransferScanner.hs +++ b/Assistant/Threads/TransferScanner.hs @@ -25,7 +25,7 @@ import Utility.ThreadScheduler import Utility.NotificationBroadcaster import Utility.Batch import qualified Git.LsFiles as LsFiles -import qualified Backend +import Annex.WorkTree import Annex.Content import Annex.Wanted import CmdLine.Action @@ -142,7 +142,7 @@ expensiveScan urlrenderer rs = batch <~> do (unwanted', ts) <- maybe (return (unwanted, [])) (findtransfers f unwanted) - =<< liftAnnex (Backend.lookupFile f) + =<< liftAnnex (lookupFile f) mapM_ (enqueue f) ts scan unwanted' fs diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs index 8c6ff378d..37e0154b4 100644 --- a/Assistant/Threads/Watcher.hs +++ b/Assistant/Threads/Watcher.hs @@ -28,7 +28,7 @@ import qualified Annex.Queue import qualified Git import qualified Git.UpdateIndex import qualified Git.LsFiles as LsFiles -import qualified Backend +import Annex.WorkTree import Annex.Direct import Annex.Content.Direct import Annex.CatFile @@ -270,7 +270,7 @@ onAddDirect symlinkssupported matcher file fs = do onAddSymlink :: Bool -> Handler onAddSymlink isdirect file filestatus = unlessIgnored file $ do linktarget <- liftIO (catchMaybeIO $ readSymbolicLink file) - kv <- liftAnnex (Backend.lookupFile file) + kv <- liftAnnex (lookupFile file) onAddSymlink' linktarget kv isdirect file filestatus onAddSymlink' :: Maybe String -> Maybe Key -> Bool -> Handler diff --git a/Backend.hs b/Backend.hs index 922d0c2a7..c2f3d28d4 100644 --- a/Backend.hs +++ b/Backend.hs @@ -9,9 +9,7 @@ module Backend ( list, orderedList, genKey, - lookupFile, getBackend, - isAnnexLink, chooseBackend, lookupBackendName, maybeLookupBackendName, @@ -21,12 +19,9 @@ module Backend ( import Common.Annex import qualified Annex import Annex.CheckAttr -import Annex.CatFile -import Annex.Link import Types.Key import Types.KeySource import qualified Types.Backend as B -import Config -- When adding a new backend, import it here and add it to the list. import qualified Backend.Hash @@ -78,26 +73,6 @@ genKey' (b:bs) source = do | c == '\n' = '_' | otherwise = c -{- Looks up the key corresponding to an annexed file, - - by examining what the file links to. - - - - In direct mode, there is often no link on disk, in which case - - the symlink is looked up in git instead. However, a real link - - on disk still takes precedence over what was committed to git in direct - - mode. - -} -lookupFile :: FilePath -> Annex (Maybe Key) -lookupFile file = do - mkey <- isAnnexLink file - case mkey of - Just key -> makeret key - Nothing -> ifM isDirect - ( maybe (return Nothing) makeret =<< catKeyFile file - , return Nothing - ) - where - makeret k = return $ Just k - getBackend :: FilePath -> Key -> Annex (Maybe Backend) getBackend file k = let bname = keyBackendName k in case maybeLookupBackendName bname of diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index f585bff3e..ba7689f70 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -96,6 +96,7 @@ import qualified Command.Upgrade import qualified Command.Forget import qualified Command.Proxy import qualified Command.DiffDriver +import qualified Command.Smudge import qualified Command.Undo import qualified Command.Version #ifdef WITH_ASSISTANT @@ -201,6 +202,7 @@ cmds testoptparser testrunner = , Command.Forget.cmd , Command.Proxy.cmd , Command.DiffDriver.cmd + , Command.Smudge.cmd , Command.Undo.cmd , Command.Version.cmd #ifdef WITH_ASSISTANT diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 8d253e47d..e6ee6f3fe 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -80,7 +80,7 @@ withFilesInRefs a = mapM_ go l <- inRepo $ LsTree.lsTree (Git.Ref r) forM_ l $ \i -> do let f = getTopFilePath $ LsTree.file i - v <- catKey (Git.Ref $ LsTree.sha i) (LsTree.mode i) + v <- catKey (Git.Ref $ LsTree.sha i) case v of Nothing -> noop Just k -> whenM (matcher $ MatchingKey k) $ @@ -115,29 +115,29 @@ withPairs a params = seekActions $ return $ map a $ pairs [] params pairs c (x:y:xs) = pairs ((x,y):c) xs pairs _ _ = error "expected pairs" -withFilesToBeCommitted :: (String -> CommandStart) -> CmdParams -> CommandSeek +withFilesToBeCommitted :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek withFilesToBeCommitted a params = seekActions $ prepFiltered a $ seekHelper LsFiles.stagedNotDeleted params -withFilesUnlocked :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek -withFilesUnlocked = withFilesUnlocked' LsFiles.typeChanged +withFilesOldUnlocked :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek +withFilesOldUnlocked = withFilesOldUnlocked' LsFiles.typeChanged -withFilesUnlockedToBeCommitted :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek -withFilesUnlockedToBeCommitted = withFilesUnlocked' LsFiles.typeChangedStaged +withFilesOldUnlockedToBeCommitted :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek +withFilesOldUnlockedToBeCommitted = withFilesOldUnlocked' LsFiles.typeChangedStaged -{- Unlocked files have changed type from a symlink to a regular file. +{- Unlocked files before v6 have changed type from a symlink to a regular file. - - Furthermore, unlocked files used to be a git-annex symlink, - not some other sort of symlink. -} -withFilesUnlocked' :: ([FilePath] -> Git.Repo -> IO ([FilePath], IO Bool)) -> (FilePath -> CommandStart) -> CmdParams -> CommandSeek -withFilesUnlocked' typechanged a params = seekActions $ +withFilesOldUnlocked' :: ([FilePath] -> Git.Repo -> IO ([FilePath], IO Bool)) -> (FilePath -> CommandStart) -> CmdParams -> CommandSeek +withFilesOldUnlocked' typechanged a params = seekActions $ prepFiltered a unlockedfiles where - unlockedfiles = filterM isUnlocked =<< seekHelper typechanged params + unlockedfiles = filterM isOldUnlocked =<< seekHelper typechanged params -isUnlocked :: FilePath -> Annex Bool -isUnlocked f = liftIO (notSymlink f) <&&> +isOldUnlocked :: FilePath -> Annex Bool +isOldUnlocked f = liftIO (notSymlink f) <&&> (isJust <$> catKeyFile f <||> isJust <$> catKeyFileHEAD f) {- Finds files that may be modified. -} diff --git a/Command.hs b/Command.hs index bee63bb74..387f7b8b5 100644 --- a/Command.hs +++ b/Command.hs @@ -18,12 +18,13 @@ module Command ( stopUnless, whenAnnexed, ifAnnexed, + lookupFile, isBareRepo, module ReExported ) where import Common.Annex -import qualified Backend +import Annex.WorkTree import qualified Git import Types.Command as ReExported import Types.Option as ReExported @@ -100,13 +101,5 @@ stop = return Nothing stopUnless :: Annex Bool -> Annex (Maybe a) -> Annex (Maybe a) stopUnless c a = ifM c ( a , stop ) -{- Modifies an action to only act on files that are already annexed, - - and passes the key on to it. -} -whenAnnexed :: (FilePath -> Key -> Annex (Maybe a)) -> FilePath -> Annex (Maybe a) -whenAnnexed a file = ifAnnexed file (a file) (return Nothing) - -ifAnnexed :: FilePath -> (Key -> Annex a) -> Annex a -> Annex a -ifAnnexed file yes no = maybe no yes =<< Backend.lookupFile file - isBareRepo :: Annex Bool isBareRepo = fromRepo Git.repoIsLocalBare diff --git a/Command/Add.hs b/Command/Add.hs index 27c11eab4..ab4e3a9d1 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -32,6 +32,9 @@ import Annex.FileMatcher import Annex.ReplaceFile import Utility.Tmp import Utility.CopyFile +import Annex.InodeSentinal +import Annex.Version +import qualified Database.Keys import Control.Exception (IOException) @@ -64,9 +67,9 @@ seek o = allowConcurrentOutput $ do , startSmall file ) go $ withFilesNotInGit (not $ includeDotFiles o) - ifM isDirect + ifM (versionSupportsUnlockedPointers <||> isDirect) ( go withFilesMaybeModified - , go withFilesUnlocked + , go withFilesOldUnlocked ) {- Pass file off to git-add. -} @@ -103,13 +106,22 @@ start file = ifAnnexed file addpresent add next $ if isSymbolicLink s then next $ addFile file else perform file - addpresent key = ifM isDirect + addpresent key = ifM versionSupportsUnlockedPointers ( do ms <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file case ms of Just s | isSymbolicLink s -> fixup key - _ -> ifM (goodContent key file) ( stop , add ) - , fixup key + _ -> ifM (sameInodeCache file =<< Database.Keys.getInodeCaches key) + ( stop, add ) + , ifM isDirect + ( do + ms <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file + case ms of + Just s | isSymbolicLink s -> fixup key + _ -> ifM (goodContent key file) + ( stop , add ) + , fixup key + ) ) fixup key = do -- the annexed symlink is present but not yet added to git diff --git a/Command/ConfigList.hs b/Command/ConfigList.hs index 46c909107..997016e8e 100644 --- a/Command/ConfigList.hs +++ b/Command/ConfigList.hs @@ -46,7 +46,7 @@ findOrGenUUID = do else ifM (Annex.Branch.hasSibling <||> (isJust <$> Fields.getField Fields.autoInit)) ( do liftIO checkNotReadOnly - initialize Nothing + initialize Nothing Nothing getUUID , return NoUUID ) diff --git a/Command/Direct.hs b/Command/Direct.hs index 162780dd5..9cfd258eb 100644 --- a/Command/Direct.hs +++ b/Command/Direct.hs @@ -14,6 +14,7 @@ import qualified Git.LsFiles import qualified Git.Branch import Config import Annex.Direct +import Annex.Version cmd :: Command cmd = notBareRepo $ noDaemonRunning $ @@ -24,7 +25,10 @@ seek :: CmdParams -> CommandSeek seek = withNothing start start :: CommandStart -start = ifM isDirect ( stop , next perform ) +start = ifM versionSupportsDirectMode + ( ifM isDirect ( stop , next perform ) + , error "Direct mode is not suppported by this repository version. Use git-annex unlock instead." + ) perform :: CommandPerform perform = do diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 1531d2ab7..46de4ac96 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -34,6 +34,7 @@ import Utility.HumanTime import Utility.CopyFile import Git.FilePath import Utility.PID +import qualified Database.Keys #ifdef WITH_DATABASE import qualified Database.Fsck as FsckDb @@ -118,16 +119,18 @@ start from inc file key = do go = runFsck inc file key perform :: Key -> FilePath -> Backend -> NumCopies -> Annex Bool -perform key file backend numcopies = check - -- order matters - [ fixLink key file - , verifyLocationLog key file - , verifyDirectMapping key file - , verifyDirectMode key file - , checkKeySize key - , checkBackend backend key (Just file) - , checkKeyNumCopies key (Just file) numcopies - ] +perform key file backend numcopies = do + keystatus <- getKeyStatus key + check + -- order matters + [ fixLink key file + , verifyLocationLog key keystatus file + , verifyDirectMapping key file + , verifyDirectMode key file + , checkKeySize key keystatus + , checkBackend backend key keystatus (Just file) + , checkKeyNumCopies key (Just file) numcopies + ] {- To fsck a remote, the content is retrieved to a tmp file, - and checked locally. -} @@ -183,19 +186,19 @@ startKey inc key numcopies = performKey key backend numcopies performKey :: Key -> Backend -> NumCopies -> Annex Bool -performKey key backend numcopies = check - [ verifyLocationLog key (key2file key) - , checkKeySize key - , checkBackend backend key Nothing - , checkKeyNumCopies key Nothing numcopies - ] +performKey key backend numcopies = do + keystatus <- getKeyStatus key + check + [ verifyLocationLog key keystatus (key2file key) + , checkKeySize key keystatus + , checkBackend backend key keystatus Nothing + , checkKeyNumCopies key Nothing numcopies + ] check :: [Annex Bool] -> Annex Bool check cs = and <$> sequence cs -{- Checks that the file's link points correctly to the content. - - - - In direct mode, there is only a link when the content is not present. +{- Checks that symlinks points correctly to the annexed content. -} fixLink :: Key -> FilePath -> Annex Bool fixLink key file = do @@ -214,19 +217,23 @@ fixLink key file = do {- Checks that the location log reflects the current status of the key, - in this repository only. -} -verifyLocationLog :: Key -> String -> Annex Bool -verifyLocationLog key desc = do - present <- inAnnex key +verifyLocationLog :: Key -> KeyStatus -> String -> Annex Bool +verifyLocationLog key keystatus desc = do + obj <- calcRepo $ gitAnnexLocation key + present <- if isKeyUnlocked keystatus + then liftIO (doesFileExist obj) + else inAnnex key direct <- isDirect u <- getUUID - {- Since we're checking that a key's file is present, throw + {- Since we're checking that a key's object file is present, throw - in a permission fixup here too. -} - file <- calcRepo $ gitAnnexLocation key - when (present && not direct) $ - freezeContent file - whenM (liftIO $ doesDirectoryExist $ parentDir file) $ - freezeContentDir file + when (present && not direct) $ void $ tryIO $ + if isKeyUnlocked keystatus + then thawContent obj + else freezeContent obj + whenM (liftIO $ doesDirectoryExist $ parentDir obj) $ + freezeContentDir obj {- In direct mode, modified files will show up as not present, - but that is expected and not something to do anything about. -} @@ -288,18 +295,16 @@ verifyDirectMode key file = do {- The size of the data for a key is checked against the size encoded in - the key's metadata, if available. - - - Not checked in direct mode, because files can be changed directly. + - Not checked when a file is unlocked, or in direct mode. -} -checkKeySize :: Key -> Annex Bool -checkKeySize key = ifM isDirect - ( return True - , do - file <- calcRepo $ gitAnnexLocation key - ifM (liftIO $ doesFileExist file) - ( checkKeySizeOr badContent key file - , return True - ) - ) +checkKeySize :: Key -> KeyStatus -> Annex Bool +checkKeySize _ KeyUnlocked = return True +checkKeySize key _ = do + file <- calcRepo $ gitAnnexLocation key + ifM (liftIO $ doesFileExist file) + ( checkKeySizeOr badContent key file + , return True + ) checkKeySizeRemote :: Key -> Remote -> Maybe FilePath -> Annex Bool checkKeySizeRemote _ _ Nothing = return True @@ -326,18 +331,26 @@ checkKeySizeOr bad key file = case Types.Key.keySize key of , msg ] -{- Runs the backend specific check on a key's content. +{- Runs the backend specific check on a key's content object. + - + - When a file is unlocked, it may be a hard link to the object, + - thus when the user modifies the file, the object will be modified and + - not pass the check, and we don't want to find an error in this case. + - So, skip the check if the key is unlocked and modified. - - In direct mode this is not done if the file has clearly been modified, - because modification of direct mode files is allowed. It's still done - if the file does not appear modified, to catch disk corruption, etc. -} -checkBackend :: Backend -> Key -> Maybe FilePath -> Annex Bool -checkBackend backend key mfile = go =<< isDirect +checkBackend :: Backend -> Key -> KeyStatus -> Maybe FilePath -> Annex Bool +checkBackend backend key keystatus mfile = go =<< isDirect where go False = do content <- calcRepo $ gitAnnexLocation key - checkBackendOr badContent backend key content + ifM (pure (isKeyUnlocked keystatus) <&&> (not <$> isUnmodified key content)) + ( nocheck + , checkBackendOr badContent backend key content + ) go True = maybe nocheck checkdirect mfile checkdirect file = ifM (goodContent key file) ( checkBackendOr' (badContentDirect file) backend key file @@ -582,3 +595,20 @@ withFsckDb (StartIncremental h) a = a h withFsckDb NonIncremental _ = noop withFsckDb (ScheduleIncremental _ _ i) a = withFsckDb i a #endif + +data KeyStatus = KeyLocked | KeyUnlocked | KeyMissing + +isKeyUnlocked :: KeyStatus -> Bool +isKeyUnlocked KeyUnlocked = True +isKeyUnlocked KeyLocked = False +isKeyUnlocked KeyMissing = False + +getKeyStatus :: Key -> Annex KeyStatus +getKeyStatus key = ifM isDirect + ( return KeyUnlocked + , catchDefaultIO KeyMissing $ do + obj <- calcRepo $ gitAnnexLocation key + unlocked <- ((> 1) . linkCount <$> liftIO (getFileStatus obj)) + <&&> (not . null <$> Database.Keys.getAssociatedFiles key) + return $ if unlocked then KeyUnlocked else KeyLocked + ) diff --git a/Command/Indirect.hs b/Command/Indirect.hs index c12c91a48..f5234b4dc 100644 --- a/Command/Indirect.hs +++ b/Command/Indirect.hs @@ -76,7 +76,7 @@ perform = do return Nothing | otherwise -> maybe noop (fromdirect f) - =<< catKey sha mode + =<< catKey sha _ -> noop go _ = noop diff --git a/Command/Init.hs b/Command/Init.hs index d969669f8..94d8168a6 100644 --- a/Command/Init.hs +++ b/Command/Init.hs @@ -10,25 +10,44 @@ module Command.Init where import Common.Annex import Command import Annex.Init +import Annex.Version import qualified Annex.SpecialRemote cmd :: Command cmd = dontCheck repoExists $ command "init" SectionSetup "initialize git-annex" - paramDesc (withParams seek) + paramDesc (seek <$$> optParser) -seek :: CmdParams -> CommandSeek -seek = withWords start +data InitOptions = InitOptions + { initDesc :: String + , initVersion :: Maybe Version + } -start :: [String] -> CommandStart -start ws = do - showStart "init" description - next $ perform description - where - description = unwords ws +optParser :: CmdParamsDesc -> Parser InitOptions +optParser desc = InitOptions + <$> (unwords <$> cmdParams desc) + <*> optional (option (str >>= parseVersion) + ( long "version" <> metavar paramValue + <> help "Override default annex.version" + )) -perform :: String -> CommandPerform -perform description = do - initialize $ if null description then Nothing else Just description +parseVersion :: Monad m => String -> m Version +parseVersion v + | v `elem` supportedVersions = return v + | otherwise = fail $ v ++ " is not a currently supported repository version" + +seek :: InitOptions -> CommandSeek +seek = commandAction . start + +start :: InitOptions -> CommandStart +start os = do + showStart "init" (initDesc os) + next $ perform os + +perform :: InitOptions -> CommandPerform +perform os = do + initialize + (if null (initDesc os) then Nothing else Just (initDesc os)) + (initVersion os) Annex.SpecialRemote.autoEnable next $ return True diff --git a/Command/Lock.hs b/Command/Lock.hs index 7711ec3b8..741c18c15 100644 --- a/Command/Lock.hs +++ b/Command/Lock.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010 Joey Hess <id@joeyh.name> + - Copyright 2010,2015 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -11,6 +11,16 @@ import Common.Annex import Command import qualified Annex.Queue import qualified Annex +import Annex.Version +import Annex.Content +import Annex.Link +import Annex.InodeSentinal +import Annex.Perms +import Annex.ReplaceFile +import Utility.InodeCache +import qualified Database.Keys +import qualified Command.Add +import Logs.Location cmd :: Command cmd = notDirect $ withGlobalOptions annexedMatchingOptions $ @@ -19,18 +29,90 @@ cmd = notDirect $ withGlobalOptions annexedMatchingOptions $ paramPaths (withParams seek) seek :: CmdParams -> CommandSeek -seek ps = do - withFilesUnlocked start ps - withFilesUnlockedToBeCommitted start ps +seek ps = ifM versionSupportsUnlockedPointers + ( withFilesInGit (whenAnnexed startNew) ps + , do + withFilesOldUnlocked startOld ps + withFilesOldUnlockedToBeCommitted startOld ps + ) -start :: FilePath -> CommandStart -start file = do +startNew :: FilePath -> Key -> CommandStart +startNew file key = ifM (isJust <$> isAnnexLink file) + ( stop + , do + showStart "lock" file + go =<< isPointerFile file + ) + where + go (Just key') + | key' == key = cont False + | otherwise = errorModified + go Nothing = + ifM (isUnmodified key file) + ( cont False + , ifM (Annex.getState Annex.force) + ( cont True + , errorModified + ) + ) + cont = next . performNew file key + +performNew :: FilePath -> Key -> Bool -> CommandPerform +performNew file key filemodified = do + lockdown =<< calcRepo (gitAnnexLocation key) + Command.Add.addLink file key + =<< withTSDelta (liftIO . genInodeCache file) + next $ cleanupNew file key + where + lockdown obj = do + ifM (catchBoolIO $ sameInodeCache obj =<< Database.Keys.getInodeCaches key) + ( breakhardlink obj + , repopulate obj + ) + whenM (liftIO $ doesFileExist obj) $ + freezeContent obj + + -- It's ok if the file is hard linked to obj, but if some other + -- associated file is, we need to break that link to lock down obj. + breakhardlink obj = whenM (catchBoolIO $ (> 1) . linkCount <$> liftIO (getFileStatus obj)) $ do + mfc <- withTSDelta (liftIO . genInodeCache file) + unlessM (sameInodeCache obj (maybeToList mfc)) $ do + modifyContent obj $ replaceFile obj $ \tmp -> do + unlessM (checkedCopyFile key obj tmp) $ + error "unable to lock file; need more free disk space" + Database.Keys.storeInodeCaches key [obj] + + -- Try to repopulate obj from an unmodified associated file. + repopulate obj + | filemodified = modifyContent obj $ do + fs <- Database.Keys.getAssociatedFiles key + mfile <- firstM (isUnmodified key) fs + liftIO $ nukeFile obj + case mfile of + Just unmodified -> + unlessM (checkedCopyFile key unmodified obj) + lostcontent + Nothing -> lostcontent + | otherwise = modifyContent obj $ + liftIO $ renameFile file obj + lostcontent = logStatus key InfoMissing + +cleanupNew :: FilePath -> Key -> CommandCleanup +cleanupNew file key = do + Database.Keys.removeAssociatedFile key file + return True + +startOld :: FilePath -> CommandStart +startOld file = do showStart "lock" file - unlessM (Annex.getState Annex.force) $ - error "Locking this file would discard any changes you have made to it. Use 'git annex add' to stage your changes. (Or, use --force to override)" - next $ perform file + unlessM (Annex.getState Annex.force) + errorModified + next $ performOld file -perform :: FilePath -> CommandPerform -perform file = do +performOld :: FilePath -> CommandPerform +performOld file = do Annex.Queue.addCommand "checkout" [Param "--"] [file] - next $ return True -- no cleanup needed + next $ return True + +errorModified :: a +errorModified = error "Locking this file would discard any changes you have made to it. Use 'git annex add' to stage your changes. (Or, use --force to override)" diff --git a/Command/Migrate.hs b/Command/Migrate.hs index d1c7902d7..b8d2eea87 100644 --- a/Command/Migrate.hs +++ b/Command/Migrate.hs @@ -72,7 +72,7 @@ perform file oldkey oldbackend newbackend = go =<< genkey go (Just (newkey, knowngoodcontent)) | knowngoodcontent = finish newkey | otherwise = stopUnless checkcontent $ finish newkey - checkcontent = Command.Fsck.checkBackend oldbackend oldkey $ Just file + checkcontent = Command.Fsck.checkBackend oldbackend oldkey Command.Fsck.KeyLocked $ Just file finish newkey = stopUnless (Command.ReKey.linkKey oldkey newkey) $ next $ Command.ReKey.cleanup file oldkey newkey genkey = case maybe Nothing (\fm -> fm oldkey newbackend (Just file)) (fastMigrate oldbackend) of diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs index 2d62b51f3..cbf7f6e3d 100644 --- a/Command/PreCommit.hs +++ b/Command/PreCommit.hs @@ -16,7 +16,9 @@ import qualified Command.Add import qualified Command.Fix import Annex.Direct import Annex.Hook +import Annex.Link import Annex.View +import Annex.Version import Annex.View.ViewedFile import Annex.LockFile import Logs.View @@ -41,17 +43,22 @@ seek ps = lockPreCommitHook $ ifM isDirect withWords startDirect ps runAnnexHook preCommitAnnexHook , do - ifM (liftIO Git.haveFalseIndex) + ifM (not <$> versionSupportsUnlockedPointers <&&> liftIO Git.haveFalseIndex) ( do (fs, cleanup) <- inRepo $ Git.typeChangedStaged ps - whenM (anyM isUnlocked fs) $ + whenM (anyM isOldUnlocked fs) $ error "Cannot make a partial commit with unlocked annexed files. You should `git annex add` the files you want to commit, and then run git commit." void $ liftIO cleanup , do -- fix symlinks to files being committed - withFilesToBeCommitted (whenAnnexed Command.Fix.start) ps + flip withFilesToBeCommitted ps $ \f -> + maybe stop (Command.Fix.start f) + =<< isAnnexLink f -- inject unlocked files into the annex - withFilesUnlockedToBeCommitted startIndirect ps + -- (not needed when repo version uses + -- unlocked pointer files) + unlessM versionSupportsUnlockedPointers $ + withFilesOldUnlockedToBeCommitted startInjectUnlocked ps ) runAnnexHook preCommitAnnexHook -- committing changes to a view updates metadata @@ -64,8 +71,8 @@ seek ps = lockPreCommitHook $ ifM isDirect ) -startIndirect :: FilePath -> CommandStart -startIndirect f = next $ do +startInjectUnlocked :: FilePath -> CommandStart +startInjectUnlocked f = next $ do unlessM (callCommandAction $ Command.Add.start f) $ error $ "failed to add " ++ f ++ "; canceling commit" next $ return True diff --git a/Command/Reinit.hs b/Command/Reinit.hs index 1be692871..e2c00a3d2 100644 --- a/Command/Reinit.hs +++ b/Command/Reinit.hs @@ -38,6 +38,6 @@ perform s = do then return $ toUUID s else Remote.nameToUUID s storeUUID u - initialize' + initialize' Nothing Annex.SpecialRemote.autoEnable next $ return True diff --git a/Command/Smudge.hs b/Command/Smudge.hs new file mode 100644 index 000000000..e6541bc6d --- /dev/null +++ b/Command/Smudge.hs @@ -0,0 +1,135 @@ +{- git-annex command + - + - Copyright 2015 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Smudge where + +import Common.Annex +import Command +import Annex.Content +import Annex.Link +import Annex.MetaData +import Annex.FileMatcher +import Annex.InodeSentinal +import Types.KeySource +import Backend +import Logs.Location +import qualified Database.Keys + +import qualified Data.ByteString.Lazy as B + +cmd :: Command +cmd = noCommit $ noMessages $ + command "smudge" SectionPlumbing + "git smudge filter" + paramFile (seek <$$> optParser) + +data SmudgeOptions = SmudgeOptions + { smudgeFile :: FilePath + , cleanOption :: Bool + } + +optParser :: CmdParamsDesc -> Parser SmudgeOptions +optParser desc = SmudgeOptions + <$> argument str ( metavar desc ) + <*> switch ( long "clean" <> help "clean filter" ) + +seek :: SmudgeOptions -> CommandSeek +seek o = commandAction $ + (if cleanOption o then clean else smudge) (smudgeFile o) + +-- Smudge filter is fed git file content, and if it's a pointer to an +-- available annex object, should output its content. +smudge :: FilePath -> CommandStart +smudge file = do + b <- liftIO $ B.hGetContents stdin + case parseLinkOrPointer b of + Nothing -> liftIO $ B.putStr b + Just k -> do + -- A previous unlocked checkout of the file may have + -- led to the annex object getting modified; + -- don't provide such modified content as it + -- will be confusing. inAnnex will detect such + -- modifications. + ifM (inAnnex k) + ( do + content <- calcRepo (gitAnnexLocation k) + liftIO $ B.putStr . fromMaybe b + =<< catchMaybeIO (B.readFile content) + , liftIO $ B.putStr b + ) + Database.Keys.addAssociatedFile k file + stop + +-- Clean filter is fed file content on stdin, decides if a file +-- should be stored in the annex, and outputs a pointer to its +-- injested content. +clean :: FilePath -> CommandStart +clean file = do + b <- liftIO $ B.hGetContents stdin + if isJust (parseLinkOrPointer b) + then liftIO $ B.hPut stdout b + else ifM (shouldAnnex file) + ( do + k <- ingest file + oldkeys <- filter (/= k) + <$> Database.Keys.getAssociatedKey file + mapM_ (cleanOldKey file) oldkeys + Database.Keys.addAssociatedFile k file + liftIO $ emitPointer k + , liftIO $ B.hPut stdout b + ) + stop + +-- If the file being cleaned was hard linked to the old key's annex object, +-- modifying the file will have caused the object to have the wrong content. +-- Clean up from that, making the +cleanOldKey :: FilePath -> Key -> Annex () +cleanOldKey modifiedfile key = do + obj <- calcRepo (gitAnnexLocation key) + caches <- Database.Keys.getInodeCaches key + unlessM (sameInodeCache obj caches) $ do + unlinkAnnex key + fs <- filter (/= modifiedfile) + <$> Database.Keys.getAssociatedFiles key + fs' <- filterM (`sameInodeCache` caches) fs + case fs' of + -- If linkAnnex fails, the file with the content + -- is still present, so no need for any recovery. + (f:_) -> void $ linkAnnex key f + _ -> lostcontent + where + lostcontent = logStatus key InfoMissing + +shouldAnnex :: FilePath -> Annex Bool +shouldAnnex file = do + matcher <- largeFilesMatcher + checkFileMatcher matcher file + +ingest :: FilePath -> Annex Key +ingest file = do + backend <- chooseBackend file + let source = KeySource + { keyFilename = file + , contentLocation = file + , inodeCache = Nothing + } + k <- fst . fromMaybe (error "failed to generate a key") + <$> genKey source backend + -- Hard link (or copy) file content to annex object + -- to prevent it from being lost when git checks out + -- a branch not containing this file. + r <- linkAnnex k file + case r of + LinkAnnexFailed -> error "Problem adding file to the annex" + LinkAnnexOk -> logStatus k InfoPresent + LinkAnnexNoop -> noop + genMetaData k file + =<< liftIO (getFileStatus file) + return k + +emitPointer :: Key -> IO () +emitPointer = putStr . formatPointer diff --git a/Command/Unannex.hs b/Command/Unannex.hs index fdf976d3e..f7af8cde6 100644 --- a/Command/Unannex.hs +++ b/Command/Unannex.hs @@ -15,6 +15,7 @@ import Config import qualified Annex import Annex.Content import Annex.Content.Direct +import Annex.Version import qualified Git.Command import qualified Git.Branch import qualified Git.Ref @@ -32,7 +33,7 @@ seek :: CmdParams -> CommandSeek seek = wrapUnannex . (withFilesInGit $ whenAnnexed start) wrapUnannex :: Annex a -> Annex a -wrapUnannex a = ifM isDirect +wrapUnannex a = ifM (versionSupportsUnlockedPointers <||> isDirect) ( a {- Run with the pre-commit hook disabled, to avoid confusing - behavior if an unannexed file is added back to git as diff --git a/Command/Undo.hs b/Command/Undo.hs index c647dfba4..0692dce34 100644 --- a/Command/Undo.hs +++ b/Command/Undo.hs @@ -72,7 +72,7 @@ perform p = do f <- mkrel di whenM isDirect $ maybe noop (`removeDirect` f) - =<< catKey (srcsha di) (srcmode di) + =<< catKey (srcsha di) liftIO $ nukeFile f forM_ adds $ \di -> do @@ -80,6 +80,6 @@ perform p = do inRepo $ Git.run [Param "checkout", Param "--", File f] whenM isDirect $ maybe noop (`toDirect` f) - =<< catKey (dstsha di) (dstmode di) + =<< catKey (dstsha di) next $ liftIO cleanup diff --git a/Command/Unlock.hs b/Command/Unlock.hs index d1b1d0e90..1cfd4a0b2 100644 --- a/Command/Unlock.hs +++ b/Command/Unlock.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010 Joey Hess <id@joeyh.name> + - Copyright 2010,2015 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -11,6 +11,9 @@ import Common.Annex import Command import Annex.Content import Annex.CatFile +import Annex.Version +import Annex.Link +import Annex.ReplaceFile import Utility.CopyFile cmd :: Command @@ -26,14 +29,45 @@ mkcmd n d = notDirect $ withGlobalOptions annexedMatchingOptions $ seek :: CmdParams -> CommandSeek seek = withFilesInGit $ whenAnnexed start -{- The unlock subcommand replaces the symlink with a copy of the file's - - content. -} +{- Before v6, the unlock subcommand replaces the symlink with a copy of + - the file's content. In v6 and above, it converts the file from a symlink + - to a pointer. -} start :: FilePath -> Key -> CommandStart -start file key = do - showStart "unlock" file +start file key = ifM (isJust <$> isAnnexLink file) + ( do + showStart "unlock" file + ifM (inAnnex key) + ( ifM versionSupportsUnlockedPointers + ( next $ performNew file key + , startOld file key + ) + , do + warning "content not present; cannot unlock" + next $ next $ return False + ) + , stop + ) + +performNew :: FilePath -> Key -> CommandPerform +performNew dest key = do + src <- calcRepo (gitAnnexLocation key) + replaceFile dest $ \tmp -> do + r <- linkAnnex' key src tmp + case r of + LinkAnnexOk -> return () + _ -> error "linkAnnex failed" + next $ cleanupNew dest key + +cleanupNew :: FilePath -> Key -> CommandCleanup +cleanupNew dest key = do + stagePointerFile dest =<< hashPointerFile key + return True + +startOld :: FilePath -> Key -> CommandStart +startOld file key = ifM (inAnnex key) ( ifM (isJust <$> catKeyFileHEAD file) - ( next $ perform file key + ( next $ performOld file key , do warning "this has not yet been committed to git; cannot unlock it" next $ next $ return False @@ -43,8 +77,8 @@ start file key = do next $ next $ return False ) -perform :: FilePath -> Key -> CommandPerform -perform dest key = ifM (checkDiskSpace Nothing key 0 True) +performOld :: FilePath -> Key -> CommandPerform +performOld dest key = ifM (checkDiskSpace Nothing key 0 True) ( do src <- calcRepo $ gitAnnexLocation key tmpdest <- fromRepo $ gitAnnexTmpObjectLocation key diff --git a/Command/Unused.hs b/Command/Unused.hs index 4756cda5d..4353bd075 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -24,7 +24,6 @@ import qualified Git.Branch import qualified Git.RefLog import qualified Git.LsFiles as LsFiles import qualified Git.DiffTree as DiffTree -import qualified Backend import qualified Remote import qualified Annex.Branch import Annex.CatFile @@ -215,7 +214,7 @@ withKeysReferenced' mdir initial a = do Just dir -> inRepo $ LsFiles.inRepo [dir] go v [] = return v go v (f:fs) = do - x <- Backend.lookupFile f + x <- lookupFile f case x of Nothing -> go v fs Just k -> do @@ -266,7 +265,7 @@ withKeysReferencedInGitRef a ref = do forM_ ts $ tKey lookAtWorkingTree >=> maybe noop a liftIO $ void clean where - tKey True = Backend.lookupFile . getTopFilePath . DiffTree.file + tKey True = lookupFile . getTopFilePath . DiffTree.file tKey False = fileKey . takeFileName . decodeBS <$$> catFile ref . getTopFilePath . DiffTree.file diff --git a/Command/Upgrade.hs b/Command/Upgrade.hs index c02a6709f..8a34022e3 100644 --- a/Command/Upgrade.hs +++ b/Command/Upgrade.hs @@ -13,6 +13,7 @@ import Upgrade cmd :: Command cmd = dontCheck repoExists $ -- because an old version may not seem to exist + noDaemonRunning $ -- avoid upgrading repo out from under daemon command "upgrade" SectionMaintenance "upgrade repository layout" paramNothing (withParams seek) diff --git a/Command/Version.hs b/Command/Version.hs index 72bbe4064..c5a9fcef2 100644 --- a/Command/Version.hs +++ b/Command/Version.hs @@ -50,7 +50,8 @@ showVersion = do liftIO $ do showPackageVersion vinfo "local repository version" $ fromMaybe "unknown" v - vinfo "supported repository version" supportedVersion + vinfo "supported repository versions" $ + unwords supportedVersions vinfo "upgrade supported from repository versions" $ unwords upgradableVersions @@ -90,3 +90,21 @@ setCrippledFileSystem :: Bool -> Annex () setCrippledFileSystem b = do setConfig (annexConfig "crippledfilesystem") (Git.Config.boolConfig b) Annex.changeGitConfig $ \c -> c { annexCrippledFileSystem = b } + +configureSmudgeFilter :: Annex () +configureSmudgeFilter = do + setConfig (ConfigKey "filter.annex.smudge") "git-annex smudge %f" + setConfig (ConfigKey "filter.annex.clean") "git-annex smudge --clean %f" + lf <- Annex.fromRepo Git.attributesLocal + gf <- Annex.fromRepo Git.attributes + lfs <- readattr lf + gfs <- readattr gf + liftIO $ unless ("filter=annex" `isInfixOf` (lfs ++ gfs)) $ do + createDirectoryIfMissing True (takeDirectory lf) + writeFile lf (lfs ++ "\n" ++ stdattr) + where + readattr = liftIO . catchDefaultIO "" . readFileStrictAnyEncoding + stdattr = unlines + [ "* filter=annex" + , ".* !filter" + ] diff --git a/Database/Fsck.hs b/Database/Fsck.hs index ed00e62d8..b0e56f6c0 100644 --- a/Database/Fsck.hs +++ b/Database/Fsck.hs @@ -59,7 +59,7 @@ newPass u = isJust <$> tryExclusiveLock (gitAnnexFsckDbLock u) go go = liftIO . void . tryIO . removeDirectoryRecursive =<< fromRepo (gitAnnexFsckDbDir u) -{- Opens the database, creating it atomically if it doesn't exist yet. -} +{- Opens the database, creating it if it doesn't exist yet. -} openDb :: UUID -> Annex FsckHandle openDb u = do dbdir <- fromRepo (gitAnnexFsckDbDir u) diff --git a/Database/Handle.hs b/Database/Handle.hs index 439e7c18b..67f759265 100644 --- a/Database/Handle.hs +++ b/Database/Handle.hs @@ -21,7 +21,6 @@ module Database.Handle ( import Utility.Exception import Utility.Monad -import Messages import Database.Persist.Sqlite import qualified Database.Sqlite as Sqlite @@ -35,6 +34,7 @@ import Control.Monad.Trans.Resource (runResourceT) import Control.Monad.Logger (runNoLoggingT) import Data.List import Data.Time.Clock +import System.IO {- A DbHandle is a reference to a worker thread that communicates with - the database. It has a MVar which Jobs are submitted to. -} @@ -79,7 +79,7 @@ type TableName = String workerThread :: T.Text -> TableName -> MVar Job -> IO () workerThread db tablename jobs = catchNonAsync (run loop) showerr where - showerr e = liftIO $ warningIO $ + showerr e = liftIO $ hPutStrLn stderr $ "sqlite worker thread crashed: " ++ show e loop = do @@ -142,9 +142,9 @@ queryDb (DbHandle _ jobs _) a = do closeDb :: DbHandle -> IO () closeDb h@(DbHandle worker jobs _) = do - flushQueueDb h putMVar jobs CloseJob wait worker + flushQueueDb h type Size = Int diff --git a/Database/Keys.hs b/Database/Keys.hs new file mode 100644 index 000000000..a0c5b1a04 --- /dev/null +++ b/Database/Keys.hs @@ -0,0 +1,156 @@ +{- Sqlite database of information about Keys + - + - Copyright 2015 Joey Hess <id@joeyh.name> + -: + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-} +{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-} +{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-} +{-# LANGUAGE RankNTypes #-} + +module Database.Keys ( + DbHandle, + openDb, + closeDb, + addAssociatedFile, + getAssociatedFiles, + getAssociatedKey, + removeAssociatedFile, + storeInodeCaches, + addInodeCaches, + getInodeCaches, + removeInodeCaches, + AssociatedId, + ContentId, +) where + +import Database.Types +import Database.Keys.Types +import qualified Database.Handle as H +import Locations +import Common hiding (delete) +import Annex +import Types.Key +import Annex.Perms +import Annex.LockFile +import Messages +import Utility.InodeCache +import Annex.InodeSentinal + +import Database.Persist.TH +import Database.Esqueleto hiding (Key) + +share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase| +Associated + key SKey + file FilePath + KeyFileIndex key file +Content + key SKey + cache SInodeCache + KeyCacheIndex key cache +|] + +{- Opens the database, creating it if it doesn't exist yet. + - + - Multiple readers and writers can have the database open at the same + - time. Database.Handle deals with the concurrency issues. + - The lock is held while opening the database, so that when + - the database doesn't exist yet, one caller wins the lock and + - can create it undisturbed. + -} +openDb :: Annex DbHandle +openDb = withExclusiveLock gitAnnexKeysDbLock $ do + dbdir <- fromRepo gitAnnexKeysDb + let db = dbdir </> "db" + unlessM (liftIO $ doesFileExist db) $ do + liftIO $ do + createDirectoryIfMissing True dbdir + H.initDb db $ void $ + runMigrationSilent migrateKeysDb + setAnnexDirPerm dbdir + setAnnexFilePerm db + h <- liftIO $ H.openDb db "content" + + -- work around https://github.com/yesodweb/persistent/issues/474 + liftIO setConsoleEncoding + + return $ DbHandle h + +closeDb :: DbHandle -> IO () +closeDb (DbHandle h) = H.closeDb h + +withDbHandle :: (H.DbHandle -> IO a) -> Annex a +withDbHandle a = bracket openDb (liftIO . closeDb) (\(DbHandle h) -> liftIO (a h)) + +addAssociatedFile :: Key -> FilePath -> Annex () +addAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ do + -- If the same file was associated with a different key before, + -- remove that. + delete $ from $ \r -> do + where_ (r ^. AssociatedFile ==. val f &&. r ^. AssociatedKey ==. val sk) + void $ insertUnique $ Associated sk f + where + sk = toSKey k + +{- Note that the files returned were once associated with the key, but + - some of them may not be any longer. -} +getAssociatedFiles :: Key -> Annex [FilePath] +getAssociatedFiles k = withDbHandle $ \h -> H.queryDb h $ + getAssociatedFiles' $ toSKey k + +getAssociatedFiles' :: SKey -> SqlPersistM [FilePath] +getAssociatedFiles' sk = do + l <- select $ from $ \r -> do + where_ (r ^. AssociatedKey ==. val sk) + return (r ^. AssociatedFile) + return $ map unValue l + +{- Gets any keys that are on record as having a particular associated file. + - (Should be one or none but the database doesn't enforce that.) -} +getAssociatedKey :: FilePath -> Annex [Key] +getAssociatedKey f = withDbHandle $ \h -> H.queryDb h $ + getAssociatedKey' f + +getAssociatedKey' :: FilePath -> SqlPersistM [Key] +getAssociatedKey' f = do + l <- select $ from $ \r -> do + where_ (r ^. AssociatedFile ==. val f) + return (r ^. AssociatedKey) + return $ map (fromSKey . unValue) l + +removeAssociatedFile :: Key -> FilePath -> Annex () +removeAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ + delete $ from $ \r -> do + where_ (r ^. AssociatedKey ==. val sk &&. r ^. AssociatedFile ==. val f) + where + sk = toSKey k + +{- Stats the files, and stores their InodeCaches. -} +storeInodeCaches :: Key -> [FilePath] -> Annex () +storeInodeCaches k fs = withTSDelta $ \d -> + addInodeCaches k . catMaybes =<< liftIO (mapM (`genInodeCache` d) fs) + +addInodeCaches :: Key -> [InodeCache] -> Annex () +addInodeCaches k is = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ + forM_ is $ \i -> insertUnique $ Content (toSKey k) (toSInodeCache i) + +{- A key may have multiple InodeCaches; one for the annex object, and one + - for each pointer file that is a copy of it. -} +getInodeCaches :: Key -> Annex [InodeCache] +getInodeCaches k = withDbHandle $ \h -> H.queryDb h $ do + l <- select $ from $ \r -> do + where_ (r ^. ContentKey ==. val sk) + return (r ^. ContentCache) + return $ map (fromSInodeCache . unValue) l + where + sk = toSKey k + +removeInodeCaches :: Key -> Annex () +removeInodeCaches k = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ + delete $ from $ \r -> do + where_ (r ^. ContentKey ==. val sk) + where + sk = toSKey k diff --git a/Database/Keys/Types.hs b/Database/Keys/Types.hs new file mode 100644 index 000000000..a627b3ca5 --- /dev/null +++ b/Database/Keys/Types.hs @@ -0,0 +1,14 @@ +{- Sqlite database of information about Keys, data types. + - + - Copyright 2015 Joey Hess <id@joeyh.name> + -: + - Licensed under the GNU GPL version 3 or higher. + -} + +module Database.Keys.Types ( + DbHandle(..) +) where + +import qualified Database.Handle as H + +newtype DbHandle = DbHandle H.DbHandle diff --git a/Database/Types.hs b/Database/Types.hs index dee56832b..1476a693a 100644 --- a/Database/Types.hs +++ b/Database/Types.hs @@ -13,6 +13,7 @@ import Database.Persist.TH import Data.Maybe import Types.Key +import Utility.InodeCache -- A serialized Key newtype SKey = SKey String @@ -22,6 +23,18 @@ toSKey :: Key -> SKey toSKey = SKey . key2file fromSKey :: SKey -> Key -fromSKey (SKey s) = fromMaybe (error $ "bad serialied key " ++ s) (file2key s) +fromSKey (SKey s) = fromMaybe (error $ "bad serialied Key " ++ s) (file2key s) derivePersistField "SKey" + +-- A serialized InodeCache +newtype SInodeCache = I String + deriving (Show, Read) + +toSInodeCache :: InodeCache -> SInodeCache +toSInodeCache = I . showInodeCache + +fromSInodeCache :: SInodeCache -> InodeCache +fromSInodeCache (I s) = fromMaybe (error $ "bad serialied InodeCache " ++ s) (readInodeCache s) + +derivePersistField "SInodeCache" @@ -28,6 +28,7 @@ module Git ( repoPath, localGitDir, attributes, + attributesLocal, hookPath, assertLocal, adjustPath, @@ -125,8 +126,11 @@ assertLocal repo action {- Path to a repository's gitattributes file. -} attributes :: Repo -> FilePath attributes repo - | repoIsLocalBare repo = repoPath repo ++ "/info/.gitattributes" - | otherwise = repoPath repo ++ "/.gitattributes" + | repoIsLocalBare repo = attributesLocal repo + | otherwise = repoPath repo </> ".gitattributes" + +attributesLocal :: Repo -> FilePath +attributesLocal repo = localGitDir repo </> "info" </> "attributes" {- Path to a given hook script in a repository, only if the hook exists - and is executable. -} @@ -11,8 +11,8 @@ import Common.Annex import qualified Annex import qualified Utility.Matcher import qualified Remote -import qualified Backend import Annex.Content +import Annex.WorkTree import Annex.Action import Annex.UUID import Logs.Trust @@ -201,22 +201,22 @@ limitAnything _ _ = return True {- Adds a limit to skip files not believed to be present in all - repositories in the specified group. -} addInAllGroup :: String -> Annex () -addInAllGroup groupname = do - m <- groupMap - addLimit $ limitInAllGroup m groupname - -limitInAllGroup :: GroupMap -> MkLimit Annex -limitInAllGroup m groupname - | S.null want = Right $ const $ const $ return True - | otherwise = Right $ \notpresent -> checkKey $ check notpresent - where - want = fromMaybe S.empty $ M.lookup groupname $ uuidsByGroup m - check notpresent key +addInAllGroup groupname = addLimit $ limitInAllGroup groupMap groupname + +limitInAllGroup :: Annex GroupMap -> MkLimit Annex +limitInAllGroup getgroupmap groupname = Right $ \notpresent mi -> do + m <- getgroupmap + let want = fromMaybe S.empty $ M.lookup groupname $ uuidsByGroup m + if S.null want + then return True -- optimisation: Check if a wanted uuid is notpresent. - | not (S.null (S.intersection want notpresent)) = return False - | otherwise = do - present <- S.fromList <$> Remote.keyLocations key - return $ S.null $ want `S.difference` present + else if not (S.null (S.intersection want notpresent)) + then return False + else checkKey (check want) mi + where + check want key = do + present <- S.fromList <$> Remote.keyLocations key + return $ S.null $ want `S.difference` present {- Adds a limit to skip files not using a specified key-value backend. -} addInBackend :: String -> Annex () @@ -277,7 +277,7 @@ addTimeLimit s = do else return True lookupFileKey :: FileInfo -> Annex (Maybe Key) -lookupFileKey = Backend.lookupFile . currFile +lookupFileKey = lookupFile . currFile checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a diff --git a/Locations.hs b/Locations.hs index ba6115155..200297321 100644 --- a/Locations.hs +++ b/Locations.hs @@ -29,6 +29,8 @@ module Locations ( gitAnnexBadDir, gitAnnexBadLocation, gitAnnexUnusedLog, + gitAnnexKeysDb, + gitAnnexKeysDbLock, gitAnnexFsckState, gitAnnexFsckDbDir, gitAnnexFsckDbLock, @@ -237,6 +239,14 @@ gitAnnexBadLocation key r = gitAnnexBadDir r </> keyFile key gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath gitAnnexUnusedLog prefix r = gitAnnexDir r </> (prefix ++ "unused") +{- .git/annex/keys/ contains a database of information about keys. -} +gitAnnexKeysDb :: Git.Repo -> FilePath +gitAnnexKeysDb r = gitAnnexDir r </> "keys" + +{- Lock file for the keys database. -} +gitAnnexKeysDbLock :: Git.Repo -> FilePath +gitAnnexKeysDbLock r = gitAnnexKeysDb r ++ "lck" + {- .git/annex/fsck/uuid/ is used to store information about incremental - fscks. -} gitAnnexFsckDir :: UUID -> Git.Repo -> FilePath diff --git a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs index c21d67010..035c098f6 100644 --- a/Logs/PreferredContent.hs +++ b/Logs/PreferredContent.hs @@ -102,7 +102,7 @@ makeMatcher groupmap configmap groupwantedmap u = go True True | null (lefts tokens) = generate $ rights tokens | otherwise = unknownMatcher u where - tokens = exprParser matchstandard matchgroupwanted groupmap configmap (Just u) expr + tokens = exprParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr matchstandard | expandstandard = maybe (unknownMatcher u) (go False False) (standardPreferredContent <$> getStandardGroup mygroups) @@ -133,7 +133,7 @@ checkPreferredContentExpression expr = case parsedToMatcher tokens of Left e -> Just e Right _ -> Nothing where - tokens = exprParser matchAll matchAll emptyGroupMap M.empty Nothing expr + tokens = exprParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr {- Puts a UUID in a standard group, and sets its preferred content to use - the standard expression for that group (unless preferred content is diff --git a/Remote/Git.hs b/Remote/Git.hs index 890e40b51..6dc5345c9 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -672,7 +672,7 @@ wantHardLink :: Annex Bool wantHardLink = (annexHardLink <$> Annex.getGitConfig) <&&> (not <$> isDirect) -- Copies from src to dest, updating a meter. If the copy finishes --- successfully, calls a final check action, which must also success, or +-- successfully, calls a final check action, which must also succeed, or -- returns false. -- -- If either the remote or local repository wants to use hard links, @@ -38,6 +38,7 @@ import Common import qualified Utility.SafeCommand import qualified Annex import qualified Annex.UUID +import qualified Annex.Version import qualified Backend import qualified Git.CurrentRepo import qualified Git.Filename @@ -65,6 +66,7 @@ import qualified Types.Messages import qualified Config import qualified Config.Cost import qualified Crypto +import qualified Annex.WorkTree import qualified Annex.Init import qualified Annex.CatFile import qualified Annex.View @@ -117,18 +119,17 @@ ingredients = ] tests :: TestTree -tests = testGroup "Tests" - -- Test both direct and indirect mode. - -- Windows is only going to use direct mode, so don't test twice. - [ properties +tests = testGroup "Tests" $ properties : + map (\(d, te) -> withTestMode te (unitTests d)) testmodes + where + testmodes = + [ ("v6", TestMode { forceDirect = False, annexVersion = "6" }) + , ("v5", TestMode { forceDirect = False, annexVersion = "5" }) + -- Windows will only use direct mode, so don't test twice. #ifndef mingw32_HOST_OS - , withTestEnv True $ unitTests "(direct)" - , withTestEnv False $ unitTests "(indirect)" -#else - , withTestEnv False $ unitTests "" + , ("v5 direct", TestMode { forceDirect = True, annexVersion = "5" }) + ] #endif - ] - properties :: TestTree properties = localOption (QuickCheckTests 1000) $ testGroup "QuickCheck" @@ -242,8 +243,11 @@ unitTests note = testGroup ("Unit Tests " ++ note) -- this test case create the main repo test_init :: Assertion test_init = innewrepo $ do - git_annex "init" [reponame] @? "init failed" - handleforcedirect + ver <- annexVersion <$> getTestMode + if ver == Annex.Version.defaultVersion + then git_annex "init" [reponame] @? "init failed" + else git_annex "init" [reponame, "--version", ver] @? "init failed" + setupTestMode where reponame = "test repo" @@ -294,7 +298,6 @@ test_shared_clone = intmpsharedclonerepo $ do , "--get" , "annex.hardlink" ] - print v v == Just "true\n" @? "shared clone of repo did not get annex.hardlink set" @@ -534,10 +537,13 @@ test_lock = intmpclonerepoInDirect $ do annexed_notpresent annexedfile -- regression test: unlock of newly added, not committed file - -- should fail + -- should fail in v5 mode. In v6 mode, this is allowed. writeFile "newfile" "foo" git_annex "add" ["newfile"] @? "add new file failed" - not <$> git_annex "unlock" ["newfile"] @? "unlock failed to fail on newly added, never committed file" + ifM (annexeval Annex.Version.versionSupportsUnlockedPointers) + ( git_annex "unlock" ["newfile"] @? "unlock failed on newly added, never committed file in v6 repository" + , not <$> git_annex "unlock" ["newfile"] @? "unlock failed to fail on newly added, never committed file in v5 repository" + ) git_annex "get" [annexedfile] @? "get of file failed" annexed_present annexedfile @@ -549,12 +555,21 @@ test_lock = intmpclonerepoInDirect $ do writeFile annexedfile $ content annexedfile ++ "foo" not <$> git_annex "lock" [annexedfile] @? "lock failed to fail without --force" git_annex "lock" ["--force", annexedfile] @? "lock --force failed" + -- In v6 mode, the original content of the file is not always + -- preserved after modification, so re-get it. + git_annex "get" [annexedfile] @? "get of file failed after lock --force" annexed_present annexedfile git_annex "unlock" [annexedfile] @? "unlock failed" unannexed annexedfile changecontent annexedfile - git_annex "add" [annexedfile] @? "add of modified file failed" - runchecks [checklink, checkunwritable] annexedfile + ifM (annexeval Annex.Version.versionSupportsUnlockedPointers) + ( do + boolSystem "git" [Param "add", Param annexedfile] @? "add of modified file failed" + runchecks [checkregularfile, checkwritable] annexedfile + , do + git_annex "add" [annexedfile] @? "add of modified file failed" + runchecks [checklink, checkunwritable] annexedfile + ) c <- readFile annexedfile assertEqual "content of modified file" c (changedcontent annexedfile) r' <- git_annex "drop" [annexedfile] @@ -580,7 +595,10 @@ test_edit' precommit = intmpclonerepoInDirect $ do @? "pre-commit failed" else boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "contentchanged"] @? "git commit of edited file failed" - runchecks [checklink, checkunwritable] annexedfile + ifM (annexeval Annex.Version.versionSupportsUnlockedPointers) + ( runchecks [checkregularfile, checkwritable] annexedfile + , runchecks [checklink, checkunwritable] annexedfile + ) c <- readFile annexedfile assertEqual "content of modified file" c (changedcontent annexedfile) not <$> git_annex "drop" [annexedfile] @? "drop wrongly succeeded with no known copy of modified file" @@ -590,8 +608,12 @@ test_partial_commit = intmpclonerepoInDirect $ do git_annex "get" [annexedfile] @? "get of file failed" annexed_present annexedfile git_annex "unlock" [annexedfile] @? "unlock failed" - not <$> boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "test", File annexedfile] - @? "partial commit of unlocked file not blocked by pre-commit hook" + ifM (annexeval Annex.Version.versionSupportsUnlockedPointers) + ( boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "test", File annexedfile] + @? "partial commit of unlocked file should be allowed in v6 repository" + , not <$> boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "test", File annexedfile] + @? "partial commit of unlocked file not blocked by pre-commit hook" + ) test_fix :: Assertion test_fix = intmpclonerepoInDirect $ do @@ -617,9 +639,13 @@ test_direct :: Assertion test_direct = intmpclonerepoInDirect $ do git_annex "get" [annexedfile] @? "get of file failed" annexed_present annexedfile - git_annex "direct" [] @? "switch to direct mode failed" - annexed_present annexedfile - git_annex "indirect" [] @? "switch to indirect mode failed" + ifM (annexeval Annex.Version.versionSupportsUnlockedPointers) + ( not <$> git_annex "direct" [] @? "switch to direct mode failed to fail in v6 repository" + , do + git_annex "direct" [] @? "switch to direct mode failed" + annexed_present annexedfile + git_annex "indirect" [] @? "switch to indirect mode failed" + ) test_trust :: Assertion test_trust = intmpclonerepo $ do @@ -810,7 +836,7 @@ test_unused = intmpclonerepoInDirect $ do assertEqual ("unused keys differ " ++ desc) (sort expectedkeys) (sort unusedkeys) findkey f = do - r <- Backend.lookupFile f + r <- Annex.WorkTree.lookupFile f return $ fromJust r test_describe :: Assertion @@ -1056,8 +1082,9 @@ test_nonannexed_file_conflict_resolution :: Assertion test_nonannexed_file_conflict_resolution = do check True False check False False - check True True - check False True + whenM (annexeval Annex.Version.versionSupportsDirectMode) $ do + check True True + check False True where check inr1 switchdirect = withtmpclonerepo $ \r1 -> withtmpclonerepo $ \r2 -> @@ -1106,8 +1133,9 @@ test_nonannexed_symlink_conflict_resolution :: Assertion test_nonannexed_symlink_conflict_resolution = do check True False check False False - check True True - check False True + whenM (annexeval Annex.Version.versionSupportsDirectMode) $ do + check True True + check False True where check inr1 switchdirect = withtmpclonerepo $ \r1 -> withtmpclonerepo $ \r2 -> @@ -1380,7 +1408,7 @@ test_crypto = do (c,k) <- annexeval $ do uuid <- Remote.nameToUUID "foo" rs <- Logs.Remote.readRemoteLog - Just k <- Backend.lookupFile annexedfile + Just k <- Annex.WorkTree.lookupFile annexedfile return (fromJust $ M.lookup uuid rs, k) let key = if scheme `elem` ["hybrid","pubkey"] then Just $ Utility.Gpg.KeyIds [Utility.Gpg.testKeyId] @@ -1505,7 +1533,7 @@ intmpclonerepoInDirect a = intmpclonerepo $ ) where isdirect = annexeval $ do - Annex.Init.initialize Nothing + Annex.Init.initialize Nothing Nothing Config.isDirect checkRepo :: Types.Annex a -> FilePath -> IO a @@ -1584,11 +1612,14 @@ clonerepo old new cfg = do ] boolSystem "git" cloneparams @? "git clone failed" configrepo new - indir new $ - git_annex "init" ["-q", new] @? "git annex init failed" + indir new $ do + ver <- annexVersion <$> getTestMode + if ver == Annex.Version.defaultVersion + then git_annex "init" ["-q", new] @? "git annex init failed" + else git_annex "init" ["-q", new, "--version", ver] @? "git annex init failed" unless (bareClone cfg) $ indir new $ - handleforcedirect + setupTestMode return new configrepo :: FilePath -> IO () @@ -1599,10 +1630,6 @@ configrepo dir = indir dir $ do -- avoid signed commits by test suite boolSystem "git" [Param "config", Param "commit.gpgsign", Param "false"] @? "git config failed" -handleforcedirect :: IO () -handleforcedirect = whenM ((==) "1" <$> Utility.Env.getEnvDefault "FORCEDIRECT" "") $ - git_annex "direct" ["-q"] @? "git annex direct failed" - ensuretmpdir :: IO () ensuretmpdir = do e <- doesDirectoryExist tmpdir @@ -1666,10 +1693,10 @@ checkunwritable f = unlessM (annexeval Config.isDirect) $ do checkwritable :: FilePath -> Assertion checkwritable f = do - r <- tryIO $ writeFile f $ content f - case r of - Left _ -> assertFailure $ "unable to modify " ++ f - Right _ -> return () + s <- getFileStatus f + let mode = fileMode s + unless (mode == mode `unionFileModes` ownerWriteMode) $ + assertFailure $ "unable to modify " ++ f checkdangling :: FilePath -> Assertion checkdangling f = ifM (annexeval Config.crippledFileSystem) @@ -1684,7 +1711,7 @@ checkdangling f = ifM (annexeval Config.crippledFileSystem) checklocationlog :: FilePath -> Bool -> Assertion checklocationlog f expected = do thisuuid <- annexeval Annex.UUID.getUUID - r <- annexeval $ Backend.lookupFile f + r <- annexeval $ Annex.WorkTree.lookupFile f case r of Just k -> do uuids <- annexeval $ Remote.keyLocations k @@ -1695,7 +1722,7 @@ checklocationlog f expected = do checkbackend :: FilePath -> Types.Backend -> Assertion checkbackend file expected = do b <- annexeval $ maybe (return Nothing) (Backend.getBackend file) - =<< Backend.lookupFile file + =<< Annex.WorkTree.lookupFile file assertEqual ("backend for " ++ file) (Just expected) b inlocationlog :: FilePath -> Assertion @@ -1721,11 +1748,16 @@ annexed_present = runchecks unannexed :: FilePath -> Assertion unannexed = runchecks [checkregularfile, checkcontent, checkwritable] -withTestEnv :: Bool -> TestTree -> TestTree -withTestEnv forcedirect = withResource prepare release . const +data TestMode = TestMode + { forceDirect :: Bool + , annexVersion :: String + } deriving (Read, Show) + +withTestMode :: TestMode -> TestTree -> TestTree +withTestMode testmode = withResource prepare release . const where prepare = do - setTestEnv forcedirect + setTestMode testmode case tryIngredients [consoleTestReporter] mempty initTests of Nothing -> error "No tests found!?" Just act -> unlessM act $ @@ -1733,8 +1765,8 @@ withTestEnv forcedirect = withResource prepare release . const return () release _ = cleanup' True tmpdir -setTestEnv :: Bool -> IO () -setTestEnv forcedirect = do +setTestMode :: TestMode -> IO () +setTestMode testmode = do whenM (doesDirectoryExist tmpdir) $ error $ "The temporary directory " ++ tmpdir ++ " already exists; cannot run test suite." @@ -1754,9 +1786,24 @@ setTestEnv forcedirect = do , ("GIT_COMMITTER_NAME", "git-annex test") -- force gpg into batch mode for the tests , ("GPG_BATCH", "1") - , ("FORCEDIRECT", if forcedirect then "1" else "") + , ("TESTMODE", show testmode) ] +getTestMode :: IO TestMode +getTestMode = Prelude.read <$> Utility.Env.getEnvDefault "TESTMODE" "" + +setupTestMode :: IO () +setupTestMode = do + testmode <- getTestMode + when (forceDirect testmode) $ + git_annex "direct" ["-q"] @? "git annex direct failed" + whenM (annexeval Annex.Version.versionSupportsUnlockedPointers) $ + boolSystem "git" + [ Param "config" + , Param "annex.largefiles" + , Param ("exclude=" ++ ingitfile) + ] @? "git config annex.largefiles failed" + changeToTmpDir :: FilePath -> IO () changeToTmpDir t = do topdir <- Utility.Env.getEnvDefault "TOPDIR" (error "TOPDIR not set") @@ -1791,7 +1838,7 @@ sha1annexedfiledup :: String sha1annexedfiledup = "sha1foodup" ingitfile :: String -ingitfile = "bar" +ingitfile = "bar.c" content :: FilePath -> String content f diff --git a/Upgrade.hs b/Upgrade.hs index 8d205a874..f9dfb7258 100644 --- a/Upgrade.hs +++ b/Upgrade.hs @@ -18,13 +18,14 @@ import qualified Upgrade.V1 import qualified Upgrade.V2 import qualified Upgrade.V3 import qualified Upgrade.V4 +import qualified Upgrade.V5 checkUpgrade :: Version -> Annex () checkUpgrade = maybe noop error <=< needsUpgrade needsUpgrade :: Version -> Annex (Maybe String) needsUpgrade v - | v == supportedVersion = ok + | v `elem` supportedVersions = ok | v `elem` autoUpgradeableVersions = ifM (upgrade True) ( ok , err "Automatic upgrade failed!" @@ -40,7 +41,7 @@ upgrade :: Bool -> Annex Bool upgrade automatic = do upgraded <- go =<< getVersion when upgraded $ - setVersion supportedVersion + setVersion latestVersion return upgraded where #ifndef mingw32_HOST_OS @@ -53,4 +54,5 @@ upgrade automatic = do go (Just "2") = Upgrade.V2.upgrade go (Just "3") = Upgrade.V3.upgrade automatic go (Just "4") = Upgrade.V4.upgrade automatic + go (Just "5") = Upgrade.V5.upgrade automatic go _ = return True diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs index 801cdafa0..507af9e3b 100644 --- a/Upgrade/V1.hs +++ b/Upgrade/V1.hs @@ -54,14 +54,14 @@ upgrade = do ifM (fromRepo Git.repoIsLocalBare) ( do moveContent - setVersion supportedVersion + setVersion latestVersion , do moveContent updateSymlinks moveLocationLogs Annex.Queue.flush - setVersion supportedVersion + setVersion latestVersion ) Upgrade.V2.upgrade diff --git a/Upgrade/V5.hs b/Upgrade/V5.hs new file mode 100644 index 000000000..2073a0150 --- /dev/null +++ b/Upgrade/V5.hs @@ -0,0 +1,102 @@ +{- git-annex v5 -> v6 upgrade support + - + - Copyright 2015 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Upgrade.V5 where + +import Common.Annex +import Config +import Annex.InodeSentinal +import Annex.Link +import Annex.Direct +import Annex.Content +import Annex.WorkTree +import qualified Database.Keys +import qualified Annex.Content.Direct as Direct +import qualified Git +import qualified Git.LsFiles +import qualified Git.Branch +import Git.FileMode + +upgrade :: Bool -> Annex Bool +upgrade automatic = do + unless automatic $ + showAction "v5 to v6" + whenM isDirect $ do + {- Since upgrade from direct mode changes how files + - are represented in git, commit any changes in the + - work tree first. -} + whenM stageDirect $ do + unless automatic $ + showAction "committing first" + upgradeDirectCommit automatic + "commit before upgrade to annex.version 6" + setDirect False + upgradeDirectWorkTree + removeDirectCruft + showLongNote "Upgraded repository out of direct mode." + showLongNote "Changes have been staged for all annexed files in this repository; you should run `git commit` to commit these changes." + showLongNote "Any other clones of this repository that use direct mode need to be upgraded now, too." + configureSmudgeFilter + -- Inode sentinal file was only used in direct mode and when + -- locking down files as they were added. In v6, it's used more + -- extensively, so make sure it exists, since old repos that didn't + -- use direct mode may not have created it. + unlessM (isDirect) $ + createInodeSentinalFile True + return True + +upgradeDirectCommit :: Bool -> String -> Annex () +upgradeDirectCommit automatic msg = + void $ inRepo $ Git.Branch.commitCommand commitmode + [ Param "-m" + , Param msg + ] + where + commitmode = if automatic then Git.Branch.AutomaticCommit else Git.Branch.ManualCommit + +{- Walk work tree from top and convert all annex symlinks to pointer files, + - staging them in the index, and updating the work tree files with + - either the content of the object, or the pointer file content. -} +upgradeDirectWorkTree :: Annex () +upgradeDirectWorkTree = do + top <- fromRepo Git.repoPath + (l, clean) <- inRepo $ Git.LsFiles.stagedDetails [top] + forM_ l go + void $ liftIO clean + where + go (f, Just _sha, Just mode) | isSymLink mode = do + mk <- lookupFile f + case mk of + Nothing -> noop + Just k -> do + ifM (isJust <$> getAnnexLinkTarget f) + ( writepointer f k + , fromdirect f k + ) + stagePointerFile f =<< hashPointerFile k + Database.Keys.addAssociatedFile k f + return () + go _ = noop + + fromdirect f k = do + -- If linkAnnex fails for some reason, the work tree file + -- still has the content; the annex object file is just + -- not populated with it. Since the work tree file + -- is recorded as an associated file, things will still + -- work that way, it's just not ideal. + void $ linkAnnex k f + writepointer f k = liftIO $ do + nukeFile f + writeFile f (formatPointer k) + +{- Remove all direct mode bookkeeping files. -} +removeDirectCruft :: Annex () +removeDirectCruft = mapM_ go =<< getKeysPresent InAnywhere + where + go k = do + Direct.removeInodeCache k + Direct.removeAssociatedFiles k diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs index b5fe9034e..8bd7ae0cd 100644 --- a/Utility/InodeCache.hs +++ b/Utility/InodeCache.hs @@ -1,7 +1,7 @@ {- Caching a file's inode, size, and modification time - to see when it's changed. - - - Copyright 2013, 2014 Joey Hess <id@joeyh.name> + - Copyright 2013-2014 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} diff --git a/debian/changelog b/debian/changelog index 34e106d39..0488c2eb1 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,22 @@ +git-annex (6.20151225) unstable; urgency=medium + + * Added v6 repository mode, but v5 is still the default for now. + * The upgrade to version 6 is not done fully automatically, because + upgrading a direct mode repository to version 6 will prevent old + versions of git-annex from working in other clones of that repository. + * smudge: New command, used for git smudge filter. + This will replace direct mode. + * init: Configure .git/info/attributes to use git-annex as a smudge + filter. Note that this changes the default behavior of git add in a + newly initialized repository; it will add files to the annex. + * unlock, lock: In v6 mode, unlocking a file changes it from a symlink to a + pointer file, and this change can be committed to the git repository. + * add: In v6 mode, adds modified files to the annex. + * init: --version parameter added to control which supported repository + version to use. + + -- Joey Hess <id@joeyh.name> Tue, 08 Dec 2015 11:14:03 -0400 + git-annex (5.20151219) UNRELEASED; urgency=medium * status: On crippled filesystems, was displaying M for all annexed files diff --git a/doc/direct_mode.mdwn b/doc/direct_mode.mdwn index 4c2cb2dd7..d3e1067f9 100644 --- a/doc/direct_mode.mdwn +++ b/doc/direct_mode.mdwn @@ -9,6 +9,13 @@ understand how to update its working tree. [[!toc]] +## deprecated + +Direct mode is deprecated! Intead, git-annex v6 repositories can simply +have files that are unlocked and thus can be directly accessed and +modified. See [[upgrades]] for details about the transition to v6 +repositories. + ## enabling (and disabling) direct mode Normally, git-annex repositories start off in indirect mode. With some diff --git a/doc/git-annex-add.mdwn b/doc/git-annex-add.mdwn index cfeb8a98e..7f796fec1 100644 --- a/doc/git-annex-add.mdwn +++ b/doc/git-annex-add.mdwn @@ -11,12 +11,18 @@ git annex add `[path ...]` Adds files in the path to the annex. If no path is specified, adds files from the current directory and below. -Normally, files that are already checked into git, or that git has been -configured to ignore will be silently skipped. - -If annex.largefiles is configured, and does not match a file that is being -added, `git annex add` will behave the same as `git add` and add the -non-large file directly to the git repository, instead of to the annex. +Files that are already checked into git and are unmodified, or that +git has been configured to ignore will be silently skipped. + +If annex.largefiles is configured, and does not match a file, `git annex +add` will behave the same as `git add` and add the non-large file directly +to the git repository, instead of to the annex. + +Large files are added to the annex in locked form, which prevents further +modification of their content unless unlocked by [[git-annex-unlock]](1). +(This is not the case however when a repository is in direct mode.) +To add a file to the annex in unlocked form, `git add` can be used instead +(that only works when the repository has annex.version 6 or higher). This command can also be used to add symbolic links, both symlinks to annexed content, and other symlinks. diff --git a/doc/git-annex-direct.mdwn b/doc/git-annex-direct.mdwn index 457ae3116..3cade1a8c 100644 --- a/doc/git-annex-direct.mdwn +++ b/doc/git-annex-direct.mdwn @@ -17,12 +17,18 @@ Note that git commands that operate on the work tree will refuse to run in direct mode repositories. Use `git annex proxy` to safely run such commands. +Note that the direct mode/indirect mode distinction is removed in v6 +git-annex repositories. In such a repository, you can +use [[git-annex-unlock]](1) to make a file's content be directly present. + # SEE ALSO [[git-annex]](1) [[git-annex-indirect]](1) +[[git-annex-unlock]](1) + # AUTHOR Joey Hess <id@joeyh.name> diff --git a/doc/git-annex-indirect.mdwn b/doc/git-annex-indirect.mdwn index 99def6144..321e0fb36 100644 --- a/doc/git-annex-indirect.mdwn +++ b/doc/git-annex-indirect.mdwn @@ -11,9 +11,8 @@ git annex indirect Switches a repository back from direct mode to the default, indirect mode. -Some systems cannot support git-annex in indirect mode, because they -do not support symbolic links. Repositories on such systems instead -default to using direct mode. +Note that the direct mode/indirect mode distinction is removed in v6 +git-annex repositories. # SEE ALSO diff --git a/doc/git-annex-init.mdwn b/doc/git-annex-init.mdwn index 145705105..29522181d 100644 --- a/doc/git-annex-init.mdwn +++ b/doc/git-annex-init.mdwn @@ -24,6 +24,13 @@ mark it as dead (see [[git-annex-dead]](1)). This command is entirely safe, although usually pointless, to run inside an already initialized git-annex repository. +# OPTIONS + +* `--version=N` + + Force the repository to be initialized using a different annex.version + than the current default. + # SEE ALSO [[git-annex]](1) diff --git a/doc/git-annex-lock.mdwn b/doc/git-annex-lock.mdwn index 4bf279fb2..b9e5d3450 100644 --- a/doc/git-annex-lock.mdwn +++ b/doc/git-annex-lock.mdwn @@ -9,7 +9,7 @@ git annex lock `[path ...]` # DESCRIPTION Use this to undo an unlock command if you don't want to modify -the files, or have made modifications you want to discard. +the files any longer, or have made modifications you want to discard. # OPTIONS diff --git a/doc/git-annex-pre-commit.mdwn b/doc/git-annex-pre-commit.mdwn index bc1e86e18..21e5aef68 100644 --- a/doc/git-annex-pre-commit.mdwn +++ b/doc/git-annex-pre-commit.mdwn @@ -12,10 +12,14 @@ This is meant to be called from git's pre-commit hook. `git annex init` automatically creates a pre-commit hook using this. Fixes up symlinks that are staged as part of a commit, to ensure they -point to annexed content. Also handles injecting changes to unlocked -files into the annex. When in a view, updates metadata to reflect changes +point to annexed content. + +When in a view, updates metadata to reflect changes made to files in the view. +When in a repository that has not been upgraded to annex.version 6, +also handles injecting changes to unlocked files into the annex. + # SEE ALSO [[git-annex]](1) diff --git a/doc/git-annex-smudge.mdwn b/doc/git-annex-smudge.mdwn new file mode 100644 index 000000000..7439c8784 --- /dev/null +++ b/doc/git-annex-smudge.mdwn @@ -0,0 +1,43 @@ +# NAME + +git-annex smudge - git filter driver for git-annex + +# SYNOPSIS + +git annex smudge [--clean] file + +# DESCRIPTION + +This command lets git-annex be used as a git filter driver which lets +annexed files in the git repository to be unlocked at all times, instead +of being symlinks. + +When adding a file with `git add`, the annex.largefiles config is +consulted to decide if a given file should be added to git as-is, +or if its content are large enough to need to use git-annex. + +The git configuration to use this command as a filter driver is as follows. +This is normally set up for you by git-annex init, so you should +not need to configure it manually. + + [filter "annex"] + smudge = git-annex smudge %f + clean = git-annex smudge --clean %f + +To make git use that filter driver, it needs to be configured in +the .gitattributes file or in `.git/config/attributes`. The latter +is normally configured when a repository is initialized, with the following +contents: + + * filter=annex + .* !filter + +# SEE ALSO + +[[git-annex]](1) + +# AUTHOR + +Joey Hess <id@joeyh.name> + +Warning: Automatically converted into a man page by mdwn2man. Edit with care. diff --git a/doc/git-annex-unlock.mdwn b/doc/git-annex-unlock.mdwn index ac8c21185..123146836 100644 --- a/doc/git-annex-unlock.mdwn +++ b/doc/git-annex-unlock.mdwn @@ -11,8 +11,16 @@ git annex unlock `[path ...]` Normally, the content of annexed files is protected from being changed. Unlocking an annexed file allows it to be modified. This replaces the symlink for each specified file with a copy of the file's content. -You can then modify it and `git annex add` (or `git commit`) to inject -it back into the annex. +You can then modify it and `git annex add` (or `git commit`) to save your +changes. + +In repositories with annex.version 5 or earlier, unlocking a file is local +to the repository, and is temporary. With version 6, unlocking a file +changes how it is stored in the git repository (from a symlink to a pointer +file), so you can commit it like any other change. Also in version 6, you +can use `git add` to add a fie to the annex in unlocked form. This allows +workflows where a file starts out unlocked, is modified as necessary, and +is locked once it reaches its final version. # OPTIONS diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 2020ccf3f..1a2fd6e67 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -626,6 +626,14 @@ subdirectories). See [[git-annex-diffdriver]](1) for details. +* `smudge` + + This command lets git-annex be used as a git filter driver, allowing + annexed files in the git repository to be unlocked at all times, instead + of being symlinks. + + See [[git-annex-smudge]](1) for details. + * `remotedaemon` Detects when network remotes have received git pushes and fetches from them. diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index aea0c9b98..63f05c42b 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -158,7 +158,8 @@ Using git-annex on a crippled filesystem that does not support symlinks. Data: * An annex pointer file has as its first line the git-annex key - that it's standing in for. Subsequent lines of the file might + that it's standing in for (prefixed with "annex/objects/", similar to + an annex symlink target). Subsequent lines of the file might be a message saying that the file's content is not currently available. An annex pointer file is checked into the git repository the same way that an annex symlink is checked in. @@ -177,8 +178,8 @@ Configuration: the annex. Other files are passed through the smudge/clean as-is and have their contents stored in git. -* annex.direct is repurposed to configure how the assistant adds files. - When set to true, they're added unlocked. +* annex.direct is repurposed to configure how git-annex adds files. + When set to false, it adds symlinks and when true it adds pointer files. git-annex clean: @@ -232,15 +233,11 @@ git annex lock/unlock: transition repositories to using pointers, and a cleaner unlock/lock for repos using symlinks. - unlock will stage a pointer file, and will copy the content of the object - out of .git/annex/objects to the work tree file. (Might want a --hardlink - switch.) + unlock will stage a pointer file, and will link the content of the object + from .git/annex/objects to the work tree file. - lock will replace the current work tree file with the symlink, and stage it. - Note that multiple work tree files could point to the same object. - So, if the link count is > 1, replace the annex object with a copy of - itself to break such a hard link. Always finish by locking down the - permissions of the annex object. + lock will replace the current work tree file with the symlink, and stage it, + and lock down the permissions of the annex object. #### file map @@ -248,7 +245,8 @@ The file map needs to map from `Key -> [File]`. `File -> Key` seems useful to have, but in practice is not worthwhile. Drop and get operations need to know what files in the work tree use a -given key in order to update the work tree. +given key in order to update the work tree. And, we don't want to +overwrite a work tree file if it's been modified when dropping or getting. git-annex commands that look at annex symlinks to get keys to act on will need fall back to either consulting the file map, or looking at the staged @@ -275,13 +273,14 @@ In particular: * Is the smudge filter called at any other time? Seems unlikely but then there could be situations with a detached work tree or such. * Does git call any useful hooks when removing a file from the work tree, - or converting it to not be annexed? + or converting it to not be annexed, or for `git mv` of an annexed file? No! From this analysis, any file map generated by the smudge/clean filters is necessary potentially innaccurate. It may list deleted files. It may or may not reflect current unstaged changes from the work tree. + Follows that any use of the file map needs to verify the info from it, and throw out bad cached info (updating the map to match reality). @@ -306,17 +305,71 @@ just look at the repo content in the first place.. annex.version changes to 6 -Upgrade should be handled automatically. +git config for filter.annex.smudge and filter.annex.clean is set up. -On upgrade, update .gitattributes with a stock configuration, unless -it already mentions "filter=annex". +.gitattributes is updated with a stock configuration, +unless it already mentions "filter=annex". Upgrading a direct mode repo needs to switch it out of bare mode, and needs to run `git annex unlock` on all files (or reach the same result). So will need to stage changes to all annexed files. When a repo has some clones indirect and some direct, the upgraded repo -will have all files unlocked, necessarily in all clones. +will have all files unlocked, necessarily in all clones. This happens +automatically, because when the direct repos are upgraded that causes the +files to be unlocked, while the indirect upgrades don't touch the files. + +#### implementation todo list + +* Still a few test suite failues for v6 with locked files. +* Test suite should make pass for v6 with unlocked files. +* assistant: In v6 mode, adds files in unlocked mode, so they can + continue to be modified. TODO +* When the webapp creates a repo, it forces it into direct mode. But that + will fail when annex.version=6. Long-term, the assistant should make v6 + repos, but short-term, the assistant should make v5 repos in direct mode. +* Reconcile staged changes into the associated files database, whenever + the database is queried. This is needed to handle eg: + git add largefile + git mv largefile othername + git annex move othername --to foo + # fails to drop content from associated file othername, + # because it doesn't know it has that name + # git commit clears up this mess +* A new connection to the Keys database is opened each time. + It would be more efficient to reuse a connection. + However, that needs a way to close the connection, which was a problem. + See 38a23928e9d45b56d6836a4eac703862d63cf93c for details. +* See if the cases where the Keys database is not used can be + optimised. Eg, if the Keys database doesn't exist at all, + we know smudge/clean are not used, so queries don't + need to open the database or do reconciliation, but can simply return none. + Also, no need for Backend.lookupFile to catKeyFile in this case + (when not in direct mode). + However, beware over-optimisation breaking the assistant or perhaps other + long-lived processes. +* Interaction with shared clones. Should avoid hard linking from/to a + object in a shared clone if either repository has the object unlocked. + (And should avoid unlocking an object if it's hard linked to a shared clone, + but that's already accomplished because it avoids unlocking an object if + it's hard linked at all) +* Make automatic merge conflict resolution work for pointer files. + - Should probably automatically handle merge conflicts between annex + symlinks and pointer files too. Maybe by always resulting in a pointer + file, since the symlinks don't work everwhere. +* Crippled filesystem should cause all files to be transparently unlocked. + Note that this presents problems when dealing with merge conflicts and + when pushing changes committed in such a repo. Ideally, should avoid + committing implicit unlocks, or should prevent such commits leaking out + in pushes. +* Dropping a smudged file causes git status to show it as modified, + because the timestamp has changed. Getting a smudged file can also + cause this. Upgrading a direct mode repo also leaves files in this state. + User can use `git add` to clear it up, but better to avoid this, + by updating stat info in the index. + (May need to use libgit2 to do this, cannot find + any plumbing except git-update-index, which is very inneficient for + smudged files.) ---- diff --git a/doc/upgrades.mdwn b/doc/upgrades.mdwn index f5e9cbc3a..9d30c2f14 100644 --- a/doc/upgrades.mdwn +++ b/doc/upgrades.mdwn @@ -43,6 +43,46 @@ conflicts first before upgrading git-annex. The upgrade events, so far: +## v5 -> v6 (git-annex version 6.x) + +The upgrade from v5 to v6 is handled manually. Run `git-annex upgrade` +perform the upgrade. + +Warning: All places that a direct mode repository is cloned to should be +running git-annex version 6.x before you upgrade the repository. +This is necessary because the contents of the repository are changed +in the upgrade, and the old version of git-annex won't be able to +access files after the repo is upgraded. + +This upgrade does away with the direct mode/indirect mode distinction. +A v6 git-annex repository can have some files locked and other files +unlocked, and all git and git-annex commands can be used on both locked and +unlocked files. (Although for locked files to work, the filesystem +must support symbolic links..) + +The behavior of some commands changes in an upgraded repository: + +* `git add` will add files to the annex, in unlocked mode, rather than + adding them directly to the git repository. To cause some files to be + added directly to git, you can configure `annex.largefiles`. For + example: + + git config annex.largefiles "largerthan=100kb and not (include=*.c or include=*.h)" + +* `git annex unlock` and `git annex lock` change how the pointer to + the annexed content is stored in git. + +If a repository is only used in indirect mode, you can use git-annex +v5 and v6 in different clones of the same indirect mode repository without +problems. + +On upgrade, all files in a direct mode repository will be converted to +unlocked files. The upgrade will stage changes to all annexed files in +the git repository, which you can then commit. + +If a repository has some clones using direct mode and some using indirect +mode, all the files will end up unlocked in all clones after the upgrade. + ## v4 -> v5 (git-annex version 5.x) The upgrade from v4 to v5 is handled |