-rw-r--r--  Annex.hs | 22
-rw-r--r--  Annex/AutoMerge.hs | 5
-rw-r--r--  Annex/CatFile.hs | 61
-rw-r--r--  Annex/Content.hs | 250
-rw-r--r--  Annex/Content/Direct.hs | 88
-rw-r--r--  Annex/Direct.hs | 22
-rw-r--r--  Annex/FileMatcher.hs | 22
-rw-r--r--  Annex/Ingest.hs | 289
-rw-r--r--  Annex/Init.hs | 23
-rw-r--r--  Annex/InodeSentinal.hs | 96
-rw-r--r--  Annex/Link.hs | 50
-rw-r--r--  Annex/MakeRepo.hs | 2
-rw-r--r--  Annex/Version.hs | 26
-rw-r--r--  Annex/View.hs | 8
-rw-r--r--  Annex/WorkTree.hs | 40
-rw-r--r--  Assistant/Threads/Committer.hs | 130
-rw-r--r--  Assistant/Threads/TransferScanner.hs | 4
-rw-r--r--  Assistant/Threads/Watcher.hs | 66
-rw-r--r--  Assistant/Types/Changes.hs | 37
-rw-r--r--  Backend.hs | 25
-rw-r--r--  CmdLine/GitAnnex.hs | 2
-rw-r--r--  CmdLine/Seek.hs | 24
-rw-r--r--  Command.hs | 11
-rw-r--r--  Command/Add.hs | 221
-rw-r--r--  Command/AddUnused.hs | 4
-rw-r--r--  Command/AddUrl.hs | 5
-rw-r--r--  Command/ConfigList.hs | 2
-rw-r--r--  Command/Direct.hs | 6
-rw-r--r--  Command/Fsck.hs | 116
-rw-r--r--  Command/Indirect.hs | 6
-rw-r--r--  Command/Init.hs | 43
-rw-r--r--  Command/Lock.hs | 106
-rw-r--r--  Command/Migrate.hs | 2
-rw-r--r--  Command/PreCommit.hs | 19
-rw-r--r--  Command/ReKey.hs | 4
-rw-r--r--  Command/Reinit.hs | 2
-rw-r--r--  Command/Smudge.hs | 115
-rw-r--r--  Command/Unannex.hs | 5
-rw-r--r--  Command/Undo.hs | 4
-rw-r--r--  Command/Unlock.hs | 53
-rw-r--r--  Command/Unused.hs | 5
-rw-r--r--  Command/Upgrade.hs | 1
-rw-r--r--  Command/Version.hs | 3
-rw-r--r--  Config.hs | 18
-rw-r--r--  Database/Fsck.hs | 18
-rw-r--r--  Database/Handle.hs | 212
-rw-r--r--  Database/Keys.hs | 237
-rw-r--r--  Database/Keys/Handle.hs | 55
-rw-r--r--  Database/Queue.hs | 107
-rw-r--r--  Database/Types.hs | 15
-rw-r--r--  Git.hs | 8
-rw-r--r--  Limit.hs | 34
-rw-r--r--  Locations.hs | 10
-rw-r--r--  Logs/PreferredContent.hs | 4
-rw-r--r--  Messages.hs | 8
-rw-r--r--  Remote/Git.hs | 2
-rw-r--r--  Test.hs | 147
-rw-r--r--  Types/KeySource.hs | 4
-rw-r--r--  Upgrade.hs | 6
-rw-r--r--  Upgrade/V1.hs | 4
-rw-r--r--  Upgrade/V5.hs | 104
-rw-r--r--  Utility/FileSystemEncoding.hs | 8
-rw-r--r--  Utility/InodeCache.hs | 2
-rw-r--r--  debian/changelog | 21
-rw-r--r--  doc/direct_mode.mdwn | 7
-rw-r--r--  doc/git-annex-add.mdwn | 18
-rw-r--r--  doc/git-annex-direct.mdwn | 6
-rw-r--r--  doc/git-annex-indirect.mdwn | 5
-rw-r--r--  doc/git-annex-init.mdwn | 7
-rw-r--r--  doc/git-annex-lock.mdwn | 2
-rw-r--r--  doc/git-annex-pre-commit.mdwn | 8
-rw-r--r--  doc/git-annex-smudge.mdwn | 43
-rw-r--r--  doc/git-annex-unlock.mdwn | 12
-rw-r--r--  doc/git-annex.mdwn | 8
-rw-r--r--  doc/todo/smudge.mdwn | 87
-rw-r--r--  doc/upgrades.mdwn | 40
76 files changed, 2395 insertions(+), 897 deletions(-)
diff --git a/Annex.hs b/Annex.hs
index c9a4ef6a0..4f26c497c 100644
--- a/Annex.hs
+++ b/Annex.hs
@@ -60,6 +60,7 @@ import Types.NumCopies
import Types.LockCache
import Types.DesktopNotify
import Types.CleanupActions
+import qualified Database.Keys.Handle as Keys
#ifdef WITH_QUVI
import Utility.Quvi (QuviVersion)
#endif
@@ -134,6 +135,7 @@ data AnnexState = AnnexState
, desktopnotify :: DesktopNotify
, workers :: [Either AnnexState (Async AnnexState)]
, concurrentjobs :: Maybe Int
+ , keysdbhandle :: Maybe Keys.DbHandle
}
newState :: GitConfig -> Git.Repo -> AnnexState
@@ -179,6 +181,7 @@ newState c r = AnnexState
, desktopnotify = mempty
, workers = []
, concurrentjobs = Nothing
+ , keysdbhandle = Nothing
}
{- Makes an Annex state object for the specified git repo.
@@ -193,25 +196,32 @@ new r = do
{- Performs an action in the Annex monad from a starting state,
- returning a new state. -}
run :: AnnexState -> Annex a -> IO (a, AnnexState)
-run s a = do
- mvar <- newMVar s
+run s a = flip run' a =<< newMVar s
+
+run' :: MVar AnnexState -> Annex a -> IO (a, AnnexState)
+run' mvar a = do
r <- runReaderT (runAnnex a) mvar
+ `onException` (flush =<< readMVar mvar)
s' <- takeMVar mvar
+ flush s'
return (r, s')
+ where
+ flush = maybe noop Keys.flushDbQueue . keysdbhandle
{- Performs an action in the Annex monad from a starting state,
- and throws away the new state. -}
eval :: AnnexState -> Annex a -> IO a
-eval s a = do
- mvar <- newMVar s
- runReaderT (runAnnex a) mvar
+eval s a = fst <$> run s a
{- Makes a runner action, that allows diving into IO and from inside
- the IO action, running an Annex action. -}
makeRunner :: Annex (Annex a -> IO a)
makeRunner = do
mvar <- ask
- return $ \a -> runReaderT (runAnnex a) mvar
+ return $ \a -> do
+ (r, s) <- run' mvar a
+ putMVar mvar s
+ return r
getState :: (AnnexState -> v) -> Annex v
getState selector = do
diff --git a/Annex/AutoMerge.hs b/Annex/AutoMerge.hs
index bfbe71dc2..c32c3f66a 100644
--- a/Annex/AutoMerge.hs
+++ b/Annex/AutoMerge.hs
@@ -25,7 +25,6 @@ import qualified Git.Branch
import Git.Types (BlobType(..))
import Config
import Annex.ReplaceFile
-import Git.FileMode
import Annex.VariantFile
import qualified Data.Set as S
@@ -135,7 +134,7 @@ resolveMerge' (Just us) them u = do
| select (LsFiles.unmergedBlobType u) == Just SymlinkBlob =
case select' (LsFiles.unmergedSha u) of
Nothing -> return Nothing
- Just sha -> catKey sha symLinkMode
+ Just sha -> catKey sha
| otherwise = return Nothing
makelink key = do
@@ -174,7 +173,7 @@ resolveMerge' (Just us) them u = do
case select' (LsFiles.unmergedSha u) of
Nothing -> noop
Just sha -> do
- link <- catLink True sha
+ link <- catSymLinkTarget sha
replacewithlink item link
resolveby a = do
diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs
index 179149844..aefccd424 100644
--- a/Annex/CatFile.hs
+++ b/Annex/CatFile.hs
@@ -1,6 +1,6 @@
{- git cat-file interface, with handle automatically stored in the Annex monad
-
- - Copyright 2011-2013 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -16,7 +16,7 @@ module Annex.CatFile (
catKey,
catKeyFile,
catKeyFileHEAD,
- catLink,
+ catSymLinkTarget,
) where
import qualified Data.ByteString.Lazy as L
@@ -29,8 +29,8 @@ import qualified Git.CatFile
import qualified Annex
import Git.Types
import Git.FilePath
-import Git.FileMode
import qualified Git.Ref
+import Annex.Link
catFile :: Git.Branch -> FilePath -> Annex L.ByteString
catFile branch file = do
@@ -80,52 +80,17 @@ catFileStop = do
(s { Annex.catfilehandles = M.empty }, Annex.catfilehandles s)
liftIO $ mapM_ Git.CatFile.catFileStop (M.elems m)
-{- From the Sha or Ref of a symlink back to the key.
- -
- - Requires a mode witness, to guarantee that the file is a symlink.
- -}
-catKey :: Ref -> FileMode -> Annex (Maybe Key)
-catKey = catKey' True
-
-catKey' :: Bool -> Sha -> FileMode -> Annex (Maybe Key)
-catKey' modeguaranteed sha mode
- | isSymLink mode = do
- l <- catLink modeguaranteed sha
- return $ if isLinkToAnnex l
- then fileKey $ takeFileName l
- else Nothing
- | otherwise = return Nothing
+{- From ref to a symlink or a pointer file, get the key. -}
+catKey :: Ref -> Annex (Maybe Key)
+catKey ref = parseLinkOrPointer <$> catObject ref
{- Gets a symlink target. -}
-catLink :: Bool -> Sha -> Annex String
-catLink modeguaranteed sha = fromInternalGitPath . decodeBS <$> get
- where
- -- If the mode is not guaranteed to be correct, avoid
- -- buffering the whole file content, which might be large.
- -- 8192 is enough if it really is a symlink.
- get
- | modeguaranteed = catObject sha
- | otherwise = L.take 8192 <$> catObject sha
-
-{- Looks up the key corresponding to the Ref using the running cat-file.
- -
- - Currently this always has to look in HEAD, because cat-file --batch
- - does not offer a way to specify that we want to look up a tree object
- - in the index. So if the index has a file staged not as a symlink,
- - and it is a symlink in head, the wrong mode is gotten.
- - Also, we have to assume the file is a symlink if it's not yet committed
- - to HEAD. For these reasons, modeguaranteed is not set.
- -}
-catKeyChecked :: Bool -> Ref -> Annex (Maybe Key)
-catKeyChecked needhead ref@(Ref r) =
- catKey' False ref =<< findmode <$> catTree treeref
+catSymLinkTarget :: Sha -> Annex String
+catSymLinkTarget sha = fromInternalGitPath . decodeBS <$> get
where
- pathparts = split "/" r
- dir = intercalate "/" $ take (length pathparts - 1) pathparts
- file = fromMaybe "" $ lastMaybe pathparts
- treeref = Ref $ if needhead then "HEAD" ++ dir ++ "/" else dir ++ "/"
- findmode = fromMaybe symLinkMode . headMaybe .
- map snd . filter (\p -> fst p == file)
+ -- Avoid buffering the whole file content, which might be large.
+ -- 8192 is enough if it really is a symlink or pointer file.
+ get = L.take 8192 <$> catObject sha
{- From a file in the repository back to the key.
-
@@ -151,8 +116,8 @@ catKeyChecked needhead ref@(Ref r) =
catKeyFile :: FilePath -> Annex (Maybe Key)
catKeyFile f = ifM (Annex.getState Annex.daemon)
( catKeyFileHEAD f
- , catKeyChecked True $ Git.Ref.fileRef f
+ , catKey $ Git.Ref.fileRef f
)
catKeyFileHEAD :: FilePath -> Annex (Maybe Key)
-catKeyFileHEAD f = catKeyChecked False $ Git.Ref.fileFromRef Git.Ref.headRef f
+catKeyFileHEAD f = catKey $ Git.Ref.fileFromRef Git.Ref.headRef f
diff --git a/Annex/Content.hs b/Annex/Content.hs
index 60ffb8141..e501df072 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -1,6 +1,6 @@
{- git-annex file content managing
-
- - Copyright 2010-2014 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -24,6 +24,12 @@ module Annex.Content (
withTmp,
checkDiskSpace,
moveAnnex,
+ populatePointerFile,
+ linkAnnex,
+ linkAnnex',
+ LinkAnnexResult(..),
+ unlinkAnnex,
+ checkedCopyFile,
sendAnnex,
prepSendAnnex,
removeAnnex,
@@ -38,6 +44,7 @@ module Annex.Content (
dirKeys,
withObjectLoc,
staleKeysPrune,
+ isUnmodified,
) where
import System.IO.Unsafe (unsafeInterleaveIO)
@@ -61,15 +68,19 @@ import Config
import Git.SharedRepository
import Annex.Perms
import Annex.Link
-import Annex.Content.Direct
+import qualified Annex.Content.Direct as Direct
import Annex.ReplaceFile
import Annex.LockPool
import Messages.Progress
import qualified Types.Remote
import qualified Types.Backend
import qualified Backend
+import qualified Database.Keys
import Types.NumCopies
import Annex.UUID
+import Annex.InodeSentinal
+import Utility.InodeCache
+import Utility.PosixFiles
{- Checks if a given key's content is currently present. -}
inAnnex :: Key -> Annex Bool
@@ -79,7 +90,10 @@ inAnnex key = inAnnexCheck key $ liftIO . doesFileExist
inAnnexCheck :: Key -> (FilePath -> Annex Bool) -> Annex Bool
inAnnexCheck key check = inAnnex' id False check key
-{- Generic inAnnex, handling both indirect and direct mode.
+{- inAnnex that performs an arbitrary check of the key's content.
+ -
+ - When the content is unlocked, it must also be unmodified, or the bad
+ - value will be returned.
-
- In direct mode, at least one of the associated files must pass the
- check. Additionally, the file must be unmodified.
@@ -88,14 +102,22 @@ inAnnex' :: (a -> Bool) -> a -> (FilePath -> Annex a) -> Key -> Annex a
inAnnex' isgood bad check key = withObjectLoc key checkindirect checkdirect
where
checkindirect loc = do
- whenM (fromRepo Git.repoIsUrl) $
- error "inAnnex cannot check remote repo"
- check loc
+ r <- check loc
+ if isgood r
+ then do
+ cache <- Database.Keys.getInodeCaches key
+ if null cache
+ then return r
+ else ifM (sameInodeCache loc cache)
+ ( return r
+ , return bad
+ )
+ else return bad
checkdirect [] = return bad
checkdirect (loc:locs) = do
r <- check loc
if isgood r
- then ifM (goodContent key loc)
+ then ifM (Direct.goodContent key loc)
( return r
, checkdirect locs
)
@@ -371,7 +393,7 @@ withTmp key action = do
return res
{- Checks that there is disk space available to store a given key,
- - in a destination (or the annex) printing a warning if not.
+ - in a destination directory (or the annex) printing a warning if not.
-
- If the destination is on the same filesystem as the annex,
- checks for any other running downloads, removing the amount of data still
@@ -379,7 +401,12 @@ withTmp key action = do
- when doing concurrent downloads.
-}
checkDiskSpace :: Maybe FilePath -> Key -> Integer -> Bool -> Annex Bool
-checkDiskSpace destdir key alreadythere samefilesystem = ifM (Annex.getState Annex.force)
+checkDiskSpace destdir key = checkDiskSpace' (fromMaybe 1 (keySize key)) destdir key
+
+{- Allows specifying the size of the key, if it's known, which is useful
+ - as not all keys know their size. -}
+checkDiskSpace' :: Integer -> Maybe FilePath -> Key -> Integer -> Bool -> Annex Bool
+checkDiskSpace' need destdir key alreadythere samefilesystem = ifM (Annex.getState Annex.force)
( return True
, do
-- We can't get inprogress and free at the same
@@ -392,8 +419,8 @@ checkDiskSpace destdir key alreadythere samefilesystem = ifM (Annex.getState Ann
then sizeOfDownloadsInProgress (/= key)
else pure 0
free <- liftIO . getDiskFree =<< dir
- case (free, fromMaybe 1 (keySize key)) of
- (Just have, need) -> do
+ case free of
+ Just have -> do
reserve <- annexDiskReserve <$> Annex.getGitConfig
let delta = need + reserve - have - alreadythere + inprogress
let ok = delta <= 0
@@ -412,7 +439,10 @@ checkDiskSpace destdir key alreadythere samefilesystem = ifM (Annex.getState Ann
{- Moves a key's content into .git/annex/objects/
-
- - In direct mode, moves it to the associated file, or files.
+ - When a key has associated pointer files, the object is hard
+ - linked (or copied) to the files, and the object file is left thawed.
+
+ - In direct mode, moves the object file to the associated file, or files.
-
- What if the key there already has content? This could happen for
- various reasons; perhaps the same content is being annexed again.
@@ -440,7 +470,12 @@ moveAnnex key src = withObjectLoc key storeobject storedirect
( alreadyhave
, modifyContent dest $ do
liftIO $ moveFile src dest
- freezeContent dest
+ fs <- Database.Keys.getAssociatedFiles key
+ if null fs
+ then freezeContent dest
+ else do
+ mapM_ (populatePointerFile key dest) fs
+ Database.Keys.storeInodeCaches key (dest:fs)
)
storeindirect = storeobject =<< calcRepo (gitAnnexLocation key)
@@ -458,21 +493,116 @@ moveAnnex key src = withObjectLoc key storeobject storedirect
v <- isAnnexLink f
if Just key == v
then do
- updateInodeCache key src
+ Direct.updateInodeCache key src
replaceFile f $ liftIO . moveFile src
chmodContent f
forM_ fs $
- addContentWhenNotPresent key f
- else ifM (goodContent key f)
+ Direct.addContentWhenNotPresent key f
+ else ifM (Direct.goodContent key f)
( storedirect' alreadyhave fs
, storedirect' fallback fs
)
alreadyhave = liftIO $ removeFile src
+populatePointerFile :: Key -> FilePath -> FilePath -> Annex ()
+populatePointerFile k obj f = go =<< isPointerFile f
+ where
+ go (Just k') | k == k' = do
+ liftIO $ nukeFile f
+ unlessM (linkAnnex'' k obj f) $
+ liftIO $ writeFile f (formatPointer k)
+ go _ = return ()
+
+{- Hard links a file into .git/annex/objects/, falling back to a copy
+ - if necessary. Does nothing if the object file already exists.
+ -
+ - Does not lock down the hard linked object, so that the user can modify
+ - the source file. So, adding an object to the annex this way can
+ - prevent losing the content if the source file is deleted, but does not
+ - guard against modifications.
+ -}
+linkAnnex :: Key -> FilePath -> Maybe InodeCache -> Annex LinkAnnexResult
+linkAnnex key src srcic = do
+ dest <- calcRepo (gitAnnexLocation key)
+ modifyContent dest $ linkAnnex' key src srcic dest
+
+{- Hard links (or copies) src to dest, one of which should be the
+ - annex object. Updates inode cache for src and for dest when it's
+ - changed. -}
+linkAnnex' :: Key -> FilePath -> Maybe InodeCache -> FilePath -> Annex LinkAnnexResult
+linkAnnex' _ _ Nothing _ = return LinkAnnexFailed
+linkAnnex' key src (Just srcic) dest =
+ ifM (liftIO $ doesFileExist dest)
+ ( do
+ Database.Keys.addInodeCaches key [srcic]
+ return LinkAnnexNoop
+ , ifM (linkAnnex'' key src dest)
+ ( do
+ thawContent dest
+ -- src could have changed while being copied
+ -- to dest
+ mcache <- withTSDelta (liftIO . genInodeCache src)
+ case mcache of
+ Just srcic' | compareStrong srcic srcic' -> do
+ destic <- withTSDelta (liftIO . genInodeCache dest)
+ Database.Keys.addInodeCaches key $
+ catMaybes [destic, Just srcic]
+ return LinkAnnexOk
+ _ -> do
+ liftIO $ nukeFile dest
+ failed
+ , failed
+ )
+ )
+ where
+ failed = do
+ Database.Keys.addInodeCaches key [srcic]
+ return LinkAnnexFailed
+
+data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop
+
+{- Hard links or copies src to dest. Only uses a hard link if src
+ - is not already hardlinked to elsewhere. Checks disk reserve before
+ - copying, and will fail if not enough space, or if the dest file
+ - already exists. -}
+linkAnnex'' :: Key -> FilePath -> FilePath -> Annex Bool
+linkAnnex'' key src dest = catchBoolIO $ do
+ s <- liftIO $ getFileStatus src
+ let copy = checkedCopyFile' key src dest s
+#ifndef mingw32_HOST_OS
+ if linkCount s > 1
+ then copy
+ else liftIO (createLink src dest >> return True)
+ `catchIO` const copy
+#else
+ copy
+#endif
+
+{- Removes the annex object file for a key. Lowlevel. -}
+unlinkAnnex :: Key -> Annex ()
+unlinkAnnex key = do
+ obj <- calcRepo $ gitAnnexLocation key
+ modifyContent obj $ do
+ secureErase obj
+ liftIO $ nukeFile obj
+
+{- Checks disk space before copying. -}
+checkedCopyFile :: Key -> FilePath -> FilePath -> Annex Bool
+checkedCopyFile key src dest = catchBoolIO $
+ checkedCopyFile' key src dest
+ =<< liftIO (getFileStatus src)
+
+checkedCopyFile' :: Key -> FilePath -> FilePath -> FileStatus -> Annex Bool
+checkedCopyFile' key src dest s = catchBoolIO $
+ ifM (checkDiskSpace' (fromIntegral $ fileSize s) (Just $ takeDirectory dest) key 0 True)
+ ( liftIO $ copyFileExternal CopyAllMetaData src dest
+ , return False
+ )
+
{- Runs an action to transfer an object's content.
-
- - In direct mode, it's possible for the file to change as it's being sent.
+ - In some cases, it's possible for the file to change as it's being sent.
- If this happens, runs the rollback action and returns False. The
- rollback action should remove the data that was transferred.
-}
@@ -492,8 +622,9 @@ sendAnnex key rollback sendobject = go =<< prepSendAnnex key
{- Returns a file that contains an object's content,
- and a check to run after the transfer is complete.
-
- - In direct mode, it's possible for the file to change as it's being sent,
- - and the check detects this case and returns False.
+ - When a file is unlocked (or in direct mode), it's possible for its
+ - content to change as it's being sent. The check detects this case
+ - and returns False.
-
- Note that the returned check action is, in some cases, run in the
- Annex monad of the remote that is receiving the object, rather than
@@ -502,10 +633,23 @@ sendAnnex key rollback sendobject = go =<< prepSendAnnex key
prepSendAnnex :: Key -> Annex (Maybe (FilePath, Annex Bool))
prepSendAnnex key = withObjectLoc key indirect direct
where
- indirect f = return $ Just (f, return True)
+ indirect f = do
+ cache <- Database.Keys.getInodeCaches key
+ cache' <- if null cache
+ -- Since no inode cache is in the database, this
+ -- object is not currently unlocked. But that could
+ -- change while the transfer is in progress, so
+ -- generate an inode cache for the starting
+ -- content.
+ then maybeToList <$>
+ withTSDelta (liftIO . genInodeCache f)
+ else pure cache
+ return $ if null cache'
+ then Nothing
+ else Just (f, sameInodeCache f cache')
direct [] = return Nothing
direct (f:fs) = do
- cache <- recordedInodeCache key
+ cache <- Direct.recordedInodeCache key
-- check that we have a good file
ifM (sameInodeCache f cache)
( return $ Just (f, sameInodeCache f cache)
@@ -520,7 +664,7 @@ prepSendAnnex key = withObjectLoc key indirect direct
withObjectLoc :: Key -> (FilePath -> Annex a) -> ([FilePath] -> Annex a) -> Annex a
withObjectLoc key indirect direct = ifM isDirect
( do
- fs <- associatedFiles key
+ fs <- Direct.associatedFiles key
if null fs
then goindirect
else direct fs
@@ -544,6 +688,9 @@ cleanObjectLoc key cleaner = do
{- Removes a key's file from .git/annex/objects/
-
+ - When a key has associated pointer files, they are checked for
+ - modifications, and if unmodified, are reset.
+ -
- In direct mode, deletes the associated files or files, and replaces
- them with symlinks.
-}
@@ -553,16 +700,50 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect
remove file = cleanObjectLoc key $ do
secureErase file
liftIO $ nukeFile file
- removeInodeCache key
+ mapM_ (void . tryIO . resetpointer)
+ =<< Database.Keys.getAssociatedFiles key
+ Database.Keys.removeInodeCaches key
+ Direct.removeInodeCache key
+ resetpointer file = ifM (isUnmodified key file)
+ ( do
+ secureErase file
+ liftIO $ nukeFile file
+ liftIO $ writeFile file (formatPointer key)
+ -- Can't delete the pointer file.
+ -- If it was a hard link to the annex object,
+ -- that object might have been frozen as part of the
+ -- removal process, so thaw it.
+ , void $ tryIO $ thawContent file
+ )
removedirect fs = do
- cache <- recordedInodeCache key
- removeInodeCache key
+ cache <- Direct.recordedInodeCache key
+ Direct.removeInodeCache key
mapM_ (resetfile cache) fs
- resetfile cache f = whenM (sameInodeCache f cache) $ do
+ resetfile cache f = whenM (Direct.sameInodeCache f cache) $ do
l <- calcRepo $ gitAnnexLink f key
secureErase f
replaceFile f $ makeAnnexLink l
+{- Check if a file contains the unmodified content of the key.
+ -
+ - The expensive way to tell is to do a verification of its content.
+ - The cheaper way is to see if the InodeCache for the key matches the
+ - file. -}
+isUnmodified :: Key -> FilePath -> Annex Bool
+isUnmodified key f = go =<< geti
+ where
+ go Nothing = return False
+ go (Just fc) = cheapcheck fc <||> expensivecheck fc
+ cheapcheck fc = anyM (compareInodeCaches fc)
+ =<< Database.Keys.getInodeCaches key
+ expensivecheck fc = ifM (verifyKeyContent AlwaysVerify Types.Remote.UnVerified key f)
+ -- The file could have been modified while it was
+ -- being verified. Detect that.
+ ( geti >>= maybe (return False) (compareInodeCaches fc)
+ , return False
+ )
+ geti = withTSDelta (liftIO . genInodeCache f)
+
{- Runs the secure erase command if set, otherwise does nothing.
- File may or may not be deleted at the end; caller is responsible for
- making sure it's deleted. -}
@@ -586,13 +767,14 @@ moveBad key = do
logStatus key InfoMissing
return dest
-data KeyLocation = InAnnex | InRepository
+data KeyLocation = InAnnex | InRepository | InAnywhere
{- List of keys whose content exists in the specified location.
- - InAnnex only lists keys under .git/annex/objects,
- - while InRepository, in direct mode, also finds keys located in the
- - work tree.
+ - InAnnex only lists keys with content in .git/annex/objects,
+ - while InRepository, in direct mode, also finds keys with content
+ - in the work tree. InAnywhere lists all keys that have directories
+ - in .git/annex/objects, whether or not the content is present.
-
- Note that InRepository has to check whether direct mode files
- have goodContent.
@@ -621,6 +803,11 @@ getKeysPresent keyloc = do
morekeys <- unsafeInterleaveIO a
continue (morekeys++keys) as
+ inanywhere = case keyloc of
+ InAnywhere -> True
+ _ -> False
+
+ present _ _ _ | inanywhere = pure True
present _ False d = presentInAnnex d
present s True d = presentDirect s d <||> presentInAnnex d
@@ -632,7 +819,8 @@ getKeysPresent keyloc = do
InRepository -> case fileKey (takeFileName d) of
Nothing -> return False
Just k -> Annex.eval s $
- anyM (goodContent k) =<< associatedFiles k
+ anyM (Direct.goodContent k) =<< Direct.associatedFiles k
+ InAnywhere -> return True
{- In order to run Annex monad actions within unsafeInterleaveIO,
- the current state is taken and reused. No changes made to this
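
For orientation, here is a hedged sketch (not from the commit; it relies only on the signatures shown above, and the name ingestUnlockedCopy is invented) of how the new linkAnnex and isUnmodified primitives fit together when putting an unlocked work tree file's content into the annex:

    import Common.Annex
    import Annex.Content (linkAnnex, LinkAnnexResult(..), isUnmodified)
    import Annex.InodeSentinal (withTSDelta)
    import Utility.InodeCache (genInodeCache)

    -- Hard links (or copies) an unlocked work tree file into the annex,
    -- then reports whether the file still matches the key's content.
    ingestUnlockedCopy :: Key -> FilePath -> Annex Bool
    ingestUnlockedCopy key file = do
        -- Snapshot the file's inode first, so a modification made while
        -- the link/copy happens is detected by linkAnnex.
        mcache <- withTSDelta (liftIO . genInodeCache file)
        r <- linkAnnex key file mcache
        case r of
            LinkAnnexFailed -> return False
            _ -> isUnmodified key file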
diff --git a/Annex/Content/Direct.hs b/Annex/Content/Direct.hs
index 86e053d7f..3b9d1aea2 100644
--- a/Annex/Content/Direct.hs
+++ b/Annex/Content/Direct.hs
@@ -1,12 +1,13 @@
{- git-annex file content managing for direct mode
-
+ - This is deprecated, and will be removed when direct mode gets removed
+ - from git-annex.
+ -
- Copyright 2012-2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
-{-# LANGUAGE CPP #-}
-
module Annex.Content.Direct (
associatedFiles,
associatedFilesRelative,
@@ -20,21 +21,15 @@ module Annex.Content.Direct (
addInodeCache,
writeInodeCache,
compareInodeCaches,
- compareInodeCachesWith,
sameInodeCache,
elemInodeCaches,
sameFileStatus,
removeInodeCache,
toInodeCache,
- inodesChanged,
- createInodeSentinalFile,
addContentWhenNotPresent,
- withTSDelta,
- getTSDelta,
) where
import Common.Annex
-import qualified Annex
import Annex.Perms
import qualified Git
import Utility.Tmp
@@ -43,6 +38,7 @@ import Utility.InodeCache
import Utility.CopyFile
import Annex.ReplaceFile
import Annex.Link
+import Annex.InodeSentinal
{- Absolute FilePaths of Files in the tree that are associated with a key. -}
associatedFiles :: Key -> Annex [FilePath]
@@ -165,14 +161,6 @@ removeInodeCache key = withInodeCacheFile key $ \f ->
withInodeCacheFile :: Key -> (FilePath -> Annex a) -> Annex a
withInodeCacheFile key a = a =<< calcRepo (gitAnnexInodeCache key)
-{- Checks if a InodeCache matches the current version of a file. -}
-sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool
-sameInodeCache _ [] = return False
-sameInodeCache file old = go =<< withTSDelta (liftIO . genInodeCache file)
- where
- go Nothing = return False
- go (Just curr) = elemInodeCaches curr old
-
{- Checks if a FileStatus matches the recorded InodeCache of a file. -}
sameFileStatus :: Key -> FilePath -> FileStatus -> Annex Bool
sameFileStatus key f status = do
@@ -183,25 +171,6 @@ sameFileStatus key f status = do
([], Nothing) -> return True
_ -> return False
-{- If the inodes have changed, only the size and mtime are compared. -}
-compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool
-compareInodeCaches x y
- | compareStrong x y = return True
- | otherwise = ifM inodesChanged
- ( return $ compareWeak x y
- , return False
- )
-
-elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool
-elemInodeCaches _ [] = return False
-elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l)
- ( return True
- , elemInodeCaches c ls
- )
-
-compareInodeCachesWith :: Annex InodeComparisonType
-compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly )
-
{- Copies the contentfile to the associated file, if the associated
- file has no content. If the associated file does have content,
- even if the content differs, it's left unchanged. -}
@@ -212,52 +181,3 @@ addContentWhenNotPresent key contentfile associatedfile = do
replaceFile associatedfile $
liftIO . void . copyFileExternal CopyAllMetaData contentfile
updateInodeCache key associatedfile
-
-{- Some filesystems get new inodes each time they are mounted.
- - In order to work on such a filesystem, a sentinal file is used to detect
- - when the inodes have changed.
- -
- - If the sentinal file does not exist, we have to assume that the
- - inodes have changed.
- -}
-inodesChanged :: Annex Bool
-inodesChanged = sentinalInodesChanged <$> sentinalStatus
-
-withTSDelta :: (TSDelta -> Annex a) -> Annex a
-withTSDelta a = a =<< getTSDelta
-
-getTSDelta :: Annex TSDelta
-#ifdef mingw32_HOST_OS
-getTSDelta = sentinalTSDelta <$> sentinalStatus
-#else
-getTSDelta = pure noTSDelta -- optimisation
-#endif
-
-sentinalStatus :: Annex SentinalStatus
-sentinalStatus = maybe check return =<< Annex.getState Annex.sentinalstatus
- where
- check = do
- sc <- liftIO . checkSentinalFile =<< annexSentinalFile
- Annex.changeState $ \s -> s { Annex.sentinalstatus = Just sc }
- return sc
-
-{- The sentinal file is only created when first initializing a repository.
- - If there are any annexed objects in the repository already, creating
- - the file would invalidate their inode caches. -}
-createInodeSentinalFile :: Annex ()
-createInodeSentinalFile = unlessM (alreadyexists <||> hasobjects) $ do
- s <- annexSentinalFile
- createAnnexDirectory (parentDir (sentinalFile s))
- liftIO $ writeSentinalFile s
- where
- alreadyexists = liftIO. sentinalFileExists =<< annexSentinalFile
- hasobjects = liftIO . doesDirectoryExist =<< fromRepo gitAnnexObjectDir
-
-annexSentinalFile :: Annex SentinalFile
-annexSentinalFile = do
- sentinalfile <- fromRepo gitAnnexInodeSentinal
- sentinalcachefile <- fromRepo gitAnnexInodeSentinalCache
- return SentinalFile
- { sentinalFile = sentinalfile
- , sentinalCacheFile = sentinalcachefile
- }
diff --git a/Annex/Direct.hs b/Annex/Direct.hs
index 495ff5e75..8c3d5bb56 100644
--- a/Annex/Direct.hs
+++ b/Annex/Direct.hs
@@ -1,5 +1,8 @@
{- git-annex direct mode
-
+ - This is deprecated, and will be removed when direct mode gets removed
+ - from git-annex.
+ -
- Copyright 2012-2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
@@ -36,6 +39,7 @@ import Annex.VariantFile
import Git.Index
import Annex.Index
import Annex.LockFile
+import Annex.InodeSentinal
{- Uses git ls-files to find files that need to be committed, and stages
- them into the index. Returns True if some changes were staged. -}
@@ -53,8 +57,8 @@ stageDirect = do
{- Determine what kind of modified or deleted file this is, as
- efficiently as we can, by getting any key that's associated
- with it in git, as well as its stat info. -}
- go (file, Just sha, Just mode) = withTSDelta $ \delta -> do
- shakey <- catKey sha mode
+ go (file, Just sha, Just _mode) = withTSDelta $ \delta -> do
+ shakey <- catKey sha
mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file
mcache <- liftIO $ maybe (pure Nothing) (toInodeCache delta file) mstat
filekey <- isAnnexLink file
@@ -107,8 +111,8 @@ preCommitDirect = do
withkey (DiffTree.srcsha diff) (DiffTree.srcmode diff) removeAssociatedFile
withkey (DiffTree.dstsha diff) (DiffTree.dstmode diff) addAssociatedFile
where
- withkey sha mode a = when (sha /= nullSha) $ do
- k <- catKey sha mode
+ withkey sha _mode a = when (sha /= nullSha) $ do
+ k <- catKey sha
case k of
Nothing -> noop
Just key -> void $ a key $
@@ -256,16 +260,16 @@ updateWorkTree d oldref force = do
makeabs <- flip fromTopFilePath <$> gitRepo
let fsitems = zip (map (makeabs . DiffTree.file) items) items
forM_ fsitems $
- go makeabs DiffTree.srcsha DiffTree.srcmode moveout moveout_raw
+ go makeabs DiffTree.srcsha moveout moveout_raw
forM_ fsitems $
- go makeabs DiffTree.dstsha DiffTree.dstmode movein movein_raw
+ go makeabs DiffTree.dstsha movein movein_raw
void $ liftIO cleanup
where
- go makeabs getsha getmode a araw (f, item)
+ go makeabs getsha a araw (f, item)
| getsha item == nullSha = noop
| otherwise = void $
tryNonAsync . maybe (araw item makeabs f) (\k -> void $ a item makeabs k f)
- =<< catKey (getsha item) (getmode item)
+ =<< catKey (getsha item)
moveout _ _ = removeDirect
@@ -395,7 +399,7 @@ changedDirect oldk f = do
whenM (pure (null locs) <&&> not <$> inAnnex oldk) $
logStatus oldk InfoMissing
-{- Enable/disable direct mode. -}
+{- Git config settings to enable/disable direct mode. -}
setDirect :: Bool -> Annex ()
setDirect wantdirect = do
if wantdirect
diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs
index 8b0db60ad..a008198f3 100644
--- a/Annex/FileMatcher.hs
+++ b/Annex/FileMatcher.hs
@@ -14,7 +14,6 @@ import Limit
import Utility.Matcher
import Types.Group
import Logs.Group
-import Logs.Remote
import Annex.UUID
import qualified Annex
import Types.FileMatcher
@@ -53,8 +52,8 @@ parsedToMatcher parsed = case partitionEithers parsed of
([], vs) -> Right $ generate vs
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
-exprParser :: FileMatcher Annex -> FileMatcher Annex -> GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token (MatchFiles Annex))]
-exprParser matchstandard matchgroupwanted groupmap configmap mu expr =
+exprParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token (MatchFiles Annex))]
+exprParser matchstandard matchgroupwanted getgroupmap configmap mu expr =
map parse $ tokenizeMatcher expr
where
parse = parseToken
@@ -62,12 +61,12 @@ exprParser matchstandard matchgroupwanted groupmap configmap mu expr =
matchgroupwanted
(limitPresent mu)
(limitInDir preferreddir)
- groupmap
+ getgroupmap
preferreddir = fromMaybe "public" $
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
-parseToken :: FileMatcher Annex -> FileMatcher Annex -> MkLimit Annex -> MkLimit Annex -> GroupMap -> String -> Either String (Token (MatchFiles Annex))
-parseToken matchstandard matchgroupwanted checkpresent checkpreferreddir groupmap t
+parseToken :: FileMatcher Annex -> FileMatcher Annex -> MkLimit Annex -> MkLimit Annex -> Annex GroupMap -> String -> Either String (Token (MatchFiles Annex))
+parseToken matchstandard matchgroupwanted checkpresent checkpreferreddir getgroupmap t
| t `elem` tokens = Right $ token t
| t == "standard" = call matchstandard
| t == "groupwanted" = call matchgroupwanted
@@ -86,7 +85,7 @@ parseToken matchstandard matchgroupwanted checkpresent checkpreferreddir groupma
, ("largerthan", limitSize (>))
, ("smallerthan", limitSize (<))
, ("metadata", limitMetaData)
- , ("inallgroup", limitInAllGroup groupmap)
+ , ("inallgroup", limitInAllGroup getgroupmap)
]
where
(k, v) = separate (== '=') t
@@ -109,9 +108,12 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
where
go Nothing = return matchAll
go (Just expr) = do
- gm <- groupMap
- rc <- readRemoteLog
u <- getUUID
+ -- No need to read remote configs, that's only needed for
+ -- inpreferreddir, which is used in preferred content
+ -- expressions but does not make sense in the
+ -- annex.largefiles expression.
+ let emptyconfig = M.empty
either badexpr return $
- parsedToMatcher $ exprParser matchAll matchAll gm rc (Just u) expr
+ parsedToMatcher $ exprParser matchAll matchAll groupMap emptyconfig (Just u) expr
badexpr e = error $ "bad annex.largefiles configuration: " ++ e
diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs
new file mode 100644
index 000000000..b2eb27616
--- /dev/null
+++ b/Annex/Ingest.hs
@@ -0,0 +1,289 @@
+{- git-annex content ingestion
+ -
+ - Copyright 2010-2015 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE CPP #-}
+
+module Annex.Ingest (
+ LockedDown(..),
+ lockDown,
+ ingest,
+ finishIngestDirect,
+ finishIngestUnlocked,
+ cleanOldKeys,
+ addLink,
+ makeLink,
+ restoreFile,
+ forceParams,
+) where
+
+import Common.Annex
+import Types.KeySource
+import Backend
+import Annex.Content
+import Annex.Content.Direct
+import Annex.Perms
+import Annex.Link
+import Annex.MetaData
+import Logs.Location
+import qualified Annex
+import qualified Annex.Queue
+import qualified Database.Keys
+import Config
+import Utility.InodeCache
+import Annex.ReplaceFile
+import Utility.Tmp
+import Utility.CopyFile
+import Annex.InodeSentinal
+#ifdef WITH_CLIBS
+#ifndef __ANDROID__
+import Utility.Touch
+#endif
+#endif
+
+import Control.Exception (IOException)
+
+data LockedDown = LockedDown
+ { lockingFile :: Bool
+ , keySource :: KeySource
+ }
+ deriving (Show)
+
+{- The file that's being ingested is locked down before a key is generated,
+ - to prevent it from being modified in between. This lock down is not
+ - perfect at best (and pretty weak at worst). For example, it does not
+ - guard against files that are already opened for write by another process.
+ - So, the InodeCache can be used to detect any changes that might be made
+ - to the file after it was locked down.
+ -
+ - When possible, the file is hard linked to a temp directory. This guards
+ - against some changes, like deletion or overwrite of the file, and
+ - allows lsof checks to be done more efficiently when adding a lot of files.
+ -
+ - If lockingfile is True, the file is going to be added in locked mode.
+ - So, its write bit is removed as part of the lock down.
+ -
+ - Lockdown can fail if a file gets deleted, and Nothing will be returned.
+ -}
+lockDown :: Bool -> FilePath -> Annex (Maybe LockedDown)
+lockDown lockingfile file = either
+ (\e -> warning (show e) >> return Nothing)
+ (return . Just)
+ =<< lockDown' lockingfile file
+
+lockDown' :: Bool -> FilePath -> Annex (Either IOException LockedDown)
+lockDown' lockingfile file = ifM crippledFileSystem
+ ( withTSDelta $ liftIO . tryIO . nohardlink
+ , tryIO $ do
+ tmp <- fromRepo gitAnnexTmpMiscDir
+ createAnnexDirectory tmp
+ when lockingfile $
+ freezeContent file
+ withTSDelta $ \delta -> liftIO $ do
+ (tmpfile, h) <- openTempFile tmp $
+ relatedTemplate $ takeFileName file
+ hClose h
+ nukeFile tmpfile
+ withhardlink delta tmpfile `catchIO` const (nohardlink delta)
+ )
+ where
+ nohardlink delta = do
+ cache <- genInodeCache file delta
+ return $ LockedDown lockingfile $ KeySource
+ { keyFilename = file
+ , contentLocation = file
+ , inodeCache = cache
+ }
+ withhardlink delta tmpfile = do
+ createLink file tmpfile
+ cache <- genInodeCache tmpfile delta
+ return $ LockedDown lockingfile $ KeySource
+ { keyFilename = file
+ , contentLocation = tmpfile
+ , inodeCache = cache
+ }
+
+{- Ingests a locked down file into the annex.
+ -
+ - The file may be added to the git repository as a locked or an unlocked
+ - file. When unlocked, the work tree file is left alone. When locked,
+ - the work tree file is deleted, in preparation for adding the symlink.
+ -}
+ingest :: Maybe LockedDown -> Annex (Maybe Key, Maybe InodeCache)
+ingest Nothing = return (Nothing, Nothing)
+ingest (Just (LockedDown lockingfile source)) = withTSDelta $ \delta -> do
+ backend <- chooseBackend $ keyFilename source
+ k <- genKey source backend
+ let src = contentLocation source
+ ms <- liftIO $ catchMaybeIO $ getFileStatus src
+ mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms
+ case (mcache, inodeCache source) of
+ (_, Nothing) -> go k mcache ms
+ (Just newc, Just c) | compareStrong c newc -> go k mcache ms
+ _ -> failure "changed while it was being added"
+ where
+ go (Just (key, _)) mcache (Just s)
+ | lockingfile = golocked key mcache s
+ | otherwise = ifM isDirect
+ ( godirect key mcache s
+ , gounlocked key mcache s
+ )
+ go _ _ _ = failure "failed to generate a key"
+
+ golocked key mcache s = do
+ catchNonAsync (moveAnnex key $ contentLocation source)
+ (restoreFile (keyFilename source) key)
+ liftIO $ nukeFile $ keyFilename source
+ populateAssociatedFiles key source
+ success key mcache s
+
+ gounlocked key (Just cache) s = do
+ -- Remove temp directory hard link first because
+ -- linkAnnex falls back to copying if a file
+ -- already has a hard link.
+ cleanCruft source
+ cleanOldKeys (keyFilename source) key
+ r <- linkAnnex key (keyFilename source) (Just cache)
+ case r of
+ LinkAnnexFailed -> failure "failed to link to annex"
+ _ -> do
+ finishIngestUnlocked' key source
+ success key (Just cache) s
+ gounlocked _ _ _ = failure "failed statting file"
+
+ godirect key (Just cache) s = do
+ addInodeCache key cache
+ finishIngestDirect key source
+ success key (Just cache) s
+ godirect _ _ _ = failure "failed statting file"
+
+ success k mcache s = do
+ genMetaData k (keyFilename source) s
+ return (Just k, mcache)
+
+ failure msg = do
+ warning $ keyFilename source ++ " " ++ msg
+ cleanCruft source
+ return (Nothing, Nothing)
+
+finishIngestDirect :: Key -> KeySource -> Annex ()
+finishIngestDirect key source = do
+ void $ addAssociatedFile key $ keyFilename source
+ cleanCruft source
+
+ {- Copy to any other locations using the same key. -}
+ otherfs <- filter (/= keyFilename source) <$> associatedFiles key
+ forM_ otherfs $
+ addContentWhenNotPresent key (keyFilename source)
+
+finishIngestUnlocked :: Key -> KeySource -> Annex ()
+finishIngestUnlocked key source = do
+ cleanCruft source
+ finishIngestUnlocked' key source
+
+finishIngestUnlocked' :: Key -> KeySource -> Annex ()
+finishIngestUnlocked' key source = do
+ Database.Keys.addAssociatedFile key (keyFilename source)
+ populateAssociatedFiles key source
+
+{- Copy to any other locations using the same key. -}
+populateAssociatedFiles :: Key -> KeySource -> Annex ()
+populateAssociatedFiles key source = do
+ otherfs <- filter (/= keyFilename source) <$> Database.Keys.getAssociatedFiles key
+ obj <- calcRepo (gitAnnexLocation key)
+ forM_ otherfs $
+ populatePointerFile key obj
+
+cleanCruft :: KeySource -> Annex ()
+cleanCruft source = when (contentLocation source /= keyFilename source) $
+ liftIO $ nukeFile $ contentLocation source
+
+-- If a worktree file was hard linked to an annex object before,
+-- modifying the file would have caused the object to have the wrong
+-- content. Clean up from that.
+cleanOldKeys :: FilePath -> Key -> Annex ()
+cleanOldKeys file newkey = do
+ oldkeys <- filter (/= newkey)
+ <$> Database.Keys.getAssociatedKey file
+ mapM_ go oldkeys
+ where
+ go key = do
+ obj <- calcRepo (gitAnnexLocation key)
+ caches <- Database.Keys.getInodeCaches key
+ unlessM (sameInodeCache obj caches) $ do
+ unlinkAnnex key
+ fs <- filter (/= file)
+ <$> Database.Keys.getAssociatedFiles key
+ fs' <- filterM (`sameInodeCache` caches) fs
+ case fs' of
+ -- If linkAnnex fails, the associated
+ -- file with the content is still present,
+ -- so no need for any recovery.
+ (f:_) -> do
+ ic <- withTSDelta (liftIO . genInodeCache f)
+ void $ linkAnnex key f ic
+ _ -> lostcontent
+ where
+ lostcontent = logStatus key InfoMissing
+
+{- On error, put the file back so it doesn't seem to have vanished.
+ - This can be called before or after the symlink is in place. -}
+restoreFile :: FilePath -> Key -> SomeException -> Annex a
+restoreFile file key e = do
+ whenM (inAnnex key) $ do
+ liftIO $ nukeFile file
+ -- The key could be used by other files too, so leave the
+ -- content in the annex, and make a copy back to the file.
+ obj <- calcRepo $ gitAnnexLocation key
+ unlessM (liftIO $ copyFileExternal CopyTimeStamps obj file) $
+ warning $ "Unable to restore content of " ++ file ++ "; it should be located in " ++ obj
+ thawContent file
+ throwM e
+
+{- Creates the symlink to the annexed content, returns the link target. -}
+makeLink :: FilePath -> Key -> Maybe InodeCache -> Annex String
+makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do
+ l <- calcRepo $ gitAnnexLink file key
+ replaceFile file $ makeAnnexLink l
+
+ -- touch symlink to have same time as the original file,
+ -- as provided in the InodeCache
+ case mcache of
+#if defined(WITH_CLIBS) && ! defined(__ANDROID__)
+ Just c -> liftIO $ touch file (TimeSpec $ inodeCacheToMtime c) False
+#else
+ Just _ -> noop
+#endif
+ Nothing -> noop
+
+ return l
+
+{- Creates the symlink to the annexed content, and stages it in git.
+ -
+ - As long as the filesystem supports symlinks, we use
+ - git add, rather than directly staging the symlink to git.
+ - Using git add is best because it allows the queuing to work
+ - and is faster (staging the symlink runs hash-object commands each time).
+ - Also, using git add allows it to skip gitignored files, unless forced
+ - to include them.
+ -}
+addLink :: FilePath -> Key -> Maybe InodeCache -> Annex ()
+addLink file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig)
+ ( do
+ _ <- makeLink file key mcache
+ ps <- forceParams
+ Annex.Queue.addCommand "add" (ps++[Param "--"]) [file]
+ , do
+ l <- makeLink file key mcache
+ addAnnexLink l file
+ )
+
+{- Parameters to pass to git add, forcing addition of ignored files. -}
+forceParams :: Annex [CommandParam]
+forceParams = ifM (Annex.getState Annex.force)
+ ( return [Param "-f"]
+ , return []
+ )
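
To show how the pieces above are meant to be driven, here is a hedged sketch of the locked add path (roughly what Command/Add.hs does elsewhere in this commit; the wrapper name addLocked is invented for illustration):

    import Common.Annex
    import Annex.Ingest (lockDown, ingest, addLink)

    -- Sketch of a locked add: lock the file down, generate its key and
    -- move the content into the annex, then stage the annex symlink.
    addLocked :: FilePath -> Annex (Maybe Key)
    addLocked file = do
        ld <- lockDown True file
        (mkey, mcache) <- ingest ld
        case mkey of
            Nothing -> return Nothing
            Just key -> do
                addLink file key mcache
                return (Just key)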
diff --git a/Annex/Init.hs b/Annex/Init.hs
index 65e9aa474..99bb03e92 100644
--- a/Annex/Init.hs
+++ b/Annex/Init.hs
@@ -29,12 +29,12 @@ import Types.TrustLevel
import Annex.Version
import Annex.Difference
import Annex.UUID
+import Annex.Link
import Config
import Annex.Direct
-import Annex.Content.Direct
import Annex.Environment
-import Backend
import Annex.Hook
+import Annex.InodeSentinal
import Upgrade
#ifndef mingw32_HOST_OS
import Utility.UserInfo
@@ -57,8 +57,8 @@ genDescription Nothing = do
return $ concat [hostname, ":", reldir]
#endif
-initialize :: Maybe String -> Annex ()
-initialize mdescription = do
+initialize :: Maybe String -> Maybe Version -> Annex ()
+initialize mdescription mversion = do
{- Has to come before any commits are made as the shared
- clone heuristic expects no local objects. -}
sharedclone <- checkSharedClone
@@ -68,7 +68,7 @@ initialize mdescription = do
ensureCommit $ Annex.Branch.create
prepUUID
- initialize'
+ initialize' mversion
initSharedClone sharedclone
@@ -77,15 +77,18 @@ initialize mdescription = do
-- Everything except for uuid setup, shared clone setup, and initial
-- description.
-initialize' :: Annex ()
-initialize' = do
+initialize' :: Maybe Version -> Annex ()
+initialize' mversion = do
checkLockSupport
checkFifoSupport
checkCrippledFileSystem
unlessM isBare $
hookWrite preCommitHook
setDifferences
- setVersion supportedVersion
+ unlessM (isJust <$> getVersion) $
+ setVersion (fromMaybe defaultVersion mversion)
+ whenM versionSupportsUnlockedPointers
+ configureSmudgeFilter
ifM (crippledFileSystem <&&> not <$> isBare)
( do
enableDirectMode
@@ -95,7 +98,7 @@ initialize' = do
, unlessM isBare
switchHEADBack
)
- createInodeSentinalFile
+ createInodeSentinalFile False
uninitialize :: Annex ()
uninitialize = do
@@ -114,7 +117,7 @@ ensureInitialized :: Annex ()
ensureInitialized = getVersion >>= maybe needsinit checkUpgrade
where
needsinit = ifM Annex.Branch.hasSibling
- ( initialize Nothing
+ ( initialize Nothing Nothing
, error "First run: git-annex init"
)
diff --git a/Annex/InodeSentinal.hs b/Annex/InodeSentinal.hs
new file mode 100644
index 000000000..412a7accc
--- /dev/null
+++ b/Annex/InodeSentinal.hs
@@ -0,0 +1,96 @@
+{- git-annex inode sentinal file
+ -
+ - Copyright 2012-2015 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE CPP #-}
+
+module Annex.InodeSentinal where
+
+import Common.Annex
+import qualified Annex
+import Utility.InodeCache
+import Annex.Perms
+
+{- If the sentinal shows the inodes have changed, only the size and mtime
+ - are compared. -}
+compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool
+compareInodeCaches x y
+ | compareStrong x y = return True
+ | otherwise = ifM inodesChanged
+ ( return $ compareWeak x y
+ , return False
+ )
+
+compareInodeCachesWith :: Annex InodeComparisonType
+compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly )
+
+{- Checks if one of the provided old InodeCaches matches the current
+ - version of a file. -}
+sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool
+sameInodeCache _ [] = return False
+sameInodeCache file old = go =<< withTSDelta (liftIO . genInodeCache file)
+ where
+ go Nothing = return False
+ go (Just curr) = elemInodeCaches curr old
+
+elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool
+elemInodeCaches _ [] = return False
+elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l)
+ ( return True
+ , elemInodeCaches c ls
+ )
+
+{- Some filesystems get new inodes each time they are mounted.
+ - In order to work on such a filesystem, a sentinal file is used to detect
+ - when the inodes have changed.
+ -
+ - If the sentinal file does not exist, we have to assume that the
+ - inodes have changed.
+ -}
+inodesChanged :: Annex Bool
+inodesChanged = sentinalInodesChanged <$> sentinalStatus
+
+withTSDelta :: (TSDelta -> Annex a) -> Annex a
+withTSDelta a = a =<< getTSDelta
+
+getTSDelta :: Annex TSDelta
+#ifdef mingw32_HOST_OS
+getTSDelta = sentinalTSDelta <$> sentinalStatus
+#else
+getTSDelta = pure noTSDelta -- optimisation
+#endif
+
+sentinalStatus :: Annex SentinalStatus
+sentinalStatus = maybe check return =<< Annex.getState Annex.sentinalstatus
+ where
+ check = do
+ sc <- liftIO . checkSentinalFile =<< annexSentinalFile
+ Annex.changeState $ \s -> s { Annex.sentinalstatus = Just sc }
+ return sc
+
+{- The sentinal file is only created when first initializing a repository.
+ - If there are any annexed objects in the repository already, creating
+ - the file would invalidate their inode caches. -}
+createInodeSentinalFile :: Bool -> Annex ()
+createInodeSentinalFile evenwithobjects =
+ unlessM (alreadyexists <||> hasobjects) $ do
+ s <- annexSentinalFile
+ createAnnexDirectory (parentDir (sentinalFile s))
+ liftIO $ writeSentinalFile s
+ where
+ alreadyexists = liftIO. sentinalFileExists =<< annexSentinalFile
+ hasobjects
+ | evenwithobjects = pure False
+ | otherwise = liftIO . doesDirectoryExist =<< fromRepo gitAnnexObjectDir
+
+annexSentinalFile :: Annex SentinalFile
+annexSentinalFile = do
+ sentinalfile <- fromRepo gitAnnexInodeSentinal
+ sentinalcachefile <- fromRepo gitAnnexInodeSentinalCache
+ return SentinalFile
+ { sentinalFile = sentinalfile
+ , sentinalCacheFile = sentinalcachefile
+ }
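
As a usage sketch of the relocated helpers (assumed, not shown in the commit): take an InodeCache snapshot when content is put in place, and later use sameInodeCache to cheaply detect modification, with the sentinal file deciding whether weak comparison is needed:

    import Common.Annex
    import Data.Maybe (maybeToList)
    import Annex.InodeSentinal (withTSDelta, sameInodeCache)
    import Utility.InodeCache (InodeCache, genInodeCache)

    -- Snapshot a file's inode, size and mtime now...
    snapshot :: FilePath -> Annex (Maybe InodeCache)
    snapshot f = withTSDelta (liftIO . genInodeCache f)

    -- ...and later check whether the file still matches the snapshot.
    -- Comparison degrades to size+mtime when the sentinal shows the
    -- filesystem hands out new inodes after a remount.
    unchangedSince :: FilePath -> Maybe InodeCache -> Annex Bool
    unchangedSince f = sameInodeCache f . maybeToList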
diff --git a/Annex/Link.hs b/Annex/Link.hs
index 98b200f0a..61c61b561 100644
--- a/Annex/Link.hs
+++ b/Annex/Link.hs
@@ -5,7 +5,9 @@
- On other filesystems, git instead stores the symlink target in a regular
- file.
-
- - Copyright 2013 Joey Hess <id@joeyh.name>
+ - Pointer files are used instead of symlinks for unlocked files.
+ -
+ - Copyright 2013-2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -19,6 +21,9 @@ import qualified Git.UpdateIndex
import qualified Annex.Queue
import Git.Types
import Git.FilePath
+import Types.Key
+
+import qualified Data.ByteString.Lazy as L
type LinkTarget = String
@@ -105,8 +110,49 @@ hashSymlink' :: Git.HashObject.HashObjectHandle -> LinkTarget -> Annex Sha
hashSymlink' h linktarget = liftIO $ Git.HashObject.hashBlob h $
toInternalGitPath linktarget
-{- Stages a symlink to the annex, using a Sha of its target. -}
+{- Stages a symlink to an annexed object, using a Sha of its target. -}
stageSymlink :: FilePath -> Sha -> Annex ()
stageSymlink file sha =
Annex.Queue.addUpdateIndex =<<
inRepo (Git.UpdateIndex.stageSymlink file sha)
+
+{- Injects a pointer file content into git, returning its Sha. -}
+hashPointerFile :: Key -> Annex Sha
+hashPointerFile key = inRepo $ Git.HashObject.hashObject BlobObject $
+ formatPointer key
+
+{- Stages a pointer file, using a Sha of its content -}
+stagePointerFile :: FilePath -> Sha -> Annex ()
+stagePointerFile file sha =
+ Annex.Queue.addUpdateIndex =<<
+ inRepo (Git.UpdateIndex.stageFile sha FileBlob file)
+
+{- Parses a symlink target or a pointer file to a Key.
+ - Only looks at the first line, as pointer files can have subsequent
+ - lines. -}
+parseLinkOrPointer :: L.ByteString -> Maybe Key
+parseLinkOrPointer = parseLinkOrPointer' . decodeBS . L.take maxsz
+ where
+ {- Want to avoid buffering really big files in git into
+ - memory when reading files that may be pointers.
+ -
+ - 8192 bytes is plenty for a pointer to a key.
+ - Pad some more to allow for any pointer files that might have
+ - lines after the key explaining what the file is used for. -}
+ maxsz = 81920
+
+parseLinkOrPointer' :: String -> Maybe Key
+parseLinkOrPointer' s = headMaybe (lines (fromInternalGitPath s)) >>= go
+ where
+ go l
+ | isLinkToAnnex l = file2key $ takeFileName l
+ | otherwise = Nothing
+
+formatPointer :: Key -> String
+formatPointer k =
+ toInternalGitPath (pathSeparator:objectDir </> key2file k) ++ "\n"
+
+{- Checks if a file is a pointer to a key. -}
+isPointerFile :: FilePath -> Annex (Maybe Key)
+isPointerFile f = liftIO $ catchDefaultIO Nothing $
+ parseLinkOrPointer <$> L.readFile f
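
A small illustration, hedged and depending only on the definitions just above (and on these functions being exported), of how formatPointer and parseLinkOrPointer' round-trip, with the parser ignoring any explanatory lines after the first:

    import Annex.Link (formatPointer, parseLinkOrPointer')
    import Types.Key (Key)

    -- A pointer written by formatPointer parses back to the same key,
    -- even with extra human-readable lines appended after it.
    pointerRoundTrips :: Key -> Bool
    pointerRoundTrips k =
        parseLinkOrPointer' (formatPointer k ++ "managed by git-annex\n")
            == Just k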
diff --git a/Annex/MakeRepo.hs b/Annex/MakeRepo.hs
index 73443c43d..adf49ed2c 100644
--- a/Annex/MakeRepo.hs
+++ b/Annex/MakeRepo.hs
@@ -75,7 +75,7 @@ initRepo False _ dir desc mgroup = inDir dir $ do
initRepo' :: Maybe String -> Maybe StandardGroup -> Annex ()
initRepo' desc mgroup = unlessM isInitialized $ do
- initialize desc
+ initialize desc Nothing
u <- getUUID
maybe noop (defaultStandardGroup u) mgroup
{- Ensure branch gets committed right away so it is
diff --git a/Annex/Version.hs b/Annex/Version.hs
index d08f994e9..b54fb68e0 100644
--- a/Annex/Version.hs
+++ b/Annex/Version.hs
@@ -15,14 +15,20 @@ import qualified Annex
type Version = String
-supportedVersion :: Version
-supportedVersion = "5"
+defaultVersion :: Version
+defaultVersion = "5"
+
+latestVersion :: Version
+latestVersion = "6"
+
+supportedVersions :: [Version]
+supportedVersions = ["5", "6"]
upgradableVersions :: [Version]
#ifndef mingw32_HOST_OS
-upgradableVersions = ["0", "1", "2", "4"]
+upgradableVersions = ["0", "1", "2", "4", "5"]
#else
-upgradableVersions = ["2", "3", "4"]
+upgradableVersions = ["2", "3", "4", "5"]
#endif
autoUpgradeableVersions :: [Version]
@@ -34,6 +40,18 @@ versionField = annexConfig "version"
getVersion :: Annex (Maybe Version)
getVersion = annexVersion <$> Annex.getGitConfig
+versionSupportsDirectMode :: Annex Bool
+versionSupportsDirectMode = go <$> getVersion
+ where
+ go (Just "6") = False
+ go _ = True
+
+versionSupportsUnlockedPointers :: Annex Bool
+versionSupportsUnlockedPointers = go <$> getVersion
+ where
+ go (Just "6") = True
+ go _ = False
+
setVersion :: Version -> Annex ()
setVersion = setConfig versionField
diff --git a/Annex/View.hs b/Annex/View.hs
index 2b8a80e5f..8ddbb9c63 100644
--- a/Annex/View.hs
+++ b/Annex/View.hs
@@ -22,7 +22,7 @@ import Git.Sha
import Git.HashObject
import Git.Types
import Git.FilePath
-import qualified Backend
+import Annex.WorkTree
import Annex.Index
import Annex.Link
import Annex.CatFile
@@ -342,7 +342,7 @@ applyView' mkviewedfile getfilemetadata view = do
hasher <- inRepo hashObjectStart
forM_ l $ \f -> do
relf <- getTopFilePath <$> inRepo (toTopFilePath f)
- go uh hasher relf =<< Backend.lookupFile f
+ go uh hasher relf =<< lookupFile f
liftIO $ do
hashObjectStop hasher
void $ stopUpdateIndex uh
@@ -413,13 +413,13 @@ withViewChanges addmeta removemeta = do
handleremovals item
| DiffTree.srcsha item /= nullSha =
handlechange item removemeta
- =<< catKey (DiffTree.srcsha item) (DiffTree.srcmode item)
+ =<< catKey (DiffTree.srcsha item)
| otherwise = noop
handleadds makeabs item
| DiffTree.dstsha item /= nullSha =
handlechange item addmeta
=<< ifM isDirect
- ( catKey (DiffTree.dstsha item) (DiffTree.dstmode item)
+ ( catKey (DiffTree.dstsha item)
-- optimisation
, isAnnexLink $ makeabs $ DiffTree.file item
)
diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs
new file mode 100644
index 000000000..c824e7fc5
--- /dev/null
+++ b/Annex/WorkTree.hs
@@ -0,0 +1,40 @@
+{- git-annex worktree files
+ -
+ - Copyright 2013-2015 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Annex.WorkTree where
+
+import Common.Annex
+import Annex.Link
+import Annex.CatFile
+import Annex.Version
+import Config
+
+{- Looks up the key corresponding to an annexed file,
+ - by examining what the file links to.
+ -
+ - An unlocked file will not have a link on disk, so fall back to
+ - looking for a pointer to a key in git.
+ -}
+lookupFile :: FilePath -> Annex (Maybe Key)
+lookupFile file = do
+ mkey <- isAnnexLink file
+ case mkey of
+ Just key -> makeret key
+ Nothing -> ifM (versionSupportsUnlockedPointers <||> isDirect)
+ ( maybe (return Nothing) makeret =<< catKeyFile file
+ , return Nothing
+ )
+ where
+ makeret = return . Just
+
+{- Modifies an action to only act on files that are already annexed,
+ - and passes the key on to it. -}
+whenAnnexed :: (FilePath -> Key -> Annex (Maybe a)) -> FilePath -> Annex (Maybe a)
+whenAnnexed a file = ifAnnexed file (a file) (return Nothing)
+
+ifAnnexed :: FilePath -> (Key -> Annex a) -> Annex a -> Annex a
+ifAnnexed file yes no = maybe no yes =<< lookupFile file
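
A short sketch of how commands consume this module (printKey is an invented example): whenAnnexed wraps a per-file action so it only runs when lookupFile resolves the file to a key:

    import Common.Annex
    import Annex.WorkTree (whenAnnexed)
    import Types.Key (key2file)

    -- Prints the key behind an annexed work tree file; non-annexed
    -- files are skipped because lookupFile returns Nothing for them.
    printKey :: FilePath -> Annex (Maybe ())
    printKey = whenAnnexed $ \file key -> do
        liftIO $ putStrLn (file ++ ": " ++ key2file key)
        return (Just ())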
diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs
index 59ca69e88..0bdbb0378 100644
--- a/Assistant/Threads/Committer.hs
+++ b/Assistant/Threads/Committer.hs
@@ -21,18 +21,21 @@ import Logs.Transfer
import Logs.Location
import qualified Annex.Queue
import qualified Git.LsFiles
-import qualified Command.Add
import Utility.ThreadScheduler
import qualified Utility.Lsof as Lsof
import qualified Utility.DirWatcher as DirWatcher
import Types.KeySource
import Config
import Annex.Content
+import Annex.Ingest
import Annex.Link
import Annex.CatFile
+import Annex.InodeSentinal
+import Annex.Version
import qualified Annex
import Utility.InodeCache
import Annex.Content.Direct
+import qualified Database.Keys
import qualified Command.Sync
import qualified Git.Branch
@@ -52,7 +55,8 @@ commitThread = namedThread "Committer" $ do
=<< annexDelayAdd <$> Annex.getGitConfig
msg <- liftAnnex Command.Sync.commitMsg
waitChangeTime $ \(changes, time) -> do
- readychanges <- handleAdds havelsof delayadd changes
+ readychanges <- handleAdds havelsof delayadd $
+ simplifyChanges changes
if shouldCommit False time (length readychanges) readychanges
then do
debug
@@ -227,12 +231,11 @@ commitStaged msg = do
return ok
{- OSX needs a short delay after a file is added before locking it down,
- - when using a non-direct mode repository, as pasting a file seems to
- - try to set file permissions or otherwise access the file after closing
- - it. -}
+ - as pasting a file seems to try to set file permissions or otherwise
+ - access the file after closing it. -}
delayaddDefault :: Annex (Maybe Seconds)
#ifdef darwin_HOST_OS
-delayaddDefault = ifM isDirect
+delayaddDefault = ifM (isDirect <||> versionSupportsUnlockedPointers)
( return Nothing
, return $ Just $ Seconds 1
)
@@ -249,12 +252,11 @@ delayaddDefault = return Nothing
- for write by some other process, and faster checking with git-ls-files
- that the files are not already checked into git.
-
- - When a file is added, Inotify will notice the new symlink. So this waits
- - for additional Changes to arrive, so that the symlink has hopefully been
- - staged before returning, and will be committed immediately.
- -
- - OTOH, for kqueue, eventsCoalesce, so instead the symlink is directly
- - created and staged.
+ - When a file is added in locked mode, Inotify will notice the new symlink.
+ - So this waits for additional Changes to arrive, so that the symlink has
+ - hopefully been staged before returning, and will be committed immediately.
+ - (OTOH, for kqueue, eventsCoalesce, so instead the symlink is directly
+ - created and staged.)
-
- Returns a list of all changes that are ready to be committed.
- Any pending adds that are not ready yet are put back into the ChangeChan,
@@ -264,10 +266,13 @@ handleAdds :: Bool -> Maybe Seconds -> [Change] -> Assistant [Change]
handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do
let (pending, inprocess) = partition isPendingAddChange incomplete
direct <- liftAnnex isDirect
- (pending', cleanup) <- if direct
+ unlocked <- liftAnnex versionSupportsUnlockedPointers
+ let lockingfiles = not (unlocked || direct)
+ (pending', cleanup) <- if unlocked || direct
then return (pending, noop)
else findnew pending
- (postponed, toadd) <- partitionEithers <$> safeToAdd havelsof delayadd pending' inprocess
+ (postponed, toadd) <- partitionEithers
+ <$> safeToAdd lockingfiles havelsof delayadd pending' inprocess
cleanup
unless (null postponed) $
@@ -275,10 +280,11 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do
returnWhen (null toadd) $ do
added <- addaction toadd $
- catMaybes <$> if direct
- then adddirect toadd
- else forM toadd add
- if DirWatcher.eventsCoalesce || null added || direct
+ catMaybes <$>
+ if not lockingfiles
+ then addunlocked direct toadd
+ else forM toadd (add lockingfiles)
+ if DirWatcher.eventsCoalesce || null added || unlocked || direct
then return $ added ++ otherchanges
else do
r <- handleAdds havelsof delayadd =<< getChanges
@@ -304,52 +310,57 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do
| c = return otherchanges
| otherwise = a
- add :: Change -> Assistant (Maybe Change)
- add change@(InProcessAddChange { keySource = ks }) =
+ add :: Bool -> Change -> Assistant (Maybe Change)
+ add lockingfile change@(InProcessAddChange { lockedDown = ld }) =
catchDefaultIO Nothing <~> doadd
where
+ ks = keySource ld
doadd = sanitycheck ks $ do
(mkey, mcache) <- liftAnnex $ do
showStart "add" $ keyFilename ks
- Command.Add.ingest $ Just ks
+ ingest $ Just $ LockedDown lockingfile ks
maybe (failedingest change) (done change mcache $ keyFilename ks) mkey
- add _ = return Nothing
+ add _ _ = return Nothing
- {- In direct mode, avoid overhead of re-injesting a renamed
- - file, by examining the other Changes to see if a removed
- - file has the same InodeCache as the new file. If so,
- - we can just update bookkeeping, and stage the file in git.
+	{- Avoid overhead of re-ingesting a renamed unlocked file, by
+ - examining the other Changes to see if a removed file has the
+ - same InodeCache as the new file. If so, we can just update
+ - bookkeeping, and stage the file in git.
-}
- adddirect :: [Change] -> Assistant [Maybe Change]
- adddirect toadd = do
+ addunlocked :: Bool -> [Change] -> Assistant [Maybe Change]
+ addunlocked isdirect toadd = do
ct <- liftAnnex compareInodeCachesWith
- m <- liftAnnex $ removedKeysMap ct cs
+ m <- liftAnnex $ removedKeysMap isdirect ct cs
delta <- liftAnnex getTSDelta
if M.null m
- then forM toadd add
+ then forM toadd (add False)
else forM toadd $ \c -> do
mcache <- liftIO $ genInodeCache (changeFile c) delta
case mcache of
- Nothing -> add c
+ Nothing -> add False c
Just cache ->
case M.lookup (inodeCacheToKey ct cache) m of
- Nothing -> add c
- Just k -> fastadd c k
-
- fastadd :: Change -> Key -> Assistant (Maybe Change)
- fastadd change key = do
- let source = keySource change
- liftAnnex $ Command.Add.finishIngestDirect key source
+ Nothing -> add False c
+ Just k -> fastadd isdirect c k
+
+ fastadd :: Bool -> Change -> Key -> Assistant (Maybe Change)
+ fastadd isdirect change key = do
+ let source = keySource $ lockedDown change
+ liftAnnex $ if isdirect
+ then finishIngestDirect key source
+ else finishIngestUnlocked key source
done change Nothing (keyFilename source) key
- removedKeysMap :: InodeComparisonType -> [Change] -> Annex (M.Map InodeCacheKey Key)
- removedKeysMap ct l = do
+ removedKeysMap :: Bool -> InodeComparisonType -> [Change] -> Annex (M.Map InodeCacheKey Key)
+ removedKeysMap isdirect ct l = do
mks <- forM (filter isRmChange l) $ \c ->
catKeyFile $ changeFile c
M.fromList . concat <$> mapM mkpairs (catMaybes mks)
where
mkpairs k = map (\c -> (inodeCacheToKey ct c, k)) <$>
- recordedInodeCache k
+ if isdirect
+ then recordedInodeCache k
+ else Database.Keys.getInodeCaches k
failedingest change = do
refill [retryChange change]
@@ -358,12 +369,16 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do
done change mcache file key = liftAnnex $ do
logStatus key InfoPresent
- link <- ifM isDirect
- ( calcRepo $ gitAnnexLink file key
- , Command.Add.link file key mcache
+ ifM versionSupportsUnlockedPointers
+ ( stagePointerFile file =<< hashPointerFile key
+ , do
+ link <- ifM isDirect
+ ( calcRepo $ gitAnnexLink file key
+ , makeLink file key mcache
+ )
+ whenM (pure DirWatcher.eventsCoalesce <||> isDirect) $
+ stageSymlink file =<< hashSymlink link
)
- whenM (pure DirWatcher.eventsCoalesce <||> isDirect) $
- stageSymlink file =<< hashSymlink link
showEndOk
return $ Just $ finishedChange change key
@@ -401,16 +416,16 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do
-
- Check by running lsof on the repository.
-}
-safeToAdd :: Bool -> Maybe Seconds -> [Change] -> [Change] -> Assistant [Either Change Change]
-safeToAdd _ _ [] [] = return []
-safeToAdd havelsof delayadd pending inprocess = do
+safeToAdd :: Bool -> Bool -> Maybe Seconds -> [Change] -> [Change] -> Assistant [Either Change Change]
+safeToAdd _ _ _ [] [] = return []
+safeToAdd lockingfiles havelsof delayadd pending inprocess = do
maybe noop (liftIO . threadDelaySeconds) delayadd
liftAnnex $ do
- keysources <- forM pending $ Command.Add.lockDown . changeFile
- let inprocess' = inprocess ++ mapMaybe mkinprocess (zip pending keysources)
+ lockeddown <- forM pending $ lockDown lockingfiles . changeFile
+ let inprocess' = inprocess ++ mapMaybe mkinprocess (zip pending lockeddown)
openfiles <- if havelsof
then S.fromList . map fst3 . filter openwrite <$>
- findopenfiles (map keySource inprocess')
+ findopenfiles (map (keySource . lockedDown) inprocess')
else pure S.empty
let checked = map (check openfiles) inprocess'
@@ -423,17 +438,18 @@ safeToAdd havelsof delayadd pending inprocess = do
allRight $ rights checked
else return checked
where
- check openfiles change@(InProcessAddChange { keySource = ks })
- | S.member (contentLocation ks) openfiles = Left change
+ check openfiles change@(InProcessAddChange { lockedDown = ld })
+ | S.member (contentLocation (keySource ld)) openfiles = Left change
check _ change = Right change
- mkinprocess (c, Just ks) = Just InProcessAddChange
+ mkinprocess (c, Just ld) = Just InProcessAddChange
{ changeTime = changeTime c
- , keySource = ks
+ , lockedDown = ld
}
mkinprocess (_, Nothing) = Nothing
- canceladd (InProcessAddChange { keySource = ks }) = do
+ canceladd (InProcessAddChange { lockedDown = ld }) = do
+ let ks = keySource ld
warning $ keyFilename ks
++ " still has writers, not adding"
-- remove the hard link
diff --git a/Assistant/Threads/TransferScanner.hs b/Assistant/Threads/TransferScanner.hs
index f35c1f1f5..7386d5528 100644
--- a/Assistant/Threads/TransferScanner.hs
+++ b/Assistant/Threads/TransferScanner.hs
@@ -25,7 +25,7 @@ import Utility.ThreadScheduler
import Utility.NotificationBroadcaster
import Utility.Batch
import qualified Git.LsFiles as LsFiles
-import qualified Backend
+import Annex.WorkTree
import Annex.Content
import Annex.Wanted
import CmdLine.Action
@@ -142,7 +142,7 @@ expensiveScan urlrenderer rs = batch <~> do
(unwanted', ts) <- maybe
(return (unwanted, []))
(findtransfers f unwanted)
- =<< liftAnnex (Backend.lookupFile f)
+ =<< liftAnnex (lookupFile f)
mapM_ (enqueue f) ts
scan unwanted' fs
diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs
index 8c6ff378d..bb9659b7c 100644
--- a/Assistant/Threads/Watcher.hs
+++ b/Assistant/Threads/Watcher.hs
@@ -1,6 +1,6 @@
{- git-annex assistant tree watcher
-
- - Copyright 2012-2013 Joey Hess <id@joeyh.name>
+ - Copyright 2012-2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -28,7 +28,7 @@ import qualified Annex.Queue
import qualified Git
import qualified Git.UpdateIndex
import qualified Git.LsFiles as LsFiles
-import qualified Backend
+import Annex.WorkTree
import Annex.Direct
import Annex.Content.Direct
import Annex.CatFile
@@ -36,10 +36,15 @@ import Annex.CheckIgnore
import Annex.Link
import Annex.FileMatcher
import Types.FileMatcher
+import Annex.Content
import Annex.ReplaceFile
+import Annex.Version
+import Annex.InodeSentinal
import Git.Types
import Config
import Utility.ThreadScheduler
+import Logs.Location
+import qualified Database.Keys
#ifndef mingw32_HOST_OS
import qualified Utility.Lsof as Lsof
#endif
@@ -88,10 +93,13 @@ runWatcher = do
startup <- asIO1 startupScan
matcher <- liftAnnex largeFilesMatcher
direct <- liftAnnex isDirect
+ unlocked <- liftAnnex versionSupportsUnlockedPointers
symlinkssupported <- liftAnnex $ coreSymlinks <$> Annex.getGitConfig
- addhook <- hook $ if direct
- then onAddDirect symlinkssupported matcher
- else onAdd matcher
+ addhook <- hook $ if unlocked
+ then onAddUnlocked symlinkssupported matcher
+ else if direct
+ then onAddDirect symlinkssupported matcher
+ else onAdd matcher
delhook <- hook onDel
addsymlinkhook <- hook $ onAddSymlink direct
deldirhook <- hook onDelDir
@@ -216,15 +224,33 @@ onAdd matcher file filestatus
shouldRestage :: DaemonStatus -> Bool
shouldRestage ds = scanComplete ds || forceRestage ds
+onAddUnlocked :: Bool -> FileMatcher Annex -> Handler
+onAddUnlocked = onAddUnlocked' False contentchanged Database.Keys.addAssociatedFile samefilestatus
+ where
+ samefilestatus key file status = do
+ cache <- Database.Keys.getInodeCaches key
+ curr <- withTSDelta $ \delta -> liftIO $ toInodeCache delta file status
+ case (cache, curr) of
+ (_, Just c) -> elemInodeCaches c cache
+ ([], Nothing) -> return True
+ _ -> return False
+ contentchanged oldkey file = do
+ Database.Keys.removeAssociatedFile oldkey file
+ unlessM (inAnnex oldkey) $
+ logStatus oldkey InfoMissing
+
{- In direct mode, add events are received for both new files, and
- modified existing files.
-}
onAddDirect :: Bool -> FileMatcher Annex -> Handler
-onAddDirect symlinkssupported matcher file fs = do
+onAddDirect = onAddUnlocked' True changedDirect (\k f -> void $ addAssociatedFile k f) sameFileStatus
+
+onAddUnlocked' :: Bool -> (Key -> FilePath -> Annex ()) -> (Key -> FilePath -> Annex ()) -> (Key -> FilePath -> FileStatus -> Annex Bool) -> Bool -> FileMatcher Annex -> Handler
+onAddUnlocked' isdirect contentchanged addassociatedfile samefilestatus symlinkssupported matcher file fs = do
v <- liftAnnex $ catKeyFile file
case (v, fs) of
(Just key, Just filestatus) ->
- ifM (liftAnnex $ sameFileStatus key file filestatus)
+ ifM (liftAnnex $ samefilestatus key file filestatus)
{- It's possible to get an add event for
- an existing file that is not
- really modified, but it might have
@@ -237,13 +263,13 @@ onAddDirect symlinkssupported matcher file fs = do
, noChange
)
, guardSymlinkStandin (Just key) $ do
- debug ["changed direct", file]
- liftAnnex $ changedDirect key file
+ debug ["changed", file]
+ liftAnnex $ contentchanged key file
add matcher file
)
_ -> unlessIgnored file $
guardSymlinkStandin Nothing $ do
- debug ["add direct", file]
+ debug ["add", file]
add matcher file
where
{- On a filesystem without symlinks, we'll get changes for regular
@@ -259,9 +285,9 @@ onAddDirect symlinkssupported matcher file fs = do
Just lt -> do
case fileKey $ takeFileName lt of
Nothing -> noop
- Just key -> void $ liftAnnex $
- addAssociatedFile key file
- onAddSymlink' linktarget mk True file fs
+ Just key -> liftAnnex $
+ addassociatedfile key file
+ onAddSymlink' linktarget mk isdirect file fs
{- A symlink might be an arbitrary symlink, which is just added.
- Or, if it is a git-annex symlink, ensure it points to the content
@@ -270,7 +296,7 @@ onAddDirect symlinkssupported matcher file fs = do
onAddSymlink :: Bool -> Handler
onAddSymlink isdirect file filestatus = unlessIgnored file $ do
linktarget <- liftIO (catchMaybeIO $ readSymbolicLink file)
- kv <- liftAnnex (Backend.lookupFile file)
+ kv <- liftAnnex (lookupFile file)
onAddSymlink' linktarget kv isdirect file filestatus
onAddSymlink' :: Maybe String -> Maybe Key -> Bool -> Handler
@@ -330,13 +356,15 @@ onDel file _ = do
onDel' :: FilePath -> Annex ()
onDel' file = do
- whenM isDirect $ do
- mkey <- catKeyFile file
- case mkey of
- Nothing -> noop
- Just key -> void $ removeAssociatedFile key file
+ ifM versionSupportsUnlockedPointers
+ ( withkey $ flip Database.Keys.removeAssociatedFile file
+ , whenM isDirect $
+ withkey $ \key -> void $ removeAssociatedFile key file
+ )
Annex.Queue.addUpdateIndex =<<
inRepo (Git.UpdateIndex.unstageFile file)
+ where
+ withkey a = maybe noop a =<< catKeyFile file
{- A directory has been deleted, or moved, so tell git to remove anything
- that was inside it from its cache. Since it could reappear at any time,
diff --git a/Assistant/Types/Changes.hs b/Assistant/Types/Changes.hs
index 1d8b51775..70c40523a 100644
--- a/Assistant/Types/Changes.hs
+++ b/Assistant/Types/Changes.hs
@@ -1,18 +1,22 @@
{- git-annex assistant change tracking
-
- - Copyright 2012-2013 Joey Hess <id@joeyh.name>
+ - Copyright 2012-2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
+{-# LANGUAGE BangPatterns #-}
+
module Assistant.Types.Changes where
import Types.KeySource
import Types.Key
import Utility.TList
+import Annex.Ingest
import Control.Concurrent.STM
import Data.Time.Clock
+import qualified Data.Set as S
{- An un-ordered pool of Changes that have been noticed and should be
- staged and committed. Changes will typically be in order, but ordering
@@ -38,7 +42,7 @@ data Change
}
| InProcessAddChange
{ changeTime ::UTCTime
- , keySource :: KeySource
+ , lockedDown :: LockedDown
}
deriving (Show)
@@ -53,7 +57,7 @@ changeInfoKey _ = Nothing
changeFile :: Change -> FilePath
changeFile (Change _ f _) = f
changeFile (PendingAddChange _ f) = f
-changeFile (InProcessAddChange _ ks) = keyFilename ks
+changeFile (InProcessAddChange _ ld) = keyFilename $ keySource ld
isPendingAddChange :: Change -> Bool
isPendingAddChange (PendingAddChange {}) = True
@@ -64,14 +68,33 @@ isInProcessAddChange (InProcessAddChange {}) = True
isInProcessAddChange _ = False
retryChange :: Change -> Change
-retryChange (InProcessAddChange time ks) =
- PendingAddChange time (keyFilename ks)
+retryChange c@(InProcessAddChange time _) =
+ PendingAddChange time $ changeFile c
retryChange c = c
finishedChange :: Change -> Key -> Change
-finishedChange c@(InProcessAddChange { keySource = ks }) k = Change
+finishedChange c@(InProcessAddChange {}) k = Change
{ changeTime = changeTime c
- , _changeFile = keyFilename ks
+ , _changeFile = changeFile c
, changeInfo = AddKeyChange k
}
finishedChange c _ = c
+
+{- Combine PendingAddChanges that are for the same file.
+ - Multiple such often get noticed when eg, a file is opened and then
+ - closed in quick succession. -}
+simplifyChanges :: [Change] -> [Change]
+simplifyChanges [c] = [c]
+simplifyChanges cl = go cl S.empty []
+ where
+ go [] _ l = reverse l
+ go (c:cs) seen l
+ | isPendingAddChange c =
+ if S.member f seen
+ then go cs seen l
+ else
+ let !seen' = S.insert f seen
+ in go cs seen' (c:l)
+ | otherwise = go cs seen (c:l)
+ where
+ f = changeFile c
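The same first-wins, order-preserving dedup pattern as a standalone sketch (dedupBy is an illustrative name; simplifyChanges additionally passes non-PendingAddChange items straight through):

	import qualified Data.Set as S

	-- keep the first element seen for each key, preserving input order
	dedupBy :: Ord k => (a -> k) -> [a] -> [a]
	dedupBy key = go S.empty
	  where
		go _ [] = []
		go seen (x:xs)
			| key x `S.member` seen = go seen xs
			| otherwise = x : go (S.insert (key x) seen) xs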
diff --git a/Backend.hs b/Backend.hs
index 922d0c2a7..c2f3d28d4 100644
--- a/Backend.hs
+++ b/Backend.hs
@@ -9,9 +9,7 @@ module Backend (
list,
orderedList,
genKey,
- lookupFile,
getBackend,
- isAnnexLink,
chooseBackend,
lookupBackendName,
maybeLookupBackendName,
@@ -21,12 +19,9 @@ module Backend (
import Common.Annex
import qualified Annex
import Annex.CheckAttr
-import Annex.CatFile
-import Annex.Link
import Types.Key
import Types.KeySource
import qualified Types.Backend as B
-import Config
-- When adding a new backend, import it here and add it to the list.
import qualified Backend.Hash
@@ -78,26 +73,6 @@ genKey' (b:bs) source = do
| c == '\n' = '_'
| otherwise = c
-{- Looks up the key corresponding to an annexed file,
- - by examining what the file links to.
- -
- - In direct mode, there is often no link on disk, in which case
- - the symlink is looked up in git instead. However, a real link
- - on disk still takes precedence over what was committed to git in direct
- - mode.
- -}
-lookupFile :: FilePath -> Annex (Maybe Key)
-lookupFile file = do
- mkey <- isAnnexLink file
- case mkey of
- Just key -> makeret key
- Nothing -> ifM isDirect
- ( maybe (return Nothing) makeret =<< catKeyFile file
- , return Nothing
- )
- where
- makeret k = return $ Just k
-
getBackend :: FilePath -> Key -> Annex (Maybe Backend)
getBackend file k = let bname = keyBackendName k in
case maybeLookupBackendName bname of
diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs
index f585bff3e..ba7689f70 100644
--- a/CmdLine/GitAnnex.hs
+++ b/CmdLine/GitAnnex.hs
@@ -96,6 +96,7 @@ import qualified Command.Upgrade
import qualified Command.Forget
import qualified Command.Proxy
import qualified Command.DiffDriver
+import qualified Command.Smudge
import qualified Command.Undo
import qualified Command.Version
#ifdef WITH_ASSISTANT
@@ -201,6 +202,7 @@ cmds testoptparser testrunner =
, Command.Forget.cmd
, Command.Proxy.cmd
, Command.DiffDriver.cmd
+ , Command.Smudge.cmd
, Command.Undo.cmd
, Command.Version.cmd
#ifdef WITH_ASSISTANT
diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs
index 8d253e47d..e6ee6f3fe 100644
--- a/CmdLine/Seek.hs
+++ b/CmdLine/Seek.hs
@@ -80,7 +80,7 @@ withFilesInRefs a = mapM_ go
l <- inRepo $ LsTree.lsTree (Git.Ref r)
forM_ l $ \i -> do
let f = getTopFilePath $ LsTree.file i
- v <- catKey (Git.Ref $ LsTree.sha i) (LsTree.mode i)
+ v <- catKey (Git.Ref $ LsTree.sha i)
case v of
Nothing -> noop
Just k -> whenM (matcher $ MatchingKey k) $
@@ -115,29 +115,29 @@ withPairs a params = seekActions $ return $ map a $ pairs [] params
pairs c (x:y:xs) = pairs ((x,y):c) xs
pairs _ _ = error "expected pairs"
-withFilesToBeCommitted :: (String -> CommandStart) -> CmdParams -> CommandSeek
+withFilesToBeCommitted :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek
withFilesToBeCommitted a params = seekActions $ prepFiltered a $
seekHelper LsFiles.stagedNotDeleted params
-withFilesUnlocked :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek
-withFilesUnlocked = withFilesUnlocked' LsFiles.typeChanged
+withFilesOldUnlocked :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek
+withFilesOldUnlocked = withFilesOldUnlocked' LsFiles.typeChanged
-withFilesUnlockedToBeCommitted :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek
-withFilesUnlockedToBeCommitted = withFilesUnlocked' LsFiles.typeChangedStaged
+withFilesOldUnlockedToBeCommitted :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek
+withFilesOldUnlockedToBeCommitted = withFilesOldUnlocked' LsFiles.typeChangedStaged
-{- Unlocked files have changed type from a symlink to a regular file.
+{- Unlocked files before v6 have changed type from a symlink to a regular file.
-
- Furthermore, unlocked files used to be a git-annex symlink,
- not some other sort of symlink.
-}
-withFilesUnlocked' :: ([FilePath] -> Git.Repo -> IO ([FilePath], IO Bool)) -> (FilePath -> CommandStart) -> CmdParams -> CommandSeek
-withFilesUnlocked' typechanged a params = seekActions $
+withFilesOldUnlocked' :: ([FilePath] -> Git.Repo -> IO ([FilePath], IO Bool)) -> (FilePath -> CommandStart) -> CmdParams -> CommandSeek
+withFilesOldUnlocked' typechanged a params = seekActions $
prepFiltered a unlockedfiles
where
- unlockedfiles = filterM isUnlocked =<< seekHelper typechanged params
+ unlockedfiles = filterM isOldUnlocked =<< seekHelper typechanged params
-isUnlocked :: FilePath -> Annex Bool
-isUnlocked f = liftIO (notSymlink f) <&&>
+isOldUnlocked :: FilePath -> Annex Bool
+isOldUnlocked f = liftIO (notSymlink f) <&&>
(isJust <$> catKeyFile f <||> isJust <$> catKeyFileHEAD f)
{- Finds files that may be modified. -}
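The notSymlink helper used by isOldUnlocked is not shown in this hunk; a plausible standalone definition would be:

	import System.PosixCompat.Files (getSymbolicLinkStatus, isSymbolicLink)

	-- assumed definition: old-style unlocked files are regular files,
	-- so anything still a symlink is filtered out
	notSymlink :: FilePath -> IO Bool
	notSymlink f = not . isSymbolicLink <$> getSymbolicLinkStatus f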
diff --git a/Command.hs b/Command.hs
index bee63bb74..387f7b8b5 100644
--- a/Command.hs
+++ b/Command.hs
@@ -18,12 +18,13 @@ module Command (
stopUnless,
whenAnnexed,
ifAnnexed,
+ lookupFile,
isBareRepo,
module ReExported
) where
import Common.Annex
-import qualified Backend
+import Annex.WorkTree
import qualified Git
import Types.Command as ReExported
import Types.Option as ReExported
@@ -100,13 +101,5 @@ stop = return Nothing
stopUnless :: Annex Bool -> Annex (Maybe a) -> Annex (Maybe a)
stopUnless c a = ifM c ( a , stop )
-{- Modifies an action to only act on files that are already annexed,
- - and passes the key on to it. -}
-whenAnnexed :: (FilePath -> Key -> Annex (Maybe a)) -> FilePath -> Annex (Maybe a)
-whenAnnexed a file = ifAnnexed file (a file) (return Nothing)
-
-ifAnnexed :: FilePath -> (Key -> Annex a) -> Annex a -> Annex a
-ifAnnexed file yes no = maybe no yes =<< Backend.lookupFile file
-
isBareRepo :: Annex Bool
isBareRepo = fromRepo Git.repoIsLocalBare
diff --git a/Command/Add.hs b/Command/Add.hs
index 27c11eab4..8a7db0a91 100644
--- a/Command/Add.hs
+++ b/Command/Add.hs
@@ -5,35 +5,22 @@
- Licensed under the GNU GPL version 3 or higher.
-}
-{-# LANGUAGE CPP #-}
-
module Command.Add where
import Common.Annex
import Command
-import Types.KeySource
-import Backend
+import Annex.Ingest
import Logs.Location
import Annex.Content
import Annex.Content.Direct
-import Annex.Perms
import Annex.Link
-import Annex.MetaData
import qualified Annex
import qualified Annex.Queue
-#ifdef WITH_CLIBS
-#ifndef __ANDROID__
-import Utility.Touch
-#endif
-#endif
import Config
import Utility.InodeCache
import Annex.FileMatcher
-import Annex.ReplaceFile
-import Utility.Tmp
-import Utility.CopyFile
-
-import Control.Exception (IOException)
+import Annex.Version
+import qualified Database.Keys
cmd :: Command
cmd = notBareRepo $ withGlobalOptions (jobsOption : fileMatchingOptions) $
@@ -64,9 +51,9 @@ seek o = allowConcurrentOutput $ do
, startSmall file
)
go $ withFilesNotInGit (not $ includeDotFiles o)
- ifM isDirect
+ ifM (versionSupportsUnlockedPointers <||> isDirect)
( go withFilesMaybeModified
- , go withFilesUnlocked
+ , go withFilesOldUnlocked
)
{- Pass file off to git-add. -}
@@ -86,9 +73,6 @@ addFile file = do
Annex.Queue.addCommand "add" (ps++[Param "--"]) [file]
return True
-{- The add subcommand annexes a file, generating a key for it using a
- - backend, and then moving it into the annex directory and setting up
- - the symlink pointing to its content. -}
start :: FilePath -> CommandStart
start file = ifAnnexed file addpresent add
where
@@ -103,13 +87,22 @@ start file = ifAnnexed file addpresent add
next $ if isSymbolicLink s
then next $ addFile file
else perform file
- addpresent key = ifM isDirect
+ addpresent key = ifM versionSupportsUnlockedPointers
( do
ms <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file
case ms of
Just s | isSymbolicLink s -> fixup key
- _ -> ifM (goodContent key file) ( stop , add )
- , fixup key
+ _ -> ifM (sameInodeCache file =<< Database.Keys.getInodeCaches key)
+ ( stop, add )
+ , ifM isDirect
+ ( do
+ ms <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file
+ case ms of
+ Just s | isSymbolicLink s -> fixup key
+ _ -> ifM (goodContent key file)
+ ( stop , add )
+ , fixup key
+ )
)
fixup key = do
-- the annexed symlink is present but not yet added to git
@@ -119,188 +112,14 @@ start file = ifAnnexed file addpresent add
void $ addAssociatedFile key file
next $ next $ cleanup file key Nothing =<< inAnnex key
-{- The file that's being added is locked down before a key is generated,
- - to prevent it from being modified in between. This lock down is not
- - perfect at best (and pretty weak at worst). For example, it does not
- - guard against files that are already opened for write by another process.
- - So a KeySource is returned. Its inodeCache can be used to detect any
- - changes that might be made to the file after it was locked down.
- -
- - When possible, the file is hard linked to a temp directory. This guards
- - against some changes, like deletion or overwrite of the file, and
- - allows lsof checks to be done more efficiently when adding a lot of files.
- -
- - Lockdown can fail if a file gets deleted, and Nothing will be returned.
- -}
-lockDown :: FilePath -> Annex (Maybe KeySource)
-lockDown = either
- (\e -> warning (show e) >> return Nothing)
- (return . Just)
- <=< lockDown'
-
-lockDown' :: FilePath -> Annex (Either IOException KeySource)
-lockDown' file = ifM crippledFileSystem
- ( withTSDelta $ liftIO . tryIO . nohardlink
- , tryIO $ do
- tmp <- fromRepo gitAnnexTmpMiscDir
- createAnnexDirectory tmp
- go tmp
- )
- where
- {- In indirect mode, the write bit is removed from the file as part
- - of lock down to guard against further writes, and because objects
- - in the annex have their write bit disabled anyway.
- -
- - Freezing the content early also lets us fail early when
- - someone else owns the file.
- -
- - This is not done in direct mode, because files there need to
- - remain writable at all times.
- -}
- go tmp = do
- unlessM isDirect $
- freezeContent file
- withTSDelta $ \delta -> liftIO $ do
- (tmpfile, h) <- openTempFile tmp $
- relatedTemplate $ takeFileName file
- hClose h
- nukeFile tmpfile
- withhardlink delta tmpfile `catchIO` const (nohardlink delta)
- nohardlink delta = do
- cache <- genInodeCache file delta
- return KeySource
- { keyFilename = file
- , contentLocation = file
- , inodeCache = cache
- }
- withhardlink delta tmpfile = do
- createLink file tmpfile
- cache <- genInodeCache tmpfile delta
- return KeySource
- { keyFilename = file
- , contentLocation = tmpfile
- , inodeCache = cache
- }
-
-{- Ingests a locked down file into the annex.
- -
- - In direct mode, leaves the file alone, and just updates bookkeeping
- - information.
- -}
-ingest :: Maybe KeySource -> Annex (Maybe Key, Maybe InodeCache)
-ingest Nothing = return (Nothing, Nothing)
-ingest (Just source) = withTSDelta $ \delta -> do
- backend <- chooseBackend $ keyFilename source
- k <- genKey source backend
- let src = contentLocation source
- ms <- liftIO $ catchMaybeIO $ getFileStatus src
- mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms
- case (mcache, inodeCache source) of
- (_, Nothing) -> go k mcache ms
- (Just newc, Just c) | compareStrong c newc -> go k mcache ms
- _ -> failure "changed while it was being added"
- where
- go k mcache ms = ifM isDirect
- ( godirect k mcache ms
- , goindirect k mcache ms
- )
-
- goindirect (Just (key, _)) mcache ms = do
- catchNonAsync (moveAnnex key $ contentLocation source)
- (undo (keyFilename source) key)
- maybe noop (genMetaData key (keyFilename source)) ms
- liftIO $ nukeFile $ keyFilename source
- return (Just key, mcache)
- goindirect _ _ _ = failure "failed to generate a key"
-
- godirect (Just (key, _)) (Just cache) ms = do
- addInodeCache key cache
- maybe noop (genMetaData key (keyFilename source)) ms
- finishIngestDirect key source
- return (Just key, Just cache)
- godirect _ _ _ = failure "failed to generate a key"
-
- failure msg = do
- warning $ keyFilename source ++ " " ++ msg
- when (contentLocation source /= keyFilename source) $
- liftIO $ nukeFile $ contentLocation source
- return (Nothing, Nothing)
-
-finishIngestDirect :: Key -> KeySource -> Annex ()
-finishIngestDirect key source = do
- void $ addAssociatedFile key $ keyFilename source
- when (contentLocation source /= keyFilename source) $
- liftIO $ nukeFile $ contentLocation source
-
- {- Copy to any other locations using the same key. -}
- otherfs <- filter (/= keyFilename source) <$> associatedFiles key
- forM_ otherfs $
- addContentWhenNotPresent key (keyFilename source)
-
perform :: FilePath -> CommandPerform
-perform file = lockDown file >>= ingest >>= go
+perform file = do
+ lockingfile <- not <$> isDirect
+ lockDown lockingfile file >>= ingest >>= go
where
go (Just key, cache) = next $ cleanup file key cache True
go (Nothing, _) = stop
-{- On error, put the file back so it doesn't seem to have vanished.
- - This can be called before or after the symlink is in place. -}
-undo :: FilePath -> Key -> SomeException -> Annex a
-undo file key e = do
- whenM (inAnnex key) $ do
- liftIO $ nukeFile file
- -- The key could be used by other files too, so leave the
- -- content in the annex, and make a copy back to the file.
- obj <- calcRepo $ gitAnnexLocation key
- unlessM (liftIO $ copyFileExternal CopyTimeStamps obj file) $
- warning $ "Unable to restore content of " ++ file ++ "; it should be located in " ++ obj
- thawContent file
- throwM e
-
-{- Creates the symlink to the annexed content, returns the link target. -}
-link :: FilePath -> Key -> Maybe InodeCache -> Annex String
-link file key mcache = flip catchNonAsync (undo file key) $ do
- l <- calcRepo $ gitAnnexLink file key
- replaceFile file $ makeAnnexLink l
-
- -- touch symlink to have same time as the original file,
- -- as provided in the InodeCache
- case mcache of
-#if defined(WITH_CLIBS) && ! defined(__ANDROID__)
- Just c -> liftIO $ touch file (TimeSpec $ inodeCacheToMtime c) False
-#else
- Just _ -> noop
-#endif
- Nothing -> noop
-
- return l
-
-{- Creates the symlink to the annexed content, and stages it in git.
- -
- - As long as the filesystem supports symlinks, we use
- - git add, rather than directly staging the symlink to git.
- - Using git add is best because it allows the queuing to work
- - and is faster (staging the symlink runs hash-object commands each time).
- - Also, using git add allows it to skip gitignored files, unless forced
- - to include them.
- -}
-addLink :: FilePath -> Key -> Maybe InodeCache -> Annex ()
-addLink file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig)
- ( do
- _ <- link file key mcache
- ps <- forceParams
- Annex.Queue.addCommand "add" (ps++[Param "--"]) [file]
- , do
- l <- link file key mcache
- addAnnexLink l file
- )
-
-forceParams :: Annex [CommandParam]
-forceParams = ifM (Annex.getState Annex.force)
- ( return [Param "-f"]
- , return []
- )
-
cleanup :: FilePath -> Key -> Maybe InodeCache -> Bool -> CommandCleanup
cleanup file key mcache hascontent = do
ifM (isDirect <&&> pure hascontent)
diff --git a/Command/AddUnused.hs b/Command/AddUnused.hs
index 2b315eada..57fd0cf38 100644
--- a/Command/AddUnused.hs
+++ b/Command/AddUnused.hs
@@ -10,7 +10,7 @@ module Command.AddUnused where
import Common.Annex
import Logs.Location
import Command
-import qualified Command.Add
+import Annex.Ingest
import Command.Unused (withUnusedMaps, UnusedMaps(..), startUnused)
import Types.Key
@@ -31,7 +31,7 @@ start = startUnused "addunused" perform
perform :: Key -> CommandPerform
perform key = next $ do
logStatus key InfoPresent
- Command.Add.addLink file key Nothing
+ addLink file key Nothing
return True
where
file = "unused." ++ key2file key
diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs
index 2989c5830..746a6725c 100644
--- a/Command/AddUrl.hs
+++ b/Command/AddUrl.hs
@@ -14,14 +14,15 @@ import Network.URI
import Common.Annex
import Command
import Backend
-import qualified Command.Add
import qualified Annex
import qualified Annex.Queue
import qualified Annex.Url as Url
import qualified Backend.URL
import qualified Remote
import qualified Types.Remote as Remote
+import qualified Command.Add
import Annex.Content
+import Annex.Ingest
import Annex.UUID
import Logs.Web
import Types.Key
@@ -373,7 +374,7 @@ cleanup u url file key mtmp = case mtmp of
when (isJust mtmp) $
logStatus key InfoPresent
setUrlPresent u key url
- Command.Add.addLink file key Nothing
+ addLink file key Nothing
whenM isDirect $ do
void $ addAssociatedFile key file
{- For moveAnnex to work in direct mode, the symlink
diff --git a/Command/ConfigList.hs b/Command/ConfigList.hs
index 46c909107..997016e8e 100644
--- a/Command/ConfigList.hs
+++ b/Command/ConfigList.hs
@@ -46,7 +46,7 @@ findOrGenUUID = do
else ifM (Annex.Branch.hasSibling <||> (isJust <$> Fields.getField Fields.autoInit))
( do
liftIO checkNotReadOnly
- initialize Nothing
+ initialize Nothing Nothing
getUUID
, return NoUUID
)
diff --git a/Command/Direct.hs b/Command/Direct.hs
index 162780dd5..9cfd258eb 100644
--- a/Command/Direct.hs
+++ b/Command/Direct.hs
@@ -14,6 +14,7 @@ import qualified Git.LsFiles
import qualified Git.Branch
import Config
import Annex.Direct
+import Annex.Version
cmd :: Command
cmd = notBareRepo $ noDaemonRunning $
@@ -24,7 +25,10 @@ seek :: CmdParams -> CommandSeek
seek = withNothing start
start :: CommandStart
-start = ifM isDirect ( stop , next perform )
+start = ifM versionSupportsDirectMode
+ ( ifM isDirect ( stop , next perform )
+	, error "Direct mode is not supported by this repository version. Use git-annex unlock instead."
+ )
perform :: CommandPerform
perform = do
diff --git a/Command/Fsck.hs b/Command/Fsck.hs
index 1531d2ab7..46de4ac96 100644
--- a/Command/Fsck.hs
+++ b/Command/Fsck.hs
@@ -34,6 +34,7 @@ import Utility.HumanTime
import Utility.CopyFile
import Git.FilePath
import Utility.PID
+import qualified Database.Keys
#ifdef WITH_DATABASE
import qualified Database.Fsck as FsckDb
@@ -118,16 +119,18 @@ start from inc file key = do
go = runFsck inc file key
perform :: Key -> FilePath -> Backend -> NumCopies -> Annex Bool
-perform key file backend numcopies = check
- -- order matters
- [ fixLink key file
- , verifyLocationLog key file
- , verifyDirectMapping key file
- , verifyDirectMode key file
- , checkKeySize key
- , checkBackend backend key (Just file)
- , checkKeyNumCopies key (Just file) numcopies
- ]
+perform key file backend numcopies = do
+ keystatus <- getKeyStatus key
+ check
+ -- order matters
+ [ fixLink key file
+ , verifyLocationLog key keystatus file
+ , verifyDirectMapping key file
+ , verifyDirectMode key file
+ , checkKeySize key keystatus
+ , checkBackend backend key keystatus (Just file)
+ , checkKeyNumCopies key (Just file) numcopies
+ ]
{- To fsck a remote, the content is retrieved to a tmp file,
- and checked locally. -}
@@ -183,19 +186,19 @@ startKey inc key numcopies =
performKey key backend numcopies
performKey :: Key -> Backend -> NumCopies -> Annex Bool
-performKey key backend numcopies = check
- [ verifyLocationLog key (key2file key)
- , checkKeySize key
- , checkBackend backend key Nothing
- , checkKeyNumCopies key Nothing numcopies
- ]
+performKey key backend numcopies = do
+ keystatus <- getKeyStatus key
+ check
+ [ verifyLocationLog key keystatus (key2file key)
+ , checkKeySize key keystatus
+ , checkBackend backend key keystatus Nothing
+ , checkKeyNumCopies key Nothing numcopies
+ ]
check :: [Annex Bool] -> Annex Bool
check cs = and <$> sequence cs
-{- Checks that the file's link points correctly to the content.
- -
- - In direct mode, there is only a link when the content is not present.
+{- Checks that symlinks point correctly to the annexed content.
-}
fixLink :: Key -> FilePath -> Annex Bool
fixLink key file = do
@@ -214,19 +217,23 @@ fixLink key file = do
{- Checks that the location log reflects the current status of the key,
- in this repository only. -}
-verifyLocationLog :: Key -> String -> Annex Bool
-verifyLocationLog key desc = do
- present <- inAnnex key
+verifyLocationLog :: Key -> KeyStatus -> String -> Annex Bool
+verifyLocationLog key keystatus desc = do
+ obj <- calcRepo $ gitAnnexLocation key
+ present <- if isKeyUnlocked keystatus
+ then liftIO (doesFileExist obj)
+ else inAnnex key
direct <- isDirect
u <- getUUID
- {- Since we're checking that a key's file is present, throw
+ {- Since we're checking that a key's object file is present, throw
- in a permission fixup here too. -}
- file <- calcRepo $ gitAnnexLocation key
- when (present && not direct) $
- freezeContent file
- whenM (liftIO $ doesDirectoryExist $ parentDir file) $
- freezeContentDir file
+ when (present && not direct) $ void $ tryIO $
+ if isKeyUnlocked keystatus
+ then thawContent obj
+ else freezeContent obj
+ whenM (liftIO $ doesDirectoryExist $ parentDir obj) $
+ freezeContentDir obj
{- In direct mode, modified files will show up as not present,
- but that is expected and not something to do anything about. -}
@@ -288,18 +295,16 @@ verifyDirectMode key file = do
{- The size of the data for a key is checked against the size encoded in
- the key's metadata, if available.
-
- - Not checked in direct mode, because files can be changed directly.
+ - Not checked when a file is unlocked, or in direct mode.
-}
-checkKeySize :: Key -> Annex Bool
-checkKeySize key = ifM isDirect
- ( return True
- , do
- file <- calcRepo $ gitAnnexLocation key
- ifM (liftIO $ doesFileExist file)
- ( checkKeySizeOr badContent key file
- , return True
- )
- )
+checkKeySize :: Key -> KeyStatus -> Annex Bool
+checkKeySize _ KeyUnlocked = return True
+checkKeySize key _ = do
+ file <- calcRepo $ gitAnnexLocation key
+ ifM (liftIO $ doesFileExist file)
+ ( checkKeySizeOr badContent key file
+ , return True
+ )
checkKeySizeRemote :: Key -> Remote -> Maybe FilePath -> Annex Bool
checkKeySizeRemote _ _ Nothing = return True
@@ -326,18 +331,26 @@ checkKeySizeOr bad key file = case Types.Key.keySize key of
, msg
]
-{- Runs the backend specific check on a key's content.
+{- Runs the backend specific check on a key's content object.
+ -
+ - When a file is unlocked, it may be a hard link to the object,
+ - thus when the user modifies the file, the object will be modified and
+ - not pass the check, and we don't want to find an error in this case.
+ - So, skip the check if the key is unlocked and modified.
-
- In direct mode this is not done if the file has clearly been modified,
- because modification of direct mode files is allowed. It's still done
- if the file does not appear modified, to catch disk corruption, etc.
-}
-checkBackend :: Backend -> Key -> Maybe FilePath -> Annex Bool
-checkBackend backend key mfile = go =<< isDirect
+checkBackend :: Backend -> Key -> KeyStatus -> Maybe FilePath -> Annex Bool
+checkBackend backend key keystatus mfile = go =<< isDirect
where
go False = do
content <- calcRepo $ gitAnnexLocation key
- checkBackendOr badContent backend key content
+ ifM (pure (isKeyUnlocked keystatus) <&&> (not <$> isUnmodified key content))
+ ( nocheck
+ , checkBackendOr badContent backend key content
+ )
go True = maybe nocheck checkdirect mfile
checkdirect file = ifM (goodContent key file)
( checkBackendOr' (badContentDirect file) backend key file
@@ -582,3 +595,20 @@ withFsckDb (StartIncremental h) a = a h
withFsckDb NonIncremental _ = noop
withFsckDb (ScheduleIncremental _ _ i) a = withFsckDb i a
#endif
+
+data KeyStatus = KeyLocked | KeyUnlocked | KeyMissing
+
+isKeyUnlocked :: KeyStatus -> Bool
+isKeyUnlocked KeyUnlocked = True
+isKeyUnlocked KeyLocked = False
+isKeyUnlocked KeyMissing = False
+
+getKeyStatus :: Key -> Annex KeyStatus
+getKeyStatus key = ifM isDirect
+ ( return KeyUnlocked
+ , catchDefaultIO KeyMissing $ do
+ obj <- calcRepo $ gitAnnexLocation key
+ unlocked <- ((> 1) . linkCount <$> liftIO (getFileStatus obj))
+ <&&> (not . null <$> Database.Keys.getAssociatedFiles key)
+ return $ if unlocked then KeyUnlocked else KeyLocked
+ )
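The link-count test that getKeyStatus relies on, sketched outside the Annex monad (the helper name and plain-IO framing are illustrative):

	import System.PosixCompat.Files (getFileStatus, linkCount)

	-- an annex object with more than one hard link is taken as a hint
	-- that it is also checked out as an unlocked work tree file
	hasOtherHardLinks :: FilePath -> IO Bool
	hasOtherHardLinks obj = (> 1) . linkCount <$> getFileStatus obj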
diff --git a/Command/Indirect.hs b/Command/Indirect.hs
index c12c91a48..06897e292 100644
--- a/Command/Indirect.hs
+++ b/Command/Indirect.hs
@@ -20,7 +20,7 @@ import Annex.Content
import Annex.Content.Direct
import Annex.CatFile
import Annex.Init
-import qualified Command.Add
+import Annex.Ingest
cmd :: Command
cmd = notBareRepo $ noDaemonRunning $
@@ -76,7 +76,7 @@ perform = do
return Nothing
| otherwise ->
maybe noop (fromdirect f)
- =<< catKey sha mode
+ =<< catKey sha
_ -> noop
go _ = noop
@@ -90,7 +90,7 @@ perform = do
Right _ -> do
l <- calcRepo $ gitAnnexLink f k
liftIO $ createSymbolicLink l f
- Left e -> catchNonAsync (Command.Add.undo f k e)
+ Left e -> catchNonAsync (restoreFile f k e)
warnlocked
showEndOk
diff --git a/Command/Init.hs b/Command/Init.hs
index d969669f8..94d8168a6 100644
--- a/Command/Init.hs
+++ b/Command/Init.hs
@@ -10,25 +10,44 @@ module Command.Init where
import Common.Annex
import Command
import Annex.Init
+import Annex.Version
import qualified Annex.SpecialRemote
cmd :: Command
cmd = dontCheck repoExists $
command "init" SectionSetup "initialize git-annex"
- paramDesc (withParams seek)
+ paramDesc (seek <$$> optParser)
-seek :: CmdParams -> CommandSeek
-seek = withWords start
+data InitOptions = InitOptions
+ { initDesc :: String
+ , initVersion :: Maybe Version
+ }
-start :: [String] -> CommandStart
-start ws = do
- showStart "init" description
- next $ perform description
- where
- description = unwords ws
+optParser :: CmdParamsDesc -> Parser InitOptions
+optParser desc = InitOptions
+ <$> (unwords <$> cmdParams desc)
+ <*> optional (option (str >>= parseVersion)
+ ( long "version" <> metavar paramValue
+ <> help "Override default annex.version"
+ ))
-perform :: String -> CommandPerform
-perform description = do
- initialize $ if null description then Nothing else Just description
+parseVersion :: Monad m => String -> m Version
+parseVersion v
+ | v `elem` supportedVersions = return v
+ | otherwise = fail $ v ++ " is not a currently supported repository version"
+
+seek :: InitOptions -> CommandSeek
+seek = commandAction . start
+
+start :: InitOptions -> CommandStart
+start os = do
+ showStart "init" (initDesc os)
+ next $ perform os
+
+perform :: InitOptions -> CommandPerform
+perform os = do
+ initialize
+ (if null (initDesc os) then Nothing else Just (initDesc os))
+ (initVersion os)
Annex.SpecialRemote.autoEnable
next $ return True
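parseVersion is polymorphic in its monad, so its rejection of unknown versions can be observed directly in Maybe (which particular strings are in supportedVersions is not shown in this hunk):

	-- assumed illustration: fail in Maybe discards the message and
	-- yields Nothing for an unsupported version string
	rejected :: Maybe Version
	rejected = parseVersion "no-such-version"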
diff --git a/Command/Lock.hs b/Command/Lock.hs
index 7711ec3b8..1be6e9c76 100644
--- a/Command/Lock.hs
+++ b/Command/Lock.hs
@@ -1,6 +1,6 @@
{- git-annex command
-
- - Copyright 2010 Joey Hess <id@joeyh.name>
+ - Copyright 2010,2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -11,6 +11,16 @@ import Common.Annex
import Command
import qualified Annex.Queue
import qualified Annex
+import Annex.Version
+import Annex.Content
+import Annex.Link
+import Annex.InodeSentinal
+import Annex.Perms
+import Annex.ReplaceFile
+import Utility.InodeCache
+import qualified Database.Keys
+import Annex.Ingest
+import Logs.Location
cmd :: Command
cmd = notDirect $ withGlobalOptions annexedMatchingOptions $
@@ -19,18 +29,90 @@ cmd = notDirect $ withGlobalOptions annexedMatchingOptions $
paramPaths (withParams seek)
seek :: CmdParams -> CommandSeek
-seek ps = do
- withFilesUnlocked start ps
- withFilesUnlockedToBeCommitted start ps
+seek ps = ifM versionSupportsUnlockedPointers
+ ( withFilesInGit (whenAnnexed startNew) ps
+ , do
+ withFilesOldUnlocked startOld ps
+ withFilesOldUnlockedToBeCommitted startOld ps
+ )
-start :: FilePath -> CommandStart
-start file = do
+startNew :: FilePath -> Key -> CommandStart
+startNew file key = ifM (isJust <$> isAnnexLink file)
+ ( stop
+ , do
+ showStart "lock" file
+ go =<< isPointerFile file
+ )
+ where
+ go (Just key')
+ | key' == key = cont False
+ | otherwise = errorModified
+ go Nothing =
+ ifM (isUnmodified key file)
+ ( cont False
+ , ifM (Annex.getState Annex.force)
+ ( cont True
+ , errorModified
+ )
+ )
+ cont = next . performNew file key
+
+performNew :: FilePath -> Key -> Bool -> CommandPerform
+performNew file key filemodified = do
+ lockdown =<< calcRepo (gitAnnexLocation key)
+ addLink file key
+ =<< withTSDelta (liftIO . genInodeCache file)
+ next $ cleanupNew file key
+ where
+ lockdown obj = do
+ ifM (catchBoolIO $ sameInodeCache obj =<< Database.Keys.getInodeCaches key)
+ ( breakhardlink obj
+ , repopulate obj
+ )
+ whenM (liftIO $ doesFileExist obj) $
+ freezeContent obj
+
+ -- It's ok if the file is hard linked to obj, but if some other
+ -- associated file is, we need to break that link to lock down obj.
+ breakhardlink obj = whenM (catchBoolIO $ (> 1) . linkCount <$> liftIO (getFileStatus obj)) $ do
+ mfc <- withTSDelta (liftIO . genInodeCache file)
+ unlessM (sameInodeCache obj (maybeToList mfc)) $ do
+ modifyContent obj $ replaceFile obj $ \tmp -> do
+ unlessM (checkedCopyFile key obj tmp) $
+ error "unable to lock file; need more free disk space"
+ Database.Keys.storeInodeCaches key [obj]
+
+ -- Try to repopulate obj from an unmodified associated file.
+ repopulate obj
+ | filemodified = modifyContent obj $ do
+ fs <- Database.Keys.getAssociatedFiles key
+ mfile <- firstM (isUnmodified key) fs
+ liftIO $ nukeFile obj
+ case mfile of
+ Just unmodified ->
+ unlessM (checkedCopyFile key unmodified obj)
+ lostcontent
+ Nothing -> lostcontent
+ | otherwise = modifyContent obj $
+ liftIO $ renameFile file obj
+ lostcontent = logStatus key InfoMissing
+
+cleanupNew :: FilePath -> Key -> CommandCleanup
+cleanupNew file key = do
+ Database.Keys.removeAssociatedFile key file
+ return True
+
+startOld :: FilePath -> CommandStart
+startOld file = do
showStart "lock" file
- unlessM (Annex.getState Annex.force) $
- error "Locking this file would discard any changes you have made to it. Use 'git annex add' to stage your changes. (Or, use --force to override)"
- next $ perform file
+ unlessM (Annex.getState Annex.force)
+ errorModified
+ next $ performOld file
-perform :: FilePath -> CommandPerform
-perform file = do
+performOld :: FilePath -> CommandPerform
+performOld file = do
Annex.Queue.addCommand "checkout" [Param "--"] [file]
- next $ return True -- no cleanup needed
+ next $ return True
+
+errorModified :: a
+errorModified = error "Locking this file would discard any changes you have made to it. Use 'git annex add' to stage your changes. (Or, use --force to override)"
diff --git a/Command/Migrate.hs b/Command/Migrate.hs
index d1c7902d7..b8d2eea87 100644
--- a/Command/Migrate.hs
+++ b/Command/Migrate.hs
@@ -72,7 +72,7 @@ perform file oldkey oldbackend newbackend = go =<< genkey
go (Just (newkey, knowngoodcontent))
| knowngoodcontent = finish newkey
| otherwise = stopUnless checkcontent $ finish newkey
- checkcontent = Command.Fsck.checkBackend oldbackend oldkey $ Just file
+ checkcontent = Command.Fsck.checkBackend oldbackend oldkey Command.Fsck.KeyLocked $ Just file
finish newkey = stopUnless (Command.ReKey.linkKey oldkey newkey) $
next $ Command.ReKey.cleanup file oldkey newkey
genkey = case maybe Nothing (\fm -> fm oldkey newbackend (Just file)) (fastMigrate oldbackend) of
diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs
index 2d62b51f3..cbf7f6e3d 100644
--- a/Command/PreCommit.hs
+++ b/Command/PreCommit.hs
@@ -16,7 +16,9 @@ import qualified Command.Add
import qualified Command.Fix
import Annex.Direct
import Annex.Hook
+import Annex.Link
import Annex.View
+import Annex.Version
import Annex.View.ViewedFile
import Annex.LockFile
import Logs.View
@@ -41,17 +43,22 @@ seek ps = lockPreCommitHook $ ifM isDirect
withWords startDirect ps
runAnnexHook preCommitAnnexHook
, do
- ifM (liftIO Git.haveFalseIndex)
+ ifM (not <$> versionSupportsUnlockedPointers <&&> liftIO Git.haveFalseIndex)
( do
(fs, cleanup) <- inRepo $ Git.typeChangedStaged ps
- whenM (anyM isUnlocked fs) $
+ whenM (anyM isOldUnlocked fs) $
error "Cannot make a partial commit with unlocked annexed files. You should `git annex add` the files you want to commit, and then run git commit."
void $ liftIO cleanup
, do
-- fix symlinks to files being committed
- withFilesToBeCommitted (whenAnnexed Command.Fix.start) ps
+ flip withFilesToBeCommitted ps $ \f ->
+ maybe stop (Command.Fix.start f)
+ =<< isAnnexLink f
-- inject unlocked files into the annex
- withFilesUnlockedToBeCommitted startIndirect ps
+ -- (not needed when repo version uses
+ -- unlocked pointer files)
+ unlessM versionSupportsUnlockedPointers $
+ withFilesOldUnlockedToBeCommitted startInjectUnlocked ps
)
runAnnexHook preCommitAnnexHook
-- committing changes to a view updates metadata
@@ -64,8 +71,8 @@ seek ps = lockPreCommitHook $ ifM isDirect
)
-startIndirect :: FilePath -> CommandStart
-startIndirect f = next $ do
+startInjectUnlocked :: FilePath -> CommandStart
+startInjectUnlocked f = next $ do
unlessM (callCommandAction $ Command.Add.start f) $
error $ "failed to add " ++ f ++ "; canceling commit"
next $ return True
diff --git a/Command/ReKey.hs b/Command/ReKey.hs
index fe13d4dd4..9fb8515c0 100644
--- a/Command/ReKey.hs
+++ b/Command/ReKey.hs
@@ -12,7 +12,7 @@ import Command
import qualified Annex
import Types.Key
import Annex.Content
-import qualified Command.Add
+import Annex.Ingest
import Logs.Web
import Logs.Location
import Utility.CopyFile
@@ -70,6 +70,6 @@ cleanup file oldkey newkey = do
-- Update symlink to use the new key.
liftIO $ removeFile file
- Command.Add.addLink file newkey Nothing
+ addLink file newkey Nothing
logStatus newkey InfoPresent
return True
diff --git a/Command/Reinit.hs b/Command/Reinit.hs
index 1be692871..e2c00a3d2 100644
--- a/Command/Reinit.hs
+++ b/Command/Reinit.hs
@@ -38,6 +38,6 @@ perform s = do
then return $ toUUID s
else Remote.nameToUUID s
storeUUID u
- initialize'
+ initialize' Nothing
Annex.SpecialRemote.autoEnable
next $ return True
diff --git a/Command/Smudge.hs b/Command/Smudge.hs
new file mode 100644
index 000000000..bde440f7e
--- /dev/null
+++ b/Command/Smudge.hs
@@ -0,0 +1,115 @@
+{- git-annex command
+ -
+ - Copyright 2015 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Command.Smudge where
+
+import Common.Annex
+import Command
+import Annex.Content
+import Annex.Link
+import Annex.MetaData
+import Annex.FileMatcher
+import Annex.InodeSentinal
+import Annex.Ingest
+import Utility.InodeCache
+import Types.KeySource
+import Backend
+import Logs.Location
+import qualified Database.Keys
+
+import qualified Data.ByteString.Lazy as B
+
+cmd :: Command
+cmd = noCommit $ noMessages $
+ command "smudge" SectionPlumbing
+ "git smudge filter"
+ paramFile (seek <$$> optParser)
+
+data SmudgeOptions = SmudgeOptions
+ { smudgeFile :: FilePath
+ , cleanOption :: Bool
+ }
+
+optParser :: CmdParamsDesc -> Parser SmudgeOptions
+optParser desc = SmudgeOptions
+ <$> argument str ( metavar desc )
+ <*> switch ( long "clean" <> help "clean filter" )
+
+seek :: SmudgeOptions -> CommandSeek
+seek o = commandAction $
+ (if cleanOption o then clean else smudge) (smudgeFile o)
+
+-- Smudge filter is fed git file content, and if it's a pointer to an
+-- available annex object, should output its content.
+smudge :: FilePath -> CommandStart
+smudge file = do
+ b <- liftIO $ B.hGetContents stdin
+ case parseLinkOrPointer b of
+ Nothing -> liftIO $ B.putStr b
+ Just k -> do
+ -- A previous unlocked checkout of the file may have
+ -- led to the annex object getting modified;
+ -- don't provide such modified content as it
+ -- will be confusing. inAnnex will detect such
+ -- modifications.
+ ifM (inAnnex k)
+ ( do
+ content <- calcRepo (gitAnnexLocation k)
+ liftIO $ B.putStr . fromMaybe b
+ =<< catchMaybeIO (B.readFile content)
+ , liftIO $ B.putStr b
+ )
+ Database.Keys.addAssociatedFile k file
+ stop
+
+-- Clean filter is fed file content on stdin, decides if a file
+-- should be stored in the annex, and outputs a pointer to its
+-- ingested content.
+clean :: FilePath -> CommandStart
+clean file = do
+ b <- liftIO $ B.hGetContents stdin
+ if isJust (parseLinkOrPointer b)
+ then liftIO $ B.hPut stdout b
+ else ifM (shouldAnnex file)
+ ( liftIO . emitPointer =<< ingestLocal file
+ , liftIO $ B.hPut stdout b
+ )
+ stop
+
+shouldAnnex :: FilePath -> Annex Bool
+shouldAnnex file = do
+ matcher <- largeFilesMatcher
+ checkFileMatcher matcher file
+
+-- TODO: Use main ingest code instead?
+ingestLocal :: FilePath -> Annex Key
+ingestLocal file = do
+ backend <- chooseBackend file
+ ic <- withTSDelta (liftIO . genInodeCache file)
+ let source = KeySource
+ { keyFilename = file
+ , contentLocation = file
+ , inodeCache = ic
+ }
+ k <- fst . fromMaybe (error "failed to generate a key")
+ <$> genKey source backend
+ -- Hard link (or copy) file content to annex object
+ -- to prevent it from being lost when git checks out
+ -- a branch not containing this file.
+ r <- linkAnnex k file ic
+ case r of
+ LinkAnnexFailed -> error "Problem adding file to the annex"
+ LinkAnnexOk -> logStatus k InfoPresent
+ LinkAnnexNoop -> noop
+ genMetaData k file
+ =<< liftIO (getFileStatus file)
+ cleanOldKeys file k
+ Database.Keys.addAssociatedFile k file
+ return k
+
+emitPointer :: Key -> IO ()
+emitPointer = putStr . formatPointer
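The clean/smudge pair depends on emitted pointers parsing back to the same key; a hedged sketch of that round trip (assuming formatPointer and parseLinkOrPointer are both exported by Annex.Link, as their use above suggests):

	import qualified Data.ByteString.Lazy.Char8 as L8
	import Annex.Link (formatPointer, parseLinkOrPointer)
	import Types.Key (Key)

	-- what the clean filter writes, the smudge filter should recognize again
	pointerRoundTrips :: Key -> Bool
	pointerRoundTrips k = parseLinkOrPointer (L8.pack (formatPointer k)) == Just k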
diff --git a/Command/Unannex.hs b/Command/Unannex.hs
index fdf976d3e..9bde19106 100644
--- a/Command/Unannex.hs
+++ b/Command/Unannex.hs
@@ -15,12 +15,14 @@ import Config
import qualified Annex
import Annex.Content
import Annex.Content.Direct
+import Annex.Version
import qualified Git.Command
import qualified Git.Branch
import qualified Git.Ref
import qualified Git.DiffTree as DiffTree
import Utility.CopyFile
import Command.PreCommit (lockPreCommitHook)
+import qualified Database.Keys
cmd :: Command
cmd = withGlobalOptions annexedMatchingOptions $
@@ -32,7 +34,7 @@ seek :: CmdParams -> CommandSeek
seek = wrapUnannex . (withFilesInGit $ whenAnnexed start)
wrapUnannex :: Annex a -> Annex a
-wrapUnannex a = ifM isDirect
+wrapUnannex a = ifM (versionSupportsUnlockedPointers <||> isDirect)
( a
{- Run with the pre-commit hook disabled, to avoid confusing
- behavior if an unannexed file is added back to git as
@@ -85,6 +87,7 @@ performIndirect file key = do
cleanupIndirect :: FilePath -> Key -> CommandCleanup
cleanupIndirect file key = do
+ Database.Keys.removeAssociatedFile key file
src <- calcRepo $ gitAnnexLocation key
ifM (Annex.getState Annex.fast)
( do
diff --git a/Command/Undo.hs b/Command/Undo.hs
index c647dfba4..0692dce34 100644
--- a/Command/Undo.hs
+++ b/Command/Undo.hs
@@ -72,7 +72,7 @@ perform p = do
f <- mkrel di
whenM isDirect $
maybe noop (`removeDirect` f)
- =<< catKey (srcsha di) (srcmode di)
+ =<< catKey (srcsha di)
liftIO $ nukeFile f
forM_ adds $ \di -> do
@@ -80,6 +80,6 @@ perform p = do
inRepo $ Git.run [Param "checkout", Param "--", File f]
whenM isDirect $
maybe noop (`toDirect` f)
- =<< catKey (dstsha di) (dstmode di)
+ =<< catKey (dstsha di)
next $ liftIO cleanup
diff --git a/Command/Unlock.hs b/Command/Unlock.hs
index d1b1d0e90..b82f78096 100644
--- a/Command/Unlock.hs
+++ b/Command/Unlock.hs
@@ -1,6 +1,6 @@
{- git-annex command
-
- - Copyright 2010 Joey Hess <id@joeyh.name>
+ - Copyright 2010,2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -11,6 +11,11 @@ import Common.Annex
import Command
import Annex.Content
import Annex.CatFile
+import Annex.Version
+import Annex.Link
+import Annex.ReplaceFile
+import Annex.InodeSentinal
+import Utility.InodeCache
import Utility.CopyFile
cmd :: Command
@@ -26,14 +31,46 @@ mkcmd n d = notDirect $ withGlobalOptions annexedMatchingOptions $
seek :: CmdParams -> CommandSeek
seek = withFilesInGit $ whenAnnexed start
-{- The unlock subcommand replaces the symlink with a copy of the file's
- - content. -}
+{- Before v6, the unlock subcommand replaces the symlink with a copy of
+ - the file's content. In v6 and above, it converts the file from a symlink
+ - to a pointer. -}
start :: FilePath -> Key -> CommandStart
-start file key = do
- showStart "unlock" file
+start file key = ifM (isJust <$> isAnnexLink file)
+ ( do
+ showStart "unlock" file
+ ifM (inAnnex key)
+ ( ifM versionSupportsUnlockedPointers
+ ( next $ performNew file key
+ , startOld file key
+ )
+ , do
+ warning "content not present; cannot unlock"
+ next $ next $ return False
+ )
+ , stop
+ )
+
+performNew :: FilePath -> Key -> CommandPerform
+performNew dest key = do
+ src <- calcRepo (gitAnnexLocation key)
+ srcic <- withTSDelta (liftIO . genInodeCache src)
+ replaceFile dest $ \tmp -> do
+ r <- linkAnnex' key src srcic tmp
+ case r of
+ LinkAnnexOk -> return ()
+ _ -> error "linkAnnex failed"
+ next $ cleanupNew dest key
+
+cleanupNew :: FilePath -> Key -> CommandCleanup
+cleanupNew dest key = do
+ stagePointerFile dest =<< hashPointerFile key
+ return True
+
+startOld :: FilePath -> Key -> CommandStart
+startOld file key =
ifM (inAnnex key)
( ifM (isJust <$> catKeyFileHEAD file)
- ( next $ perform file key
+ ( next $ performOld file key
, do
warning "this has not yet been committed to git; cannot unlock it"
next $ next $ return False
@@ -43,8 +80,8 @@ start file key = do
next $ next $ return False
)
-perform :: FilePath -> Key -> CommandPerform
-perform dest key = ifM (checkDiskSpace Nothing key 0 True)
+performOld :: FilePath -> Key -> CommandPerform
+performOld dest key = ifM (checkDiskSpace Nothing key 0 True)
( do
src <- calcRepo $ gitAnnexLocation key
tmpdest <- fromRepo $ gitAnnexTmpObjectLocation key
diff --git a/Command/Unused.hs b/Command/Unused.hs
index 4756cda5d..4353bd075 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -24,7 +24,6 @@ import qualified Git.Branch
import qualified Git.RefLog
import qualified Git.LsFiles as LsFiles
import qualified Git.DiffTree as DiffTree
-import qualified Backend
import qualified Remote
import qualified Annex.Branch
import Annex.CatFile
@@ -215,7 +214,7 @@ withKeysReferenced' mdir initial a = do
Just dir -> inRepo $ LsFiles.inRepo [dir]
go v [] = return v
go v (f:fs) = do
- x <- Backend.lookupFile f
+ x <- lookupFile f
case x of
Nothing -> go v fs
Just k -> do
@@ -266,7 +265,7 @@ withKeysReferencedInGitRef a ref = do
forM_ ts $ tKey lookAtWorkingTree >=> maybe noop a
liftIO $ void clean
where
- tKey True = Backend.lookupFile . getTopFilePath . DiffTree.file
+ tKey True = lookupFile . getTopFilePath . DiffTree.file
tKey False = fileKey . takeFileName . decodeBS <$$>
catFile ref . getTopFilePath . DiffTree.file
diff --git a/Command/Upgrade.hs b/Command/Upgrade.hs
index c02a6709f..8a34022e3 100644
--- a/Command/Upgrade.hs
+++ b/Command/Upgrade.hs
@@ -13,6 +13,7 @@ import Upgrade
cmd :: Command
cmd = dontCheck repoExists $ -- because an old version may not seem to exist
+ noDaemonRunning $ -- avoid upgrading repo out from under daemon
command "upgrade" SectionMaintenance "upgrade repository layout"
paramNothing (withParams seek)
diff --git a/Command/Version.hs b/Command/Version.hs
index 72bbe4064..c5a9fcef2 100644
--- a/Command/Version.hs
+++ b/Command/Version.hs
@@ -50,7 +50,8 @@ showVersion = do
liftIO $ do
showPackageVersion
vinfo "local repository version" $ fromMaybe "unknown" v
- vinfo "supported repository version" supportedVersion
+ vinfo "supported repository versions" $
+ unwords supportedVersions
vinfo "upgrade supported from repository versions" $
unwords upgradableVersions
diff --git a/Config.hs b/Config.hs
index 4af4f1284..f3833b17e 100644
--- a/Config.hs
+++ b/Config.hs
@@ -90,3 +90,21 @@ setCrippledFileSystem :: Bool -> Annex ()
setCrippledFileSystem b = do
setConfig (annexConfig "crippledfilesystem") (Git.Config.boolConfig b)
Annex.changeGitConfig $ \c -> c { annexCrippledFileSystem = b }
+
+configureSmudgeFilter :: Annex ()
+configureSmudgeFilter = do
+ setConfig (ConfigKey "filter.annex.smudge") "git-annex smudge %f"
+ setConfig (ConfigKey "filter.annex.clean") "git-annex smudge --clean %f"
+ lf <- Annex.fromRepo Git.attributesLocal
+ gf <- Annex.fromRepo Git.attributes
+ lfs <- readattr lf
+ gfs <- readattr gf
+ liftIO $ unless ("filter=annex" `isInfixOf` (lfs ++ gfs)) $ do
+ createDirectoryIfMissing True (takeDirectory lf)
+ writeFile lf (lfs ++ "\n" ++ stdattr)
+ where
+ readattr = liftIO . catchDefaultIO "" . readFileStrictAnyEncoding
+ stdattr = unlines
+ [ "* filter=annex"
+ , ".* !filter"
+ ]
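
For reference, the net effect of configureSmudgeFilter can be read off the strings above: the repository's git config gains a filter.annex section, and unless an existing attributes file already mentions filter=annex, the stdattr lines are appended to .git/info/attributes. Roughly:

    [filter "annex"]
        smudge = git-annex smudge %f
        clean = git-annex smudge --clean %f

    * filter=annex
    .* !filter
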
diff --git a/Database/Fsck.hs b/Database/Fsck.hs
index ed00e62d8..e7ece34ed 100644
--- a/Database/Fsck.hs
+++ b/Database/Fsck.hs
@@ -21,7 +21,7 @@ module Database.Fsck (
) where
import Database.Types
-import qualified Database.Handle as H
+import qualified Database.Queue as H
import Locations
import Utility.PosixFiles
import Utility.Exception
@@ -31,13 +31,12 @@ import Types.Key
import Types.UUID
import Annex.Perms
import Annex.LockFile
-import Messages
import Database.Persist.TH
import Database.Esqueleto hiding (Key)
import Data.Time.Clock
-data FsckHandle = FsckHandle H.DbHandle UUID
+data FsckHandle = FsckHandle H.DbQueue UUID
{- Each key stored in the database has already been fscked as part
- of the latest incremental fsck pass. -}
@@ -59,7 +58,7 @@ newPass u = isJust <$> tryExclusiveLock (gitAnnexFsckDbLock u) go
go = liftIO . void . tryIO . removeDirectoryRecursive
=<< fromRepo (gitAnnexFsckDbDir u)
-{- Opens the database, creating it atomically if it doesn't exist yet. -}
+{- Opens the database, creating it if it doesn't exist yet. -}
openDb :: UUID -> Annex FsckHandle
openDb u = do
dbdir <- fromRepo (gitAnnexFsckDbDir u)
@@ -77,16 +76,12 @@ openDb u = do
void $ tryIO $ removeDirectoryRecursive dbdir
rename tmpdbdir dbdir
lockFileCached =<< fromRepo (gitAnnexFsckDbLock u)
- h <- liftIO $ H.openDb db "fscked"
-
- -- work around https://github.com/yesodweb/persistent/issues/474
- liftIO setConsoleEncoding
-
+ h <- liftIO $ H.openDbQueue db "fscked"
return $ FsckHandle h u
closeDb :: FsckHandle -> Annex ()
closeDb (FsckHandle h u) = do
- liftIO $ H.closeDb h
+ liftIO $ H.closeDbQueue h
unlockFile =<< fromRepo (gitAnnexFsckDbLock u)
addDb :: FsckHandle -> Key -> IO ()
@@ -102,8 +97,9 @@ addDb (FsckHandle h _) k = H.queueDb h checkcommit $
now <- getCurrentTime
return $ diffUTCTime lastcommittime now > 300
+{- Doesn't know about keys that were just added with addDb. -}
inDb :: FsckHandle -> Key -> IO Bool
-inDb (FsckHandle h _) = H.queryDb h . inDb' . toSKey
+inDb (FsckHandle h _) = H.queryDbQueue h . inDb' . toSKey
inDb' :: SKey -> SqlPersistM Bool
inDb' sk = do
diff --git a/Database/Handle.hs b/Database/Handle.hs
index 439e7c18b..748feaa97 100644
--- a/Database/Handle.hs
+++ b/Database/Handle.hs
@@ -11,17 +11,15 @@ module Database.Handle (
DbHandle,
initDb,
openDb,
+ TableName,
queryDb,
closeDb,
- Size,
- queueDb,
- flushQueueDb,
commitDb,
+ commitDb',
) where
import Utility.Exception
-import Utility.Monad
-import Messages
+import Utility.FileSystemEncoding
import Database.Persist.Sqlite
import qualified Database.Sqlite as Sqlite
@@ -29,22 +27,22 @@ import Control.Monad
import Control.Monad.IO.Class (liftIO)
import Control.Concurrent
import Control.Concurrent.Async
-import Control.Exception (throwIO)
+import Control.Exception (throwIO, BlockedIndefinitelyOnMVar(..))
import qualified Data.Text as T
import Control.Monad.Trans.Resource (runResourceT)
import Control.Monad.Logger (runNoLoggingT)
import Data.List
-import Data.Time.Clock
+import System.IO
{- A DbHandle is a reference to a worker thread that communicates with
- the database. It has a MVar which Jobs are submitted to. -}
-data DbHandle = DbHandle (Async ()) (MVar Job) (MVar DbQueue)
+data DbHandle = DbHandle (Async ()) (MVar Job)
{- Ensures that the database is initialized. Pass the migration action for
- the database.
-
- - The database is put into WAL mode, to prevent readers from blocking
- - writers, and prevent a writer from blocking readers.
+ - The database is initialized using WAL mode, to prevent readers
+ - from blocking writers, and prevent a writer from blocking readers.
-}
initDb :: FilePath -> SqlPersistM () -> IO ()
initDb f migration = do
@@ -60,67 +58,27 @@ enableWAL db = do
void $ Sqlite.finalize stmt
Sqlite.close conn
+{- Name of a table that should exist once the database is initialized. -}
+type TableName = String
+
{- Opens the database, but does not perform any migrations. Only use
- if the database is known to exist and have the right tables. -}
openDb :: FilePath -> TableName -> IO DbHandle
openDb db tablename = do
jobs <- newEmptyMVar
worker <- async (workerThread (T.pack db) tablename jobs)
- q <- newMVar =<< emptyDbQueue
- return $ DbHandle worker jobs q
-
-data Job
- = QueryJob (SqlPersistM ())
- | ChangeJob ((SqlPersistM () -> IO ()) -> IO ())
- | CloseJob
-
-type TableName = String
-
-workerThread :: T.Text -> TableName -> MVar Job -> IO ()
-workerThread db tablename jobs = catchNonAsync (run loop) showerr
- where
- showerr e = liftIO $ warningIO $
- "sqlite worker thread crashed: " ++ show e
-
- loop = do
- job <- liftIO $ takeMVar jobs
- case job of
- QueryJob a -> a >> loop
- -- change is run in a separate database connection
- -- since sqlite only supports a single writer at a
- -- time, and it may crash the database connection
- ChangeJob a -> liftIO (a run) >> loop
- CloseJob -> return ()
- -- like runSqlite, but calls settle on the raw sql Connection.
- run a = do
- conn <- Sqlite.open db
- settle conn
- runResourceT $ runNoLoggingT $
- withSqlConn (wrapConnection conn) $
- runSqlConn a
+ -- work around https://github.com/yesodweb/persistent/issues/474
+ liftIO setConsoleEncoding
- -- Work around a bug in sqlite: New database connections can
- -- sometimes take a while to become usable; select statements will
- -- fail with ErrorBusy for some time. So, loop until a select
- -- succeeds; once one succeeds the connection will stay usable.
- -- <http://thread.gmane.org/gmane.comp.db.sqlite.general/93116>
- settle conn = do
- r <- tryNonAsync $ do
- stmt <- Sqlite.prepare conn nullselect
- void $ Sqlite.step stmt
- void $ Sqlite.finalize stmt
- case r of
- Right _ -> return ()
- Left e -> do
- if "ErrorBusy" `isInfixOf` show e
- then do
- threadDelay 1000 -- 1/1000th second
- settle conn
- else throwIO e
-
- -- This should succeed for any table.
- nullselect = T.pack $ "SELECT null from " ++ tablename ++ " limit 1"
+ return $ DbHandle worker jobs
+
+{- This is optional; when the DbHandle gets garbage collected it will
+ - auto-close. -}
+closeDb :: DbHandle -> IO ()
+closeDb (DbHandle worker jobs) = do
+ putMVar jobs CloseJob
+ wait worker
{- Makes a query using the DbHandle. This should not be used to make
- changes to the database!
@@ -133,71 +91,21 @@ workerThread db tablename jobs = catchNonAsync (run loop) showerr
- it is able to run.
-}
queryDb :: DbHandle -> SqlPersistM a -> IO a
-queryDb (DbHandle _ jobs _) a = do
+queryDb (DbHandle _ jobs) a = do
res <- newEmptyMVar
putMVar jobs $ QueryJob $
liftIO . putMVar res =<< tryNonAsync a
(either throwIO return =<< takeMVar res)
`catchNonAsync` (const $ error "sqlite query crashed")
-closeDb :: DbHandle -> IO ()
-closeDb h@(DbHandle worker jobs _) = do
- flushQueueDb h
- putMVar jobs CloseJob
- wait worker
-
-type Size = Int
-
-type LastCommitTime = UTCTime
-
-{- A queue of actions to perform, with a count of the number of actions
- - queued, and a last commit time. -}
-data DbQueue = DbQueue Size LastCommitTime (SqlPersistM ())
-
-emptyDbQueue :: IO DbQueue
-emptyDbQueue = do
- now <- getCurrentTime
- return $ DbQueue 0 now (return ())
-
-{- Queues a change to be made to the database. It will be buffered
- - to be committed later, unless the commitchecker action returns true.
- -
- - (Be sure to call closeDb or flushQueueDb to ensure the change
- - gets committed.)
+{- Writes a change to the database.
-
- - Transactions built up by queueDb are sent to sqlite all at once.
- - If sqlite fails due to another change being made concurrently by another
- - process, the transaction is put back in the queue. This solves
- - the sqlite multiple writer problem.
+ - If a database is opened multiple times and there's a concurrent writer,
+ - the write could fail. Retries repeatedly for up to 10 seconds,
+ - which should avoid all but the most exceptional problems.
-}
-queueDb
- :: DbHandle
- -> (Size -> LastCommitTime -> IO Bool)
- -> SqlPersistM ()
- -> IO ()
-queueDb h@(DbHandle _ _ qvar) commitchecker a = do
- DbQueue sz lastcommittime qa <- takeMVar qvar
- let !sz' = sz + 1
- let qa' = qa >> a
- let enqueue = putMVar qvar
- ifM (commitchecker sz' lastcommittime)
- ( do
- r <- commitDb h qa'
- case r of
- Left _ -> enqueue $ DbQueue sz' lastcommittime qa'
- Right _ -> do
- now <- getCurrentTime
- enqueue $ DbQueue 0 now (return ())
- , enqueue $ DbQueue sz' lastcommittime qa'
- )
-
-{- If flushing the queue fails, this could be because there is another
- - writer to the database. Retry repeatedly for up to 10 seconds. -}
-flushQueueDb :: DbHandle -> IO ()
-flushQueueDb h@(DbHandle _ _ qvar) = do
- DbQueue sz _ qa <- takeMVar qvar
- when (sz > 0) $
- robustly Nothing 100 (commitDb h qa)
+commitDb :: DbHandle -> SqlPersistM () -> IO ()
+commitDb h wa = robustly Nothing 100 (commitDb' h wa)
where
robustly :: Maybe SomeException -> Int -> IO (Either SomeException ()) -> IO ()
robustly e 0 _ = error $ "failed to commit changes to sqlite database: " ++ show e
@@ -209,9 +117,69 @@ flushQueueDb h@(DbHandle _ _ qvar) = do
threadDelay 100000 -- 1/10th second
robustly (Just e) (n-1) a
-commitDb :: DbHandle -> SqlPersistM () -> IO (Either SomeException ())
-commitDb (DbHandle _ jobs _) a = do
+commitDb' :: DbHandle -> SqlPersistM () -> IO (Either SomeException ())
+commitDb' (DbHandle _ jobs) a = do
res <- newEmptyMVar
putMVar jobs $ ChangeJob $ \runner ->
liftIO $ putMVar res =<< tryNonAsync (runner a)
takeMVar res
+
+data Job
+ = QueryJob (SqlPersistM ())
+ | ChangeJob ((SqlPersistM () -> IO ()) -> IO ())
+ | CloseJob
+
+workerThread :: T.Text -> TableName -> MVar Job -> IO ()
+workerThread db tablename jobs =
+ catchNonAsync (runSqliteRobustly tablename db loop) showerr
+ where
+ showerr e = hPutStrLn stderr $
+ "sqlite worker thread crashed: " ++ show e
+
+ getjob :: IO (Either BlockedIndefinitelyOnMVar Job)
+ getjob = try $ takeMVar jobs
+
+ loop = do
+ job <- liftIO getjob
+ case job of
+ -- Exception is thrown when the MVar is garbage
+ -- collected, which means the whole DbHandle
+ -- is not used any longer. Shutdown cleanly.
+ Left BlockedIndefinitelyOnMVar -> return ()
+ Right CloseJob -> return ()
+ Right (QueryJob a) -> a >> loop
+ -- change is run in a separate database connection
+ -- since sqlite only supports a single writer at a
+ -- time, and it may crash the database connection
+ Right (ChangeJob a) -> liftIO (a (runSqliteRobustly tablename db)) >> loop
+
+-- like runSqlite, but calls settle on the raw sql Connection.
+runSqliteRobustly :: TableName -> T.Text -> (SqlPersistM a) -> IO a
+runSqliteRobustly tablename db a = do
+ conn <- Sqlite.open db
+ settle conn
+ runResourceT $ runNoLoggingT $
+ withSqlConn (wrapConnection conn) $
+ runSqlConn a
+ where
+ -- Work around a bug in sqlite: New database connections can
+ -- sometimes take a while to become usable; select statements will
+ -- fail with ErrorBusy for some time. So, loop until a select
+ -- succeeds; once one succeeds the connection will stay usable.
+ -- <http://thread.gmane.org/gmane.comp.db.sqlite.general/93116>
+ settle conn = do
+ r <- tryNonAsync $ do
+ stmt <- Sqlite.prepare conn nullselect
+ void $ Sqlite.step stmt
+ void $ Sqlite.finalize stmt
+ case r of
+ Right _ -> return ()
+ Left e -> do
+ if "ErrorBusy" `isInfixOf` show e
+ then do
+ threadDelay 1000 -- 1/1000th second
+ settle conn
+ else throwIO e
+
+ -- This should succeed for any table.
+ nullselect = T.pack $ "SELECT null from " ++ tablename ++ " limit 1"
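
A minimal usage sketch of the reworked Database.Handle API (hypothetical caller, not part of this commit), assuming the database file already exists with the named table, since openDb performs no migrations; the queries are placeholder no-ops:

    import Database.Handle

    example :: FilePath -> IO ()
    example db = do
        h <- openDb db "content"
        _ <- queryDb h (return ())  -- read-only query on the worker thread
        commitDb h (return ())      -- a write; retried internally on failure
        closeDb h
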
diff --git a/Database/Keys.hs b/Database/Keys.hs
new file mode 100644
index 000000000..f5a28c704
--- /dev/null
+++ b/Database/Keys.hs
@@ -0,0 +1,237 @@
+{- Sqlite database of information about Keys
+ -
+ - Copyright 2015 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
+{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
+{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE RankNTypes #-}
+
+module Database.Keys (
+ DbHandle,
+ addAssociatedFile,
+ getAssociatedFiles,
+ getAssociatedKey,
+ removeAssociatedFile,
+ storeInodeCaches,
+ addInodeCaches,
+ getInodeCaches,
+ removeInodeCaches,
+ AssociatedId,
+ ContentId,
+) where
+
+import Database.Types
+import Database.Keys.Handle
+import qualified Database.Queue as H
+import Locations
+import Common hiding (delete)
+import Annex
+import Types.Key
+import Annex.Perms
+import Annex.LockFile
+import Utility.InodeCache
+import Annex.InodeSentinal
+
+import Database.Persist.TH
+import Database.Esqueleto hiding (Key)
+import Data.Time.Clock
+
+share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase|
+Associated
+ key SKey
+ file FilePath
+ KeyFileIndex key file
+Content
+ key SKey
+ cache SInodeCache
+ KeyCacheIndex key cache
+|]
+
+newtype ReadHandle = ReadHandle H.DbQueue
+
+type Reader v = ReadHandle -> Annex v
+
+{- Runs an action that reads from the database.
+ -
+ - If the database doesn't already exist, it's not created; mempty is
+ - returned instead. This way, when the keys database is not in use,
+ - there's minimal overhead in checking it.
+ -
+ - Any queued writes will be flushed to the database before the read,
+ - to ensure consistency.
+ -}
+runReader :: Monoid v => Reader v -> Annex v
+runReader a = do
+ h <- getDbHandle
+ withDbState h go
+ where
+ go DbEmpty = return (mempty, DbEmpty)
+ go st@(DbOpen qh) = do
+ liftIO $ H.flushDbQueue qh
+ v <- a (ReadHandle qh)
+ return (v, st)
+ go DbClosed = do
+ st' <- openDb False DbClosed
+ v <- case st' of
+ (DbOpen qh) -> a (ReadHandle qh)
+ _ -> return mempty
+ return (v, st')
+
+readDb :: SqlPersistM a -> ReadHandle -> Annex a
+readDb a (ReadHandle h) = liftIO $ H.queryDbQueue h a
+
+newtype WriteHandle = WriteHandle H.DbQueue
+
+type Writer = WriteHandle -> Annex ()
+
+{- Runs an action that writes to the database. Typically this is used to
+ - queue changes, which will be flushed at a later point.
+ -
+ - The database is created if it doesn't exist yet. -}
+runWriter :: Writer -> Annex ()
+runWriter a = do
+ h <- getDbHandle
+ withDbState h go
+ where
+ go st@(DbOpen qh) = do
+ v <- a (WriteHandle qh)
+ return (v, st)
+ go st = do
+ st' <- openDb True st
+ v <- case st' of
+ DbOpen qh -> a (WriteHandle qh)
+ _ -> error "internal"
+ return (v, st')
+
+queueDb :: SqlPersistM () -> WriteHandle -> Annex ()
+queueDb a (WriteHandle h) = liftIO $ H.queueDb h checkcommit a
+ where
+ -- commit queue after 1000 changes or 5 minutes, whichever comes first
+ checkcommit sz lastcommittime
+ | sz > 1000 = return True
+ | otherwise = do
+ now <- getCurrentTime
+ return $ diffUTCTime lastcommittime now > 300
+
+{- Gets the handle cached in Annex state; creates a new one if it's not yet
+ - available, but doesn't open the database. -}
+getDbHandle :: Annex DbHandle
+getDbHandle = go =<< getState keysdbhandle
+ where
+ go (Just h) = pure h
+ go Nothing = do
+ h <- liftIO newDbHandle
+ changeState $ \s -> s { keysdbhandle = Just h }
+ return h
+
+{- Opens the database, perhaps creating it if it doesn't exist yet.
+ -
+ - Multiple readers and writers can have the database open at the same
+ - time. Database.Handle deals with the concurrency issues.
+ - The lock is held while opening the database, so that when
+ - the database doesn't exist yet, one caller wins the lock and
+ - can create it undisturbed.
+ -}
+openDb :: Bool -> DbState -> Annex DbState
+openDb _ st@(DbOpen _) = return st
+openDb False DbEmpty = return DbEmpty
+openDb createdb _ = withExclusiveLock gitAnnexKeysDbLock $ do
+ dbdir <- fromRepo gitAnnexKeysDb
+ let db = dbdir </> "db"
+ dbexists <- liftIO $ doesFileExist db
+ case (dbexists, createdb) of
+ (True, _) -> open db
+ (False, True) -> do
+ liftIO $ do
+ createDirectoryIfMissing True dbdir
+ H.initDb db $ void $
+ runMigrationSilent migrateKeysDb
+ setAnnexDirPerm dbdir
+ setAnnexFilePerm db
+ open db
+ (False, False) -> return DbEmpty
+ where
+ open db = liftIO $ DbOpen <$> H.openDbQueue db "content"
+
+addAssociatedFile :: Key -> FilePath -> Annex ()
+addAssociatedFile k f = runWriter $ addAssociatedFile' k f
+
+addAssociatedFile' :: Key -> FilePath -> Writer
+addAssociatedFile' k f = queueDb $ do
+ -- If the same file was associated with a different key before,
+ -- remove that.
+ delete $ from $ \r -> do
+ where_ (r ^. AssociatedFile ==. val f &&. r ^. AssociatedKey !=. val sk)
+ void $ insertUnique $ Associated sk f
+ where
+ sk = toSKey k
+
+{- Note that the files returned were once associated with the key, but
+ - some of them may not be any longer. -}
+getAssociatedFiles :: Key -> Annex [FilePath]
+getAssociatedFiles = runReader . getAssociatedFiles' . toSKey
+
+getAssociatedFiles' :: SKey -> Reader [FilePath]
+getAssociatedFiles' sk = readDb $ do
+ l <- select $ from $ \r -> do
+ where_ (r ^. AssociatedKey ==. val sk)
+ return (r ^. AssociatedFile)
+ return $ map unValue l
+
+{- Gets any keys that are on record as having a particular associated file.
+ - (Should be one or none but the database doesn't enforce that.) -}
+getAssociatedKey :: FilePath -> Annex [Key]
+getAssociatedKey = runReader . getAssociatedKey'
+
+getAssociatedKey' :: FilePath -> Reader [Key]
+getAssociatedKey' f = readDb $ do
+ l <- select $ from $ \r -> do
+ where_ (r ^. AssociatedFile ==. val f)
+ return (r ^. AssociatedKey)
+ return $ map (fromSKey . unValue) l
+
+removeAssociatedFile :: Key -> FilePath -> Annex ()
+removeAssociatedFile k = runWriter . removeAssociatedFile' (toSKey k)
+
+removeAssociatedFile' :: SKey -> FilePath -> Writer
+removeAssociatedFile' sk f = queueDb $
+ delete $ from $ \r -> do
+ where_ (r ^. AssociatedKey ==. val sk &&. r ^. AssociatedFile ==. val f)
+
+{- Stats the files, and stores their InodeCaches. -}
+storeInodeCaches :: Key -> [FilePath] -> Annex ()
+storeInodeCaches k fs = withTSDelta $ \d ->
+ addInodeCaches k . catMaybes =<< liftIO (mapM (`genInodeCache` d) fs)
+
+addInodeCaches :: Key -> [InodeCache] -> Annex ()
+addInodeCaches k is = runWriter $ addInodeCaches' (toSKey k) is
+
+addInodeCaches' :: SKey -> [InodeCache] -> Writer
+addInodeCaches' sk is = queueDb $
+ forM_ is $ \i -> insertUnique $ Content sk (toSInodeCache i)
+
+{- A key may have multiple InodeCaches; one for the annex object, and one
+ - for each pointer file that is a copy of it. -}
+getInodeCaches :: Key -> Annex [InodeCache]
+getInodeCaches = runReader . getInodeCaches' . toSKey
+
+getInodeCaches' :: SKey -> Reader [InodeCache]
+getInodeCaches' sk = readDb $ do
+ l <- select $ from $ \r -> do
+ where_ (r ^. ContentKey ==. val sk)
+ return (r ^. ContentCache)
+ return $ map (fromSInodeCache . unValue) l
+
+removeInodeCaches :: Key -> Annex ()
+removeInodeCaches = runWriter . removeInodeCaches' . toSKey
+
+removeInodeCaches' :: SKey -> Writer
+removeInodeCaches' sk = queueDb $
+ delete $ from $ \r -> do
+ where_ (r ^. ContentKey ==. val sk)
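
A caller-side sketch of the Database.Keys API above (hypothetical, not part of this commit). Writes are queued, and runReader flushes the queue before reading, so the lookup sees the new association:

    import qualified Database.Keys
    import Annex (Annex)
    import Types.Key (Key)

    recordAndCheck :: Key -> FilePath -> Annex Bool
    recordAndCheck k f = do
        Database.Keys.addAssociatedFile k f
        fs <- Database.Keys.getAssociatedFiles k
        return (f `elem` fs)
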
diff --git a/Database/Keys/Handle.hs b/Database/Keys/Handle.hs
new file mode 100644
index 000000000..5a5912b0b
--- /dev/null
+++ b/Database/Keys/Handle.hs
@@ -0,0 +1,55 @@
+{- Handle for the Keys database.
+ -
+ - Copyright 2015 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Database.Keys.Handle (
+ DbHandle,
+ newDbHandle,
+ DbState(..),
+ withDbState,
+ flushDbQueue,
+) where
+
+import qualified Database.Queue as H
+import Utility.Exception
+
+import Control.Concurrent
+import Control.Monad.IO.Class (liftIO, MonadIO)
+
+-- The MVar is always left full except when actions are run
+-- that access the database.
+newtype DbHandle = DbHandle (MVar DbState)
+
+-- The database can be closed or open, but it also may have been
+-- tried to open (for read) and didn't exist yet.
+data DbState = DbClosed | DbOpen H.DbQueue | DbEmpty
+
+newDbHandle :: IO DbHandle
+newDbHandle = DbHandle <$> newMVar DbClosed
+
+-- Runs an action on the state of the handle, which can change its state.
+-- The MVar is empty while the action runs, which blocks other users
+-- of the handle from running.
+withDbState
+ :: (MonadIO m, MonadCatch m)
+ => DbHandle
+ -> (DbState
+ -> m (v, DbState))
+ -> m v
+withDbState (DbHandle mvar) a = do
+ st <- liftIO $ takeMVar mvar
+ go st `onException` (liftIO $ putMVar mvar st)
+ where
+ go st = do
+ (v, st') <- a st
+ liftIO $ putMVar mvar st'
+ return v
+
+flushDbQueue :: DbHandle -> IO ()
+flushDbQueue (DbHandle mvar) = go =<< readMVar mvar
+ where
+ go (DbOpen qh) = H.flushDbQueue qh
+ go _ = return ()
diff --git a/Database/Queue.hs b/Database/Queue.hs
new file mode 100644
index 000000000..99fbacb9b
--- /dev/null
+++ b/Database/Queue.hs
@@ -0,0 +1,107 @@
+{- Persistent sqlite database queues
+ -
+ - Copyright 2015 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE BangPatterns #-}
+
+module Database.Queue (
+ DbQueue,
+ initDb,
+ openDbQueue,
+ queryDbQueue,
+ closeDbQueue,
+ flushDbQueue,
+ QueueSize,
+ queueDb,
+) where
+
+import Utility.Monad
+import Database.Handle
+
+import Database.Persist.Sqlite
+import Control.Concurrent
+import Data.Time.Clock
+
+{- A DbQueue wraps a DbHandle, adding a queue of writes to perform.
+ -
+ - This is efficient when there are frequent writes, but
+ - reads will not immediately have access to queued writes. -}
+data DbQueue = DQ DbHandle (MVar Queue)
+
+{- Opens the database queue, but does not perform any migrations. Only use
+ - if the database is known to exist and have the right tables; ie after
+ - running initDb. -}
+openDbQueue :: FilePath -> TableName -> IO DbQueue
+openDbQueue db tablename = DQ
+ <$> openDb db tablename
+ <*> (newMVar =<< emptyQueue)
+
+{- This or flushDbQueue must be called, eg at program exit to ensure
+ - queued changes get written to the database. -}
+closeDbQueue :: DbQueue -> IO ()
+closeDbQueue h@(DQ hdl _) = do
+ flushDbQueue h
+ closeDb hdl
+
+{- Blocks until all queued changes have been written to the database. -}
+flushDbQueue :: DbQueue -> IO ()
+flushDbQueue (DQ hdl qvar) = do
+ q@(Queue sz _ qa) <- takeMVar qvar
+ if sz > 0
+ then do
+ commitDb hdl qa
+ putMVar qvar =<< emptyQueue
+ else putMVar qvar q
+
+{- Makes a query using the DbQueue's database connection.
+ - This should not be used to make changes to the database!
+ -
+ - Queries will not return changes that have been recently queued,
+ - so use with care.
+ -}
+queryDbQueue :: DbQueue -> SqlPersistM a -> IO a
+queryDbQueue (DQ hdl _) = queryDb hdl
+
+{- A queue of actions to perform, with a count of the number of actions
+ - queued, and a last commit time. -}
+data Queue = Queue QueueSize LastCommitTime (SqlPersistM ())
+
+type QueueSize = Int
+
+type LastCommitTime = UTCTime
+
+emptyQueue :: IO Queue
+emptyQueue = do
+ now <- getCurrentTime
+ return $ Queue 0 now (return ())
+
+{- Queues a change to be made to the database. It will be committed
+ - later, unless the commitchecker action returns true,
+ - in which case any previously queued changes are also committed.
+ -
+ - Transactions built up by queueDb are sent to sqlite all at once.
+ - If sqlite fails due to another change being made concurrently by another
+ - process, the transaction is put back in the queue. This avoids
+ - the sqlite multiple writer problem.
+ -}
+queueDb
+ :: DbQueue
+ -> (QueueSize -> LastCommitTime -> IO Bool)
+ -> SqlPersistM ()
+ -> IO ()
+queueDb (DQ hdl qvar) commitchecker a = do
+ Queue sz lastcommittime qa <- takeMVar qvar
+ let !sz' = sz + 1
+ let qa' = qa >> a
+ let enqueue = putMVar qvar
+ ifM (commitchecker sz' lastcommittime)
+ ( do
+ r <- commitDb' hdl qa'
+ case r of
+ Left _ -> enqueue $ Queue sz' lastcommittime qa'
+ Right _ -> enqueue =<< emptyQueue
+ , enqueue $ Queue sz' lastcommittime qa'
+ )
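
A sketch of a commitchecker argument for queueDb, modeled on the callers in this commit (Database.Fsck and Database.Keys): commit once the queue has grown large, or once enough time has passed since the last commit. Note that diffUTCTime returns its first argument minus its second, so elapsed time is diffUTCTime now lastcommittime:

    import Data.Time.Clock

    commitAfter :: Int -> UTCTime -> IO Bool
    commitAfter sz lastcommittime
        | sz > 1000 = return True
        | otherwise = do
            now <- getCurrentTime
            return (diffUTCTime now lastcommittime > 300)
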
diff --git a/Database/Types.hs b/Database/Types.hs
index dee56832b..1476a693a 100644
--- a/Database/Types.hs
+++ b/Database/Types.hs
@@ -13,6 +13,7 @@ import Database.Persist.TH
import Data.Maybe
import Types.Key
+import Utility.InodeCache
-- A serialized Key
newtype SKey = SKey String
@@ -22,6 +23,18 @@ toSKey :: Key -> SKey
toSKey = SKey . key2file
fromSKey :: SKey -> Key
-fromSKey (SKey s) = fromMaybe (error $ "bad serialied key " ++ s) (file2key s)
+fromSKey (SKey s) = fromMaybe (error $ "bad serialized Key " ++ s) (file2key s)
derivePersistField "SKey"
+
+-- A serialized InodeCache
+newtype SInodeCache = I String
+ deriving (Show, Read)
+
+toSInodeCache :: InodeCache -> SInodeCache
+toSInodeCache = I . showInodeCache
+
+fromSInodeCache :: SInodeCache -> InodeCache
+fromSInodeCache (I s) = fromMaybe (error $ "bad serialized InodeCache " ++ s) (readInodeCache s)
+
+derivePersistField "SInodeCache"
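
As a small property sketch (assuming Key's Eq instance), a Key should survive the round trip through its serialized database form; fromSKey errors out on malformed input rather than returning Nothing:

    roundTrips :: Key -> Bool
    roundTrips k = fromSKey (toSKey k) == k
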
diff --git a/Git.hs b/Git.hs
index 1bc789f85..6f7769c87 100644
--- a/Git.hs
+++ b/Git.hs
@@ -28,6 +28,7 @@ module Git (
repoPath,
localGitDir,
attributes,
+ attributesLocal,
hookPath,
assertLocal,
adjustPath,
@@ -125,8 +126,11 @@ assertLocal repo action
{- Path to a repository's gitattributes file. -}
attributes :: Repo -> FilePath
attributes repo
- | repoIsLocalBare repo = repoPath repo ++ "/info/.gitattributes"
- | otherwise = repoPath repo ++ "/.gitattributes"
+ | repoIsLocalBare repo = attributesLocal repo
+ | otherwise = repoPath repo </> ".gitattributes"
+
+attributesLocal :: Repo -> FilePath
+attributesLocal repo = localGitDir repo </> "info" </> "attributes"
{- Path to a given hook script in a repository, only if the hook exists
- and is executable. -}
diff --git a/Limit.hs b/Limit.hs
index 6930ab06d..437c65bc3 100644
--- a/Limit.hs
+++ b/Limit.hs
@@ -11,8 +11,8 @@ import Common.Annex
import qualified Annex
import qualified Utility.Matcher
import qualified Remote
-import qualified Backend
import Annex.Content
+import Annex.WorkTree
import Annex.Action
import Annex.UUID
import Logs.Trust
@@ -201,22 +201,22 @@ limitAnything _ _ = return True
{- Adds a limit to skip files not believed to be present in all
- repositories in the specified group. -}
addInAllGroup :: String -> Annex ()
-addInAllGroup groupname = do
- m <- groupMap
- addLimit $ limitInAllGroup m groupname
-
-limitInAllGroup :: GroupMap -> MkLimit Annex
-limitInAllGroup m groupname
- | S.null want = Right $ const $ const $ return True
- | otherwise = Right $ \notpresent -> checkKey $ check notpresent
- where
- want = fromMaybe S.empty $ M.lookup groupname $ uuidsByGroup m
- check notpresent key
+addInAllGroup groupname = addLimit $ limitInAllGroup groupMap groupname
+
+limitInAllGroup :: Annex GroupMap -> MkLimit Annex
+limitInAllGroup getgroupmap groupname = Right $ \notpresent mi -> do
+ m <- getgroupmap
+ let want = fromMaybe S.empty $ M.lookup groupname $ uuidsByGroup m
+ if S.null want
+ then return True
-- optimisation: Check if a wanted uuid is notpresent.
- | not (S.null (S.intersection want notpresent)) = return False
- | otherwise = do
- present <- S.fromList <$> Remote.keyLocations key
- return $ S.null $ want `S.difference` present
+ else if not (S.null (S.intersection want notpresent))
+ then return False
+ else checkKey (check want) mi
+ where
+ check want key = do
+ present <- S.fromList <$> Remote.keyLocations key
+ return $ S.null $ want `S.difference` present
{- Adds a limit to skip files not using a specified key-value backend. -}
addInBackend :: String -> Annex ()
@@ -277,7 +277,7 @@ addTimeLimit s = do
else return True
lookupFileKey :: FileInfo -> Annex (Maybe Key)
-lookupFileKey = Backend.lookupFile . currFile
+lookupFileKey = lookupFile . currFile
checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool
checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a
diff --git a/Locations.hs b/Locations.hs
index ba6115155..200297321 100644
--- a/Locations.hs
+++ b/Locations.hs
@@ -29,6 +29,8 @@ module Locations (
gitAnnexBadDir,
gitAnnexBadLocation,
gitAnnexUnusedLog,
+ gitAnnexKeysDb,
+ gitAnnexKeysDbLock,
gitAnnexFsckState,
gitAnnexFsckDbDir,
gitAnnexFsckDbLock,
@@ -237,6 +239,14 @@ gitAnnexBadLocation key r = gitAnnexBadDir r </> keyFile key
gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath
gitAnnexUnusedLog prefix r = gitAnnexDir r </> (prefix ++ "unused")
+{- .git/annex/keys/ contains a database of information about keys. -}
+gitAnnexKeysDb :: Git.Repo -> FilePath
+gitAnnexKeysDb r = gitAnnexDir r </> "keys"
+
+{- Lock file for the keys database. -}
+gitAnnexKeysDbLock :: Git.Repo -> FilePath
+gitAnnexKeysDbLock r = gitAnnexKeysDb r ++ "lck"
+
{- .git/annex/fsck/uuid/ is used to store information about incremental
- fscks. -}
gitAnnexFsckDir :: UUID -> Git.Repo -> FilePath
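
Illustratively, for a typical non-bare repository these work out as follows (the lock is the directory name with "lck" appended, not a file inside the directory):

    -- gitAnnexKeysDb r     == ".git/annex/keys"
    -- gitAnnexKeysDbLock r == ".git/annex/keyslck"
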
diff --git a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs
index c21d67010..035c098f6 100644
--- a/Logs/PreferredContent.hs
+++ b/Logs/PreferredContent.hs
@@ -102,7 +102,7 @@ makeMatcher groupmap configmap groupwantedmap u = go True True
| null (lefts tokens) = generate $ rights tokens
| otherwise = unknownMatcher u
where
- tokens = exprParser matchstandard matchgroupwanted groupmap configmap (Just u) expr
+ tokens = exprParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr
matchstandard
| expandstandard = maybe (unknownMatcher u) (go False False)
(standardPreferredContent <$> getStandardGroup mygroups)
@@ -133,7 +133,7 @@ checkPreferredContentExpression expr = case parsedToMatcher tokens of
Left e -> Just e
Right _ -> Nothing
where
- tokens = exprParser matchAll matchAll emptyGroupMap M.empty Nothing expr
+ tokens = exprParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr
{- Puts a UUID in a standard group, and sets its preferred content to use
- the standard expression for that group (unless preferred content is
diff --git a/Messages.hs b/Messages.hs
index a49f20711..b62e6d2a7 100644
--- a/Messages.hs
+++ b/Messages.hs
@@ -31,7 +31,6 @@ module Messages (
showHeader,
showRaw,
setupConsole,
- setConsoleEncoding,
enableDebugOutput,
disableDebugOutput,
debugEnabled,
@@ -183,13 +182,6 @@ setupConsole = do
updateGlobalLogger rootLoggerName (setLevel NOTICE . setHandlers [s])
setConsoleEncoding
-{- This avoids ghc's output layer crashing on invalid encoded characters in
- - filenames when printing them out. -}
-setConsoleEncoding :: IO ()
-setConsoleEncoding = do
- fileEncoding stdout
- fileEncoding stderr
-
{- Log formatter with precision into fractions of a second. -}
preciseLogFormatter :: LogFormatter a
preciseLogFormatter = tfLogFormatter "%F %X%Q" "[$time] $msg"
diff --git a/Remote/Git.hs b/Remote/Git.hs
index 890e40b51..6dc5345c9 100644
--- a/Remote/Git.hs
+++ b/Remote/Git.hs
@@ -672,7 +672,7 @@ wantHardLink :: Annex Bool
wantHardLink = (annexHardLink <$> Annex.getGitConfig) <&&> (not <$> isDirect)
-- Copies from src to dest, updating a meter. If the copy finishes
--- successfully, calls a final check action, which must also success, or
+-- successfully, calls a final check action, which must also succeed, or
-- returns false.
--
-- If either the remote or local repository wants to use hard links,
diff --git a/Test.hs b/Test.hs
index f4035f605..4656e0a2b 100644
--- a/Test.hs
+++ b/Test.hs
@@ -38,6 +38,7 @@ import Common
import qualified Utility.SafeCommand
import qualified Annex
import qualified Annex.UUID
+import qualified Annex.Version
import qualified Backend
import qualified Git.CurrentRepo
import qualified Git.Filename
@@ -65,6 +66,7 @@ import qualified Types.Messages
import qualified Config
import qualified Config.Cost
import qualified Crypto
+import qualified Annex.WorkTree
import qualified Annex.Init
import qualified Annex.CatFile
import qualified Annex.View
@@ -117,18 +119,17 @@ ingredients =
]
tests :: TestTree
-tests = testGroup "Tests"
- -- Test both direct and indirect mode.
- -- Windows is only going to use direct mode, so don't test twice.
- [ properties
+tests = testGroup "Tests" $ properties :
+ map (\(d, te) -> withTestMode te (unitTests d)) testmodes
+ where
+ testmodes =
+ [ ("v6", TestMode { forceDirect = False, annexVersion = "6" })
+ , ("v5", TestMode { forceDirect = False, annexVersion = "5" })
+ -- Windows will only use direct mode, so don't test twice.
#ifndef mingw32_HOST_OS
- , withTestEnv True $ unitTests "(direct)"
- , withTestEnv False $ unitTests "(indirect)"
-#else
- , withTestEnv False $ unitTests ""
+ , ("v5 direct", TestMode { forceDirect = True, annexVersion = "5" })
+ ]
#endif
- ]
-
properties :: TestTree
properties = localOption (QuickCheckTests 1000) $ testGroup "QuickCheck"
@@ -242,8 +243,11 @@ unitTests note = testGroup ("Unit Tests " ++ note)
-- this test case create the main repo
test_init :: Assertion
test_init = innewrepo $ do
- git_annex "init" [reponame] @? "init failed"
- handleforcedirect
+ ver <- annexVersion <$> getTestMode
+ if ver == Annex.Version.defaultVersion
+ then git_annex "init" [reponame] @? "init failed"
+ else git_annex "init" [reponame, "--version", ver] @? "init failed"
+ setupTestMode
where
reponame = "test repo"
@@ -294,7 +298,6 @@ test_shared_clone = intmpsharedclonerepo $ do
, "--get"
, "annex.hardlink"
]
- print v
v == Just "true\n"
@? "shared clone of repo did not get annex.hardlink set"
@@ -534,10 +537,13 @@ test_lock = intmpclonerepoInDirect $ do
annexed_notpresent annexedfile
-- regression test: unlock of newly added, not committed file
- -- should fail
+ -- should fail in v5 mode. In v6 mode, this is allowed.
writeFile "newfile" "foo"
git_annex "add" ["newfile"] @? "add new file failed"
- not <$> git_annex "unlock" ["newfile"] @? "unlock failed to fail on newly added, never committed file"
+ ifM (annexeval Annex.Version.versionSupportsUnlockedPointers)
+ ( git_annex "unlock" ["newfile"] @? "unlock failed on newly added, never committed file in v6 repository"
+ , not <$> git_annex "unlock" ["newfile"] @? "unlock failed to fail on newly added, never committed file in v5 repository"
+ )
git_annex "get" [annexedfile] @? "get of file failed"
annexed_present annexedfile
@@ -549,12 +555,21 @@ test_lock = intmpclonerepoInDirect $ do
writeFile annexedfile $ content annexedfile ++ "foo"
not <$> git_annex "lock" [annexedfile] @? "lock failed to fail without --force"
git_annex "lock" ["--force", annexedfile] @? "lock --force failed"
+ -- In v6 mode, the original content of the file is not always
+ -- preserved after modification, so re-get it.
+ git_annex "get" [annexedfile] @? "get of file failed after lock --force"
annexed_present annexedfile
git_annex "unlock" [annexedfile] @? "unlock failed"
unannexed annexedfile
changecontent annexedfile
- git_annex "add" [annexedfile] @? "add of modified file failed"
- runchecks [checklink, checkunwritable] annexedfile
+ ifM (annexeval Annex.Version.versionSupportsUnlockedPointers)
+ ( do
+ boolSystem "git" [Param "add", Param annexedfile] @? "add of modified file failed"
+ runchecks [checkregularfile, checkwritable] annexedfile
+ , do
+ git_annex "add" [annexedfile] @? "add of modified file failed"
+ runchecks [checklink, checkunwritable] annexedfile
+ )
c <- readFile annexedfile
assertEqual "content of modified file" c (changedcontent annexedfile)
r' <- git_annex "drop" [annexedfile]
@@ -580,7 +595,10 @@ test_edit' precommit = intmpclonerepoInDirect $ do
@? "pre-commit failed"
else boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "contentchanged"]
@? "git commit of edited file failed"
- runchecks [checklink, checkunwritable] annexedfile
+ ifM (annexeval Annex.Version.versionSupportsUnlockedPointers)
+ ( runchecks [checkregularfile, checkwritable] annexedfile
+ , runchecks [checklink, checkunwritable] annexedfile
+ )
c <- readFile annexedfile
assertEqual "content of modified file" c (changedcontent annexedfile)
not <$> git_annex "drop" [annexedfile] @? "drop wrongly succeeded with no known copy of modified file"
@@ -590,8 +608,12 @@ test_partial_commit = intmpclonerepoInDirect $ do
git_annex "get" [annexedfile] @? "get of file failed"
annexed_present annexedfile
git_annex "unlock" [annexedfile] @? "unlock failed"
- not <$> boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "test", File annexedfile]
- @? "partial commit of unlocked file not blocked by pre-commit hook"
+ ifM (annexeval Annex.Version.versionSupportsUnlockedPointers)
+ ( boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "test", File annexedfile]
+ @? "partial commit of unlocked file should be allowed in v6 repository"
+ , not <$> boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "test", File annexedfile]
+ @? "partial commit of unlocked file not blocked by pre-commit hook"
+ )
test_fix :: Assertion
test_fix = intmpclonerepoInDirect $ do
@@ -617,9 +639,13 @@ test_direct :: Assertion
test_direct = intmpclonerepoInDirect $ do
git_annex "get" [annexedfile] @? "get of file failed"
annexed_present annexedfile
- git_annex "direct" [] @? "switch to direct mode failed"
- annexed_present annexedfile
- git_annex "indirect" [] @? "switch to indirect mode failed"
+ ifM (annexeval Annex.Version.versionSupportsUnlockedPointers)
+ ( not <$> git_annex "direct" [] @? "switch to direct mode failed to fail in v6 repository"
+ , do
+ git_annex "direct" [] @? "switch to direct mode failed"
+ annexed_present annexedfile
+ git_annex "indirect" [] @? "switch to indirect mode failed"
+ )
test_trust :: Assertion
test_trust = intmpclonerepo $ do
@@ -810,7 +836,7 @@ test_unused = intmpclonerepoInDirect $ do
assertEqual ("unused keys differ " ++ desc)
(sort expectedkeys) (sort unusedkeys)
findkey f = do
- r <- Backend.lookupFile f
+ r <- Annex.WorkTree.lookupFile f
return $ fromJust r
test_describe :: Assertion
@@ -1056,8 +1082,9 @@ test_nonannexed_file_conflict_resolution :: Assertion
test_nonannexed_file_conflict_resolution = do
check True False
check False False
- check True True
- check False True
+ whenM (annexeval Annex.Version.versionSupportsDirectMode) $ do
+ check True True
+ check False True
where
check inr1 switchdirect = withtmpclonerepo $ \r1 ->
withtmpclonerepo $ \r2 ->
@@ -1106,8 +1133,9 @@ test_nonannexed_symlink_conflict_resolution :: Assertion
test_nonannexed_symlink_conflict_resolution = do
check True False
check False False
- check True True
- check False True
+ whenM (annexeval Annex.Version.versionSupportsDirectMode) $ do
+ check True True
+ check False True
where
check inr1 switchdirect = withtmpclonerepo $ \r1 ->
withtmpclonerepo $ \r2 ->
@@ -1380,7 +1408,7 @@ test_crypto = do
(c,k) <- annexeval $ do
uuid <- Remote.nameToUUID "foo"
rs <- Logs.Remote.readRemoteLog
- Just k <- Backend.lookupFile annexedfile
+ Just k <- Annex.WorkTree.lookupFile annexedfile
return (fromJust $ M.lookup uuid rs, k)
let key = if scheme `elem` ["hybrid","pubkey"]
then Just $ Utility.Gpg.KeyIds [Utility.Gpg.testKeyId]
@@ -1505,7 +1533,7 @@ intmpclonerepoInDirect a = intmpclonerepo $
)
where
isdirect = annexeval $ do
- Annex.Init.initialize Nothing
+ Annex.Init.initialize Nothing Nothing
Config.isDirect
checkRepo :: Types.Annex a -> FilePath -> IO a
@@ -1584,11 +1612,14 @@ clonerepo old new cfg = do
]
boolSystem "git" cloneparams @? "git clone failed"
configrepo new
- indir new $
- git_annex "init" ["-q", new] @? "git annex init failed"
+ indir new $ do
+ ver <- annexVersion <$> getTestMode
+ if ver == Annex.Version.defaultVersion
+ then git_annex "init" ["-q", new] @? "git annex init failed"
+ else git_annex "init" ["-q", new, "--version", ver] @? "git annex init failed"
unless (bareClone cfg) $
indir new $
- handleforcedirect
+ setupTestMode
return new
configrepo :: FilePath -> IO ()
@@ -1599,10 +1630,6 @@ configrepo dir = indir dir $ do
-- avoid signed commits by test suite
boolSystem "git" [Param "config", Param "commit.gpgsign", Param "false"] @? "git config failed"
-handleforcedirect :: IO ()
-handleforcedirect = whenM ((==) "1" <$> Utility.Env.getEnvDefault "FORCEDIRECT" "") $
- git_annex "direct" ["-q"] @? "git annex direct failed"
-
ensuretmpdir :: IO ()
ensuretmpdir = do
e <- doesDirectoryExist tmpdir
@@ -1666,10 +1693,10 @@ checkunwritable f = unlessM (annexeval Config.isDirect) $ do
checkwritable :: FilePath -> Assertion
checkwritable f = do
- r <- tryIO $ writeFile f $ content f
- case r of
- Left _ -> assertFailure $ "unable to modify " ++ f
- Right _ -> return ()
+ s <- getFileStatus f
+ let mode = fileMode s
+ unless (mode == mode `unionFileModes` ownerWriteMode) $
+ assertFailure $ "unable to modify " ++ f
checkdangling :: FilePath -> Assertion
checkdangling f = ifM (annexeval Config.crippledFileSystem)
@@ -1684,7 +1711,7 @@ checkdangling f = ifM (annexeval Config.crippledFileSystem)
checklocationlog :: FilePath -> Bool -> Assertion
checklocationlog f expected = do
thisuuid <- annexeval Annex.UUID.getUUID
- r <- annexeval $ Backend.lookupFile f
+ r <- annexeval $ Annex.WorkTree.lookupFile f
case r of
Just k -> do
uuids <- annexeval $ Remote.keyLocations k
@@ -1695,7 +1722,7 @@ checklocationlog f expected = do
checkbackend :: FilePath -> Types.Backend -> Assertion
checkbackend file expected = do
b <- annexeval $ maybe (return Nothing) (Backend.getBackend file)
- =<< Backend.lookupFile file
+ =<< Annex.WorkTree.lookupFile file
assertEqual ("backend for " ++ file) (Just expected) b
inlocationlog :: FilePath -> Assertion
@@ -1721,11 +1748,16 @@ annexed_present = runchecks
unannexed :: FilePath -> Assertion
unannexed = runchecks [checkregularfile, checkcontent, checkwritable]
-withTestEnv :: Bool -> TestTree -> TestTree
-withTestEnv forcedirect = withResource prepare release . const
+data TestMode = TestMode
+ { forceDirect :: Bool
+ , annexVersion :: String
+ } deriving (Read, Show)
+
+withTestMode :: TestMode -> TestTree -> TestTree
+withTestMode testmode = withResource prepare release . const
where
prepare = do
- setTestEnv forcedirect
+ setTestMode testmode
case tryIngredients [consoleTestReporter] mempty initTests of
Nothing -> error "No tests found!?"
Just act -> unlessM act $
@@ -1733,8 +1765,8 @@ withTestEnv forcedirect = withResource prepare release . const
return ()
release _ = cleanup' True tmpdir
-setTestEnv :: Bool -> IO ()
-setTestEnv forcedirect = do
+setTestMode :: TestMode -> IO ()
+setTestMode testmode = do
whenM (doesDirectoryExist tmpdir) $
error $ "The temporary directory " ++ tmpdir ++ " already exists; cannot run test suite."
@@ -1754,9 +1786,24 @@ setTestEnv forcedirect = do
, ("GIT_COMMITTER_NAME", "git-annex test")
-- force gpg into batch mode for the tests
, ("GPG_BATCH", "1")
- , ("FORCEDIRECT", if forcedirect then "1" else "")
+ , ("TESTMODE", show testmode)
]
+getTestMode :: IO TestMode
+getTestMode = Prelude.read <$> Utility.Env.getEnvDefault "TESTMODE" ""
+
+setupTestMode :: IO ()
+setupTestMode = do
+ testmode <- getTestMode
+ when (forceDirect testmode) $
+ git_annex "direct" ["-q"] @? "git annex direct failed"
+ whenM (annexeval Annex.Version.versionSupportsUnlockedPointers) $
+ boolSystem "git"
+ [ Param "config"
+ , Param "annex.largefiles"
+ , Param ("exclude=" ++ ingitfile)
+ ] @? "git config annex.largefiles failed"
+
changeToTmpDir :: FilePath -> IO ()
changeToTmpDir t = do
topdir <- Utility.Env.getEnvDefault "TOPDIR" (error "TOPDIR not set")
@@ -1791,7 +1838,7 @@ sha1annexedfiledup :: String
sha1annexedfiledup = "sha1foodup"
ingitfile :: String
-ingitfile = "bar"
+ingitfile = "bar.c"
content :: FilePath -> String
content f
diff --git a/Types/KeySource.hs b/Types/KeySource.hs
index 7c2fd13d5..25774588a 100644
--- a/Types/KeySource.hs
+++ b/Types/KeySource.hs
@@ -9,7 +9,7 @@ module Types.KeySource where
import Utility.InodeCache
-{- When content is in the process of being added to the annex,
+{- When content is in the process of being ingested into the annex,
- and a Key generated from it, this data type is used.
-
- The contentLocation may be different from the filename
@@ -19,7 +19,7 @@ import Utility.InodeCache
- of a different Key.
-
- The inodeCache can be used to detect some types of modifications to
- - files that may be made while they're in the process of being added.
+ - files that may be made while they're in the process of being ingested.
-}
data KeySource = KeySource
{ keyFilename :: FilePath
diff --git a/Upgrade.hs b/Upgrade.hs
index 8d205a874..f9dfb7258 100644
--- a/Upgrade.hs
+++ b/Upgrade.hs
@@ -18,13 +18,14 @@ import qualified Upgrade.V1
import qualified Upgrade.V2
import qualified Upgrade.V3
import qualified Upgrade.V4
+import qualified Upgrade.V5
checkUpgrade :: Version -> Annex ()
checkUpgrade = maybe noop error <=< needsUpgrade
needsUpgrade :: Version -> Annex (Maybe String)
needsUpgrade v
- | v == supportedVersion = ok
+ | v `elem` supportedVersions = ok
| v `elem` autoUpgradeableVersions = ifM (upgrade True)
( ok
, err "Automatic upgrade failed!"
@@ -40,7 +41,7 @@ upgrade :: Bool -> Annex Bool
upgrade automatic = do
upgraded <- go =<< getVersion
when upgraded $
- setVersion supportedVersion
+ setVersion latestVersion
return upgraded
where
#ifndef mingw32_HOST_OS
@@ -53,4 +54,5 @@ upgrade automatic = do
go (Just "2") = Upgrade.V2.upgrade
go (Just "3") = Upgrade.V3.upgrade automatic
go (Just "4") = Upgrade.V4.upgrade automatic
+ go (Just "5") = Upgrade.V5.upgrade automatic
go _ = return True
diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs
index 801cdafa0..507af9e3b 100644
--- a/Upgrade/V1.hs
+++ b/Upgrade/V1.hs
@@ -54,14 +54,14 @@ upgrade = do
ifM (fromRepo Git.repoIsLocalBare)
( do
moveContent
- setVersion supportedVersion
+ setVersion latestVersion
, do
moveContent
updateSymlinks
moveLocationLogs
Annex.Queue.flush
- setVersion supportedVersion
+ setVersion latestVersion
)
Upgrade.V2.upgrade
diff --git a/Upgrade/V5.hs b/Upgrade/V5.hs
new file mode 100644
index 000000000..f6d18df43
--- /dev/null
+++ b/Upgrade/V5.hs
@@ -0,0 +1,104 @@
+{- git-annex v5 -> v6 upgrade support
+ -
+ - Copyright 2015 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Upgrade.V5 where
+
+import Common.Annex
+import Config
+import Annex.InodeSentinal
+import Annex.Link
+import Annex.Direct
+import Annex.Content
+import Annex.WorkTree
+import qualified Database.Keys
+import qualified Annex.Content.Direct as Direct
+import qualified Git
+import qualified Git.LsFiles
+import qualified Git.Branch
+import Git.FileMode
+import Utility.InodeCache
+
+upgrade :: Bool -> Annex Bool
+upgrade automatic = do
+ unless automatic $
+ showAction "v5 to v6"
+ whenM isDirect $ do
+ {- Since upgrade from direct mode changes how files
+ - are represented in git, commit any changes in the
+ - work tree first. -}
+ whenM stageDirect $ do
+ unless automatic $
+ showAction "committing first"
+ upgradeDirectCommit automatic
+ "commit before upgrade to annex.version 6"
+ setDirect False
+ upgradeDirectWorkTree
+ removeDirectCruft
+ showLongNote "Upgraded repository out of direct mode."
+ showLongNote "Changes have been staged for all annexed files in this repository; you should run `git commit` to commit these changes."
+ showLongNote "Any other clones of this repository that use direct mode need to be upgraded now, too."
+ configureSmudgeFilter
+ -- Inode sentinal file was only used in direct mode and when
+ -- locking down files as they were added. In v6, it's used more
+ -- extensively, so make sure it exists, since old repos that didn't
+ -- use direct mode may not have created it.
+ unlessM (isDirect) $
+ createInodeSentinalFile True
+ return True
+
+upgradeDirectCommit :: Bool -> String -> Annex ()
+upgradeDirectCommit automatic msg =
+ void $ inRepo $ Git.Branch.commitCommand commitmode
+ [ Param "-m"
+ , Param msg
+ ]
+ where
+ commitmode = if automatic then Git.Branch.AutomaticCommit else Git.Branch.ManualCommit
+
+{- Walk work tree from top and convert all annex symlinks to pointer files,
+ - staging them in the index, and updating the work tree files with
+ - either the content of the object, or the pointer file content. -}
+upgradeDirectWorkTree :: Annex ()
+upgradeDirectWorkTree = do
+ top <- fromRepo Git.repoPath
+ (l, clean) <- inRepo $ Git.LsFiles.stagedDetails [top]
+ forM_ l go
+ void $ liftIO clean
+ where
+ go (f, Just _sha, Just mode) | isSymLink mode = do
+ mk <- lookupFile f
+ case mk of
+ Nothing -> noop
+ Just k -> do
+ ifM (isJust <$> getAnnexLinkTarget f)
+ ( writepointer f k
+ , fromdirect f k
+ )
+ stagePointerFile f =<< hashPointerFile k
+ Database.Keys.addAssociatedFile k f
+ return ()
+ go _ = noop
+
+ fromdirect f k = do
+ -- If linkAnnex fails for some reason, the work tree file
+ -- still has the content; the annex object file is just
+ -- not populated with it. Since the work tree file
+ -- is recorded as an associated file, things will still
+ -- work that way; it's just not ideal.
+ ic <- withTSDelta (liftIO . genInodeCache f)
+ void $ linkAnnex k f ic
+ writepointer f k = liftIO $ do
+ nukeFile f
+ writeFile f (formatPointer k)
+
+{- Remove all direct mode bookkeeping files. -}
+removeDirectCruft :: Annex ()
+removeDirectCruft = mapM_ go =<< getKeysPresent InAnywhere
+ where
+ go k = do
+ Direct.removeInodeCache k
+ Direct.removeAssociatedFiles k
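
A rough before/after sketch of what upgradeDirectWorkTree does to a single annexed work tree file (paths illustrative; the exact pointer text is whatever formatPointer produces for the key):

    before (indirect): foo -> .git/annex/objects/.../SHA256E-s42--abc  (symlink)
    after:             foo is a small regular file containing the pointer for
                       that key, the same pointer is staged in the index, and
                       foo is recorded as an associated file of the key

In direct mode the work tree file keeps its content; linkAnnex populates the annex object from it and only the index gets the pointer.
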
diff --git a/Utility/FileSystemEncoding.hs b/Utility/FileSystemEncoding.hs
index 67341d371..eab98337a 100644
--- a/Utility/FileSystemEncoding.hs
+++ b/Utility/FileSystemEncoding.hs
@@ -19,6 +19,7 @@ module Utility.FileSystemEncoding (
encodeW8NUL,
decodeW8NUL,
truncateFilePath,
+ setConsoleEncoding,
) where
import qualified GHC.Foreign as GHC
@@ -164,3 +165,10 @@ truncateFilePath n = reverse . go [] n . L8.fromString
else go (c:coll) (cnt - x') (L8.drop 1 bs)
_ -> coll
#endif
+
+{- This avoids ghc's output layer crashing on invalid encoded characters in
+ - filenames when printing them out. -}
+setConsoleEncoding :: IO ()
+setConsoleEncoding = do
+ fileEncoding stdout
+ fileEncoding stderr
diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs
index b5fe9034e..8bd7ae0cd 100644
--- a/Utility/InodeCache.hs
+++ b/Utility/InodeCache.hs
@@ -1,7 +1,7 @@
{- Caching a file's inode, size, and modification time
- to see when it's changed.
-
- - Copyright 2013, 2014 Joey Hess <id@joeyh.name>
+ - Copyright 2013-2014 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
diff --git a/debian/changelog b/debian/changelog
index 73035a683..1ce79936e 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,24 @@
+git-annex (6.20151225) unstable; urgency=medium
+
+ * Added v6 repository mode, but v5 is still the default for now.
+ * The upgrade to version 6 is not done fully automatically, because
+ upgrading a direct mode repository to version 6 will prevent old
+ versions of git-annex from working in other clones of that repository.
+ * init: --version parameter added to control which supported repository
+ version to use.
+ * smudge: New command, used for git smudge filter.
+ This will replace direct mode.
+ * init, upgrade: Configure .git/info/attributes to use git-annex as a smudge
+ filter. Note that this changes the default behavior of git add in a
+ newly initialized repository; it will add files to the annex.
+ * unlock, lock: In v6 mode, unlocking a file changes it from a symlink to a
+ pointer file, and this change can be committed to the git repository.
+ * add: In v6 mode, adds modified files to the annex.
+ * assistant: In v6 mode, adds files in unlocked mode, so they can
+ continue to be modified.
+
+ -- Joey Hess <id@joeyh.name> Tue, 08 Dec 2015 11:14:03 -0400
+
git-annex (5.20151219) UNRELEASED; urgency=medium
* status: On crippled filesystems, was displaying M for all annexed files
diff --git a/doc/direct_mode.mdwn b/doc/direct_mode.mdwn
index 4c2cb2dd7..d3e1067f9 100644
--- a/doc/direct_mode.mdwn
+++ b/doc/direct_mode.mdwn
@@ -9,6 +9,13 @@ understand how to update its working tree.
[[!toc]]
+## deprecated
+
+Direct mode is deprecated! Instead, git-annex v6 repositories can simply
+have files that are unlocked and thus can be directly accessed and
+modified. See [[upgrades]] for details about the transition to v6
+repositories.
+
## enabling (and disabling) direct mode
Normally, git-annex repositories start off in indirect mode. With some
diff --git a/doc/git-annex-add.mdwn b/doc/git-annex-add.mdwn
index cfeb8a98e..7f796fec1 100644
--- a/doc/git-annex-add.mdwn
+++ b/doc/git-annex-add.mdwn
@@ -11,12 +11,18 @@ git annex add `[path ...]`
Adds files in the path to the annex. If no path is specified, adds
files from the current directory and below.
-Normally, files that are already checked into git, or that git has been
-configured to ignore will be silently skipped.
-
-If annex.largefiles is configured, and does not match a file that is being
-added, `git annex add` will behave the same as `git add` and add the
-non-large file directly to the git repository, instead of to the annex.
+Files that are already checked into git and are unmodified, or that
+git has been configured to ignore, will be silently skipped.
+
+If annex.largefiles is configured, and does not match a file, `git annex
+add` will behave the same as `git add` and add the non-large file directly
+to the git repository, instead of to the annex.
+
+Large files are added to the annex in locked form, which prevents further
+modification of their content unless unlocked by [[git-annex-unlock]](1).
+(This is not the case, however, when a repository is in direct mode.)
+To add a file to the annex in unlocked form, `git add` can be used instead
+(this only works when the repository has annex.version 6 or higher).
This command can also be used to add symbolic links, both symlinks to
annexed content, and other symlinks.
diff --git a/doc/git-annex-direct.mdwn b/doc/git-annex-direct.mdwn
index 457ae3116..3cade1a8c 100644
--- a/doc/git-annex-direct.mdwn
+++ b/doc/git-annex-direct.mdwn
@@ -17,12 +17,18 @@ Note that git commands that operate on the work tree will refuse to
run in direct mode repositories. Use `git annex proxy` to safely run such
commands.
+Note that the direct mode/indirect mode distinction is removed in v6
+git-annex repositories. In such a repository, you can
+use [[git-annex-unlock]](1) to make a file's content be directly present.
+
# SEE ALSO
[[git-annex]](1)
[[git-annex-indirect]](1)
+[[git-annex-unlock]](1)
+
# AUTHOR
Joey Hess <id@joeyh.name>
diff --git a/doc/git-annex-indirect.mdwn b/doc/git-annex-indirect.mdwn
index 99def6144..321e0fb36 100644
--- a/doc/git-annex-indirect.mdwn
+++ b/doc/git-annex-indirect.mdwn
@@ -11,9 +11,8 @@ git annex indirect
Switches a repository back from direct mode to the default, indirect
mode.
-Some systems cannot support git-annex in indirect mode, because they
-do not support symbolic links. Repositories on such systems instead
-default to using direct mode.
+Note that the direct mode/indirect mode distinction is removed in v6
+git-annex repositories.
# SEE ALSO
diff --git a/doc/git-annex-init.mdwn b/doc/git-annex-init.mdwn
index 145705105..29522181d 100644
--- a/doc/git-annex-init.mdwn
+++ b/doc/git-annex-init.mdwn
@@ -24,6 +24,13 @@ mark it as dead (see [[git-annex-dead]](1)).
This command is entirely safe, although usually pointless, to run inside an
already initialized git-annex repository.
+# OPTIONS
+
+* `--version=N`
+
+ Force the repository to be initialized using a different annex.version
+ than the current default.
+
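+For example, to initialize a repository in v6 mode (assuming this
+git-annex supports repository version 6):
+
+	git annex init --version=6
+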
# SEE ALSO
[[git-annex]](1)
diff --git a/doc/git-annex-lock.mdwn b/doc/git-annex-lock.mdwn
index 4bf279fb2..b9e5d3450 100644
--- a/doc/git-annex-lock.mdwn
+++ b/doc/git-annex-lock.mdwn
@@ -9,7 +9,7 @@ git annex lock `[path ...]`
# DESCRIPTION
Use this to undo an unlock command if you don't want to modify
-the files, or have made modifications you want to discard.
+the files any longer, or have made modifications you want to discard.
# OPTIONS
diff --git a/doc/git-annex-pre-commit.mdwn b/doc/git-annex-pre-commit.mdwn
index bc1e86e18..21e5aef68 100644
--- a/doc/git-annex-pre-commit.mdwn
+++ b/doc/git-annex-pre-commit.mdwn
@@ -12,10 +12,14 @@ This is meant to be called from git's pre-commit hook. `git annex init`
automatically creates a pre-commit hook using this.
Fixes up symlinks that are staged as part of a commit, to ensure they
-point to annexed content. Also handles injecting changes to unlocked
-files into the annex. When in a view, updates metadata to reflect changes
+point to annexed content.
+
+When in a view, updates metadata to reflect changes
made to files in the view.
+When in a repository that has not been upgraded to annex.version 6,
+also handles injecting changes to unlocked files into the annex.
+
# SEE ALSO
[[git-annex]](1)
diff --git a/doc/git-annex-smudge.mdwn b/doc/git-annex-smudge.mdwn
new file mode 100644
index 000000000..7439c8784
--- /dev/null
+++ b/doc/git-annex-smudge.mdwn
@@ -0,0 +1,43 @@
+# NAME
+
+git-annex smudge - git filter driver for git-annex
+
+# SYNOPSIS
+
+git annex smudge [--clean] file
+
+# DESCRIPTION
+
+This command lets git-annex be used as a git filter driver, which allows
+annexed files in the git repository to be unlocked at all times, instead
+of being symlinks.
+
+When adding a file with `git add`, the annex.largefiles config is
+consulted to decide if a given file should be added to git as-is,
+or if its content is large enough to need to use git-annex.
+
+The git configuration to use this command as a filter driver is as follows.
+This is normally set up for you by git-annex init, so you should
+not need to configure it manually.
+
+ [filter "annex"]
+ smudge = git-annex smudge %f
+ clean = git-annex smudge --clean %f
+
+To make git use that filter driver, it needs to be configured in
+the .gitattributes file or in `.git/info/attributes`. The latter
+is normally configured when a repository is initialized, with the following
+contents:
+
+ * filter=annex
+ .* !filter
+
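+One way to check that the filter driver applies to a file is with
+`git check-attr`; with the stock configuration above it will normally
+report the annex filter (`somefile` is just a placeholder name):
+
+	git check-attr filter -- somefile
+	# somefile: filter: annex
+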
+# SEE ALSO
+
+[[git-annex]](1)
+
+# AUTHOR
+
+Joey Hess <id@joeyh.name>
+
+Warning: Automatically converted into a man page by mdwn2man. Edit with care.
diff --git a/doc/git-annex-unlock.mdwn b/doc/git-annex-unlock.mdwn
index ac8c21185..123146836 100644
--- a/doc/git-annex-unlock.mdwn
+++ b/doc/git-annex-unlock.mdwn
@@ -11,8 +11,16 @@ git annex unlock `[path ...]`
Normally, the content of annexed files is protected from being changed.
Unlocking an annexed file allows it to be modified. This replaces the
symlink for each specified file with a copy of the file's content.
-You can then modify it and `git annex add` (or `git commit`) to inject
-it back into the annex.
+You can then modify it and use `git annex add` (or `git commit`) to save your
+changes.
+
+In repositories with annex.version 5 or earlier, unlocking a file is local
+to the repository, and is temporary. With version 6, unlocking a file
+changes how it is stored in the git repository (from a symlink to a pointer
+file), so you can commit it like any other change. Also in version 6, you
+can use `git add` to add a file to the annex in unlocked form. This allows
+workflows where a file starts out unlocked, is modified as necessary, and
+is locked once it reaches its final version.
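+
+For example, such a workflow might look like this (`somefile` is only a
+placeholder name):
+
+	git add somefile          # in v6, adds somefile to the annex unlocked
+	# ...edit and commit somefile as needed...
+	git annex lock somefile   # lock down the final version
+	git commit -m 'lock somefile'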
# OPTIONS
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 2020ccf3f..1a2fd6e67 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -626,6 +626,14 @@ subdirectories).
See [[git-annex-diffdriver]](1) for details.
+* `smudge`
+
+ This command lets git-annex be used as a git filter driver, allowing
+ annexed files in the git repository to be unlocked at all times, instead
+ of being symlinks.
+
+ See [[git-annex-smudge]](1) for details.
+
* `remotedaemon`
Detects when network remotes have received git pushes and fetches from them.
diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn
index aea0c9b98..a62e19f68 100644
--- a/doc/todo/smudge.mdwn
+++ b/doc/todo/smudge.mdwn
@@ -158,7 +158,8 @@ Using git-annex on a crippled filesystem that does not support symlinks.
Data:
* An annex pointer file has as its first line the git-annex key
- that it's standing in for. Subsequent lines of the file might
+ that it's standing in for (prefixed with "annex/objects/", similar to
+ an annex symlink target). Subsequent lines of the file might
be a message saying that the file's content is not currently available.
An annex pointer file is checked into the git repository the same way
that an annex symlink is checked in.
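+
+For illustration, with the SHA256E backend the first line of a pointer
+file would look something like this (the placeholders stand in for the
+actual key fields):
+
+	annex/objects/SHA256E-s<size>--<sha256 checksum>.<extension>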
@@ -177,8 +178,8 @@ Configuration:
the annex. Other files are passed through the smudge/clean as-is and
have their contents stored in git.
-* annex.direct is repurposed to configure how the assistant adds files.
- When set to true, they're added unlocked.
+* annex.direct is repurposed to configure how git-annex adds files.
+ When set to false, it adds symlinks; when set to true, it adds pointer files.
git-annex clean:
@@ -232,15 +233,11 @@ git annex lock/unlock:
transition repositories to using pointers, and a cleaner unlock/lock
for repos using symlinks.
- unlock will stage a pointer file, and will copy the content of the object
- out of .git/annex/objects to the work tree file. (Might want a --hardlink
- switch.)
+ unlock will stage a pointer file, and will link the content of the object
+ from .git/annex/objects to the work tree file.
- lock will replace the current work tree file with the symlink, and stage it.
- Note that multiple work tree files could point to the same object.
- So, if the link count is > 1, replace the annex object with a copy of
- itself to break such a hard link. Always finish by locking down the
- permissions of the annex object.
+ lock will replace the current work tree file with the symlink, and stage it,
+ and lock down the permissions of the annex object.
#### file map
@@ -248,7 +245,8 @@ The file map needs to map from `Key -> [File]`. `File -> Key`
seems useful to have, but in practice is not worthwhile.
Drop and get operations need to know what files in the work tree use a
-given key in order to update the work tree.
+given key in order to update the work tree. And, we don't want to
+overwrite a work tree file if it's been modified when dropping or getting.
git-annex commands that look at annex symlinks to get keys to act on will
need fall back to either consulting the file map, or looking at the staged
@@ -275,13 +273,14 @@ In particular:
* Is the smudge filter called at any other time? Seems unlikely but then
there could be situations with a detached work tree or such.
* Does git call any useful hooks when removing a file from the work tree,
- or converting it to not be annexed?
+ or converting it to not be annexed, or for `git mv` of an annexed file?
No!
From this analysis, any file map generated by the smudge/clean filters
is necessarily potentially inaccurate. It may list deleted files.
It may or may not reflect current unstaged changes from the work tree.
+
Follows that any use of the file map needs to verify the info from it,
and throw out bad cached info (updating the map to match reality).
@@ -306,17 +305,71 @@ just look at the repo content in the first place..
annex.version changes to 6
-Upgrade should be handled automatically.
+git config for filter.annex.smudge and filter.annex.clean is set up.
-On upgrade, update .gitattributes with a stock configuration, unless
-it already mentions "filter=annex".
+.gitattributes is updated with a stock configuration,
+unless it already mentions "filter=annex".
Upgrading a direct mode repo needs to switch it out of bare mode, and
needs to run `git annex unlock` on all files (or reach the same result).
So will need to stage changes to all annexed files.
When a repo has some clones indirect and some direct, the upgraded repo
-will have all files unlocked, necessarily in all clones.
+will have all files unlocked, necessarily in all clones. This happens
+automatically, because upgrading the direct repos causes the files to be
+unlocked, while upgrading the indirect repos doesn't touch the files.
+
+#### implementation todo list
+
+* Still a few test suite failures for v6 with locked files.
+* Test suite should be made to pass for v6 with unlocked files.
+* Reconcile staged changes into the associated files database, whenever
+ the database is queried. This is needed to handle eg:
+ git add largefile
+ git mv largefile othername
+ git annex move othername --to foo
+ # fails to drop content from associated file othername,
+ # because it doesn't know it has that name
+ # git commit clears up this mess
+* Interaction with shared clones. Should avoid hard linking from/to an
+ object in a shared clone if either repository has the object unlocked.
+ (And should avoid unlocking an object if it's hard linked to a shared clone,
+ but that's already accomplished because it avoids unlocking an object if
+ it's hard linked at all)
+* Make automatic merge conflict resolution work for pointer files.
+ - Should probably automatically handle merge conflicts between annex
+ symlinks and pointer files too. Maybe by always resulting in a pointer
+ file, since the symlinks don't work everywhere.
+* Crippled filesystem should cause all files to be transparently unlocked.
+ Note that this presents problems when dealing with merge conflicts and
+ when pushing changes committed in such a repo. Ideally, should avoid
+ committing implicit unlocks, or should prevent such commits leaking out
+ in pushes.
+* Dropping a smudged file causes git status (and git annex status)
+ to show it as modified, because the timestamp has changed.
+ Getting a smudged file can also cause this.
+ Upgrading a direct mode repo also leaves files in this state.
+ User can use `git add` to clear it up, but better to avoid this,
+ by updating stat info in the index.
+ (May need to use libgit2 to do this, cannot find
+ any plumbing except git-update-index, which is very inefficient for
+ smudged files.)
+* Audit code for all uses of isDirect. These places almost always need
+ adjusting to support v6, if they haven't already.
+* Optimisation: See if the database schema can be improved to speed things
+ up. Are there enough indexes? getAssociatedKey in particular does a
+ reverse lookup and might benefit from an index.
+* Optimisation: Reads from the Keys database avoid doing anything if the
+ database doesn't exist. This makes v5 repos, or v6 with all locked files
+ faster. However, if a v6 repo unlocks and then re-locks a file, its
+ database will exist, and so this optimisation will no longer apply.
+ Could try to detect when the database is empty, and remove it or avoid reads.
+
+* Eventually (but not yet), make v6 the default for new repositories.
+ Note that the assistant forces repos into direct mode; that will need to
+ be changed then.
+* Later still, remove support for direct mode, and enable automatic
+ v5 to v6 upgrades.
----
diff --git a/doc/upgrades.mdwn b/doc/upgrades.mdwn
index f5e9cbc3a..9d30c2f14 100644
--- a/doc/upgrades.mdwn
+++ b/doc/upgrades.mdwn
@@ -43,6 +43,46 @@ conflicts first before upgrading git-annex.
The upgrade events, so far:
+## v5 -> v6 (git-annex version 6.x)
+
+The upgrade from v5 to v6 is handled manually. Run `git-annex upgrade`
+to perform the upgrade.
+
+Warning: All places that a direct mode repository is cloned to should be
+running git-annex version 6.x before you upgrade the repository.
+This is necessary because the contents of the repository are changed
+in the upgrade, and the old version of git-annex won't be able to
+access files after the repo is upgraded.
+
+This upgrade does away with the direct mode/indirect mode distinction.
+A v6 git-annex repository can have some files locked and other files
+unlocked, and all git and git-annex commands can be used on both locked and
+unlocked files. (Although for locked files to work, the filesystem
+must support symbolic links.)
+
+The behavior of some commands changes in an upgraded repository:
+
+* `git add` will add files to the annex, in unlocked mode, rather than
+ adding them directly to the git repository. To cause some files to be
+ added directly to git, you can configure `annex.largefiles`. For
+ example:
+
+ git config annex.largefiles "largerthan=100kb and not (include=*.c or include=*.h)"
+
+* `git annex unlock` and `git annex lock` change how the pointer to
+ the annexed content is stored in git.
+
+If a repository is only used in indirect mode, you can use git-annex
+v5 and v6 in different clones of the same indirect mode repository without
+problems.
+
+On upgrade, all files in a direct mode repository will be converted to
+unlocked files. The upgrade will stage changes to all annexed files in
+the git repository, which you can then commit.
+
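+For a direct mode repository, the upgrade might thus look something like
+this (the commit message is just an example):
+
+	git annex upgrade
+	git commit -m 'upgrade repository to annex.version 6'
+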
+If a repository has some clones using direct mode and some using indirect
+mode, all the files will end up unlocked in all clones after the upgrade.
+
## v4 -> v5 (git-annex version 5.x)
The upgrade from v4 to v5 is handled