summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2015-12-09 17:47:05 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2015-12-09 17:54:54 -0400
commitd2d501503d9aa19787466129394141c794bb84e6 (patch)
tree3a0e3982e420ec42e2d90672628c4d63c7f78eff
parent76ccac53916d308aa4806d38bb8cfb6a9d1f9081 (diff)
use InodeCache when dropping a key to see if a pointer file can be safely reset
The Keys database can hold multiple inode caches for a given key. One for the annex object, and one for each pointer file, which may not be hard linked to it. Inode caches for a key are recorded when its content is added to the annex, but only if it has known pointer files. This is to avoid the overhead of maintaining the database when not needed. When the smudge filter outputs a file's content, the inode cache is not updated, because git's smudge interface doesn't let us write the file. So, dropping will fall back to doing an expensive verification then. Ideally, git's interface would be improved, and then the inode cache could be updated then too.
-rw-r--r--Annex/Action.hs2
-rw-r--r--Annex/Content.hs22
-rw-r--r--Command/Smudge.hs9
-rw-r--r--Database/Keys.hs60
-rw-r--r--Utility/InodeCache.hs2
-rw-r--r--doc/todo/smudge.mdwn13
6 files changed, 62 insertions, 46 deletions
diff --git a/Annex/Action.hs b/Annex/Action.hs
index f59c9c2f4..348487e7c 100644
--- a/Annex/Action.hs
+++ b/Annex/Action.hs
@@ -17,6 +17,7 @@ import System.Posix.Signals
import Common.Annex
import qualified Annex
import Annex.Content
+import qualified Database.Keys
{- Actions to perform each time ran. -}
startup :: Annex ()
@@ -32,4 +33,5 @@ shutdown :: Bool -> Annex ()
shutdown nocommit = do
saveState nocommit
sequence_ =<< M.elems <$> Annex.getState Annex.cleanup
+ Database.Keys.shutdown
liftIO reapZombies -- zombies from long-running git processes
diff --git a/Annex/Content.hs b/Annex/Content.hs
index a530245b3..e635b97a3 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -451,7 +451,9 @@ moveAnnex key src = withObjectLoc key storeobject storedirect
fs <- Database.Keys.getAssociatedFiles key
if null fs
then freezeContent dest
- else mapM_ (populatePointerFile key dest) fs
+ else do
+ mapM_ (populatePointerFile key dest) fs
+ Database.Keys.storeInodeCaches key (dest:fs)
)
storeindirect = storeobject =<< calcRepo (gitAnnexLocation key)
@@ -505,7 +507,9 @@ linkAnnex key src = do
( return LinkAnnexNoop
, modifyContent dest $
ifM (liftIO $ createLinkOrCopy src dest)
- ( return LinkAnnexOk
+ ( do
+ Database.Keys.storeInodeCaches key [dest, src]
+ return LinkAnnexOk
, return LinkAnnexFailed
)
)
@@ -601,6 +605,7 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect
removeInodeCache key
mapM_ (void . tryIO . resetPointerFile key)
=<< Database.Keys.getAssociatedFiles key
+ Database.Keys.removeInodeCaches key
removedirect fs = do
cache <- recordedInodeCache key
removeInodeCache key
@@ -613,8 +618,7 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect
{- To safely reset a pointer file, it has to be the unmodified content of
- the key. The expensive way to tell is to do a verification of its content.
- The cheaper way is to see if the InodeCache for the key matches the
- - file.
- -}
+ - file. -}
resetPointerFile :: Key -> FilePath -> Annex ()
resetPointerFile key f = go =<< geti
where
@@ -624,10 +628,14 @@ resetPointerFile key f = go =<< geti
secureErase f
liftIO $ nukeFile f
liftIO $ writeFile f (formatPointer key)
- , noop
+ -- Can't delete the pointer file.
+ -- If it was a hard link to the annex object,
+ -- that object might have been frozen as part of the
+ -- removal process, so thaw it.
+ , thawContent f
)
- cheapcheck fc = maybe (return False) (compareInodeCaches fc)
- =<< Database.Keys.getInodeCache key
+ cheapcheck fc = anyM (compareInodeCaches fc)
+ =<< Database.Keys.getInodeCaches key
expensivecheck fc = ifM (verifyKeyContent AlwaysVerify Types.Remote.UnVerified key f)
-- The file could have been modified while it was
-- being verified. Detect that.
diff --git a/Command/Smudge.hs b/Command/Smudge.hs
index b532ac3d1..14d3a7f41 100644
--- a/Command/Smudge.hs
+++ b/Command/Smudge.hs
@@ -48,7 +48,7 @@ smudge file = do
case parseLinkOrPointer b of
Nothing -> liftIO $ B.putStr b
Just k -> do
- updateAssociatedFiles k file
+ Database.Keys.addAssociatedFile k file
content <- calcRepo (gitAnnexLocation k)
liftIO $ B.hPut stdout . fromMaybe b
=<< catchMaybeIO (B.readFile content)
@@ -65,7 +65,7 @@ clean file = do
else ifM (shouldAnnex file)
( do
k <- ingest file
- updateAssociatedFiles k file
+ Database.Keys.addAssociatedFile k file
liftIO $ emitPointer k
, liftIO $ B.hPut stdout b
)
@@ -100,8 +100,3 @@ ingest file = do
emitPointer :: Key -> IO ()
emitPointer = putStrLn . formatPointer
-
-updateAssociatedFiles :: Key -> FilePath -> Annex ()
-updateAssociatedFiles k f = do
- Database.Keys.addAssociatedFile k f
- Database.Keys.flushDb
diff --git a/Database/Keys.hs b/Database/Keys.hs
index 092c0d900..78d583d63 100644
--- a/Database/Keys.hs
+++ b/Database/Keys.hs
@@ -13,15 +13,17 @@
module Database.Keys (
DbHandle,
openDb,
- flushDb,
closeDb,
+ shutdown,
addAssociatedFile,
getAssociatedFiles,
removeAssociatedFile,
- setInodeCache,
- getInodeCache,
+ storeInodeCaches,
+ addInodeCaches,
+ getInodeCaches,
+ removeInodeCaches,
AssociatedId,
- DataId,
+ ContentId,
) where
import Database.Types
@@ -35,6 +37,7 @@ import Annex.Perms
import Annex.LockFile
import Messages
import Utility.InodeCache
+import Annex.InodeSentinal
import Database.Persist.TH
import Database.Esqueleto hiding (Key)
@@ -44,10 +47,10 @@ Associated
key SKey
file FilePath
KeyFileIndex key file
-Data
+Content
key SKey
- inodeCache SInodeCache
- KeyIndex key
+ cache SInodeCache
+ KeyCacheIndex key cache
|]
{- Opens the database, creating it if it doesn't exist yet. -}
@@ -62,7 +65,7 @@ openDb = withExclusiveLock gitAnnexKeysDbLock $ do
runMigrationSilent migrateKeysDb
setAnnexDirPerm dbdir
setAnnexFilePerm db
- h <- liftIO $ H.openDb db "data"
+ h <- liftIO $ H.openDb db "content"
-- work around https://github.com/yesodweb/persistent/issues/474
liftIO setConsoleEncoding
@@ -85,9 +88,12 @@ dbHandle = maybe startup return =<< Annex.getState Annex.keysdbhandle
Annex.changeState $ \s -> s { Annex.keysdbhandle = Just h }
return h
-{- Flushes any changes made to the database. -}
-flushDb :: Annex ()
-flushDb = withDbHandle H.flushQueueDb
+shutdown :: Annex ()
+shutdown = maybe noop go =<< Annex.getState Annex.keysdbhandle
+ where
+ go h = do
+ Annex.changeState $ \s -> s { Annex.keysdbhandle = Nothing }
+ liftIO $ closeDb h
addAssociatedFile :: Key -> FilePath -> Annex ()
addAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ do
@@ -113,21 +119,35 @@ getAssociatedFiles' sk = do
return $ map unValue l
removeAssociatedFile :: Key -> FilePath -> Annex ()
-removeAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $
+removeAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $
delete $ from $ \r -> do
where_ (r ^. AssociatedKey ==. val sk &&. r ^. AssociatedFile ==. val f)
where
sk = toSKey k
-setInodeCache :: Key -> InodeCache -> Annex ()
-setInodeCache k i = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $
- void $ upsert (Data (toSKey k) (toSInodeCache i)) []
+{- Stats the files, and stores their InodeCaches. -}
+storeInodeCaches :: Key -> [FilePath] -> Annex ()
+storeInodeCaches k fs = withTSDelta $ \d ->
+ addInodeCaches k . catMaybes =<< liftIO (mapM (`genInodeCache` d) fs)
+
+addInodeCaches :: Key -> [InodeCache] -> Annex ()
+addInodeCaches k is = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $
+ forM_ is $ \i -> insertUnique $ Content (toSKey k) (toSInodeCache i)
-getInodeCache :: Key -> Annex (Maybe (InodeCache))
-getInodeCache k = withDbHandle $ \h -> H.queryDb h $ do
+{- A key may have multiple InodeCaches; one for the annex object, and one
+ - for each pointer file that is a copy of it. -}
+getInodeCaches :: Key -> Annex [InodeCache]
+getInodeCaches k = withDbHandle $ \h -> H.queryDb h $ do
l <- select $ from $ \r -> do
- where_ (r ^. DataKey ==. val sk)
- return (r ^. DataInodeCache)
- return $ headMaybe $ map (fromSInodeCache . unValue) l
+ where_ (r ^. ContentKey ==. val sk)
+ return (r ^. ContentCache)
+ return $ map (fromSInodeCache . unValue) l
+ where
+ sk = toSKey k
+
+removeInodeCaches :: Key -> Annex ()
+removeInodeCaches k = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $
+ delete $ from $ \r -> do
+ where_ (r ^. ContentKey ==. val sk)
where
sk = toSKey k
diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs
index b5fe9034e..8bd7ae0cd 100644
--- a/Utility/InodeCache.hs
+++ b/Utility/InodeCache.hs
@@ -1,7 +1,7 @@
{- Caching a file's inode, size, and modification time
- to see when it's changed.
-
- - Copyright 2013, 2014 Joey Hess <id@joeyh.name>
+ - Copyright 2013-2014 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn
index 60cc65f3f..cc8da67d0 100644
--- a/doc/todo/smudge.mdwn
+++ b/doc/todo/smudge.mdwn
@@ -328,6 +328,8 @@ files to be unlocked, while the indirect upgrades don't touch the files.
* inAnnex check should fail in the case where an annexed object is unlocked
and has had its content changed. Could use an InodeCache for
such objects. This parallels how inAnnex checks work for direct mode.
+* Also, Annex.Content.prepSendAnnex should check the InodeCache for
+ changes.
* Reconcile staged changes into the associated files database, whenever
the database is queried.
* See if the cases where the associated files database is not used can be
@@ -338,17 +340,6 @@ files to be unlocked, while the indirect upgrades don't touch the files.
(when not in direct mode).
However, beware over-optimisation breaking the assistant or perhaps other
long-lived processes.
-* Update pointer files when dropping the content of a key.
- - Check the associated files database to find associated files for the key.
- - Verify that worktree files are not modified from the annexed object.
- How? InodeCache could be maintained, but the smudge filer interface
- wouldn't let it be updated when smudging a file. May need to take
- an expensive path:
- 1. stat object file
- 2. stat worktree file
- 3. if same stat, ok else hash worktree file
- 4. stat worktree file again after checking hash; make sure it's
- unchanged from earlier stat
* Convert `git annex unlock` to stage a pointer file, and hard link to the
annexed object (or write pointer file if annexed object not present).
- Also needs to thaw annex object file