diff options
author | Joey Hess <joey@kitenet.net> | 2014-02-24 14:41:33 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2014-02-24 14:41:33 -0400 |
commit | 820750d37839e72f449d364224d23f7035d23e2c (patch) | |
tree | 3d1f87db8f2b9e99e9a5cb4074e1f173d0fb2ee7 | |
parent | d56f245b212d6a9ec88ad9d69dee4f0cc22daa5d (diff) |
Preserve metadata when staging a new version of an annexed file.
Performance impact: When adding a large tree of new files, this needs
to do some git cat-file queries to check if any of the files already
existed and might need a metadata copy. I tried a benchmark in a copy
of my sound repository (so there was already a significant git tree
to check against).
Adding 10000 small files, with a cold cache:
before: 1m48.539s
after: 1m52.791s
So, impact is about 0.0004 seconds per file added, which seems acceptable, so I
did not add some kind of configuration to enable/disable this.
This commit was sponsored by Lisa Feilen.
-rw-r--r-- | Annex/CatFile.hs | 3 | ||||
-rw-r--r-- | Annex/MetaData.hs | 30 | ||||
-rw-r--r-- | Command/Add.hs | 4 | ||||
-rw-r--r-- | Logs/MetaData.hs | 19 | ||||
-rw-r--r-- | debian/changelog | 1 |
5 files changed, 42 insertions, 15 deletions
diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs index 54a4d1099..87d179a62 100644 --- a/Annex/CatFile.hs +++ b/Annex/CatFile.hs @@ -87,8 +87,7 @@ catKey' modeguaranteed ref mode | modeguaranteed = catObject ref | otherwise = L.take 8192 <$> catObject ref -{- Looks up the file mode corresponding to the Ref using the running - - cat-file. +{- Looks up the key corresponding to the Ref using the running cat-file. - - Currently this always has to look in HEAD, because cat-file --batch - does not offer a way to specify that we want to look up a tree object diff --git a/Annex/MetaData.hs b/Annex/MetaData.hs index ef235b51f..b7850a868 100644 --- a/Annex/MetaData.hs +++ b/Annex/MetaData.hs @@ -11,6 +11,7 @@ import Common.Annex import qualified Annex import Types.MetaData import Logs.MetaData +import Annex.CatFile import qualified Data.Set as S import qualified Data.Map as M @@ -27,18 +28,27 @@ yearMetaField = MetaField "year" monthMetaField :: MetaField monthMetaField = MetaField "month" -{- Generates metadata for a file that has just been ingested into the - - annex. Passed the FileStatus of the content file. +{- Adds metadata for a file that has just been ingested into the + - annex, but has not yet been committed to git. - - - Does not overwrite any existing metadata values for the key. + - When the file has been modified, the metadata is copied over + - from the old key to the new key. Note that it looks at the old key as + - committed to HEAD -- the new key may or may not have already been staged + - in th annex. + - + - Also, can generate new metadata, if configured to do so. 
-} -genMetaData :: Key -> FileStatus -> Annex () -genMetaData key status = whenM (annexGenMetaData <$> Annex.getGitConfig) $ do - metadata <- getCurrentMetaData key - let metadata' = genMetaData' status metadata - unless (metadata' == emptyMetaData) $ - addMetaData key metadata' - +genMetaData :: Key -> FilePath -> FileStatus -> Annex () +genMetaData key file status = do + maybe noop (flip copyMetaData key) =<< catKeyFileHEAD file + whenM (annexGenMetaData <$> Annex.getGitConfig) $ do + metadata <- getCurrentMetaData key + let metadata' = genMetaData' status metadata + unless (metadata' == emptyMetaData) $ + addMetaData key metadata' + +{- Generates metadata from the FileStatus. + - Does not overwrite any existing metadata values. -} genMetaData' :: FileStatus -> MetaData -> MetaData genMetaData' status old = MetaData $ M.fromList $ filter isnew [ (yearMetaField, S.singleton $ toMetaValue $ show y) diff --git a/Command/Add.hs b/Command/Add.hs index 0906ae531..662ce4242 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -161,14 +161,14 @@ ingest (Just source) = do goindirect (Just (key, _)) mcache ms = do catchAnnex (moveAnnex key $ contentLocation source) (undo (keyFilename source) key) - maybe noop (genMetaData key) ms + maybe noop (genMetaData key (keyFilename source)) ms liftIO $ nukeFile $ keyFilename source return $ (Just key, mcache) goindirect _ _ _ = failure "failed to generate a key" godirect (Just (key, _)) (Just cache) ms = do addInodeCache key cache - maybe noop (genMetaData key) ms + maybe noop (genMetaData key (keyFilename source)) ms finishIngestDirect key source return $ (Just key, Just cache) godirect _ _ _ = failure "failed to generate a key" diff --git a/Logs/MetaData.hs b/Logs/MetaData.hs index 63314bcef..6702c3733 100644 --- a/Logs/MetaData.hs +++ b/Logs/MetaData.hs @@ -28,10 +28,10 @@ module Logs.MetaData ( getCurrentMetaData, - getMetaData, addMetaData, addMetaData', currentMetaData, + copyMetaData, ) where import Common.Annex @@ -135,3 
+135,20 @@ simplifyLog s = case sl of where older = value l unique = older `differenceMetaData` newer + +{- Copies the metadata from the old key to the new key. + - + - The exact content of the metadata file is copied, so that the timestamps + - remain the same, and because this is more space-efficient in the git + - repository. + - + - Any metadata already attached to the new key is not preserved. + -} +copyMetaData :: Key -> Key -> Annex () +copyMetaData oldkey newkey + | oldkey == newkey = noop + | otherwise = do + l <- getMetaData oldkey + unless (S.null l) $ + Annex.Branch.change (metaDataLogFile newkey) $ + const $ showLog l diff --git a/debian/changelog b/debian/changelog index 543504c12..8c157aeb2 100644 --- a/debian/changelog +++ b/debian/changelog @@ -11,6 +11,7 @@ git-annex (5.20140222) UNRELEASED; urgency=medium tag/showname. * annex.genmetadata can be set to make git-annex automatically set metadata (year and month) when adding files. + * Preserve metadata when staging a new version of an annexed file. * metadata: Field names limited to alphanumerics and a few whitelisted punctuation characters to avoid issues with views, etc. * metadata: Support --json |