diff options
Diffstat (limited to 'Annex/Ingest.hs')
-rw-r--r-- | Annex/Ingest.hs | 220 |
1 files changed, 220 insertions, 0 deletions
diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs new file mode 100644 index 000000000..0fd32a042 --- /dev/null +++ b/Annex/Ingest.hs @@ -0,0 +1,220 @@ +{- git-annex content ingestion + - + - Copyright 2010-2015 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.Ingest ( + lockDown, + ingest, + finishIngestDirect, + addLink, + makeLink, + restoreFile, + forceParams, +) where + +import Common.Annex +import Types.KeySource +import Backend +import Annex.Content +import Annex.Content.Direct +import Annex.Perms +import Annex.Link +import Annex.MetaData +import qualified Annex +import qualified Annex.Queue +import Config +import Utility.InodeCache +import Annex.ReplaceFile +import Utility.Tmp +import Utility.CopyFile +import Annex.InodeSentinal +import Annex.Version +#ifdef WITH_CLIBS +#ifndef __ANDROID__ +import Utility.Touch +#endif +#endif + +import Control.Exception (IOException) + +{- The file that's being ingested is locked down before a key is generated, + - to prevent it from being modified in between. This lock down is not + - perfect at best (and pretty weak at worst). For example, it does not + - guard against files that are already opened for write by another process. + - So a KeySource is returned. Its inodeCache can be used to detect any + - changes that might be made to the file after it was locked down. + - + - When possible, the file is hard linked to a temp directory. This guards + - against some changes, like deletion or overwrite of the file, and + - allows lsof checks to be done more efficiently when adding a lot of files. + - + - Lockdown can fail if a file gets deleted, and Nothing will be returned. + -} +lockDown :: FilePath -> Annex (Maybe KeySource) +lockDown = either + (\e -> warning (show e) >> return Nothing) + (return . Just) + <=< lockDown' + +lockDown' :: FilePath -> Annex (Either IOException KeySource) +lockDown' file = ifM crippledFileSystem + ( withTSDelta $ liftIO . tryIO . nohardlink + , tryIO $ do + tmp <- fromRepo gitAnnexTmpMiscDir + createAnnexDirectory tmp + go tmp + ) + where + {- In indirect mode, the write bit is removed from the file as part + - of lock down to guard against further writes, and because objects + - in the annex have their write bit disabled anyway. + - + - Freezing the content early also lets us fail early when + - someone else owns the file. + - + - This is not done in direct mode, because files there need to + - remain writable at all times. + -} + go tmp = do + unlessM isDirect $ + freezeContent file + withTSDelta $ \delta -> liftIO $ do + (tmpfile, h) <- openTempFile tmp $ + relatedTemplate $ takeFileName file + hClose h + nukeFile tmpfile + withhardlink delta tmpfile `catchIO` const (nohardlink delta) + nohardlink delta = do + cache <- genInodeCache file delta + return KeySource + { keyFilename = file + , contentLocation = file + , inodeCache = cache + } + withhardlink delta tmpfile = do + createLink file tmpfile + cache <- genInodeCache tmpfile delta + return KeySource + { keyFilename = file + , contentLocation = tmpfile + , inodeCache = cache + } + +{- Ingests a locked down file into the annex. + - + - In direct mode, leaves the file alone, and just updates bookkeeping + - information. + -} +ingest :: Maybe KeySource -> Annex (Maybe Key, Maybe InodeCache) +ingest Nothing = return (Nothing, Nothing) +ingest (Just source) = withTSDelta $ \delta -> do + backend <- chooseBackend $ keyFilename source + k <- genKey source backend + let src = contentLocation source + ms <- liftIO $ catchMaybeIO $ getFileStatus src + mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms + case (mcache, inodeCache source) of + (_, Nothing) -> go k mcache ms + (Just newc, Just c) | compareStrong c newc -> go k mcache ms + _ -> failure "changed while it was being added" + where + go k mcache ms = ifM isDirect + ( godirect k mcache ms + , goindirect k mcache ms + ) + + goindirect (Just (key, _)) mcache ms = do + catchNonAsync (moveAnnex key $ contentLocation source) + (restoreFile (keyFilename source) key) + maybe noop (genMetaData key (keyFilename source)) ms + liftIO $ nukeFile $ keyFilename source + return (Just key, mcache) + goindirect _ _ _ = failure "failed to generate a key" + + godirect (Just (key, _)) (Just cache) ms = do + addInodeCache key cache + maybe noop (genMetaData key (keyFilename source)) ms + finishIngestDirect key source + return (Just key, Just cache) + godirect _ _ _ = failure "failed to generate a key" + + failure msg = do + warning $ keyFilename source ++ " " ++ msg + when (contentLocation source /= keyFilename source) $ + liftIO $ nukeFile $ contentLocation source + return (Nothing, Nothing) + +finishIngestDirect :: Key -> KeySource -> Annex () +finishIngestDirect key source = do + void $ addAssociatedFile key $ keyFilename source + when (contentLocation source /= keyFilename source) $ + liftIO $ nukeFile $ contentLocation source + + {- Copy to any other locations using the same key. -} + otherfs <- filter (/= keyFilename source) <$> associatedFiles key + forM_ otherfs $ + addContentWhenNotPresent key (keyFilename source) + +{- On error, put the file back so it doesn't seem to have vanished. + - This can be called before or after the symlink is in place. -} +restoreFile :: FilePath -> Key -> SomeException -> Annex a +restoreFile file key e = do + whenM (inAnnex key) $ do + liftIO $ nukeFile file + -- The key could be used by other files too, so leave the + -- content in the annex, and make a copy back to the file. + obj <- calcRepo $ gitAnnexLocation key + unlessM (liftIO $ copyFileExternal CopyTimeStamps obj file) $ + warning $ "Unable to restore content of " ++ file ++ "; it should be located in " ++ obj + thawContent file + throwM e + +{- Creates the symlink to the annexed content, returns the link target. -} +makeLink :: FilePath -> Key -> Maybe InodeCache -> Annex String +makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do + l <- calcRepo $ gitAnnexLink file key + replaceFile file $ makeAnnexLink l + + -- touch symlink to have same time as the original file, + -- as provided in the InodeCache + case mcache of +#if defined(WITH_CLIBS) && ! defined(__ANDROID__) + Just c -> liftIO $ touch file (TimeSpec $ inodeCacheToMtime c) False +#else + Just _ -> noop +#endif + Nothing -> noop + + return l + +{- Creates the symlink to the annexed content, and stages it in git. + - + - As long as the filesystem supports symlinks, we use + - git add, rather than directly staging the symlink to git. + - Using git add is best because it allows the queuing to work + - and is faster (staging the symlink runs hash-object commands each time). + - Also, using git add allows it to skip gitignored files, unless forced + - to include them. + -} +addLink :: FilePath -> Key -> Maybe InodeCache -> Annex () +addLink file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig) + ( do + _ <- makeLink file key mcache + ps <- forceParams + Annex.Queue.addCommand "add" (ps++[Param "--"]) [file] + , do + l <- makeLink file key mcache + addAnnexLink l file + ) + +{- Parameters to pass to git add, forcing addition of ignored files. -} +forceParams :: Annex [CommandParam] +forceParams = ifM (Annex.getState Annex.force) + ( return [Param "-f"] + , return [] + ) |