aboutsummaryrefslogtreecommitdiff
path: root/Annex/Ingest.hs
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2015-12-22 13:23:33 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2015-12-22 13:42:58 -0400
commit0da2a837d1634164695591029072b88cd56b46e7 (patch)
tree7c8dea2beeb08007df09c0b6f02d1b7943b7970b /Annex/Ingest.hs
parent0ac35d9b0ea997f9d6051d86889f4bb4df914498 (diff)
refactoring
no behavior changes
Diffstat (limited to 'Annex/Ingest.hs')
-rw-r--r--Annex/Ingest.hs220
1 files changed, 220 insertions, 0 deletions
diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs
new file mode 100644
index 000000000..0fd32a042
--- /dev/null
+++ b/Annex/Ingest.hs
@@ -0,0 +1,220 @@
+{- git-annex content ingestion
+ -
+ - Copyright 2010-2015 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE CPP #-}
+
+module Annex.Ingest (
+ lockDown,
+ ingest,
+ finishIngestDirect,
+ addLink,
+ makeLink,
+ restoreFile,
+ forceParams,
+) where
+
+import Common.Annex
+import Types.KeySource
+import Backend
+import Annex.Content
+import Annex.Content.Direct
+import Annex.Perms
+import Annex.Link
+import Annex.MetaData
+import qualified Annex
+import qualified Annex.Queue
+import Config
+import Utility.InodeCache
+import Annex.ReplaceFile
+import Utility.Tmp
+import Utility.CopyFile
+import Annex.InodeSentinal
+import Annex.Version
+#ifdef WITH_CLIBS
+#ifndef __ANDROID__
+import Utility.Touch
+#endif
+#endif
+
+import Control.Exception (IOException)
+
+{- The file that's being ingested is locked down before a key is generated,
+ - to prevent it from being modified in between. This lock down is not
+ - perfect at best (and pretty weak at worst). For example, it does not
+ - guard against files that are already opened for write by another process.
+ - So a KeySource is returned. Its inodeCache can be used to detect any
+ - changes that might be made to the file after it was locked down.
+ -
+ - When possible, the file is hard linked to a temp directory. This guards
+ - against some changes, like deletion or overwrite of the file, and
+ - allows lsof checks to be done more efficiently when adding a lot of files.
+ -
+ - Lockdown can fail if a file gets deleted, and Nothing will be returned.
+ -}
+lockDown :: FilePath -> Annex (Maybe KeySource)
+lockDown = either
+ (\e -> warning (show e) >> return Nothing)
+ (return . Just)
+ <=< lockDown'
+
+lockDown' :: FilePath -> Annex (Either IOException KeySource)
+lockDown' file = ifM crippledFileSystem
+ ( withTSDelta $ liftIO . tryIO . nohardlink
+ , tryIO $ do
+ tmp <- fromRepo gitAnnexTmpMiscDir
+ createAnnexDirectory tmp
+ go tmp
+ )
+ where
+ {- In indirect mode, the write bit is removed from the file as part
+ - of lock down to guard against further writes, and because objects
+ - in the annex have their write bit disabled anyway.
+ -
+ - Freezing the content early also lets us fail early when
+ - someone else owns the file.
+ -
+ - This is not done in direct mode, because files there need to
+ - remain writable at all times.
+ -}
+ go tmp = do
+ unlessM isDirect $
+ freezeContent file
+ withTSDelta $ \delta -> liftIO $ do
+ (tmpfile, h) <- openTempFile tmp $
+ relatedTemplate $ takeFileName file
+ hClose h
+ nukeFile tmpfile
+ withhardlink delta tmpfile `catchIO` const (nohardlink delta)
+ nohardlink delta = do
+ cache <- genInodeCache file delta
+ return KeySource
+ { keyFilename = file
+ , contentLocation = file
+ , inodeCache = cache
+ }
+ withhardlink delta tmpfile = do
+ createLink file tmpfile
+ cache <- genInodeCache tmpfile delta
+ return KeySource
+ { keyFilename = file
+ , contentLocation = tmpfile
+ , inodeCache = cache
+ }
+
+{- Ingests a locked down file into the annex.
+ -
+ - In direct mode, leaves the file alone, and just updates bookkeeping
+ - information.
+ -}
+ingest :: Maybe KeySource -> Annex (Maybe Key, Maybe InodeCache)
+ingest Nothing = return (Nothing, Nothing)
+ingest (Just source) = withTSDelta $ \delta -> do
+ backend <- chooseBackend $ keyFilename source
+ k <- genKey source backend
+ let src = contentLocation source
+ ms <- liftIO $ catchMaybeIO $ getFileStatus src
+ mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms
+ case (mcache, inodeCache source) of
+ (_, Nothing) -> go k mcache ms
+ (Just newc, Just c) | compareStrong c newc -> go k mcache ms
+ _ -> failure "changed while it was being added"
+ where
+ go k mcache ms = ifM isDirect
+ ( godirect k mcache ms
+ , goindirect k mcache ms
+ )
+
+ goindirect (Just (key, _)) mcache ms = do
+ catchNonAsync (moveAnnex key $ contentLocation source)
+ (restoreFile (keyFilename source) key)
+ maybe noop (genMetaData key (keyFilename source)) ms
+ liftIO $ nukeFile $ keyFilename source
+ return (Just key, mcache)
+ goindirect _ _ _ = failure "failed to generate a key"
+
+ godirect (Just (key, _)) (Just cache) ms = do
+ addInodeCache key cache
+ maybe noop (genMetaData key (keyFilename source)) ms
+ finishIngestDirect key source
+ return (Just key, Just cache)
+ godirect _ _ _ = failure "failed to generate a key"
+
+ failure msg = do
+ warning $ keyFilename source ++ " " ++ msg
+ when (contentLocation source /= keyFilename source) $
+ liftIO $ nukeFile $ contentLocation source
+ return (Nothing, Nothing)
+
+finishIngestDirect :: Key -> KeySource -> Annex ()
+finishIngestDirect key source = do
+ void $ addAssociatedFile key $ keyFilename source
+ when (contentLocation source /= keyFilename source) $
+ liftIO $ nukeFile $ contentLocation source
+
+ {- Copy to any other locations using the same key. -}
+ otherfs <- filter (/= keyFilename source) <$> associatedFiles key
+ forM_ otherfs $
+ addContentWhenNotPresent key (keyFilename source)
+
+{- On error, put the file back so it doesn't seem to have vanished.
+ - This can be called before or after the symlink is in place. -}
+restoreFile :: FilePath -> Key -> SomeException -> Annex a
+restoreFile file key e = do
+ whenM (inAnnex key) $ do
+ liftIO $ nukeFile file
+ -- The key could be used by other files too, so leave the
+ -- content in the annex, and make a copy back to the file.
+ obj <- calcRepo $ gitAnnexLocation key
+ unlessM (liftIO $ copyFileExternal CopyTimeStamps obj file) $
+ warning $ "Unable to restore content of " ++ file ++ "; it should be located in " ++ obj
+ thawContent file
+ throwM e
+
+{- Creates the symlink to the annexed content, returns the link target. -}
+makeLink :: FilePath -> Key -> Maybe InodeCache -> Annex String
+makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do
+ l <- calcRepo $ gitAnnexLink file key
+ replaceFile file $ makeAnnexLink l
+
+ -- touch symlink to have same time as the original file,
+ -- as provided in the InodeCache
+ case mcache of
+#if defined(WITH_CLIBS) && ! defined(__ANDROID__)
+ Just c -> liftIO $ touch file (TimeSpec $ inodeCacheToMtime c) False
+#else
+ Just _ -> noop
+#endif
+ Nothing -> noop
+
+ return l
+
+{- Creates the symlink to the annexed content, and stages it in git.
+ -
+ - As long as the filesystem supports symlinks, we use
+ - git add, rather than directly staging the symlink to git.
+ - Using git add is best because it allows the queuing to work
+ - and is faster (staging the symlink runs hash-object commands each time).
+ - Also, using git add allows it to skip gitignored files, unless forced
+ - to include them.
+ -}
+addLink :: FilePath -> Key -> Maybe InodeCache -> Annex ()
+addLink file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig)
+ ( do
+ _ <- makeLink file key mcache
+ ps <- forceParams
+ Annex.Queue.addCommand "add" (ps++[Param "--"]) [file]
+ , do
+ l <- makeLink file key mcache
+ addAnnexLink l file
+ )
+
+{- Parameters to pass to git add, forcing addition of ignored files. -}
+forceParams :: Annex [CommandParam]
+forceParams = ifM (Annex.getState Annex.force)
+ ( return [Param "-f"]
+ , return []
+ )