summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Annex/Ingest.hs220
-rw-r--r--Assistant/Threads/Committer.hs10
-rw-r--r--Command/Add.hs197
-rw-r--r--Command/AddUnused.hs4
-rw-r--r--Command/AddUrl.hs5
-rw-r--r--Command/Indirect.hs4
-rw-r--r--Command/Lock.hs4
-rw-r--r--Command/ReKey.hs4
-rw-r--r--Types/KeySource.hs4
9 files changed, 239 insertions, 213 deletions
diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs
new file mode 100644
index 000000000..0fd32a042
--- /dev/null
+++ b/Annex/Ingest.hs
@@ -0,0 +1,220 @@
+{- git-annex content ingestion
+ -
+ - Copyright 2010-2015 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE CPP #-}
+
+module Annex.Ingest (
+ lockDown,
+ ingest,
+ finishIngestDirect,
+ addLink,
+ makeLink,
+ restoreFile,
+ forceParams,
+) where
+
+import Common.Annex
+import Types.KeySource
+import Backend
+import Annex.Content
+import Annex.Content.Direct
+import Annex.Perms
+import Annex.Link
+import Annex.MetaData
+import qualified Annex
+import qualified Annex.Queue
+import Config
+import Utility.InodeCache
+import Annex.ReplaceFile
+import Utility.Tmp
+import Utility.CopyFile
+import Annex.InodeSentinal
+import Annex.Version
+#ifdef WITH_CLIBS
+#ifndef __ANDROID__
+import Utility.Touch
+#endif
+#endif
+
+import Control.Exception (IOException)
+
+{- The file that's being ingested is locked down before a key is generated,
+ - to prevent it from being modified in between. This lock down is not
+ - perfect at best (and pretty weak at worst). For example, it does not
+ - guard against files that are already opened for write by another process.
+ - So a KeySource is returned. Its inodeCache can be used to detect any
+ - changes that might be made to the file after it was locked down.
+ -
+ - When possible, the file is hard linked to a temp directory. This guards
+ - against some changes, like deletion or overwrite of the file, and
+ - allows lsof checks to be done more efficiently when adding a lot of files.
+ -
+ - Lockdown can fail if a file gets deleted, and Nothing will be returned.
+ -}
+lockDown :: FilePath -> Annex (Maybe KeySource)
+lockDown = either
+ (\e -> warning (show e) >> return Nothing)
+ (return . Just)
+ <=< lockDown'
+
+lockDown' :: FilePath -> Annex (Either IOException KeySource)
+lockDown' file = ifM crippledFileSystem
+ ( withTSDelta $ liftIO . tryIO . nohardlink
+ , tryIO $ do
+ tmp <- fromRepo gitAnnexTmpMiscDir
+ createAnnexDirectory tmp
+ go tmp
+ )
+ where
+ {- In indirect mode, the write bit is removed from the file as part
+ - of lock down to guard against further writes, and because objects
+ - in the annex have their write bit disabled anyway.
+ -
+ - Freezing the content early also lets us fail early when
+ - someone else owns the file.
+ -
+ - This is not done in direct mode, because files there need to
+ - remain writable at all times.
+ -}
+ go tmp = do
+ unlessM isDirect $
+ freezeContent file
+ withTSDelta $ \delta -> liftIO $ do
+ (tmpfile, h) <- openTempFile tmp $
+ relatedTemplate $ takeFileName file
+ hClose h
+ nukeFile tmpfile
+ withhardlink delta tmpfile `catchIO` const (nohardlink delta)
+ nohardlink delta = do
+ cache <- genInodeCache file delta
+ return KeySource
+ { keyFilename = file
+ , contentLocation = file
+ , inodeCache = cache
+ }
+ withhardlink delta tmpfile = do
+ createLink file tmpfile
+ cache <- genInodeCache tmpfile delta
+ return KeySource
+ { keyFilename = file
+ , contentLocation = tmpfile
+ , inodeCache = cache
+ }
+
+{- Ingests a locked down file into the annex.
+ -
+ - In direct mode, leaves the file alone, and just updates bookkeeping
+ - information.
+ -}
+ingest :: Maybe KeySource -> Annex (Maybe Key, Maybe InodeCache)
+ingest Nothing = return (Nothing, Nothing)
+ingest (Just source) = withTSDelta $ \delta -> do
+ backend <- chooseBackend $ keyFilename source
+ k <- genKey source backend
+ let src = contentLocation source
+ ms <- liftIO $ catchMaybeIO $ getFileStatus src
+ mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms
+ case (mcache, inodeCache source) of
+ (_, Nothing) -> go k mcache ms
+ (Just newc, Just c) | compareStrong c newc -> go k mcache ms
+ _ -> failure "changed while it was being added"
+ where
+ go k mcache ms = ifM isDirect
+ ( godirect k mcache ms
+ , goindirect k mcache ms
+ )
+
+ goindirect (Just (key, _)) mcache ms = do
+ catchNonAsync (moveAnnex key $ contentLocation source)
+ (restoreFile (keyFilename source) key)
+ maybe noop (genMetaData key (keyFilename source)) ms
+ liftIO $ nukeFile $ keyFilename source
+ return (Just key, mcache)
+ goindirect _ _ _ = failure "failed to generate a key"
+
+ godirect (Just (key, _)) (Just cache) ms = do
+ addInodeCache key cache
+ maybe noop (genMetaData key (keyFilename source)) ms
+ finishIngestDirect key source
+ return (Just key, Just cache)
+ godirect _ _ _ = failure "failed to generate a key"
+
+ failure msg = do
+ warning $ keyFilename source ++ " " ++ msg
+ when (contentLocation source /= keyFilename source) $
+ liftIO $ nukeFile $ contentLocation source
+ return (Nothing, Nothing)
+
+finishIngestDirect :: Key -> KeySource -> Annex ()
+finishIngestDirect key source = do
+ void $ addAssociatedFile key $ keyFilename source
+ when (contentLocation source /= keyFilename source) $
+ liftIO $ nukeFile $ contentLocation source
+
+ {- Copy to any other locations using the same key. -}
+ otherfs <- filter (/= keyFilename source) <$> associatedFiles key
+ forM_ otherfs $
+ addContentWhenNotPresent key (keyFilename source)
+
+{- On error, put the file back so it doesn't seem to have vanished.
+ - This can be called before or after the symlink is in place. -}
+restoreFile :: FilePath -> Key -> SomeException -> Annex a
+restoreFile file key e = do
+ whenM (inAnnex key) $ do
+ liftIO $ nukeFile file
+ -- The key could be used by other files too, so leave the
+ -- content in the annex, and make a copy back to the file.
+ obj <- calcRepo $ gitAnnexLocation key
+ unlessM (liftIO $ copyFileExternal CopyTimeStamps obj file) $
+ warning $ "Unable to restore content of " ++ file ++ "; it should be located in " ++ obj
+ thawContent file
+ throwM e
+
+{- Creates the symlink to the annexed content, returns the link target. -}
+makeLink :: FilePath -> Key -> Maybe InodeCache -> Annex String
+makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do
+ l <- calcRepo $ gitAnnexLink file key
+ replaceFile file $ makeAnnexLink l
+
+ -- touch symlink to have same time as the original file,
+ -- as provided in the InodeCache
+ case mcache of
+#if defined(WITH_CLIBS) && ! defined(__ANDROID__)
+ Just c -> liftIO $ touch file (TimeSpec $ inodeCacheToMtime c) False
+#else
+ Just _ -> noop
+#endif
+ Nothing -> noop
+
+ return l
+
+{- Creates the symlink to the annexed content, and stages it in git.
+ -
+ - As long as the filesystem supports symlinks, we use
+ - git add, rather than directly staging the symlink to git.
+ - Using git add is best because it allows the queuing to work
+ - and is faster (staging the symlink runs hash-object commands each time).
+ - Also, using git add allows it to skip gitignored files, unless forced
+ - to include them.
+ -}
+addLink :: FilePath -> Key -> Maybe InodeCache -> Annex ()
+addLink file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig)
+ ( do
+ _ <- makeLink file key mcache
+ ps <- forceParams
+ Annex.Queue.addCommand "add" (ps++[Param "--"]) [file]
+ , do
+ l <- makeLink file key mcache
+ addAnnexLink l file
+ )
+
+{- Parameters to pass to git add, forcing addition of ignored files. -}
+forceParams :: Annex [CommandParam]
+forceParams = ifM (Annex.getState Annex.force)
+ ( return [Param "-f"]
+ , return []
+ )
diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs
index 891df8419..5e8df56c8 100644
--- a/Assistant/Threads/Committer.hs
+++ b/Assistant/Threads/Committer.hs
@@ -21,13 +21,13 @@ import Logs.Transfer
import Logs.Location
import qualified Annex.Queue
import qualified Git.LsFiles
-import qualified Command.Add
import Utility.ThreadScheduler
import qualified Utility.Lsof as Lsof
import qualified Utility.DirWatcher as DirWatcher
import Types.KeySource
import Config
import Annex.Content
+import Annex.Ingest
import Annex.Link
import Annex.CatFile
import Annex.InodeSentinal
@@ -314,7 +314,7 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do
doadd = sanitycheck ks $ do
(mkey, mcache) <- liftAnnex $ do
showStart "add" $ keyFilename ks
- Command.Add.ingest $ Just ks
+ ingest $ Just ks
maybe (failedingest change) (done change mcache $ keyFilename ks) mkey
add _ = return Nothing
@@ -344,7 +344,7 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do
fastadddirect :: Change -> Key -> Assistant (Maybe Change)
fastadddirect change key = do
let source = keySource change
- liftAnnex $ Command.Add.finishIngestDirect key source
+ liftAnnex $ finishIngestDirect key source
done change Nothing (keyFilename source) key
fastaddunlocked :: Change -> Key -> Assistant (Maybe Change)
@@ -377,7 +377,7 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do
, do
link <- ifM isDirect
( calcRepo $ gitAnnexLink file key
- , Command.Add.link file key mcache
+ , makeLink file key mcache
)
whenM (pure DirWatcher.eventsCoalesce <||> isDirect) $
stageSymlink file =<< hashSymlink link
@@ -424,7 +424,7 @@ safeToAdd _ _ [] [] = return []
safeToAdd havelsof delayadd pending inprocess = do
maybe noop (liftIO . threadDelaySeconds) delayadd
liftAnnex $ do
- keysources <- forM pending $ Command.Add.lockDown . changeFile
+ keysources <- forM pending $ lockDown . changeFile
let inprocess' = inprocess ++ mapMaybe mkinprocess (zip pending keysources)
openfiles <- if havelsof
then S.fromList . map fst3 . filter openwrite <$>
diff --git a/Command/Add.hs b/Command/Add.hs
index ab4e3a9d1..b1b830cbc 100644
--- a/Command/Add.hs
+++ b/Command/Add.hs
@@ -5,39 +5,23 @@
- Licensed under the GNU GPL version 3 or higher.
-}
-{-# LANGUAGE CPP #-}
-
module Command.Add where
import Common.Annex
import Command
-import Types.KeySource
-import Backend
+import Annex.Ingest
import Logs.Location
import Annex.Content
import Annex.Content.Direct
-import Annex.Perms
import Annex.Link
-import Annex.MetaData
import qualified Annex
import qualified Annex.Queue
-#ifdef WITH_CLIBS
-#ifndef __ANDROID__
-import Utility.Touch
-#endif
-#endif
import Config
import Utility.InodeCache
import Annex.FileMatcher
-import Annex.ReplaceFile
-import Utility.Tmp
-import Utility.CopyFile
-import Annex.InodeSentinal
import Annex.Version
import qualified Database.Keys
-import Control.Exception (IOException)
-
cmd :: Command
cmd = notBareRepo $ withGlobalOptions (jobsOption : fileMatchingOptions) $
command "add" SectionCommon "add files to annex"
@@ -89,9 +73,6 @@ addFile file = do
Annex.Queue.addCommand "add" (ps++[Param "--"]) [file]
return True
-{- The add subcommand annexes a file, generating a key for it using a
- - backend, and then moving it into the annex directory and setting up
- - the symlink pointing to its content. -}
start :: FilePath -> CommandStart
start file = ifAnnexed file addpresent add
where
@@ -131,188 +112,12 @@ start file = ifAnnexed file addpresent add
void $ addAssociatedFile key file
next $ next $ cleanup file key Nothing =<< inAnnex key
-{- The file that's being added is locked down before a key is generated,
- - to prevent it from being modified in between. This lock down is not
- - perfect at best (and pretty weak at worst). For example, it does not
- - guard against files that are already opened for write by another process.
- - So a KeySource is returned. Its inodeCache can be used to detect any
- - changes that might be made to the file after it was locked down.
- -
- - When possible, the file is hard linked to a temp directory. This guards
- - against some changes, like deletion or overwrite of the file, and
- - allows lsof checks to be done more efficiently when adding a lot of files.
- -
- - Lockdown can fail if a file gets deleted, and Nothing will be returned.
- -}
-lockDown :: FilePath -> Annex (Maybe KeySource)
-lockDown = either
- (\e -> warning (show e) >> return Nothing)
- (return . Just)
- <=< lockDown'
-
-lockDown' :: FilePath -> Annex (Either IOException KeySource)
-lockDown' file = ifM crippledFileSystem
- ( withTSDelta $ liftIO . tryIO . nohardlink
- , tryIO $ do
- tmp <- fromRepo gitAnnexTmpMiscDir
- createAnnexDirectory tmp
- go tmp
- )
- where
- {- In indirect mode, the write bit is removed from the file as part
- - of lock down to guard against further writes, and because objects
- - in the annex have their write bit disabled anyway.
- -
- - Freezing the content early also lets us fail early when
- - someone else owns the file.
- -
- - This is not done in direct mode, because files there need to
- - remain writable at all times.
- -}
- go tmp = do
- unlessM isDirect $
- freezeContent file
- withTSDelta $ \delta -> liftIO $ do
- (tmpfile, h) <- openTempFile tmp $
- relatedTemplate $ takeFileName file
- hClose h
- nukeFile tmpfile
- withhardlink delta tmpfile `catchIO` const (nohardlink delta)
- nohardlink delta = do
- cache <- genInodeCache file delta
- return KeySource
- { keyFilename = file
- , contentLocation = file
- , inodeCache = cache
- }
- withhardlink delta tmpfile = do
- createLink file tmpfile
- cache <- genInodeCache tmpfile delta
- return KeySource
- { keyFilename = file
- , contentLocation = tmpfile
- , inodeCache = cache
- }
-
-{- Ingests a locked down file into the annex.
- -
- - In direct mode, leaves the file alone, and just updates bookkeeping
- - information.
- -}
-ingest :: Maybe KeySource -> Annex (Maybe Key, Maybe InodeCache)
-ingest Nothing = return (Nothing, Nothing)
-ingest (Just source) = withTSDelta $ \delta -> do
- backend <- chooseBackend $ keyFilename source
- k <- genKey source backend
- let src = contentLocation source
- ms <- liftIO $ catchMaybeIO $ getFileStatus src
- mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms
- case (mcache, inodeCache source) of
- (_, Nothing) -> go k mcache ms
- (Just newc, Just c) | compareStrong c newc -> go k mcache ms
- _ -> failure "changed while it was being added"
- where
- go k mcache ms = ifM isDirect
- ( godirect k mcache ms
- , goindirect k mcache ms
- )
-
- goindirect (Just (key, _)) mcache ms = do
- catchNonAsync (moveAnnex key $ contentLocation source)
- (undo (keyFilename source) key)
- maybe noop (genMetaData key (keyFilename source)) ms
- liftIO $ nukeFile $ keyFilename source
- return (Just key, mcache)
- goindirect _ _ _ = failure "failed to generate a key"
-
- godirect (Just (key, _)) (Just cache) ms = do
- addInodeCache key cache
- maybe noop (genMetaData key (keyFilename source)) ms
- finishIngestDirect key source
- return (Just key, Just cache)
- godirect _ _ _ = failure "failed to generate a key"
-
- failure msg = do
- warning $ keyFilename source ++ " " ++ msg
- when (contentLocation source /= keyFilename source) $
- liftIO $ nukeFile $ contentLocation source
- return (Nothing, Nothing)
-
-finishIngestDirect :: Key -> KeySource -> Annex ()
-finishIngestDirect key source = do
- void $ addAssociatedFile key $ keyFilename source
- when (contentLocation source /= keyFilename source) $
- liftIO $ nukeFile $ contentLocation source
-
- {- Copy to any other locations using the same key. -}
- otherfs <- filter (/= keyFilename source) <$> associatedFiles key
- forM_ otherfs $
- addContentWhenNotPresent key (keyFilename source)
-
perform :: FilePath -> CommandPerform
perform file = lockDown file >>= ingest >>= go
where
go (Just key, cache) = next $ cleanup file key cache True
go (Nothing, _) = stop
-{- On error, put the file back so it doesn't seem to have vanished.
- - This can be called before or after the symlink is in place. -}
-undo :: FilePath -> Key -> SomeException -> Annex a
-undo file key e = do
- whenM (inAnnex key) $ do
- liftIO $ nukeFile file
- -- The key could be used by other files too, so leave the
- -- content in the annex, and make a copy back to the file.
- obj <- calcRepo $ gitAnnexLocation key
- unlessM (liftIO $ copyFileExternal CopyTimeStamps obj file) $
- warning $ "Unable to restore content of " ++ file ++ "; it should be located in " ++ obj
- thawContent file
- throwM e
-
-{- Creates the symlink to the annexed content, returns the link target. -}
-link :: FilePath -> Key -> Maybe InodeCache -> Annex String
-link file key mcache = flip catchNonAsync (undo file key) $ do
- l <- calcRepo $ gitAnnexLink file key
- replaceFile file $ makeAnnexLink l
-
- -- touch symlink to have same time as the original file,
- -- as provided in the InodeCache
- case mcache of
-#if defined(WITH_CLIBS) && ! defined(__ANDROID__)
- Just c -> liftIO $ touch file (TimeSpec $ inodeCacheToMtime c) False
-#else
- Just _ -> noop
-#endif
- Nothing -> noop
-
- return l
-
-{- Creates the symlink to the annexed content, and stages it in git.
- -
- - As long as the filesystem supports symlinks, we use
- - git add, rather than directly staging the symlink to git.
- - Using git add is best because it allows the queuing to work
- - and is faster (staging the symlink runs hash-object commands each time).
- - Also, using git add allows it to skip gitignored files, unless forced
- - to include them.
- -}
-addLink :: FilePath -> Key -> Maybe InodeCache -> Annex ()
-addLink file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig)
- ( do
- _ <- link file key mcache
- ps <- forceParams
- Annex.Queue.addCommand "add" (ps++[Param "--"]) [file]
- , do
- l <- link file key mcache
- addAnnexLink l file
- )
-
-forceParams :: Annex [CommandParam]
-forceParams = ifM (Annex.getState Annex.force)
- ( return [Param "-f"]
- , return []
- )
-
cleanup :: FilePath -> Key -> Maybe InodeCache -> Bool -> CommandCleanup
cleanup file key mcache hascontent = do
ifM (isDirect <&&> pure hascontent)
diff --git a/Command/AddUnused.hs b/Command/AddUnused.hs
index 2b315eada..57fd0cf38 100644
--- a/Command/AddUnused.hs
+++ b/Command/AddUnused.hs
@@ -10,7 +10,7 @@ module Command.AddUnused where
import Common.Annex
import Logs.Location
import Command
-import qualified Command.Add
+import Annex.Ingest
import Command.Unused (withUnusedMaps, UnusedMaps(..), startUnused)
import Types.Key
@@ -31,7 +31,7 @@ start = startUnused "addunused" perform
perform :: Key -> CommandPerform
perform key = next $ do
logStatus key InfoPresent
- Command.Add.addLink file key Nothing
+ addLink file key Nothing
return True
where
file = "unused." ++ key2file key
diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs
index de83d8c9b..659274e49 100644
--- a/Command/AddUrl.hs
+++ b/Command/AddUrl.hs
@@ -14,14 +14,15 @@ import Network.URI
import Common.Annex
import Command
import Backend
-import qualified Command.Add
import qualified Annex
import qualified Annex.Queue
import qualified Annex.Url as Url
import qualified Backend.URL
import qualified Remote
import qualified Types.Remote as Remote
+import qualified Command.Add
import Annex.Content
+import Annex.Ingest
import Annex.UUID
import Logs.Web
import Types.Key
@@ -359,7 +360,7 @@ cleanup u url file key mtmp = case mtmp of
when (isJust mtmp) $
logStatus key InfoPresent
setUrlPresent u key url
- Command.Add.addLink file key Nothing
+ addLink file key Nothing
whenM isDirect $ do
void $ addAssociatedFile key file
{- For moveAnnex to work in direct mode, the symlink
diff --git a/Command/Indirect.hs b/Command/Indirect.hs
index f5234b4dc..06897e292 100644
--- a/Command/Indirect.hs
+++ b/Command/Indirect.hs
@@ -20,7 +20,7 @@ import Annex.Content
import Annex.Content.Direct
import Annex.CatFile
import Annex.Init
-import qualified Command.Add
+import Annex.Ingest
cmd :: Command
cmd = notBareRepo $ noDaemonRunning $
@@ -90,7 +90,7 @@ perform = do
Right _ -> do
l <- calcRepo $ gitAnnexLink f k
liftIO $ createSymbolicLink l f
- Left e -> catchNonAsync (Command.Add.undo f k e)
+ Left e -> catchNonAsync (restoreFile f k e)
warnlocked
showEndOk
diff --git a/Command/Lock.hs b/Command/Lock.hs
index 741c18c15..1be6e9c76 100644
--- a/Command/Lock.hs
+++ b/Command/Lock.hs
@@ -19,7 +19,7 @@ import Annex.Perms
import Annex.ReplaceFile
import Utility.InodeCache
import qualified Database.Keys
-import qualified Command.Add
+import Annex.Ingest
import Logs.Location
cmd :: Command
@@ -60,7 +60,7 @@ startNew file key = ifM (isJust <$> isAnnexLink file)
performNew :: FilePath -> Key -> Bool -> CommandPerform
performNew file key filemodified = do
lockdown =<< calcRepo (gitAnnexLocation key)
- Command.Add.addLink file key
+ addLink file key
=<< withTSDelta (liftIO . genInodeCache file)
next $ cleanupNew file key
where
diff --git a/Command/ReKey.hs b/Command/ReKey.hs
index fe13d4dd4..9fb8515c0 100644
--- a/Command/ReKey.hs
+++ b/Command/ReKey.hs
@@ -12,7 +12,7 @@ import Command
import qualified Annex
import Types.Key
import Annex.Content
-import qualified Command.Add
+import Annex.Ingest
import Logs.Web
import Logs.Location
import Utility.CopyFile
@@ -70,6 +70,6 @@ cleanup file oldkey newkey = do
-- Update symlink to use the new key.
liftIO $ removeFile file
- Command.Add.addLink file newkey Nothing
+ addLink file newkey Nothing
logStatus newkey InfoPresent
return True
diff --git a/Types/KeySource.hs b/Types/KeySource.hs
index 7c2fd13d5..25774588a 100644
--- a/Types/KeySource.hs
+++ b/Types/KeySource.hs
@@ -9,7 +9,7 @@ module Types.KeySource where
import Utility.InodeCache
-{- When content is in the process of being added to the annex,
+{- When content is in the process of being ingested into the annex,
- and a Key generated from it, this data type is used.
-
- The contentLocation may be different from the filename
@@ -19,7 +19,7 @@ import Utility.InodeCache
- of a different Key.
-
- The inodeCache can be used to detect some types of modifications to
- - files that may be made while they're in the process of being added.
+ - files that may be made while they're in the process of being ingested.
-}
data KeySource = KeySource
{ keyFilename :: FilePath