diff options
35 files changed, 526 insertions, 93 deletions
@@ -128,7 +128,7 @@ newState gitrepo = AnnexState {- Makes an Annex state object for the specified git repo. - Ensures the config is read, if it was not already. -} new :: Git.Repo -> IO AnnexState -new gitrepo = newState <$> Git.Config.read gitrepo +new = newState <$$> Git.Config.read {- performs an action in the Annex monad -} run :: AnnexState -> Annex a -> IO (a, AnnexState) diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 7bfc46f4a..10cca489b 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -94,7 +94,7 @@ performRemote key file backend numcopies remote = ( return True , ifM (Annex.getState Annex.fast) ( return False - , Remote.retrieveKeyFile remote key tmp + , Remote.retrieveKeyFile remote key Nothing tmp ) ) diff --git a/Command/Get.hs b/Command/Get.hs index c4ba48312..a5901ba66 100644 --- a/Command/Get.hs +++ b/Command/Get.hs @@ -12,6 +12,7 @@ import Command import qualified Remote import Annex.Content import qualified Command.Move +import Logs.Transfer def :: [Command] def = [withOptions [Command.Move.fromOption] $ command "get" paramPaths seek @@ -25,24 +26,24 @@ start :: Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart start from file (key, _) = stopUnless (not <$> inAnnex key) $ autoCopies file key (<) $ \_numcopies -> case from of - Nothing -> go $ perform key + Nothing -> go $ perform key file Just src -> -- get --from = copy --from stopUnless (Command.Move.fromOk src key) $ - go $ Command.Move.fromPerform src False key + go $ Command.Move.fromPerform src False key file where go a = do showStart "get" file - next a + next a -perform :: Key -> CommandPerform -perform key = stopUnless (getViaTmp key $ getKeyFile key) $ +perform :: Key -> FilePath -> CommandPerform +perform key file = stopUnless (getViaTmp key $ getKeyFile key file) $ next $ return True -- no cleanup needed {- Try to find a copy of the file in one of the remotes, - and copy it to here. -} -getKeyFile :: Key -> FilePath -> Annex Bool -getKeyFile key file = dispatch =<< Remote.keyPossibilities key +getKeyFile :: Key -> FilePath -> FilePath -> Annex Bool +getKeyFile key file dest = dispatch =<< Remote.keyPossibilities key where dispatch [] = do showNote "not available" @@ -64,7 +65,7 @@ getKeyFile key file = dispatch =<< Remote.keyPossibilities key | Remote.hasKeyCheap r = either (const False) id <$> Remote.hasKey r key | otherwise = return True - docopy r continue = do + docopy r continue = download (Remote.uuid r) key (Just file) $ do showAction $ "from " ++ Remote.name r - ifM (Remote.retrieveKeyFile r key file) + ifM (Remote.retrieveKeyFile r key (Just file) dest) ( return True , continue) diff --git a/Command/Move.hs b/Command/Move.hs index 6ec7cd90a..e7c11e80d 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -16,6 +16,7 @@ import qualified Remote import Annex.UUID import qualified Option import Logs.Presence +import Logs.Transfer def :: [Command] def = [withOptions options $ command "move" paramPaths seek @@ -68,9 +69,9 @@ toStart dest move file key = do then stop -- not here, so nothing to do else do showMoveAction move file - next $ toPerform dest move key -toPerform :: Remote -> Bool -> Key -> CommandPerform -toPerform dest move key = moveLock move key $ do + next $ toPerform dest move key file +toPerform :: Remote -> Bool -> Key -> FilePath -> CommandPerform +toPerform dest move key file = moveLock move key $ do -- Checking the remote is expensive, so not done in the start step. -- In fast mode, location tracking is assumed to be correct, -- and an explicit check is not done, when copying. When moving, @@ -88,7 +89,8 @@ toPerform dest move key = moveLock move key $ do stop Right False -> do showAction $ "to " ++ Remote.name dest - ok <- Remote.storeKey dest key + ok <- upload (Remote.uuid dest) key (Just file) $ + Remote.storeKey dest key (Just file) if ok then finish else do @@ -118,7 +120,7 @@ fromStart src move file key where go = stopUnless (fromOk src key) $ do showMoveAction move file - next $ fromPerform src move key + next $ fromPerform src move key file fromOk :: Remote -> Key -> Annex Bool fromOk src key | Remote.hasKeyCheap src = @@ -129,13 +131,14 @@ fromOk src key u <- getUUID remotes <- Remote.keyPossibilities key return $ u /= Remote.uuid src && elem src remotes -fromPerform :: Remote -> Bool -> Key -> CommandPerform -fromPerform src move key = moveLock move key $ +fromPerform :: Remote -> Bool -> Key -> FilePath -> CommandPerform +fromPerform src move key file = moveLock move key $ ifM (inAnnex key) ( handle move True - , do + , download (Remote.uuid src) key (Just file) $ do showAction $ "from " ++ Remote.name src - ok <- getViaTmp key $ Remote.retrieveKeyFile src key + ok <- getViaTmp key $ + Remote.retrieveKeyFile src key (Just file) handle move ok ) where diff --git a/Command/Status.hs b/Command/Status.hs index 2540a92da..eff21bb50 100644 --- a/Command/Status.hs +++ b/Command/Status.hs @@ -31,6 +31,7 @@ import Logs.Trust import Remote import Config import Utility.Percentage +import Logs.Transfer -- a named computation that produces a statistic type Stat = StatState (Maybe (String, StatState String)) @@ -70,6 +71,7 @@ fast_stats = , remote_list SemiTrusted "semitrusted" , remote_list UnTrusted "untrusted" , remote_list DeadTrusted "dead" + , transfer_list , disk_size ] slow_stats :: [Stat] @@ -170,6 +172,24 @@ bloom_info = stat "bloom filter size" $ json id $ do return $ size ++ note +transfer_list :: Stat +transfer_list = stat "transfers in progress" $ nojson $ lift $ do + uuidmap <- Remote.remoteMap id + ts <- getTransfers + if null ts + then return "none" + else return $ pp uuidmap "" $ sort ts + where + pp _ c [] = c + pp uuidmap c ((t, i):xs) = "\n\t" ++ line uuidmap t i ++ pp uuidmap c xs + line uuidmap t i = unwords + [ show (transferDirection t) ++ "ing" + , fromMaybe (show $ transferKey t) (associatedFile i) + , if transferDirection t == Upload then "to" else "from" + , maybe (fromUUID $ transferRemote t) Remote.name $ + M.lookup (transferRemote t) uuidmap + ] + disk_size :: Stat disk_size = stat "available local disk space" $ json id $ lift $ calcfree @@ -26,6 +26,7 @@ import Utility.SafeCommand as X import Utility.Path as X import Utility.Directory as X import Utility.Monad as X +import Utility.Applicative as X import Utility.FileSystemEncoding as X import Utility.PartialPrelude as X diff --git a/Locations.hs b/Locations.hs index cd3f55d46..082a72a50 100644 --- a/Locations.hs +++ b/Locations.hs @@ -18,6 +18,7 @@ module Locations ( gitAnnexBadDir, gitAnnexBadLocation, gitAnnexUnusedLog, + gitAnnexTransferDir, gitAnnexJournalDir, gitAnnexJournalLock, gitAnnexIndex, @@ -127,6 +128,11 @@ gitAnnexBadLocation key r = gitAnnexBadDir r </> keyFile key gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath gitAnnexUnusedLog prefix r = gitAnnexDir r </> (prefix ++ "unused") +{- .git/annex/transfer/ is used is used to record keys currently + - being transferred. -} +gitAnnexTransferDir :: Git.Repo -> FilePath +gitAnnexTransferDir r = addTrailingPathSeparator $ gitAnnexDir r </> "transfer" + {- .git/annex/journal/ is used to journal changes made to the git-annex - branch -} gitAnnexJournalDir :: Git.Repo -> FilePath diff --git a/Logs/Presence.hs b/Logs/Presence.hs index 933426718..e75e1e4e6 100644 --- a/Logs/Presence.hs +++ b/Logs/Presence.hs @@ -48,7 +48,7 @@ addLog file line = Annex.Branch.change file $ \s -> {- Reads a log file. - Note that the LogLines returned may be in any order. -} readLog :: FilePath -> Annex [LogLine] -readLog file = parseLog <$> Annex.Branch.get file +readLog = parseLog <$$> Annex.Branch.get {- Parses a log file. Unparseable lines are ignored. -} parseLog :: String -> [LogLine] diff --git a/Logs/Transfer.hs b/Logs/Transfer.hs new file mode 100644 index 000000000..526241f93 --- /dev/null +++ b/Logs/Transfer.hs @@ -0,0 +1,167 @@ +{- git-annex transfer information files + - + - Copyright 2012 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.Transfer where + +import Common.Annex +import Annex.Perms +import Annex.Exception +import qualified Git +import Types.Remote + +import Control.Concurrent +import System.Posix.Types +import Data.Time.Clock + +{- Enough information to uniquely identify a transfer, used as the filename + - of the transfer information file. -} +data Transfer = Transfer + { transferDirection :: Direction + , transferRemote :: UUID + , transferKey :: Key + } + deriving (Show, Eq, Ord) + +{- Information about a Transfer, stored in the transfer information file. -} +data TransferInfo = TransferInfo + { startedTime :: UTCTime + , transferPid :: Maybe ProcessID + , transferThread :: Maybe ThreadId + , bytesComplete :: Maybe Integer + , associatedFile :: Maybe FilePath + } + deriving (Show, Eq, Ord) + +data Direction = Upload | Download + deriving (Eq, Ord) + +instance Show Direction where + show Upload = "upload" + show Download = "download" + +readDirection :: String -> Maybe Direction +readDirection "upload" = Just Upload +readDirection "download" = Just Download +readDirection _ = Nothing + +upload :: UUID -> Key -> AssociatedFile -> Annex a -> Annex a +upload u key file a = transfer (Transfer Upload u key) file a + +download :: UUID -> Key -> AssociatedFile -> Annex a -> Annex a +download u key file a = transfer (Transfer Download u key) file a + +{- Runs a transfer action. Creates and locks the transfer information file + - while the action is running. Will throw an error if the transfer is + - already in progress. + -} +transfer :: Transfer -> Maybe FilePath -> Annex a -> Annex a +transfer t file a = do + tfile <- fromRepo $ transferFile t + createAnnexDirectory $ takeDirectory tfile + mode <- annexFileMode + info <- liftIO $ TransferInfo + <$> getCurrentTime + <*> pure Nothing -- pid not stored in file, so omitted for speed + <*> pure Nothing -- threadid not stored in file, so omitted for speed + <*> pure Nothing -- not 0; transfer may be resuming + <*> pure file + bracketIO (prep tfile mode info) (cleanup tfile) a + where + prep tfile mode info = do + fd <- openFd tfile ReadWrite (Just mode) + defaultFileFlags { trunc = True } + locked <- catchMaybeIO $ + setLock fd (WriteLock, AbsoluteSeek, 0, 0) + when (locked == Nothing) $ + error $ "transfer already in progress" + h <- fdToHandle fd + hPutStr h $ writeTransferInfo info + hFlush h + return h + cleanup tfile h = do + removeFile tfile + hClose h + +{- If a transfer is still running, returns its TransferInfo. -} +checkTransfer :: Transfer -> Annex (Maybe TransferInfo) +checkTransfer t = do + mode <- annexFileMode + tfile <- fromRepo $ transferFile t + mfd <- liftIO $ catchMaybeIO $ + openFd tfile ReadOnly (Just mode) defaultFileFlags + case mfd of + Nothing -> return Nothing -- failed to open file; not running + Just fd -> do + locked <- liftIO $ + getLock fd (WriteLock, AbsoluteSeek, 0, 0) + case locked of + Nothing -> do + liftIO $ closeFd fd + return Nothing + Just (pid, _) -> liftIO $ do + h <- fdToHandle fd + info <- readTransferInfo pid + <$> hGetContentsStrict h + hClose h + return info + +{- Gets all currently running transfers. -} +getTransfers :: Annex [(Transfer, TransferInfo)] +getTransfers = do + transfers <- catMaybes . map parseTransferFile <$> findfiles + infos <- mapM checkTransfer transfers + return $ map (\(t, Just i) -> (t, i)) $ + filter running $ zip transfers infos + where + findfiles = liftIO . dirContentsRecursive + =<< fromRepo gitAnnexTransferDir + running (_, i) = isJust i + +{- The transfer information file to use for a given Transfer. -} +transferFile :: Transfer -> Git.Repo -> FilePath +transferFile (Transfer direction u key) r = gitAnnexTransferDir r + </> show direction + </> fromUUID u + </> keyFile key + +{- Parses a transfer information filename to a Transfer. -} +parseTransferFile :: FilePath -> Maybe Transfer +parseTransferFile file = + case drop (length bits - 3) bits of + [direction, u, key] -> Transfer + <$> readDirection direction + <*> pure (toUUID u) + <*> fileKey key + _ -> Nothing + where + bits = splitDirectories file + +writeTransferInfo :: TransferInfo -> String +writeTransferInfo info = unlines + -- transferPid is not included; instead obtained by looking at + -- the process that locks the file. + -- transferThread is not included; not relevant for other processes + [ show $ startedTime info + -- bytesComplete is not included; changes too fast + , fromMaybe "" $ associatedFile info -- comes last; arbitrary content + ] + +readTransferInfo :: ProcessID -> String -> Maybe TransferInfo +readTransferInfo pid s = + case bits of + [time] -> TransferInfo + <$> readish time + <*> pure (Just pid) + <*> pure Nothing + <*> pure Nothing + <*> pure filename + _ -> Nothing + where + (bits, filebits) = splitAt 1 $ lines s + filename + | null filebits = Nothing + | otherwise = Just $ join "\n" filebits diff --git a/Remote/Bup.hs b/Remote/Bup.hs index f1a36e468..0d1b606d3 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -108,8 +108,8 @@ bupSplitParams r buprepo k src = do return $ bupParams "split" buprepo (os ++ [Param "-n", Param (bupRef k), src]) -store :: Git.Repo -> BupRepo -> Key -> Annex Bool -store r buprepo k = do +store :: Git.Repo -> BupRepo -> Key -> AssociatedFile -> Annex Bool +store r buprepo k _f = do src <- inRepo $ gitAnnexLocation k params <- bupSplitParams r buprepo k (File src) liftIO $ boolSystem "bup" params @@ -122,11 +122,11 @@ storeEncrypted r buprepo (cipher, enck) k = do withEncryptedHandle cipher (L.readFile src) $ \h -> pipeBup params (Just h) Nothing -retrieve :: BupRepo -> Key -> FilePath -> Annex Bool -retrieve buprepo k f = do +retrieve :: BupRepo -> Key -> AssociatedFile -> FilePath -> Annex Bool +retrieve buprepo k _f d = do let params = bupParams "join" buprepo [Param $ bupRef k] liftIO $ catchBoolIO $ do - tofile <- openFile f WriteMode + tofile <- openFile d WriteMode pipeBup params Nothing (Just tofile) retrieveCheap :: BupRepo -> Key -> FilePath -> Annex Bool diff --git a/Remote/Directory.hs b/Remote/Directory.hs index a5b0ff2a2..6b158730e 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -122,8 +122,8 @@ withCheckedFiles check (Just _) d k a = go $ locations d k withStoredFiles :: ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool withStoredFiles = withCheckedFiles doesFileExist -store :: FilePath -> ChunkSize -> Key -> Annex Bool -store d chunksize k = do +store :: FilePath -> ChunkSize -> Key -> AssociatedFile -> Annex Bool +store d chunksize k _f = do src <- inRepo $ gitAnnexLocation k metered k $ \meterupdate -> storeHelper d chunksize k $ \dests -> @@ -242,8 +242,8 @@ storeHelper d chunksize key a = prep <&&> check <&&> go preventWrite dir return (not $ null stored) -retrieve :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool -retrieve d chunksize k f = metered k $ \meterupdate -> +retrieve :: FilePath -> ChunkSize -> Key -> AssociatedFile -> FilePath -> Annex Bool +retrieve d chunksize k _ f = metered k $ \meterupdate -> liftIO $ withStoredFiles chunksize d k $ \files -> catchBoolIO $ do meteredWriteFile' meterupdate f files feeder @@ -272,7 +272,7 @@ retrieveCheap d _ k f = liftIO $ withStoredFiles Nothing d k go remove :: FilePath -> ChunkSize -> Key -> Annex Bool remove d chunksize k = liftIO $ withStoredFiles chunksize d k go where - go files = all id <$> mapM removefile files + go = all id <$$> mapM removefile removefile file = catchBoolIO $ do let dir = parentDir file allowWrite dir diff --git a/Remote/Git.hs b/Remote/Git.hs index 60a881803..0b839c9a5 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -21,6 +21,7 @@ import qualified Git.Config import qualified Git.Construct import qualified Annex import Logs.Presence +import Logs.Transfer import Annex.UUID import qualified Annex.Content import qualified Annex.BranchState @@ -219,14 +220,19 @@ dropKey r key ] {- Tries to copy a key's content from a remote's annex to a file. -} -copyFromRemote :: Git.Repo -> Key -> FilePath -> Annex Bool -copyFromRemote r key file +copyFromRemote :: Git.Repo -> Key -> AssociatedFile -> FilePath -> Annex Bool +copyFromRemote r key file dest | not $ Git.repoIsUrl r = guardUsable r False $ do params <- rsyncParams r - loc <- liftIO $ gitAnnexLocation key r - rsyncOrCopyFile params loc file - | Git.repoIsSsh r = rsyncHelper =<< rsyncParamsRemote r True key file - | Git.repoIsHttp r = Annex.Content.downloadUrl (keyUrls r key) file + u <- getUUID + -- run copy from perspective of remote + liftIO $ onLocal r $ do + ensureInitialized + loc <- inRepo $ gitAnnexLocation key + upload u key file $ + rsyncOrCopyFile params loc dest + | Git.repoIsSsh r = rsyncHelper =<< rsyncParamsRemote r True key dest + | Git.repoIsHttp r = Annex.Content.downloadUrl (keyUrls r key) dest | otherwise = error "copying from non-ssh, non-http repo not supported" copyFromRemoteCheap :: Git.Repo -> Key -> FilePath -> Annex Bool @@ -236,23 +242,25 @@ copyFromRemoteCheap r key file liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True | Git.repoIsSsh r = ifM (Annex.Content.preseedTmp key file) - ( copyFromRemote r key file + ( copyFromRemote r key Nothing file , return False ) | otherwise = return False {- Tries to copy a key's content to a remote's annex. -} -copyToRemote :: Git.Repo -> Key -> Annex Bool -copyToRemote r key +copyToRemote :: Git.Repo -> Key -> AssociatedFile -> Annex Bool +copyToRemote r key file | not $ Git.repoIsUrl r = guardUsable r False $ commitOnCleanup r $ do keysrc <- inRepo $ gitAnnexLocation key params <- rsyncParams r + u <- getUUID -- run copy from perspective of remote liftIO $ onLocal r $ do ensureInitialized - Annex.Content.saveState True `after` - Annex.Content.getViaTmp key - (rsyncOrCopyFile params keysrc) + download u key file $ + Annex.Content.saveState True `after` + Annex.Content.getViaTmp key + (rsyncOrCopyFile params keysrc) | Git.repoIsSsh r = commitOnCleanup r $ do keysrc <- inRepo $ gitAnnexLocation key rsyncHelper =<< rsyncParamsRemote r False key keysrc diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs index 789a1d996..6d5405d9e 100644 --- a/Remote/Helper/Encryptable.hs +++ b/Remote/Helper/Encryptable.hs @@ -59,14 +59,14 @@ encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = cost = cost r + encryptedRemoteCostAdj } where - store k = cip k >>= maybe - (storeKey r k) + store k f = cip k >>= maybe + (storeKey r k f) (`storeKeyEncrypted` k) - retrieve k f = cip k >>= maybe - (retrieveKeyFile r k f) - (\enck -> retrieveKeyFileEncrypted enck k f) - retrieveCheap k f = cip k >>= maybe - (retrieveKeyFileCheap r k f) + retrieve k f d = cip k >>= maybe + (retrieveKeyFile r k f d) + (\enck -> retrieveKeyFileEncrypted enck k d) + retrieveCheap k d = cip k >>= maybe + (retrieveKeyFileCheap r k d) (\_ -> return False) withkey a k = cip k >>= maybe (a k) (a . snd) cip = cipherKey c diff --git a/Remote/Helper/Hooks.hs b/Remote/Helper/Hooks.hs index d85959062..0a6b22081 100644 --- a/Remote/Helper/Hooks.hs +++ b/Remote/Helper/Hooks.hs @@ -27,8 +27,8 @@ addHooks' r Nothing Nothing = r addHooks' r starthook stophook = r' where r' = r - { storeKey = \k -> wrapper $ storeKey r k - , retrieveKeyFile = \k f -> wrapper $ retrieveKeyFile r k f + { storeKey = \k f -> wrapper $ storeKey r k f + , retrieveKeyFile = \k f d -> wrapper $ retrieveKeyFile r k f d , retrieveKeyFileCheap = \k f -> wrapper $ retrieveKeyFileCheap r k f , removeKey = \k -> wrapper $ removeKey r k , hasKey = \k -> wrapper $ hasKey r k diff --git a/Remote/Hook.hs b/Remote/Hook.hs index 5fb793e65..9e8d3c620 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -101,8 +101,8 @@ runHook hooktype hook k f a = maybe (return False) run =<< lookupHook hooktype h return False ) -store :: String -> Key -> Annex Bool -store h k = do +store :: String -> Key -> AssociatedFile -> Annex Bool +store h k _f = do src <- inRepo $ gitAnnexLocation k runHook h "store" k (Just src) $ return True @@ -112,8 +112,8 @@ storeEncrypted h (cipher, enck) k = withTmp enck $ \tmp -> do liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp runHook h "store" enck (Just tmp) $ return True -retrieve :: String -> Key -> FilePath -> Annex Bool -retrieve h k f = runHook h "retrieve" k (Just f) $ return True +retrieve :: String -> Key -> AssociatedFile -> FilePath -> Annex Bool +retrieve h k _f d = runHook h "retrieve" k (Just d) $ return True retrieveCheap :: String -> Key -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 6207e1425..887c68339 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -99,8 +99,8 @@ rsyncUrls o k = map use annexHashes use h = rsyncUrl o </> h k </> rsyncEscape o (f </> f) f = keyFile k -store :: RsyncOpts -> Key -> Annex Bool -store o k = rsyncSend o k <=< inRepo $ gitAnnexLocation k +store :: RsyncOpts -> Key -> AssociatedFile -> Annex Bool +store o k _f = rsyncSend o k <=< inRepo $ gitAnnexLocation k storeEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> Annex Bool storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do @@ -108,8 +108,8 @@ storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp rsyncSend o enck tmp -retrieve :: RsyncOpts -> Key -> FilePath -> Annex Bool -retrieve o k f = untilTrue (rsyncUrls o k) $ \u -> rsyncRemote o +retrieve :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> Annex Bool +retrieve o k _ f = untilTrue (rsyncUrls o k) $ \u -> rsyncRemote o -- use inplace when retrieving to support resuming [ Param "--inplace" , Param u @@ -117,11 +117,11 @@ retrieve o k f = untilTrue (rsyncUrls o k) $ \u -> rsyncRemote o ] retrieveCheap :: RsyncOpts -> Key -> FilePath -> Annex Bool -retrieveCheap o k f = ifM (preseedTmp k f) ( retrieve o k f , return False ) +retrieveCheap o k f = ifM (preseedTmp k f) ( retrieve o k undefined f , return False ) retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> FilePath -> Annex Bool retrieveEncrypted o (cipher, enck) _ f = withTmp enck $ \tmp -> do - ifM (retrieve o enck tmp) + ifM (retrieve o enck undefined tmp) ( liftIO $ catchBoolIO $ do withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f return True diff --git a/Remote/S3.hs b/Remote/S3.hs index 18d4915dc..dca08fff8 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -113,8 +113,8 @@ s3Setup u c = handlehost $ M.lookup "host" c -- be human-readable M.delete "bucket" defaults -store :: Remote -> Key -> Annex Bool -store r k = s3Action r False $ \(conn, bucket) -> do +store :: Remote -> Key -> AssociatedFile -> Annex Bool +store r k _f = s3Action r False $ \(conn, bucket) -> do dest <- inRepo $ gitAnnexLocation k res <- liftIO $ storeHelper (conn, bucket) r k dest s3Bool res @@ -149,12 +149,12 @@ storeHelper (conn, bucket) r k file = do xheaders = filter isxheader $ M.assocs $ fromJust $ config r isxheader (h, _) = "x-amz-" `isPrefixOf` h -retrieve :: Remote -> Key -> FilePath -> Annex Bool -retrieve r k f = s3Action r False $ \(conn, bucket) -> do +retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> Annex Bool +retrieve r k _f d = s3Action r False $ \(conn, bucket) -> do res <- liftIO $ getObject conn $ bucketKey r bucket k case res of Right o -> do - liftIO $ L.writeFile f $ obj_data o + liftIO $ L.writeFile d $ obj_data o return True Left e -> s3Warning e diff --git a/Remote/Web.hs b/Remote/Web.hs index 5fc592326..2516240ab 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -51,21 +51,21 @@ gen r _ _ = remotetype = remote } -downloadKey :: Key -> FilePath -> Annex Bool -downloadKey key file = get =<< getUrls key +downloadKey :: Key -> AssociatedFile -> FilePath -> Annex Bool +downloadKey key _file dest = get =<< getUrls key where get [] = do warning "no known url" return False get urls = do showOutput -- make way for download progress bar - downloadUrl urls file + downloadUrl urls dest downloadKeyCheap :: Key -> FilePath -> Annex Bool downloadKeyCheap _ _ = return False -uploadKey :: Key -> Annex Bool -uploadKey _ = do +uploadKey :: Key -> AssociatedFile -> Annex Bool +uploadKey _ _ = do warning "upload to web not supported" return False diff --git a/Types/Remote.hs b/Types/Remote.hs index 9bac2ca0f..c7628165c 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -33,6 +33,9 @@ data RemoteTypeA a = RemoteType { instance Eq (RemoteTypeA a) where x == y = typename x == typename y +{- A filename associated with a Key, for display to user. -} +type AssociatedFile = Maybe FilePath + {- An individual remote. -} data RemoteA a = Remote { -- each Remote has a unique uuid @@ -42,9 +45,9 @@ data RemoteA a = Remote { -- Remotes have a use cost; higher is more expensive cost :: Int, -- Transfers a key to the remote. - storeKey :: Key -> a Bool, + storeKey :: Key -> AssociatedFile -> a Bool, -- retrieves a key's contents to a file - retrieveKeyFile :: Key -> FilePath -> a Bool, + retrieveKeyFile :: Key -> AssociatedFile -> FilePath -> a Bool, -- retrieves a key's contents to a tmp file, if it can be done cheaply retrieveKeyFileCheap :: Key -> FilePath -> a Bool, -- removes a key's contents diff --git a/Utility/Applicative.hs b/Utility/Applicative.hs new file mode 100644 index 000000000..64400c801 --- /dev/null +++ b/Utility/Applicative.hs @@ -0,0 +1,16 @@ +{- applicative stuff + - + - Copyright 2012 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.Applicative where + +{- Like <$> , but supports one level of currying. + - + - foo v = bar <$> action v == foo = bar <$$> action + -} +(<$$>) :: Functor f => (a -> b) -> (c -> f a) -> c -> f b +f <$$> v = fmap f . v +infixr 4 <$$> diff --git a/debian/changelog b/debian/changelog index 46afb6e4d..c279614ca 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,12 @@ -git-annex (3.20120625) UNRELEASED; urgency=low +git-annex (3.20120630) UNRELEASED; urgency=low + + * get, move, copy: Now refuse to do anything when the requested file + transfer is already in progress by another process. + * status: Lists transfers that are currently in progress. + + -- Joey Hess <joeyh@debian.org> Sun, 01 Jul 2012 15:04:37 -0400 + +git-annex (3.20120629) unstable; urgency=low * cabal: Only try to use inotify on Linux. * Version build dependency on STM, and allow building without it, @@ -11,7 +19,7 @@ git-annex (3.20120625) UNRELEASED; urgency=low in their names. * sync: Automatically resolves merge conflicts. - -- Joey Hess <joeyh@debian.org> Mon, 25 Jun 2012 11:38:12 -0400 + -- Joey Hess <joeyh@debian.org> Fri, 29 Jun 2012 10:17:49 -0400 git-annex (3.20120624) unstable; urgency=low diff --git a/doc/bugs/Issue_on_OSX_with_some_system_limits/comment_2_b14e697c211843163285aaa8de5bf4c6._comment b/doc/bugs/Issue_on_OSX_with_some_system_limits/comment_2_b14e697c211843163285aaa8de5bf4c6._comment new file mode 100644 index 000000000..17dcf7634 --- /dev/null +++ b/doc/bugs/Issue_on_OSX_with_some_system_limits/comment_2_b14e697c211843163285aaa8de5bf4c6._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 2" + date="2012-06-29T12:02:48Z" + content=""" +Doing, + + sudo sysctl -w kern.maxfilesperproc=400000 + +Somewhat works for me, git-annex watch at least starts up and takes a while to scan the directory, but it's not ideal. Also, creating files seems to work okay, when I remove a file the changes don't seem to get pushed across my other repos, running a sync on the remote repo fixes things. +"""]] diff --git a/doc/bugs/watcher_commits_unlocked_files.mdwn b/doc/bugs/watcher_commits_unlocked_files.mdwn new file mode 100644 index 000000000..ef64921f1 --- /dev/null +++ b/doc/bugs/watcher_commits_unlocked_files.mdwn @@ -0,0 +1,28 @@ +When having "git annex watch" running, unlocking files causes the watcher +to immediately lock/commit them. + +---- + +Possible approaches: + +* The watcher could detect unlocked files by checking if newly added files + are a typechange of a file already in git. But this would add git overhead + to every file add. +* `git annex unlock` could add some type of flag file, which the assistant + could check. This would work fine, for users who want to use `git annex + unlock` with the assistant. That's probably not simple enough for most + users, though. +* There could be a UI in the assistant to pick a file and unlock it. + The assistant would have its own list of files it knows are unlocked. + But I'm trying to avoid mandatory UI to use the assistant. +* Perhaps instead, have a directory, like "edit". The assistant could notice + when files move into this special directory, and automatically unlock them. + Then when they're moved out, automatically commit them. +* Alternatively, files that are moved out of the repository entirely could be + automatically unlocked, and then when they're moved back in, it would + automatically do the right thing. This may be worth implementing in + combination with the "edit" directory, as different use cases would work + better with one or the other. However, I don't currently get inotify + events when files are moved out of the repository (well, I do, but it + just says "file moved", with no forwarding address, so I don't know + how to find the file to unlock it. diff --git a/doc/bugs/watcher_commits_unlocked_files/comment_1_f70e1912fde0eee59e208307df06b503._comment b/doc/bugs/watcher_commits_unlocked_files/comment_1_f70e1912fde0eee59e208307df06b503._comment new file mode 100644 index 000000000..a06b8fe82 --- /dev/null +++ b/doc/bugs/watcher_commits_unlocked_files/comment_1_f70e1912fde0eee59e208307df06b503._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 1" + date="2012-06-28T13:39:18Z" + content=""" +That is a known problem/bug which is listed at [[design/assistant/inotify]] +"""]] diff --git a/doc/design/assistant/blog/day_20__data_transfer_design.mdwn b/doc/design/assistant/blog/day_20__data_transfer_design.mdwn new file mode 100644 index 000000000..4f47ae63c --- /dev/null +++ b/doc/design/assistant/blog/day_20__data_transfer_design.mdwn @@ -0,0 +1,22 @@ +Today is a planning day. I have only a few days left before I'm off to +Nicaragua for [DebConf](http://debconf12.debconf.org/), where I'll only +have smaller chunks of time without interruptions. So it's important to get +some well-defined smallish chunks designed that I can work on later. See +bulleted action items below (now moved to [[syncing]]. Each +should be around 1-2 hours unless it turns out to be 8 hours... :) + +First, worked on writing down a design, and some data types, for data transfer +tracking (see [[syncing]] page). Found that writing down these simple data +types before I started slinging code has clarified things a lot for me. + +Most importantly, I realized that I will need to modify `git-annex-shell` +to record on disk what transfers it's doing, so the assistant can get that +information and use it to both avoid redundant transfers (potentially a big +problem!), and later to allow the user to control them using the web app. + +While eventually the user will be able to use the web app to prioritize +transfers, stop and start, throttle, etc, it's important to get the default +behavior right. So I'm thinking about things like how to prioritize uploads +vs downloads, when it's appropriate to have multiple downloads running at +once, etc. + diff --git a/doc/design/assistant/blog/day_21__transfer_tracking.mdwn b/doc/design/assistant/blog/day_21__transfer_tracking.mdwn new file mode 100644 index 000000000..79c0b6438 --- /dev/null +++ b/doc/design/assistant/blog/day_21__transfer_tracking.mdwn @@ -0,0 +1,28 @@ +Worked today on two action items from my last blog post: + +* on-disk transfers in progress information files (read/write/enumerate) +* locking for the files, so redundant transfer races can be detected, + and failed transfers noticed + +That's all done, and used by the `get`, `copy`, and `move` subcommands. + +Also, I made `git-annex status` use that information to display any +file transfers that are currently in progress: + + joey@gnu:~/lib/sound/misc>git annex status + [...] + transfers in progress: + downloading Vic-303.mp3 from leech + +(Webapp, here we come!) + +However... Files being sent or received by `git-annex-shell` don't yet +have this transfer info recorded. The problem is that to do so, +`git-annex-shell` will need to be run with a `--remote=` parameter. But +old versions will of course fail when run with such an unknown parameter. + +This is a problem I last faced in December 2011 when adding the `--uuid=` +parameter. That time I punted and required the remote `git-annex-shell` be +updated to a new enough version to accept it. But as git-annex gets more widely +used and packaged, that's becoming less an option. I need to find a real +solution to this problem. diff --git a/doc/design/assistant/inotify.mdwn b/doc/design/assistant/inotify.mdwn index 47b8c84a3..7b600090a 100644 --- a/doc/design/assistant/inotify.mdwn +++ b/doc/design/assistant/inotify.mdwn @@ -8,13 +8,15 @@ available! * If a file is checked into git as a normal file and gets modified (or merged, etc), it will be converted into an annexed file. - See [[blog/day_7__bugfixes]] + See [[blog/day_7__bugfixes]]. * When you `git annex unlock` a file, it will immediately be re-locked. + See [[bugs/watcher_commits_unlocked_files]]. * Kqueue has to open every directory it watches, so too many directories will run it out of the max number of open files (typically 1024), and fail. I may need to fork off multiple watcher processes to handle this. + See [[bugs/Issue_on_OSX_with_some_system_limits]]. ## beyond Linux @@ -42,6 +44,8 @@ I'd also like to support OSX and if possible the BSDs. * [man page](http://www.freebsd.org/cgi/man.cgi?query=kqueue&apropos=0&sektion=0&format=html) * <https://github.com/gorakhargosh/watchdog/blob/master/src/watchdog/observers/kqueue.py> (good example program) + *kqueue is now supported* + * hfsevents ([haskell bindings](http://hackage.haskell.org/package/hfsevents)) is OSX specific. @@ -71,9 +75,6 @@ I'd also like to support OSX and if possible the BSDs. - honor .gitignore, not adding files it excludes (difficult, probably needs my own .gitignore parser to avoid excessive running of git commands to check for ignored files) -- Possibly, when a directory is moved out of the annex location, - unannex its contents. (Does inotify tell us where the directory moved - to so we can access it?) ## the races @@ -125,6 +126,17 @@ Many races need to be dealt with by this code. Here are some of them. Not a problem; The removal event removes the old file from the index, and the add event adds the new one. +* Symlink appears, but is then deleted before it can be processed. + + Leads to an ugly message, otherwise no problem: + + ./me: readSymbolicLink: does not exist (No such file or directory) + + Here `me` is a file that was in a conflicted merge, which got + removed as part of the resolution. This is probably coming from the watcher + thread, which sees the newly added symlink (created by the git merge), + but finds it deleted (by the conflict resolver) by the time it processes it. + ## done - on startup, add any files that have appeared since last run **done** diff --git a/doc/design/assistant/progressbars.mdwn b/doc/design/assistant/progressbars.mdwn index 2ade05aa5..ee7384274 100644 --- a/doc/design/assistant/progressbars.mdwn +++ b/doc/design/assistant/progressbars.mdwn @@ -9,6 +9,6 @@ To get this info for downloads, git-annex can watch the file as it arrives and use its size. TODO: What about uploads? Will i have to parse rsync's progresss output? -Feed it via a named pipe? Ugh. +Feed it via a named pipe? Ugh. Check into librsync. This is one of those potentially hidden but time consuming problems. diff --git a/doc/design/assistant/syncing.mdwn b/doc/design/assistant/syncing.mdwn index 50e6fb4f1..5476b56f1 100644 --- a/doc/design/assistant/syncing.mdwn +++ b/doc/design/assistant/syncing.mdwn @@ -1,6 +1,37 @@ Once files are added (or removed or moved), need to send those changes to all the other git clones, at both the git level and the key/value level. +## action items + +* on-disk transfers in progress information files (read/write/enumerate) + **done** +* locking for the files, so redundant transfer races can be detected, + and failed transfers noticed **done** +* transfer info for git-annex-shell (problem: how to add a switch + with the necessary info w/o breaking backwards compatability?) +* update files as transfers proceed. See [[progressbars]] + (updating for downloads is easy; for uploads is hard) +* add Transfer queue TChan +* enqueue Transfers (Uploads) as new files are added to the annex by + Watcher. +* enqueue Tranferrs (Downloads) as new dangling symlinks are noticed by + Watcher. +* add TransferInfo Map to DaemonStatus for tracking transfers in progress. +* Poll transfer in progress info files for changes (use inotify again! + wow! hammer, meet nail..), and update the TransferInfo Map +* Write basic Transfer handling thread. Multiple such threads need to be + able to be run at once. Each will need its own independant copy of the + Annex state monad. +* Write transfer control thread, which decides when to launch transfers. +* At startup, and possibly periodically, look for files we have that + location tracking indicates remotes do not, and enqueue Uploads for + them. Also, enqueue Downloads for any files we're missing. +* Find a way to probe available outgoing bandwidth, to throttle so + we don't bufferbloat the network to death. +* git-annex needs a simple speed control knob, which can be plumbed + through to, at least, rsync. A good job for an hour in an + airport somewhere. + ## git syncing 1. Can use `git annex sync`, which already handles bidirectional syncing. @@ -45,6 +76,46 @@ and with appropriate rate limiting and control facilities. This probably will need lots of refinements to get working well. +### first pass: flood syncing + +Before mapping the network, the best we can do is flood all files out to every +reachable remote. This is worth doing first, since it's the simplest way to +get the basic functionality of the assistant to work. And we'll need this +anyway. + +### transfer tracking + +* Upload added to queue by the watcher thread when it adds content. +* Download added to queue by the watcher thread when it seens new symlinks + that lack content. +* Transfer threads started/stopped as necessary to move data. + (May sometimes want multiple threads downloading, or uploading, or even both.) + + type TransferQueue = TChan [Transfer] + -- add (M.Map Transfer TransferInfo) to DaemonStatus + + startTransfer :: Transfer -> Annex TransferID + + stopTransfer :: TransferID -> IO () + +The assistant needs to find out when `git-annex-shell` is receiving or +sending (triggered by another remote), so it can add data for those too. +This is important to avoid uploading content to a remote that is already +downloading it from us, or vice versa, as well as to in future let the web +app manage transfers as user desires. + +For files being received, it can see the temp file, but other than lsof +there's no good way to find the pid (and I'd rather not kill blindly). + +For files being sent, there's no filesystem indication. So git-annex-shell +(and other git-annex transfer processes) should write a status file to disk. + +Can use file locking on these status files to claim upload/download rights, +which will avoid races. + +This status file can also be updated periodically to show amount of transfer +complete (necessary for tracking uploads). + ## other considerations This assumes the network is connected. It's often not, so the diff --git a/doc/download.mdwn b/doc/download.mdwn index f0f17e141..242de13c3 100644 --- a/doc/download.mdwn +++ b/doc/download.mdwn @@ -18,6 +18,7 @@ others need some manual work. See [[install]] for details. The git repository has some branches: +* `assistant` contains the new change-tracking daemon * `ghc7.0` supports versions of ghc older than 7.4, which had a major change to filename encoding. * `old-monad-control` is for systems that don't have a newer monad-control @@ -25,6 +26,7 @@ The git repository has some branches: * `no-ifelse` avoids using the IFelse library (merge it into master if you need it) * `no-bloom` avoids using bloom filters. (merge it into master if you need it) +* `no-s3` avoids using the S3 library (merge it into master if you need it) * `debian-stable` contains the latest backport of git-annex to Debian stable. * `tweak-fetch` adds support for the git tweak-fetch hook, which has diff --git a/doc/install/OSX/comment_12_60d13f2c8e008af1041bea565a392c83._comment b/doc/install/OSX/comment_12_60d13f2c8e008af1041bea565a392c83._comment new file mode 100644 index 000000000..e2e85aaa9 --- /dev/null +++ b/doc/install/OSX/comment_12_60d13f2c8e008af1041bea565a392c83._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnHrjHxJAm39x8DR4bnbazQO6H0nMNuY9c" + nickname="Damien" + subject="sha256 alternative" + date="2012-06-30T14:34:11Z" + content=""" +in reply to comment 6: On my Mac (10.7.4) there's `/usr/bin/shasum -a 256 <file>` command that will produce the same output as `sha256sum <file>`. +"""]] diff --git a/doc/install/OSX/comment_13_a6f48c87c2d6eabe379d6e10a6cac453._comment b/doc/install/OSX/comment_13_a6f48c87c2d6eabe379d6e10a6cac453._comment new file mode 100644 index 000000000..e5ce62b13 --- /dev/null +++ b/doc/install/OSX/comment_13_a6f48c87c2d6eabe379d6e10a6cac453._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnHrjHxJAm39x8DR4bnbazQO6H0nMNuY9c" + nickname="Damien" + subject="gnu commands" + date="2012-07-01T17:03:57Z" + content=""" +…and another approach to the same problem: apparently git-annex also relies on the GNU coreutils (for instance, when doing `git annex get .`, `cp` complains about `illegal option -- -`). I do have the GNU coreutils installed with Homebrew, but they are all prefixed with `g`. So maybe you should try `gsha256sum` and `gcp` before `sha256sum` and `cp`, that seems like a more general solution. +"""]] diff --git a/doc/news/version_3.20120605.mdwn b/doc/news/version_3.20120605.mdwn deleted file mode 100644 index ed0a09177..000000000 --- a/doc/news/version_3.20120605.mdwn +++ /dev/null @@ -1,11 +0,0 @@ -git-annex 3.20120605 released with [[!toggle text="these changes"]] -[[!toggleable text=""" - * sync: Show a nicer message if a user tries to sync to a special remote. - * lock: Reset unlocked file to index, rather than to branch head. - * import: New subcommand, pulls files from a directory outside the annex - and adds them. - * Fix display of warning message when encountering a file that uses an - unsupported backend. - * Require that the SHA256 backend can be used when building, since it's the - default. - * Preserve parent environment when running hooks of the hook special remote."""]]
\ No newline at end of file diff --git a/doc/news/version_3.20120629.mdwn b/doc/news/version_3.20120629.mdwn new file mode 100644 index 000000000..e6b98ae99 --- /dev/null +++ b/doc/news/version_3.20120629.mdwn @@ -0,0 +1,12 @@ +git-annex 3.20120629 released with [[!toggle text="these changes"]] +[[!toggleable text=""" + * cabal: Only try to use inotify on Linux. + * Version build dependency on STM, and allow building without it, + which disables the watch command. + * Avoid ugly failure mode when moving content from a local repository + that is not available. + * Got rid of the last place that did utf8 decoding. + * Accept arbitrarily encoded repository filepaths etc when reading + git config output. This fixes support for remotes with unusual characters + in their names. + * sync: Automatically resolves merge conflicts."""]]
\ No newline at end of file diff --git a/git-annex.cabal b/git-annex.cabal index f55940695..0bd35e14f 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -1,5 +1,5 @@ Name: git-annex -Version: 3.20120625 +Version: 3.20120629 Cabal-Version: >= 1.8 License: GPL Maintainer: Joey Hess <joey@kitenet.net> |