Diffstat (limited to 'Remote')
31 files changed, 3397 insertions(+), 1725 deletions(-)
diff --git a/Remote/BitTorrent.hs b/Remote/BitTorrent.hs new file mode 100644 index 000000000..fe49d023a --- /dev/null +++ b/Remote/BitTorrent.hs @@ -0,0 +1,391 @@ +{- BitTorrent remote. + - + - Copyright 2014 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Remote.BitTorrent (remote) where + +import Common.Annex +import Types.Remote +import qualified Annex +import qualified Git +import qualified Git.Construct +import Config.Cost +import Logs.Web +import Types.UrlContents +import Types.CleanupActions +import Types.Key +import Utility.Metered +import Utility.Tmp +import Backend.URL +import Annex.Perms +import Annex.UUID +import qualified Annex.Url as Url + +import Network.URI + +#ifdef WITH_TORRENTPARSER +import Data.Torrent +import qualified Data.ByteString.Lazy as B +#endif + +remote :: RemoteType +remote = RemoteType { + typename = "bittorrent", + enumerate = list, + generate = gen, + setup = error "not supported" +} + +-- There is only one bittorrent remote, and it always exists. +list :: Annex [Git.Repo] +list = do + r <- liftIO $ Git.Construct.remoteNamed "bittorrent" (pure Git.Construct.fromUnknown) + return [r] + +gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) +gen r _ c gc = + return $ Just Remote + { uuid = bitTorrentUUID + , cost = expensiveRemoteCost + , name = Git.repoDescribe r + , storeKey = uploadKey + , retrieveKeyFile = downloadKey + , retrieveKeyFileCheap = downloadKeyCheap + , removeKey = dropKey + , checkPresent = checkKey + , checkPresentCheap = False + , whereisKey = Nothing + , remoteFsck = Nothing + , repairRepo = Nothing + , config = c + , gitconfig = gc + , localpath = Nothing + , repo = r + , readonly = True + , availability = GloballyAvailable + , remotetype = remote + , mkUnavailable = return Nothing + , getInfo = return [] + , claimUrl = Just (pure . isSupportedUrl) + , checkUrl = Just checkTorrentUrl + } + +downloadKey :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool +downloadKey key _file dest p = + get . map (torrentUrlNum . fst . getDownloader) =<< getBitTorrentUrls key + where + get [] = do + warning "could not download torrent" + return False + get urls = do + showOutput -- make way for download progress bar + untilTrue urls $ \(u, filenum) -> do + registerTorrentCleanup u + checkDependencies + ifM (downloadTorrentFile u) + ( downloadTorrentContent key u dest filenum p + , return False + ) + +downloadKeyCheap :: Key -> FilePath -> Annex Bool +downloadKeyCheap _ _ = return False + +uploadKey :: Key -> AssociatedFile -> MeterUpdate -> Annex Bool +uploadKey _ _ _ = do + warning "upload to bittorrent not supported" + return False + +dropKey :: Key -> Annex Bool +dropKey k = do + mapM_ (setUrlMissing bitTorrentUUID k) =<< getBitTorrentUrls k + return True + +{- We punt and don't try to check if a torrent has enough seeders + - with all the pieces etc. That would be quite hard.. and even if + - implemented, it tells us nothing about the later state of the torrent. 
+ -} +checkKey :: Key -> Annex Bool +checkKey = error "cannot reliably check torrent status" + +getBitTorrentUrls :: Key -> Annex [URLString] +getBitTorrentUrls key = filter supported <$> getUrls key + where + supported u = + let (u', dl) = (getDownloader u) + in dl == OtherDownloader && isSupportedUrl u' + +isSupportedUrl :: URLString -> Bool +isSupportedUrl u = isTorrentMagnetUrl u || isTorrentUrl u + +isTorrentUrl :: URLString -> Bool +isTorrentUrl = maybe False (\u -> ".torrent" `isSuffixOf` uriPath u) . parseURI + +isTorrentMagnetUrl :: URLString -> Bool +isTorrentMagnetUrl u = "magnet:" `isPrefixOf` u && checkbt (parseURI u) + where + checkbt (Just uri) | "xt=urn:btih:" `isInfixOf` uriQuery uri = True + checkbt _ = False + +checkTorrentUrl :: URLString -> Annex UrlContents +checkTorrentUrl u = do + checkDependencies + registerTorrentCleanup u + ifM (downloadTorrentFile u) + ( torrentContents u + , error "could not download torrent file" + ) + +{- To specify which file inside a multi-url torrent, the file number is + - appended to the url. -} +torrentUrlWithNum :: URLString -> Int -> URLString +torrentUrlWithNum u n = u ++ "#" ++ show n + +torrentUrlNum :: URLString -> (URLString, Int) +torrentUrlNum u + | '#' `elem` u = + let (n, ru) = separate (== '#') (reverse u) + in (reverse ru, fromMaybe 1 $ readish $ reverse n) + | otherwise = (u, 1) + +{- A Key corresponding to the URL of a torrent file. -} +torrentUrlKey :: URLString -> Annex Key +torrentUrlKey u = fromUrl (fst $ torrentUrlNum u) Nothing + +{- Temporary directory used to download a torrent. -} +tmpTorrentDir :: URLString -> Annex FilePath +tmpTorrentDir u = do + d <- fromRepo gitAnnexTmpMiscDir + f <- keyFile <$> torrentUrlKey u + return (d </> f) + +{- Temporary filename to use to store the torrent file. -} +tmpTorrentFile :: URLString -> Annex FilePath +tmpTorrentFile u = fromRepo . gitAnnexTmpObjectLocation =<< torrentUrlKey u + +{- A cleanup action is registered to delete the torrent file and its + - associated temp directory when git-annex exits. + - + - This allows multiple actions that use the same torrent file and temp + - directory to run in a single git-annex run. + -} +registerTorrentCleanup :: URLString -> Annex () +registerTorrentCleanup u = Annex.addCleanup (TorrentCleanup u) $ do + liftIO . nukeFile =<< tmpTorrentFile u + d <- tmpTorrentDir u + liftIO $ whenM (doesDirectoryExist d) $ + removeDirectoryRecursive d + +{- Downloads the torrent file. (Not its contents.) 
-} +downloadTorrentFile :: URLString -> Annex Bool +downloadTorrentFile u = do + torrent <- tmpTorrentFile u + ifM (liftIO $ doesFileExist torrent) + ( return True + , do + showAction "downloading torrent file" + showOutput + createAnnexDirectory (parentDir torrent) + if isTorrentMagnetUrl u + then do + tmpdir <- tmpTorrentDir u + let metadir = tmpdir </> "meta" + createAnnexDirectory metadir + ok <- downloadMagnetLink u metadir torrent + liftIO $ removeDirectoryRecursive metadir + return ok + else do + misctmp <- fromRepo gitAnnexTmpMiscDir + withTmpFileIn misctmp "torrent" $ \f h -> do + liftIO $ hClose h + ok <- Url.withUrlOptions $ Url.download u f + when ok $ + liftIO $ renameFile f torrent + return ok + ) + +downloadMagnetLink :: URLString -> FilePath -> FilePath -> Annex Bool +downloadMagnetLink u metadir dest = ifM download + ( liftIO $ do + ts <- filter (".torrent" `isPrefixOf`) + <$> dirContents metadir + case ts of + (t:[]) -> do + renameFile t dest + return True + _ -> return False + , return False + ) + where + download = runAria + [ Param "--bt-metadata-only" + , Param "--bt-save-metadata" + , Param u + , Param "--seed-time=0" + , Param "--summary-interval=0" + , Param "-d" + , File metadir + ] + +downloadTorrentContent :: Key -> URLString -> FilePath -> Int -> MeterUpdate -> Annex Bool +downloadTorrentContent k u dest filenum p = do + torrent <- tmpTorrentFile u + tmpdir <- tmpTorrentDir u + createAnnexDirectory tmpdir + f <- wantedfile torrent + showOutput + ifM (download torrent tmpdir <&&> liftIO (doesFileExist (tmpdir </> f))) + ( do + liftIO $ renameFile (tmpdir </> f) dest + return True + , return False + ) + where + download torrent tmpdir = ariaProgress (keySize k) p + [ Param $ "--select-file=" ++ show filenum + , File torrent + , Param "-d" + , File tmpdir + , Param "--seed-time=0" + , Param "--summary-interval=0" + , Param "--file-allocation=none" + -- Needed so aria will resume partially downloaded files + -- in multi-file torrents. + , Param "--check-integrity=true" + ] + + {- aria2c will create part of the directory structure + - contained in the torrent. It may download parts of other files + - in addition to the one we asked for. So, we need to find + - out the filename we want based on the filenum. + -} + wantedfile torrent = do + fs <- liftIO $ map fst <$> torrentFileSizes torrent + if length fs >= filenum + then return (fs !! (filenum - 1)) + else error "Number of files in torrent seems to have changed." + +checkDependencies :: Annex () +checkDependencies = do + missing <- liftIO $ filterM (not <$$> inPath) deps + unless (null missing) $ + error $ "need to install additional software in order to download from bittorrent: " ++ unwords missing + where + deps = + [ "aria2c" +#ifndef TORRENT + , "btshowmetainfo" +#endif + ] + +ariaParams :: [CommandParam] -> Annex [CommandParam] +ariaParams ps = do + opts <- map Param . annexAriaTorrentOptions <$> Annex.getGitConfig + return (ps ++ opts) + +runAria :: [CommandParam] -> Annex Bool +runAria ps = liftIO . boolSystem "aria2c" =<< ariaParams ps + +-- Parse aria output to find "(n%)" and update the progress meter +-- with it. The output is also output to stdout. +ariaProgress :: Maybe Integer -> MeterUpdate -> [CommandParam] -> Annex Bool +ariaProgress Nothing _ ps = runAria ps +ariaProgress (Just sz) meter ps = + liftIO . commandMeter (parseAriaProgress sz) meter "aria2c" + =<< ariaParams ps + +parseAriaProgress :: Integer -> ProgressParser +parseAriaProgress totalsize = go [] . reverse . 
split ['\r'] + where + go remainder [] = (Nothing, remainder) + go remainder (x:xs) = case readish (findpercent x) of + Nothing -> go (x++remainder) xs + Just p -> (Just (frompercent p), remainder) + + -- "(N%)" + findpercent = takeWhile (/= '%') . drop 1 . dropWhile (/= '(') + + frompercent p = toBytesProcessed $ totalsize * p `div` 100 + +{- Used only if the haskell torrent library is not available. -} +#ifndef WITH_TORRENTPARSER +btshowmetainfo :: FilePath -> String -> IO [String] +btshowmetainfo torrent field = + findfield [] . lines <$> readProcess "btshowmetainfo" [torrent] + where + findfield c [] = reverse c + findfield c (l:ls) + | l == fieldkey = multiline c ls + | fieldkey `isPrefixOf` l = + findfield ((drop (length fieldkey) l):c) ls + | otherwise = findfield c ls + + multiline c (l:ls) + | " " `isPrefixOf` l = multiline (drop 3 l:c) ls + | otherwise = findfield c ls + multiline c [] = findfield c [] + + fieldkey = field ++ take (14 - length field) (repeat '.') ++ ": " +#endif + +{- Examines the torrent file and gets the list of files in it, + - and their sizes. + -} +torrentFileSizes :: FilePath -> IO [(FilePath, Integer)] +torrentFileSizes torrent = do +#ifdef WITH_TORRENTPARSER + let mkfile = joinPath . map (scrub . decodeBS) + b <- B.readFile torrent + return $ case readTorrent b of + Left e -> error $ "failed to parse torrent: " ++ e + Right t -> case tInfo t of + SingleFile { tLength = l, tName = f } -> + [ (mkfile [f], l) ] + MultiFile { tFiles = fs, tName = dir } -> + map (\tf -> (mkfile $ dir:filePath tf, fileLength tf)) fs + where +#else + files <- getfield "files" + if null files + then do + fnl <- getfield "file name" + szl <- map readish <$> getfield "file size" + case (fnl, szl) of + ((fn:[]), (Just sz:[])) -> return [(scrub fn, sz)] + _ -> parsefailed (show (fnl, szl)) + else do + v <- getfield "directory name" + case v of + (d:[]) -> return $ map (splitsize d) files + _ -> parsefailed (show v) + where + getfield = btshowmetainfo torrent + parsefailed s = error $ "failed to parse btshowmetainfo output for torrent file: " ++ show s + + -- btshowmetainfo outputs a list of "filename (size)" + splitsize d l = (scrub (d </> fn), sz) + where + sz = fromMaybe (parsefailed l) $ readish $ + reverse $ takeWhile (/= '(') $ dropWhile (== ')') $ + reverse l + fn = reverse $ drop 2 $ + dropWhile (/= '(') $ dropWhile (== ')') $ reverse l +#endif + -- a malicious torrent file might try to do directory traversal + scrub f = if isAbsolute f || any (== "..") (splitPath f) + then error "found unsafe filename in torrent!" + else f + +torrentContents :: URLString -> Annex UrlContents +torrentContents u = convert + <$> (liftIO . torrentFileSizes =<< tmpTorrentFile u) + where + convert [(fn, sz)] = UrlContents (Just sz) (Just (mkSafeFilePath fn)) + convert l = UrlMulti $ map mkmulti (zip l [1..]) + + mkmulti ((fn, sz), n) = + (torrentUrlWithNum u n, Just sz, mkSafeFilePath $ joinPath $ drop 1 $ splitPath fn) diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 4e79eca42..01501dc9e 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -1,18 +1,18 @@ {- Using bup as a remote. - - - Copyright 2011 Joey Hess <joey@kitenet.net> + - Copyright 2011-2014 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. 
-} module Remote.Bup (remote) where -import qualified Data.ByteString.Lazy as L import qualified Data.Map as M -import System.Process +import qualified Data.ByteString.Lazy as L import Data.ByteString.Lazy.UTF8 (fromString) import Common.Annex +import qualified Annex import Types.Remote import Types.Key import Types.Creds @@ -25,12 +25,9 @@ import Config import Config.Cost import qualified Remote.Helper.Ssh as Ssh import Remote.Helper.Special -import Remote.Helper.Encryptable import Remote.Helper.Messages -import Crypto import Utility.Hash import Utility.UserInfo -import Annex.Content import Annex.UUID import Utility.Metered @@ -53,16 +50,16 @@ gen r u c gc = do else expensiveRemoteCost (u', bupr') <- getBupUUID bupr u - let new = Remote + let this = Remote { uuid = u' , cost = cst , name = Git.repoDescribe r - , storeKey = store new buprepo - , retrieveKeyFile = retrieve buprepo + , storeKey = storeKeyDummy + , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap buprepo - , removeKey = remove - , hasKey = checkPresent r bupr' - , hasKeyCheap = bupLocal buprepo + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy + , checkPresentCheap = bupLocal buprepo , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing @@ -75,13 +72,23 @@ gen r u c gc = do , remotetype = remote , availability = if bupLocal buprepo then LocallyAvailable else GloballyAvailable , readonly = False + , mkUnavailable = return Nothing + , getInfo = return [("repo", buprepo)] + , claimUrl = Nothing + , checkUrl = Nothing } - return $ Just $ encryptableRemote c - (storeEncrypted new buprepo) - (retrieveEncrypted buprepo) - new + return $ Just $ specialRemote' specialcfg c + (simplyPrepare $ store this buprepo) + (simplyPrepare $ retrieve buprepo) + (simplyPrepare $ remove buprepo) + (simplyPrepare $ checkKey r bupr') + this where buprepo = fromMaybe (error "missing buprepo") $ remoteAnnexBupRepo gc + specialcfg = (specialRemoteCfg c) + -- chunking would not improve bup + { chunkConfig = NoChunks + } bupSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) bupSetup mu _ c = do @@ -90,7 +97,7 @@ bupSetup mu _ c = do -- verify configuration is sane let buprepo = fromMaybe (error "Specify buprepo=") $ M.lookup "buprepo" c - c' <- encryptionSetup c + (c', _encsetup) <- encryptionSetup c -- bup init will create the repository. -- (If the repository already exists, bup init again appears safe.) 
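[Editorial note between hunks: the wiring in the hunk above — placeholder storeKeyDummy and retreiveKeyFileDummy fields on the Remote record, with the real transfer logic handed to specialRemote' as Storer, Retriever, Remover, and CheckPresent values — is the pattern this commit applies to every special remote, moving encryption and chunking into the shared wrapper instead of per-remote storeEncrypted/retrieveEncrypted code. Below is a minimal self-contained sketch of that adapter idea; the type shapes are assumptions reconstructed from the call sites (byteStorer, simplyPrepare), not the actual definitions in Remote.Helper.Special.

import qualified Data.ByteString.Lazy as L

-- Illustrative stand-ins; the real types live in git-annex's
-- Remote.Helper.Special and friends.
type Key = String
type MeterUpdate = Integer -> IO ()

-- Content may be offered to a Storer as a file or as bytes in memory.
data ContentSource = FileContent FilePath | ByteContent L.ByteString

type Storer = Key -> ContentSource -> MeterUpdate -> IO Bool

-- byteStorer adapts a backend that wants a lazy ByteString (bup is fed
-- over a pipe, so it never needs a file on disk) into a general Storer.
byteStorer :: (Key -> L.ByteString -> MeterUpdate -> IO Bool) -> Storer
byteStorer a k (ByteContent b) p = a k b p
byteStorer a k (FileContent f) p = L.readFile f >>= \b -> a k b p
]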
@@ -114,85 +121,61 @@ bup command buprepo params = do showOutput -- make way for bup output liftIO $ boolSystem "bup" $ bupParams command buprepo params -pipeBup :: [CommandParam] -> Maybe Handle -> Maybe Handle -> IO Bool -pipeBup params inh outh = do - p <- runProcess "bup" (toCommand params) - Nothing Nothing inh outh Nothing - ok <- waitForProcess p - case ok of - ExitSuccess -> return True - _ -> return False - bupSplitParams :: Remote -> BupRepo -> Key -> [CommandParam] -> Annex [CommandParam] bupSplitParams r buprepo k src = do let os = map Param $ remoteAnnexBupSplitOptions $ gitconfig r showOutput -- make way for bup output return $ bupParams "split" buprepo - (os ++ [Param "-n", Param (bupRef k)] ++ src) - -store :: Remote -> BupRepo -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store r buprepo k _f _p = sendAnnex k (rollback k buprepo) $ \src -> do - params <- bupSplitParams r buprepo k [File src] - liftIO $ boolSystem "bup" params + (os ++ [Param "-q", Param "-n", Param (bupRef k)] ++ src) -storeEncrypted :: Remote -> BupRepo -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted r buprepo (cipher, enck) k _p = - sendAnnex k (rollback enck buprepo) $ \src -> do - params <- bupSplitParams r buprepo enck [] - liftIO $ catchBoolIO $ - encrypt (getGpgEncParams r) cipher (feedFile src) $ \h -> - pipeBup params (Just h) Nothing +store :: Remote -> BupRepo -> Storer +store r buprepo = byteStorer $ \k b p -> do + params <- bupSplitParams r buprepo k [] + let cmd = proc "bup" (toCommand params) + liftIO $ withHandle StdinHandle createProcessSuccess cmd $ \h -> do + meteredWrite p h b + return True -retrieve :: BupRepo -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve buprepo k _f d _p = do +retrieve :: BupRepo -> Retriever +retrieve buprepo = byteRetriever $ \k sink -> do let params = bupParams "join" buprepo [Param $ bupRef k] - liftIO $ catchBoolIO $ withFile d WriteMode $ - pipeBup params Nothing . Just + let p = proc "bup" (toCommand params) + (_, Just h, _, pid) <- liftIO $ createProcess $ p { std_out = CreatePipe } + liftIO (hClose h >> forceSuccessProcess p pid) + `after` (sink =<< liftIO (L.hGetContents h)) retrieveCheap :: BupRepo -> Key -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False -retrieveEncrypted :: BupRepo -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted buprepo (cipher, enck) _ f _p = liftIO $ catchBoolIO $ - withHandle StdoutHandle createProcessSuccess p $ \h -> do - decrypt cipher (\toh -> L.hPut toh =<< L.hGetContents h) $ - readBytes $ L.writeFile f - return True - where - params = bupParams "join" buprepo [Param $ bupRef enck] - p = proc "bup" $ toCommand params - -remove :: Key -> Annex Bool -remove _ = do - warning "content cannot be removed from bup remote" - return False - {- Cannot revert having stored a key in bup, but at least the data for the - key will be used for deltaing data of other keys stored later. - - We can, however, remove the git branch that bup created for the key. 
-} -rollback :: Key -> BupRepo -> Annex () -rollback k bupr = go =<< liftIO (bup2GitRemote bupr) +remove :: BupRepo -> Remover +remove buprepo k = do + go =<< liftIO (bup2GitRemote buprepo) + warning "content cannot be completely removed from bup remote" + return True where go r | Git.repoIsUrl r = void $ onBupRemote r boolSystem "git" params - | otherwise = void $ liftIO $ catchMaybeIO $ - boolSystem "git" $ Git.Command.gitCommandLine params r - params = [ Params "branch -D", Param (bupRef k) ] + | otherwise = void $ liftIO $ catchMaybeIO $ do + r' <- Git.Config.read r + boolSystem "git" $ Git.Command.gitCommandLine params r' + params = [ Params "branch -q -D", Param (bupRef k) ] {- Bup does not provide a way to tell if a given dataset is present - in a bup repository. One way it to check if the git repository has - a branch matching the name (as created by bup split -n). -} -checkPresent :: Git.Repo -> Git.Repo -> Key -> Annex (Either String Bool) -checkPresent r bupr k +checkKey :: Git.Repo -> Git.Repo -> CheckPresent +checkKey r bupr k | Git.repoIsUrl bupr = do showChecking r - ok <- onBupRemote bupr boolSystem "git" params - return $ Right ok - | otherwise = liftIO $ catchMsgIO $ - boolSystem "git" $ Git.Command.gitCommandLine params bupr + onBupRemote bupr boolSystem "git" params + | otherwise = liftIO $ boolSystem "git" $ + Git.Command.gitCommandLine params bupr where params = [ Params "show-ref --quiet --verify" @@ -223,7 +206,8 @@ storeBupUUID u buprepo = do onBupRemote :: Git.Repo -> (FilePath -> [CommandParam] -> IO a) -> FilePath -> [CommandParam] -> Annex a onBupRemote r a command params = do - sshparams <- Ssh.toRepo r [Param $ + c <- Annex.getRemoteGitConfig r + sshparams <- Ssh.toRepo r c [Param $ "cd " ++ dir ++ " && " ++ unwords (command : toCommand params)] liftIO $ a "ssh" sshparams where diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs new file mode 100644 index 000000000..7495fcd42 --- /dev/null +++ b/Remote/Ddar.hs @@ -0,0 +1,199 @@ +{- Using ddar as a remote. Based on bup and rsync remotes. + - + - Copyright 2011 Joey Hess <id@joeyh.name> + - Copyright 2014 Robie Basak <robie@justgohome.co.uk> + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Remote.Ddar (remote) where + +import qualified Data.Map as M +import qualified Data.ByteString.Lazy as L +import System.IO.Error + +import Common.Annex +import Types.Remote +import Types.Key +import Types.Creds +import qualified Git +import Config +import Config.Cost +import Remote.Helper.Special +import Annex.Ssh +import Annex.UUID + +data DdarRepo = DdarRepo + { ddarRepoConfig :: RemoteGitConfig + , ddarRepoLocation :: String + } + +remote :: RemoteType +remote = RemoteType { + typename = "ddar", + enumerate = findSpecialRemotes "ddarrepo", + generate = gen, + setup = ddarSetup +} + +gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) +gen r u c gc = do + cst <- remoteCost gc $ + if ddarLocal ddarrepo + then nearlyCheapRemoteCost + else expensiveRemoteCost + return $ Just $ specialRemote' specialcfg c + (simplyPrepare $ store ddarrepo) + (simplyPrepare $ retrieve ddarrepo) + (simplyPrepare $ remove ddarrepo) + (simplyPrepare $ checkKey ddarrepo) + (this cst) + where + this cst = Remote + { uuid = u + , cost = cst + , name = Git.repoDescribe r + , storeKey = storeKeyDummy + , retrieveKeyFile = retreiveKeyFileDummy + , retrieveKeyFileCheap = retrieveCheap + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy + , checkPresentCheap = ddarLocal ddarrepo + , whereisKey = Nothing + , remoteFsck = Nothing + , repairRepo = Nothing + , config = c + , repo = r + , gitconfig = gc + , localpath = if ddarLocal ddarrepo && not (null $ ddarRepoLocation ddarrepo) + then Just $ ddarRepoLocation ddarrepo + else Nothing + , remotetype = remote + , availability = if ddarLocal ddarrepo then LocallyAvailable else GloballyAvailable + , readonly = False + , mkUnavailable = return Nothing + , getInfo = return [("repo", ddarRepoLocation ddarrepo)] + , claimUrl = Nothing + , checkUrl = Nothing + } + ddarrepo = maybe (error "missing ddarrepo") (DdarRepo gc) (remoteAnnexDdarRepo gc) + specialcfg = (specialRemoteCfg c) + -- chunking would not improve ddar + { chunkConfig = NoChunks + } + +ddarSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) +ddarSetup mu _ c = do + u <- maybe (liftIO genUUID) return mu + + -- verify configuration is sane + let ddarrepo = fromMaybe (error "Specify ddarrepo=") $ + M.lookup "ddarrepo" c + (c', _encsetup) <- encryptionSetup c + + -- The ddarrepo is stored in git config, as well as this repo's + -- persistant state, so it can vary between hosts. + gitConfigSpecialRemote u c' "ddarrepo" ddarrepo + + return (c', u) + +store :: DdarRepo -> Storer +store ddarrepo = fileStorer $ \k src _p -> do + let params = + [ Param "c" + , Param "-N" + , Param $ key2file k + , Param $ ddarRepoLocation ddarrepo + , File src + ] + liftIO $ boolSystem "ddar" params + +{- Convert remote DdarRepo to host and path on remote end -} +splitRemoteDdarRepo :: DdarRepo -> (String, String) +splitRemoteDdarRepo ddarrepo = + (host, ddarrepo') + where + (host, remainder) = span (/= ':') (ddarRepoLocation ddarrepo) + ddarrepo' = drop 1 remainder + +{- Return the command and parameters to use for a ddar call that may need to be + - made on a remote repository. This will call ssh if needed. 
-} +ddarRemoteCall :: DdarRepo -> Char -> [CommandParam] -> Annex (String, [CommandParam]) +ddarRemoteCall ddarrepo cmd params + | ddarLocal ddarrepo = return ("ddar", localParams) + | otherwise = do + os <- sshOptions (host, Nothing) (ddarRepoConfig ddarrepo) remoteParams + return ("ssh", os) + where + (host, ddarrepo') = splitRemoteDdarRepo ddarrepo + localParams = Param [cmd] : Param (ddarRepoLocation ddarrepo) : params + remoteParams = Param host : Param "ddar" : Param [cmd] : Param ddarrepo' : params + +{- Specialized ddarRemoteCall that includes extraction command and flags -} +ddarExtractRemoteCall :: DdarRepo -> Key -> Annex (String, [CommandParam]) +ddarExtractRemoteCall ddarrepo k = + ddarRemoteCall ddarrepo 'x' [Param "--force-stdout", Param $ key2file k] + +retrieve :: DdarRepo -> Retriever +retrieve ddarrepo = byteRetriever $ \k sink -> do + (cmd, params) <- ddarExtractRemoteCall ddarrepo k + let p = (proc cmd $ toCommand params) { std_out = CreatePipe } + (_, Just h, _, pid) <- liftIO $ createProcess p + liftIO (hClose h >> forceSuccessProcess p pid) + `after` (sink =<< liftIO (L.hGetContents h)) + +retrieveCheap :: Key -> FilePath -> Annex Bool +retrieveCheap _ _ = return False + +remove :: DdarRepo -> Remover +remove ddarrepo key = do + (cmd, params) <- ddarRemoteCall ddarrepo 'd' [Param $ key2file key] + liftIO $ boolSystem cmd params + +ddarDirectoryExists :: DdarRepo -> Annex (Either String Bool) +ddarDirectoryExists ddarrepo + | ddarLocal ddarrepo = do + maybeStatus <- liftIO $ tryJust (guard . isDoesNotExistError) $ getFileStatus $ ddarRepoLocation ddarrepo + return $ case maybeStatus of + Left _ -> Right False + Right status -> Right $ isDirectory status + | otherwise = do + ps <- sshOptions (host, Nothing) (ddarRepoConfig ddarrepo) params + exitCode <- liftIO $ safeSystem "ssh" ps + case exitCode of + ExitSuccess -> return $ Right True + ExitFailure 1 -> return $ Right False + ExitFailure code -> return $ Left $ "ssh call " ++ + show (unwords $ toCommand params) ++ + " failed with status " ++ show code + where + (host, ddarrepo') = splitRemoteDdarRepo ddarrepo + params = + [ Param host + , Param "test" + , Param "-d" + , Param ddarrepo' + ] + +{- Use "ddar t" to determine if a given key is present in a ddar archive -} +inDdarManifest :: DdarRepo -> Key -> Annex (Either String Bool) +inDdarManifest ddarrepo k = do + (cmd, params) <- ddarRemoteCall ddarrepo 't' [] + let p = proc cmd $ toCommand params + liftIO $ catchMsgIO $ withHandle StdoutHandle createProcessSuccess p $ \h -> do + contents <- hGetContents h + return $ elem k' $ lines contents + where + k' = key2file k + +checkKey :: DdarRepo -> CheckPresent +checkKey ddarrepo key = do + directoryExists <- ddarDirectoryExists ddarrepo + case directoryExists of + Left e -> error e + Right True -> either error return + =<< inDdarManifest ddarrepo key + Right False -> return False + +ddarLocal :: DdarRepo -> Bool +ddarLocal = notElem ':' . ddarRepoLocation diff --git a/Remote/Directory.hs b/Remote/Directory.hs index afa2296ec..2eeb79317 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -1,18 +1,21 @@ {- A "remote" that is just a filesystem directory. - - - Copyright 2011-2012 Joey Hess <joey@kitenet.net> + - Copyright 2011-2014 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. 
-} {-# LANGUAGE CPP #-} -module Remote.Directory (remote) where +module Remote.Directory ( + remote, + finalizeStoreGeneric, + removeDirGeneric, +) where import qualified Data.ByteString.Lazy as L -import qualified Data.ByteString as S import qualified Data.Map as M -import Data.Int +import Data.Default import Common.Annex import Types.Remote @@ -22,9 +25,7 @@ import Config.Cost import Config import Utility.FileMode import Remote.Helper.Special -import Remote.Helper.Encryptable -import Remote.Helper.Chunked -import Crypto +import qualified Remote.Directory.LegacyChunked as Legacy import Annex.Content import Annex.UUID import Utility.Metered @@ -40,31 +41,38 @@ remote = RemoteType { gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do cst <- remoteCost gc cheapRemoteCost - let chunksize = chunkSize c - return $ Just $ encryptableRemote c - (storeEncrypted dir (getGpgEncParams (c,gc)) chunksize) - (retrieveEncrypted dir chunksize) - Remote { - uuid = u, - cost = cst, - name = Git.repoDescribe r, - storeKey = store dir chunksize, - retrieveKeyFile = retrieve dir chunksize, - retrieveKeyFileCheap = retrieveCheap dir chunksize, - removeKey = remove dir, - hasKey = checkPresent dir chunksize, - hasKeyCheap = True, - whereisKey = Nothing, - remoteFsck = Nothing, - repairRepo = Nothing, - config = c, - repo = r, - gitconfig = gc, - localpath = Just dir, - readonly = False, - availability = LocallyAvailable, - remotetype = remote - } + let chunkconfig = getChunkConfig c + return $ Just $ specialRemote c + (prepareStore dir chunkconfig) + (retrieve dir chunkconfig) + (simplyPrepare $ remove dir) + (simplyPrepare $ checkKey dir chunkconfig) + Remote + { uuid = u + , cost = cst + , name = Git.repoDescribe r + , storeKey = storeKeyDummy + , retrieveKeyFile = retreiveKeyFileDummy + , retrieveKeyFileCheap = retrieveCheap dir chunkconfig + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy + , checkPresentCheap = True + , whereisKey = Nothing + , remoteFsck = Nothing + , repairRepo = Nothing + , config = c + , repo = r + , gitconfig = gc + , localpath = Just dir + , readonly = False + , availability = LocallyAvailable + , remotetype = remote + , mkUnavailable = gen r u c $ + gc { remoteAnnexDirectory = Just "/dev/null" } + , getInfo = return [("directory", dir)] + , claimUrl = Nothing + , checkUrl = Nothing + } where dir = fromMaybe (error "missing directory") $ remoteAnnexDirectory gc @@ -77,179 +85,125 @@ directorySetup mu _ c = do absdir <- liftIO $ absPath dir liftIO $ unlessM (doesDirectoryExist absdir) $ error $ "Directory does not exist: " ++ absdir - c' <- encryptionSetup c + (c', _encsetup) <- encryptionSetup c -- The directory is stored in git config, not in this remote's -- persistant state, so it can vary between hosts. gitConfigSpecialRemote u c' "directory" absdir return (M.delete "directory" c', u) -{- Locations to try to access a given Key in the Directory. - - We try more than since we used to write to different hash directories. -} +{- Locations to try to access a given Key in the directory. + - We try more than one since we used to write to different hash + - directories. -} locations :: FilePath -> Key -> [FilePath] locations d k = map (d </>) (keyPaths k) +{- Returns the location off a Key in the directory. If the key is + - present, returns the location that is actually used, otherwise + - returns the first, default location. 
-} +getLocation :: FilePath -> Key -> IO FilePath +getLocation d k = do + let locs = locations d k + fromMaybe (Prelude.head locs) <$> firstM doesFileExist locs + {- Directory where the file(s) for a key are stored. -} storeDir :: FilePath -> Key -> FilePath -storeDir d k = addTrailingPathSeparator $ d </> hashDirLower k </> keyFile k +storeDir d k = addTrailingPathSeparator $ d </> hashDirLower def k </> keyFile k -{- Where we store temporary data for a key as it's being uploaded. -} +{- Where we store temporary data for a key, in the directory, as it's being + - written. -} tmpDir :: FilePath -> Key -> FilePath tmpDir d k = addTrailingPathSeparator $ d </> "tmp" </> keyFile k -withCheckedFiles :: (FilePath -> IO Bool) -> ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool -withCheckedFiles _ _ [] _ _ = return False -withCheckedFiles check Nothing d k a = go $ locations d k - where - go [] = return False - go (f:fs) = ifM (check f) ( a [f] , go fs ) -withCheckedFiles check (Just _) d k a = go $ locations d k - where - go [] = return False - go (f:fs) = do - let chunkcount = f ++ chunkCount - ifM (check chunkcount) - ( do - chunks <- listChunks f <$> readFile chunkcount - ifM (allM check chunks) - ( a chunks , return False ) - , do - chunks <- probeChunks f check - if null chunks - then go fs - else a chunks - ) - -withStoredFiles :: ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool -withStoredFiles = withCheckedFiles doesFileExist - -store :: FilePath -> ChunkSize -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store d chunksize k _f p = sendAnnex k (void $ remove d k) $ \src -> - metered (Just p) k $ \meterupdate -> - storeHelper d chunksize k k $ \dests -> - case chunksize of - Nothing -> do - let dest = Prelude.head dests - meteredWriteFile meterupdate dest - =<< L.readFile src - return [dest] - Just _ -> - storeSplit meterupdate chunksize dests - =<< L.readFile src - -storeEncrypted :: FilePath -> [CommandParam] -> ChunkSize -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted d gpgOpts chunksize (cipher, enck) k p = sendAnnex k (void $ remove d enck) $ \src -> - metered (Just p) k $ \meterupdate -> - storeHelper d chunksize enck k $ \dests -> - encrypt gpgOpts cipher (feedFile src) $ readBytes $ \b -> - case chunksize of - Nothing -> do - let dest = Prelude.head dests - meteredWriteFile meterupdate dest b - return [dest] - Just _ -> storeSplit meterupdate chunksize dests b - -{- Splits a ByteString into chunks and writes to dests, obeying configured - - chunk size (not to be confused with the L.ByteString chunk size). - - Note: Must always write at least one file, even for empty ByteString. 
-} -storeSplit :: MeterUpdate -> ChunkSize -> [FilePath] -> L.ByteString -> IO [FilePath] -storeSplit _ Nothing _ _ = error "bad storeSplit call" -storeSplit _ _ [] _ = error "bad storeSplit call" -storeSplit meterupdate (Just chunksize) alldests@(firstdest:_) b - | L.null b = do - -- must always write at least one file, even for empty - L.writeFile firstdest b - return [firstdest] - | otherwise = storeSplit' meterupdate chunksize alldests (L.toChunks b) [] -storeSplit' :: MeterUpdate -> Int64 -> [FilePath] -> [S.ByteString] -> [FilePath] -> IO [FilePath] -storeSplit' _ _ [] _ _ = error "ran out of dests" -storeSplit' _ _ _ [] c = return $ reverse c -storeSplit' meterupdate chunksize (d:dests) bs c = do - bs' <- withFile d WriteMode $ - feed zeroBytesProcessed chunksize bs - storeSplit' meterupdate chunksize dests bs' (d:c) - where - feed _ _ [] _ = return [] - feed bytes sz (l:ls) h = do - let len = S.length l - let s = fromIntegral len - if s <= sz || sz == chunksize - then do - S.hPut h l - let bytes' = addBytesProcessed bytes len - meterupdate bytes' - feed bytes' (sz - s) ls h - else return (l:ls) - -storeHelper :: FilePath -> ChunkSize -> Key -> Key -> ([FilePath] -> IO [FilePath]) -> Annex Bool -storeHelper d chunksize key origkey storer = check <&&> go - where - tmpdir = tmpDir d key - destdir = storeDir d key - {- An encrypted key does not have a known size, - - so check that the size of the original key is available as free - - space. -} - check = do - liftIO $ createDirectoryIfMissing True tmpdir - checkDiskSpace (Just tmpdir) origkey 0 - go = liftIO $ catchBoolIO $ - storeChunks key tmpdir destdir chunksize storer recorder finalizer - finalizer tmp dest = do - void $ tryIO $ allowWrite dest -- may already exist - void $ tryIO $ removeDirectoryRecursive dest -- or not exist - createDirectoryIfMissing True (parentDir dest) - renameDirectory tmp dest - -- may fail on some filesystems - void $ tryIO $ do - mapM_ preventWrite =<< dirContents dest - preventWrite dest - recorder f s = do - void $ tryIO $ allowWrite f - writeFile f s - void $ tryIO $ preventWrite f - -retrieve :: FilePath -> ChunkSize -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve d chunksize k _ f p = metered (Just p) k $ \meterupdate -> - liftIO $ withStoredFiles chunksize d k $ \files -> - catchBoolIO $ do - meteredWriteFileChunks meterupdate f files L.readFile - return True - -retrieveEncrypted :: FilePath -> ChunkSize -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted d chunksize (cipher, enck) k f p = metered (Just p) k $ \meterupdate -> - liftIO $ withStoredFiles chunksize d enck $ \files -> - catchBoolIO $ do - decrypt cipher (feeder files) $ - readBytes $ meteredWriteFile meterupdate f +{- Check if there is enough free disk space in the remote's directory to + - store the key. Note that the unencrypted key size is checked. 
-} +prepareStore :: FilePath -> ChunkConfig -> Preparer Storer +prepareStore d chunkconfig = checkPrepare + (\k -> checkDiskSpace (Just d) k 0) + (byteStorer $ store d chunkconfig) + +store :: FilePath -> ChunkConfig -> Key -> L.ByteString -> MeterUpdate -> Annex Bool +store d chunkconfig k b p = liftIO $ do + void $ tryIO $ createDirectoryIfMissing True tmpdir + case chunkconfig of + LegacyChunks chunksize -> Legacy.store chunksize finalizeStoreGeneric k b p tmpdir destdir + _ -> do + let tmpf = tmpdir </> keyFile k + meteredWriteFile p tmpf b + finalizeStoreGeneric tmpdir destdir return True where - feeder files h = forM_ files $ L.hPut h <=< L.readFile - -retrieveCheap :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool -retrieveCheap _ (Just _) _ _ = return False -- no cheap retrieval for chunks + tmpdir = tmpDir d k + destdir = storeDir d k + +{- Passed a temp directory that contains the files that should be placed + - in the dest directory, moves it into place. Anything already existing + - in the dest directory will be deleted. File permissions will be locked + - down. -} +finalizeStoreGeneric :: FilePath -> FilePath -> IO () +finalizeStoreGeneric tmp dest = do + void $ tryIO $ allowWrite dest -- may already exist + void $ tryIO $ removeDirectoryRecursive dest -- or not exist + createDirectoryIfMissing True (parentDir dest) + renameDirectory tmp dest + -- may fail on some filesystems + void $ tryIO $ do + mapM_ preventWrite =<< dirContents dest + preventWrite dest + +retrieve :: FilePath -> ChunkConfig -> Preparer Retriever +retrieve d (LegacyChunks _) = Legacy.retrieve locations d +retrieve d _ = simplyPrepare $ byteRetriever $ \k sink -> + sink =<< liftIO (L.readFile =<< getLocation d k) + +retrieveCheap :: FilePath -> ChunkConfig -> Key -> FilePath -> Annex Bool +-- no cheap retrieval possible for chunks +retrieveCheap _ (UnpaddedChunks _) _ _ = return False +retrieveCheap _ (LegacyChunks _) _ _ = return False #ifndef mingw32_HOST_OS -retrieveCheap d _ k f = liftIO $ withStoredFiles Nothing d k go - where - go [file] = catchBoolIO $ createSymbolicLink file f >> return True - go _files = return False +retrieveCheap d NoChunks k f = liftIO $ catchBoolIO $ do + file <- getLocation d k + createSymbolicLink file f + return True #else retrieveCheap _ _ _ _ = return False #endif -remove :: FilePath -> Key -> Annex Bool -remove d k = liftIO $ do +remove :: FilePath -> Remover +remove d k = liftIO $ removeDirGeneric d (storeDir d k) + +{- Removes the directory, which must be located under the topdir. + - + - Succeeds even on directories and contents that do not have write + - permission. + - + - If the directory does not exist, succeeds as long as the topdir does + - exist. If the topdir does not exist, fails, because in this case the + - remote is not currently accessible and probably still has the content + - we were supposed to remove from it. + -} +removeDirGeneric :: FilePath -> FilePath -> IO Bool +removeDirGeneric topdir dir = do void $ tryIO $ allowWrite dir #ifdef mingw32_HOST_OS {- Windows needs the files inside the directory to be writable - before it can delete them. 
-} void $ tryIO $ mapM_ allowWrite =<< dirContents dir #endif - catchBoolIO $ do + ok <- catchBoolIO $ do removeDirectoryRecursive dir return True - where - dir = storeDir d k - -checkPresent :: FilePath -> ChunkSize -> Key -> Annex (Either String Bool) -checkPresent d chunksize k = liftIO $ catchMsgIO $ withStoredFiles chunksize d k $ - const $ return True -- withStoredFiles checked that it exists + if ok + then return ok + else doesDirectoryExist topdir <&&> (not <$> doesDirectoryExist dir) + +checkKey :: FilePath -> ChunkConfig -> CheckPresent +checkKey d (LegacyChunks _) k = Legacy.checkKey d locations k +checkKey d _ k = liftIO $ + ifM (anyM doesFileExist (locations d k)) + ( return True + , ifM (doesDirectoryExist d) + ( return False + , error $ "directory " ++ d ++ " is not accessible" + ) + ) diff --git a/Remote/Directory/LegacyChunked.hs b/Remote/Directory/LegacyChunked.hs new file mode 100644 index 000000000..72d52f95d --- /dev/null +++ b/Remote/Directory/LegacyChunked.hs @@ -0,0 +1,109 @@ +{- Legacy chunksize support for directory special remote. + - + - Can be removed eventually. + - + - Copyright 2011-2012 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Remote.Directory.LegacyChunked where + +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString as S + +import Common.Annex +import Utility.FileMode +import Remote.Helper.Special +import qualified Remote.Helper.Chunked.Legacy as Legacy +import Annex.Perms +import Utility.Metered + +withCheckedFiles :: (FilePath -> IO Bool) -> FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> ([FilePath] -> IO Bool) -> IO Bool +withCheckedFiles _ [] _locations _ _ = return False +withCheckedFiles check d locations k a = go $ locations d k + where + go [] = return False + go (f:fs) = do + let chunkcount = f ++ Legacy.chunkCount + ifM (check chunkcount) + ( do + chunks <- Legacy.listChunks f <$> readFile chunkcount + ifM (allM check chunks) + ( a chunks , return False ) + , do + chunks <- Legacy.probeChunks f check + if null chunks + then go fs + else a chunks + ) +withStoredFiles :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> ([FilePath] -> IO Bool) -> IO Bool +withStoredFiles = withCheckedFiles doesFileExist + +{- Splits a ByteString into chunks and writes to dests, obeying configured + - chunk size (not to be confused with the L.ByteString chunk size). 
-} +storeLegacyChunked :: MeterUpdate -> ChunkSize -> [FilePath] -> L.ByteString -> IO [FilePath] +storeLegacyChunked _ _ [] _ = error "bad storeLegacyChunked call" +storeLegacyChunked meterupdate chunksize alldests@(firstdest:_) b + | L.null b = do + -- always write at least one file, even for empty + L.writeFile firstdest b + return [firstdest] + | otherwise = storeLegacyChunked' meterupdate chunksize alldests (L.toChunks b) [] +storeLegacyChunked' :: MeterUpdate -> ChunkSize -> [FilePath] -> [S.ByteString] -> [FilePath] -> IO [FilePath] +storeLegacyChunked' _ _ [] _ _ = error "ran out of dests" +storeLegacyChunked' _ _ _ [] c = return $ reverse c +storeLegacyChunked' meterupdate chunksize (d:dests) bs c = do + bs' <- withFile d WriteMode $ + feed zeroBytesProcessed chunksize bs + storeLegacyChunked' meterupdate chunksize dests bs' (d:c) + where + feed _ _ [] _ = return [] + feed bytes sz (l:ls) h = do + let len = S.length l + let s = fromIntegral len + if s <= sz || sz == chunksize + then do + S.hPut h l + let bytes' = addBytesProcessed bytes len + meterupdate bytes' + feed bytes' (sz - s) ls h + else return (l:ls) + +storeHelper :: (FilePath -> FilePath -> IO ()) -> Key -> ([FilePath] -> IO [FilePath]) -> FilePath -> FilePath -> IO Bool +storeHelper finalizer key storer tmpdir destdir = do + void $ liftIO $ tryIO $ createDirectoryIfMissing True tmpdir + Legacy.storeChunks key tmpdir destdir storer recorder finalizer + where + recorder f s = do + void $ tryIO $ allowWrite f + writeFile f s + void $ tryIO $ preventWrite f + +store :: ChunkSize -> (FilePath -> FilePath -> IO ()) -> Key -> L.ByteString -> MeterUpdate -> FilePath -> FilePath -> IO Bool +store chunksize finalizer k b p = storeHelper finalizer k $ \dests -> + storeLegacyChunked p chunksize dests b + +{- Need to get a single ByteString containing every chunk. + - Done very innefficiently, by writing to a temp file. + - :/ This is legacy code.. + -} +retrieve :: (FilePath -> Key -> [FilePath]) -> FilePath -> Preparer Retriever +retrieve locations d basek a = do + showLongNote "This remote uses the deprecated chunksize setting. So this will be quite slow." + tmpdir <- fromRepo $ gitAnnexTmpMiscDir + createAnnexDirectory tmpdir + let tmp = tmpdir </> keyFile basek ++ ".directorylegacy.tmp" + a $ Just $ byteRetriever $ \k sink -> do + liftIO $ void $ withStoredFiles d locations k $ \fs -> do + forM_ fs $ + S.appendFile tmp <=< S.readFile + return True + b <- liftIO $ L.readFile tmp + liftIO $ nukeFile tmp + sink b + +checkKey :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> Annex Bool +checkKey d locations k = liftIO $ withStoredFiles d locations k $ + -- withStoredFiles checked that it exists + const $ return True diff --git a/Remote/External.hs b/Remote/External.hs index 9be9175c7..0579400ed 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -1,6 +1,6 @@ {- External special remote interface. - - - Copyright 2013 Joey Hess <joey@kitenet.net> + - Copyright 2013 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. 
-} @@ -12,26 +12,22 @@ import qualified Annex import Common.Annex import Types.Remote import Types.CleanupActions +import Types.UrlContents import qualified Git import Config import Remote.Helper.Special -import Remote.Helper.Encryptable -import Crypto import Utility.Metered import Logs.Transfer import Logs.PreferredContent.Raw import Logs.RemoteState +import Logs.Web import Config.Cost -import Annex.Content import Annex.UUID -import Annex.Exception import Creds import Control.Concurrent.STM -import System.Process (std_in, std_out, std_err) import System.Log.Logger (debugM) import qualified Data.Map as M -import qualified Data.ByteString.Lazy as L remote :: RemoteType remote = RemoteType { @@ -47,30 +43,37 @@ gen r u c gc = do Annex.addCleanup (RemoteCleanup u) $ stopExternal external cst <- getCost external r gc avail <- getAvailability external r gc - return $ Just $ encryptableRemote c - (storeEncrypted external $ getGpgEncParams (c,gc)) - (retrieveEncrypted external) - Remote { - uuid = u, - cost = cst, - name = Git.repoDescribe r, - storeKey = store external, - retrieveKeyFile = retrieve external, - retrieveKeyFileCheap = \_ _ -> return False, - removeKey = remove external, - hasKey = checkPresent external, - hasKeyCheap = False, - whereisKey = Nothing, - remoteFsck = Nothing, - repairRepo = Nothing, - config = c, - localpath = Nothing, - repo = r, - gitconfig = gc, - readonly = False, - availability = avail, - remotetype = remote - } + return $ Just $ specialRemote c + (simplyPrepare $ store external) + (simplyPrepare $ retrieve external) + (simplyPrepare $ remove external) + (simplyPrepare $ checkKey external) + Remote + { uuid = u + , cost = cst + , name = Git.repoDescribe r + , storeKey = storeKeyDummy + , retrieveKeyFile = retreiveKeyFileDummy + , retrieveKeyFileCheap = \_ _ -> return False + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy + , checkPresentCheap = False + , whereisKey = Nothing + , remoteFsck = Nothing + , repairRepo = Nothing + , config = c + , localpath = Nothing + , repo = r + , gitconfig = gc + , readonly = False + , availability = avail + , remotetype = remote + , mkUnavailable = gen r u c $ + gc { remoteAnnexExternalType = Just "!dne!" 
} + , getInfo = return [("externaltype", externaltype)] + , claimUrl = Just (claimurl external) + , checkUrl = Just (checkurl external) + } where externaltype = fromMaybe (error "missing externaltype") (remoteAnnexExternalType gc) @@ -79,7 +82,7 @@ externalSetup mu _ c = do u <- maybe (liftIO genUUID) return mu let externaltype = fromMaybe (error "Specify externaltype=") $ M.lookup "externaltype" c - c' <- encryptionSetup c + (c', _encsetup) <- encryptionSetup c external <- newExternal externaltype u c' handleRequest external INITREMOTE Nothing $ \resp -> case resp of @@ -91,25 +94,8 @@ externalSetup mu _ c = do gitConfigSpecialRemote u c'' "externaltype" externaltype return (c'', u) -store :: External -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store external k _f p = sendAnnex k rollback $ \f -> - metered (Just p) k $ - storeHelper external k f - where - rollback = void $ remove external k - -storeEncrypted :: External -> [CommandParam] -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted external gpgOpts (cipher, enck) k p = withTmp enck $ \tmp -> - sendAnnex k rollback $ \src -> do - metered (Just p) k $ \meterupdate -> do - liftIO $ encrypt gpgOpts cipher (feedFile src) $ - readBytes $ L.writeFile tmp - storeHelper external enck tmp meterupdate - where - rollback = void $ remove external enck - -storeHelper :: External -> Key -> FilePath -> MeterUpdate -> Annex Bool -storeHelper external k f p = safely $ +store :: External -> Storer +store external = fileStorer $ \k f p -> handleRequest external (TRANSFER Upload k f) (Just p) $ \resp -> case resp of TRANSFER_SUCCESS Upload k' | k == k' -> @@ -120,34 +106,18 @@ storeHelper external k f p = safely $ return False _ -> Nothing -retrieve :: External -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve external k _f d p = metered (Just p) k $ - retrieveHelper external k d - -retrieveEncrypted :: External -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted external (cipher, enck) k f p = withTmp enck $ \tmp -> - metered (Just p) k $ \meterupdate -> - ifM (retrieveHelper external enck tmp meterupdate) - ( liftIO $ catchBoolIO $ do - decrypt cipher (feedFile tmp) $ - readBytes $ L.writeFile f - return True - , return False - ) - -retrieveHelper :: External -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveHelper external k d p = safely $ +retrieve :: External -> Retriever +retrieve external = fileRetriever $ \d k p -> handleRequest external (TRANSFER Download k d) (Just p) $ \resp -> case resp of TRANSFER_SUCCESS Download k' - | k == k' -> Just $ return True + | k == k' -> Just $ return () TRANSFER_FAILURE Download k' errmsg | k == k' -> Just $ do - warning errmsg - return False + error errmsg _ -> Nothing -remove :: External -> Key -> Annex Bool +remove :: External -> Remover remove external k = safely $ handleRequest external (REMOVE k) Nothing $ \resp -> case resp of @@ -159,8 +129,8 @@ remove external k = safely $ return False _ -> Nothing -checkPresent :: External -> Key -> Annex (Either String Bool) -checkPresent external k = either (Left . show) id <$> tryAnnex go +checkKey :: External -> CheckPresent +checkKey external k = either error id <$> go where go = handleRequest external (CHECKPRESENT k) Nothing $ \resp -> case resp of @@ -173,7 +143,7 @@ checkPresent external k = either (Left . 
show) id <$> tryAnnex go _ -> Nothing safely :: Annex Bool -> Annex Bool -safely a = go =<< tryAnnex a +safely a = go =<< tryNonAsync a where go (Right r) = return r go (Left e) = do @@ -204,7 +174,7 @@ handleRequest' lck external req mp responsehandler go | otherwise = go where - go = do + go = do sendMessage lck external req loop loop = receiveMessage lck external responsehandler @@ -214,7 +184,7 @@ handleRequest' lck external req mp responsehandler handleRemoteRequest (PROGRESS bytesprocessed) = maybe noop (\a -> liftIO $ a bytesprocessed) mp handleRemoteRequest (DIRHASH k) = - send $ VALUE $ hashDirMixed k + send $ VALUE $ hashDirMixed def k handleRemoteRequest (SETCONFIG setting value) = liftIO $ atomically $ do let v = externalConfig external @@ -226,7 +196,7 @@ handleRequest' lck external req mp responsehandler send $ VALUE value handleRemoteRequest (SETCREDS setting login password) = do c <- liftIO $ atomically $ readTMVar $ externalConfig external - c' <- setRemoteCredPair c (credstorage setting) $ + c' <- setRemoteCredPair encryptionAlreadySetup c (credstorage setting) $ Just (login, password) void $ liftIO $ atomically $ swapTMVar (externalConfig external) c' handleRemoteRequest (GETCREDS setting) = do @@ -249,6 +219,14 @@ handleRequest' lck external req mp responsehandler state <- fromMaybe "" <$> getRemoteState (externalUUID external) key send $ VALUE state + handleRemoteRequest (SETURLPRESENT key url) = + setUrlPresent (externalUUID external) key url + handleRemoteRequest (SETURLMISSING key url) = + setUrlMissing (externalUUID external) key url + handleRemoteRequest (GETURLS key prefix) = do + mapM_ (send . VALUE . fst . getDownloader) + =<< getUrlsWithPrefix key prefix + send (VALUE "") -- end of list handleRemoteRequest (DEBUG msg) = liftIO $ debugM "external" msg handleRemoteRequest (VERSION _) = sendMessage lck external $ ERROR "too late to send VERSION" @@ -443,3 +421,27 @@ getAvailability external r gc = maybe query return (remoteAnnexAvailability gc) _ -> Nothing setRemoteAvailability r avail return avail + +claimurl :: External -> URLString -> Annex Bool +claimurl external url = + handleRequest external (CLAIMURL url) Nothing $ \req -> case req of + CLAIMURL_SUCCESS -> Just $ return True + CLAIMURL_FAILURE -> Just $ return False + UNSUPPORTED_REQUEST -> Just $ return False + _ -> Nothing + +checkurl :: External -> URLString -> Annex UrlContents +checkurl external url = + handleRequest external (CHECKURL url) Nothing $ \req -> case req of + CHECKURL_CONTENTS sz f -> Just $ return $ UrlContents sz + (if null f then Nothing else Just $ mkSafeFilePath f) + -- Treat a single item multi response specially to + -- simplify the external remote implementation. + CHECKURL_MULTI ((_, sz, f):[]) -> + Just $ return $ UrlContents sz $ Just $ mkSafeFilePath f + CHECKURL_MULTI l -> Just $ return $ UrlMulti $ map mkmulti l + CHECKURL_FAILURE errmsg -> Just $ error errmsg + UNSUPPORTED_REQUEST -> error "CHECKURL not implemented by external special remote" + _ -> Nothing + where + mkmulti (u, s, f) = (u, s, mkSafeFilePath f) diff --git a/Remote/External/Types.hs b/Remote/External/Types.hs index 1e17a2c4c..d0fb2ff7a 100644 --- a/Remote/External/Types.hs +++ b/Remote/External/Types.hs @@ -1,11 +1,12 @@ {- External special remote data types. - - - Copyright 2013 Joey Hess <joey@kitenet.net> + - Copyright 2013 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. 
-} {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} module Remote.External.Types ( External(..), @@ -15,9 +16,9 @@ module Remote.External.Types ( withExternalLock, ExternalState(..), PrepareStatus(..), - parseMessage, - Sendable(..), - Receivable(..), + Proto.parseMessage, + Proto.Sendable(..), + Proto.Receivable(..), Request(..), needsPREPARE, Response(..), @@ -31,7 +32,6 @@ module Remote.External.Types ( ) where import Common.Annex -import Annex.Exception import Types.Key (file2key, key2file) import Types.StandardGroups (PreferredContentExpression) import Utility.Metered (BytesProcessed(..)) @@ -39,12 +39,12 @@ import Logs.Transfer (Direction(..)) import Config.Cost (Cost) import Types.Remote (RemoteConfig) import Types.Availability (Availability(..)) +import Utility.Url (URLString) +import qualified Utility.SimpleProtocol as Proto -import Data.Char import Control.Concurrent.STM -- If the remote is not yet running, the ExternalState TMVar is empty. --- The data External = External { externalType :: ExternalType , externalUUID :: UUID @@ -85,28 +85,14 @@ withExternalLock external = bracketIO setup cleanup cleanup = atomically . putTMVar v v = externalLock external --- Messages that git-annex can send. -class Sendable m where - formatMessage :: m -> [String] - --- Messages that git-annex can receive. -class Receivable m where - -- Passed the first word of the message, returns - -- a Parser that can be be fed the rest of the message to generate - -- the value. - parseCommand :: String -> Parser m - -parseMessage :: (Receivable m) => String -> Maybe m -parseMessage s = parseCommand command rest - where - (command, rest) = splitWord s - -- Messages that can be sent to the external remote to request it do something. data Request = PREPARE | INITREMOTE | GETCOST | GETAVAILABILITY + | CLAIMURL URLString + | CHECKURL URLString | TRANSFER Direction Key FilePath | CHECKPRESENT Key | REMOVE Key @@ -118,15 +104,21 @@ needsPREPARE PREPARE = False needsPREPARE INITREMOTE = False needsPREPARE _ = True -instance Sendable Request where +instance Proto.Sendable Request where formatMessage PREPARE = ["PREPARE"] formatMessage INITREMOTE = ["INITREMOTE"] formatMessage GETCOST = ["GETCOST"] formatMessage GETAVAILABILITY = ["GETAVAILABILITY"] + formatMessage (CLAIMURL url) = [ "CLAIMURL", Proto.serialize url ] + formatMessage (CHECKURL url) = [ "CHECKURL", Proto.serialize url ] formatMessage (TRANSFER direction key file) = - [ "TRANSFER", serialize direction, serialize key, serialize file ] - formatMessage (CHECKPRESENT key) = [ "CHECKPRESENT", serialize key ] - formatMessage (REMOVE key) = [ "REMOVE", serialize key ] + [ "TRANSFER" + , Proto.serialize direction + , Proto.serialize key + , Proto.serialize file + ] + formatMessage (CHECKPRESENT key) = [ "CHECKPRESENT", Proto.serialize key ] + formatMessage (REMOVE key) = [ "REMOVE", Proto.serialize key ] -- Responses the external remote can make to requests. 
data Response @@ -143,25 +135,35 @@ data Response | AVAILABILITY Availability | INITREMOTE_SUCCESS | INITREMOTE_FAILURE ErrorMsg + | CLAIMURL_SUCCESS + | CLAIMURL_FAILURE + | CHECKURL_CONTENTS Size FilePath + | CHECKURL_MULTI [(URLString, Size, FilePath)] + | CHECKURL_FAILURE ErrorMsg | UNSUPPORTED_REQUEST deriving (Show) -instance Receivable Response where - parseCommand "PREPARE-SUCCESS" = parse0 PREPARE_SUCCESS - parseCommand "PREPARE-FAILURE" = parse1 PREPARE_FAILURE - parseCommand "TRANSFER-SUCCESS" = parse2 TRANSFER_SUCCESS - parseCommand "TRANSFER-FAILURE" = parse3 TRANSFER_FAILURE - parseCommand "CHECKPRESENT-SUCCESS" = parse1 CHECKPRESENT_SUCCESS - parseCommand "CHECKPRESENT-FAILURE" = parse1 CHECKPRESENT_FAILURE - parseCommand "CHECKPRESENT-UNKNOWN" = parse2 CHECKPRESENT_UNKNOWN - parseCommand "REMOVE-SUCCESS" = parse1 REMOVE_SUCCESS - parseCommand "REMOVE-FAILURE" = parse2 REMOVE_FAILURE - parseCommand "COST" = parse1 COST - parseCommand "AVAILABILITY" = parse1 AVAILABILITY - parseCommand "INITREMOTE-SUCCESS" = parse0 INITREMOTE_SUCCESS - parseCommand "INITREMOTE-FAILURE" = parse1 INITREMOTE_FAILURE - parseCommand "UNSUPPORTED-REQUEST" = parse0 UNSUPPORTED_REQUEST - parseCommand _ = parseFail +instance Proto.Receivable Response where + parseCommand "PREPARE-SUCCESS" = Proto.parse0 PREPARE_SUCCESS + parseCommand "PREPARE-FAILURE" = Proto.parse1 PREPARE_FAILURE + parseCommand "TRANSFER-SUCCESS" = Proto.parse2 TRANSFER_SUCCESS + parseCommand "TRANSFER-FAILURE" = Proto.parse3 TRANSFER_FAILURE + parseCommand "CHECKPRESENT-SUCCESS" = Proto.parse1 CHECKPRESENT_SUCCESS + parseCommand "CHECKPRESENT-FAILURE" = Proto.parse1 CHECKPRESENT_FAILURE + parseCommand "CHECKPRESENT-UNKNOWN" = Proto.parse2 CHECKPRESENT_UNKNOWN + parseCommand "REMOVE-SUCCESS" = Proto.parse1 REMOVE_SUCCESS + parseCommand "REMOVE-FAILURE" = Proto.parse2 REMOVE_FAILURE + parseCommand "COST" = Proto.parse1 COST + parseCommand "AVAILABILITY" = Proto.parse1 AVAILABILITY + parseCommand "INITREMOTE-SUCCESS" = Proto.parse0 INITREMOTE_SUCCESS + parseCommand "INITREMOTE-FAILURE" = Proto.parse1 INITREMOTE_FAILURE + parseCommand "CLAIMURL-SUCCESS" = Proto.parse0 CLAIMURL_SUCCESS + parseCommand "CLAIMURL-FAILURE" = Proto.parse0 CLAIMURL_FAILURE + parseCommand "CHECKURL-CONTENTS" = Proto.parse2 CHECKURL_CONTENTS + parseCommand "CHECKURL-MULTI" = Proto.parse1 CHECKURL_MULTI + parseCommand "CHECKURL-FAILURE" = Proto.parse1 CHECKURL_FAILURE + parseCommand "UNSUPPORTED-REQUEST" = Proto.parse0 UNSUPPORTED_REQUEST + parseCommand _ = Proto.parseFail -- Requests that the external remote can send at any time it's in control. 
data RemoteRequest @@ -178,25 +180,31 @@ data RemoteRequest | GETWANTED | SETSTATE Key String | GETSTATE Key + | SETURLPRESENT Key URLString + | SETURLMISSING Key URLString + | GETURLS Key String | DEBUG String deriving (Show) -instance Receivable RemoteRequest where - parseCommand "VERSION" = parse1 VERSION - parseCommand "PROGRESS" = parse1 PROGRESS - parseCommand "DIRHASH" = parse1 DIRHASH - parseCommand "SETCONFIG" = parse2 SETCONFIG - parseCommand "GETCONFIG" = parse1 GETCONFIG - parseCommand "SETCREDS" = parse3 SETCREDS - parseCommand "GETCREDS" = parse1 GETCREDS - parseCommand "GETUUID" = parse0 GETUUID - parseCommand "GETGITDIR" = parse0 GETGITDIR - parseCommand "SETWANTED" = parse1 SETWANTED - parseCommand "GETWANTED" = parse0 GETWANTED - parseCommand "SETSTATE" = parse2 SETSTATE - parseCommand "GETSTATE" = parse1 GETSTATE - parseCommand "DEBUG" = parse1 DEBUG - parseCommand _ = parseFail +instance Proto.Receivable RemoteRequest where + parseCommand "VERSION" = Proto.parse1 VERSION + parseCommand "PROGRESS" = Proto.parse1 PROGRESS + parseCommand "DIRHASH" = Proto.parse1 DIRHASH + parseCommand "SETCONFIG" = Proto.parse2 SETCONFIG + parseCommand "GETCONFIG" = Proto.parse1 GETCONFIG + parseCommand "SETCREDS" = Proto.parse3 SETCREDS + parseCommand "GETCREDS" = Proto.parse1 GETCREDS + parseCommand "GETUUID" = Proto.parse0 GETUUID + parseCommand "GETGITDIR" = Proto.parse0 GETGITDIR + parseCommand "SETWANTED" = Proto.parse1 SETWANTED + parseCommand "GETWANTED" = Proto.parse0 GETWANTED + parseCommand "SETSTATE" = Proto.parse2 SETSTATE + parseCommand "GETSTATE" = Proto.parse1 GETSTATE + parseCommand "SETURLPRESENT" = Proto.parse2 SETURLPRESENT + parseCommand "SETURLMISSING" = Proto.parse2 SETURLMISSING + parseCommand "GETURLS" = Proto.parse2 GETURLS + parseCommand "DEBUG" = Proto.parse1 DEBUG + parseCommand _ = Proto.parseFail -- Responses to RemoteRequest. data RemoteResponse @@ -204,36 +212,33 @@ data RemoteResponse | CREDS String String deriving (Show) -instance Sendable RemoteResponse where - formatMessage (VALUE s) = [ "VALUE", serialize s ] - formatMessage (CREDS login password) = [ "CREDS", serialize login, serialize password ] +instance Proto.Sendable RemoteResponse where + formatMessage (VALUE s) = [ "VALUE", Proto.serialize s ] + formatMessage (CREDS login password) = [ "CREDS", Proto.serialize login, Proto.serialize password ] -- Messages that can be sent at any time by either git-annex or the remote. data AsyncMessage = ERROR ErrorMsg deriving (Show) -instance Sendable AsyncMessage where - formatMessage (ERROR err) = [ "ERROR", serialize err ] +instance Proto.Sendable AsyncMessage where + formatMessage (ERROR err) = [ "ERROR", Proto.serialize err ] -instance Receivable AsyncMessage where - parseCommand "ERROR" = parse1 ERROR - parseCommand _ = parseFail +instance Proto.Receivable AsyncMessage where + parseCommand "ERROR" = Proto.parse1 ERROR + parseCommand _ = Proto.parseFail -- Data types used for parameters when communicating with the remote. -- All are serializable. 
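
One wrinkle among those parameter types: CHECKURL-MULTI packs a whole list of (url, size, file) triples into a single parameter, three words per triple with UNKNOWN standing in for an unknown size, which implicitly assumes that the urls and filenames themselves contain no spaces. A standalone round-trip sketch mirroring the Proto.Serializable instance that appears further down (the real code uses readish where this sketch uses readMaybe):

import Text.Read (readMaybe)

type URLString = String
type Size = Maybe Integer

serializeMulti :: [(URLString, Size, FilePath)] -> String
serializeMulti = unwords . map go
  where
    go (url, sz, f) = unwords [url, maybe "UNKNOWN" show sz, f]

deserializeMulti :: String -> [(URLString, Size, FilePath)]
deserializeMulti = go [] . words
  where
    -- readMaybe "UNKNOWN" is Nothing, which is exactly the
    -- representation of an unknown Size.
    go c (url:sz:f:rest) = go ((url, readMaybe sz, f) : c) rest
    go c _ = reverse c

main :: IO ()
main = do
    let v =
            [ ("http://example.com/a", Just 30, "a")
            , ("http://example.com/b", Nothing, "b")
            ]
    putStrLn (serializeMulti v)
    print (deserializeMulti (serializeMulti v) == v)  -- True
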
type ErrorMsg = String type Setting = String type ProtocolVersion = Int +type Size = Maybe Integer supportedProtocolVersions :: [ProtocolVersion] supportedProtocolVersions = [1] -class ExternalSerializable a where - serialize :: a -> String - deserialize :: String -> Maybe a - -instance ExternalSerializable Direction where +instance Proto.Serializable Direction where serialize Upload = "STORE" serialize Download = "RETRIEVE" @@ -241,23 +246,29 @@ instance ExternalSerializable Direction where deserialize "RETRIEVE" = Just Download deserialize _ = Nothing -instance ExternalSerializable Key where +instance Proto.Serializable Key where serialize = key2file deserialize = file2key -instance ExternalSerializable [Char] where +instance Proto.Serializable [Char] where serialize = id deserialize = Just -instance ExternalSerializable ProtocolVersion where +instance Proto.Serializable ProtocolVersion where serialize = show deserialize = readish -instance ExternalSerializable Cost where +instance Proto.Serializable Cost where serialize = show deserialize = readish -instance ExternalSerializable Availability where +instance Proto.Serializable Size where + serialize (Just s) = show s + serialize Nothing = "UNKNOWN" + deserialize "UNKNOWN" = Just Nothing + deserialize s = maybe Nothing (Just . Just) (readish s) + +instance Proto.Serializable Availability where serialize GloballyAvailable = "GLOBAL" serialize LocallyAvailable = "LOCAL" @@ -265,37 +276,15 @@ instance ExternalSerializable Availability where deserialize "LOCAL" = Just LocallyAvailable deserialize _ = Nothing -instance ExternalSerializable BytesProcessed where +instance Proto.Serializable BytesProcessed where serialize (BytesProcessed n) = show n deserialize = BytesProcessed <$$> readish -{- Parsing the parameters of messages. Using the right parseN ensures - - that the string is split into exactly the requested number of words, - - which allows the last parameter of a message to contain arbitrary - - whitespace, etc, without needing any special quoting. - -} -type Parser a = String -> Maybe a - -parseFail :: Parser a -parseFail _ = Nothing - -parse0 :: a -> Parser a -parse0 mk "" = Just mk -parse0 _ _ = Nothing - -parse1 :: ExternalSerializable p1 => (p1 -> a) -> Parser a -parse1 mk p1 = mk <$> deserialize p1 - -parse2 :: (ExternalSerializable p1, ExternalSerializable p2) => (p1 -> p2 -> a) -> Parser a -parse2 mk s = mk <$> deserialize p1 <*> deserialize p2 - where - (p1, p2) = splitWord s - -parse3 :: (ExternalSerializable p1, ExternalSerializable p2, ExternalSerializable p3) => (p1 -> p2 -> p3 -> a) -> Parser a -parse3 mk s = mk <$> deserialize p1 <*> deserialize p2 <*> deserialize p3 - where - (p1, rest) = splitWord s - (p2, p3) = splitWord rest - -splitWord :: String -> (String, String) -splitWord = separate isSpace +instance Proto.Serializable [(URLString, Size, FilePath)] where + serialize = unwords . map go + where + go (url, sz, f) = url ++ " " ++ maybe "UNKNOWN" show sz ++ " " ++ f + deserialize = Just . go [] . words + where + go c (url:sz:f:rest) = go ((url, readish sz, f):c) rest + go c _ = reverse c diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 60c2df73e..54c90536f 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -1,13 +1,13 @@ {- git remotes encrypted using git-remote-gcrypt - - - Copyright 2013 Joey Hess <joey@kitenet.net> + - Copyright 2013 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. 
-} module Remote.GCrypt ( remote, - gen, + chainGen, getGCryptUUID, coreGCryptId, setupRepo @@ -15,7 +15,8 @@ module Remote.GCrypt ( import qualified Data.Map as M import qualified Data.ByteString.Lazy as L -import Control.Exception.Extensible +import Control.Exception +import Data.Default import Common.Annex import Types.Remote @@ -29,7 +30,6 @@ import qualified Git.GCrypt import qualified Git.Construct import qualified Git.Types as Git () import qualified Annex.Branch -import qualified Annex.Content import Config import Config.Cost import Remote.Helper.Git @@ -38,16 +38,15 @@ import Remote.Helper.Special import Remote.Helper.Messages import qualified Remote.Helper.Ssh as Ssh import Utility.Metered -import Crypto import Annex.UUID import Annex.Ssh import qualified Remote.Rsync +import qualified Remote.Directory import Utility.Rsync import Utility.Tmp import Logs.Remote import Logs.Transfer import Utility.Gpg -import Annex.Content remote :: RemoteType remote = RemoteType { @@ -59,19 +58,24 @@ remote = RemoteType { setup = gCryptSetup } -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) -gen gcryptr u c gc = do +chainGen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) +chainGen gcryptr u c gc = do g <- gitRepo -- get underlying git repo with real path, not gcrypt path r <- liftIO $ Git.GCrypt.encryptedRemote g gcryptr let r' = r { Git.remoteName = Git.remoteName gcryptr } + gen r' u c gc + +gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) +gen baser u c gc = do -- doublecheck that cache matches underlying repo's gcrypt-id -- (which might not be set), only for local repos - (mgcryptid, r'') <- getGCryptId True r' - case (mgcryptid, Git.GCrypt.remoteRepoId g (Git.remoteName gcryptr)) of + (mgcryptid, r) <- getGCryptId True baser gc + g <- gitRepo + case (mgcryptid, Git.GCrypt.remoteRepoId g (Git.remoteName baser)) of (Just gcryptid, Just cachedgcryptid) - | gcryptid /= cachedgcryptid -> resetup gcryptid r'' - _ -> gen' r'' u c gc + | gcryptid /= cachedgcryptid -> resetup gcryptid r + _ -> gen' r u c gc where -- A different drive may have been mounted, making a different -- gcrypt remote available. 
So need to set the cached @@ -81,10 +85,10 @@ gen gcryptr u c gc = do resetup gcryptid r = do let u' = genUUIDInNameSpace gCryptNameSpace gcryptid v <- M.lookup u' <$> readRemoteLog - case (Git.remoteName gcryptr, v) of + case (Git.remoteName baser, v) of (Just remotename, Just c') -> do setGcryptEncryption c' remotename - setConfig (remoteConfig gcryptr "uuid") (fromUUID u') + setConfig (remoteConfig baser "uuid") (fromUUID u') setConfig (ConfigKey $ Git.GCrypt.remoteConfigKey "gcrypt-id" remotename) gcryptid gen' r u' c' gc _ -> do @@ -95,18 +99,18 @@ gen' :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remo gen' r u c gc = do cst <- remoteCost gc $ if repoCheap r then nearlyCheapRemoteCost else expensiveRemoteCost - (rsynctransport, rsyncurl) <- rsyncTransportToObjects r + (rsynctransport, rsyncurl) <- rsyncTransportToObjects r gc let rsyncopts = Remote.Rsync.genRsyncOpts c gc rsynctransport rsyncurl let this = Remote { uuid = u , cost = cst , name = Git.repoDescribe r - , storeKey = \_ _ _ -> noCrypto - , retrieveKeyFile = \_ _ _ _ -> noCrypto + , storeKey = storeKeyDummy + , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = \_ _ -> return False - , removeKey = remove this rsyncopts - , hasKey = checkPresent this rsyncopts - , hasKeyCheap = repoCheap r + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy + , checkPresentCheap = repoCheap r , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing @@ -117,46 +121,58 @@ gen' r u c gc = do , readonly = Git.repoIsHttp r , availability = availabilityCalc r , remotetype = remote + , mkUnavailable = return Nothing + , getInfo = gitRepoInfo this + , claimUrl = Nothing + , checkUrl = Nothing } - return $ Just $ encryptableRemote c - (store this rsyncopts) - (retrieve this rsyncopts) + return $ Just $ specialRemote' specialcfg c + (simplyPrepare $ store this rsyncopts) + (simplyPrepare $ retrieve this rsyncopts) + (simplyPrepare $ remove this rsyncopts) + (simplyPrepare $ checkKey this rsyncopts) this + where + specialcfg + | Git.repoIsUrl r = (specialRemoteCfg c) + -- Rsync displays its own progress. 
+ { displayProgress = False } + | otherwise = specialRemoteCfg c -rsyncTransportToObjects :: Git.Repo -> Annex ([CommandParam], String) -rsyncTransportToObjects r = do - (rsynctransport, rsyncurl, _) <- rsyncTransport r +rsyncTransportToObjects :: Git.Repo -> RemoteGitConfig -> Annex ([CommandParam], String) +rsyncTransportToObjects r gc = do + (rsynctransport, rsyncurl, _) <- rsyncTransport r gc return (rsynctransport, rsyncurl ++ "/annex/objects") -rsyncTransport :: Git.Repo -> Annex ([CommandParam], String, AccessMethod) -rsyncTransport r +rsyncTransport :: Git.Repo -> RemoteGitConfig -> Annex ([CommandParam], String, AccessMethod) +rsyncTransport r gc | "ssh://" `isPrefixOf` loc = sshtransport $ break (== '/') $ drop (length "ssh://") loc | "//:" `isInfixOf` loc = othertransport | ":" `isInfixOf` loc = sshtransport $ separate (== ':') loc | otherwise = othertransport where - loc = Git.repoLocation r + loc = Git.repoLocation r sshtransport (host, path) = do let rsyncpath = if "/~/" `isPrefixOf` path then drop 3 path else path - opts <- sshCachingOptions (host, Nothing) [] + opts <- sshOptions (host, Nothing) gc [] return (rsyncShell $ Param "ssh" : opts, host ++ ":" ++ rsyncpath, AccessShell) othertransport = return ([], loc, AccessDirect) noCrypto :: Annex a noCrypto = error "cannot use gcrypt remote without encryption enabled" -unsupportedUrl :: Annex a +unsupportedUrl :: a unsupportedUrl = error "using non-ssh remote repo url with gcrypt is not supported" gCryptSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) gCryptSetup mu _ c = go $ M.lookup "gitrepo" c where remotename = fromJust (M.lookup "name" c) - go Nothing = error "Specify gitrepo=" + go Nothing = error "Specify gitrepo=" go (Just gitrepo) = do - c' <- encryptionSetup c + (c', _encsetup) <- encryptionSetup c inRepo $ Git.Command.run [ Params "remote add" , Param remotename @@ -189,7 +205,7 @@ gCryptSetup mu _ c = go $ M.lookup "gitrepo" c method <- setupRepo gcryptid =<< inRepo (Git.Construct.fromRemoteLocation gitrepo) gitConfigSpecialRemote u c' "gcrypt" (fromAccessMethod method) return (c', u) - else error $ "uuid mismatch " ++ show (u, mu, gcryptid) + else error $ "uuid mismatch; expected " ++ show mu ++ " but remote gitrepo has " ++ show u ++ " (" ++ show gcryptid ++ ")" {- Sets up the gcrypt repository. The repository is either a local - repo, or it is accessed via rsync directly, or it is accessed over ssh @@ -202,7 +218,7 @@ gCryptSetup mu _ c = go $ M.lookup "gitrepo" c setupRepo :: Git.GCrypt.GCryptId -> Git.Repo -> Annex AccessMethod setupRepo gcryptid r | Git.repoIsUrl r = do - (_, _, accessmethod) <- rsyncTransport r + (_, _, accessmethod) <- rsyncTransport r def case accessmethod of AccessDirect -> rsyncsetup AccessShell -> ifM gitannexshellsetup @@ -222,9 +238,9 @@ setupRepo gcryptid r - create the objectDir on the remote, - which is needed for direct rsync of objects to work. -} - rsyncsetup = Remote.Rsync.withRsyncScratchDir $ \tmp -> do + rsyncsetup = Remote.Rsync.withRsyncScratchDir $ \tmp -> do liftIO $ createDirectoryIfMissing True $ tmp </> objectDir - (rsynctransport, rsyncurl, _) <- rsyncTransport r + (rsynctransport, rsyncurl, _) <- rsyncTransport r def let tmpconfig = tmp </> "config" void $ liftIO $ rsync $ rsynctransport ++ [ Param $ rsyncurl ++ "/config" @@ -244,29 +260,38 @@ setupRepo gcryptid r {- Ask git-annex-shell to configure the repository as a gcrypt - repository. May fail if it is too old. 
-} - gitannexshellsetup = Ssh.onRemote r (boolSystem, False) + gitannexshellsetup = Ssh.onRemote r (boolSystem, return False) "gcryptsetup" [ Param gcryptid ] [] denyNonFastForwards = "receive.denyNonFastForwards" -shellOrRsync :: Remote -> Annex a -> Annex a -> Annex a -shellOrRsync r ashell arsync = case method of - AccessShell -> ashell - _ -> arsync +isShell :: Remote -> Bool +isShell r = case method of + AccessShell -> True + _ -> False where - method = toAccessMethod $ fromMaybe "" $ + method = toAccessMethod $ fromMaybe "" $ remoteAnnexGCrypt $ gitconfig r +shellOrRsync :: Remote -> Annex a -> Annex a -> Annex a +shellOrRsync r ashell arsync + | isShell r = ashell + | otherwise = arsync + {- Configure gcrypt to use the same list of keyids that - were passed to initremote as its participants. - Also, configure it to use a signing key that is in the list of - participants, which gcrypt requires is the case, and may not be - depending on system configuration. - - - (For shared encryption, gcrypt's default behavior is used.) -} + - (For shared encryption, gcrypt's default behavior is used.) + - + - Also, sets gcrypt-publish-participants to avoid unncessary gpg + - passphrase prompts. + -} setGcryptEncryption :: RemoteConfig -> String -> Annex () setGcryptEncryption c remotename = do - let participants = ConfigKey $ Git.GCrypt.remoteParticipantConfigKey remotename + let participants = remoteconfig Git.GCrypt.remoteParticipantConfigKey case extractCipher c of Nothing -> noCrypto Just (EncryptedCipher _ _ (KeyIds { keyIds = ks})) -> do @@ -278,80 +303,66 @@ setGcryptEncryption c remotename = do (k:_) -> setConfig signingkey k Just (SharedCipher _) -> unsetConfig participants + setConfig (remoteconfig Git.GCrypt.remotePublishParticipantConfigKey) + (Git.Config.boolConfig True) + where + remoteconfig n = ConfigKey $ n remotename -store :: Remote -> Remote.Rsync.RsyncOpts -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -store r rsyncopts (cipher, enck) k p - | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ - metered (Just p) k $ \meterupdate -> spoolencrypted $ \h -> do - let dest = gCryptLocation r enck - createDirectoryIfMissing True $ parentDir dest - readBytes (meteredWriteFile meterupdate dest) h +store :: Remote -> Remote.Rsync.RsyncOpts -> Storer +store r rsyncopts + | not $ Git.repoIsUrl (repo r) = + byteStorer $ \k b p -> guardUsable (repo r) (return False) $ liftIO $ do + let tmpdir = Git.repoLocation (repo r) </> "tmp" </> keyFile k + void $ tryIO $ createDirectoryIfMissing True tmpdir + let tmpf = tmpdir </> keyFile k + meteredWriteFile p tmpf b + let destdir = parentDir $ gCryptLocation r k + Remote.Directory.finalizeStoreGeneric tmpdir destdir return True - | Git.repoIsSsh (repo r) = shellOrRsync r storeshell storersync + | Git.repoIsSsh (repo r) = if isShell r + then fileStorer $ \k f p -> Ssh.rsyncHelper (Just p) + =<< Ssh.rsyncParamsRemote False r Upload k f Nothing + else fileStorer $ Remote.Rsync.store rsyncopts | otherwise = unsupportedUrl - where - gpgopts = getGpgEncParams r - storersync = Remote.Rsync.storeEncrypted rsyncopts gpgopts (cipher, enck) k p - storeshell = withTmp enck $ \tmp -> - ifM (spoolencrypted $ readBytes $ \b -> catchBoolIO $ L.writeFile tmp b >> return True) - ( Ssh.rsyncHelper (Just p) - =<< Ssh.rsyncParamsRemote False r Upload enck tmp Nothing - , return False - ) - spoolencrypted a = Annex.Content.sendAnnex k noop $ \src -> - liftIO $ catchBoolIO $ - encrypt gpgopts cipher (feedFile src) a - -retrieve :: Remote -> 
Remote.Rsync.RsyncOpts -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieve r rsyncopts (cipher, enck) k d p - | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ do - retrievewith $ L.readFile src - return True - | Git.repoIsSsh (repo r) = shellOrRsync r retrieveshell retrieversync + +retrieve :: Remote -> Remote.Rsync.RsyncOpts -> Retriever +retrieve r rsyncopts + | not $ Git.repoIsUrl (repo r) = byteRetriever $ \k sink -> + guardUsable (repo r) (return False) $ + sink =<< liftIO (L.readFile $ gCryptLocation r k) + | Git.repoIsSsh (repo r) = if isShell r + then fileRetriever $ \f k p -> + unlessM (Ssh.rsyncHelper (Just p) =<< Ssh.rsyncParamsRemote False r Download k f Nothing) $ + error "rsync failed" + else fileRetriever $ Remote.Rsync.retrieve rsyncopts | otherwise = unsupportedUrl where - src = gCryptLocation r enck - retrievewith a = metered (Just p) k $ \meterupdate -> liftIO $ - a >>= \b -> - decrypt cipher (feedBytes b) - (readBytes $ meteredWriteFile meterupdate d) - retrieversync = Remote.Rsync.retrieveEncrypted rsyncopts (cipher, enck) k d p - retrieveshell = withTmp enck $ \tmp -> - ifM (Ssh.rsyncHelper (Just p) =<< Ssh.rsyncParamsRemote False r Download enck tmp Nothing) - ( liftIO $ catchBoolIO $ do - decrypt cipher (feedFile tmp) $ - readBytes $ L.writeFile d - return True - , return False - ) - -remove :: Remote -> Remote.Rsync.RsyncOpts -> Key -> Annex Bool + +remove :: Remote -> Remote.Rsync.RsyncOpts -> Remover remove r rsyncopts k - | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ do - liftIO $ removeDirectoryRecursive $ parentDir $ gCryptLocation r k - return True + | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) (return False) $ + liftIO $ Remote.Directory.removeDirGeneric (Git.repoLocation (repo r)) (parentDir (gCryptLocation r k)) | Git.repoIsSsh (repo r) = shellOrRsync r removeshell removersync | otherwise = unsupportedUrl where removersync = Remote.Rsync.remove rsyncopts k removeshell = Ssh.dropKey (repo r) k -checkPresent :: Remote -> Remote.Rsync.RsyncOpts -> Key -> Annex (Either String Bool) -checkPresent r rsyncopts k +checkKey :: Remote -> Remote.Rsync.RsyncOpts -> CheckPresent +checkKey r rsyncopts k | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) (cantCheck $ repo r) $ - liftIO $ catchDefaultIO (cantCheck $ repo r) $ - Right <$> doesFileExist (gCryptLocation r k) + liftIO $ doesFileExist (gCryptLocation r k) | Git.repoIsSsh (repo r) = shellOrRsync r checkshell checkrsync | otherwise = unsupportedUrl where - checkrsync = Remote.Rsync.checkPresent (repo r) rsyncopts k + checkrsync = Remote.Rsync.checkKey (repo r) rsyncopts k checkshell = Ssh.inAnnex (repo r) k {- Annexed objects are hashed using lower-case directories for max - portability. -} gCryptLocation :: Remote -> Key -> FilePath -gCryptLocation r key = Git.repoLocation (repo r) </> objectDir </> keyPath key hashDirLower +gCryptLocation r key = Git.repoLocation (repo r) </> objectDir </> keyPath key (hashDirLower def) data AccessMethod = AccessDirect | AccessShell @@ -365,7 +376,7 @@ toAccessMethod _ = AccessDirect getGCryptUUID :: Bool -> Git.Repo -> Annex (Maybe UUID) getGCryptUUID fast r = (genUUIDInNameSpace gCryptNameSpace <$>) . fst - <$> getGCryptId fast r + <$> getGCryptId fast r def coreGCryptId :: String coreGCryptId = "core.gcrypt-id" @@ -378,22 +389,22 @@ coreGCryptId = "core.gcrypt-id" - tries git-annex-shell and direct rsync of the git config file. - - (Also returns a version of input repo with its config read.) 
-} -getGCryptId :: Bool -> Git.Repo -> Annex (Maybe Git.GCrypt.GCryptId, Git.Repo) -getGCryptId fast r +getGCryptId :: Bool -> Git.Repo -> RemoteGitConfig -> Annex (Maybe Git.GCrypt.GCryptId, Git.Repo) +getGCryptId fast r gc | Git.repoIsLocal r || Git.repoIsLocalUnknown r = extract <$> liftIO (catchMaybeIO $ Git.Config.read r) | not fast = extract . liftM fst <$> getM (eitherToMaybe <$>) - [ Ssh.onRemote r (Git.Config.fromPipe r, Left undefined) "configlist" [] [] - , getConfigViaRsync r + [ Ssh.onRemote r (Git.Config.fromPipe r, return (Left undefined)) "configlist" [] [] + , getConfigViaRsync r gc ] | otherwise = return (Nothing, r) where extract Nothing = (Nothing, r) extract (Just r') = (Git.Config.getMaybe coreGCryptId r', r') -getConfigViaRsync :: Git.Repo -> Annex (Either SomeException (Git.Repo, String)) -getConfigViaRsync r = do - (rsynctransport, rsyncurl, _) <- rsyncTransport r +getConfigViaRsync :: Git.Repo -> RemoteGitConfig -> Annex (Either SomeException (Git.Repo, String)) +getConfigViaRsync r gc = do + (rsynctransport, rsyncurl, _) <- rsyncTransport r gc liftIO $ do withTmpFile "tmpconfig" $ \tmpconfig _ -> do void $ rsync $ rsynctransport ++ diff --git a/Remote/Git.hs b/Remote/Git.hs index 995d66779..01fc0d663 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -1,6 +1,6 @@ {- Standard git remotes. - - - Copyright 2011-2012 Joey Hess <joey@kitenet.net> + - Copyright 2011-2012 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -22,11 +22,11 @@ import qualified Git.Config import qualified Git.Construct import qualified Git.Command import qualified Git.GCrypt +import qualified Git.Types as Git import qualified Annex import Logs.Presence -import Logs.Transfer +import Annex.Transfer import Annex.UUID -import Annex.Exception import qualified Annex.Content import qualified Annex.BranchState import qualified Annex.Branch @@ -50,19 +50,20 @@ import Remote.Helper.Messages import qualified Remote.Helper.Ssh as Ssh import qualified Remote.GCrypt import Config.Files +import Creds +import Annex.CatFile import Control.Concurrent import Control.Concurrent.MSampleVar -import System.Process (std_in, std_err) import qualified Data.Map as M -import Control.Exception.Extensible +import Network.URI remote :: RemoteType remote = RemoteType { typename = "git", enumerate = list, generate = gen, - setup = error "not supported" + setup = gitSetup } list :: Annex [Git.Repo] @@ -80,6 +81,35 @@ list = do Git.Construct.remoteNamed n $ Git.Construct.fromRemoteLocation url g +{- Git remotes are normally set up using standard git command, not + - git-annex initremote and enableremote. + - + - For initremote, the git remote must already be set up, and have a uuid. + - Initremote simply remembers its location. + - + - enableremote simply sets up a git remote using the stored location. + - No attempt is made to make the remote be accessible via ssh key setup, + - etc. 
+ -} +gitSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) +gitSetup Nothing _ c = do + let location = fromMaybe (error "Specify location=url") $ + Url.parseURIRelaxed =<< M.lookup "location" c + g <- Annex.gitRepo + u <- case filter (\r -> Git.location r == Git.Url location) (Git.remotes g) of + [r] -> getRepoUUID r + [] -> error "could not find existing git remote with specified location" + _ -> error "found multiple git remotes with specified location" + return (c, u) +gitSetup (Just u) _ c = do + inRepo $ Git.Command.run + [ Param "remote" + , Param "add" + , Param $ fromMaybe (error "no name") (M.lookup "name" c) + , Param $ fromMaybe (error "no location") (M.lookup "location" c) + ] + return (c, u) + {- It's assumed to be cheap to read the config of non-URL remotes, so this is - done each time git-annex is run in a way that uses remotes. - @@ -87,10 +117,9 @@ list = do - cached UUID value. -} configRead :: Git.Repo -> Annex Git.Repo configRead r = do - g <- fromRepo id - let c = extractRemoteGitConfig g (Git.repoDescribe r) + gc <- Annex.getRemoteGitConfig r u <- getRepoUUID r - case (repoCheap r, remoteAnnexIgnore c, u) of + case (repoCheap r, remoteAnnexIgnore gc, u) of (_, True, _) -> return r (True, _, _) -> tryGitConfigRead r (False, _, NoUUID) -> tryGitConfigRead r @@ -98,7 +127,7 @@ configRead r = do gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc - | Git.GCrypt.isEncrypted r = Remote.GCrypt.gen r u c gc + | Git.GCrypt.isEncrypted r = Remote.GCrypt.chainGen r u c gc | otherwise = go <$> remoteCost gc defcst where defcst = if repoCheap r then cheapRemoteCost else expensiveRemoteCost @@ -112,8 +141,8 @@ gen r u c gc , retrieveKeyFile = copyFromRemote new , retrieveKeyFileCheap = copyFromRemoteCheap new , removeKey = dropKey new - , hasKey = inAnnex new - , hasKeyCheap = repoCheap r + , checkPresent = inAnnex new + , checkPresentCheap = repoCheap r , whereisKey = Nothing , remoteFsck = if Git.repoIsUrl r then Nothing @@ -129,8 +158,25 @@ gen r u c gc , readonly = Git.repoIsHttp r , availability = availabilityCalc r , remotetype = remote + , mkUnavailable = unavailable r u c gc + , getInfo = gitRepoInfo new + , claimUrl = Nothing + , checkUrl = Nothing } +unavailable :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) +unavailable r u c gc = gen r' u c gc + where + r' = case Git.location r of + Git.Local { Git.gitdir = d } -> + r { Git.location = Git.LocalUnknown d } + Git.Url url -> case uriAuthority url of + Just auth -> + let auth' = auth { uriRegName = "!dne!" } + in r { Git.location = Git.Url (url { uriAuthority = Just auth' })} + Nothing -> r { Git.location = Git.Unknown } + _ -> r -- already unavailable + {- Checks relatively inexpensively if a repository is available for use. 
-} repoAvail :: Git.Repo -> Annex Bool repoAvail r @@ -153,7 +199,7 @@ tryGitConfigRead :: Git.Repo -> Annex Git.Repo tryGitConfigRead r | haveconfig r = return r -- already read | Git.repoIsSsh r = store $ do - v <- Ssh.onRemote r (pipedconfig, Left undefined) "configlist" [] [] + v <- Ssh.onRemote r (pipedconfig, return (Left undefined)) "configlist" [] [] case v of Right r' | haveconfig r' -> return r' @@ -162,20 +208,11 @@ tryGitConfigRead r | Git.repoIsHttp r = store geturlconfig | Git.GCrypt.isEncrypted r = handlegcrypt =<< getConfigMaybe (remoteConfig r "uuid") | Git.repoIsUrl r = return r - | otherwise = store $ safely $ do - s <- Annex.new r - Annex.eval s $ do - Annex.BranchState.disableUpdate - ensureInitialized - Annex.getState Annex.repo + | otherwise = store $ liftIO $ + readlocalannexconfig `catchNonAsync` (const $ return r) where haveconfig = not . M.null . Git.config - -- Reading config can fail due to IO error or - -- for other reasons; catch all possible exceptions. - safely a = either (const $ return r) return - =<< liftIO (try a :: IO (Either SomeException Git.Repo)) - pipedconfig cmd params = do v <- Git.Config.fromPipe r cmd params case v of @@ -197,7 +234,7 @@ tryGitConfigRead r ) case v of Left _ -> do - set_ignore "not usable by git-annex" + set_ignore "not usable by git-annex" False return r Right r' -> do -- Cache when http remote is not bare for @@ -225,15 +262,18 @@ tryGitConfigRead r configlist_failed = case Git.remoteName r of Nothing -> return r Just n -> do - whenM (inRepo $ Git.Command.runBool [Param "fetch", Param "--quiet", Param n]) $ - set_ignore "does not have git-annex installed" + whenM (inRepo $ Git.Command.runBool [Param "fetch", Param "--quiet", Param n]) $ do + set_ignore "does not have git-annex installed" True return r - set_ignore msg = do + set_ignore msg longmessage = do let k = "annex-ignore" case Git.remoteName r of Nothing -> noop - Just n -> warning $ "Remote " ++ n ++ " " ++ msg ++ "; setting " ++ k + Just n -> do + warning $ "Remote " ++ n ++ " " ++ msg ++ "; setting " ++ k + when longmessage $ + warning $ "This could be a problem with the git-annex installation on the remote. Please make sure that git-annex-shell is available in PATH when you ssh into the remote. Once you have fixed the git-annex installation, run: git config remote." ++ n ++ "." ++ k ++ " false" setremote k (Git.Config.boolConfig True) setremote k v = case Git.remoteName r of @@ -251,31 +291,34 @@ tryGitConfigRead r Just v -> store $ liftIO $ setUUID r $ genUUIDInNameSpace gCryptNameSpace v -{- Checks if a given remote has the content for a key inAnnex. - - If the remote cannot be accessed, or if it cannot determine - - whether it has the content, returns a Left error message. - -} -inAnnex :: Remote -> Key -> Annex (Either String Bool) + {- The local repo may not yet be initialized, so try to initialize + - it if allowed. However, if that fails, still return the read + - git config. -} + readlocalannexconfig = do + s <- Annex.new r + Annex.eval s $ do + Annex.BranchState.disableUpdate + void $ tryNonAsync $ ensureInitialized + Annex.getState Annex.repo + +{- Checks if a given remote has the content for a key in its annex. 
-} +inAnnex :: Remote -> Key -> Annex Bool inAnnex rmt key | Git.repoIsHttp r = checkhttp | Git.repoIsUrl r = checkremote | otherwise = checklocal where - r = repo rmt + r = repo rmt checkhttp = do showChecking r ifM (Url.withUrlOptions $ \uo -> anyM (\u -> Url.checkBoth u (keySize key) uo) (keyUrls rmt key)) - ( return $ Right True - , return $ Left "not found" + ( return True + , error "not found" ) checkremote = Ssh.inAnnex r key - checklocal = guardUsable r (cantCheck r) $ dispatch <$> check - where - check = either (Left . show) Right - <$> tryAnnex (onLocal rmt $ Annex.Content.inAnnexSafe key) - dispatch (Left e) = Left e - dispatch (Right (Just b)) = Right b - dispatch (Right Nothing) = cantCheck r + checklocal = guardUsable r (cantCheck r) $ + maybe (cantCheck r) return + =<< onLocal rmt (Annex.Content.inAnnexSafe key) keyUrls :: Remote -> Key -> [String] keyUrls r key = map tourl locs' @@ -284,25 +327,28 @@ keyUrls r key = map tourl locs' -- If the remote is known to not be bare, try the hash locations -- used for non-bare repos first, as an optimisation. locs - | remoteAnnexBare (gitconfig r) == Just False = reverse (annexLocations key) - | otherwise = annexLocations key + | remoteAnnexBare remoteconfig == Just False = reverse (annexLocations cfg key) + | otherwise = annexLocations cfg key #ifndef mingw32_HOST_OS locs' = locs #else locs' = map (replace "\\" "/") locs #endif + remoteconfig = gitconfig r + cfg = fromJust $ remoteGitConfig remoteconfig dropKey :: Remote -> Key -> Annex Bool dropKey r key | not $ Git.repoIsUrl (repo r) = - guardUsable (repo r) False $ commitOnCleanup r $ onLocal r $ do - ensureInitialized - whenM (Annex.Content.inAnnex key) $ do - Annex.Content.lockContent key $ - Annex.Content.removeAnnex key - logStatus key InfoMissing - Annex.Content.saveState True - return True + guardUsable (repo r) (return False) $ + commitOnCleanup r $ onLocal r $ do + ensureInitialized + whenM (Annex.Content.inAnnex key) $ do + Annex.Content.lockContent key + Annex.Content.removeAnnex + logStatus key InfoMissing + Annex.Content.saveState True + return True | Git.repoIsHttp (repo r) = error "dropping from http remote not supported" | otherwise = commitOnCleanup r $ Ssh.dropKey (repo r) key @@ -311,18 +357,32 @@ copyFromRemote :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> copyFromRemote r key file dest _p = copyFromRemote' r key file dest copyFromRemote' :: Remote -> Key -> AssociatedFile -> FilePath -> Annex Bool copyFromRemote' r key file dest - | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ do - let params = Ssh.rsyncParams r Download + | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) (return False) $ do + params <- Ssh.rsyncParams r Download u <- getUUID +#ifndef mingw32_HOST_OS + hardlink <- annexHardLink <$> Annex.getGitConfig +#endif -- run copy from perspective of remote onLocal r $ do ensureInitialized v <- Annex.Content.prepSendAnnex key case v of Nothing -> return False - Just (object, checksuccess) -> - upload u key file noRetry - (rsyncOrCopyFile params object dest) + Just (object, checksuccess) -> do + let copier = rsyncOrCopyFile params object dest +#ifndef mingw32_HOST_OS + let linker = createLink object dest >> return True + go <- ifM (pure hardlink <&&> not <$> isDirect) + ( return $ \m -> liftIO (catchBoolIO linker) + <||> copier m + , return copier + ) +#else + let go = copier +#endif + runTransfer (Transfer Download u key) + file noRetry go <&&> checksuccess | Git.repoIsSsh (repo r) = feedprogressback $ \feeder -> do 
direct <- isDirect @@ -357,6 +417,7 @@ copyFromRemote' r key file dest Just (cmd, params) <- Ssh.git_annex_shell (repo r) "transferinfo" [Param $ key2file key] fields v <- liftIO (newEmptySV :: IO (MSampleVar Integer)) + pidv <- liftIO $ newEmptyMVar tid <- liftIO $ forkIO $ void $ tryIO $ do bytes <- readSV v p <- createProcess $ @@ -364,6 +425,7 @@ copyFromRemote' r key file dest { std_in = CreatePipe , std_err = CreatePipe } + putMVar pidv (processHandle p) hClose $ stderrHandle p let h = stdinHandle p let send b = do @@ -373,12 +435,17 @@ copyFromRemote' r key file dest forever $ send =<< readSV v let feeder = writeSV v . fromBytesProcessed - bracketIO noop (const $ tryIO $ killThread tid) (const $ a feeder) + let cleanup = do + void $ tryIO $ killThread tid + tryNonAsync $ + maybe noop (void . waitForProcess) + =<< tryTakeMVar pidv + bracketIO noop (const cleanup) (const $ a feeder) copyFromRemoteCheap :: Remote -> Key -> FilePath -> Annex Bool #ifndef mingw32_HOST_OS copyFromRemoteCheap r key file - | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ do + | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) (return False) $ do loc <- liftIO $ gitAnnexLocation key (repo r) $ fromJust $ remoteGitConfig $ gitconfig r liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True @@ -396,7 +463,7 @@ copyFromRemoteCheap _ _ _ = return False copyToRemote :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool copyToRemote r key file p | not $ Git.repoIsUrl (repo r) = - guardUsable (repo r) False $ commitOnCleanup r $ + guardUsable (repo r) (return False) $ commitOnCleanup r $ copylocal =<< Annex.Content.prepSendAnnex key | Git.repoIsSsh (repo r) = commitOnCleanup r $ Annex.Content.sendAnnex key noop $ \object -> do @@ -411,14 +478,14 @@ copyToRemote r key file p -- the remote's Annex, but it needs access to the current -- Annex monad's state. checksuccessio <- Annex.withCurrentState checksuccess - let params = Ssh.rsyncParams r Upload + params <- Ssh.rsyncParams r Upload u <- getUUID -- run copy from perspective of remote onLocal r $ ifM (Annex.Content.inAnnex key) ( return True , do ensureInitialized - download u key file noRetry $ const $ + runTransfer (Transfer Download u key) file noRetry $ const $ Annex.Content.saveState True `after` Annex.Content.getViaTmpChecked (liftIO checksuccessio) key (\d -> rsyncOrCopyFile params object d p) @@ -434,12 +501,12 @@ fsckOnRemote r params | otherwise = return $ do program <- readProgramFile r' <- Git.Config.read r - env <- getEnvironment - let env' = addEntries + environ <- getEnvironment + let environ' = addEntries [ ("GIT_WORK_TREE", Git.repoPath r') , ("GIT_DIR", Git.localGitDir r') - ] env - batchCommandEnv program (Param "fsck" : params) $ Just env' + ] environ + batchCommandEnv program (Param "fsck" : params) (Just environ') {- The passed repair action is run in the Annex monad of the remote. -} repairRemote :: Git.Repo -> Annex Bool -> Annex (IO Bool) @@ -453,6 +520,8 @@ repairRemote r a = return $ do {- Runs an action from the perspective of a local remote. - - The AnnexState is cached for speed and to avoid resource leaks. + - However, catFileStop is called to avoid git-cat-file processes hanging + - around on removable media. - - The repository's git-annex branch is not updated, as an optimisation. 
- No caller of onLocal can query data from the branch and be ensured @@ -473,7 +542,8 @@ onLocal r a = do cache st = Annex.changeState $ \s -> s { Annex.remoteannexstate = M.insert (uuid r) st (Annex.remoteannexstate s) } go st a' = do - (ret, st') <- liftIO $ Annex.run st a' + (ret, st') <- liftIO $ Annex.run st $ + catFileStop `after` a' cache st' return ret @@ -492,12 +562,10 @@ rsyncOrCopyFile rsyncparams src dest p = docopy = liftIO $ bracket (forkIO $ watchfilesize zeroBytesProcessed) (void . tryIO . killThread) - (const $ copyFileExternal src dest) + (const $ copyFileExternal CopyTimeStamps src dest) watchfilesize oldsz = do threadDelay 500000 -- 0.5 seconds - v <- catchMaybeIO $ - toBytesProcessed . fileSize - <$> getFileStatus dest + v <- catchMaybeIO $ toBytesProcessed <$> getFileSize dest case v of Just sz | sz /= oldsz -> do diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index fe6f53a77..289008266 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -1,14 +1,15 @@ {- Amazon Glacier remotes. - - - Copyright 2012 Joey Hess <joey@kitenet.net> + - Copyright 2012 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} -module Remote.Glacier (remote, jobList) where +module Remote.Glacier (remote, jobList, checkSaneGlacierCommand) where import qualified Data.Map as M import qualified Data.Text as T +import qualified Data.ByteString.Lazy as L import Common.Annex import Types.Remote @@ -17,18 +18,13 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special -import Remote.Helper.Encryptable import qualified Remote.Helper.AWS as AWS -import Crypto import Creds import Utility.Metered import qualified Annex -import Annex.Content import Annex.UUID import Utility.Env -import System.Process - type Vault = String type Archive = FilePath @@ -43,46 +39,57 @@ remote = RemoteType { gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost where - new cst = Just $ encryptableRemote c - (storeEncrypted this) - (retrieveEncrypted this) + new cst = Just $ specialRemote' specialcfg c + (prepareStore this) + (prepareRetrieve this) + (simplyPrepare $ remove this) + (simplyPrepare $ checkKey this) this where - this = Remote { - uuid = u, - cost = cst, - name = Git.repoDescribe r, - storeKey = store this, - retrieveKeyFile = retrieve this, - retrieveKeyFileCheap = retrieveCheap this, - removeKey = remove this, - hasKey = checkPresent this, - hasKeyCheap = False, - whereisKey = Nothing, - remoteFsck = Nothing, - repairRepo = Nothing, - config = c, - repo = r, - gitconfig = gc, - localpath = Nothing, - readonly = False, - availability = GloballyAvailable, - remotetype = remote + this = Remote + { uuid = u + , cost = cst + , name = Git.repoDescribe r + , storeKey = storeKeyDummy + , retrieveKeyFile = retreiveKeyFileDummy + , retrieveKeyFileCheap = retrieveCheap this + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy + , checkPresentCheap = False + , whereisKey = Nothing + , remoteFsck = Nothing + , repairRepo = Nothing + , config = c + , repo = r + , gitconfig = gc + , localpath = Nothing + , readonly = False + , availability = GloballyAvailable + , remotetype = remote + , mkUnavailable = return Nothing + , getInfo = includeCredsInfo c (AWS.creds u) $ + [ ("glacier vault", getVault c) ] + , claimUrl = Nothing + , checkUrl = Nothing + } + specialcfg = (specialRemoteCfg c) + -- Disabled until jobList gets support for chunks. 
+ { chunkConfig = NoChunks } glacierSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) glacierSetup mu mcreds c = do u <- maybe (liftIO genUUID) return mu - c' <- setRemoteCredPair c (AWS.creds u) mcreds - glacierSetup' (isJust mu) u c' -glacierSetup' :: Bool -> UUID -> RemoteConfig -> Annex (RemoteConfig, UUID) -glacierSetup' enabling u c = do - c' <- encryptionSetup c - let fullconfig = c' `M.union` defaults + glacierSetup' (isJust mu) u mcreds c +glacierSetup' :: Bool -> UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) +glacierSetup' enabling u mcreds c = do + (c', encsetup) <- encryptionSetup c + c'' <- setRemoteCredPair encsetup c' (AWS.creds u) mcreds + let fullconfig = c'' `M.union` defaults unless enabling $ genVault fullconfig u gitConfigSpecialRemote u fullconfig "glacier" "true" - return (c', u) + return (fullconfig, u) where remotename = fromJust (M.lookup "name" c) defvault = remotename ++ "-" ++ fromUUID u @@ -91,38 +98,18 @@ glacierSetup' enabling u c = do , ("vault", defvault) ] -store :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store r k _f p +prepareStore :: Remote -> Preparer Storer +prepareStore r = checkPrepare nonEmpty (byteStorer $ store r) + +nonEmpty :: Key -> Annex Bool +nonEmpty k | keySize k == Just 0 = do warning "Cannot store empty files in Glacier." return False - | otherwise = sendAnnex k (void $ remove r k) $ \src -> - metered (Just p) k $ \meterupdate -> - storeHelper r k $ streamMeteredFile src meterupdate - -storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted r (cipher, enck) k p = sendAnnex k (void $ remove r enck) $ \src -> - metered (Just p) k $ \meterupdate -> - storeHelper r enck $ \h -> - encrypt (getGpgEncParams r) cipher (feedFile src) - (readBytes $ meteredWrite meterupdate h) + | otherwise = return True -retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve r k _f d p = metered (Just p) k $ \meterupdate -> - retrieveHelper r k $ - readBytes $ meteredWriteFile meterupdate d - -retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool -retrieveCheap _ _ _ = return False - -retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted r (cipher, enck) k d p = metered (Just p) k $ \meterupdate -> - retrieveHelper r enck $ readBytes $ \b -> - decrypt cipher (feedBytes b) $ - readBytes $ meteredWriteFile meterupdate d - -storeHelper :: Remote -> Key -> (Handle -> IO ()) -> Annex Bool -storeHelper r k feeder = go =<< glacierEnv c u +store :: Remote -> Key -> L.ByteString -> MeterUpdate -> Annex Bool +store r k b p = go =<< glacierEnv c u where c = config r u = uuid r @@ -135,14 +122,17 @@ storeHelper r k feeder = go =<< glacierEnv c u ] go Nothing = return False go (Just e) = do - let p = (proc "glacier" (toCommand params)) { env = Just e } + let cmd = (proc "glacier" (toCommand params)) { env = Just e } liftIO $ catchBoolIO $ - withHandle StdinHandle createProcessSuccess p $ \h -> do - feeder h + withHandle StdinHandle createProcessSuccess cmd $ \h -> do + meteredWrite p h b return True -retrieveHelper :: Remote -> Key -> (Handle -> IO ()) -> Annex Bool -retrieveHelper r k reader = go =<< glacierEnv c u +prepareRetrieve :: Remote -> Preparer Retriever +prepareRetrieve = simplyPrepare . byteRetriever . 
retrieve + +retrieve :: Remote -> Key -> (L.ByteString -> Annex Bool) -> Annex Bool +retrieve r k sink = go =<< glacierEnv c u where c = config r u = uuid r @@ -153,48 +143,52 @@ retrieveHelper r k reader = go =<< glacierEnv c u , Param $ getVault $ config r , Param $ archive r k ] - go Nothing = return False + go Nothing = error "cannot retrieve from glacier" go (Just e) = do - let p = (proc "glacier" (toCommand params)) { env = Just e } - ok <- liftIO $ catchBoolIO $ - withHandle StdoutHandle createProcessSuccess p $ \h -> - ifM (hIsEOF h) - ( return False - , do - reader h - return True - ) - unless ok later + let cmd = (proc "glacier" (toCommand params)) + { env = Just e + , std_out = CreatePipe + } + (_, Just h, _, pid) <- liftIO $ createProcess cmd + -- Glacier cannot store empty files, so if the output is + -- empty, the content is not available yet. + ok <- ifM (liftIO $ hIsEOF h) + ( return False + , sink =<< liftIO (L.hGetContents h) + ) + liftIO $ hClose h + liftIO $ forceSuccessProcess cmd pid + unless ok $ do + showLongNote "Recommend you wait up to 4 hours, and then run this command again." return ok - later = showLongNote "Recommend you wait up to 4 hours, and then run this command again." -remove :: Remote -> Key -> Annex Bool +retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool +retrieveCheap _ _ _ = return False + +remove :: Remote -> Remover remove r k = glacierAction r [ Param "archive" + , Param "delete" , Param $ getVault $ config r , Param $ archive r k ] -checkPresent :: Remote -> Key -> Annex (Either String Bool) -checkPresent r k = do +checkKey :: Remote -> CheckPresent +checkKey r k = do showAction $ "checking " ++ name r go =<< glacierEnv (config r) (uuid r) where - go Nothing = return $ Left "cannot check glacier" + go Nothing = error "cannot check glacier" go (Just e) = do {- glacier checkpresent outputs the archive name to stdout if - it's present. -} - v <- liftIO $ catchMsgIO $ - readProcessEnv "glacier" (toCommand params) (Just e) - case v of - Right s -> do - let probablypresent = key2file k `elem` lines s - if probablypresent - then ifM (Annex.getFlag "trustglacier") - ( return $ Right True, untrusted ) - else return $ Right False - Left err -> return $ Left err + s <- liftIO $ readProcessEnv "glacier" (toCommand params) (Just e) + let probablypresent = key2file k `elem` lines s + if probablypresent + then ifM (Annex.getFlag "trustglacier") + ( return True, error untrusted ) + else return False params = glacierParams (config r) [ Param "archive" @@ -204,7 +198,7 @@ checkPresent r k = do , Param $ archive r k ] - untrusted = return $ Left $ unlines + untrusted = unlines [ "Glacier's inventory says it has a copy." , "However, the inventory could be out of date, if it was recently removed." 
, "(Use --trust-glacier if you're sure it's still in Glacier.)" @@ -225,10 +219,13 @@ glacierParams :: RemoteConfig -> [CommandParam] -> [CommandParam] glacierParams c params = datacenter:params where datacenter = Param $ "--region=" ++ - fromJust (M.lookup "datacenter" c) + fromMaybe (error "Missing datacenter configuration") + (M.lookup "datacenter" c) glacierEnv :: RemoteConfig -> UUID -> Annex (Maybe [(String, String)]) -glacierEnv c u = go =<< getRemoteCredPairFor "glacier" c creds +glacierEnv c u = do + liftIO checkSaneGlacierCommand + go =<< getRemoteCredPairFor "glacier" c creds where go Nothing = return Nothing go (Just (user, pass)) = do @@ -239,7 +236,8 @@ glacierEnv c u = go =<< getRemoteCredPairFor "glacier" c creds (uk, pk) = credPairEnvironment creds getVault :: RemoteConfig -> Vault -getVault = fromJust . M.lookup "vault" +getVault = fromMaybe (error "Missing vault configuration") + . M.lookup "vault" archive :: Remote -> Key -> Archive archive r k = fileprefix ++ key2file k @@ -261,6 +259,10 @@ genVault c u = unlessM (runGlacier c u params) $ - - A complication is that `glacier job list` will display the encrypted - keys when the remote is encrypted. + - + - Dealing with encrypted chunked keys would be tricky. However, there + - seems to be no benefit to using chunking with glacier, so chunking is + - not supported. -} jobList :: Remote -> [Key] -> Annex ([Key], [Key]) jobList r keys = go =<< glacierEnv (config r) (uuid r) @@ -282,7 +284,8 @@ jobList r keys = go =<< glacierEnv (config r) (uuid r) then return nada else do enckeys <- forM keys $ \k -> - maybe k snd <$> cipherKey (config r) k + maybe k (\(_, enck) -> enck k) + <$> cipherKey (config r) let keymap = M.fromList $ zip enckeys keys let convert = mapMaybe (`M.lookup` keymap) return (convert succeeded, convert failed) @@ -300,3 +303,14 @@ jobList r keys = go =<< glacierEnv (config r) (uuid r) | otherwise -> parse c rest parse c (_:rest) = parse c rest + +-- boto's version of glacier exits 0 when given a parameter it doesn't +-- understand. See https://github.com/boto/boto/issues/2942 +checkSaneGlacierCommand :: IO () +checkSaneGlacierCommand = + whenM ((Nothing /=) <$> catchMaybeIO shouldfail) $ + error wrongcmd + where + test = proc "glacier" ["--compatibility-test-git-annex"] + shouldfail = withQuietOutput createProcessSuccess test + wrongcmd = "The glacier program in PATH seems to be from boto, not glacier-cli. Cannot use this program." diff --git a/Remote/Helper/AWS.hs b/Remote/Helper/AWS.hs index 0687a5ee1..145c48714 100644 --- a/Remote/Helper/AWS.hs +++ b/Remote/Helper/AWS.hs @@ -1,11 +1,12 @@ {- Amazon Web Services common infrastructure. - - - Copyright 2011,2012 Joey Hess <joey@kitenet.net> + - Copyright 2011-2014 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} -{-# LANGUAGE OverloadedStrings, TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} module Remote.Helper.AWS where @@ -13,6 +14,9 @@ import Common.Annex import Creds import qualified Data.Map as M +import qualified Data.ByteString as B +import qualified Data.Text as T +import Data.Text.Encoding (encodeUtf8) import Data.Text (Text) creds :: UUID -> CredPairStorage @@ -33,9 +37,10 @@ regionMap = M.fromList . regionInfo defaultRegion :: Service -> Region defaultRegion = snd . Prelude.head . regionInfo -{- S3 and Glacier use different names for some regions. Ie, "us-east-1" - - cannot be used with S3, while "US" cannot be used with Glacier. Dunno why. 
- - Also, Glacier is not yet available in all regions. -} +data ServiceRegion = BothRegion Region | S3Region Region | GlacierRegion Region + +{- The "US" and "EU" names are used as location constraints when creating a + - S3 bucket. -} regionInfo :: Service -> [(Text, Region)] regionInfo service = map (\(t, r) -> (t, fromServiceRegion r)) $ filter (matchingService . snd) $ @@ -45,6 +50,7 @@ regionInfo service = map (\(t, r) -> (t, fromServiceRegion r)) $ [ ("US East (N. Virginia)", [S3Region "US", GlacierRegion "us-east-1"]) , ("US West (Oregon)", [BothRegion "us-west-2"]) , ("US West (N. California)", [BothRegion "us-west-1"]) + , ("EU (Frankfurt)", [BothRegion "eu-central-1"]) , ("EU (Ireland)", [S3Region "EU", GlacierRegion "eu-west-1"]) , ("Asia Pacific (Singapore)", [S3Region "ap-southeast-1"]) , ("Asia Pacific (Tokyo)", [BothRegion "ap-northeast-1"]) @@ -60,4 +66,10 @@ regionInfo service = map (\(t, r) -> (t, fromServiceRegion r)) $ matchingService (S3Region _) = service == S3 matchingService (GlacierRegion _) = service == Glacier -data ServiceRegion = BothRegion Region | S3Region Region | GlacierRegion Region +s3HostName :: Region -> B.ByteString +s3HostName "US" = "s3.amazonaws.com" +s3HostName "EU" = "s3-eu-west-1.amazonaws.com" +s3HostName r = encodeUtf8 $ T.concat ["s3-", r, ".amazonaws.com"] + +s3DefaultHost :: String +s3DefaultHost = "s3.amazonaws.com" diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index ad3b04d49..2f21ba66c 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -1,144 +1,409 @@ {- git-annex chunked remotes - - - Copyright 2012 Joey Hess <joey@kitenet.net> + - Copyright 2014 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} -module Remote.Helper.Chunked where +module Remote.Helper.Chunked ( + ChunkSize, + ChunkConfig(..), + describeChunkConfig, + getChunkConfig, + storeChunks, + removeChunks, + retrieveChunks, + checkPresentChunks, +) where import Common.Annex import Utility.DataUnits +import Types.StoreRetrieve import Types.Remote +import Types.Key +import Logs.Chunk import Utility.Metered +import Crypto (EncKey) +import Backend (isStableKey) -import qualified Data.Map as M import qualified Data.ByteString.Lazy as L -import Data.Int -import qualified Control.Exception as E +import qualified Data.Map as M + +data ChunkConfig + = NoChunks + | UnpaddedChunks ChunkSize + | LegacyChunks ChunkSize + deriving (Show) + +describeChunkConfig :: ChunkConfig -> String +describeChunkConfig NoChunks = "none" +describeChunkConfig (UnpaddedChunks sz) = describeChunkSize sz ++ "chunks" +describeChunkConfig (LegacyChunks sz) = describeChunkSize sz ++ " chunks (old style)" -type ChunkSize = Maybe Int64 +describeChunkSize :: ChunkSize -> String +describeChunkSize sz = roughSize storageUnits False (fromIntegral sz) -{- Gets a remote's configured chunk size. -} -chunkSize :: RemoteConfig -> ChunkSize -chunkSize m = +noChunks :: ChunkConfig -> Bool +noChunks NoChunks = True +noChunks _ = False + +getChunkConfig :: RemoteConfig -> ChunkConfig +getChunkConfig m = case M.lookup "chunksize" m of - Nothing -> Nothing - Just v -> case readSize dataUnits v of - Nothing -> error "bad chunksize" - Just size - | size <= 0 -> error "bad chunksize" - | otherwise -> Just $ fromInteger size - -{- This is an extension that's added to the usual file (or whatever) - - where the remote stores a key. -} -type ChunkExt = String - -{- A record of the number of chunks used. 
+ Nothing -> case M.lookup "chunk" m of + Nothing -> NoChunks + Just v -> readsz UnpaddedChunks v "chunk" + Just v -> readsz LegacyChunks v "chunksize" + where + readsz c v f = case readSize dataUnits v of + Just size + | size == 0 -> NoChunks + | size > 0 -> c (fromInteger size) + _ -> error $ "bad configuration " ++ f ++ "=" ++ v + +-- An infinite stream of chunk keys, starting from chunk 1. +newtype ChunkKeyStream = ChunkKeyStream [Key] + +chunkKeyStream :: Key -> ChunkSize -> ChunkKeyStream +chunkKeyStream basek chunksize = ChunkKeyStream $ map mk [1..] + where + mk chunknum = sizedk { keyChunkNum = Just chunknum } + sizedk = basek { keyChunkSize = Just (toInteger chunksize) } + +nextChunkKeyStream :: ChunkKeyStream -> (Key, ChunkKeyStream) +nextChunkKeyStream (ChunkKeyStream (k:l)) = (k, ChunkKeyStream l) +nextChunkKeyStream (ChunkKeyStream []) = undefined -- stream is infinite! + +takeChunkKeyStream :: ChunkCount -> ChunkKeyStream -> [Key] +takeChunkKeyStream n (ChunkKeyStream l) = genericTake n l + +-- Number of chunks already consumed from the stream. +numChunks :: ChunkKeyStream -> Integer +numChunks = pred . fromJust . keyChunkNum . fst . nextChunkKeyStream + +{- Splits up the key's content into chunks, passing each chunk to + - the storer action, along with a corresponding chunk key and a + - progress meter update callback. + - + - To support resuming, the checker is used to find the first missing + - chunk key. Storing starts from that chunk. - - - While this can be guessed at based on the size of the key, encryption - - makes that larger. Also, using this helps deal with changes to chunksize - - over the life of a remote. + - This buffers each chunk in memory, so can use a lot of memory + - with a large ChunkSize. + - More optimal versions of this can be written, that rely + - on L.toChunks to split the lazy bytestring into chunks (typically + - smaller than the ChunkSize), and eg, write those chunks to a Handle. + - But this is the best that can be done with the storer interface that + - writes a whole L.ByteString at a time. -} -chunkCount :: ChunkExt -chunkCount = ".chunkcount" +storeChunks + :: UUID + -> ChunkConfig + -> Key + -> FilePath + -> MeterUpdate + -> Storer + -> CheckPresent + -> Annex Bool +storeChunks u chunkconfig k f p storer checker = + case chunkconfig of + (UnpaddedChunks chunksize) | isStableKey k -> + bracketIO open close (go chunksize) + _ -> storer k (FileContent f) p + where + open = tryIO $ openBinaryFile f ReadMode -{- An infinite stream of extensions to use for chunks. -} -chunkStream :: [ChunkExt] -chunkStream = map (\n -> ".chunk" ++ show n) [1 :: Integer ..] + close (Right h) = hClose h + close (Left _) = noop -{- Parses the String from the chunkCount file, and returns the files that - - are used to store the chunks. -} -listChunks :: FilePath -> String -> [FilePath] -listChunks basedest chunkcount = take count $ map (basedest ++) chunkStream - where - count = fromMaybe 0 $ readish chunkcount + go _ (Left e) = do + warning (show e) + return False + go chunksize (Right h) = do + let chunkkeys = chunkKeyStream k chunksize + (chunkkeys', startpos) <- seekResume h chunkkeys checker + b <- liftIO $ L.hGetContents h + gochunks p startpos chunksize b chunkkeys' -{- For use when there is no chunkCount file; uses the action to find - - chunks, and returns them, or Nothing if none found. Relies on - - storeChunks's finalizer atomically moving the chunks into place once all - - are written. 
+ gochunks :: MeterUpdate -> BytesProcessed -> ChunkSize -> L.ByteString -> ChunkKeyStream -> Annex Bool + gochunks meterupdate startpos chunksize = loop startpos . splitchunk + where + splitchunk = L.splitAt chunksize + + loop bytesprocessed (chunk, bs) chunkkeys + | L.null chunk && numchunks > 0 = do + -- Once all chunks are successfully + -- stored, update the chunk log. + chunksStored u k (FixedSizeChunks chunksize) numchunks + return True + | otherwise = do + liftIO $ meterupdate' zeroBytesProcessed + let (chunkkey, chunkkeys') = nextChunkKeyStream chunkkeys + ifM (storer chunkkey (ByteContent chunk) meterupdate') + ( do + let bytesprocessed' = addBytesProcessed bytesprocessed (L.length chunk) + loop bytesprocessed' (splitchunk bs) chunkkeys' + , return False + ) + where + numchunks = numChunks chunkkeys + {- The MeterUpdate that is passed to the action + - storing a chunk is offset, so that it reflects + - the total bytes that have already been stored + - in previous chunks. -} + meterupdate' = offsetMeterUpdate meterupdate bytesprocessed + +{- Check if any of the chunk keys are present. If found, seek forward + - in the Handle, so it will be read starting at the first missing chunk. + - Returns the ChunkKeyStream truncated to start at the first missing + - chunk, and the number of bytes skipped due to resuming. - - - This is only needed to work around a bug that caused the chunkCount file - - not to be written. + - As an optimisation, if the file fits into a single chunk, there's no need + - to check if that chunk is present -- we know it's not, because otherwise + - the whole file would be present and there would be no reason to try to + - store it. -} -probeChunks :: FilePath -> (FilePath -> IO Bool) -> IO [FilePath] -probeChunks basedest check = go [] $ map (basedest ++) chunkStream +seekResume + :: Handle + -> ChunkKeyStream + -> CheckPresent + -> Annex (ChunkKeyStream, BytesProcessed) +seekResume h chunkkeys checker = do + sz <- liftIO (hFileSize h) + if sz <= fromMaybe 0 (keyChunkSize $ fst $ nextChunkKeyStream chunkkeys) + then return (chunkkeys, zeroBytesProcessed) + else check 0 chunkkeys sz where - go l [] = return (reverse l) - go l (c:cs) = ifM (check c) - ( go (c:l) cs - , go l [] - ) - -{- Given the base destination to use to store a value, - - generates a stream of temporary destinations (just one when not chunking) - - and passes it to an action, which should chunk and store the data, - - and return the destinations it stored to, or [] on error. Then - - calls the recorder to write the chunk count (if chunking). Finally, the - - finalizer is called to rename the tmp into the dest - - (and do any other cleanup). + check pos cks sz + | pos >= sz = do + -- All chunks are already stored! + liftIO $ hSeek h AbsoluteSeek sz + return (cks, toBytesProcessed sz) + | otherwise = do + v <- tryNonAsync (checker k) + case v of + Right True -> + check pos' cks' sz + _ -> do + when (pos > 0) $ + liftIO $ hSeek h AbsoluteSeek pos + return (cks, toBytesProcessed pos) + where + (k, cks') = nextChunkKeyStream cks + pos' = pos + fromMaybe 0 (keyChunkSize k) + +{- Removes all chunks of a key from a remote, by calling a remover + - action on each. + - + - The remover action should succeed even if asked to + - remove a key that is not present on the remote. + - + - This action may be called on a chunked key. It will simply remove it. 
+ -}
+removeChunks :: (Key -> Annex Bool) -> UUID -> ChunkConfig -> EncKey -> Key -> Annex Bool
+removeChunks remover u chunkconfig encryptor k = do
+	ls <- chunkKeys u chunkconfig k
+	ok <- allM (remover . encryptor) (concat ls)
+	when ok $ do
+		let chunksizes = catMaybes $ map (keyChunkSize <=< headMaybe) ls
+		forM_ chunksizes $ chunksRemoved u k . FixedSizeChunks . fromIntegral
+	return ok
+
+{- Retrieves a key from a remote, using a retriever action.
+ -
+ - When the remote is chunked, tries each of the options returned by
+ - chunkKeys until it finds one where the retriever successfully
+ - gets the first chunked key. The content of that key, and any
+ - other chunks in the list, are fed to the sink.
+ -
+ - If retrieval of one of the subsequent chunks throws an exception,
+ - gives up and returns False. Note that partial data may have been
+ - written to the sink in this case.
+ -
+ - Resuming is supported when using chunks. When the destination file
+ - already exists, it skips to the next chunked key that would be needed
+ - to resume.
 -}
-storeChunks :: Key -> FilePath -> FilePath -> ChunkSize -> ([FilePath] -> IO [FilePath]) -> (FilePath -> String -> IO ()) -> (FilePath -> FilePath -> IO ()) -> IO Bool
-storeChunks key tmp dest chunksize storer recorder finalizer = either onerr return
-	=<< (E.try go :: IO (Either E.SomeException Bool))
+retrieveChunks
+	:: Retriever
+	-> UUID
+	-> ChunkConfig
+	-> EncKey
+	-> Key
+	-> FilePath
+	-> MeterUpdate
+	-> (Maybe Handle -> Maybe MeterUpdate -> ContentSource -> Annex Bool)
+	-> Annex Bool
+retrieveChunks retriever u chunkconfig encryptor basek dest basep sink
+	| noChunks chunkconfig =
+		-- Optimisation: Try the unchunked key first, to avoid
+		-- looking in the git-annex branch for chunk counts
+		-- that are likely not there.
+		getunchunked `catchNonAsync`
+			const (go =<< chunkKeysOnly u basek)
+	| otherwise = go =<< chunkKeys u chunkconfig basek
 where
-	go = do
-		stored <- storer tmpdests
-		when (isJust chunksize) $ do
-			let chunkcount = basef ++ chunkCount
-			recorder chunkcount (show $ length stored)
-		finalizer tmp dest
-		return (not $ null stored)
-	onerr e = do
-		print e
+	go ls = do
+		currsize <- liftIO $ catchMaybeIO $ getFileSize dest
+		let ls' = maybe ls (setupResume ls) currsize
+		if any null ls'
+			then return True -- dest is already complete
+			else firstavail currsize ls' `catchNonAsync` giveup
+
+	giveup e = do
+		warning (show e)
 		return False
-	basef = tmp ++ keyFile key
-	tmpdests
-		| isNothing chunksize = [basef]
-		| otherwise = map (basef ++ ) chunkStream
+	firstavail _ [] = return False
+	firstavail currsize ([]:ls) = firstavail currsize ls
+	firstavail currsize ((k:ks):ls)
+		| k == basek = getunchunked
+			`catchNonAsync` (const $ firstavail currsize ls)
+		| otherwise = do
+			let offset = resumeOffset currsize k
+			let p = maybe basep
+				(offsetMeterUpdate basep .
toBytesProcessed) + offset + v <- tryNonAsync $ + retriever (encryptor k) p $ \content -> + bracketIO (maybe opennew openresume offset) hClose $ \h -> do + void $ tosink (Just h) p content + let sz = toBytesProcessed $ + fromMaybe 0 $ keyChunkSize k + getrest p h sz sz ks + `catchNonAsync` giveup + case v of + Left e + | null ls -> giveup e + | otherwise -> firstavail currsize ls + Right r -> return r + + getrest _ _ _ _ [] = return True + getrest p h sz bytesprocessed (k:ks) = do + let p' = offsetMeterUpdate p bytesprocessed + liftIO $ p' zeroBytesProcessed + ifM (retriever (encryptor k) p' $ tosink (Just h) p') + ( getrest p h sz (addBytesProcessed bytesprocessed sz) ks + , giveup "chunk retrieval failed" + ) + + getunchunked = retriever (encryptor basek) basep $ tosink Nothing basep -{- Given a list of destinations to use, chunks the data according to the - - ChunkSize, and runs the storer action to store each chunk. Returns - - the destinations where data was stored, or [] on error. + opennew = openBinaryFile dest WriteMode + + -- Open the file and seek to the start point in order to resume. + openresume startpoint = do + -- ReadWriteMode allows seeking; AppendMode does not. + h <- openBinaryFile dest ReadWriteMode + hSeek h AbsoluteSeek startpoint + return h + + {- Progress meter updating is a bit tricky: If the Retriever + - populates a file, it is responsible for updating progress + - as the file is being retrieved. + - + - However, if the Retriever generates a lazy ByteString, + - it is not responsible for updating progress (often it cannot). + - Instead, the sink is passed a meter to update as it consumes + - the ByteString. + -} + tosink h p content = sink h p' content + where + p' + | isByteContent content = Just p + | otherwise = Nothing + +{- Can resume when the chunk's offset is at or before the end of + - the dest file. -} +resumeOffset :: Maybe Integer -> Key -> Maybe Integer +resumeOffset Nothing _ = Nothing +resumeOffset currsize k + | offset <= currsize = offset + | otherwise = Nothing + where + offset = chunkKeyOffset k + +{- Drops chunks that are already present in a file, based on its size. + - Keeps any non-chunk keys. + -} +setupResume :: [[Key]] -> Integer -> [[Key]] +setupResume ls currsize = map dropunneeded ls + where + dropunneeded [] = [] + dropunneeded l@(k:_) = case keyChunkSize k of + Just chunksize | chunksize > 0 -> + genericDrop (currsize `div` chunksize) l + _ -> l + +{- Checks if a key is present in a remote. This requires any one + - of the lists of options returned by chunkKeys to all check out + - as being present using the checker action. - - - This buffers each chunk in memory. - - More optimal versions of this can be written, that rely - - on L.toChunks to split the lazy bytestring into chunks (typically - - smaller than the ChunkSize), and eg, write those chunks to a Handle. - - But this is the best that can be done with the storer interface that - - writes a whole L.ByteString at a time. + - Throws an exception if the remote is not accessible. 
-} -storeChunked :: ChunkSize -> [FilePath] -> (FilePath -> L.ByteString -> IO ()) -> L.ByteString -> IO [FilePath] -storeChunked chunksize dests storer content = either onerr return - =<< (E.try (go chunksize dests) :: IO (Either E.SomeException [FilePath])) +checkPresentChunks + :: CheckPresent + -> UUID + -> ChunkConfig + -> EncKey + -> Key + -> Annex Bool +checkPresentChunks checker u chunkconfig encryptor basek + | noChunks chunkconfig = do + -- Optimisation: Try the unchunked key first, to avoid + -- looking in the git-annex branch for chunk counts + -- that are likely not there. + v <- check basek + case v of + Right True -> return True + Left e -> checklists (Just e) =<< chunkKeysOnly u basek + _ -> checklists Nothing =<< chunkKeysOnly u basek + | otherwise = checklists Nothing =<< chunkKeys u chunkconfig basek where - go _ [] = return [] -- no dests!? - go Nothing (d:_) = do - storer d content - return [d] - go (Just sz) _ - -- always write a chunk, even if the data is 0 bytes - | L.null content = go Nothing dests - | otherwise = storechunks sz [] dests content - - onerr e = do - print e - return [] + checklists Nothing [] = return False + checklists (Just deferrederror) [] = throwM deferrederror + checklists d (l:ls) + | not (null l) = do + v <- checkchunks l + case v of + Left e -> checklists (Just e) ls + Right True -> return True + Right False -> checklists Nothing ls + | otherwise = checklists d ls - storechunks _ _ [] _ = return [] -- ran out of dests - storechunks sz useddests (d:ds) b - | L.null b = return $ reverse useddests - | otherwise = do - let (chunk, b') = L.splitAt sz b - storer d chunk - storechunks sz (d:useddests) ds b' - -{- Writes a series of chunks to a file. The feeder is called to get - - each chunk. -} -meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteString) -> IO () -meteredWriteFileChunks meterupdate dest chunks feeder = - withBinaryFile dest WriteMode $ \h -> - forM_ chunks $ - meteredWrite meterupdate h <=< feeder + checkchunks :: [Key] -> Annex (Either SomeException Bool) + checkchunks [] = return (Right True) + checkchunks (k:ks) = do + v <- check k + case v of + Right True -> checkchunks ks + Right False -> return $ Right False + Left e -> return $ Left e + + check = tryNonAsync . checker . encryptor + +{- A key can be stored in a remote unchunked, or as a list of chunked keys. + - This can be the case whether or not the remote is currently configured + - to use chunking. + - + - It's even possible for a remote to have the same key stored multiple + - times with different chunk sizes! + - + - This finds all possible lists of keys that might be on the remote that + - can be combined to get back the requested key, in order from most to + - least likely to exist. 
+ -} +chunkKeys :: UUID -> ChunkConfig -> Key -> Annex [[Key]] +chunkKeys u chunkconfig k = do + l <- chunkKeysOnly u k + return $ if noChunks chunkconfig + then [k] : l + else l ++ [[k]] + +chunkKeysOnly :: UUID -> Key -> Annex [[Key]] +chunkKeysOnly u k = map (toChunkList k) <$> getCurrentChunks u k + +toChunkList :: Key -> (ChunkMethod, ChunkCount) -> [Key] +toChunkList k (FixedSizeChunks chunksize, chunkcount) = + takeChunkKeyStream chunkcount $ chunkKeyStream k chunksize +toChunkList _ (UnknownChunks _, _) = [] diff --git a/Remote/Helper/Chunked/Legacy.hs b/Remote/Helper/Chunked/Legacy.hs new file mode 100644 index 000000000..ae3a29f32 --- /dev/null +++ b/Remote/Helper/Chunked/Legacy.hs @@ -0,0 +1,126 @@ +{- legacy git-annex chunked remotes + - + - Copyright 2012 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Remote.Helper.Chunked.Legacy where + +import Common.Annex +import Remote.Helper.Chunked +import Utility.Metered + +import qualified Data.ByteString.Lazy as L +import qualified Control.Exception as E + +{- This is an extension that's added to the usual file (or whatever) + - where the remote stores a key. -} +type ChunkExt = String + +{- A record of the number of chunks used. + - + - While this can be guessed at based on the size of the key, encryption + - makes that larger. Also, using this helps deal with changes to chunksize + - over the life of a remote. + -} +chunkCount :: ChunkExt +chunkCount = ".chunkcount" + +{- An infinite stream of extensions to use for chunks. -} +chunkStream :: [ChunkExt] +chunkStream = map (\n -> ".chunk" ++ show n) [1 :: Integer ..] + +{- Parses the String from the chunkCount file, and returns the files that + - are used to store the chunks. -} +listChunks :: FilePath -> String -> [FilePath] +listChunks basedest chunkcount = take count $ map (basedest ++) chunkStream + where + count = fromMaybe 0 $ readish chunkcount + +{- For use when there is no chunkCount file; uses the action to find + - chunks, and returns them, or Nothing if none found. Relies on + - storeChunks's finalizer atomically moving the chunks into place once all + - are written. + - + - This is only needed to work around a bug that caused the chunkCount file + - not to be written. + -} +probeChunks :: FilePath -> (FilePath -> IO Bool) -> IO [FilePath] +probeChunks basedest check = go [] $ map (basedest ++) chunkStream + where + go l [] = return (reverse l) + go l (c:cs) = ifM (check c) + ( go (c:l) cs + , go l [] + ) + +{- Given the base destination to use to store a value, + - generates a stream of temporary destinations, + - and passes it to an action, which should chunk and store the data, + - and return the destinations it stored to, or [] on error. Then + - calls the recorder to write the chunk count. Finally, the + - finalizer is called to rename the tmp into the dest + - (and do any other cleanup). 
+ -} +storeChunks :: Key -> FilePath -> FilePath -> ([FilePath] -> IO [FilePath]) -> (FilePath -> String -> IO ()) -> (FilePath -> FilePath -> IO ()) -> IO Bool +storeChunks key tmp dest storer recorder finalizer = either onerr return + =<< (E.try go :: IO (Either E.SomeException Bool)) + where + go = do + stored <- storer tmpdests + let chunkcount = basef ++ chunkCount + recorder chunkcount (show $ length stored) + finalizer tmp dest + return (not $ null stored) + onerr e = do + warningIO (show e) + return False + + basef = tmp ++ keyFile key + tmpdests = map (basef ++ ) chunkStream + +{- Given a list of destinations to use, chunks the data according to the + - ChunkSize, and runs the storer action to store each chunk. Returns + - the destinations where data was stored, or [] on error. + - + - This buffers each chunk in memory. + - More optimal versions of this can be written, that rely + - on L.toChunks to split the lazy bytestring into chunks (typically + - smaller than the ChunkSize), and eg, write those chunks to a Handle. + - But this is the best that can be done with the storer interface that + - writes a whole L.ByteString at a time. + -} +storeChunked :: ChunkSize -> [FilePath] -> (FilePath -> L.ByteString -> IO ()) -> L.ByteString -> IO [FilePath] +storeChunked chunksize dests storer content = either onerr return + =<< (E.try (go (Just chunksize) dests) :: IO (Either E.SomeException [FilePath])) + where + go _ [] = return [] -- no dests!? + go Nothing (d:_) = do + storer d content + return [d] + go (Just sz) _ + -- always write a chunk, even if the data is 0 bytes + | L.null content = go Nothing dests + | otherwise = storechunks sz [] dests content + + onerr e = do + warningIO (show e) + return [] + + storechunks _ _ [] _ = return [] -- ran out of dests + storechunks sz useddests (d:ds) b + | L.null b = return $ reverse useddests + | otherwise = do + let (chunk, b') = L.splitAt sz b + storer d chunk + storechunks sz (d:useddests) ds b' + +{- Writes a series of chunks to a file. The feeder is called to get + - each chunk. + -} +meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteString) -> IO () +meteredWriteFileChunks meterupdate dest chunks feeder = + withBinaryFile dest WriteMode $ \h -> + forM_ chunks $ + meteredWrite meterupdate h <=< feeder diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs index a6e79ddc4..c1243a518 100644 --- a/Remote/Helper/Encryptable.hs +++ b/Remote/Helper/Encryptable.hs @@ -1,11 +1,23 @@ {- common functions for encryptable remotes - - - Copyright 2011 Joey Hess <joey@kitenet.net> + - Copyright 2011 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} -module Remote.Helper.Encryptable where +module Remote.Helper.Encryptable ( + EncryptionIsSetup, + encryptionSetup, + noEncryptionUsed, + encryptionAlreadySetup, + remoteCipher, + remoteCipher', + embedCreds, + cipherKey, + storeCipher, + extractCipher, + describeEncryption, +) where import qualified Data.Map as M @@ -14,15 +26,28 @@ import Types.Remote import Crypto import Types.Crypto import qualified Annex -import Config.Cost import Utility.Base64 -import Utility.Metered + +-- Used to ensure that encryption has been set up before trying to +-- eg, store creds in the remote config that would need to use the +-- encryption setup. +data EncryptionIsSetup = EncryptionIsSetup | NoEncryption + +-- Remotes that don't use encryption can use this instead of +-- encryptionSetup. 
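The witness type above exists so that code which embeds credentials in a remote's config cannot be reached before the encryption question has been settled. A toy sketch of the pattern, with a hypothetical consumer (storeCreds is not part of this module):

data EncryptionIsSetup = EncryptionIsSetup | NoEncryption

-- Hypothetical consumer: the only ways to obtain a witness are
-- encryptionSetup and the two explicit escape hatches, so a call made
-- before encryption is configured simply will not typecheck.
storeCreds :: EncryptionIsSetup -> String -> IO ()
storeCreds _witness creds = putStrLn ("embedding creds: " ++ creds)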
+noEncryptionUsed :: EncryptionIsSetup +noEncryptionUsed = NoEncryption + +-- Using this avoids the type-safe check, so you'd better be sure +-- of what you're doing. +encryptionAlreadySetup :: EncryptionIsSetup +encryptionAlreadySetup = EncryptionIsSetup {- Encryption setup for a remote. The user must specify whether to use - an encryption key, or not encrypt. An encrypted cipher is created, or is - updated to be accessible to an additional encryption key. Or the user - could opt to use a shared cipher, which is stored unencrypted. -} -encryptionSetup :: RemoteConfig -> Annex RemoteConfig +encryptionSetup :: RemoteConfig -> Annex (RemoteConfig, EncryptionIsSetup) encryptionSetup c = maybe genCipher updateCipher $ extractCipher c where -- The type of encryption @@ -30,11 +55,11 @@ encryptionSetup c = maybe genCipher updateCipher $ extractCipher c -- Generate a new cipher, depending on the chosen encryption scheme genCipher = case encryption of _ | M.member "cipher" c || M.member "cipherkeys" c -> cannotchange - Just "none" -> return c + Just "none" -> return (c, NoEncryption) Just "shared" -> use "encryption setup" . genSharedCipher =<< highRandomQuality -- hybrid encryption is the default when a keyid is - -- specified but no encryption + -- specified but no encryption _ | maybe (M.member "keyid" c) (== "hybrid") encryption -> use "encryption setup" . genEncryptedCipher key Hybrid =<< highRandomQuality @@ -50,7 +75,7 @@ encryptionSetup c = maybe genCipher updateCipher $ extractCipher c cannotchange = error "Cannot set encryption type of existing remotes." -- Update an existing cipher if possible. updateCipher v = case v of - SharedCipher _ | maybe True (== "shared") encryption -> return c' + SharedCipher _ | maybe True (== "shared") encryption -> return (c', EncryptionIsSetup) EncryptedCipher _ variant _ | maybe True (== if variant == Hybrid then "hybrid" else "pubkey") encryption -> use "encryption update" $ updateEncryptedCipher newkeys v @@ -59,64 +84,34 @@ encryptionSetup c = maybe genCipher updateCipher $ extractCipher c showNote m cipher <- liftIO a showNote $ describeCipher cipher - return $ storeCipher c' cipher + return (storeCipher c' cipher, EncryptionIsSetup) highRandomQuality = (&&) (maybe True ( /= "false") $ M.lookup "highRandomQuality" c) <$> fmap not (Annex.getState Annex.fast) c' = foldr M.delete c - -- git-annex used to remove 'encryption' as well, since - -- it was redundant; we now need to keep it for - -- public-key incryption, hence we leave it on newer - -- remotes (while being backward-compatible). + -- git-annex used to remove 'encryption' as well, since + -- it was redundant; we now need to keep it for + -- public-key encryption, hence we leave it on newer + -- remotes (while being backward-compatible). [ "keyid", "keyid+", "keyid-", "highRandomQuality" ] -{- Modifies a Remote to support encryption. - - - - Two additional functions must be provided by the remote, - - to support storing and retrieving encrypted content. 
-} -encryptableRemote - :: RemoteConfig - -> ((Cipher, Key) -> Key -> MeterUpdate -> Annex Bool) - -> ((Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool) - -> Remote - -> Remote -encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = - r { - storeKey = store, - retrieveKeyFile = retrieve, - retrieveKeyFileCheap = retrieveCheap, - removeKey = withkey $ removeKey r, - hasKey = withkey $ hasKey r, - cost = cost r + encryptedRemoteCostAdj - } - where - store k f p = cip k >>= maybe - (storeKey r k f p) - (\enck -> storeKeyEncrypted enck k p) - retrieve k f d p = cip k >>= maybe - (retrieveKeyFile r k f d p) - (\enck -> retrieveKeyFileEncrypted enck k d p) - retrieveCheap k d = cip k >>= maybe - (retrieveKeyFileCheap r k d) - (\_ -> return False) - withkey a k = cip k >>= maybe (a k) (a . snd) - cip = cipherKey c +remoteCipher :: RemoteConfig -> Annex (Maybe Cipher) +remoteCipher = fmap fst <$$> remoteCipher' {- Gets encryption Cipher. The decrypted Ciphers are cached in the Annex - state. -} -remoteCipher :: RemoteConfig -> Annex (Maybe Cipher) -remoteCipher c = go $ extractCipher c +remoteCipher' :: RemoteConfig -> Annex (Maybe (Cipher, StorableCipher)) +remoteCipher' c = go $ extractCipher c where go Nothing = return Nothing go (Just encipher) = do cache <- Annex.getState Annex.ciphers case M.lookup encipher cache of - Just cipher -> return $ Just cipher + Just cipher -> return $ Just (cipher, encipher) Nothing -> do - showNote "gpg" cipher <- liftIO $ decryptCipher encipher Annex.changeState (\s -> s { Annex.ciphers = M.insert encipher cipher cache }) - return $ Just cipher + return $ Just (cipher, encipher) {- Checks if the remote's config allows storing creds in the remote's config. - @@ -133,11 +128,11 @@ embedCreds c | isJust (M.lookup "cipherkeys" c) && isJust (M.lookup "cipher" c) = True | otherwise = False -{- Gets encryption Cipher, and encrypted version of Key. -} -cipherKey :: RemoteConfig -> Key -> Annex (Maybe (Cipher, Key)) -cipherKey c k = fmap make <$> remoteCipher c +{- Gets encryption Cipher, and key encryptor. -} +cipherKey :: RemoteConfig -> Annex (Maybe (Cipher, EncKey)) +cipherKey c = fmap make <$> remoteCipher c where - make ciphertext = (ciphertext, encryptKey mac ciphertext k) + make ciphertext = (ciphertext, encryptKey mac ciphertext) mac = fromMaybe defaultMac $ M.lookup "mac" c >>= readMac {- Stores an StorableCipher in a remote's configuration. -} @@ -162,3 +157,15 @@ extractCipher c = case (M.lookup "cipher" c, _ -> Nothing where readkeys = KeyIds . split "," + +describeEncryption :: RemoteConfig -> String +describeEncryption c = case extractCipher c of + Nothing -> "not encrypted" + (Just (SharedCipher _)) -> "encrypted (encryption key stored in git repository)" + (Just (EncryptedCipher _ v (KeyIds { keyIds = ks }))) -> unwords $ catMaybes + [ Just "encrypted (to gpg keys:" + , Just (unwords ks ++ ")") + , case v of + PubKey -> Nothing + Hybrid -> Just "(hybrid mode)" + ] diff --git a/Remote/Helper/Git.hs b/Remote/Helper/Git.hs index d76cb2ee7..3b438a0bb 100644 --- a/Remote/Helper/Git.hs +++ b/Remote/Helper/Git.hs @@ -1,6 +1,6 @@ {- Utilities for git remotes. - - - Copyright 2011-2014 Joey Hess <joey@kitenet.net> + - Copyright 2011-2014 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. 
-} @@ -10,6 +10,9 @@ module Remote.Helper.Git where import Common.Annex import qualified Git import Types.Availability +import qualified Types.Remote as Remote + +import Data.Time.Clock.POSIX repoCheap :: Git.Repo -> Bool repoCheap = not . Git.repoIsUrl @@ -26,7 +29,20 @@ availabilityCalc r {- Avoids performing an action on a local repository that's not usable. - Does not check that the repository is still available on disk. -} -guardUsable :: Git.Repo -> a -> Annex a -> Annex a -guardUsable r onerr a - | Git.repoIsLocalUnknown r = return onerr +guardUsable :: Git.Repo -> Annex a -> Annex a -> Annex a +guardUsable r fallback a + | Git.repoIsLocalUnknown r = fallback | otherwise = a + +gitRepoInfo :: Remote -> Annex [(String, String)] +gitRepoInfo r = do + d <- fromRepo Git.localGitDir + mtimes <- liftIO $ mapM (modificationTime <$$> getFileStatus) + =<< dirContentsRecursive (d </> "refs" </> "remotes" </> Remote.name r) + let lastsynctime = case mtimes of + [] -> "never" + _ -> show $ posixSecondsToUTCTime $ realToFrac $ maximum mtimes + return + [ ("repository location", Git.repoLocation (Remote.repo r)) + , ("last synced", lastsynctime) + ] diff --git a/Remote/Helper/Hooks.hs b/Remote/Helper/Hooks.hs index b7deae577..3765281be 100644 --- a/Remote/Helper/Hooks.hs +++ b/Remote/Helper/Hooks.hs @@ -1,6 +1,6 @@ {- Adds hooks to remotes. - - - Copyright 2012 Joey Hess <joey@kitenet.net> + - Copyright 2012 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -15,11 +15,10 @@ import Common.Annex import Types.Remote import Types.CleanupActions import qualified Annex -import Annex.LockPool +import Annex.LockFile +import Utility.LockFile #ifndef mingw32_HOST_OS import Annex.Perms -#else -import Utility.WinLock #endif {- Modifies a remote's access functions to first run the @@ -39,7 +38,7 @@ addHooks' r starthook stophook = r' , retrieveKeyFile = \k f d p -> wrapper $ retrieveKeyFile r k f d p , retrieveKeyFileCheap = \k f -> wrapper $ retrieveKeyFileCheap r k f , removeKey = wrapper . removeKey r - , hasKey = wrapper . hasKey r + , checkPresent = wrapper . checkPresent r } where wrapper = runHooks r' starthook stophook @@ -48,7 +47,7 @@ runHooks :: Remote -> Maybe String -> Maybe String -> Annex a -> Annex a runHooks r starthook stophook a = do dir <- fromRepo gitAnnexRemotesDir let lck = dir </> remoteid ++ ".lck" - whenM (notElem lck . M.keys <$> getPool) $ do + whenM (notElem lck . M.keys <$> getLockPool) $ do liftIO $ createDirectoryIfMissing True dir firstrun lck a @@ -63,7 +62,7 @@ runHooks r starthook stophook a = do -- of it from running the stophook. If another -- instance is shutting down right now, this -- will block waiting for its exclusive lock to clear. - lockFile lck + lockFileShared lck -- The starthook is run even if some other git-annex -- is already running, and ran it before. 
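The locking protocol here amounts to: first one in runs the starthook, last one out runs the stophook. A toy in-process model of the same protocol, using a counter in an MVar where the real code uses shared and exclusive file locks (withHooks is an illustrative name, not git-annex's API):

import Control.Concurrent.MVar
import Control.Monad (when)

-- The first caller runs start; the last caller out runs stop. Across
-- processes, git-annex gets the same effect by holding a shared lock
-- while running, and running the stophook only if an exclusive lock
-- can be taken at shutdown (ie, nobody else still holds the shared lock).
withHooks :: MVar Int -> IO () -> IO () -> IO a -> IO a
withHooks users start stop body = do
	modifyMVar_ users $ \n -> do
		when (n == 0) start
		return (n + 1)
	r <- body
	modifyMVar_ users $ \n -> do
		when (n == 1) stop
		return (n - 1)
	return r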
@@ -84,19 +83,12 @@ runHooks r starthook stophook a = do unlockFile lck #ifndef mingw32_HOST_OS mode <- annexFileMode - fd <- liftIO $ noUmask mode $ - openFd lck ReadWrite (Just mode) defaultFileFlags - v <- liftIO $ tryIO $ - setLock fd (WriteLock, AbsoluteSeek, 0, 0) - case v of - Left _ -> noop - Right _ -> run stophook - liftIO $ closeFd fd + v <- liftIO $ noUmask mode $ tryLockExclusive (Just mode) lck #else v <- liftIO $ lockExclusive lck +#endif case v of Nothing -> noop Just lockhandle -> do run stophook liftIO $ dropLock lockhandle -#endif diff --git a/Remote/Helper/Http.hs b/Remote/Helper/Http.hs new file mode 100644 index 000000000..81c1654ef --- /dev/null +++ b/Remote/Helper/Http.hs @@ -0,0 +1,85 @@ +{- helpers for remotes using http + - + - Copyright 2014 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE BangPatterns #-} + +module Remote.Helper.Http where + +import Common.Annex +import Types.StoreRetrieve +import Utility.Metered +import Remote.Helper.Special +import Network.HTTP.Client (RequestBody(..), Response, responseStatus, responseBody, BodyReader, NeedsPopper) +import Network.HTTP.Types + +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString as S +import Control.Concurrent + +-- A storer that expects to be provided with a http RequestBody containing +-- the content to store. +-- +-- Implemented as a fileStorer, so that the content can be streamed +-- from the file in constant space. +httpStorer :: (Key -> RequestBody -> Annex Bool) -> Storer +httpStorer a = fileStorer $ \k f m -> a k =<< liftIO (httpBodyStorer f m) + +-- Reads the file and generates a streaming request body, that will update +-- the meter as it's sent. +httpBodyStorer :: FilePath -> MeterUpdate -> IO RequestBody +httpBodyStorer src m = do + size <- getFileSize src + let streamer sink = withMeteredFile src m $ \b -> byteStringPopper b sink + return $ RequestBodyStream (fromInteger size) streamer + +byteStringPopper :: L.ByteString -> NeedsPopper () -> IO () +byteStringPopper b sink = do + mvar <- newMVar $ L.toChunks b + let getnextchunk = modifyMVar mvar $ \v -> + case v of + [] -> return ([], S.empty) + (c:cs) -> return (cs, c) + sink getnextchunk + +{- Makes a Popper that streams a given number of chunks of a given + - size from the handle, updating the meter as the chunks are read. -} +handlePopper :: Integer -> Int -> MeterUpdate -> Handle -> NeedsPopper () -> IO () +handlePopper numchunks chunksize meterupdate h sink = do + mvar <- newMVar zeroBytesProcessed + let getnextchunk = do + sent <- takeMVar mvar + if sent >= target + then do + putMVar mvar sent + return S.empty + else do + b <- S.hGet h chunksize + let !sent' = addBytesProcessed sent chunksize + putMVar mvar sent' + meterupdate sent' + return b + sink getnextchunk + where + target = toBytesProcessed (numchunks * fromIntegral chunksize) + +-- Reads the http body and stores it to the specified file, updating the +-- meter as it goes. 
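http-client hands a response body over as a BodyReader, which is just an IO action yielding strict chunks, with the empty chunk signalling end of body. The retriever below is that read loop plus progress accounting; in isolation it looks like this (drainTo is an illustrative helper, not part of the patch):

import qualified Data.ByteString as S
import System.IO

-- Pull chunks from the reader until the empty chunk arrives, writing
-- each to the handle and reporting the running byte count.
drainTo :: IO S.ByteString -> Handle -> (Int -> IO ()) -> IO ()
drainTo reader h report = go 0
  where
	go n = do
		b <- reader
		if S.null b
			then return ()
			else do
				S.hPut h b
				let n' = n + S.length b
				report n'
				go n'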
+httpBodyRetriever :: FilePath -> MeterUpdate -> Response BodyReader -> IO () +httpBodyRetriever dest meterupdate resp + | responseStatus resp /= ok200 = error $ show $ responseStatus resp + | otherwise = bracket (openBinaryFile dest WriteMode) hClose (go zeroBytesProcessed) + where + reader = responseBody resp + go sofar h = do + b <- reader + if S.null b + then return () + else do + let sofar' = addBytesProcessed sofar $ S.length b + S.hPut h b + meterupdate sofar' + go sofar' h diff --git a/Remote/Helper/Messages.hs b/Remote/Helper/Messages.hs index c4b1966dc..30db70fbb 100644 --- a/Remote/Helper/Messages.hs +++ b/Remote/Helper/Messages.hs @@ -1,6 +1,6 @@ {- git-annex remote messages - - - Copyright 2013 Joey Hess <joey@kitenet.net> + - Copyright 2013 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -9,9 +9,19 @@ module Remote.Helper.Messages where import Common.Annex import qualified Git +import qualified Types.Remote as Remote showChecking :: Git.Repo -> Annex () showChecking r = showAction $ "checking " ++ Git.repoDescribe r -cantCheck :: Git.Repo -> Either String Bool -cantCheck r = Left $ "unable to check " ++ Git.repoDescribe r +class Checkable a where + descCheckable :: a -> String + +instance Checkable Git.Repo where + descCheckable = Git.repoDescribe + +instance Checkable (Remote.RemoteA a) where + descCheckable = Remote.name + +cantCheck :: Checkable a => a -> e +cantCheck v = error $ "unable to check " ++ descCheckable v diff --git a/Remote/Helper/ReadOnly.hs b/Remote/Helper/ReadOnly.hs index cd92a083c..2e327a040 100644 --- a/Remote/Helper/ReadOnly.hs +++ b/Remote/Helper/ReadOnly.hs @@ -1,6 +1,6 @@ {- Adds readonly support to remotes. - - - Copyright 2013 Joey Hess <joey@kitenet.net> + - Copyright 2013 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index 7fc421f46..9f219e8b1 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -1,20 +1,54 @@ -{- common functions for special remotes +{- helpers for special remotes - - - Copyright 2011 Joey Hess <joey@kitenet.net> + - Copyright 2011-2014 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} -module Remote.Helper.Special where - -import qualified Data.Map as M +module Remote.Helper.Special ( + findSpecialRemotes, + gitConfigSpecialRemote, + Preparer, + Storer, + Retriever, + Remover, + CheckPresent, + simplyPrepare, + ContentSource, + checkPrepare, + resourcePrepare, + fileStorer, + byteStorer, + fileRetriever, + byteRetriever, + storeKeyDummy, + retreiveKeyFileDummy, + removeKeyDummy, + checkPresentDummy, + SpecialRemoteCfg(..), + specialRemoteCfg, + specialRemote, + specialRemote', + module X +) where import Common.Annex +import Types.StoreRetrieve import Types.Remote +import Crypto +import Config.Cost +import Utility.Metered +import Remote.Helper.Chunked as X +import Remote.Helper.Encryptable as X +import Remote.Helper.Messages +import Annex.Content import qualified Git import qualified Git.Command import qualified Git.Construct +import qualified Data.ByteString.Lazy as L +import qualified Data.Map as M + {- Special remotes don't have a configured url, so Git.Repo does not - automatically generate remotes for them. This looks for a different - configuration key instead. @@ -25,7 +59,7 @@ findSpecialRemotes s = do liftIO $ mapM construct $ remotepairs m where remotepairs = M.toList . 
M.filterWithKey match - construct (k,_) = Git.Construct.remoteNamedFromKey k Git.Construct.fromUnknown + construct (k,_) = Git.Construct.remoteNamedFromKey k (pure Git.Construct.fromUnknown) match k _ = startswith "remote." k && endswith (".annex-"++s) k {- Sets up configuration for a special remote in .git/config. -} @@ -38,3 +72,204 @@ gitConfigSpecialRemote u c k v = do [Param "config", Param (configsetting a), Param b] remotename = fromJust (M.lookup "name" c) configsetting s = "remote." ++ remotename ++ "." ++ s + +-- Use when nothing needs to be done to prepare a helper. +simplyPrepare :: helper -> Preparer helper +simplyPrepare helper _ a = a $ Just helper + +-- Use to run a check when preparing a helper. +checkPrepare :: (Key -> Annex Bool) -> helper -> Preparer helper +checkPrepare checker helper k a = ifM (checker k) + ( a (Just helper) + , a Nothing + ) + +-- Use to acquire a resource when preparing a helper. +resourcePrepare :: (Key -> (r -> Annex Bool) -> Annex Bool) -> (r -> helper) -> Preparer helper +resourcePrepare withr helper k a = withr k $ \r -> + a (Just (helper r)) + +-- A Storer that expects to be provided with a file containing +-- the content of the key to store. +fileStorer :: (Key -> FilePath -> MeterUpdate -> Annex Bool) -> Storer +fileStorer a k (FileContent f) m = a k f m +fileStorer a k (ByteContent b) m = withTmp k $ \f -> do + liftIO $ L.writeFile f b + a k f m + +-- A Storer that expects to be provided with a L.ByteString of +-- the content to store. +byteStorer :: (Key -> L.ByteString -> MeterUpdate -> Annex Bool) -> Storer +byteStorer a k c m = withBytes c $ \b -> a k b m + +-- A Retriever that writes the content of a Key to a provided file. +-- It is responsible for updating the progress meter as it retrieves data. +fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever +fileRetriever a k m callback = do + f <- prepTmp k + a f k m + callback (FileContent f) + +-- A Retriever that generates a lazy ByteString containing the Key's +-- content, and passes it to a callback action which will fully consume it +-- before returning. +byteRetriever :: (Key -> (L.ByteString -> Annex Bool) -> Annex Bool) -> Retriever +byteRetriever a k _m callback = a k (callback . ByteContent) + +{- The base Remote that is provided to specialRemote needs to have + - storeKey, retrieveKeyFile, removeKey, and checkPresent methods, + - but they are never actually used (since specialRemote replaces them). + - Here are some dummy ones. + -} +storeKeyDummy :: Key -> AssociatedFile -> MeterUpdate -> Annex Bool +storeKeyDummy _ _ _ = return False +retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool +retreiveKeyFileDummy _ _ _ _ = return False +removeKeyDummy :: Key -> Annex Bool +removeKeyDummy _ = return False +checkPresentDummy :: Key -> Annex Bool +checkPresentDummy _ = error "missing checkPresent implementation" + +type RemoteModifier + = RemoteConfig + -> Preparer Storer + -> Preparer Retriever + -> Preparer Remover + -> Preparer CheckPresent + -> Remote + -> Remote + +data SpecialRemoteCfg = SpecialRemoteCfg + { chunkConfig :: ChunkConfig + , displayProgress :: Bool + } + +specialRemoteCfg :: RemoteConfig -> SpecialRemoteCfg +specialRemoteCfg c = SpecialRemoteCfg (getChunkConfig c) True + +-- Modifies a base Remote to support both chunking and encryption, +-- which special remotes typically should support. 
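In the store direction, that support is layered as: chunk the content first, then encrypt each chunk, then hand each encrypted chunk to the remote's storer (as storeKeyGen below does). Stripped of progress metering and the Annex monad, the composition is roughly this (all names here are stand-ins):

import qualified Data.ByteString.Lazy as L

-- chunker splits the content, encryptor stands in for the gpg pipe,
-- and storer is the remote's per-chunk upload. The store succeeds
-- only if every encrypted chunk is stored.
storeSketch
	:: (L.ByteString -> [L.ByteString])  -- chunker
	-> (L.ByteString -> L.ByteString)    -- encryptor
	-> (L.ByteString -> IO Bool)         -- storer
	-> L.ByteString
	-> IO Bool
storeSketch chunker encryptor storer content =
	and <$> mapM (storer . encryptor) (chunker content)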
+specialRemote :: RemoteModifier +specialRemote c = specialRemote' (specialRemoteCfg c) c + +specialRemote' :: SpecialRemoteCfg -> RemoteModifier +specialRemote' cfg c preparestorer prepareretriever prepareremover preparecheckpresent baser = encr + where + encr = baser + { storeKey = \k _f p -> cip >>= storeKeyGen k p + , retrieveKeyFile = \k _f d p -> cip >>= retrieveKeyFileGen k d p + , retrieveKeyFileCheap = \k d -> cip >>= maybe + (retrieveKeyFileCheap baser k d) + -- retrieval of encrypted keys is never cheap + (\_ -> return False) + , removeKey = \k -> cip >>= removeKeyGen k + , checkPresent = \k -> cip >>= checkPresentGen k + , cost = maybe + (cost baser) + (const $ cost baser + encryptedRemoteCostAdj) + (extractCipher c) + , getInfo = do + l <- getInfo baser + return $ l ++ + [ ("encryption", describeEncryption c) + , ("chunking", describeChunkConfig (chunkConfig cfg)) + ] + } + cip = cipherKey c + gpgopts = getGpgEncParams encr + + safely a = catchNonAsync a (\e -> warning (show e) >> return False) + + -- chunk, then encrypt, then feed to the storer + storeKeyGen k p enc = safely $ preparestorer k $ safely . go + where + go (Just storer) = sendAnnex k rollback $ \src -> + displayprogress p k $ \p' -> + storeChunks (uuid baser) chunkconfig k src p' + (storechunk enc storer) + (checkPresent baser) + go Nothing = return False + rollback = void $ removeKey encr k + + storechunk Nothing storer k content p = storer k content p + storechunk (Just (cipher, enck)) storer k content p = + withBytes content $ \b -> + encrypt gpgopts cipher (feedBytes b) $ + readBytes $ \encb -> + storer (enck k) (ByteContent encb) p + + -- call retrieve-r to get chunks; decrypt them; stream to dest file + retrieveKeyFileGen k dest p enc = + safely $ prepareretriever k $ safely . go + where + go (Just retriever) = displayprogress p k $ \p' -> + retrieveChunks retriever (uuid baser) chunkconfig + enck k dest p' (sink dest enc) + go Nothing = return False + enck = maybe id snd enc + + removeKeyGen k enc = safely $ prepareremover k $ safely . go + where + go (Just remover) = removeChunks remover (uuid baser) chunkconfig enck k + go Nothing = return False + enck = maybe id snd enc + + checkPresentGen k enc = preparecheckpresent k go + where + go (Just checker) = checkPresentChunks checker (uuid baser) chunkconfig enck k + go Nothing = cantCheck baser + enck = maybe id snd enc + + chunkconfig = chunkConfig cfg + + displayprogress p k a + | displayProgress cfg = metered (Just p) k a + | otherwise = a p + +{- Sink callback for retrieveChunks. Stores the file content into the + - provided Handle, decrypting it first if necessary. + - + - If the remote did not store the content using chunks, no Handle + - will be provided, and it's up to us to open the destination file. + - + - Note that when neither chunking nor encryption is used, and the remote + - provides FileContent, that file only needs to be renamed + - into place. (And it may even already be in the right place..) 
+ -} +sink + :: FilePath + -> Maybe (Cipher, EncKey) + -> Maybe Handle + -> Maybe MeterUpdate + -> ContentSource + -> Annex Bool +sink dest enc mh mp content = do + case (enc, mh, content) of + (Nothing, Nothing, FileContent f) + | f == dest -> noop + | otherwise -> liftIO $ moveFile f dest + (Just (cipher, _), _, ByteContent b) -> + decrypt cipher (feedBytes b) $ + readBytes write + (Just (cipher, _), _, FileContent f) -> do + withBytes content $ \b -> + decrypt cipher (feedBytes b) $ + readBytes write + liftIO $ nukeFile f + (Nothing, _, FileContent f) -> do + withBytes content write + liftIO $ nukeFile f + (Nothing, _, ByteContent b) -> write b + return True + where + write b = case mh of + Just h -> liftIO $ b `streamto` h + Nothing -> liftIO $ bracket opendest hClose (b `streamto`) + streamto b h = case mp of + Just p -> meteredWrite p h b + Nothing -> L.hPut h b + opendest = openBinaryFile dest WriteMode + +withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a +withBytes (ByteContent b) a = a b +withBytes (FileContent f) a = a =<< liftIO (L.readFile f) diff --git a/Remote/Helper/Ssh.hs b/Remote/Helper/Ssh.hs index 8de88953f..3addf2384 100644 --- a/Remote/Helper/Ssh.hs +++ b/Remote/Helper/Ssh.hs @@ -1,6 +1,6 @@ {- git-annex remote access with ssh and git-annex-shell - - - Copyright 2011-2013 Joey Hess <joey@kitenet.net> + - Copyright 2011-2013 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -8,30 +8,29 @@ module Remote.Helper.Ssh where import Common.Annex +import qualified Annex import qualified Git import qualified Git.Url import Annex.UUID import Annex.Ssh import CmdLine.GitAnnexShell.Fields (Field, fieldName) import qualified CmdLine.GitAnnexShell.Fields as Fields -import Types.GitConfig import Types.Key import Remote.Helper.Messages import Utility.Metered import Utility.Rsync import Types.Remote import Logs.Transfer +import Config {- Generates parameters to ssh to a repository's host and run a command. - Caller is responsible for doing any neccessary shellEscaping of the - passed command. 
-} -toRepo :: Git.Repo -> [CommandParam] -> Annex [CommandParam] -toRepo r sshcmd = do - g <- fromRepo id - let c = extractRemoteGitConfig g (Git.repoDescribe r) - let opts = map Param $ remoteAnnexSshOptions c +toRepo :: Git.Repo -> RemoteGitConfig -> [CommandParam] -> Annex [CommandParam] +toRepo r gc sshcmd = do + let opts = map Param $ remoteAnnexSshOptions gc let host = fromMaybe (error "bad ssh url") $ Git.Url.hostuser r - params <- sshCachingOptions (host, Git.Url.port r) opts + params <- sshOptions (host, Git.Url.port r) gc opts return $ params ++ Param host : sshcmd {- Generates parameters to run a git-annex-shell command on a remote @@ -40,16 +39,18 @@ git_annex_shell :: Git.Repo -> String -> [CommandParam] -> [(Field, String)] -> git_annex_shell r command params fields | not $ Git.repoIsUrl r = return $ Just (shellcmd, shellopts ++ fieldopts) | Git.repoIsSsh r = do + gc <- Annex.getRemoteGitConfig r u <- getRepoUUID r - sshparams <- toRepo r [Param $ sshcmd u ] + sshparams <- toRepo r gc [Param $ sshcmd u gc] return $ Just ("ssh", sshparams) | otherwise = return Nothing where dir = Git.repoPath r shellcmd = "git-annex-shell" shellopts = Param command : File dir : params - sshcmd u = unwords $ - shellcmd : map shellEscape (toCommand shellopts) ++ + sshcmd u gc = unwords $ + fromMaybe shellcmd (remoteAnnexShell gc) + : map shellEscape (toCommand shellopts) ++ uuidcheck u ++ map shellEscape (toCommand fieldopts) uuidcheck NoUUID = [] @@ -68,7 +69,7 @@ git_annex_shell r command params fields - a specified error value. -} onRemote :: Git.Repo - -> (FilePath -> [CommandParam] -> IO a, a) + -> (FilePath -> [CommandParam] -> IO a, Annex a) -> String -> [CommandParam] -> [(Field, String)] @@ -77,22 +78,22 @@ onRemote r (with, errorval) command params fields = do s <- git_annex_shell r command params fields case s of Just (c, ps) -> liftIO $ with c ps - Nothing -> return errorval + Nothing -> errorval {- Checks if a remote contains a key. -} -inAnnex :: Git.Repo -> Key -> Annex (Either String Bool) +inAnnex :: Git.Repo -> Key -> Annex Bool inAnnex r k = do showChecking r onRemote r (check, cantCheck r) "inannex" [Param $ key2file k] [] where - check c p = dispatch <$> safeSystem c p - dispatch ExitSuccess = Right True - dispatch (ExitFailure 1) = Right False + check c p = dispatch =<< safeSystem c p + dispatch ExitSuccess = return True + dispatch (ExitFailure 1) = return False dispatch _ = cantCheck r {- Removes a key from a remote. -} dropKey :: Git.Repo -> Key -> Annex Bool -dropKey r key = onRemote r (boolSystem, False) "dropkey" +dropKey r key = onRemote r (boolSystem, return False) "dropkey" [ Params "--quiet --force" , Param $ key2file key ] @@ -122,7 +123,7 @@ rsyncParamsRemote direct r direction key file afile = do fields -- Convert the ssh command into rsync command line. let eparam = rsyncShell (Param shellcmd:shellparams) - let o = rsyncParams r direction + o <- rsyncParams r direction return $ if direction == Download then o ++ rsyncopts eparam dummy (File file) else o ++ rsyncopts eparam (File file) dummy @@ -140,9 +141,19 @@ rsyncParamsRemote direct r direction key file afile = do dummy = Param "dummy:" -- --inplace to resume partial files -rsyncParams :: Remote -> Direction -> [CommandParam] -rsyncParams r direction = Params "--progress --inplace" : - map Param (remoteAnnexRsyncOptions gc ++ dps) +-- +-- Only use --perms when not on a crippled file system, as rsync +-- will fail trying to restore file perms onto a filesystem that does not +-- support them. 
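Earlier in this file, inAnnex now answers directly from git-annex-shell's exit status rather than wrapping the result in an Either. That mapping, in isolation (presenceFromExit is an illustrative name):

import System.Exit

-- Exit 0 means the key is present and exit 1 means it is absent; any
-- other status (eg, the ssh connection failed) means the check itself
-- failed, and must not be conflated with absence.
presenceFromExit :: ExitCode -> Maybe Bool
presenceFromExit ExitSuccess = Just True
presenceFromExit (ExitFailure 1) = Just False
presenceFromExit _ = Nothing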
+rsyncParams :: Remote -> Direction -> Annex [CommandParam] +rsyncParams r direction = do + crippled <- crippledFileSystem + return $ map Param $ catMaybes + [ Just "--progress" + , Just "--inplace" + , if crippled then Nothing else Just "--perms" + ] + ++ remoteAnnexRsyncOptions gc ++ dps where dps | direction == Download = remoteAnnexRsyncDownloadOptions gc diff --git a/Remote/Hook.hs b/Remote/Hook.hs index 3735c228c..592564772 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -1,15 +1,12 @@ {- A remote that provides hooks to run shell commands. - - - Copyright 2011 Joey Hess <joey@kitenet.net> + - Copyright 2011 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} module Remote.Hook (remote) where -import qualified Data.ByteString.Lazy as L -import qualified Data.Map as M - import Common.Annex import Types.Remote import Types.Key @@ -17,14 +14,12 @@ import Types.Creds import qualified Git import Config import Config.Cost -import Annex.Content import Annex.UUID import Remote.Helper.Special -import Remote.Helper.Encryptable -import Crypto -import Utility.Metered import Utility.Env +import qualified Data.Map as M + type Action = String type HookName = String @@ -39,30 +34,37 @@ remote = RemoteType { gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do cst <- remoteCost gc expensiveRemoteCost - return $ Just $ encryptableRemote c - (storeEncrypted hooktype $ getGpgEncParams (c,gc)) - (retrieveEncrypted hooktype) - Remote { - uuid = u, - cost = cst, - name = Git.repoDescribe r, - storeKey = store hooktype, - retrieveKeyFile = retrieve hooktype, - retrieveKeyFileCheap = retrieveCheap hooktype, - removeKey = remove hooktype, - hasKey = checkPresent r hooktype, - hasKeyCheap = False, - whereisKey = Nothing, - remoteFsck = Nothing, - repairRepo = Nothing, - config = c, - localpath = Nothing, - repo = r, - gitconfig = gc, - readonly = False, - availability = GloballyAvailable, - remotetype = remote - } + return $ Just $ specialRemote c + (simplyPrepare $ store hooktype) + (simplyPrepare $ retrieve hooktype) + (simplyPrepare $ remove hooktype) + (simplyPrepare $ checkKey r hooktype) + Remote + { uuid = u + , cost = cst + , name = Git.repoDescribe r + , storeKey = storeKeyDummy + , retrieveKeyFile = retreiveKeyFileDummy + , retrieveKeyFileCheap = retrieveCheap hooktype + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy + , checkPresentCheap = False + , whereisKey = Nothing + , remoteFsck = Nothing + , repairRepo = Nothing + , config = c + , localpath = Nothing + , repo = r + , gitconfig = gc + , readonly = False + , availability = GloballyAvailable + , remotetype = remote + , mkUnavailable = gen r u c $ + gc { remoteAnnexHookType = Just "!dne!" 
} + , getInfo = return [("hooktype", hooktype)] + , claimUrl = Nothing + , checkUrl = Nothing + } where hooktype = fromMaybe (error "missing hooktype") $ remoteAnnexHookType gc @@ -71,7 +73,7 @@ hookSetup mu _ c = do u <- maybe (liftIO genUUID) return mu let hooktype = fromMaybe (error "Specify hooktype=") $ M.lookup "hooktype" c - c' <- encryptionSetup c + (c', _encsetup) <- encryptionSetup c gitConfigSpecialRemote u c' "hooktype" hooktype return (c', u) @@ -79,16 +81,16 @@ hookEnv :: Action -> Key -> Maybe FilePath -> IO (Maybe [(String, String)]) hookEnv action k f = Just <$> mergeenv (fileenv f ++ keyenv) where mergeenv l = addEntries l <$> getEnvironment - env s v = ("ANNEX_" ++ s, v) + envvar s v = ("ANNEX_" ++ s, v) keyenv = catMaybes - [ Just $ env "KEY" (key2file k) - , Just $ env "ACTION" action - , env "HASH_1" <$> headMaybe hashbits - , env "HASH_2" <$> headMaybe (drop 1 hashbits) + [ Just $ envvar "KEY" (key2file k) + , Just $ envvar "ACTION" action + , envvar "HASH_1" <$> headMaybe hashbits + , envvar "HASH_2" <$> headMaybe (drop 1 hashbits) ] fileenv Nothing = [] - fileenv (Just file) = [env "FILE" file] - hashbits = map takeDirectory $ splitPath $ hashDirMixed k + fileenv (Just file) = [envvar "FILE" file] + hashbits = map takeDirectory $ splitPath $ hashDirMixed def k lookupHook :: HookName -> Action -> Annex (Maybe String) lookupHook hookname action = do @@ -118,42 +120,30 @@ runHook hook action k f a = maybe (return False) run =<< lookupHook hook action return False ) -store :: HookName -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store h k _f _p = sendAnnex k (void $ remove h k) $ \src -> +store :: HookName -> Storer +store h = fileStorer $ \k src _p -> runHook h "store" k (Just src) $ return True -storeEncrypted :: HookName -> [CommandParam] -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted h gpgOpts (cipher, enck) k _p = withTmp enck $ \tmp -> - sendAnnex k (void $ remove h enck) $ \src -> do - liftIO $ encrypt gpgOpts cipher (feedFile src) $ - readBytes $ L.writeFile tmp - runHook h "store" enck (Just tmp) $ return True - -retrieve :: HookName -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve h k _f d _p = runHook h "retrieve" k (Just d) $ return True +retrieve :: HookName -> Retriever +retrieve h = fileRetriever $ \d k _p -> + unlessM (runHook h "retrieve" k (Just d) $ return True) $ + error "failed to retrieve content" retrieveCheap :: HookName -> Key -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False -retrieveEncrypted :: HookName -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted h (cipher, enck) _ f _p = withTmp enck $ \tmp -> - runHook h "retrieve" enck (Just tmp) $ liftIO $ catchBoolIO $ do - decrypt cipher (feedFile tmp) $ - readBytes $ L.writeFile f - return True - -remove :: HookName -> Key -> Annex Bool +remove :: HookName -> Remover remove h k = runHook h "remove" k Nothing $ return True -checkPresent :: Git.Repo -> HookName -> Key -> Annex (Either String Bool) -checkPresent r h k = do +checkKey :: Git.Repo -> HookName -> CheckPresent +checkKey r h k = do showAction $ "checking " ++ Git.repoDescribe r v <- lookupHook h action - liftIO $ catchMsgIO $ check v + liftIO $ check v where - action = "checkpresent" + action = "checkpresent" findkey s = key2file k `elem` lines s check Nothing = error $ action ++ " hook misconfigured" check (Just hook) = do - env <- hookEnv action k Nothing - findkey <$> readProcessEnv "sh" ["-c", hook] env + environ <- hookEnv action 
k Nothing + findkey <$> readProcessEnv "sh" ["-c", hook] environ diff --git a/Remote/List.hs b/Remote/List.hs index e3afc939c..49b0a35f2 100644 --- a/Remote/List.hs +++ b/Remote/List.hs @@ -2,7 +2,7 @@ {- git-annex remote list - - - Copyright 2011,2012 Joey Hess <joey@kitenet.net> + - Copyright 2011,2012 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -15,7 +15,6 @@ import Common.Annex import qualified Annex import Logs.Remote import Types.Remote -import Types.GitConfig import Annex.UUID import Remote.Helper.Hooks import Remote.Helper.ReadOnly @@ -31,6 +30,7 @@ import qualified Remote.Bup import qualified Remote.Directory import qualified Remote.Rsync import qualified Remote.Web +import qualified Remote.BitTorrent #ifdef WITH_WEBDAV import qualified Remote.WebDAV #endif @@ -38,6 +38,7 @@ import qualified Remote.WebDAV import qualified Remote.Tahoe #endif import qualified Remote.Glacier +import qualified Remote.Ddar import qualified Remote.Hook import qualified Remote.External @@ -52,6 +53,7 @@ remoteTypes = , Remote.Directory.remote , Remote.Rsync.remote , Remote.Web.remote + , Remote.BitTorrent.remote #ifdef WITH_WEBDAV , Remote.WebDAV.remote #endif @@ -59,6 +61,7 @@ remoteTypes = , Remote.Tahoe.remote #endif , Remote.Glacier.remote + , Remote.Ddar.remote , Remote.Hook.remote , Remote.External.remote ] @@ -92,8 +95,7 @@ remoteListRefresh = do remoteGen :: M.Map UUID RemoteConfig -> RemoteType -> Git.Repo -> Annex (Maybe Remote) remoteGen m t r = do u <- getRepoUUID r - g <- fromRepo id - let gc = extractRemoteGitConfig g (Git.repoDescribe r) + gc <- Annex.getRemoteGitConfig r let c = fromMaybe M.empty $ M.lookup u m mrmt <- generate t r u c gc return $ adjustReadOnly . addHooks <$> mrmt diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 7d051d6cd..f39081299 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -1,6 +1,6 @@ {- A remote that is only accessible by rsync. - - - Copyright 2011 Joey Hess <joey@kitenet.net> + - Copyright 2011 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. 
-} @@ -9,10 +9,10 @@ module Remote.Rsync ( remote, - storeEncrypted, - retrieveEncrypted, + store, + retrieve, remove, - checkPresent, + checkKey, withRsyncScratchDir, genRsyncOpts, RsyncOpts @@ -27,7 +27,6 @@ import Annex.Content import Annex.UUID import Annex.Ssh import Remote.Helper.Special -import Remote.Helper.Encryptable import Remote.Rsync.RsyncUrl import Crypto import Utility.Rsync @@ -37,8 +36,9 @@ import Utility.PID import Annex.Perms import Logs.Transfer import Types.Creds +import Types.Key (isChunkKey) +import Annex.DirHashes -import qualified Data.ByteString.Lazy as L import qualified Data.Map as M remote :: RemoteType @@ -56,19 +56,21 @@ gen r u c gc = do fromMaybe (error "missing rsyncurl") $ remoteAnnexRsyncUrl gc let o = genRsyncOpts c gc transport url let islocal = rsyncUrlIsPath $ rsyncUrl o - return $ Just $ encryptableRemote c - (storeEncrypted o $ getGpgEncParams (c,gc)) - (retrieveEncrypted o) + return $ Just $ specialRemote' specialcfg c + (simplyPrepare $ fileStorer $ store o) + (simplyPrepare $ fileRetriever $ retrieve o) + (simplyPrepare $ remove o) + (simplyPrepare $ checkKey r o) Remote { uuid = u , cost = cst , name = Git.repoDescribe r - , storeKey = store o - , retrieveKeyFile = retrieve o + , storeKey = storeKeyDummy + , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap o - , removeKey = remove o - , hasKey = checkPresent r o - , hasKeyCheap = False + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy + , checkPresentCheap = False , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing @@ -81,12 +83,20 @@ gen r u c gc = do , readonly = False , availability = if islocal then LocallyAvailable else GloballyAvailable , remotetype = remote + , mkUnavailable = return Nothing + , getInfo = return [("url", url)] + , claimUrl = Nothing + , checkUrl = Nothing } + where + specialcfg = (specialRemoteCfg c) + -- Rsync displays its own progress. + { displayProgress = False } genRsyncOpts :: RemoteConfig -> RemoteGitConfig -> [CommandParam] -> RsyncUrl -> RsyncOpts genRsyncOpts c gc transport url = RsyncOpts { rsyncUrl = url - , rsyncOptions = opts [] + , rsyncOptions = transport ++ opts [] , rsyncUploadOptions = transport ++ opts (remoteAnnexRsyncUploadOptions gc) , rsyncDownloadOptions = transport ++ opts (remoteAnnexRsyncDownloadOptions gc) , rsyncShellEscape = M.lookup "shellescape" c /= Just "no" @@ -111,8 +121,8 @@ rsyncTransport gc url let (port, sshopts') = sshReadPort sshopts userhost = takeWhile (/=':') url -- Connection caching - (Param "ssh":) <$> sshCachingOptions - (userhost, port) + (Param "ssh":) <$> sshOptions + (userhost, port) gc (map Param $ loginopt ++ sshopts') "rsh":rshopts -> return $ map Param $ "rsh" : loginopt ++ rshopts @@ -132,40 +142,58 @@ rsyncSetup mu _ c = do -- verify configuration is sane let url = fromMaybe (error "Specify rsyncurl=") $ M.lookup "rsyncurl" c - c' <- encryptionSetup c + (c', _encsetup) <- encryptionSetup c -- The rsyncurl is stored in git config, not only in this remote's -- persistant state, so it can vary between hosts. 
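The store and remove hunks below both work in terms of the hash directory layout: store stages the object under its hash directories inside a scratch directory so that rsync --recursive recreates them on the remote, and remove sends an empty directory with include/exclude rules naming them. The paths involved look roughly like this (stagedPath and includesFor are illustrative helpers; the hash directory itself comes from git-annex's dirHashes):

import System.FilePath ((</>), takeDirectory)

-- Where a key is staged in the scratch directory before upload, eg
-- tmp/aaa/bbb/KEY/KEY (the real path comes from keyPaths).
stagedPath :: FilePath -> FilePath -> FilePath -> FilePath
stagedPath tmp hashdir keyfile = tmp </> hashdir </> keyfile </> keyfile

-- The --include rules used when deleting a key: rsync only descends
-- into directories that are explicitly included, and "***" matches the
-- key's directory together with everything inside it.
includesFor :: FilePath -> FilePath -> [String]
includesFor hashdir keyfile =
	[ takeDirectory hashdir
	, hashdir
	, hashdir </> keyfile </> "***"
	]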
gitConfigSpecialRemote u c' "rsyncurl" url return (c', u) -store :: RsyncOpts -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store o k _f p = sendAnnex k (void $ remove o k) $ rsyncSend o p k False - -storeEncrypted :: RsyncOpts -> [CommandParam] -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted o gpgOpts (cipher, enck) k p = withTmp enck $ \tmp -> - sendAnnex k (void $ remove o enck) $ \src -> do - liftIO $ encrypt gpgOpts cipher (feedFile src) $ - readBytes $ L.writeFile tmp - rsyncSend o p enck True tmp +{- To send a single key is slightly tricky; need to build up a temporary + - directory structure to pass to rsync so it can create the hash + - directories. + - + - This would not be necessary if the hash directory structure used locally + - was always the same as that used on the rsync remote. So if that's ever + - unified, this gets nicer. + - (When we have the right hash directory structure, we can just + - pass --include=X --include=X/Y --include=X/Y/file --exclude=*) + -} +store :: RsyncOpts -> Key -> FilePath -> MeterUpdate -> Annex Bool +store o k src meterupdate = withRsyncScratchDir $ \tmp -> do + let dest = tmp </> Prelude.head (keyPaths k) + liftIO $ createDirectoryIfMissing True $ parentDir dest + ok <- liftIO $ if canrename + then do + rename src dest + return True + else createLinkOrCopy src dest + ps <- sendParams + if ok + then showResumable $ rsyncRemote Upload o (Just meterupdate) $ ps ++ + [ Param "--recursive" + , partialParams + -- tmp/ to send contents of tmp dir + , File $ addTrailingPathSeparator tmp + , Param $ rsyncUrl o + ] + else return False + where + {- If the key being sent is encrypted or chunked, the file + - containing its content is a temp file, and so can be + - renamed into place. Otherwise, the file is the annexed + - object file, and has to be copied or hard linked into place. -} + canrename = isEncKey k || isChunkKey k -retrieve :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve o k _ f p = rsyncRetrieve o k f (Just p) +retrieve :: RsyncOpts -> FilePath -> Key -> MeterUpdate -> Annex () +retrieve o f k p = + unlessM (rsyncRetrieve o k f (Just p)) $ + error "rsync failed" retrieveCheap :: RsyncOpts -> Key -> FilePath -> Annex Bool retrieveCheap o k f = ifM (preseedTmp k f) ( rsyncRetrieve o k f Nothing , return False ) -retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted o (cipher, enck) _ f p = withTmp enck $ \tmp -> - ifM (rsyncRetrieve o enck tmp (Just p)) - ( liftIO $ catchBoolIO $ do - decrypt cipher (feedFile tmp) $ - readBytes $ L.writeFile f - return True - , return False - ) - -remove :: RsyncOpts -> Key -> Annex Bool +remove :: RsyncOpts -> Remover remove o k = do ps <- sendParams withRsyncScratchDir $ \tmp -> liftIO $ do @@ -185,22 +213,20 @@ remove o k = do - content could be. Note that the parent directories have - to also be explicitly included, due to how rsync - traverses directories. 
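 -
 - A worked sketch (hash directories hypothetical): for a key k whose
 - lower-cased hash directory comes out to "f8/7e", use h yields roughly
 -   [ "f8", "f8/7e", "f8/7e/" ++ keyFile k ++ "/***" ]
 - and concatMap repeats that for each layout in dirHashes.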
-} - includes = concatMap use annexHashes - use h = let dir = h k in + includes = concatMap use dirHashes + use h = let dir = h def k in [ parentDir dir , dir -- match content directory and anything in it , dir </> keyFile k </> "***" ] -checkPresent :: Git.Repo -> RsyncOpts -> Key -> Annex (Either String Bool) -checkPresent r o k = do +checkKey :: Git.Repo -> RsyncOpts -> CheckPresent +checkKey r o k = do showAction $ "checking " ++ Git.repoDescribe r -- note: Does not currently differentiate between rsync failing -- to connect, and the file not being present. - Right <$> check - where - check = untilTrue (rsyncUrls o k) $ \u -> + untilTrue (rsyncUrls o k) $ \u -> liftIO $ catchBoolIO $ do withQuietOutput createProcessSuccess $ proc "rsync" $ toCommand $ @@ -238,8 +264,8 @@ withRsyncScratchDir a = do removeDirectoryRecursive d rsyncRetrieve :: RsyncOpts -> Key -> FilePath -> Maybe MeterUpdate -> Annex Bool -rsyncRetrieve o k dest callback = - showResumable $ untilTrue (rsyncUrls o k) $ \u -> rsyncRemote Download o callback +rsyncRetrieve o k dest meterupdate = + showResumable $ untilTrue (rsyncUrls o k) $ \u -> rsyncRemote Download o meterupdate -- use inplace when retrieving to support resuming [ Param "--inplace" , Param u @@ -263,33 +289,3 @@ rsyncRemote direction o callback params = do opts | direction == Download = rsyncDownloadOptions o | otherwise = rsyncUploadOptions o - -{- To send a single key is slightly tricky; need to build up a temporary - - directory structure to pass to rsync so it can create the hash - - directories. - - - - This would not be necessary if the hash directory structure used locally - - was always the same as that used on the rsync remote. So if that's ever - - unified, this gets nicer. - - (When we have the right hash directory structure, we can just - - pass --include=X --include=X/Y --include=X/Y/file --exclude=*) - -} -rsyncSend :: RsyncOpts -> MeterUpdate -> Key -> Bool -> FilePath -> Annex Bool -rsyncSend o callback k canrename src = withRsyncScratchDir $ \tmp -> do - let dest = tmp </> Prelude.head (keyPaths k) - liftIO $ createDirectoryIfMissing True $ parentDir dest - ok <- liftIO $ if canrename - then do - rename src dest - return True - else createLinkOrCopy src dest - ps <- sendParams - if ok - then showResumable $ rsyncRemote Upload o (Just callback) $ ps ++ - [ Param "--recursive" - , partialParams - -- tmp/ to send contents of tmp dir - , File $ addTrailingPathSeparator tmp - , Param $ rsyncUrl o - ] - else return False diff --git a/Remote/Rsync/RsyncUrl.hs b/Remote/Rsync/RsyncUrl.hs index 61bbe2f3f..a7a2ac8f6 100644 --- a/Remote/Rsync/RsyncUrl.hs +++ b/Remote/Rsync/RsyncUrl.hs @@ -1,6 +1,6 @@ {- Rsync urls. - - - Copyright 2014 Joey Hess <joey@kitenet.net> + - Copyright 2014 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. 
-} @@ -14,10 +14,12 @@ import Locations import Utility.Rsync import Utility.SafeCommand +import Data.Default import System.FilePath.Posix #ifdef mingw32_HOST_OS import Data.String.Utils #endif +import Annex.DirHashes type RsyncUrl = String @@ -35,12 +37,12 @@ rsyncEscape o u | otherwise = u rsyncUrls :: RsyncOpts -> Key -> [RsyncUrl] -rsyncUrls o k = map use annexHashes +rsyncUrls o k = map use dirHashes where use h = rsyncUrl o </> hash h </> rsyncEscape o (f </> f) f = keyFile k #ifndef mingw32_HOST_OS - hash h = h k + hash h = h def k #else - hash h = replace "\\" "/" (h k) + hash h = replace "\\" "/" (h def k) #endif diff --git a/Remote/S3.hs b/Remote/S3.hs index c1a99abcd..b0c1de114 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -1,21 +1,33 @@ {- S3 remotes - - - Copyright 2011-2013 Joey Hess <joey@kitenet.net> + - Copyright 2011-2014 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} -module Remote.S3 (remote, iaHost, isIA, isIAHost, iaItemUrl) where +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE CPP #-} -import Network.AWS.AWSConnection -import Network.AWS.S3Object hiding (getStorageClass) -import Network.AWS.S3Bucket hiding (size) -import Network.AWS.AWSResult +module Remote.S3 (remote, iaHost, configIA, iaItemUrl) where + +import qualified Aws as AWS +import qualified Aws.Core as AWS +import qualified Aws.S3 as S3 import qualified Data.Text as T -import qualified Data.ByteString.Lazy.Char8 as L +import qualified Data.Text.Encoding as T +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString as S import qualified Data.Map as M import Data.Char import Network.Socket (HostName) +import Network.HTTP.Conduit (Manager, newManager, closeManager) +import Network.HTTP.Client (defaultManagerSettings, managerResponseTimeout, responseStatus, responseBody, RequestBody(..)) +import Network.HTTP.Types +import Control.Monad.Trans.Resource +import Control.Monad.Catch +import Data.Conduit +import Data.IORef import Common.Annex import Types.Remote @@ -24,16 +36,15 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special -import Remote.Helper.Encryptable +import Remote.Helper.Http import qualified Remote.Helper.AWS as AWS -import Crypto import Creds -import Utility.Metered -import Annex.Content import Annex.UUID import Logs.Web +import Utility.Metered +import Utility.DataUnits -type Bucket = String +type BucketName = String remote :: RemoteType remote = RemoteType { @@ -44,50 +55,64 @@ remote = RemoteType { } gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) -gen r u c gc = new <$> remoteCost gc expensiveRemoteCost +gen r u c gc = do + cst <- remoteCost gc expensiveRemoteCost + info <- extractS3Info c + return $ new cst info where - new cst = Just $ encryptableRemote c - (storeEncrypted this) - (retrieveEncrypted this) + new cst info = Just $ specialRemote c + (prepareS3 this info $ store this) + (prepareS3 this info retrieve) + (prepareS3 this info remove) + (prepareS3 this info $ checkKey this) this where - this = Remote { - uuid = u, - cost = cst, - name = Git.repoDescribe r, - storeKey = store this, - retrieveKeyFile = retrieve this, - retrieveKeyFileCheap = retrieveCheap this, - removeKey = remove this c, - hasKey = checkPresent this, - hasKeyCheap = False, - whereisKey = Nothing, - remoteFsck = Nothing, - repairRepo = Nothing, - config = c, - repo = r, - gitconfig = gc, - localpath = Nothing, - readonly = False, - availability = GloballyAvailable, 
- remotetype = remote - } + this = Remote + { uuid = u + , cost = cst + , name = Git.repoDescribe r + , storeKey = storeKeyDummy + , retrieveKeyFile = retreiveKeyFileDummy + , retrieveKeyFileCheap = retrieveCheap + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy + , checkPresentCheap = False + , whereisKey = Nothing + , remoteFsck = Nothing + , repairRepo = Nothing + , config = c + , repo = r + , gitconfig = gc + , localpath = Nothing + , readonly = False + , availability = GloballyAvailable + , remotetype = remote + , mkUnavailable = gen r u (M.insert "host" "!dne!" c) gc + , getInfo = includeCredsInfo c (AWS.creds u) $ catMaybes + [ Just ("bucket", fromMaybe "unknown" (getBucketName c)) + , if configIA c + then Just ("internet archive item", iaItemUrl $ fromMaybe "unknown" $ getBucketName c) + else Nothing + , Just ("partsize", maybe "unlimited" (roughSize storageUnits False) (getPartSize c)) + ] + , claimUrl = Nothing + , checkUrl = Nothing + } s3Setup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) s3Setup mu mcreds c = do u <- maybe (liftIO genUUID) return mu - c' <- setRemoteCredPair c (AWS.creds u) mcreds - s3Setup' u c' -s3Setup' :: UUID -> RemoteConfig -> Annex (RemoteConfig, UUID) -s3Setup' u c = if isIA c then archiveorg else defaulthost + s3Setup' u mcreds c +s3Setup' :: UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) +s3Setup' u mcreds c = if configIA c then archiveorg else defaulthost where remotename = fromJust (M.lookup "name" c) defbucket = remotename ++ "-" ++ fromUUID u defaults = M.fromList [ ("datacenter", T.unpack $ AWS.defaultRegion AWS.S3) , ("storageclass", "STANDARD") - , ("host", defaultAmazonS3Host) - , ("port", show defaultAmazonS3Port) + , ("host", AWS.s3DefaultHost) + , ("port", "80") , ("bucket", defbucket) ] @@ -96,250 +121,400 @@ s3Setup' u c = if isIA c then archiveorg else defaulthost return (fullconfig, u) defaulthost = do - c' <- encryptionSetup c - let fullconfig = c' `M.union` defaults + (c', encsetup) <- encryptionSetup c + c'' <- setRemoteCredPair encsetup c' (AWS.creds u) mcreds + let fullconfig = c'' `M.union` defaults genBucket fullconfig u use fullconfig archiveorg = do showNote "Internet Archive mode" + c' <- setRemoteCredPair noEncryptionUsed c (AWS.creds u) mcreds -- Ensure user enters a valid bucket name, since -- this determines the name of the archive.org item. 
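	-- (For example, with a hypothetical name:
	-- replace " " "-" (map toLower "My Cool Stuff") == "my-cool-stuff")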
- let bucket = replace " " "-" $ map toLower $ + let validbucket = replace " " "-" $ map toLower $ fromMaybe (error "specify bucket=") $ - getBucket c + getBucketName c' let archiveconfig = - -- hS3 does not pass through x-archive-* headers + -- IA accepts x-amz-* as an alias for x-archive-* M.mapKeys (replace "x-archive-" "x-amz-") $ -- encryption does not make sense here M.insert "encryption" "none" $ - M.insert "bucket" bucket $ - M.union c $ + M.insert "bucket" validbucket $ + M.union c' $ -- special constraints on key names - M.insert "mungekeys" "ia" $ - -- bucket created only when files are uploaded - M.insert "x-amz-auto-make-bucket" "1" defaults - writeUUIDFile archiveconfig u + M.insert "mungekeys" "ia" defaults + info <- extractS3Info archiveconfig + withS3Handle archiveconfig u info $ + writeUUIDFile archiveconfig u use archiveconfig -store :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store r k _f p = s3Action r False $ \(conn, bucket) -> - sendAnnex k (void $ remove' r k) $ \src -> do - ok <- s3Bool =<< storeHelper (conn, bucket) r k p src - - -- Store public URL to item in Internet Archive. - when (ok && isIA (config r)) $ - setUrlPresent k (iaKeyUrl r k) - - return ok - -storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted r (cipher, enck) k p = s3Action r False $ \(conn, bucket) -> - -- To get file size of the encrypted content, have to use a temp file. - -- (An alternative would be chunking to to a constant size.) - withTmp enck $ \tmp -> sendAnnex k (void $ remove' r enck) $ \src -> do - liftIO $ encrypt (getGpgEncParams r) cipher (feedFile src) $ - readBytes $ L.writeFile tmp - s3Bool =<< storeHelper (conn, bucket) r enck p tmp - -storeHelper :: (AWSConnection, Bucket) -> Remote -> Key -> MeterUpdate -> FilePath -> Annex (AWSResult ()) -storeHelper (conn, bucket) r k p file = do - size <- maybe getsize (return . fromIntegral) $ keySize k - meteredBytes (Just p) size $ \meterupdate -> - liftIO $ withMeteredFile file meterupdate $ \content -> do - -- size is provided to S3 so the whole content - -- does not need to be buffered to calculate it - let object = S3Object - bucket (bucketFile r k) "" - (("Content-Length", show size) : getXheaders (config r)) - content - sendObject conn $ - setStorageClass (getStorageClass $ config r) object +-- Sets up an http connection manager for S3 endpoint, which allows +-- http connections to be reused across calls to the helper. +prepareS3 :: Remote -> S3Info -> (S3Handle -> helper) -> Preparer helper +prepareS3 r info = resourcePrepare $ const $ + withS3Handle (config r) (uuid r) info + +store :: Remote -> S3Handle -> Storer +store r h = fileStorer $ \k f p -> do + case partSize (hinfo h) of + Just partsz | partsz > 0 -> do + fsz <- liftIO $ getFileSize f + if fsz > partsz + then multipartupload fsz partsz k f p + else singlepartupload k f p + _ -> singlepartupload k f p + -- Store public URL to item in Internet Archive. + when (isIA (hinfo h) && not (isChunkKey k)) $ + setUrlPresent webUUID k (iaKeyUrl r k) + return True where - getsize = liftIO $ fromIntegral .
fileSize <$> getFileStatus file - -retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve r k _f d p = s3Action r False $ \(conn, bucket) -> - metered (Just p) k $ \meterupdate -> do - res <- liftIO $ getObject conn $ bucketKey r bucket k - case res of - Right o -> do - liftIO $ meteredWriteFile meterupdate d $ - obj_data o - return True - Left e -> s3Warning e - -retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool -retrieveCheap _ _ _ = return False - -retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted r (cipher, enck) k d p = s3Action r False $ \(conn, bucket) -> - metered (Just p) k $ \meterupdate -> do - res <- liftIO $ getObject conn $ bucketKey r bucket enck - case res of - Right o -> liftIO $ decrypt cipher (\h -> meteredWrite meterupdate h $ obj_data o) $ - readBytes $ \content -> do - L.writeFile d content - return True - Left e -> s3Warning e + singlepartupload k f p = do + rbody <- liftIO $ httpBodyStorer f p + void $ sendS3Handle h $ putObject h (bucketObject (hinfo h) k) rbody + multipartupload fsz partsz k f p = do +#if MIN_VERSION_aws(0,10,6) + let info = hinfo h + let object = bucketObject info k + + let startreq = (S3.postInitiateMultipartUpload (bucket info) object) + { S3.imuStorageClass = Just (storageClass info) + , S3.imuMetadata = metaHeaders info + , S3.imuAutoMakeBucket = isIA info + , S3.imuExpires = Nothing -- TODO set some reasonable expiry + } + uploadid <- S3.imurUploadId <$> sendS3Handle h startreq + + -- The actual part size will be an even multiple of the + -- 32k chunk size that hGetUntilMetered uses. + let partsz' = (partsz `div` toInteger defaultChunkSize) * toInteger defaultChunkSize + + -- Send parts of the file, taking care to stream each part + -- w/o buffering in memory, since the parts can be large. + etags <- bracketIO (openBinaryFile f ReadMode) hClose $ \fh -> do + let sendparts meter etags partnum = do + pos <- liftIO $ hTell fh + if pos >= fsz + then return (reverse etags) + else do + -- Calculate size of part that will + -- be read. + let sz = if fsz - pos < partsz' + then fsz - pos + else partsz' + let p' = offsetMeterUpdate p (toBytesProcessed pos) + let numchunks = ceiling (fromIntegral sz / fromIntegral defaultChunkSize :: Double) + let popper = handlePopper numchunks defaultChunkSize p' fh + let req = S3.uploadPart (bucket info) object partnum uploadid $ + RequestBodyStream (fromIntegral sz) popper + S3.UploadPartResponse _ etag <- sendS3Handle h req + sendparts (offsetMeterUpdate meter (toBytesProcessed sz)) (etag:etags) (partnum + 1) + sendparts p [] 1 + + void $ sendS3Handle h $ S3.postCompleteMultipartUpload + (bucket info) object uploadid (zip [1..] etags) +#else + warning $ "Cannot do multipart upload (partsize " ++ show partsz ++ ") of large file (" ++ show fsz ++ "); built with too old a version of the aws library." + singlepartupload k f p +#endif + +{- Implemented as a fileRetriever that uses conduit to stream the chunks + - out to the file. Would be better to implement a byteRetriever, but + - that is difficult.
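 -
 - (An aside on the multipart sizing above, with illustrative numbers:
 - a 1GiB partsize is already an even multiple of the 32k chunk, since
 - 1073741824 `div` 32768 * 32768 == 1073741824, while an uneven
 - partsize of, say, 1000000 bytes rounds down to 983040.)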
-} +retrieve :: S3Handle -> Retriever +retrieve h = fileRetriever $ \f k p -> liftIO $ runResourceT $ do + (fr, fh) <- allocate (openFile f WriteMode) hClose + let req = S3.getObject (bucket info) (bucketObject info k) + S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle' h req + responseBody rsp $$+- sinkprogressfile fh p zeroBytesProcessed + release fr + where + info = hinfo h + sinkprogressfile fh meterupdate sofar = do + mbs <- await + case mbs of + Nothing -> return () + Just bs -> do + let sofar' = addBytesProcessed sofar (S.length bs) + liftIO $ do + void $ meterupdate sofar' + S.hPut fh bs + sinkprogressfile fh meterupdate sofar' + +retrieveCheap :: Key -> FilePath -> Annex Bool +retrieveCheap _ _ = return False {- Internet Archive doesn't easily allow removing content. - While it may remove the file, there are generally other files - derived from it that it does not remove. -} -remove :: Remote -> RemoteConfig -> Key -> Annex Bool -remove r c k - | isIA c = do +remove :: S3Handle -> Remover +remove h k + | isIA info = do warning "Cannot remove content from the Internet Archive" return False - | otherwise = remove' r k - -remove' :: Remote -> Key -> Annex Bool -remove' r k = s3Action r False $ \(conn, bucket) -> - s3Bool =<< liftIO (deleteObject conn $ bucketKey r bucket k) - -checkPresent :: Remote -> Key -> Annex (Either String Bool) -checkPresent r k = s3Action r noconn $ \(conn, bucket) -> do - showAction $ "checking " ++ name r - res <- liftIO $ getObjectInfo conn $ bucketKey r bucket k - case res of - Right _ -> return $ Right True - Left (AWSError _ _) -> return $ Right False - Left e -> return $ Left (s3Error e) + | otherwise = do + res <- tryNonAsync $ sendS3Handle h $ + S3.DeleteObject (bucketObject info k) (bucket info) + return $ either (const False) (const True) res where - noconn = Left $ error "S3 not configured" - -s3Warning :: ReqError -> Annex Bool -s3Warning e = do - warning $ prettyReqError e - return False - -s3Error :: ReqError -> a -s3Error e = error $ prettyReqError e - -s3Bool :: AWSResult () -> Annex Bool -s3Bool (Right _) = return True -s3Bool (Left e) = s3Warning e - -s3Action :: Remote -> a -> ((AWSConnection, Bucket) -> Annex a) -> Annex a -s3Action r noconn action = do - let bucket = M.lookup "bucket" $ config r - conn <- s3Connection (config r) (uuid r) - case (bucket, conn) of - (Just b, Just c) -> action (c, b) - _ -> return noconn - -bucketFile :: Remote -> Key -> FilePath -bucketFile r = munge . key2file - where - munge s = case M.lookup "mungekeys" c of - Just "ia" -> iaMunge $ filePrefix c ++ s - _ -> filePrefix c ++ s - c = config r - -filePrefix :: RemoteConfig -> String -filePrefix = M.findWithDefault "" "fileprefix" - -bucketKey :: Remote -> Bucket -> Key -> S3Object -bucketKey r bucket k = S3Object bucket (bucketFile r k) "" [] L.empty + info = hinfo h -{- Internet Archive limits filenames to a subset of ascii, - - with no whitespace. Other characters are xml entity - - encoded. 
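 -
 - For example: iaMunge "my file:1" == "myfile&58;1"
 - (the space is dropped, and ':' becomes its decimal entity).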
-} -iaMunge :: String -> String -iaMunge = (>>= munge) +checkKey :: Remote -> S3Handle -> CheckPresent +checkKey r h k = do + showAction $ "checking " ++ name r +#if MIN_VERSION_aws(0,10,0) + rsp <- go + return (isJust $ S3.horMetadata rsp) +#else + catchMissingException $ do + void go + return True +#endif where - munge c - | isAsciiUpper c || isAsciiLower c || isNumber c = [c] - | c `elem` "_-.\"" = [c] - | isSpace c = [] - | otherwise = "&" ++ show (ord c) ++ ";" - + go = sendS3Handle h $ + S3.headObject (bucket (hinfo h)) (bucketObject (hinfo h) k) + +#if ! MIN_VERSION_aws(0,10,0) + {- Catch exception headObject returns when an object is not present + - in the bucket, and returns False. All other exceptions indicate a + - check error and are let through. -} + catchMissingException :: Annex Bool -> Annex Bool + catchMissingException a = catchJust missing a (const $ return False) + where + missing :: AWS.HeaderException -> Maybe () + missing e + | AWS.headerErrorMessage e == "ETag missing" = Just () + | otherwise = Nothing +#endif + +{- Generate the bucket if it does not already exist, including creating the + - UUID file within the bucket. + - + - Some ACLs can allow read/write to buckets, but not querying them, + - so first check if the UUID file already exists and we can skip doing + - anything. + -} genBucket :: RemoteConfig -> UUID -> Annex () genBucket c u = do - conn <- s3ConnectionRequired c u showAction "checking bucket" - loc <- liftIO $ getBucketLocation conn bucket - case loc of - Right _ -> writeUUIDFile c u - Left err@(NetworkError _) -> s3Error err - Left (AWSError _ _) -> do - showAction $ "creating bucket in " ++ datacenter - res <- liftIO $ createBucketIn conn bucket datacenter - case res of - Right _ -> writeUUIDFile c u - Left err -> s3Error err + info <- extractS3Info c + withS3Handle c u info $ \h -> + go h =<< checkUUIDFile c u h where - bucket = fromJust $ getBucket c + go _ (Right True) = noop + go h _ = do + v <- tryNonAsync $ sendS3Handle h (S3.getBucket $ bucket $ hinfo h) + case v of + Right _ -> noop + Left _ -> do + showAction $ "creating bucket in " ++ datacenter + void $ sendS3Handle h $ + S3.PutBucket (bucket $ hinfo h) Nothing $ + mkLocationConstraint $ + T.pack datacenter + writeUUIDFile c u h + datacenter = fromJust $ M.lookup "datacenter" c {- Writes the UUID to an annex-uuid file within the bucket. - - If the file already exists in the bucket, it must match. - - - Note that IA items do not get created by createBucketIn. - - Rather, they are created the first time a file is stored in them. - - So this also takes care of that. + - Note that IA buckets can only be created by having a file + - stored in them. So this also takes care of that.
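 -
 - (A sketch, with a hypothetical fileprefix=foo/ configured: the object
 - written is "foo/annex-uuid", and its body is the remote's UUID string.)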
-} -writeUUIDFile :: RemoteConfig -> UUID -> Annex () -writeUUIDFile c u = do - conn <- s3ConnectionRequired c u - go conn =<< liftIO (tryNonAsync $ getObject conn $ mkobject L.empty) +writeUUIDFile :: RemoteConfig -> UUID -> S3Handle -> Annex () +writeUUIDFile c u h = do + v <- checkUUIDFile c u h + case v of + Right True -> noop + _ -> void $ sendS3Handle h mkobject where - go _conn (Right (Right o)) = unless (obj_data o == uuidb) $ - error $ "This bucket is already in use by a different S3 special remote, with UUID: " ++ L.unpack (obj_data o) - go conn _ = do - let object = setStorageClass (getStorageClass c) (mkobject uuidb) - either s3Error return =<< liftIO (sendObject conn object) - - file = filePrefix c ++ "annex-uuid" - uuidb = L.pack $ fromUUID u - bucket = fromJust $ getBucket c + file = T.pack $ uuidFile c + uuidb = L.fromChunks [T.encodeUtf8 $ T.pack $ fromUUID u] - mkobject = S3Object bucket file "" (getXheaders c) + mkobject = putObject h file (RequestBodyLBS uuidb) -s3ConnectionRequired :: RemoteConfig -> UUID -> Annex AWSConnection -s3ConnectionRequired c u = - maybe (error "Cannot connect to S3") return =<< s3Connection c u - -s3Connection :: RemoteConfig -> UUID -> Annex (Maybe AWSConnection) -s3Connection c u = go =<< getRemoteCredPairFor "S3" c (AWS.creds u) +{- Checks if the UUID file exists in the bucket + - and has the specified UUID already. -} +checkUUIDFile :: RemoteConfig -> UUID -> S3Handle -> Annex (Either SomeException Bool) +checkUUIDFile c u h = tryNonAsync $ check <$> get + where + get = liftIO + . runResourceT + . either (pure . Left) (Right <$$> AWS.loadToMemory) + =<< tryS3 (sendS3Handle h (S3.getObject (bucket (hinfo h)) file)) + check (Right (S3.GetObjectMemoryResponse _meta rsp)) = + responseStatus rsp == ok200 && responseBody rsp == uuidb + check (Left _S3Error) = False + + file = T.pack $ uuidFile c + uuidb = L.fromChunks [T.encodeUtf8 $ T.pack $ fromUUID u] + +uuidFile :: RemoteConfig -> FilePath +uuidFile c = getFilePrefix c ++ "annex-uuid" + +putObject :: S3Handle -> T.Text -> RequestBody -> S3.PutObject +putObject h file rbody = (S3.putObject (bucket info) file rbody) + { S3.poStorageClass = Just (storageClass info) + , S3.poMetadata = metaHeaders info + , S3.poAutoMakeBucket = isIA info + } + where + info = hinfo h + +data S3Handle = S3Handle + { hmanager :: Manager + , hawscfg :: AWS.Configuration + , hs3cfg :: S3.S3Configuration AWS.NormalQuery + , hinfo :: S3Info + } + +{- Sends a request to S3 and gets back the response. + - + - Note that pureAws's use of ResourceT is bypassed here; + - the response should be fully processed while the S3Handle + - is still open, eg within a call to withS3Handle. 
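 -
 - A typical use (sketch, mirroring genBucket above):
 -   withS3Handle c u info $ \h ->
 -       void $ sendS3Handle h $ S3.getBucket (bucket info)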
+ -} +sendS3Handle + :: (AWS.Transaction req res, AWS.ServiceConfiguration req ~ S3.S3Configuration) + => S3Handle + -> req + -> Annex res +sendS3Handle h r = liftIO $ runResourceT $ sendS3Handle' h r + +sendS3Handle' + :: (AWS.Transaction r a, AWS.ServiceConfiguration r ~ S3.S3Configuration) + => S3Handle + -> r + -> ResourceT IO a +sendS3Handle' h = AWS.pureAws (hawscfg h) (hs3cfg h) (hmanager h) + +withS3Handle :: RemoteConfig -> UUID -> S3Info -> (S3Handle -> Annex a) -> Annex a +withS3Handle c u info a = do + creds <- getRemoteCredPairFor "S3" c (AWS.creds u) + awscreds <- liftIO $ genCredentials $ fromMaybe nocreds creds + let awscfg = AWS.Configuration AWS.Timestamp awscreds (AWS.defaultLog AWS.Error) + bracketIO (newManager httpcfg) closeManager $ \mgr -> + a $ S3Handle mgr awscfg s3cfg info where - go Nothing = return Nothing - go (Just (ak, sk)) = return $ Just $ AWSConnection host port ak sk + s3cfg = s3Configuration c + httpcfg = defaultManagerSettings + { managerResponseTimeout = Nothing } + nocreds = error "Cannot use S3 without credentials configured" +s3Configuration :: RemoteConfig -> S3.S3Configuration AWS.NormalQuery +s3Configuration c = (S3.s3 proto endpoint False) { S3.s3Port = port } + where + proto + | port == 443 = AWS.HTTPS + | otherwise = AWS.HTTP host = fromJust $ M.lookup "host" c + datacenter = fromJust $ M.lookup "datacenter" c + -- When the default S3 host is configured, connect directly to + -- the S3 endpoint for the configured datacenter. + -- When another host is configured, it's used as-is. + endpoint + | host == AWS.s3DefaultHost = AWS.s3HostName $ T.pack datacenter + | otherwise = T.encodeUtf8 $ T.pack host port = let s = fromJust $ M.lookup "port" c in case reads s of [(p, _)] -> p _ -> error $ "bad S3 port value: " ++ s -getBucket :: RemoteConfig -> Maybe Bucket -getBucket = M.lookup "bucket" +tryS3 :: Annex a -> Annex (Either S3.S3Error a) +tryS3 a = (Right <$> a) `catch` (pure . Left) + +data S3Info = S3Info + { bucket :: S3.Bucket + , storageClass :: S3.StorageClass + , bucketObject :: Key -> T.Text + , metaHeaders :: [(T.Text, T.Text)] + , partSize :: Maybe Integer + , isIA :: Bool + } + +extractS3Info :: RemoteConfig -> Annex S3Info +extractS3Info c = do + b <- maybe + (error "S3 bucket not configured") + (return . T.pack) + (getBucketName c) + return $ S3Info + { bucket = b + , storageClass = getStorageClass c + , bucketObject = T.pack . getBucketObject c + , metaHeaders = getMetaHeaders c + , partSize = getPartSize c + , isIA = configIA c + } -getStorageClass :: RemoteConfig -> StorageClass -getStorageClass c = case fromJust $ M.lookup "storageclass" c of - "REDUCED_REDUNDANCY" -> REDUCED_REDUNDANCY - _ -> STANDARD - -getXheaders :: RemoteConfig -> [(String, String)] -getXheaders = filter isxheader . M.assocs +getBucketName :: RemoteConfig -> Maybe BucketName +getBucketName = M.lookup "bucket" + +getStorageClass :: RemoteConfig -> S3.StorageClass +getStorageClass c = case M.lookup "storageclass" c of + Just "REDUCED_REDUNDANCY" -> S3.ReducedRedundancy + _ -> S3.Standard + +getPartSize :: RemoteConfig -> Maybe Integer +getPartSize c = readSize dataUnits =<< M.lookup "partsize" c + +getMetaHeaders :: RemoteConfig -> [(T.Text, T.Text)] +getMetaHeaders = map munge . filter ismetaheader . 
M.assocs where - isxheader (h, _) = "x-amz-" `isPrefixOf` h + ismetaheader (h, _) = metaprefix `isPrefixOf` h + metaprefix = "x-amz-meta-" + metaprefixlen = length metaprefix + munge (k, v) = (T.pack $ drop metaprefixlen k, T.pack v) + +getFilePrefix :: RemoteConfig -> String +getFilePrefix = M.findWithDefault "" "fileprefix" + +getBucketObject :: RemoteConfig -> Key -> FilePath +getBucketObject c = munge . key2file + where + munge s = case M.lookup "mungekeys" c of + Just "ia" -> iaMunge $ getFilePrefix c ++ s + _ -> getFilePrefix c ++ s + +{- Internet Archive limits filenames to a subset of ascii, + - with no whitespace. Other characters are xml entity + - encoded. -} +iaMunge :: String -> String +iaMunge = (>>= munge) + where + munge c + | isAsciiUpper c || isAsciiLower c || isNumber c = [c] + | c `elem` "_-.\"" = [c] + | isSpace c = [] + | otherwise = "&" ++ show (ord c) ++ ";" + +configIA :: RemoteConfig -> Bool +configIA = maybe False isIAHost . M.lookup "host" {- Hostname to use for archive.org S3. -} iaHost :: HostName iaHost = "s3.us.archive.org" -isIA :: RemoteConfig -> Bool -isIA c = maybe False isIAHost (M.lookup "host" c) - isIAHost :: HostName -> Bool isIAHost h = ".archive.org" `isSuffixOf` map toLower h -iaItemUrl :: Bucket -> URLString -iaItemUrl bucket = "http://archive.org/details/" ++ bucket +iaItemUrl :: BucketName -> URLString +iaItemUrl b = "http://archive.org/details/" ++ b iaKeyUrl :: Remote -> Key -> URLString -iaKeyUrl r k = "http://archive.org/download/" ++ bucket ++ "/" ++ bucketFile r k +iaKeyUrl r k = "http://archive.org/download/" ++ b ++ "/" ++ getBucketObject (config r) k where - bucket = fromMaybe "" $ getBucket $ config r + b = fromMaybe "" $ getBucketName $ config r + +genCredentials :: CredPair -> IO AWS.Credentials +genCredentials (keyid, secret) = AWS.Credentials + <$> pure (T.encodeUtf8 (T.pack keyid)) + <*> pure (T.encodeUtf8 (T.pack secret)) + <*> newIORef [] + <*> pure Nothing + +mkLocationConstraint :: AWS.Region -> S3.LocationConstraint +mkLocationConstraint "US" = S3.locationUsClassic +mkLocationConstraint r = r diff --git a/Remote/Tahoe.hs b/Remote/Tahoe.hs index 56a17eb62..56bf66427 100644 --- a/Remote/Tahoe.hs +++ b/Remote/Tahoe.hs @@ -13,7 +13,7 @@ - - Tahoe has its own encryption, so git-annex's encryption is not used. - - - Copyright 2014 Joey Hess <joey@kitenet.net> + - Copyright 2014 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. 
-} @@ -64,27 +64,31 @@ gen r u c gc = do hdl <- liftIO $ TahoeHandle <$> maybe (defaultTahoeConfigDir u) return (remoteAnnexTahoe gc) <*> newEmptyTMVarIO - return $ Just $ Remote { - uuid = u, - cost = cst, - name = Git.repoDescribe r, - storeKey = store u hdl, - retrieveKeyFile = retrieve u hdl, - retrieveKeyFileCheap = \_ _ -> return False, - removeKey = remove, - hasKey = checkPresent u hdl, - hasKeyCheap = False, - whereisKey = Nothing, - remoteFsck = Nothing, - repairRepo = Nothing, - config = c, - repo = r, - gitconfig = gc, - localpath = Nothing, - readonly = False, - availability = GloballyAvailable, - remotetype = remote - } + return $ Just $ Remote + { uuid = u + , cost = cst + , name = Git.repoDescribe r + , storeKey = store u hdl + , retrieveKeyFile = retrieve u hdl + , retrieveKeyFileCheap = \_ _ -> return False + , removeKey = remove + , checkPresent = checkKey u hdl + , checkPresentCheap = False + , whereisKey = Nothing + , remoteFsck = Nothing + , repairRepo = Nothing + , config = c + , repo = r + , gitconfig = gc + , localpath = Nothing + , readonly = False + , availability = GloballyAvailable + , remotetype = remote + , mkUnavailable = return Nothing + , getInfo = return [] + , claimUrl = Nothing + , checkUrl = Nothing + } tahoeSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) tahoeSetup mu _ c = do @@ -123,14 +127,16 @@ remove _k = do warning "content cannot be removed from tahoe remote" return False -checkPresent :: UUID -> TahoeHandle -> Key -> Annex (Either String Bool) -checkPresent u hdl k = go =<< getCapability u k +checkKey :: UUID -> TahoeHandle -> Key -> Annex Bool +checkKey u hdl k = go =<< getCapability u k where - go Nothing = return (Right False) - go (Just cap) = liftIO $ parseCheck <$> readTahoe hdl "check" - [ Param "--raw" - , Param cap - ] + go Nothing = return False + go (Just cap) = liftIO $ do + v <- parseCheck <$> readTahoe hdl "check" + [ Param "--raw" + , Param cap + ] + either error return v defaultTahoeConfigDir :: UUID -> IO TahoeConfigDir defaultTahoeConfigDir u = do @@ -164,7 +170,7 @@ writeSharedConvergenceSecret configdir scs = getSharedConvergenceSecret :: TahoeConfigDir -> IO SharedConvergenceSecret getSharedConvergenceSecret configdir = go (60 :: Int) where - f = convergenceFile configdir + f = convergenceFile configdir go n | n == 0 = error $ "tahoe did not write " ++ f ++ " after 1 minute. Perhaps the daemon failed to start?" | otherwise = do @@ -187,7 +193,7 @@ startTahoeDaemon configdir = void $ boolTahoe configdir "start" [] withTahoeConfigDir :: TahoeHandle -> (TahoeConfigDir -> IO a) -> IO a withTahoeConfigDir (TahoeHandle configdir v) a = go =<< atomically needsstart where - go True = do + go True = do startTahoeDaemon configdir a configdir go False = a configdir @@ -216,7 +222,7 @@ readTahoe hdl command params = withTahoeConfigDir hdl $ \configdir -> tahoeParams :: TahoeConfigDir -> String -> [CommandParam] -> [CommandParam] tahoeParams configdir command params = - Param command : Param "-d" : File configdir : params + Param "-d" : File configdir : Param command : params storeCapability :: UUID -> Key -> Capability -> Annex () storeCapability u k cap = setRemoteState u k cap diff --git a/Remote/Web.hs b/Remote/Web.hs index ddd1fc1cc..a4a484ca3 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -1,6 +1,6 @@ -{- Web remotes. +{- Web remote. - - - Copyright 2011 Joey Hess <joey@kitenet.net> + - Copyright 2011 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. 
-} @@ -16,6 +16,7 @@ import qualified Git.Construct import Annex.Content import Config.Cost import Logs.Web +import Annex.UUID import Types.Key import Utility.Metered import qualified Annex.Url as Url @@ -37,35 +38,39 @@ remote = RemoteType { -- a new release to the survivors by carrier pigeon.) list :: Annex [Git.Repo] list = do - r <- liftIO $ Git.Construct.remoteNamed "web" Git.Construct.fromUnknown + r <- liftIO $ Git.Construct.remoteNamed "web" (pure Git.Construct.fromUnknown) return [r] gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r _ c gc = - return $ Just Remote { - uuid = webUUID, - cost = expensiveRemoteCost, - name = Git.repoDescribe r, - storeKey = uploadKey, - retrieveKeyFile = downloadKey, - retrieveKeyFileCheap = downloadKeyCheap, - removeKey = dropKey, - hasKey = checkKey, - hasKeyCheap = False, - whereisKey = Just getUrls, - remoteFsck = Nothing, - repairRepo = Nothing, - config = c, - gitconfig = gc, - localpath = Nothing, - repo = r, - readonly = True, - availability = GloballyAvailable, - remotetype = remote - } + return $ Just Remote + { uuid = webUUID + , cost = expensiveRemoteCost + , name = Git.repoDescribe r + , storeKey = uploadKey + , retrieveKeyFile = downloadKey + , retrieveKeyFileCheap = downloadKeyCheap + , removeKey = dropKey + , checkPresent = checkKey + , checkPresentCheap = False + , whereisKey = Just getWebUrls + , remoteFsck = Nothing + , repairRepo = Nothing + , config = c + , gitconfig = gc + , localpath = Nothing + , repo = r + , readonly = True + , availability = GloballyAvailable + , remotetype = remote + , mkUnavailable = return Nothing + , getInfo = return [] + , claimUrl = Nothing -- implicitly claims all urls + , checkUrl = Nothing + } downloadKey :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -downloadKey key _file dest _p = get =<< getUrls key +downloadKey key _file dest _p = get =<< getWebUrls key where get [] = do warning "no known url" @@ -83,7 +88,7 @@ downloadKey key _file dest _p = get =<< getUrls key warning "quvi support needed for this url" return False #endif - DefaultDownloader -> downloadUrl [u'] dest + _ -> downloadUrl [u'] dest downloadKeyCheap :: Key -> FilePath -> Annex Bool downloadKeyCheap _ _ = return False @@ -95,15 +100,15 @@ uploadKey _ _ _ = do dropKey :: Key -> Annex Bool dropKey k = do - mapM_ (setUrlMissing k) =<< getUrls k + mapM_ (setUrlMissing webUUID k) =<< getWebUrls k return True -checkKey :: Key -> Annex (Either String Bool) +checkKey :: Key -> Annex Bool checkKey key = do - us <- getUrls key + us <- getWebUrls key if null us - then return $ Right False - else return =<< checkKey' key us + then return False + else either error return =<< checkKey' key us checkKey' :: Key -> [URLString] -> Annex (Either String Bool) checkKey' key us = firsthit us (Right False) $ \u -> do let (u', downloader) = getDownloader u @@ -115,13 +120,19 @@ checkKey' key us = firsthit us (Right False) $ \u -> do #else return $ Left "quvi support needed for this url" #endif - DefaultDownloader -> do + _ -> do Url.withUrlOptions $ catchMsgIO . 
Url.checkBoth u' (keySize key) where - firsthit [] miss _ = return miss + firsthit [] miss _ = return miss firsthit (u:rest) _ a = do r <- a u case r of Right _ -> return r Left _ -> firsthit rest r a + +getWebUrls :: Key -> Annex [URLString] +getWebUrls key = filter supported <$> getUrls key + where + supported u = snd (getDownloader u) + `elem` [WebDownloader, QuviDownloader] diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 91b83053c..aaebecf41 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -1,29 +1,23 @@ {- WebDAV remotes. - - - Copyright 2012-2014 Joey Hess <joey@kitenet.net> + - Copyright 2012-2014 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} -{-# LANGUAGE ScopedTypeVariables, CPP #-} +{-# LANGUAGE ScopedTypeVariables #-} module Remote.WebDAV (remote, davCreds, configUrl) where import Network.Protocol.HTTP.DAV import qualified Data.Map as M +import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.UTF8 as B8 import qualified Data.ByteString.Lazy.UTF8 as L8 -import qualified Data.ByteString.Lazy as L -import qualified Control.Exception as E -import qualified Control.Exception.Lifted as EL -#if MIN_VERSION_DAV(0,6,0) import Network.HTTP.Client (HttpException(..)) -#else -import Network.HTTP.Conduit (HttpException(..)) -#endif import Network.HTTP.Types -import System.Log.Logger (debugM) import System.IO.Error +import Control.Monad.Catch import Common.Annex import Types.Remote @@ -31,17 +25,13 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special -import Remote.Helper.Encryptable -import Remote.Helper.Chunked -import Crypto +import Remote.Helper.Http +import qualified Remote.Helper.Chunked.Legacy as Legacy import Creds import Utility.Metered -import Annex.Content +import Utility.Url (URLString) import Annex.UUID -import Remote.WebDAV.DavUrl - -type DavUser = B8.ByteString -type DavPass = B8.ByteString +import Remote.WebDAV.DavLocation remote :: RemoteType remote = RemoteType { @@ -54,228 +44,194 @@ remote = RemoteType { gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = new <$> remoteCost gc expensiveRemoteCost where - new cst = Just $ encryptableRemote c - (storeEncrypted this) - (retrieveEncrypted this) + new cst = Just $ specialRemote c + (prepareDAV this $ store chunkconfig) + (prepareDAV this $ retrieve chunkconfig) + (prepareDAV this $ remove) + (prepareDAV this $ checkKey this chunkconfig) this where - this = Remote { - uuid = u, - cost = cst, - name = Git.repoDescribe r, - storeKey = store this, - retrieveKeyFile = retrieve this, - retrieveKeyFileCheap = retrieveCheap this, - removeKey = remove this, - hasKey = checkPresent this, - hasKeyCheap = False, - whereisKey = Nothing, - remoteFsck = Nothing, - repairRepo = Nothing, - config = c, - repo = r, - gitconfig = gc, - localpath = Nothing, - readonly = False, - availability = GloballyAvailable, - remotetype = remote - } + this = Remote + { uuid = u + , cost = cst + , name = Git.repoDescribe r + , storeKey = storeKeyDummy + , retrieveKeyFile = retreiveKeyFileDummy + , retrieveKeyFileCheap = retrieveCheap + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy + , checkPresentCheap = False + , whereisKey = Nothing + , remoteFsck = Nothing + , repairRepo = Nothing + , config = c + , repo = r + , gitconfig = gc + , localpath = Nothing + , readonly = False + , availability = GloballyAvailable + , remotetype = remote + , mkUnavailable = gen r u (M.insert "url" "http://!dne!/" c) 
gc + , getInfo = includeCredsInfo c (davCreds u) $ + [("url", fromMaybe "unknown" (M.lookup "url" c))] + , claimUrl = Nothing + , checkUrl = Nothing + } + chunkconfig = getChunkConfig c webdavSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) webdavSetup mu mcreds c = do u <- maybe (liftIO genUUID) return mu - let url = fromMaybe (error "Specify url=") $ - M.lookup "url" c - c' <- encryptionSetup c + url <- case M.lookup "url" c of + Nothing -> error "Specify url=" + Just url -> return url + (c', encsetup) <- encryptionSetup c creds <- maybe (getCreds c' u) (return . Just) mcreds testDav url creds gitConfigSpecialRemote u c' "webdav" "true" - c'' <- setRemoteCredPair c' (davCreds u) creds + c'' <- setRemoteCredPair encsetup c' (davCreds u) creds return (c'', u) -store :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store r k _f p = metered (Just p) k $ \meterupdate -> - davAction r False $ \(baseurl, user, pass) -> - sendAnnex k (void $ remove r k) $ \src -> - liftIO $ withMeteredFile src meterupdate $ - storeHelper r k baseurl user pass - -storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted r (cipher, enck) k p = metered (Just p) k $ \meterupdate -> - davAction r False $ \(baseurl, user, pass) -> - sendAnnex k (void $ remove r enck) $ \src -> - liftIO $ encrypt (getGpgEncParams r) cipher - (streamMeteredFile src meterupdate) $ - readBytes $ storeHelper r enck baseurl user pass - -storeHelper :: Remote -> Key -> DavUrl -> DavUser -> DavPass -> L.ByteString -> IO Bool -storeHelper r k baseurl user pass b = catchBoolIO $ do - mkdirRecursiveDAV tmpurl user pass - storeChunks k tmpurl keyurl chunksize storer recorder finalizer - where - tmpurl = tmpLocation baseurl k - keyurl = davLocation baseurl k - chunksize = chunkSize $ config r - storer urls = storeChunked chunksize urls storehttp b - recorder url s = storehttp url (L8.fromString s) - finalizer srcurl desturl = do - void $ tryNonAsync (deleteDAV desturl user pass) - mkdirRecursiveDAV (urlParent desturl) user pass - moveDAV srcurl desturl user pass - storehttp url = putDAV url user pass - -retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool -retrieveCheap _ _ _ = return False - -retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve r k _f d p = metered (Just p) k $ \meterupdate -> - davAction r False $ \(baseurl, user, pass) -> liftIO $ catchBoolIO $ - withStoredFiles r k baseurl user pass onerr $ \urls -> do - meteredWriteFileChunks meterupdate d urls $ \url -> do - mb <- getDAV url user pass - case mb of - Nothing -> throwIO "download failed" - Just b -> return b - return True - where - onerr _ = return False - -retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted r (cipher, enck) k d p = metered (Just p) k $ \meterupdate -> - davAction r False $ \(baseurl, user, pass) -> liftIO $ catchBoolIO $ - withStoredFiles r enck baseurl user pass onerr $ \urls -> do - decrypt cipher (feeder user pass urls) $ - readBytes $ meteredWriteFile meterupdate d - return True - where - onerr _ = return False - - feeder _ _ [] _ = noop - feeder user pass (url:urls) h = do - mb <- getDAV url user pass - case mb of - Nothing -> throwIO "download failed" - Just b -> do - L.hPut h b - feeder user pass urls h - -remove :: Remote -> Key -> Annex Bool -remove r k = davAction r False $ \(baseurl, user, pass) -> liftIO $ do - -- Delete the key's whole directory, including 
any chunked - -- files, etc, in a single action. - let url = davLocation baseurl k - isJust . eitherToMaybe <$> tryNonAsync (deleteDAV url user pass) - -checkPresent :: Remote -> Key -> Annex (Either String Bool) -checkPresent r k = davAction r noconn go - where - noconn = Left $ error $ name r ++ " not configured" - - go (baseurl, user, pass) = do - showAction $ "checking " ++ name r - liftIO $ withStoredFiles r k baseurl user pass onerr check - where - check [] = return $ Right True - check (url:urls) = do - v <- existsDAV url user pass - if v == Right True - then check urls - else return v - - {- Failed to read the chunkcount file; see if it's missing, - - or if there's a problem accessing it, - - or perhaps this was an intermittent error. -} - onerr url = do - v <- existsDAV url user pass - return $ if v == Right True - then Left $ "failed to read " ++ url - else v - -withStoredFiles - :: Remote - -> Key - -> DavUrl - -> DavUser - -> DavPass - -> (DavUrl -> IO a) - -> ([DavUrl] -> IO a) - -> IO a -withStoredFiles r k baseurl user pass onerr a - | isJust $ chunkSize $ config r = do - let chunkcount = keyurl ++ chunkCount - v <- getDAV chunkcount user pass +-- Opens a http connection to the DAV server, which will be reused +-- each time the helper is called. +prepareDAV :: Remote -> (Maybe DavHandle -> helper) -> Preparer helper +prepareDAV = resourcePrepare . const . withDAVHandle + +store :: ChunkConfig -> Maybe DavHandle -> Storer +store _ Nothing = byteStorer $ \_k _b _p -> return False +store (LegacyChunks chunksize) (Just dav) = fileStorer $ \k f p -> liftIO $ + withMeteredFile f p $ storeLegacyChunked chunksize k dav +store _ (Just dav) = httpStorer $ \k reqbody -> liftIO $ goDAV dav $ do + let tmp = keyTmpLocation k + let dest = keyLocation k + void $ mkColRecursive tmpDir + inLocation tmp $ + putContentM' (contentType, reqbody) + finalizeStore (baseURL dav) tmp dest + return True + +finalizeStore :: URLString -> DavLocation -> DavLocation -> DAVT IO () +finalizeStore baseurl tmp dest = do + inLocation dest $ void $ safely $ delContentM + maybe noop (void . mkColRecursive) (locationParent dest) + moveDAV baseurl tmp dest + +retrieveCheap :: Key -> FilePath -> Annex Bool +retrieveCheap _ _ = return False + +retrieve :: ChunkConfig -> Maybe DavHandle -> Retriever +retrieve _ Nothing = error "unable to connect" +retrieve (LegacyChunks _) (Just dav) = retrieveLegacyChunked dav +retrieve _ (Just dav) = fileRetriever $ \d k p -> liftIO $ + goDAV dav $ + inLocation (keyLocation k) $ + withContentM $ + httpBodyRetriever d p + +remove :: Maybe DavHandle -> Remover +remove Nothing _ = return False +remove (Just dav) k = liftIO $ do + -- Delete the key's whole directory, including any + -- legacy chunked files, etc, in a single action. 
+ let d = keyDir k + goDAV dav $ do + v <- safely $ inLocation d delContentM case v of - Just s -> a $ listChunks keyurl $ L8.toString s + Just _ -> return True Nothing -> do - chunks <- probeChunks keyurl $ \u -> (== Right True) <$> existsDAV u user pass - if null chunks - then onerr chunkcount - else a chunks - | otherwise = a [keyurl] - where - keyurl = davLocation baseurl k ++ keyFile k - -davAction :: Remote -> a -> ((DavUrl, DavUser, DavPass) -> Annex a) -> Annex a -davAction r unconfigured action = do - mcreds <- getCreds (config r) (uuid r) - case (mcreds, configUrl r) of - (Just (user, pass), Just url) -> - action (url, toDavUser user, toDavPass pass) - _ -> return unconfigured - -configUrl :: Remote -> Maybe DavUrl + v' <- existsDAV d + case v' of + Right False -> return True + _ -> return False + +checkKey :: Remote -> ChunkConfig -> Maybe DavHandle -> CheckPresent +checkKey r _ Nothing _ = error $ name r ++ " not configured" +checkKey r chunkconfig (Just dav) k = do + showAction $ "checking " ++ name r + case chunkconfig of + LegacyChunks _ -> checkKeyLegacyChunked dav k + _ -> do + v <- liftIO $ goDAV dav $ + existsDAV (keyLocation k) + either error return v + +configUrl :: Remote -> Maybe URLString configUrl r = fixup <$> M.lookup "url" (config r) where -- box.com DAV url changed fixup = replace "https://www.box.com/dav/" "https://dav.box.com/dav/" +type DavUser = B8.ByteString +type DavPass = B8.ByteString + +baseURL :: DavHandle -> URLString +baseURL (DavHandle _ _ _ u) = u + + toDavUser :: String -> DavUser toDavUser = B8.fromString toDavPass :: String -> DavPass toDavPass = B8.fromString -{- Creates a directory in WebDAV, if not already present; also creating - - any missing parent directories. -} -mkdirRecursiveDAV :: DavUrl -> DavUser -> DavPass -> IO () -mkdirRecursiveDAV url user pass = go url - where - make u = mkdirDAV u user pass - - go u = do - r <- E.try (make u) :: IO (Either E.SomeException Bool) - case r of - {- Parent directory is missing. Recurse to create - - it, and try once more to create the directory. -} - Right False -> do - go (urlParent u) - void $ make u - {- Directory created successfully -} - Right True -> return () - {- Directory already exists, or some other error - - occurred. In the latter case, whatever wanted - - to use this directory will fail. -} - Left _ -> return () - {- Test if a WebDAV store is usable, by writing to a test file, and then - - deleting the file. Exits with an IO error if not. -} -testDav :: String -> Maybe CredPair -> Annex () -testDav baseurl (Just (u, p)) = do + - deleting the file. + - + - Also ensures that the path of the url exists, trying to create it if not. + - + - Throws an error if store is not usable. 
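 -
 - (For example, with a hypothetical url=https://dav.example.com/annex,
 - the test writes and then deletes
 - https://dav.example.com/annex/tmp/git-annex-test.)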
+ -} +testDav :: URLString -> Maybe CredPair -> Annex () +testDav url (Just (u, p)) = do showSideAction "testing WebDAV server" - test "make directory" $ mkdirRecursiveDAV baseurl user pass - test "write file" $ putDAV testurl user pass L.empty - test "delete file" $ deleteDAV testurl user pass + test $ liftIO $ evalDAVT url $ do + prepDAV user pass + makeParentDirs + void $ mkColRecursive tmpDir + inLocation (tmpLocation "git-annex-test") $ do + putContentM (Nothing, L.empty) + delContentM where - test desc a = liftIO $ - either (\e -> throwIO $ "WebDAV failed to " ++ desc ++ ": " ++ show e) + test a = liftIO $ + either (\e -> throwIO $ "WebDAV test failed: " ++ show e) (const noop) =<< tryNonAsync a user = toDavUser u pass = toDavPass p - testurl = davUrl baseurl "git-annex-test" testDav _ Nothing = error "Need to configure webdav username and password." +{- Tries to make all the parent directories in the WebDAV url's path, + - right down to the root. + - + - Ignores any failures, which can occur for reasons including the WebDAV + - server only serving up WebDAV in a subdirectory. -} +makeParentDirs :: DAVT IO () +makeParentDirs = go + where + go = do + l <- getDAVLocation + case locationParent l of + Nothing -> noop + Just p -> void $ safely $ inDAVLocation (const p) go + void $ safely mkCol + +{- Checks if the directory exists. If not, tries to create its + - parent directories, all the way down to the root, and finally creates + - it. -} +mkColRecursive :: DavLocation -> DAVT IO Bool +mkColRecursive d = go =<< existsDAV d + where + go (Right True) = return True + go _ = ifM (inLocation d mkCol) + ( return True + , do + case locationParent d of + Nothing -> makeParentDirs + Just parent -> void (mkColRecursive parent) + inLocation d mkCol + ) + getCreds :: RemoteConfig -> UUID -> Annex (Maybe CredPair) getCreds c u = getRemoteCredPairFor "webdav" c (davCreds u) @@ -293,107 +249,142 @@ contentType = Just $ B8.fromString "application/octet-stream" throwIO :: String -> IO a throwIO msg = ioError $ mkIOError userErrorType msg Nothing Nothing -debugDAV :: DavUrl -> String -> IO () -debugDAV msg url = debugM "DAV" $ msg ++ " " ++ url - -{--------------------------------------------------------------------- - - Low-level DAV operations, using the new DAV monad when available. - ---------------------------------------------------------------------} - -putDAV :: DavUrl -> DavUser -> DavPass -> L.ByteString -> IO () -putDAV url user pass b = do - debugDAV "PUT" url -#if MIN_VERSION_DAV(0,6,0) - goDAV url user pass $ putContentM (contentType, b) -#else - putContent url user pass (contentType, b) -#endif - -getDAV :: DavUrl -> DavUser -> DavPass -> IO (Maybe L.ByteString) -getDAV url user pass = do - debugDAV "GET" url - eitherToMaybe <$> tryNonAsync go where -#if MIN_VERSION_DAV(0,6,0) - go = goDAV url user pass $ snd <$> getContentM -#else - go = snd .
snd <$> getPropsAndContent url user pass -#endif - -deleteDAV :: DavUrl -> DavUser -> DavPass -> IO () -deleteDAV url user pass = do - debugDAV "DELETE" url -#if MIN_VERSION_DAV(0,6,0) - goDAV url user pass delContentM -#else - deleteContent url user pass -#endif - -moveDAV :: DavUrl -> DavUrl -> DavUser -> DavPass -> IO () -moveDAV url newurl user pass = do - debugDAV ("MOVE to " ++ newurl ++ " from ") url -#if MIN_VERSION_DAV(0,6,0) - goDAV url user pass $ moveContentM newurl' -#else - moveContent url newurl' user pass -#endif - where - newurl' = B8.fromString newurl - -mkdirDAV :: DavUrl -> DavUser -> DavPass -> IO Bool -mkdirDAV url user pass = do - debugDAV "MKDIR" url -#if MIN_VERSION_DAV(0,6,0) - goDAV url user pass mkCol -#else - makeCollection url user pass -#endif - -existsDAV :: DavUrl -> DavUser -> DavPass -> IO (Either String Bool) -existsDAV url user pass = do - debugDAV "EXISTS" url - either (Left . show) id <$> tryNonAsync check + newurl = B8.fromString (locationUrl baseurl dest) + +existsDAV :: DavLocation -> DAVT IO (Either String Bool) +existsDAV l = inLocation l check `catchNonAsync` (\e -> return (Left $ show e)) where - ispresent = return . Right -#if MIN_VERSION_DAV(0,6,0) - check = goDAV url user pass $ do + check = do setDepth Nothing - EL.catchJust - (matchStatusCodeException notFound404) + catchJust + (matchStatusCodeException (== notFound404)) (getPropsM >> ispresent True) (const $ ispresent False) -#else - check = E.catchJust - (matchStatusCodeException notFound404) -#if ! MIN_VERSION_DAV(0,4,0) - (getProps url user pass >> ispresent True) -#else - (getProps url user pass Nothing >> ispresent True) -#endif - (const $ ispresent False) -#endif - -matchStatusCodeException :: Status -> HttpException -> Maybe () -#if MIN_VERSION_DAV(0,6,0) -matchStatusCodeException want (StatusCodeException s _ _) -#else -matchStatusCodeException want (StatusCodeException s _) -#endif - | s == want = Just () + ispresent = return . Right + +matchStatusCodeException :: (Status -> Bool) -> HttpException -> Maybe HttpException +matchStatusCodeException want e@(StatusCodeException s _ _) + | want s = Just e | otherwise = Nothing matchStatusCodeException _ _ = Nothing -#if MIN_VERSION_DAV(0,6,0) -goDAV :: DavUrl -> DavUser -> DavPass -> DAVT IO a -> IO a -goDAV url user pass a = choke $ evalDAVT url $ do +-- Ignores any exceptions when performing a DAV action. +safely :: DAVT IO a -> DAVT IO (Maybe a) +safely = eitherToMaybe <$$> tryNonAsync + +choke :: IO (Either String a) -> IO a +choke f = do + x <- f + case x of + Left e -> error e + Right r -> return r + +data DavHandle = DavHandle DAVContext DavUser DavPass URLString + +withDAVHandle :: Remote -> (Maybe DavHandle -> Annex a) -> Annex a +withDAVHandle r a = do + mcreds <- getCreds (config r) (uuid r) + case (mcreds, configUrl r) of + (Just (user, pass), Just baseurl) -> + withDAVContext baseurl $ \ctx -> + a (Just (DavHandle ctx (toDavUser user) (toDavPass pass) baseurl)) + _ -> a Nothing + +goDAV :: DavHandle -> DAVT IO a -> IO a +goDAV (DavHandle ctx user pass _) a = choke $ run $ prettifyExceptions $ do + prepDAV user pass + a + where + run = fst <$$> runDAVContext ctx + +{- Catch StatusCodeException and trim it to only the statusMessage part, + - eliminating a lot of noise, which can include the whole request that + - failed. The rethrown exception is no longer a StatusCodeException. 
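 -
 - For example, a request that fails with a 404 response is rethrown as
 - roughly: DAV failure: 404 "Not Found"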
+{- Catch StatusCodeException and trim it to only the statusMessage part,
+ - eliminating a lot of noise, which can include the whole request that
+ - failed. The rethrown exception is no longer a StatusCodeException. -}
+prettifyExceptions :: DAVT IO a -> DAVT IO a
+prettifyExceptions a = catchJust (matchStatusCodeException (const True)) a go
+  where
+	go (StatusCodeException status _ _) = error $ unwords
+		[ "DAV failure:"
+		, show (statusCode status)
+		, show (statusMessage status)
+		]
+	go e = throwM e
+
+prepDAV :: DavUser -> DavPass -> DAVT IO ()
+prepDAV user pass = do
 	setResponseTimeout Nothing -- disable default (5 second!) timeout
 	setCreds user pass
-	a
+
+--
+-- Legacy chunking code, to be removed eventually.
+--
+
+storeLegacyChunked :: ChunkSize -> Key -> DavHandle -> L.ByteString -> IO Bool
+storeLegacyChunked chunksize k dav b =
+	Legacy.storeChunks k tmp dest storer recorder finalizer
+  where
+	storehttp l b' = void $ goDAV dav $ do
+		maybe noop (void . mkColRecursive) (locationParent l)
+		inLocation l $ putContentM (contentType, b')
+	storer locs = Legacy.storeChunked chunksize locs storehttp b
+	recorder l s = storehttp l (L8.fromString s)
+	finalizer tmp' dest' = goDAV dav $
+		finalizeStore (baseURL dav) tmp' (fromJust $ locationParent dest')
+
+	tmp = keyTmpLocation k
+	dest = keyLocation k
+
+retrieveLegacyChunked :: DavHandle -> Retriever
+retrieveLegacyChunked dav = fileRetriever $ \d k p -> liftIO $
+	withStoredFilesLegacyChunked k dav onerr $ \locs ->
+		Legacy.meteredWriteFileChunks p d locs $ \l ->
+			goDAV dav $
+				inLocation l $
+					snd <$> getContentM
+  where
+	onerr = error "download failed"
+
+checkKeyLegacyChunked :: DavHandle -> CheckPresent
+checkKeyLegacyChunked dav k = liftIO $
+	either error id <$> withStoredFilesLegacyChunked k dav onerr check
+  where
+	check [] = return $ Right True
+	check (l:ls) = do
+		v <- goDAV dav $ existsDAV l
+		if v == Right True
+			then check ls
+			else return v
+
+	{- Failed to read the chunkcount file; see if it's missing,
+	 - or if there's a problem accessing it,
+	 - or perhaps this was an intermittent error. -}
+	onerr f = do
+		v <- goDAV dav $ existsDAV f
+		return $ if v == Right True
+			then Left $ "failed to read " ++ f
+			else v
+
+withStoredFilesLegacyChunked
+	:: Key
+	-> DavHandle
+	-> (DavLocation -> IO a)
+	-> ([DavLocation] -> IO a)
+	-> IO a
+withStoredFilesLegacyChunked k dav onerr a = do
+	let chunkcount = keyloc ++ Legacy.chunkCount
+	v <- goDAV dav $ safely $
+		inLocation chunkcount $
+			snd <$> getContentM
+	case v of
+		Just s -> a $ Legacy.listChunks keyloc $ L8.toString s
+		Nothing -> do
+			chunks <- Legacy.probeChunks keyloc $ \f ->
+				(== Right True) <$> goDAV dav (existsDAV f)
+			if null chunks
+				then onerr chunkcount
+				else a chunks
 	where
-	choke :: IO (Either String a) -> IO a
-	choke f = do
-		x <- f
-		case x of
-			Left e -> error e
-			Right r -> return r
-#endif
+	keyloc = keyLocation k
diff --git a/Remote/WebDAV/DavLocation.hs b/Remote/WebDAV/DavLocation.hs
new file mode 100644
index 000000000..157e7151a
--- /dev/null
+++ b/Remote/WebDAV/DavLocation.hs
@@ -0,0 +1,63 @@
+{- WebDAV locations.
+ -
+ - Copyright 2014 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE FlexibleContexts #-}
+
+module Remote.WebDAV.DavLocation where
+
+import Types
+import Locations
+import Utility.Url (URLString)
+
+import System.FilePath.Posix -- for manipulating url paths
+import Network.Protocol.HTTP.DAV (inDAVLocation, DAVT)
+import Control.Monad.IO.Class (MonadIO)
+import Data.Default
+#ifdef mingw32_HOST_OS
+import Data.String.Utils
+#endif
+
+-- Relative to the top of the DAV url.
+type DavLocation = String
+
+{- Runs action in subdirectory, relative to the current location. -}
+inLocation :: (MonadIO m) => DavLocation -> DAVT m a -> DAVT m a
+inLocation d = inDAVLocation (</> d)
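To illustrate the relative style of DavLocation, a sketch (not in the patch)
that probes <base>/tmp/probe with a PROPFIND; the url is hypothetical and
credentials are omitted, so a real server would likely need setCreds first:

import Control.Monad (void)
import Network.Protocol.HTTP.DAV (evalDAVT, getPropsM, setDepth)

-- Nested inLocation calls compose, descending relative to the current
-- location, and automatically back out when the inner action finishes.
probeDemo :: IO (Either String ())
probeDemo = evalDAVT "https://dav.example.com/base" $
	inLocation "tmp" $ inLocation "probe" $ do
		setDepth Nothing
		void getPropsM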
+
+{- The directory where file(s) for a key are stored. -}
+keyDir :: Key -> DavLocation
+keyDir k = addTrailingPathSeparator $ hashdir </> keyFile k
+  where
+#ifndef mingw32_HOST_OS
+	hashdir = hashDirLower def k
+#else
+	hashdir = replace "\\" "/" (hashDirLower def k)
+#endif
+
+keyLocation :: Key -> DavLocation
+keyLocation k = keyDir k ++ keyFile k
+
+{- Where we store temporary data for a key as it's being uploaded. -}
+keyTmpLocation :: Key -> DavLocation
+keyTmpLocation = addTrailingPathSeparator . tmpLocation . keyFile
+
+tmpLocation :: FilePath -> DavLocation
+tmpLocation f = tmpDir </> f
+
+tmpDir :: DavLocation
+tmpDir = "tmp"
+
+locationParent :: String -> Maybe String
+locationParent loc
+	| loc `elem` tops = Nothing
+	| otherwise = Just (takeDirectory loc)
+  where
+	tops = ["/", "", "."]
+
+locationUrl :: URLString -> DavLocation -> URLString
+locationUrl baseurl loc = baseurl </> loc
diff --git a/Remote/WebDAV/DavUrl.hs b/Remote/WebDAV/DavUrl.hs
deleted file mode 100644
index 4862c4f37..000000000
--- a/Remote/WebDAV/DavUrl.hs
+++ /dev/null
@@ -1,44 +0,0 @@
-{- WebDAV urls.
- -
- - Copyright 2014 Joey Hess <joey@kitenet.net>
- -
- - Licensed under the GNU GPL version 3 or higher.
- -}
-
-{-# LANGUAGE CPP #-}
-
-module Remote.WebDAV.DavUrl where
-
-import Types
-import Locations
-
-import Network.URI (normalizePathSegments)
-import System.FilePath.Posix
-#ifdef mingw32_HOST_OS
-import Data.String.Utils
-#endif
-
-type DavUrl = String
-
-{- The directory where files(s) for a key are stored. -}
-davLocation :: DavUrl -> Key -> DavUrl
-davLocation baseurl k = addTrailingPathSeparator $
-	davUrl baseurl $ hashdir </> keyFile k
-  where
-#ifndef mingw32_HOST_OS
-	hashdir = hashDirLower k
-#else
-	hashdir = replace "\\" "/" (hashDirLower k)
-#endif
-
-{- Where we store temporary data for a key as it's being uploaded. -}
-tmpLocation :: DavUrl -> Key -> DavUrl
-tmpLocation baseurl k = addTrailingPathSeparator $
-	davUrl baseurl $ "tmp" </> keyFile k
-
-davUrl :: DavUrl -> FilePath -> DavUrl
-davUrl baseurl file = baseurl </> file
-
-urlParent :: DavUrl -> DavUrl
-urlParent url = dropTrailingPathSeparator $
-	normalizePathSegments (dropTrailingPathSeparator url ++ "/..")
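Unlike the deleted urlParent, which rewrote an absolute url by appending
"/..", the new locationParent works on relative locations and returns
Nothing once a root ("/", "" or ".") is reached, so upward walks such as
makeParentDirs terminate. A small sketch of that behavior, assuming this
module's locationParent is in scope:

import Data.List (unfoldr)

-- All successive parents of a location, innermost first.
parents :: DavLocation -> [DavLocation]
parents = unfoldr (fmap (\p -> (p, p)) . locationParent)

-- parents "aa/bb/SHA256-s10--x" == ["aa/bb", "aa", "."]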