-rw-r--r-- | Annex.hs | 3
-rw-r--r-- | Command/AddUrl.hs | 181
-rw-r--r-- | Command/ReKey.hs | 6
-rw-r--r-- | Command/RmUrl.hs | 7
-rw-r--r-- | Command/Whereis.hs | 21
-rw-r--r-- | Logs/Web.hs | 62
-rw-r--r-- | Remote.hs | 13
-rw-r--r-- | Remote/Bup.hs | 2
-rw-r--r-- | Remote/Ddar.hs | 2
-rw-r--r-- | Remote/Directory.hs | 4
-rw-r--r-- | Remote/External.hs | 31
-rw-r--r-- | Remote/External/Types.hs | 26
-rw-r--r-- | Remote/GCrypt.hs | 2
-rw-r--r-- | Remote/Git.hs | 2
-rw-r--r-- | Remote/Glacier.hs | 4
-rw-r--r-- | Remote/Hook.hs | 4
-rw-r--r-- | Remote/Rsync.hs | 2
-rw-r--r-- | Remote/S3.hs | 6
-rw-r--r-- | Remote/Tahoe.hs | 4
-rw-r--r-- | Remote/Web.hs | 24
-rw-r--r-- | Remote/WebDAV.hs | 4
-rw-r--r-- | Types/Remote.hs | 10
-rw-r--r-- | debian/changelog | 5
-rw-r--r-- | doc/design/external_special_remote_protocol.mdwn | 35
-rw-r--r-- | doc/devblog/day_237__extending_addurl.mdwn | 14
-rw-r--r-- | doc/internals.mdwn | 5
-rw-r--r-- | doc/todo/extensible_addurl.mdwn | 65
27 files changed, 432 insertions, 112 deletions
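In outline: `git annex addurl` now asks each remote whether it claims a given url (new `claimUrl` and `checkUrl` fields on the `Remote` type, new `CLAIMURL` and `CHECKURL` protocol requests), downloads via whichever remote claims it, and records that remote in location tracking; external special remotes also gain `SETURLPRESENT`, `SETURLMISSING` and `GETURLS` so they can work with the url log themselves. As a purely hypothetical illustration (the magnet url is invented; `>` marks requests from git-annex, `<` replies from the remote), a torrent-handling external special remote would see traffic like:

    > CLAIMURL magnet:?xt=urn:btih:c0ffeec0ffeec0ffeec0ffeec0ffeec0ffeec0ff
    < CLAIMURL-SUCCESS
    > CHECKURL magnet:?xt=urn:btih:c0ffeec0ffeec0ffeec0ffeec0ffeec0ffeec0ff
    < CHECKURL-SIZEUNKNOWN
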
@@ -63,6 +63,7 @@ import Types.CleanupActions import Utility.Quvi (QuviVersion) #endif import Utility.InodeCache +import Utility.Url import "mtl" Control.Monad.Reader import Control.Concurrent @@ -128,6 +129,7 @@ data AnnexState = AnnexState , useragent :: Maybe String , errcounter :: Integer , unusedkeys :: Maybe (S.Set Key) + , tempurls :: M.Map Key URLString #ifdef WITH_QUVI , quviversion :: Maybe QuviVersion #endif @@ -173,6 +175,7 @@ newState c r = AnnexState , useragent = Nothing , errcounter = 0 , unusedkeys = Nothing + , tempurls = M.empty #ifdef WITH_QUVI , quviversion = Nothing #endif diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index 81da67639..76095d6e4 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2011-2013 Joey Hess <joey@kitenet.net> + - Copyright 2011-2014 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} @@ -19,6 +19,8 @@ import qualified Annex import qualified Annex.Queue import qualified Annex.Url as Url import qualified Backend.URL +import qualified Remote +import qualified Types.Remote as Remote import Annex.Content import Logs.Web import Types.Key @@ -26,6 +28,7 @@ import Types.KeySource import Config import Annex.Content.Direct import Logs.Location +import Utility.Metered import qualified Annex.Transfer as Transfer #ifdef WITH_QUVI import Annex.Quvi @@ -54,7 +57,71 @@ seek ps = do withStrings (start relaxed f d) ps start :: Bool -> Maybe FilePath -> Maybe Int -> String -> CommandStart -start relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s +start relaxed optfile pathdepth s = do + r <- Remote.claimingUrl s + if Remote.uuid r == webUUID + then startWeb relaxed optfile pathdepth s + else startRemote r relaxed optfile pathdepth s + +startRemote :: Remote -> Bool -> Maybe FilePath -> Maybe Int -> String -> CommandStart +startRemote r relaxed optfile pathdepth s = do + url <- case Url.parseURIRelaxed s of + Nothing -> error $ "bad uri " ++ s + Just u -> pure u + pathmax <- liftIO $ fileNameLengthLimit "." + let file = choosefile $ url2file url pathdepth pathmax + showStart "addurl" file + showNote $ "using " ++ Remote.name r + next $ performRemote r relaxed s file + where + choosefile = flip fromMaybe optfile + +performRemote :: Remote -> Bool -> URLString -> FilePath -> CommandPerform +performRemote r relaxed uri file = ifAnnexed file adduri geturi + where + loguri = setDownloader uri OtherDownloader + adduri = addUrlChecked relaxed loguri (Remote.uuid r) checkexistssize + checkexistssize key = do + res <- tryNonAsync $ Remote.checkUrl r uri + case res of + Left e -> do + warning (show e) + return (False, False) + Right Nothing -> + return (True, True) + Right (Just sz) -> + return (True, sz == fromMaybe sz (keySize key)) + geturi = do + dummykey <- Backend.URL.fromUrl uri =<< + if relaxed + then return Nothing + else Remote.checkUrl r uri + liftIO $ createDirectoryIfMissing True (parentDir file) + next $ ifM (Annex.getState Annex.fast <||> pure relaxed) + ( do + res <- tryNonAsync $ Remote.checkUrl r uri + case res of + Left e -> do + warning (show e) + return False + Right size -> do + key <- Backend.URL.fromUrl uri size + cleanup (Remote.uuid r) loguri file key Nothing + return True + , do + -- Set temporary url for the dummy key + -- so that the remote knows what url it + -- should use to download it. 
+ setTempUrl dummykey uri + let downloader = Remote.retrieveKeyFile r dummykey (Just file) + ok <- isJust <$> + downloadWith downloader dummykey (Remote.uuid r) loguri file + removeTempUrl dummykey + return ok + ) + +startWeb :: Bool -> Maybe FilePath -> Maybe Int -> String -> CommandStart +startWeb relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s where (s', downloader) = getDownloader s bad = fromMaybe (error $ "bad url " ++ s') $ @@ -62,7 +129,7 @@ start relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s choosefile = flip fromMaybe optfile go url = case downloader of QuviDownloader -> usequvi - DefaultDownloader -> + _ -> #ifdef WITH_QUVI ifM (quviSupported s') ( usequvi @@ -75,7 +142,7 @@ start relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s pathmax <- liftIO $ fileNameLengthLimit "." let file = choosefile $ url2file url pathdepth pathmax showStart "addurl" file - next $ perform relaxed s' file + next $ performWeb relaxed s' file #ifdef WITH_QUVI badquvi = error $ "quvi does not know how to download url " ++ s' usequvi = do @@ -96,7 +163,9 @@ performQuvi :: Bool -> URLString -> URLString -> FilePath -> CommandPerform performQuvi relaxed pageurl videourl file = ifAnnexed file addurl geturl where quviurl = setDownloader pageurl QuviDownloader - addurl key = next $ cleanup quviurl file key Nothing + addurl key = next $ do + cleanup webUUID quviurl file key Nothing + return True geturl = next $ isJust <$> addUrlFileQuvi relaxed quviurl videourl file #endif @@ -106,7 +175,7 @@ addUrlFileQuvi relaxed quviurl videourl file = do key <- Backend.URL.fromUrl quviurl Nothing ifM (pure relaxed <||> Annex.getState Annex.fast) ( do - cleanup' quviurl file key Nothing + cleanup webUUID quviurl file key Nothing return (Just key) , do {- Get the size, and use that to check @@ -124,55 +193,65 @@ addUrlFileQuvi relaxed quviurl videourl file = do downloadUrl [videourl] tmp if ok then do - cleanup' quviurl file key (Just tmp) + cleanup webUUID quviurl file key (Just tmp) return (Just key) else return Nothing ) #endif -perform :: Bool -> URLString -> FilePath -> CommandPerform -perform relaxed url file = ifAnnexed file addurl geturl +performWeb :: Bool -> URLString -> FilePath -> CommandPerform +performWeb relaxed url file = ifAnnexed file addurl geturl where geturl = next $ isJust <$> addUrlFile relaxed url file - addurl key - | relaxed = do - setUrlPresent key url - next $ return True - | otherwise = ifM (elem url <$> getUrls key) - ( stop - , do - (exists, samesize) <- Url.withUrlOptions $ Url.check url (keySize key) - if exists && samesize - then do - setUrlPresent key url - next $ return True - else do - warning $ "while adding a new url to an already annexed file, " ++ if exists - then "url does not have expected file size (use --relaxed to bypass this check) " ++ url - else "failed to verify url exists: " ++ url - stop - ) + addurl = addUrlChecked relaxed url webUUID checkexistssize + checkexistssize = Url.withUrlOptions . Url.check url . 
keySize + +addUrlChecked :: Bool -> URLString -> UUID -> (Key -> Annex (Bool, Bool)) -> Key -> CommandPerform +addUrlChecked relaxed url u checkexistssize key + | relaxed = do + setUrlPresent u key url + next $ return True + | otherwise = ifM (elem url <$> getUrls key) + ( stop + , do + (exists, samesize) <- checkexistssize key + if exists && samesize + then do + setUrlPresent u key url + next $ return True + else do + warning $ "while adding a new url to an already annexed file, " ++ if exists + then "url does not have expected file size (use --relaxed to bypass this check) " ++ url + else "failed to verify url exists: " ++ url + stop + ) addUrlFile :: Bool -> URLString -> FilePath -> Annex (Maybe Key) addUrlFile relaxed url file = do liftIO $ createDirectoryIfMissing True (parentDir file) ifM (Annex.getState Annex.fast <||> pure relaxed) ( nodownload relaxed url file - , do - showAction $ "downloading " ++ url ++ " " - download url file + , downloadWeb url file ) -download :: URLString -> FilePath -> Annex (Maybe Key) -download url file = do - {- Generate a dummy key to use for this download, before we can - - examine the file and find its real key. This allows resuming - - downloads, as the dummy key for a given url is stable. -} +downloadWeb :: URLString -> FilePath -> Annex (Maybe Key) +downloadWeb url file = do dummykey <- addSizeUrlKey url =<< Backend.URL.fromUrl url Nothing + let downloader f _ = do + showOutput + downloadUrl [url] f + showAction $ "downloading " ++ url ++ " " + downloadWith downloader dummykey webUUID url file + +{- The Key should be a dummy key, based on the URL, which is used + - for this download, before we can examine the file and find its real key. + - For resuming downloads to work, the dummy key for a given url should be + - stable. -} +downloadWith :: (FilePath -> MeterUpdate -> Annex Bool) -> Key -> UUID -> URLString -> FilePath -> Annex (Maybe Key) +downloadWith downloader dummykey u url file = prepGetViaTmpChecked dummykey Nothing $ do tmp <- fromRepo $ gitAnnexTmpObjectLocation dummykey - showOutput - ifM (runtransfer dummykey tmp) + ifM (runtransfer tmp) ( do backend <- chooseBackend file let source = KeySource @@ -184,15 +263,15 @@ download url file = do case k of Nothing -> return Nothing Just (key, _) -> do - cleanup' url file key (Just tmp) + cleanup u url file key (Just tmp) return (Just key) , return Nothing ) where - runtransfer dummykey tmp = Transfer.notifyTransfer Transfer.Download (Just file) $ - Transfer.download webUUID dummykey (Just file) Transfer.forwardRetry $ const $ do + runtransfer tmp = Transfer.notifyTransfer Transfer.Download (Just file) $ + Transfer.download webUUID dummykey (Just file) Transfer.forwardRetry $ \p -> do liftIO $ createDirectoryIfMissing True (parentDir tmp) - downloadUrl [url] tmp + downloader tmp p {- Hits the url to get the size, if available. 
- @@ -204,16 +283,11 @@ addSizeUrlKey url key = do size <- snd <$> Url.withUrlOptions (Url.exists url) return $ key { keySize = size } -cleanup :: URLString -> FilePath -> Key -> Maybe FilePath -> Annex Bool -cleanup url file key mtmp = do - cleanup' url file key mtmp - return True - -cleanup' :: URLString -> FilePath -> Key -> Maybe FilePath -> Annex () -cleanup' url file key mtmp = do +cleanup :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex () +cleanup u url file key mtmp = do when (isJust mtmp) $ logStatus key InfoPresent - setUrlPresent key url + setUrlPresent u key url Command.Add.addLink file key Nothing whenM isDirect $ do void $ addAssociatedFile key file @@ -230,7 +304,7 @@ nodownload relaxed url file = do if exists then do key <- Backend.URL.fromUrl url size - cleanup' url file key Nothing + cleanup webUUID url file key Nothing return (Just key) else do warning $ "unable to access url: " ++ url @@ -245,8 +319,11 @@ url2file url pathdepth pathmax = case pathdepth of | depth < 0 -> frombits $ reverse . take (negate depth) . reverse | otherwise -> error "bad --pathdepth" where - fullurl = uriRegName auth ++ uriPath url ++ uriQuery url + fullurl = concat + [ maybe "" uriRegName (uriAuthority url) + , uriPath url + , uriQuery url + ] frombits a = intercalate "/" $ a urlbits urlbits = map (truncateFilePath pathmax . sanitizeFilePath) $ filter (not . null) $ split "/" fullurl - auth = fromMaybe (error $ "bad url " ++ show url) $ uriAuthority url diff --git a/Command/ReKey.hs b/Command/ReKey.hs index a203ab8d5..5dadf4e60 100644 --- a/Command/ReKey.hs +++ b/Command/ReKey.hs @@ -16,6 +16,7 @@ import qualified Command.Add import Logs.Web import Logs.Location import Utility.CopyFile +import qualified Remote cmd :: [Command] cmd = [notDirect $ command "rekey" @@ -61,8 +62,9 @@ cleanup file oldkey newkey = do -- If the old key had some associated urls, record them for -- the new key as well. urls <- getUrls oldkey - unless (null urls) $ - mapM_ (setUrlPresent newkey) urls + forM_ urls $ \url -> do + r <- Remote.claimingUrl url + setUrlPresent (Remote.uuid r) newkey url -- Update symlink to use the new key. liftIO $ removeFile file diff --git a/Command/RmUrl.hs b/Command/RmUrl.hs index 1582d0f3f..570004266 100644 --- a/Command/RmUrl.hs +++ b/Command/RmUrl.hs @@ -10,6 +10,7 @@ module Command.RmUrl where import Common.Annex import Command import Logs.Web +import qualified Remote cmd :: [Command] cmd = [notBareRepo $ @@ -26,5 +27,9 @@ start (file, url) = flip whenAnnexed file $ \_ key -> do cleanup :: String -> Key -> CommandCleanup cleanup url key = do - setUrlMissing key url + r <- Remote.claimingUrl url + let url' = if Remote.uuid r == webUUID + then url + else setDownloader url OtherDownloader + setUrlMissing (Remote.uuid r) key url' return True diff --git a/Command/Whereis.hs b/Command/Whereis.hs index 582aaffc2..314c204be 100644 --- a/Command/Whereis.hs +++ b/Command/Whereis.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010 Joey Hess <joey@kitenet.net> + - Copyright 2010-2014 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. 
-} @@ -13,6 +13,7 @@ import Common.Annex import Command import Remote import Logs.Trust +import Logs.Web cmd :: [Command] cmd = [noCommit $ withOptions (jsonOption : keyOptions) $ @@ -57,9 +58,17 @@ perform remotemap key = do untrustedheader = "The following untrusted locations may also have copies:\n" performRemote :: Key -> Remote -> Annex () -performRemote key remote = maybe noop go $ whereisKey remote +performRemote key remote = do + ls <- (++) + <$> askremote + <*> claimedurls + unless (null ls) $ showLongNote $ unlines $ + map (\l -> name remote ++ ": " ++ l) ls where - go a = do - ls <- a key - unless (null ls) $ showLongNote $ unlines $ - map (\l -> name remote ++ ": " ++ l) ls + askremote = maybe (pure []) (flip id key) (whereisKey remote) + claimedurls = do + us <- map fst + . filter (\(_, d) -> d == OtherDownloader) + . map getDownloader + <$> getUrls key + filterM (\u -> (==) <$> pure remote <*> claimingUrl u) us diff --git a/Logs/Web.hs b/Logs/Web.hs index 1d16e10b3..c3e5c3432 100644 --- a/Logs/Web.hs +++ b/Logs/Web.hs @@ -1,6 +1,6 @@ {- Web url logs. - - - Copyright 2011, 2013 Joey Hess <joey@kitenet.net> + - Copyright 2011-2014 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} @@ -9,18 +9,23 @@ module Logs.Web ( URLString, webUUID, getUrls, + getUrlsWithPrefix, setUrlPresent, setUrlMissing, knownUrls, Downloader(..), getDownloader, setDownloader, + setTempUrl, + removeTempUrl, ) where import qualified Data.ByteString.Lazy.Char8 as L +import qualified Data.Map as M import Data.Tuple.Utils import Common.Annex +import qualified Annex import Logs import Logs.Presence import Logs.Location @@ -28,8 +33,7 @@ import qualified Annex.Branch import Annex.CatFile import qualified Git import qualified Git.LsFiles - -type URLString = String +import Utility.Url -- Dummy uuid for the whole web. Do not alter. webUUID :: UUID @@ -37,7 +41,10 @@ webUUID = UUID "00000000-0000-0000-0000-000000000001" {- Gets all urls that a key might be available from. -} getUrls :: Key -> Annex [URLString] -getUrls key = go $ urlLogFile key : oldurlLogs key +getUrls key = do + l <- go $ urlLogFile key : oldurlLogs key + tmpl <- Annex.getState (maybeToList . M.lookup key . Annex.tempurls) + return (tmpl ++ l) where go [] = return [] go (l:ls) = do @@ -46,19 +53,21 @@ getUrls key = go $ urlLogFile key : oldurlLogs key then go ls else return us -setUrlPresent :: Key -> URLString -> Annex () -setUrlPresent key url = do +getUrlsWithPrefix :: Key -> String -> Annex [URLString] +getUrlsWithPrefix key prefix = filter (prefix `isPrefixOf`) <$> getUrls key + +setUrlPresent :: UUID -> Key -> URLString -> Annex () +setUrlPresent uuid key url = do us <- getUrls key unless (url `elem` us) $ do addLog (urlLogFile key) =<< logNow InfoPresent url - -- update location log to indicate that the web has the key - logChange key webUUID InfoPresent + logChange key uuid InfoPresent -setUrlMissing :: Key -> URLString -> Annex () -setUrlMissing key url = do +setUrlMissing :: UUID -> Key -> URLString -> Annex () +setUrlMissing uuid key url = do addLog (urlLogFile key) =<< logNow InfoMissing url whenM (null <$> getUrls key) $ - logChange key webUUID InfoMissing + logChange key uuid InfoMissing {- Finds all known urls. -} knownUrls :: Annex [URLString] @@ -78,18 +87,27 @@ knownUrls = do geturls Nothing = return [] geturls (Just logsha) = getLog . 
L.unpack <$> catObject logsha -data Downloader = DefaultDownloader | QuviDownloader +setTempUrl :: Key -> URLString -> Annex () +setTempUrl key url = Annex.changeState $ \s -> + s { Annex.tempurls = M.insert key url (Annex.tempurls s) } + +removeTempUrl :: Key -> Annex () +removeTempUrl key = Annex.changeState $ \s -> + s { Annex.tempurls = M.delete key (Annex.tempurls s) } + +data Downloader = WebDownloader | QuviDownloader | OtherDownloader + deriving (Eq) + +{- To keep track of how an url is downloaded, it's mangled slightly in + - the log. For quvi, "quvi:" is prefixed. For urls that are handled by + - some other remote, ":" is prefixed. -} +setDownloader :: URLString -> Downloader -> String +setDownloader u WebDownloader = u +setDownloader u QuviDownloader = "quvi:" ++ u +setDownloader u OtherDownloader = ":" ++ u -{- Determines the downloader for an URL. - - - - Some URLs are not downloaded by normal means, and this is indicated - - by prefixing them with downloader: when they are recorded in the url - - logs. -} getDownloader :: URLString -> (URLString, Downloader) getDownloader u = case separate (== ':') u of ("quvi", u') -> (u', QuviDownloader) - _ -> (u, DefaultDownloader) - -setDownloader :: URLString -> Downloader -> URLString -setDownloader u DefaultDownloader = u -setDownloader u QuviDownloader = "quvi:" ++ u + ("", u') -> (u', OtherDownloader) + _ -> (u, WebDownloader) @@ -45,7 +45,8 @@ module Remote ( forceTrust, logStatus, checkAvailable, - isXMPPRemote + isXMPPRemote, + claimingUrl, ) where import qualified Data.Map as M @@ -60,6 +61,7 @@ import Annex.UUID import Logs.UUID import Logs.Trust import Logs.Location hiding (logStatus) +import Logs.Web import Remote.List import Config import Git.Types (RemoteName) @@ -318,3 +320,12 @@ hasKey r k = either (Left . show) Right <$> tryNonAsync (checkPresent r k) hasKeyCheap :: Remote -> Bool hasKeyCheap = checkPresentCheap + +{- The web special remote claims urls by default. -} +claimingUrl :: URLString -> Annex Remote +claimingUrl url = do + rs <- remoteList + let web = Prelude.head $ filter (\r -> uuid r == webUUID) rs + fromMaybe web <$> firstM checkclaim rs + where + checkclaim = maybe (pure False) (flip id url) . 
claimUrl diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 4f2ddf35a..405ce3056 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -74,6 +74,8 @@ gen r u c gc = do , readonly = False , mkUnavailable = return Nothing , getInfo = return [("repo", buprepo)] + , claimUrl = Nothing + , checkUrl = const $ return Nothing } return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store this buprepo) diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index d73919bfd..1b8003dd8 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -71,6 +71,8 @@ gen r u c gc = do , readonly = False , mkUnavailable = return Nothing , getInfo = return [("repo", ddarrepo)] + , claimUrl = Nothing + , checkUrl = const $ return Nothing } ddarrepo = fromMaybe (error "missing ddarrepo") $ remoteAnnexDdarRepo gc specialcfg = (specialRemoteCfg c) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 2e9e013ab..fec40baa8 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -68,7 +68,9 @@ gen r u c gc = do remotetype = remote, mkUnavailable = gen r u c $ gc { remoteAnnexDirectory = Just "/dev/null" }, - getInfo = return [("directory", dir)] + getInfo = return [("directory", dir)], + claimUrl = Nothing, + checkUrl = const $ return Nothing } where dir = fromMaybe (error "missing directory") $ remoteAnnexDirectory gc diff --git a/Remote/External.hs b/Remote/External.hs index e907ab0cf..b6928a827 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -19,6 +19,7 @@ import Utility.Metered import Logs.Transfer import Logs.PreferredContent.Raw import Logs.RemoteState +import Logs.Web import Config.Cost import Annex.UUID import Creds @@ -67,8 +68,10 @@ gen r u c gc = do availability = avail, remotetype = remote, mkUnavailable = gen r u c $ - gc { remoteAnnexExternalType = Just "!dne!" } - , getInfo = return [("externaltype", externaltype)] + gc { remoteAnnexExternalType = Just "!dne!" }, + getInfo = return [("externaltype", externaltype)], + claimUrl = Just (claimurl external), + checkUrl = checkurl external } where externaltype = fromMaybe (error "missing externaltype") (remoteAnnexExternalType gc) @@ -215,6 +218,13 @@ handleRequest' lck external req mp responsehandler state <- fromMaybe "" <$> getRemoteState (externalUUID external) key send $ VALUE state + handleRemoteRequest (SETURLPRESENT key url) = + setUrlPresent (externalUUID external) key url + handleRemoteRequest (SETURLMISSING key url) = + setUrlMissing (externalUUID external) key url + handleRemoteRequest (GETURLS key prefix) = do + mapM_ (send . 
VALUE) =<< getUrlsWithPrefix key prefix + send (VALUE "") -- end of list handleRemoteRequest (DEBUG msg) = liftIO $ debugM "external" msg handleRemoteRequest (VERSION _) = sendMessage lck external $ ERROR "too late to send VERSION" @@ -409,3 +419,20 @@ getAvailability external r gc = maybe query return (remoteAnnexAvailability gc) _ -> Nothing setRemoteAvailability r avail return avail + +claimurl :: External -> URLString -> Annex Bool +claimurl external url = + handleRequest external (CLAIMURL url) Nothing $ \req -> case req of + CLAIMURL_SUCCESS -> Just $ return True + CLAIMURL_FAILURE -> Just $ return False + UNSUPPORTED_REQUEST -> Just $ return False + _ -> Nothing + +checkurl :: External -> URLString -> Annex (Maybe Integer) +checkurl external url = + handleRequest external (CHECKURL url) Nothing $ \req -> case req of + CHECKURL_SIZE sz -> Just $ return $ Just sz + CHECKURL_SIZEUNKNOWN -> Just $ return Nothing + CHECKURL_FAILURE errmsg -> Just $ error errmsg + UNSUPPORTED_REQUEST -> error "CHECKURL not implemented by external special remote" + _ -> Nothing diff --git a/Remote/External/Types.hs b/Remote/External/Types.hs index 3a69ae9ea..b00352702 100644 --- a/Remote/External/Types.hs +++ b/Remote/External/Types.hs @@ -39,6 +39,7 @@ import Logs.Transfer (Direction(..)) import Config.Cost (Cost) import Types.Remote (RemoteConfig) import Types.Availability (Availability(..)) +import Utility.Url (URLString) import qualified Utility.SimpleProtocol as Proto import Control.Concurrent.STM @@ -90,6 +91,8 @@ data Request | INITREMOTE | GETCOST | GETAVAILABILITY + | CLAIMURL URLString + | CHECKURL URLString | TRANSFER Direction Key FilePath | CHECKPRESENT Key | REMOVE Key @@ -106,6 +109,8 @@ instance Proto.Sendable Request where formatMessage INITREMOTE = ["INITREMOTE"] formatMessage GETCOST = ["GETCOST"] formatMessage GETAVAILABILITY = ["GETAVAILABILITY"] + formatMessage (CLAIMURL url) = [ "CLAIMURL", Proto.serialize url ] + formatMessage (CHECKURL url) = [ "CHECKURL", Proto.serialize url ] formatMessage (TRANSFER direction key file) = [ "TRANSFER" , Proto.serialize direction @@ -130,6 +135,11 @@ data Response | AVAILABILITY Availability | INITREMOTE_SUCCESS | INITREMOTE_FAILURE ErrorMsg + | CLAIMURL_SUCCESS + | CLAIMURL_FAILURE + | CHECKURL_SIZE Size + | CHECKURL_SIZEUNKNOWN + | CHECKURL_FAILURE ErrorMsg | UNSUPPORTED_REQUEST deriving (Show) @@ -147,6 +157,11 @@ instance Proto.Receivable Response where parseCommand "AVAILABILITY" = Proto.parse1 AVAILABILITY parseCommand "INITREMOTE-SUCCESS" = Proto.parse0 INITREMOTE_SUCCESS parseCommand "INITREMOTE-FAILURE" = Proto.parse1 INITREMOTE_FAILURE + parseCommand "CLAIMURL-SUCCESS" = Proto.parse0 CLAIMURL_SUCCESS + parseCommand "CLAIMURL-FAILURE" = Proto.parse0 CLAIMURL_FAILURE + parseCommand "CHECKURL-SIZE" = Proto.parse1 CHECKURL_SIZE + parseCommand "CHECKURL-SIZEUNKNOWN" = Proto.parse0 CHECKURL_SIZEUNKNOWN + parseCommand "CHECKURL-FAILURE" = Proto.parse1 CHECKURL_FAILURE parseCommand "UNSUPPORTED-REQUEST" = Proto.parse0 UNSUPPORTED_REQUEST parseCommand _ = Proto.parseFail @@ -165,6 +180,9 @@ data RemoteRequest | GETWANTED | SETSTATE Key String | GETSTATE Key + | SETURLPRESENT Key URLString + | SETURLMISSING Key URLString + | GETURLS Key String | DEBUG String deriving (Show) @@ -182,6 +200,9 @@ instance Proto.Receivable RemoteRequest where parseCommand "GETWANTED" = Proto.parse0 GETWANTED parseCommand "SETSTATE" = Proto.parse2 SETSTATE parseCommand "GETSTATE" = Proto.parse1 GETSTATE + parseCommand "SETURLPRESENT" = Proto.parse2 SETURLPRESENT + 
parseCommand "SETURLMISSING" = Proto.parse2 SETURLMISSING + parseCommand "GETURLS" = Proto.parse2 GETURLS parseCommand "DEBUG" = Proto.parse1 DEBUG parseCommand _ = Proto.parseFail @@ -212,6 +233,7 @@ instance Proto.Receivable AsyncMessage where type ErrorMsg = String type Setting = String type ProtocolVersion = Int +type Size = Integer supportedProtocolVersions :: [ProtocolVersion] supportedProtocolVersions = [1] @@ -240,6 +262,10 @@ instance Proto.Serializable Cost where serialize = show deserialize = readish +instance Proto.Serializable Size where + serialize = show + deserialize = readish + instance Proto.Serializable Availability where serialize GloballyAvailable = "GLOBAL" serialize LocallyAvailable = "LOCAL" diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 9aa70d57e..6bf99c135 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -122,6 +122,8 @@ gen' r u c gc = do , remotetype = remote , mkUnavailable = return Nothing , getInfo = return $ gitRepoInfo r + , claimUrl = Nothing + , checkUrl = const $ return Nothing } return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store this rsyncopts) diff --git a/Remote/Git.hs b/Remote/Git.hs index 50c34a2bb..74fb81965 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -160,6 +160,8 @@ gen r u c gc , remotetype = remote , mkUnavailable = unavailable r u c gc , getInfo = return $ gitRepoInfo r + , claimUrl = Nothing + , checkUrl = const $ return Nothing } unavailable :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index 99003f29a..17f755000 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -68,7 +68,9 @@ gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost remotetype = remote, mkUnavailable = return Nothing, getInfo = includeCredsInfo c (AWS.creds u) $ - [ ("glacier vault", getVault c) ] + [ ("glacier vault", getVault c) ], + claimUrl = Nothing, + checkUrl = const $ return Nothing } specialcfg = (specialRemoteCfg c) -- Disabled until jobList gets support for chunks. diff --git a/Remote/Hook.hs b/Remote/Hook.hs index f7c428e99..09297a6e2 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -61,7 +61,9 @@ gen r u c gc = do remotetype = remote, mkUnavailable = gen r u c $ gc { remoteAnnexHookType = Just "!dne!" }, - getInfo = return [("hooktype", hooktype)] + getInfo = return [("hooktype", hooktype)], + claimUrl = Nothing, + checkUrl = const $ return Nothing } where hooktype = fromMaybe (error "missing hooktype") $ remoteAnnexHookType gc diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index a87d05a33..7a7f68165 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -84,6 +84,8 @@ gen r u c gc = do , remotetype = remote , mkUnavailable = return Nothing , getInfo = return [("url", url)] + , claimUrl = Nothing + , checkUrl = const $ return Nothing } where specialcfg = (specialRemoteCfg c) diff --git a/Remote/S3.hs b/Remote/S3.hs index 844d87902..f56904729 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -92,7 +92,9 @@ gen r u c gc = do then Just ("internet archive item", iaItemUrl $ fromMaybe "unknown" $ getBucketName c) else Nothing , Just ("partsize", maybe "unlimited" (roughSize storageUnits False) (getPartSize c)) - ] + ], + claimUrl = Nothing, + checkUrl = const $ return Nothing } s3Setup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) @@ -162,7 +164,7 @@ store r h = fileStorer $ \k f p -> do _ -> singlepartupload k f p -- Store public URL to item in Internet Archive. 
when (isIA (hinfo h) && not (isChunkKey k)) $ - setUrlPresent k (iaKeyUrl r k) + setUrlPresent webUUID k (iaKeyUrl r k) return True where singlepartupload k f p = do diff --git a/Remote/Tahoe.hs b/Remote/Tahoe.hs index 7dd231c06..8b56bbd50 100644 --- a/Remote/Tahoe.hs +++ b/Remote/Tahoe.hs @@ -85,7 +85,9 @@ gen r u c gc = do availability = GloballyAvailable, remotetype = remote, mkUnavailable = return Nothing, - getInfo = return [] + getInfo = return [], + claimUrl = Nothing, + checkUrl = const $ return Nothing } tahoeSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) diff --git a/Remote/Web.hs b/Remote/Web.hs index 4d4b43c41..3845dddf5 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -1,4 +1,4 @@ -{- Web remotes. +{- Web remote. - - Copyright 2011 Joey Hess <joey@kitenet.net> - @@ -52,7 +52,7 @@ gen r _ c gc = removeKey = dropKey, checkPresent = checkKey, checkPresentCheap = False, - whereisKey = Just getUrls, + whereisKey = Just getWebUrls, remoteFsck = Nothing, repairRepo = Nothing, config = c, @@ -63,11 +63,13 @@ gen r _ c gc = availability = GloballyAvailable, remotetype = remote, mkUnavailable = return Nothing, - getInfo = return [] + getInfo = return [], + claimUrl = Nothing, -- implicitly claims all urls + checkUrl = const $ return Nothing } downloadKey :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -downloadKey key _file dest _p = get =<< getUrls key +downloadKey key _file dest _p = get =<< getWebUrls key where get [] = do warning "no known url" @@ -85,7 +87,7 @@ downloadKey key _file dest _p = get =<< getUrls key warning "quvi support needed for this url" return False #endif - DefaultDownloader -> downloadUrl [u'] dest + _ -> downloadUrl [u'] dest downloadKeyCheap :: Key -> FilePath -> Annex Bool downloadKeyCheap _ _ = return False @@ -97,12 +99,12 @@ uploadKey _ _ _ = do dropKey :: Key -> Annex Bool dropKey k = do - mapM_ (setUrlMissing k) =<< getUrls k + mapM_ (setUrlMissing webUUID k) =<< getWebUrls k return True checkKey :: Key -> Annex Bool checkKey key = do - us <- getUrls key + us <- getWebUrls key if null us then return False else either error return =<< checkKey' key us @@ -117,7 +119,7 @@ checkKey' key us = firsthit us (Right False) $ \u -> do #else return $ Left "quvi support needed for this url" #endif - DefaultDownloader -> do + _ -> do Url.withUrlOptions $ catchMsgIO . 
Url.checkBoth u' (keySize key) where @@ -127,3 +129,9 @@ checkKey' key us = firsthit us (Right False) $ \u -> do case r of Right _ -> return r Left _ -> firsthit rest r a + +getWebUrls :: Key -> Annex [URLString] +getWebUrls key = filter supported <$> getUrls key + where + supported u = snd (getDownloader u) + `elem` [WebDownloader, QuviDownloader] diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 932ed81e0..57e1dd785 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -73,7 +73,9 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost remotetype = remote, mkUnavailable = gen r u (M.insert "url" "http://!dne!/" c) gc, getInfo = includeCredsInfo c (davCreds u) $ - [("url", fromMaybe "unknown" (M.lookup "url" c))] + [("url", fromMaybe "unknown" (M.lookup "url" c))], + claimUrl = Nothing, + checkUrl = const $ return Nothing } chunkconfig = getChunkConfig c diff --git a/Types/Remote.hs b/Types/Remote.hs index 795121763..baa857906 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -29,6 +29,7 @@ import Config.Cost import Utility.Metered import Git.Types import Utility.SafeCommand +import Utility.Url type RemoteConfigKey = String type RemoteConfig = M.Map RemoteConfigKey String @@ -100,7 +101,14 @@ data RemoteA a = Remote { -- available for use. All its actions should fail. mkUnavailable :: a (Maybe (RemoteA a)), -- Information about the remote, for git annex info to display. - getInfo :: a [(String, String)] + getInfo :: a [(String, String)], + -- Some remotes can download from an url (or uri). + claimUrl :: Maybe (URLString -> a Bool), + -- Checks that the url is accessible, and gets the size of its + -- content. Returns Nothing if the url is accessible, but + -- its size cannot be determined inexpensively. + -- Throws an exception if the url is inaccessible. + checkUrl :: URLString -> a (Maybe Integer) } instance Show (RemoteA a) where diff --git a/debian/changelog b/debian/changelog index 0a28d9a12..39f165d6b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -2,6 +2,11 @@ git-annex (5.20141204) UNRELEASED; urgency=medium * Webapp: When adding a new box.com remote, use the new style chunking. Thanks, Jon Ander Peñalba. + * External special remote protocol now includes commands for setting + and getting the urls associated with a key. + * Urls can now be claimed by remotes. This will allow creating, + for example, an external special remote that handles magnet: and + *.torrent urls. -- Joey Hess <id@joeyh.name> Fri, 05 Dec 2014 13:42:08 -0400 diff --git a/doc/design/external_special_remote_protocol.mdwn b/doc/design/external_special_remote_protocol.mdwn index 4219f1193..6c06fd902 100644 --- a/doc/design/external_special_remote_protocol.mdwn +++ b/doc/design/external_special_remote_protocol.mdwn @@ -125,6 +125,16 @@ replying with `UNSUPPORTED-REQUEST` is acceptable. If the remote replies with `UNSUPPORTED-REQUEST`, its availability is assumed to be global. So, only remotes that are only reachable locally need to worry about implementing this. +* `CLAIMURL Url` + Asks the remote if it wishes to claim responsibility for downloading + an url. If so, the remote should send back a `CLAIMURL-SUCCESS` reply. + If not, it can send `CLAIMURL-FAILURE`. +* `CHECKURL Url` + Asks the remote to check if the url's content can currently be downloaded + (without downloading it). If the url is not accessible, send + `CHECKURL-FAILURE`. If the url is accessible and the size is known, + send the size in `CHECKURL-SIZE`. 
If the url is accessible, but the size is unknown, send `CHECKURL-SIZEUNKNOWN`. More optional requests may be added, without changing the protocol version, so if an unknown request is seen, reply with `UNSUPPORTED-REQUEST`. @@ -167,6 +177,18 @@ while it's handling a request. Indicates the INITREMOTE succeeded and the remote is ready to use. * `INITREMOTE-FAILURE ErrorMsg` Indicates that INITREMOTE failed. +* `CLAIMURL-SUCCESS` + Indicates that the CLAIMURL url will be handled by this remote. +* `CLAIMURL-FAILURE` + Indicates that the CLAIMURL url will not be handled by this remote. +* `CHECKURL-SIZE Size` + Indicates that the requested url has been verified to exist, + and its size is known. The size is in bytes. +* `CHECKURL-SIZEUNKNOWN` + Indicates that the requested url has been verified to exist, + but its size could not be determined. +* `CHECKURL-FAILURE` + Indicates that the requested url could not be accessed. * `UNSUPPORTED-REQUEST` Indicates that the special remote does not know how to handle a request. @@ -247,6 +269,17 @@ in control. * `GETSTATE Key` Gets any state that has been stored for the key. (git-annex replies with VALUE followed by the state.) +* `SETURLPRESENT Key Url` + Records an url (or uri) where the Key can be downloaded from. +* `SETURLMISSING Key Url` + Records that the key can no longer be downloaded from the specified + url (or uri). +* `GETURLS Key Prefix` + Gets the recorded urls where a Key can be downloaded from. + Only urls that start with the Prefix will be returned. The Prefix + may be empty to get all urls. + (git-annex replies one or more times with VALUE for each url. + The final VALUE has an empty value, indicating the end of the url list.) * `DEBUG message` Tells git-annex to display the message if --debug is enabled. @@ -288,7 +321,5 @@ start a new process the next time it needs to use a remote. the remote. However, \n and probably \0 need to be escaped somehow in the file data, which adds complication. * uuid discovery during INITREMOTE. -* Support for getting and setting the list of urls that can be associated - with a key. * Hook into webapp. Needs a way to provide some kind of prompt to the user in the webapp, etc. diff --git a/doc/devblog/day_237__extending_addurl.mdwn b/doc/devblog/day_237__extending_addurl.mdwn new file mode 100644 index 000000000..e0129398e --- /dev/null +++ b/doc/devblog/day_237__extending_addurl.mdwn @@ -0,0 +1,14 @@ +Worked on [[todo/extensible_addurl]] today. When `git annex addurl` is run, +remotes will be asked if they claim the url, and whichever remote does will +be used to download it, and location tracking will indicate that remote +contains the object. This is a massive 1000 line patch touching 30 files, +including follow-on changes in `rmurl` and `whereis` and even `rekey`. + +It should now be possible to build an external special remote that handles +*.torrent and magnet: urls and passes them off to a bittorrent client for +download, for example. + +Another use for this would be to make an external special remote that +uses youtube-dl or some program other than quvi for downloading web videos. +The builtin quvi support could probably be moved out of the web special +remote, to a separate remote. I haven't tried to do that yet. diff --git a/doc/internals.mdwn b/doc/internals.mdwn index 9970a0bbd..b8cb559b6 100644 --- a/doc/internals.mdwn +++ b/doc/internals.mdwn @@ -182,8 +182,9 @@ Example: ## `aaa/bbb/*.log.web` These log files record urls used by the
Their format is similar -to the location tracking files, but with urls rather than UUIDs. +[[web_special_remote|special_remotes/web]] and sometimes by other remotes. +Their format is similar to the location tracking files, but with urls +rather than UUIDs. ## `aaa/bbb/*.log.rmt` diff --git a/doc/todo/extensible_addurl.mdwn b/doc/todo/extensible_addurl.mdwn index 6eb090305..44b19fef0 100644 --- a/doc/todo/extensible_addurl.mdwn +++ b/doc/todo/extensible_addurl.mdwn @@ -7,16 +7,67 @@ from scientific data repositories that use their own APIs. The basic idea is to have external special remotes (or perhaps built-in ones in some cases), which addurl can use to download an object, referred -to by some uri-like thing. The uri starts with "$downloader:" +to by some uri-like thing. The uri starts with "$downloader:" to indicate +that it's not a regular url and so is not handled by the web special +remote. git annex addurl torrent:$foo git annex addurl CERN:$bar Problem: This requires mapping from the name of the downloader, which is probably the same as the git-annex-remote-$downloader program implementing -the special remote protocol, to the UUID of a remote. That's assuming we -want location tracking to be able to know that a file is both available -from CERN and from a torrent, for example. +the special remote protocol (but not always), to the UUID of a remote. +That's assuming we want location tracking to be able to know that a file is +both available from CERN and from a torrent, for example. + +Solution: Add a new method to remotes: + + claimUrl :: Maybe (URLString -> Annex Bool) + +Remotes that implement this method (including special remotes) will +be queried when such an uri is added, to see which claims it. + +Once the remote is known, addurl --file will record that the Key is present +on that remote, and record the uri in the url log. + +---- + +What about using addurl to add a new file? In this mode, the Key is not yet +known. addurl currently handles this by generating a dummy Key for the url +(hitting the url to get its size), and running a Transfer using the dummy +key that downloads from the web. Once the download is done, the dummy Key +is upgraded to the final Key. + +Something similar could be done for other remotes, but the url log for the +dummy key would need to have the url added to it, for the remote to know +what to download, and then that could be removed after the download. Which +causes ugly churn in git, and would leave a mess if interrupted. + +One option is to add another new method to remotes: + + downloadUrl :: Maybe (URLString -> Annex FilePath) + +Or, the url log could have support added for recording temporary key +urls in memory. (done) + +Another problem is that the size of the Key isn't known. addurl +could always operate in relaxed mode, where it generates a size-less Key. +Or, yet another method could be added: (done) + + sizeUrl :: URLString -> Annex (Maybe Integer) + +---- + +Retrieval of the Key works more or less as usual. The only +difference being that remotes that support this interface can look +at the url log to find the one with the right "$downloader:" prefix, +and so know where to download from. (Much as the web special remote already +does.) + +Prerequisite: Expand the external special remote interface to support +accessing the url log. (done) + +---- It would also be nice to be able to easily configure a regexp that normal urls, if they match, are made to use a particular downloader. 
So, for @@ -29,7 +80,9 @@ special remote interface, and let a downloader be specified simply by: git config annex.downloader.torrent.command 'aria2c %url $file' -In this case, the UUID used would be the UUID of the web special remote, I -suppose? +This could be implemented in either the web special remote or even in an +external special remote. Some other discussion at <https://github.com/datalad/datalad/issues/10> + +> [[done]]! --[[Joey]]
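
To make the new hooks concrete, here is a minimal, hypothetical sketch (not part of this patch) of an external special remote that claims magnet: urls using the protocol additions above. It answers only the new CLAIMURL/CHECKURL requests and stubs out everything else as UNSUPPORTED-REQUEST, which a real remote could not do for INITREMOTE, PREPARE, TRANSFER and friends; it is meant only to show the shape of the message flow. The program name follows the git-annex-remote-$downloader convention mentioned in the todo, but is invented here.

    -- git-annex-remote-magnet (hypothetical): handles only the requests
    -- added by this patch; all other requests are stubbed out.
    import Data.List (isPrefixOf)
    import System.IO (BufferMode(LineBuffering), hSetBuffering, isEOF, stdout)

    main :: IO ()
    main = do
        hSetBuffering stdout LineBuffering
        -- The external special remote speaks first, announcing the
        -- protocol version it implements.
        putStrLn "VERSION 1"
        loop

    -- Read one request per line and reply with one line, until EOF.
    loop :: IO ()
    loop = do
        done <- isEOF
        if done
            then return ()
            else do
                req <- getLine
                putStrLn (respond req)
                loop

    respond :: String -> String
    respond req = case words req of
        ["CLAIMURL", url]
            | "magnet:" `isPrefixOf` url -> "CLAIMURL-SUCCESS"
            | otherwise -> "CLAIMURL-FAILURE"
        ("CHECKURL":_) ->
            -- A magnet link's size cannot be determined cheaply without
            -- downloading it, so report it as unknown.
            "CHECKURL-SIZEUNKNOWN"
        _ -> "UNSUPPORTED-REQUEST"

A real remote could, in addition, use SETURLPRESENT to record urls for keys it stores and GETURLS to look up which of the logged urls it is responsible for.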