diff options
-rw-r--r-- | Remote/List.hs | 6 | ||||
-rw-r--r-- | Remote/Tahoe.hs | 229 | ||||
-rw-r--r-- | Types/GitConfig.hs | 2 | ||||
-rw-r--r-- | debian/changelog | 6 | ||||
-rw-r--r-- | doc/git-annex.mdwn | 5 | ||||
-rw-r--r-- | doc/special_remotes.mdwn | 2 | ||||
-rw-r--r-- | doc/special_remotes/tahoe.mdwn | 43 | ||||
-rw-r--r-- | git-annex.cabal | 7 |
8 files changed, 299 insertions, 1 deletions
diff --git a/Remote/List.hs b/Remote/List.hs index cc7019850..31a9209b1 100644 --- a/Remote/List.hs +++ b/Remote/List.hs @@ -34,6 +34,9 @@ import qualified Remote.Web #ifdef WITH_WEBDAV import qualified Remote.WebDAV #endif +#ifdef WITH_TAHOE +import qualified Remote.Tahoe +#endif import qualified Remote.Glacier import qualified Remote.Hook import qualified Remote.External @@ -52,6 +55,9 @@ remoteTypes = #ifdef WITH_WEBDAV , Remote.WebDAV.remote #endif +#ifdef WITH_TAHOE + , Remote.Tahoe.remote +#endif , Remote.Glacier.remote , Remote.Hook.remote , Remote.External.remote diff --git a/Remote/Tahoe.hs b/Remote/Tahoe.hs new file mode 100644 index 000000000..48dd569b4 --- /dev/null +++ b/Remote/Tahoe.hs @@ -0,0 +1,229 @@ +{- Tahoe-LAFS special remotes. + - + - Tahoe capabilities for accessing objects stored in the remote + - are preserved in the remote state log. + - + - In order to allow multiple clones of a repository to access the same + - tahoe repository, git-annex needs to store the introducer furl, + - and the shared-convergence-secret. These are stored in the remote + - configuration, when embedcreds is enabled. + - + - Using those creds, git-annex sets up a tahoe configuration directory in + - ~/.tahoe/git-annex/UUID/ + - + - Tahoe has its own encryption, so git-annex's encryption is not used. + - + - Copyright 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Remote.Tahoe (remote) where + +import qualified Data.Map as M +import Data.Aeson +import Data.ByteString.Lazy.UTF8 (fromString) + +import Common.Annex +import Types.Remote +import qualified Git +import Config +import Config.Cost +import Remote.Helper.Special +import Annex.UUID +import Annex.Content +import Logs.RemoteState +import Utility.UserInfo +import Utility.Metered +import Utility.Env + +type TahoeConfigDir = FilePath +type SharedConvergenceSecret = String +type IntroducerFurl = String +type Capability = String + +remote :: RemoteType +remote = RemoteType { + typename = "tahoe", + enumerate = findSpecialRemotes "tahoe", + generate = gen, + setup = tahoeSetup +} + +gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) +gen r u c gc = do + cst <- remoteCost gc expensiveRemoteCost + configdir <- liftIO $ maybe (defaultTahoeConfigDir u) return (remoteAnnexTahoe gc) + return $ Just $ Remote { + uuid = u, + cost = cst, + name = Git.repoDescribe r, + storeKey = store u configdir, + retrieveKeyFile = retrieve u configdir, + retrieveKeyFileCheap = \_ _ -> return False, + removeKey = remove, + hasKey = checkPresent u configdir, + hasKeyCheap = False, + whereisKey = Nothing, + remoteFsck = Nothing, + repairRepo = Nothing, + config = c, + repo = r, + gitconfig = gc, + localpath = Nothing, + readonly = False, + globallyAvailable = True, + remotetype = remote + } + +tahoeSetup :: Maybe UUID -> RemoteConfig -> Annex (RemoteConfig, UUID) +tahoeSetup mu c = do + furl <- fromMaybe (fromMaybe missingfurl $ M.lookup furlk c) + <$> liftIO (getEnv "TAHOE_FURL") + u <- maybe (liftIO genUUID) return mu + configdir <- liftIO $ defaultTahoeConfigDir u + scs <- liftIO $ tahoeConfigure configdir furl (M.lookup scsk c) + let c' = if M.lookup "embedcreds" c == Just "yes" + then flip M.union c $ M.fromList + [ (furlk, furl) + , (scsk, scs) + ] + else c + gitConfigSpecialRemote u c' "tahoe" configdir + return (c', u) + where + scsk = "shared-convergence-secret" + furlk = "introducer-furl" + missingfurl = error "Set TAHOE_FURL to the introducer furl to use." + +store :: UUID -> TahoeConfigDir -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool +store u configdir k _f _p = sendAnnex k noop $ \src -> do + liftIO $ startTahoeDaemon configdir + parsePut <$> liftIO (readTahoe configdir "put" [File src]) >>= maybe + (return False) + (\cap -> storeCapability u k cap >> return True) + +retrieve :: UUID -> TahoeConfigDir -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool +retrieve u configdir k _f d _p = go =<< getCapability u k + where + go Nothing = return False + go (Just cap) = liftIO $ do + startTahoeDaemon configdir + boolTahoe configdir "get" [Param cap, File d] + +remove :: Key -> Annex Bool +remove _k = do + warning "content cannot be removed from tahoe remote" + return False + +checkPresent :: UUID -> TahoeConfigDir -> Key -> Annex (Either String Bool) +checkPresent u configdir k = go =<< getCapability u k + where + go Nothing = return (Right False) + go (Just cap) = liftIO $ do + startTahoeDaemon configdir + parseCheck <$> readTahoe configdir "check" + [ Param "--raw" + , Param cap + ] + +defaultTahoeConfigDir :: UUID -> IO TahoeConfigDir +defaultTahoeConfigDir u = do + h <- myHomeDir + return $ h </> ".tahoe" </> "git-annex" </> fromUUID u + +tahoeConfigure :: TahoeConfigDir -> IntroducerFurl -> Maybe SharedConvergenceSecret -> IO SharedConvergenceSecret +tahoeConfigure configdir furl mscs = do + unlessM (createClient configdir furl) $ + error "tahoe create-client failed" + maybe noop (writeSharedConvergenceSecret configdir) mscs + startTahoeDaemon configdir + getSharedConvergenceSecret configdir + +createClient :: TahoeConfigDir -> IntroducerFurl -> IO Bool +createClient configdir furl = do + createDirectoryIfMissing True (parentDir configdir) + boolTahoe configdir "create-client" + [ Param "--nickname", Param "git-annex" + , Param "--introducer", Param furl + ] + +writeSharedConvergenceSecret :: TahoeConfigDir -> SharedConvergenceSecret -> IO () +writeSharedConvergenceSecret configdir scs = + writeFile (convergenceFile configdir) (unlines [scs]) + +{- The tahoe daemon writes the convergenceFile shortly after it starts + - (it does not need to connect to the network). So, try repeatedly to read + - the file, for up to 1 minute. To avoid reading a partially written + - file, look for the newline after the value. -} +getSharedConvergenceSecret :: TahoeConfigDir -> IO SharedConvergenceSecret +getSharedConvergenceSecret configdir = go (60 :: Int) + where + f = convergenceFile configdir + go n + | n == 0 = error $ "tahoe did not write " ++ f ++ " after 1 minute. Perhaps the daemon failed to start?" + | otherwise = do + v <- catchMaybeIO (readFile f) + case v of + Just s | "\n" `isSuffixOf` s || "\r" `isSuffixOf` s -> + return $ takeWhile (`notElem` "\n\r") s + _ -> go (n - 1) + +convergenceFile :: TahoeConfigDir -> FilePath +convergenceFile configdir = configdir </> "private" </> "convergence" + +{- XXX Avoid starting tahoe if it is already running. -} +startTahoeDaemon :: TahoeConfigDir -> IO () +startTahoeDaemon configdir = void $ boolTahoe configdir "start" [] + +boolTahoe :: TahoeConfigDir -> String -> [CommandParam] -> IO Bool +boolTahoe configdir command params = boolSystem "tahoe" $ + tahoeParams configdir command params + +readTahoe :: TahoeConfigDir -> String -> [CommandParam] -> IO String +readTahoe configdir command params = catchDefaultIO "" $ + readProcess "tahoe" $ toCommand $ + tahoeParams configdir command params + +tahoeParams :: TahoeConfigDir -> String -> [CommandParam] -> [CommandParam] +tahoeParams configdir command params = + Param command : Param "-d" : File configdir : params + +storeCapability :: UUID -> Key -> Capability -> Annex () +storeCapability u k cap = setRemoteState u k cap + +getCapability :: UUID -> Key -> Annex (Maybe Capability) +getCapability u k = getRemoteState u k + +{- tahoe put outputs a single line, containing the capability. -} +parsePut :: String -> Maybe Capability +parsePut s = case lines s of + [cap] | "URI" `isPrefixOf` cap -> Just cap + _ -> Nothing + +{- tahoe check --raw outputs a json document. + - Its contents will vary (for LIT capabilities, it lacks most info), + - but should always contain a results object with a healthy value + - that's true or false. + -} +parseCheck :: String -> Either String Bool +parseCheck s = maybe parseerror (Right . healthy . results) (decode $ fromString s) + where + parseerror + | null s = Left "tahoe check failed to run" + | otherwise = Left "unable to parse tahoe check output" + +data CheckRet = CheckRet { results :: Results } +data Results = Results { healthy :: Bool } + +instance FromJSON CheckRet where + parseJSON (Object v) = CheckRet + <$> v .: "results" + parseJSON _ = mzero + +instance FromJSON Results where + parseJSON (Object v) = Results + <$> v .: "healthy" + parseJSON _ = mzero diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index fad5127ed..8623258a1 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -110,6 +110,7 @@ data RemoteGitConfig = RemoteGitConfig , remoteAnnexGnupgOptions :: [String] , remoteAnnexRsyncUrl :: Maybe String , remoteAnnexBupRepo :: Maybe String + , remoteAnnexTahoe :: Maybe FilePath , remoteAnnexBupSplitOptions :: [String] , remoteAnnexDirectory :: Maybe FilePath , remoteAnnexGCrypt :: Maybe String @@ -136,6 +137,7 @@ extractRemoteGitConfig r remotename = RemoteGitConfig , remoteAnnexGnupgOptions = getoptions "gnupg-options" , remoteAnnexRsyncUrl = notempty $ getmaybe "rsyncurl" , remoteAnnexBupRepo = getmaybe "buprepo" + , remoteAnnexTahoe = getmaybe "tahoe" , remoteAnnexBupSplitOptions = getoptions "bup-split-options" , remoteAnnexDirectory = notempty $ getmaybe "directory" , remoteAnnexGCrypt = notempty $ getmaybe "gcrypt" diff --git a/debian/changelog b/debian/changelog index 8a973a29e..fc1791d19 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +git-annex (5.20140108) UNRELEASED; urgency=medium + + * Added tahoe special remote. + + -- Joey Hess <joeyh@debian.org> Wed, 08 Jan 2014 13:13:54 -0400 + git-annex (5.20140107) unstable; urgency=medium * mirror: Support --all (and --unused). diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 27d4df93a..2071f515c 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1401,6 +1401,11 @@ Here are all the supported configuration settings. Used to identify webdav special remotes. Normally this is automatically set up by `git annex initremote`. +* `remote.<name>.tahoe` + + Used to identify tahoe special remotes. + Points to the configuration directory for tahoe. + * `remote.<name>.annex-xmppaddress` Used to identify the XMPP address of a Jabber buddy. diff --git a/doc/special_remotes.mdwn b/doc/special_remotes.mdwn index 1a87f1a19..02f9bd135 100644 --- a/doc/special_remotes.mdwn +++ b/doc/special_remotes.mdwn @@ -14,6 +14,7 @@ They cannot be used by other git commands though. * [[directory]] * [[rsync]] * [[webdav]] +* [[tahoe]] * [[web]] * [[xmpp]] * [[hook]] @@ -27,7 +28,6 @@ for using git-annex with various services: * [[Amazon_S3|tips/using_Amazon_S3]] * [[Amazon_Glacier|tips/using_Amazon_Glacier]] * [[tips/Internet_Archive_via_S3]] -* [[tahoe-lafs|forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs]] * [[Box.com|tips/using_box.com_as_a_special_remote]] * [[Google drive|tips/googledriveannex]] * [[Google Cloud Storage|tips/using_Google_Cloud_Storage]] diff --git a/doc/special_remotes/tahoe.mdwn b/doc/special_remotes/tahoe.mdwn new file mode 100644 index 000000000..ad87044a4 --- /dev/null +++ b/doc/special_remotes/tahoe.mdwn @@ -0,0 +1,43 @@ +This special remote stores file contents using +[Tahoe-LAFS](http://tahoe-lafs.org/). There are a number of commercial +providers, or you can build your own tahoe storage grid. + +Since Tahoe-LAFS encrypts all data stored in it, git-annex does not do any +additional encryption of its own. + +Note that data stored in a tahoe remote cannot be dropped from it, as +Tahoe-LAFS does not support removing data once it is stored in the Tahoe grid. +This, along with Tahoe's ability to recover data when some nodes fail, +makes a tahoe special remote an excellent choice for storing backups. + +Typically you will have an account on a Tahoe-LAFS storage grid, which +is represented by an "introducer furl". You need to supply this to +git-annex in the `TAHOE_FURL` environment variable when initializing the +remote. git-annex will then generate a tahoe configuration directory for +the remote under `~/.tahoe/git-annex/`, and automatically start the tahoe +daemon as needed. + +## configuration + +These parameters can be passed to `git annex initremote` to configure +the tahoe remote. + +* `embedcreds` - Optional. Set to "yes" embed the tahoe credentials + (specifically the introducer furl and shared-convergence-secret) + inside the git repository, which allows other clones to also use them + in order to access the tahoe grid. + + Think carefully about who can access your git repository, and + whether you want to give them access to your tahoe system before + using embedcreds! + +Setup example: + + # TAHOE_FURL=... git annex initremote tahoe type=tahoe embedcreds=yes + +---- + +An older implementation of tahoe for git-annex used +the hook special remote. It is not compatible with this newer +implementation. See +[[tahoe-lafs|forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs]]. diff --git a/git-annex.cabal b/git-annex.cabal index 6c6fb1f31..81afb5a48 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -75,6 +75,9 @@ Flag Feed Flag Quvi Description: Enable use of quvi to download videos +Flag Tahoe + Description: Enable the tahoe special remote + Flag CryptoHash Description: Enable use of cryptohash for checksumming @@ -192,6 +195,10 @@ Executable git-annex if flag(Quvi) Build-Depends: aeson CPP-Options: -DWITH_QUVI + + if flag(Tahoe) + Build-Depends: aeson + CPP-Options: -DWITH_TAHOE if flag(EKG) Build-Depends: ekg |