aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Remote/List.hs6
-rw-r--r--Remote/Tahoe.hs229
-rw-r--r--Types/GitConfig.hs2
-rw-r--r--debian/changelog6
-rw-r--r--doc/git-annex.mdwn5
-rw-r--r--doc/special_remotes.mdwn2
-rw-r--r--doc/special_remotes/tahoe.mdwn43
-rw-r--r--git-annex.cabal7
8 files changed, 299 insertions, 1 deletions
diff --git a/Remote/List.hs b/Remote/List.hs
index cc7019850..31a9209b1 100644
--- a/Remote/List.hs
+++ b/Remote/List.hs
@@ -34,6 +34,9 @@ import qualified Remote.Web
#ifdef WITH_WEBDAV
import qualified Remote.WebDAV
#endif
+#ifdef WITH_TAHOE
+import qualified Remote.Tahoe
+#endif
import qualified Remote.Glacier
import qualified Remote.Hook
import qualified Remote.External
@@ -52,6 +55,9 @@ remoteTypes =
#ifdef WITH_WEBDAV
, Remote.WebDAV.remote
#endif
+#ifdef WITH_TAHOE
+ , Remote.Tahoe.remote
+#endif
, Remote.Glacier.remote
, Remote.Hook.remote
, Remote.External.remote
diff --git a/Remote/Tahoe.hs b/Remote/Tahoe.hs
new file mode 100644
index 000000000..48dd569b4
--- /dev/null
+++ b/Remote/Tahoe.hs
@@ -0,0 +1,229 @@
+{- Tahoe-LAFS special remotes.
+ -
+ - Tahoe capabilities for accessing objects stored in the remote
+ - are preserved in the remote state log.
+ -
+ - In order to allow multiple clones of a repository to access the same
+ - tahoe repository, git-annex needs to store the introducer furl,
+ - and the shared-convergence-secret. These are stored in the remote
+ - configuration, when embedcreds is enabled.
+ -
+ - Using those creds, git-annex sets up a tahoe configuration directory in
+ - ~/.tahoe/git-annex/UUID/
+ -
+ - Tahoe has its own encryption, so git-annex's encryption is not used.
+ -
+ - Copyright 2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE OverloadedStrings #-}
+
+module Remote.Tahoe (remote) where
+
+import qualified Data.Map as M
+import Data.Aeson
+import Data.ByteString.Lazy.UTF8 (fromString)
+
+import Common.Annex
+import Types.Remote
+import qualified Git
+import Config
+import Config.Cost
+import Remote.Helper.Special
+import Annex.UUID
+import Annex.Content
+import Logs.RemoteState
+import Utility.UserInfo
+import Utility.Metered
+import Utility.Env
+
+type TahoeConfigDir = FilePath
+type SharedConvergenceSecret = String
+type IntroducerFurl = String
+type Capability = String
+
+remote :: RemoteType
+remote = RemoteType {
+ typename = "tahoe",
+ enumerate = findSpecialRemotes "tahoe",
+ generate = gen,
+ setup = tahoeSetup
+}
+
+gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
+gen r u c gc = do
+ cst <- remoteCost gc expensiveRemoteCost
+ configdir <- liftIO $ maybe (defaultTahoeConfigDir u) return (remoteAnnexTahoe gc)
+ return $ Just $ Remote {
+ uuid = u,
+ cost = cst,
+ name = Git.repoDescribe r,
+ storeKey = store u configdir,
+ retrieveKeyFile = retrieve u configdir,
+ retrieveKeyFileCheap = \_ _ -> return False,
+ removeKey = remove,
+ hasKey = checkPresent u configdir,
+ hasKeyCheap = False,
+ whereisKey = Nothing,
+ remoteFsck = Nothing,
+ repairRepo = Nothing,
+ config = c,
+ repo = r,
+ gitconfig = gc,
+ localpath = Nothing,
+ readonly = False,
+ globallyAvailable = True,
+ remotetype = remote
+ }
+
+tahoeSetup :: Maybe UUID -> RemoteConfig -> Annex (RemoteConfig, UUID)
+tahoeSetup mu c = do
+ furl <- fromMaybe (fromMaybe missingfurl $ M.lookup furlk c)
+ <$> liftIO (getEnv "TAHOE_FURL")
+ u <- maybe (liftIO genUUID) return mu
+ configdir <- liftIO $ defaultTahoeConfigDir u
+ scs <- liftIO $ tahoeConfigure configdir furl (M.lookup scsk c)
+ let c' = if M.lookup "embedcreds" c == Just "yes"
+ then flip M.union c $ M.fromList
+ [ (furlk, furl)
+ , (scsk, scs)
+ ]
+ else c
+ gitConfigSpecialRemote u c' "tahoe" configdir
+ return (c', u)
+ where
+ scsk = "shared-convergence-secret"
+ furlk = "introducer-furl"
+ missingfurl = error "Set TAHOE_FURL to the introducer furl to use."
+
+store :: UUID -> TahoeConfigDir -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
+store u configdir k _f _p = sendAnnex k noop $ \src -> do
+ liftIO $ startTahoeDaemon configdir
+ parsePut <$> liftIO (readTahoe configdir "put" [File src]) >>= maybe
+ (return False)
+ (\cap -> storeCapability u k cap >> return True)
+
+retrieve :: UUID -> TahoeConfigDir -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
+retrieve u configdir k _f d _p = go =<< getCapability u k
+ where
+ go Nothing = return False
+ go (Just cap) = liftIO $ do
+ startTahoeDaemon configdir
+ boolTahoe configdir "get" [Param cap, File d]
+
+remove :: Key -> Annex Bool
+remove _k = do
+ warning "content cannot be removed from tahoe remote"
+ return False
+
+checkPresent :: UUID -> TahoeConfigDir -> Key -> Annex (Either String Bool)
+checkPresent u configdir k = go =<< getCapability u k
+ where
+ go Nothing = return (Right False)
+ go (Just cap) = liftIO $ do
+ startTahoeDaemon configdir
+ parseCheck <$> readTahoe configdir "check"
+ [ Param "--raw"
+ , Param cap
+ ]
+
+defaultTahoeConfigDir :: UUID -> IO TahoeConfigDir
+defaultTahoeConfigDir u = do
+ h <- myHomeDir
+ return $ h </> ".tahoe" </> "git-annex" </> fromUUID u
+
+tahoeConfigure :: TahoeConfigDir -> IntroducerFurl -> Maybe SharedConvergenceSecret -> IO SharedConvergenceSecret
+tahoeConfigure configdir furl mscs = do
+ unlessM (createClient configdir furl) $
+ error "tahoe create-client failed"
+ maybe noop (writeSharedConvergenceSecret configdir) mscs
+ startTahoeDaemon configdir
+ getSharedConvergenceSecret configdir
+
+createClient :: TahoeConfigDir -> IntroducerFurl -> IO Bool
+createClient configdir furl = do
+ createDirectoryIfMissing True (parentDir configdir)
+ boolTahoe configdir "create-client"
+ [ Param "--nickname", Param "git-annex"
+ , Param "--introducer", Param furl
+ ]
+
+writeSharedConvergenceSecret :: TahoeConfigDir -> SharedConvergenceSecret -> IO ()
+writeSharedConvergenceSecret configdir scs =
+ writeFile (convergenceFile configdir) (unlines [scs])
+
+{- The tahoe daemon writes the convergenceFile shortly after it starts
+ - (it does not need to connect to the network). So, try repeatedly to read
+ - the file, for up to 1 minute. To avoid reading a partially written
+ - file, look for the newline after the value. -}
+getSharedConvergenceSecret :: TahoeConfigDir -> IO SharedConvergenceSecret
+getSharedConvergenceSecret configdir = go (60 :: Int)
+ where
+ f = convergenceFile configdir
+ go n
+ | n == 0 = error $ "tahoe did not write " ++ f ++ " after 1 minute. Perhaps the daemon failed to start?"
+ | otherwise = do
+ v <- catchMaybeIO (readFile f)
+ case v of
+ Just s | "\n" `isSuffixOf` s || "\r" `isSuffixOf` s ->
+ return $ takeWhile (`notElem` "\n\r") s
+ _ -> go (n - 1)
+
+convergenceFile :: TahoeConfigDir -> FilePath
+convergenceFile configdir = configdir </> "private" </> "convergence"
+
+{- XXX Avoid starting tahoe if it is already running. -}
+startTahoeDaemon :: TahoeConfigDir -> IO ()
+startTahoeDaemon configdir = void $ boolTahoe configdir "start" []
+
+boolTahoe :: TahoeConfigDir -> String -> [CommandParam] -> IO Bool
+boolTahoe configdir command params = boolSystem "tahoe" $
+ tahoeParams configdir command params
+
+readTahoe :: TahoeConfigDir -> String -> [CommandParam] -> IO String
+readTahoe configdir command params = catchDefaultIO "" $
+ readProcess "tahoe" $ toCommand $
+ tahoeParams configdir command params
+
+tahoeParams :: TahoeConfigDir -> String -> [CommandParam] -> [CommandParam]
+tahoeParams configdir command params =
+ Param command : Param "-d" : File configdir : params
+
+storeCapability :: UUID -> Key -> Capability -> Annex ()
+storeCapability u k cap = setRemoteState u k cap
+
+getCapability :: UUID -> Key -> Annex (Maybe Capability)
+getCapability u k = getRemoteState u k
+
+{- tahoe put outputs a single line, containing the capability. -}
+parsePut :: String -> Maybe Capability
+parsePut s = case lines s of
+ [cap] | "URI" `isPrefixOf` cap -> Just cap
+ _ -> Nothing
+
+{- tahoe check --raw outputs a json document.
+ - Its contents will vary (for LIT capabilities, it lacks most info),
+ - but should always contain a results object with a healthy value
+ - that's true or false.
+ -}
+parseCheck :: String -> Either String Bool
+parseCheck s = maybe parseerror (Right . healthy . results) (decode $ fromString s)
+ where
+ parseerror
+ | null s = Left "tahoe check failed to run"
+ | otherwise = Left "unable to parse tahoe check output"
+
+data CheckRet = CheckRet { results :: Results }
+data Results = Results { healthy :: Bool }
+
+instance FromJSON CheckRet where
+ parseJSON (Object v) = CheckRet
+ <$> v .: "results"
+ parseJSON _ = mzero
+
+instance FromJSON Results where
+ parseJSON (Object v) = Results
+ <$> v .: "healthy"
+ parseJSON _ = mzero
diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs
index fad5127ed..8623258a1 100644
--- a/Types/GitConfig.hs
+++ b/Types/GitConfig.hs
@@ -110,6 +110,7 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexGnupgOptions :: [String]
, remoteAnnexRsyncUrl :: Maybe String
, remoteAnnexBupRepo :: Maybe String
+ , remoteAnnexTahoe :: Maybe FilePath
, remoteAnnexBupSplitOptions :: [String]
, remoteAnnexDirectory :: Maybe FilePath
, remoteAnnexGCrypt :: Maybe String
@@ -136,6 +137,7 @@ extractRemoteGitConfig r remotename = RemoteGitConfig
, remoteAnnexGnupgOptions = getoptions "gnupg-options"
, remoteAnnexRsyncUrl = notempty $ getmaybe "rsyncurl"
, remoteAnnexBupRepo = getmaybe "buprepo"
+ , remoteAnnexTahoe = getmaybe "tahoe"
, remoteAnnexBupSplitOptions = getoptions "bup-split-options"
, remoteAnnexDirectory = notempty $ getmaybe "directory"
, remoteAnnexGCrypt = notempty $ getmaybe "gcrypt"
diff --git a/debian/changelog b/debian/changelog
index 8a973a29e..fc1791d19 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+git-annex (5.20140108) UNRELEASED; urgency=medium
+
+ * Added tahoe special remote.
+
+ -- Joey Hess <joeyh@debian.org> Wed, 08 Jan 2014 13:13:54 -0400
+
git-annex (5.20140107) unstable; urgency=medium
* mirror: Support --all (and --unused).
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 27d4df93a..2071f515c 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -1401,6 +1401,11 @@ Here are all the supported configuration settings.
Used to identify webdav special remotes.
Normally this is automatically set up by `git annex initremote`.
+* `remote.<name>.tahoe`
+
+ Used to identify tahoe special remotes.
+ Points to the configuration directory for tahoe.
+
* `remote.<name>.annex-xmppaddress`
Used to identify the XMPP address of a Jabber buddy.
diff --git a/doc/special_remotes.mdwn b/doc/special_remotes.mdwn
index 1a87f1a19..02f9bd135 100644
--- a/doc/special_remotes.mdwn
+++ b/doc/special_remotes.mdwn
@@ -14,6 +14,7 @@ They cannot be used by other git commands though.
* [[directory]]
* [[rsync]]
* [[webdav]]
+* [[tahoe]]
* [[web]]
* [[xmpp]]
* [[hook]]
@@ -27,7 +28,6 @@ for using git-annex with various services:
* [[Amazon_S3|tips/using_Amazon_S3]]
* [[Amazon_Glacier|tips/using_Amazon_Glacier]]
* [[tips/Internet_Archive_via_S3]]
-* [[tahoe-lafs|forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs]]
* [[Box.com|tips/using_box.com_as_a_special_remote]]
* [[Google drive|tips/googledriveannex]]
* [[Google Cloud Storage|tips/using_Google_Cloud_Storage]]
diff --git a/doc/special_remotes/tahoe.mdwn b/doc/special_remotes/tahoe.mdwn
new file mode 100644
index 000000000..ad87044a4
--- /dev/null
+++ b/doc/special_remotes/tahoe.mdwn
@@ -0,0 +1,43 @@
+This special remote stores file contents using
+[Tahoe-LAFS](http://tahoe-lafs.org/). There are a number of commercial
+providers, or you can build your own tahoe storage grid.
+
+Since Tahoe-LAFS encrypts all data stored in it, git-annex does not do any
+additional encryption of its own.
+
+Note that data stored in a tahoe remote cannot be dropped from it, as
+Tahoe-LAFS does not support removing data once it is stored in the Tahoe grid.
+This, along with Tahoe's ability to recover data when some nodes fail,
+makes a tahoe special remote an excellent choice for storing backups.
+
+Typically you will have an account on a Tahoe-LAFS storage grid, which
+is represented by an "introducer furl". You need to supply this to
+git-annex in the `TAHOE_FURL` environment variable when initializing the
+remote. git-annex will then generate a tahoe configuration directory for
+the remote under `~/.tahoe/git-annex/`, and automatically start the tahoe
+daemon as needed.
+
+## configuration
+
+These parameters can be passed to `git annex initremote` to configure
+the tahoe remote.
+
+* `embedcreds` - Optional. Set to "yes" embed the tahoe credentials
+ (specifically the introducer furl and shared-convergence-secret)
+ inside the git repository, which allows other clones to also use them
+ in order to access the tahoe grid.
+
+ Think carefully about who can access your git repository, and
+ whether you want to give them access to your tahoe system before
+ using embedcreds!
+
+Setup example:
+
+ # TAHOE_FURL=... git annex initremote tahoe type=tahoe embedcreds=yes
+
+----
+
+An older implementation of tahoe for git-annex used
+the hook special remote. It is not compatible with this newer
+implementation. See
+[[tahoe-lafs|forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs]].
diff --git a/git-annex.cabal b/git-annex.cabal
index 6c6fb1f31..81afb5a48 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -75,6 +75,9 @@ Flag Feed
Flag Quvi
Description: Enable use of quvi to download videos
+Flag Tahoe
+ Description: Enable the tahoe special remote
+
Flag CryptoHash
Description: Enable use of cryptohash for checksumming
@@ -192,6 +195,10 @@ Executable git-annex
if flag(Quvi)
Build-Depends: aeson
CPP-Options: -DWITH_QUVI
+
+ if flag(Tahoe)
+ Build-Depends: aeson
+ CPP-Options: -DWITH_TAHOE
if flag(EKG)
Build-Depends: ekg