author Joey Hess <joey@kitenet.net> 2012-03-03 18:05:55 -0400
committer Joey Hess <joey@kitenet.net> 2012-03-03 18:05:55 -0400
commit 3436aba6de8cad844691ca979ab5825f7011fb7e (patch)
tree 0602b1728860611621dbce3f986cf3e8e43e1290
parent 2841d748a49a15080995d6cef7999fe6a38c866a (diff)
Directory special remotes now support chunking files written to them
Avoiding writing files larger than a specified size is useful in certain situations. For example, box.com has a file size limit of 100 mb. Could also be useful on really crappy removable media.
-rw-r--r--   Remote/Directory.hs                  218
-rw-r--r--   debian/changelog                       2
-rw-r--r--   doc/special_remotes/directory.mdwn    19
3 files changed, 183 insertions, 56 deletions
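
The heart of the change: when a chunksize is configured, a key's content is written as a numbered series of files (key.chunk1, key.chunk2, ...), each no larger than that size, plus a key.chunkcount file recording how many pieces were written. Below is a minimal standalone sketch of that scheme, using hypothetical helper names (chunkNames, countName, writeChunked); the real code in the diff that follows additionally stages everything in .tmp files, manages directory permissions, and covers the unchunked and encrypted cases.

import qualified Data.ByteString.Lazy as L
import Data.Int (Int64)

-- Chunk files are named by appending ".chunkN" to the key's base path,
-- mirroring chunkStream in the patch.
chunkNames :: FilePath -> [FilePath]
chunkNames f = map (\n -> f ++ ".chunk" ++ show n) [1 :: Integer ..]

-- A sidecar file records how many chunks were written (cf. chunkCount).
countName :: FilePath -> FilePath
countName f = f ++ ".chunkcount"

-- Write content as a series of files, none larger than chunksize bytes,
-- then record the number of chunks. Returns the files written.
writeChunked :: Int64 -> FilePath -> L.ByteString -> IO [FilePath]
writeChunked chunksize dest content = do
    written <- go (chunkNames dest) content []
    writeFile (countName dest) (show (length written))
    return written
  where
    go [] _ _ = error "chunkNames is infinite"
    -- Always writes at least one file, even for empty content.
    go (d:ds) b done
        | L.null b && not (null done) = return (reverse done)
        | otherwise = do
            let (piece, rest) = L.splitAt chunksize b
            L.writeFile d piece
            go ds rest (d : done)
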
diff --git a/Remote/Directory.hs b/Remote/Directory.hs
index ee2a0d75a..6f7ce8cd6 100644
--- a/Remote/Directory.hs
+++ b/Remote/Directory.hs
@@ -1,6 +1,6 @@
{- A "remote" that is just a filesystem directory.
-
- - Copyright 2011 Joey Hess <joey@kitenet.net>
+ - Copyright 2011-2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -19,6 +19,8 @@ import Utility.FileMode
import Remote.Helper.Special
import Remote.Helper.Encryptable
import Crypto
+import Utility.DataUnits
+import Data.Int
remote :: RemoteType
remote = RemoteType {
@@ -32,24 +34,39 @@ gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex Remote
gen r u c = do
dir <- getConfig r "directory" (error "missing directory")
cst <- remoteCost r cheapRemoteCost
+ let chunksize = chunkSize c
return $ encryptableRemote c
- (storeEncrypted dir)
- (retrieveEncrypted dir)
+ (storeEncrypted dir chunksize)
+ (retrieveEncrypted dir chunksize)
Remote {
uuid = u,
cost = cst,
name = Git.repoDescribe r,
- storeKey = store dir,
- retrieveKeyFile = retrieve dir,
- retrieveKeyFileCheap = retrieveCheap dir,
- removeKey = remove dir,
- hasKey = checkPresent dir,
+ storeKey = store dir chunksize,
+ retrieveKeyFile = retrieve dir chunksize,
+ retrieveKeyFileCheap = retrieveCheap dir chunksize,
+ removeKey = remove dir chunksize,
+ hasKey = checkPresent dir chunksize,
hasKeyCheap = True,
whereisKey = Nothing,
config = Nothing,
repo = r,
remotetype = remote
}
+ where
+
+type ChunkSize = Maybe Int64
+
+chunkSize :: Maybe RemoteConfig -> ChunkSize
+chunkSize Nothing = Nothing
+chunkSize (Just m) =
+ case M.lookup "chunksize" m of
+ Nothing -> Nothing
+ Just v -> case readSize dataUnits v of
+ Nothing -> error "bad chunksize"
+ Just size
+ | size <= 0 -> error "bad chunksize"
+ | otherwise -> Just $ fromInteger size
directorySetup :: UUID -> RemoteConfig -> Annex RemoteConfig
directorySetup u c = do
@@ -69,69 +86,158 @@ directorySetup u c = do
locations :: FilePath -> Key -> [FilePath]
locations d k = map (d </>) (keyPaths k)
-withCheckedFile :: (FilePath -> IO Bool) -> FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool
-withCheckedFile _ [] _ _ = return False
-withCheckedFile check d k a = go $ locations d k
+{- An infinite stream of chunks to use for a given file. -}
+chunkStream :: FilePath -> [FilePath]
+chunkStream f = map tochunk [1 :: Integer ..]
+ where
+ tochunk n = f ++ ".chunk" ++ show n
+
+{- A file that records the number of chunks used. -}
+chunkCount :: FilePath -> FilePath
+chunkCount f = f ++ ".chunkcount"
+
+withCheckedFiles :: (FilePath -> IO Bool) -> ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
+withCheckedFiles _ _ [] _ _ = return False
+withCheckedFiles check Nothing d k a = go $ locations d k
where
go [] = return False
go (f:fs) = do
use <- check f
if use
- then a f
+ then a [f]
else go fs
+withCheckedFiles check (Just _) d k a = go $ locations d k
+ where
+ go [] = return False
+ go (f:fs) = do
+ let chunkcount = chunkCount f
+ use <- check chunkcount
+ if use
+ then do
+ count <- readcount chunkcount
+ let chunks = take count $ chunkStream f
+ ok <- all id <$> mapM check chunks
+ if ok
+ then a chunks
+ else return False
+ else go fs
+ readcount f = fromMaybe (error $ "cannot parse " ++ f)
+ . (readish :: String -> Maybe Int)
+ <$> readFile f
-withStoredFile :: FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool
-withStoredFile = withCheckedFile doesFileExist
+withStoredFiles :: ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
+withStoredFiles = withCheckedFiles doesFileExist
-store :: FilePath -> Key -> Annex Bool
-store d k = do
+store :: FilePath -> ChunkSize -> Key -> Annex Bool
+store d chunksize k = do
src <- inRepo $ gitAnnexLocation k
- liftIO $ catchBoolIO $ storeHelper d k $ copyFileExternal src
-
-storeEncrypted :: FilePath -> (Cipher, Key) -> Key -> Annex Bool
-storeEncrypted d (cipher, enck) k = do
+ liftIO $ catchBoolIO $ storeHelper d chunksize k $ \dests ->
+ case chunksize of
+ Nothing -> do
+ let dest = Prelude.head dests
+ ok <- copyFileExternal src dest
+ return $ if ok then [dest] else []
+ Just _ -> storeSplit chunksize dests =<< L.readFile src
+
+storeEncrypted :: FilePath -> ChunkSize -> (Cipher, Key) -> Key -> Annex Bool
+storeEncrypted d chunksize (cipher, enck) k = do
src <- inRepo $ gitAnnexLocation k
- liftIO $ catchBoolIO $ storeHelper d enck $ encrypt src
+ liftIO $ catchBoolIO $ storeHelper d chunksize enck $ encrypt src
where
- encrypt src dest = do
- withEncryptedContent cipher (L.readFile src) $ L.writeFile dest
- return True
-
-storeHelper :: FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool
-storeHelper d key a = do
- let dest = Prelude.head $ locations d key
- let tmpdest = dest ++ ".tmp"
- let dir = parentDir dest
+ encrypt src dests = withEncryptedContent cipher (L.readFile src) $ \s ->
+ case chunksize of
+ Nothing -> do
+ let dest = Prelude.head dests
+ L.writeFile dest s
+ return [dest]
+ Just _ -> storeSplit chunksize dests s
+
+{- Splits a ByteString into chunks and writes to dests.
+ - Note: Must always write at least one file, even for empty ByteString. -}
+storeSplit :: ChunkSize -> [FilePath] -> L.ByteString -> IO [FilePath]
+storeSplit Nothing _ _ = error "bad storeSplit call"
+storeSplit _ [] _ = error "bad storeSplit call"
+storeSplit (Just chunksize) alldests@(firstdest:_) s
+ | L.null s = do
+ -- must always write at least one file, even for empty
+ L.writeFile firstdest s
+ return [firstdest]
+ | otherwise = storeSplit' chunksize alldests s []
+storeSplit' :: Int64 -> [FilePath] -> L.ByteString -> [FilePath] -> IO [FilePath]
+storeSplit' _ [] _ _ = error "expected an infinite list"
+storeSplit' chunksize (d:dests) s c
+ | L.null s = return $ reverse c
+ | otherwise = do
+ let (chunk, rest) = L.splitAt chunksize s
+ L.writeFile d chunk
+ storeSplit' chunksize dests rest (d:c)
+
+{- Generates a list of destinations to write to in order to store a key.
+ - When chunksize is specified, this list will be a list of chunks.
+ - The action should store the file, and return a list of the destinations
+ - it stored it to, or [] on error.
+ - The stored files are only put into their final place once storage is
+ - complete.
+ -}
+storeHelper :: FilePath -> ChunkSize -> Key -> ([FilePath] -> IO [FilePath]) -> IO Bool
+storeHelper d chunksize key a = do
+ let dir = parentDir desttemplate
createDirectoryIfMissing True dir
allowWrite dir
- ok <- a tmpdest
- when ok $ do
- renameFile tmpdest dest
+ stored <- a tmpdests
+ forM_ stored $ \f -> do
+ let dest = detmpprefix f
+ renameFile f dest
preventWrite dest
- preventWrite dir
- return ok
-
-retrieve :: FilePath -> Key -> FilePath -> Annex Bool
-retrieve d k f = liftIO $ withStoredFile d k $ \file -> copyFileExternal file f
-
-retrieveCheap :: FilePath -> Key -> FilePath -> Annex Bool
-retrieveCheap d k f = liftIO $ withStoredFile d k $ \file ->
- catchBoolIO $ createSymbolicLink file f >> return True
+ when (chunksize /= Nothing) $ do
+ let chunkcount = chunkCount desttemplate
+ _ <- tryIO $ allowWrite chunkcount
+ writeFile chunkcount (show $ length stored)
+ preventWrite chunkcount
+ preventWrite dir
+ return (not $ null stored)
+ where
+ desttemplate = Prelude.head $ locations d key
+ tmpdests = case chunksize of
+ Nothing -> [desttemplate ++ tmpprefix]
+ Just _ -> map (++ tmpprefix) (chunkStream desttemplate)
+ tmpprefix = ".tmp"
+ detmpprefix f = take (length f - tmpprefixlen) f
+ tmpprefixlen = length tmpprefix
+
+retrieve :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool
+retrieve d chunksize k f = liftIO $ withStoredFiles chunksize d k go
+ where
+ go [file] = copyFileExternal file f
+ go files = catchBoolIO $ do
+ L.writeFile f =<< (L.concat <$> mapM L.readFile files)
+ return True
-retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool
-retrieveEncrypted d (cipher, enck) f =
- liftIO $ withStoredFile d enck $ \file -> catchBoolIO $ do
- withDecryptedContent cipher (L.readFile file) $ L.writeFile f
+retrieveEncrypted :: FilePath -> ChunkSize -> (Cipher, Key) -> FilePath -> Annex Bool
+retrieveEncrypted d chunksize (cipher, enck) f =
+ liftIO $ withStoredFiles chunksize d enck $ \files -> catchBoolIO $ do
+ withDecryptedContent cipher (L.concat <$> mapM L.readFile files) $
+ L.writeFile f
return True
-remove :: FilePath -> Key -> Annex Bool
-remove d k = liftIO $ withStoredFile d k $ \file -> catchBoolIO $ do
- let dir = parentDir file
- allowWrite dir
- removeFile file
- removeDirectory dir
- return True
+retrieveCheap :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool
+retrieveCheap _ (Just _) _ _ = return False -- no cheap retrieval for chunks
+retrieveCheap d _ k f = liftIO $ withStoredFiles Nothing d k go
+ where
+ go [file] = catchBoolIO $ createSymbolicLink file f >> return True
+ go _files = return False
+
+remove :: FilePath -> ChunkSize -> Key -> Annex Bool
+remove d chunksize k = liftIO $ withStoredFiles chunksize d k go
+ where
+ go files = all id <$> mapM removefile files
+ removefile file = catchBoolIO $ do
+ let dir = parentDir file
+ allowWrite dir
+ removeFile file
+ _ <- tryIO $ removeDirectory dir
+ return True
-checkPresent :: FilePath -> Key -> Annex (Either String Bool)
-checkPresent d k = liftIO $ catchMsgIO $ withStoredFile d k $
- const $ return True -- withStoredFile checked that it exists
+checkPresent :: FilePath -> ChunkSize -> Key -> Annex (Either String Bool)
+checkPresent d chunksize k = liftIO $ catchMsgIO $ withStoredFiles chunksize d k $
+ const $ return True -- withStoredFiles checked that it exists
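
Retrieval reverses the process: withStoredFiles reads the .chunkcount file, verifies each listed chunk exists, and the retrieve functions concatenate the chunks (L.concat <$> mapM L.readFile files) back into a single file. A minimal sketch of that reassembly, again under a hypothetical name (readChunked) and with no error handling:

import qualified Data.ByteString.Lazy as L

-- Reassemble a chunked key: read the recorded chunk count, then
-- concatenate the numbered chunk files into the destination file.
-- (The patch's retrieve also handles unchunked and encrypted content.)
readChunked :: FilePath -> FilePath -> IO ()
readChunked src dest = do
    count <- fmap read (readFile (src ++ ".chunkcount")) :: IO Integer
    let chunks = [ src ++ ".chunk" ++ show n | n <- [1 .. count] ]
    L.writeFile dest . L.concat =<< mapM L.readFile chunks

Because the reader relies only on the stored count, the exact chunk size does not matter at retrieval time; what matters is whether chunking is enabled at all, which is why the documentation below advises against changing the setting on an existing remote.
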
diff --git a/debian/changelog b/debian/changelog
index 47713e226..d5706d7be 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -2,6 +2,8 @@ git-annex (3.20120230) UNRELEASED; urgency=low
* "here" can be used to refer to the current repository,
which can read better than the old "." (which still works too).
+ * Directory special remotes now support chunking files written to them,
+ avoiding writing files larger than a specified size.
-- Joey Hess <joeyh@debian.org> Thu, 01 Mar 2012 22:34:27 -0400
diff --git a/doc/special_remotes/directory.mdwn b/doc/special_remotes/directory.mdwn
index 0a38c763c..100e06878 100644
--- a/doc/special_remotes/directory.mdwn
+++ b/doc/special_remotes/directory.mdwn
@@ -5,6 +5,25 @@ you want to use it to sneakernet files between systems (possibly with
[[encrypted|encryption]] contents). Just set up both systems to use
the drive's mountpoint as a directory remote.
+## configuration
+
+These parameters can be passed to `git annex initremote` to configure the
+remote:
+
+* `encryption` - Required. Either "none" to disable encryption of content
+ stored in the directory,
+ or a value that can be looked up (using gpg -k) to find a gpg encryption
+ key that will be given access to the remote. Note that additional gpg
+ keys can be given access to a remote by rerunning initremote with
+ the new key id. See [[encryption]].
+* `chunksize` - Avoid storing files larger than the specified size in the
+ directory. For use on directories on mount points that have file size
+ limitations. The default is to never chunk files.
+ Note that changing the chunk size of an existing remote is
+ not recommended.
+ The value can be specified using any commonly used units.
+ Example: `chunksize=100 megabytes`
+
Setup example:
# git annex initremote usbdrive type=directory directory=/media/usbdrive/ encryption=none
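
As an illustration of the new parameter, the same setup with chunking enabled might look like this (remote name and mountpoint as in the example above; the quotes are needed because the size value contains a space):

    # git annex initremote usbdrive type=directory directory=/media/usbdrive/ chunksize="100 megabytes" encryption=none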