diff options
author | Joey Hess <joey@kitenet.net> | 2011-05-16 11:46:34 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2011-05-16 11:46:34 -0400 |
commit | 2a8efc7af19aa149dbf0ebc158954bb376f9c3a6 (patch) | |
tree | 879eef3ffe3c8bdac8cb9b489f8298a00f058042 | |
parent | 1d2984441c654f01e88e427f3289f8066cd2e6b0 (diff) |
Added filename extension preserving variant backends SHA1E, SHA256E, etc.
-rw-r--r-- | Backend/SHA.hs | 44 | ||||
-rw-r--r-- | debian/changelog | 1 | ||||
-rw-r--r-- | doc/backends.mdwn | 3 | ||||
-rw-r--r-- | doc/walkthrough/Internet_Archive_via_S3.mdwn | 18 |
4 files changed, 49 insertions, 17 deletions
diff --git a/Backend/SHA.hs b/Backend/SHA.hs index d9aeb72aa..6d721038c 100644 --- a/Backend/SHA.hs +++ b/Backend/SHA.hs @@ -14,6 +14,7 @@ import System.IO import System.Directory import Data.Maybe import System.Posix.Files +import System.FilePath import qualified Backend.File import BackendClass @@ -27,11 +28,14 @@ import qualified SysConfig import Key type SHASize = Int - + +sizes :: [Int] +sizes = [1, 256, 512, 224, 384] + backends :: [Backend Annex] -- order is slightly significant; want sha1 first ,and more general -- sizes earlier -backends = catMaybes $ map genBackend [1, 256, 512, 224, 384] +backends = catMaybes $ map genBackend sizes ++ map genBackendE sizes genBackend :: SHASize -> Maybe (Backend Annex) genBackend size @@ -44,6 +48,15 @@ genBackend size , fsckKey = Backend.File.checkKey $ checkKeyChecksum size } +genBackendE :: SHASize -> Maybe (Backend Annex) +genBackendE size = + case genBackend size of + Nothing -> Nothing + Just b -> Just $ b + { name = shaNameE size + , getKey = keyValueE size + } + shaCommand :: SHASize -> Maybe String shaCommand 1 = SysConfig.sha1 shaCommand 256 = SysConfig.sha256 @@ -55,6 +68,9 @@ shaCommand _ = Nothing shaName :: SHASize -> String shaName size = "SHA" ++ show size +shaNameE :: SHASize -> String +shaNameE size = shaName size ++ "E" + shaN :: SHASize -> FilePath -> Annex String shaN size file = do showNote "checksum..." @@ -72,11 +88,25 @@ keyValue :: SHASize -> FilePath -> Annex (Maybe Key) keyValue size file = do s <- shaN size file stat <- liftIO $ getFileStatus file - return $ Just $ stubKey { - keyName = s, - keyBackendName = shaName size, - keySize = Just $ fromIntegral $ fileSize stat - } + return $ Just $ stubKey + { keyName = s + , keyBackendName = shaName size + , keySize = Just $ fromIntegral $ fileSize stat + } + +{- Extension preserving keys. -} +keyValueE :: SHASize -> FilePath -> Annex (Maybe Key) +keyValueE size file = keyValue size file >>= maybe (return Nothing) addE + where + addE k = return $ Just $ k + { keyName = keyName k ++ extension + , keyBackendName = shaNameE size + } + naiveextension = takeExtension file + extension = + if length naiveextension > 6 + then "" -- probably not really an extension + else naiveextension -- A key's checksum is checked during fsck. checkKeyChecksum :: SHASize -> Key -> Annex Bool diff --git a/debian/changelog b/debian/changelog index 59ebb1deb..a7f603981 100644 --- a/debian/changelog +++ b/debian/changelog @@ -6,6 +6,7 @@ git-annex (0.20110504) UNRELEASED; urgency=low limits, disable encryption, support their nonstandard way of creating buckets, and allow x-amz-* headers to be specified in initremote to set item metadata. + * Added filename extension preserving variant backends SHA1E, SHA256E, etc. -- Joey Hess <joeyh@debian.org> Fri, 06 May 2011 15:20:38 -0400 diff --git a/doc/backends.mdwn b/doc/backends.mdwn index b0a2c882a..4290da33b 100644 --- a/doc/backends.mdwn +++ b/doc/backends.mdwn @@ -23,6 +23,9 @@ these backends. * `SHA512`, `SHA384`, `SHA256`, `SHA224` -- Like SHA1, but larger checksums. Mostly useful for the very paranoid, or anyone who is researching checksum collisions and wants to annex their colliding data. ;) +* `SHA1E`, `SHA512E`, etc -- Variants that preserve filename extension as + part of the key. Useful for archival tasks where the filename extension + contains metadata that should be preserved. These backends store file contents in other key/value stores. diff --git a/doc/walkthrough/Internet_Archive_via_S3.mdwn b/doc/walkthrough/Internet_Archive_via_S3.mdwn index e0f8fafb4..f92e0ee9d 100644 --- a/doc/walkthrough/Internet_Archive_via_S3.mdwn +++ b/doc/walkthrough/Internet_Archive_via_S3.mdwn @@ -32,20 +32,18 @@ specify `x-archive-meta*` headers to add metadata as explained in their Then you can annex files and copy them to the remote as usual: - # git annex add photo1.jpeg - add photo1.jpeg ok + # git annex add photo1.jpeg --backend=SHA1E + add photo1.jpeg (checksum...) ok # git annex copy photo1.jpeg --fast --to archive-panama copy (to archive-panama...) ok ------ +Note the use of the SHA1E [[backend|backends]]. It makes most sense +to use the WORM or SHA1E backend for files that will be stored in +the Internet Archive, since the key name will be exposed as the filename +there, and since the Archive does special processing of files based on +their extension. -Note that it probably makes the most sense to use the WORM backend -for files, since that exposes the original filename in the key stored -in the Archive, which allows its special processing for sound files, -movies, etc to be done. - -Also, the Internet Archive has restrictions on what is allowed in a -filename; particularly no spaces are allowed. +---- There seems to be a bug in either hS3 or the archive that breaks authentication when the bucket name contains spaces or upper-case letters.. |