summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Joey Hess <joey@kitenet.net> 2011-05-16 11:46:34 -0400
committer Joey Hess <joey@kitenet.net> 2011-05-16 11:46:34 -0400
commit 2a8efc7af19aa149dbf0ebc158954bb376f9c3a6 (patch)
tree 879eef3ffe3c8bdac8cb9b489f8298a00f058042
parent 1d2984441c654f01e88e427f3289f8066cd2e6b0 (diff)
Added filename extension preserving variant backends SHA1E, SHA256E, etc.
-rw-r--r-- Backend/SHA.hs 44
-rw-r--r-- debian/changelog 1
-rw-r--r-- doc/backends.mdwn 3
-rw-r--r-- doc/walkthrough/Internet_Archive_via_S3.mdwn 18
4 files changed, 49 insertions, 17 deletions
diff --git a/Backend/SHA.hs b/Backend/SHA.hs
index d9aeb72aa..6d721038c 100644
--- a/Backend/SHA.hs
+++ b/Backend/SHA.hs
@@ -14,6 +14,7 @@ import System.IO
import System.Directory
import Data.Maybe
import System.Posix.Files
+import System.FilePath
import qualified Backend.File
import BackendClass
@@ -27,11 +28,14 @@ import qualified SysConfig
import Key
type SHASize = Int
-
+
+sizes :: [Int]
+sizes = [1, 256, 512, 224, 384]
+
backends :: [Backend Annex]
-- order is slightly significant; want sha1 first ,and more general
-- sizes earlier
-backends = catMaybes $ map genBackend [1, 256, 512, 224, 384]
+backends = catMaybes $ map genBackend sizes ++ map genBackendE sizes
genBackend :: SHASize -> Maybe (Backend Annex)
genBackend size
@@ -44,6 +48,15 @@ genBackend size
, fsckKey = Backend.File.checkKey $ checkKeyChecksum size
}
+genBackendE :: SHASize -> Maybe (Backend Annex)
+genBackendE size =
+ case genBackend size of
+ Nothing -> Nothing
+ Just b -> Just $ b
+ { name = shaNameE size
+ , getKey = keyValueE size
+ }
+
shaCommand :: SHASize -> Maybe String
shaCommand 1 = SysConfig.sha1
shaCommand 256 = SysConfig.sha256
@@ -55,6 +68,9 @@ shaCommand _ = Nothing
shaName :: SHASize -> String
shaName size = "SHA" ++ show size
+shaNameE :: SHASize -> String
+shaNameE size = shaName size ++ "E"
+
shaN :: SHASize -> FilePath -> Annex String
shaN size file = do
showNote "checksum..."
@@ -72,11 +88,25 @@ keyValue :: SHASize -> FilePath -> Annex (Maybe Key)
keyValue size file = do
s <- shaN size file
stat <- liftIO $ getFileStatus file
- return $ Just $ stubKey {
- keyName = s,
- keyBackendName = shaName size,
- keySize = Just $ fromIntegral $ fileSize stat
- }
+ return $ Just $ stubKey
+ { keyName = s
+ , keyBackendName = shaName size
+ , keySize = Just $ fromIntegral $ fileSize stat
+ }
+
+{- Extension preserving keys. -}
+keyValueE :: SHASize -> FilePath -> Annex (Maybe Key)
+keyValueE size file = keyValue size file >>= maybe (return Nothing) addE
+ where
+ addE k = return $ Just $ k
+ { keyName = keyName k ++ extension
+ , keyBackendName = shaNameE size
+ }
+ naiveextension = takeExtension file
+ extension =
+ if length naiveextension > 6
+ then "" -- probably not really an extension
+ else naiveextension
-- A key's checksum is checked during fsck.
checkKeyChecksum :: SHASize -> Key -> Annex Bool
diff --git a/debian/changelog b/debian/changelog
index 59ebb1deb..a7f603981 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -6,6 +6,7 @@ git-annex (0.20110504) UNRELEASED; urgency=low
limits, disable encryption, support their nonstandard way of creating
buckets, and allow x-amz-* headers to be specified in initremote to set
item metadata.
+ * Added filename extension preserving variant backends SHA1E, SHA256E, etc.
-- Joey Hess <joeyh@debian.org> Fri, 06 May 2011 15:20:38 -0400
diff --git a/doc/backends.mdwn b/doc/backends.mdwn
index b0a2c882a..4290da33b 100644
--- a/doc/backends.mdwn
+++ b/doc/backends.mdwn
@@ -23,6 +23,9 @@ these backends.
* `SHA512`, `SHA384`, `SHA256`, `SHA224` -- Like SHA1, but larger
checksums. Mostly useful for the very paranoid, or anyone who is
researching checksum collisions and wants to annex their colliding data. ;)
+* `SHA1E`, `SHA512E`, etc -- Variants that preserve filename extension as
+ part of the key. Useful for archival tasks where the filename extension
+ contains metadata that should be preserved.
These backends store file contents in other key/value stores.
diff --git a/doc/walkthrough/Internet_Archive_via_S3.mdwn b/doc/walkthrough/Internet_Archive_via_S3.mdwn
index e0f8fafb4..f92e0ee9d 100644
--- a/doc/walkthrough/Internet_Archive_via_S3.mdwn
+++ b/doc/walkthrough/Internet_Archive_via_S3.mdwn
@@ -32,20 +32,18 @@ specify `x-archive-meta*` headers to add metadata as explained in their
Then you can annex files and copy them to the remote as usual:
- # git annex add photo1.jpeg
- add photo1.jpeg ok
+ # git annex add photo1.jpeg --backend=SHA1E
+ add photo1.jpeg (checksum...) ok
# git annex copy photo1.jpeg --fast --to archive-panama
copy (to archive-panama...) ok
------
+Note the use of the SHA1E [[backend|backends]]. It makes most sense
+to use the WORM or SHA1E backend for files that will be stored in
+the Internet Archive, since the key name will be exposed as the filename
+there, and since the Archive does special processing of files based on
+their extension.
-Note that it probably makes the most sense to use the WORM backend
-for files, since that exposes the original filename in the key stored
-in the Archive, which allows its special processing for sound files,
-movies, etc to be done.
-
-Also, the Internet Archive has restrictions on what is allowed in a
-filename; particularly no spaces are allowed.
+----
There seems to be a bug in either hS3 or the archive that breaks
authentication when the bucket name contains spaces or upper-case letters..