summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Joey Hess <joey@kitenet.net>, 2010-10-15 18:57:05 -0400
committer: Joey Hess <joey@kitenet.net>, 2010-10-15 18:57:05 -0400
commit: 44b8f7c95de84018044ce3669e62d40eac1b91a7 (patch)
tree: f3abcedf07eb3e4fcc2d71048f3b62cdea7b427d
parent: 8e742bd89e6bd3d83c44847c0455043809c64c89 (diff)
better worm keys
-rw-r--r-- Backend/Worm.hs 24
-rw-r--r-- git-annex.mdwn 8
2 files changed, 26 insertions, 6 deletions
diff --git a/Backend/Worm.hs b/Backend/Worm.hs
index ba79428ef..89fe4bf57 100644
--- a/Backend/Worm.hs
+++ b/Backend/Worm.hs
@@ -3,16 +3,36 @@
module Backend.Worm (backend) where
+import Control.Monad.State
import qualified Backend.File
import BackendTypes
import Utility
import System.FilePath
+import System.Posix.Files
+import Data.Digest.Pure.SHA -- slow, but we only checksum filenames
+import qualified Data.ByteString.Lazy.Char8 as B
backend = Backend.File.backend {
name = "WORM",
getKey = keyValue
}
--- direct mapping from basename of filename to key
+-- A SHA1 of the file size and modification time is used as the first
+-- part of the key. Combined with the basename, that allows multiple
+-- files with the same name to have different keys, while also allowing
+-- a file to be moved around while retaining the same key.
+--
+-- The basename of the filename is also included in the key, after the
+-- checksum, so it's clear what the original filename was when a user
+-- sees the value.
keyValue :: FilePath -> Annex (Maybe Key)
-keyValue file = return $ Just $ Key ((name backend), (takeFileName file))
+keyValue file = do
+ stat <- liftIO $ getFileStatus file
+ return $ Just $ Key ((name backend), key stat)
+ where
+ key stat = (checksum $ uniqueid stat) ++ sep ++ base
+ checksum s = show $ sha1 $ B.pack s
+ uniqueid stat = (show $ fileSize stat) ++ sep ++
+ (show $ modificationTime stat)
+ base = takeFileName file
+ sep = ":"
diff --git a/git-annex.mdwn b/git-annex.mdwn
index fba9648db..2079b5b46 100644
--- a/git-annex.mdwn
+++ b/git-annex.mdwn
@@ -94,10 +94,10 @@ Multiple pluggable backends are supported, and more than one can be used
to store different files' contents in a given repository.
* `WORM` ("Write Once, Read Many") This backend stores the file's content
- in `.git/annex/`, and assumes that any file with the same basename
- has the same content. So with this backend, files can be moved around,
- but should never be added to or changed. This is the default, and
- the least expensive backend.
+ in `.git/annex/`, and assumes that any file with the same basename,
+ size, and modification time has the same content. So with this backend,
+ files can be moved around, but should never be added to or changed.
+ This is the default, and the least expensive backend.
* `sha1sum` -- This backend stores the file's content in
`.git/annex/`, with a name based on its sha1 checksum. This backend allows
modifications of files to be tracked. Its need to generate checksums