diff options
author | Joey Hess <joey@kitenet.net> | 2010-10-15 18:57:05 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2010-10-15 18:57:05 -0400 |
commit | 44b8f7c95de84018044ce3669e62d40eac1b91a7 (patch) | |
tree | f3abcedf07eb3e4fcc2d71048f3b62cdea7b427d | |
parent | 8e742bd89e6bd3d83c44847c0455043809c64c89 (diff) |
better worm keys
-rw-r--r-- | Backend/Worm.hs | 24 | ||||
-rw-r--r-- | git-annex.mdwn | 8 |
2 files changed, 26 insertions, 6 deletions
```diff
diff --git a/Backend/Worm.hs b/Backend/Worm.hs
index ba79428ef..89fe4bf57 100644
--- a/Backend/Worm.hs
+++ b/Backend/Worm.hs
@@ -3,16 +3,36 @@
 
 module Backend.Worm (backend) where
 
+import Control.Monad.State
 import qualified Backend.File
 import BackendTypes
 import Utility
 import System.FilePath
+import System.Posix.Files
+import Data.Digest.Pure.SHA -- slow, but we only checksum filenames
+import qualified Data.ByteString.Lazy.Char8 as B
 
 backend = Backend.File.backend {
     name = "WORM",
     getKey = keyValue
 }
 
--- direct mapping from basename of filename to key
+-- A SHA1 of the basename of the filename, plus the file size and
+-- modification time, is used as the unique part of the key. That
+-- allows multiple files with the same names to have different keys,
+-- while also allowing a file to be moved around while retaining the
+-- same key.
+--
+-- The basename of the filename is also included in the key, so it's clear
+-- what the original filename was when a user sees the value.
 keyValue :: FilePath -> Annex (Maybe Key)
-keyValue file = return $ Just $ Key ((name backend), (takeFileName file))
+keyValue file = do
+    stat <- liftIO $ getFileStatus file
+    return $ Just $ Key ((name backend), key stat)
+    where
+        key stat = (checksum $ uniqueid stat) ++ sep ++ base
+        checksum s = show $ sha1 $ B.pack s
+        uniqueid stat = (show $ fileSize stat) ++ sep ++
+            (show $ modificationTime stat)
+        base = takeFileName file
+        sep = ":"
diff --git a/git-annex.mdwn b/git-annex.mdwn
index fba9648db..2079b5b46 100644
--- a/git-annex.mdwn
+++ b/git-annex.mdwn
@@ -94,10 +94,10 @@ Multiple pluggable backends are supported, and more than one can be used
 to store different files' contents in a given repository.
 
 * `WORM` ("Write Once, Read Many") This backend stores the file's content
-  in `.git/annex/`, and assumes that any file with the same basename
-  has the same content. So with this backend, files can be moved around,
-  but should never be added to or changed. This is the default, and
-  the least expensive backend.
+  in `.git/annex/`, and assumes that any file with the same basename,
+  size, and modification time has the same content. So with this backend,
+  files can be moved around, but should never be added to or changed.
+  This is the default, and the least expensive backend.
 * `sha1sum` -- This backend stores the file's content in
   `.git/annex/`, with a name based on its sha1 checksum. This backend allows
   modifications of files to be tracked. Its need to generate checksums
```