diff options
37 files changed, 327 insertions, 219 deletions
@@ -25,7 +25,7 @@ import Data.Maybe import qualified GitRepo as Git import qualified GitQueue -import qualified BackendTypes +import qualified BackendClass import Utility -- git-annex's monad @@ -34,8 +34,8 @@ type Annex = StateT AnnexState IO -- internal state storage data AnnexState = AnnexState { repo :: Git.Repo - , backends :: [BackendTypes.Backend Annex] - , supportedBackends :: [BackendTypes.Backend Annex] + , backends :: [BackendClass.Backend Annex] + , supportedBackends :: [BackendClass.Backend Annex] , repoqueue :: GitQueue.Queue , quiet :: Bool , force :: Bool @@ -47,7 +47,7 @@ data AnnexState = AnnexState , remotesread :: Bool } deriving (Show) -newState :: Git.Repo -> [BackendTypes.Backend Annex] -> AnnexState +newState :: Git.Repo -> [BackendClass.Backend Annex] -> AnnexState newState gitrepo allbackends = AnnexState { repo = gitrepo , backends = [] @@ -64,7 +64,7 @@ newState gitrepo allbackends = AnnexState } {- Create and returns an Annex state object for the specified git repo. -} -new :: Git.Repo -> [BackendTypes.Backend Annex] -> IO AnnexState +new :: Git.Repo -> [BackendClass.Backend Annex] -> IO AnnexState new gitrepo allbackends = do gitrepo' <- liftIO $ Git.configRead gitrepo return $ newState gitrepo' allbackends diff --git a/Backend.hs b/Backend.hs index df23e80a3..e1f8f388b 100644 --- a/Backend.hs +++ b/Backend.hs @@ -39,7 +39,8 @@ import Locations import qualified GitRepo as Git import qualified Annex import Types -import qualified BackendTypes as B +import Key +import qualified BackendClass as B import Messages {- List of backends in the order to try them when storing a new key. -} @@ -135,18 +136,19 @@ lookupFile file = do getsymlink = do l <- readSymbolicLink file return $ takeFileName l - makekey bs l = do + makekey bs l = + case fileKey l of + Just k -> makeret k l bs + Nothing -> return Nothing + makeret k l bs = case maybeLookupBackendName bs bname of - Nothing -> do - unless (null kname || null bname || - not (isLinkToAnnex l)) $ - warning skip - return Nothing - Just backend -> return $ Just (k, backend) + Just backend -> return $ Just (k, backend) + Nothing -> do + when (isLinkToAnnex l) $ + warning skip + return Nothing where - k = fileKey l - bname = backendName k - kname = keyName k + bname = keyBackendName k skip = "skipping " ++ file ++ " (unknown backend " ++ bname ++ ")" @@ -164,4 +166,4 @@ chooseBackends fs = do keyBackend :: Key -> Annex (Backend Annex) keyBackend key = do bs <- Annex.getState Annex.supportedBackends - return $ lookupBackendName bs $ backendName key + return $ lookupBackendName bs $ keyBackendName key diff --git a/Backend/File.hs b/Backend/File.hs index d76cd2939..a5e243199 100644 --- a/Backend/File.hs +++ b/Backend/File.hs @@ -18,7 +18,7 @@ import Control.Monad.State import System.Directory import Data.List -import BackendTypes +import BackendClass import LocationLog import Locations import qualified Remotes diff --git a/Backend/SHA.hs b/Backend/SHA.hs index 4eea890ce..056385107 100644 --- a/Backend/SHA.hs +++ b/Backend/SHA.hs @@ -13,9 +13,10 @@ import System.Cmd.Utils import System.IO import System.Directory import Data.Maybe +import System.Posix.Files import qualified Backend.File -import BackendTypes +import BackendClass import Messages import qualified Annex import Locations @@ -23,6 +24,7 @@ import Content import Types import Utility import qualified SysConfig +import Key type SHASize = Int @@ -63,11 +65,16 @@ shaN size file = do where command = "sha" ++ (show size) ++ "sum" --- A key is a checksum of its contents. +{- A key is a checksum of its contents. -} keyValue :: SHASize -> FilePath -> Annex (Maybe Key) keyValue size file = do s <- shaN size file - return $ Just $ Key (shaName size, s) + stat <- liftIO $ getFileStatus file + return $ Just $ stubKey { + keyName = s, + keyBackendName = shaName size, + keySize = Just $ fromIntegral $ fileSize stat + } -- A key's checksum is checked during fsck. checkKeyChecksum :: SHASize -> Key -> Annex Bool diff --git a/Backend/URL.hs b/Backend/URL.hs index 29dc8fefa..02ce3563c 100644 --- a/Backend/URL.hs +++ b/Backend/URL.hs @@ -8,12 +8,12 @@ module Backend.URL (backends) where import Control.Monad.State (liftIO) -import Data.String.Utils import Types -import BackendTypes +import BackendClass import Utility import Messages +import Key backends :: [Backend Annex] backends = [backend] @@ -52,8 +52,8 @@ dummyOk _ = return True downloadUrl :: Key -> FilePath -> Annex Bool downloadUrl key file = do - showNote "downloading" + showNote $ "downloading" showProgress -- make way for curl progress bar liftIO $ boolSystem "curl" [Params "-# -o", File file, File url] where - url = join ":" $ drop 1 $ split ":" $ show key + url = keyName key diff --git a/Backend/WORM.hs b/Backend/WORM.hs index a0d814aa0..a011995da 100644 --- a/Backend/WORM.hs +++ b/Backend/WORM.hs @@ -10,17 +10,17 @@ module Backend.WORM (backends) where import Control.Monad.State import System.FilePath import System.Posix.Files -import System.Posix.Types import System.Directory -import Data.String.Utils +import Data.Maybe import qualified Backend.File -import BackendTypes +import BackendClass import Locations import qualified Annex import Content import Messages import Types +import Key backends :: [Backend Annex] backends = [backend] @@ -32,31 +32,25 @@ backend = Backend.File.backend { fsckKey = Backend.File.checkKey checkKeySize } --- The key is formed from the file size, modification time, and the --- basename of the filename. --- --- That allows multiple files with the same names to have different keys, --- while also allowing a file to be moved around while retaining the --- same key. +{- The key includes the file size, modification time, and the + - basename of the filename. + - + - That allows multiple files with the same names to have different keys, + - while also allowing a file to be moved around while retaining the + - same key. + -} keyValue :: FilePath -> Annex (Maybe Key) keyValue file = do stat <- liftIO $ getFileStatus file - return $ Just $ Key (name backend, key stat) - where - key stat = uniqueid stat ++ sep ++ base - uniqueid stat = show (modificationTime stat) ++ sep ++ - show (fileSize stat) - base = takeFileName file - sep = ":" - -{- Extracts the file size from a key. -} -keySize :: Key -> FileOffset -keySize key = read $ section !! 1 - where - section = split ":" (keyName key) + return $ Just $ Key { + keyName = takeFileName file, + keyBackendName = name backend, + keySize = Just $ fromIntegral $ fileSize stat, + keyMtime = Just $ modificationTime stat + } {- The size of the data for a key is checked against the size encoded in - - the key. Note that the modification time is not checked. -} + - the key's metadata. -} checkKeySize :: Key -> Annex Bool checkKeySize key = do g <- Annex.gitRepo @@ -66,7 +60,7 @@ checkKeySize key = do then return True else do s <- liftIO $ getFileStatus file - if fileSize s == keySize key + if fromIntegral (fileSize s) == fromJust (keySize key) then return True else do dest <- moveBad key diff --git a/BackendClass.hs b/BackendClass.hs new file mode 100644 index 000000000..909ae8f96 --- /dev/null +++ b/BackendClass.hs @@ -0,0 +1,39 @@ +{- git-annex key/value backend data type + - + - Most things should not need this, using Types instead + - + - Copyright 2010 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module BackendClass where + +import Key + +data Backend a = Backend { + -- name of this backend + name :: String, + -- converts a filename to a key + getKey :: FilePath -> a (Maybe Key), + -- stores a file's contents to a key + storeFileKey :: FilePath -> Key -> a Bool, + -- retrieves a key's contents to a file + retrieveKeyFile :: Key -> FilePath -> a Bool, + -- removes a key, optionally checking that enough copies are stored + -- elsewhere + removeKey :: Key -> Maybe Int -> a Bool, + -- checks if a backend is storing the content of a key + hasKey :: Key -> a Bool, + -- called during fsck to check a key + -- (second parameter may be the filename associated with it) + -- (third parameter may be the number of copies that there should + -- be of the key) + fsckKey :: Key -> Maybe FilePath -> Maybe Int -> a Bool +} + +instance Show (Backend a) where + show backend = "Backend { name =\"" ++ name backend ++ "\" }" + +instance Eq (Backend a) where + a == b = name a == name b diff --git a/BackendTypes.hs b/BackendTypes.hs deleted file mode 100644 index c0705a550..000000000 --- a/BackendTypes.hs +++ /dev/null @@ -1,79 +0,0 @@ -{- git-annex key/value backend data types - - - - Most things should not need this, using Types instead - - - - Copyright 2010 Joey Hess <joey@kitenet.net> - - - - Licensed under the GNU GPL version 3 or higher. - -} - -module BackendTypes where - -import Data.String.Utils -import Test.QuickCheck - -type KeyName = String -type BackendName = String -newtype Key = Key (BackendName, KeyName) deriving (Eq, Ord) - -data Backend a = Backend { - -- name of this backend - name :: String, - -- converts a filename to a key - getKey :: FilePath -> a (Maybe Key), - -- stores a file's contents to a key - storeFileKey :: FilePath -> Key -> a Bool, - -- retrieves a key's contents to a file - retrieveKeyFile :: Key -> FilePath -> a Bool, - -- removes a key, optionally checking that enough copies are stored - -- elsewhere - removeKey :: Key -> Maybe Int -> a Bool, - -- checks if a backend is storing the content of a key - hasKey :: Key -> a Bool, - -- called during fsck to check a key - -- (second parameter may be the filename associated with it) - -- (third parameter may be the number of copies that there should - -- be of the key) - fsckKey :: Key -> Maybe FilePath -> Maybe Int -> a Bool -} - -instance Show (Backend a) where - show backend = "Backend { name =\"" ++ name backend ++ "\" }" - -instance Eq (Backend a) where - a == b = name a == name b - --- accessors for the parts of a key -keyName :: Key -> KeyName -keyName (Key (_,k)) = k -backendName :: Key -> BackendName -backendName (Key (b,_)) = b - --- constructs a key in a backend -genKey :: Backend a -> KeyName -> Key -genKey b f = Key (name b,f) - --- show a key to convert it to a string; the string includes the --- name of the backend to avoid collisions between key strings -instance Show Key where - show (Key (b, k)) = b ++ ":" ++ k - -instance Read Key where - readsPrec _ s = [(Key (b,k), "")] - where - l = split ":" s - b = if null l then "" else head l - k = join ":" $ drop 1 l - --- for quickcheck -instance Arbitrary Key where - arbitrary = do - backendname <- arbitrary - keyname <- arbitrary - return $ Key (backendname, keyname) - -prop_idempotent_key_read_show :: Key -> Bool -prop_idempotent_key_read_show k - -- backend names will never contain colons - | ':' `elem` (backendName k) = True - | otherwise = k == (read $ show k) diff --git a/Command.hs b/Command.hs index eba7f2cef..41ad884a9 100644 --- a/Command.hs +++ b/Command.hs @@ -22,6 +22,7 @@ import qualified Annex import qualified GitRepo as Git import Locations import Utility +import Key {- A command runs in four stages. - @@ -45,6 +46,8 @@ type CommandCleanup = Annex Bool - functions. -} type CommandSeekStrings = CommandStartString -> CommandSeek type CommandStartString = String -> CommandStart +type CommandSeekKeys = CommandStartKey -> CommandSeek +type CommandStartKey = Key -> CommandStart type BackendFile = (FilePath, Maybe (Backend Annex)) type CommandSeekBackendFiles = CommandStartBackendFile -> CommandSeek type CommandStartBackendFile = BackendFile -> CommandStart @@ -166,8 +169,12 @@ withFilesUnlocked' typechanged a params = do map (\f -> Git.workTree repo ++ "/" ++ f) typechangedfiles unlockedfiles' <- filterFiles unlockedfiles backendPairs a unlockedfiles' -withKeys :: CommandSeekStrings -withKeys a params = return $ map a params +withKeys :: CommandSeekKeys +withKeys a params = return $ map a $ map parse params + where + parse p = case readKey p of + Just k -> k + Nothing -> error "bad key" withTempFile :: CommandSeekStrings withTempFile a params = return $ map a params withNothing :: CommandSeekNothing @@ -228,17 +235,18 @@ paramName = "NAME" paramNothing :: String paramNothing = "" -{- The Key specified by the --key and --backend parameters. -} +{- The Key specified by the --key parameter. -} cmdlineKey :: Annex Key cmdlineKey = do k <- Annex.getState Annex.defaultkey - backends <- Backend.list - return $ genKey (head backends) (keyname' k) + case k of + Nothing -> nokey + Just "" -> nokey + Just kstring -> case readKey kstring of + Nothing -> error "bad key" + Just key -> return key where - keyname' Nothing = badkey - keyname' (Just "") = badkey - keyname' (Just n) = n - badkey = error "please specify the key with --key" + nokey = error "please specify the key with --key" {- Given an original list of files, and an expanded list derived from it, - ensures that the original list's ordering is preserved. diff --git a/Command/DropKey.hs b/Command/DropKey.hs index 8c7566df8..b3cc60961 100644 --- a/Command/DropKey.hs +++ b/Command/DropKey.hs @@ -9,7 +9,6 @@ module Command.DropKey where import Command import qualified Annex -import qualified Backend import LocationLog import Types import Content @@ -22,11 +21,8 @@ command = [Command "dropkey" (paramRepeating paramKey) seek seek :: [CommandSeek] seek = [withKeys start] -{- Drops cached content for a key. -} -start :: CommandStartString -start keyname = do - backends <- Backend.list - let key = genKey (head backends) keyname +start :: CommandStartKey +start key = do present <- inAnnex key force <- Annex.getState Annex.force if not present @@ -34,7 +30,7 @@ start keyname = do else if not force then error "dropkey is can cause data loss; use --force if you're sure you want to do this" else do - showStart "dropkey" keyname + showStart "dropkey" (show key) return $ Just $ perform key perform :: Key -> CommandPerform diff --git a/Command/DropUnused.hs b/Command/DropUnused.hs index 594564cb7..8ed61ba65 100644 --- a/Command/DropUnused.hs +++ b/Command/DropUnused.hs @@ -11,6 +11,7 @@ import Control.Monad (when) import Control.Monad.State (liftIO) import qualified Data.Map as M import System.Directory +import Data.Maybe import Command import Types @@ -19,6 +20,7 @@ import Locations import qualified Annex import qualified Command.Drop import Backend +import Key command :: [Command] command = [Command "dropunused" (paramRepeating paramNumber) seek @@ -55,7 +57,6 @@ readUnusedLog = do return $ M.fromList $ map parse $ lines l else return $ M.empty where - parse line = (head ws, tokey $ unwords $ tail ws) + parse line = (head ws, fromJust $ readKey $ unwords $ tail ws) where ws = words line - tokey s = read s :: Key diff --git a/Command/FromKey.hs b/Command/FromKey.hs index 717d528bc..176d2cd54 100644 --- a/Command/FromKey.hs +++ b/Command/FromKey.hs @@ -16,9 +16,9 @@ import Command import qualified Annex import Utility import qualified Backend -import Types import Content import Messages +import Key command :: [Command] command = [Command "fromkey" paramPath seek diff --git a/Command/InAnnex.hs b/Command/InAnnex.hs index 68ac9a2c6..fa81fc9a4 100644 --- a/Command/InAnnex.hs +++ b/Command/InAnnex.hs @@ -11,9 +11,7 @@ import Control.Monad.State (liftIO) import System.Exit import Command -import Types import Content -import qualified Backend command :: [Command] command = [Command "inannex" (paramRepeating paramKey) seek @@ -22,10 +20,8 @@ command = [Command "inannex" (paramRepeating paramKey) seek seek :: [CommandSeek] seek = [withKeys start] -start :: CommandStartString -start keyname = do - backends <- Backend.list - let key = genKey (head backends) keyname +start :: CommandStartKey +start key = do present <- inAnnex key if present then return Nothing diff --git a/Command/Move.hs b/Command/Move.hs index 3774ccbe9..2d6c973fe 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -20,7 +20,7 @@ import qualified Remotes import UUID import Messages import Utility - + command :: [Command] command = [Command "move" paramPath seek "move content of files to/from another repository"] @@ -136,8 +136,7 @@ fromCleanup :: Git.Repo -> Bool -> Key -> CommandCleanup fromCleanup src True key = do ok <- Remotes.onRemote src (boolSystem, False) "dropkey" [ Params "--quiet --force" - , Param $ "--backend=" ++ backendName key - , Param $ keyName key + , Param $ show key ] -- better safe than sorry: assume the src dropped the key -- even if it seemed to fail; the failure could have occurred diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs index 8a9673050..c7c37d1e3 100644 --- a/Command/RecvKey.hs +++ b/Command/RecvKey.hs @@ -12,10 +12,8 @@ import Control.Monad.State (liftIO) import System.Exit import Command -import Types import CmdLine import Content -import qualified Backend import RsyncFile command :: [Command] @@ -25,10 +23,8 @@ command = [Command "recvkey" paramKey seek seek :: [CommandSeek] seek = [withKeys start] -start :: CommandStartString -start keyname = do - backends <- Backend.list - let key = genKey (head backends) keyname +start :: CommandStartKey +start key = do present <- inAnnex key when present $ error "key is already present in annex" diff --git a/Command/SendKey.hs b/Command/SendKey.hs index cb883b53a..56974bda9 100644 --- a/Command/SendKey.hs +++ b/Command/SendKey.hs @@ -14,9 +14,7 @@ import System.Exit import Locations import qualified Annex import Command -import Types import Content -import qualified Backend import RsyncFile command :: [Command] @@ -26,10 +24,8 @@ command = [Command "sendkey" paramKey seek seek :: [CommandSeek] seek = [withKeys start] -start :: CommandStartString -start keyname = do - backends <- Backend.list - let key = genKey (head backends) keyname +start :: CommandStartKey +start key = do present <- inAnnex key g <- Annex.gitRepo let file = gitAnnexLocation g key diff --git a/Command/Unused.hs b/Command/Unused.hs index a614ce5d9..52e483d87 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -126,4 +126,4 @@ tmpKeys = do contents <- liftIO $ getDirectoryContents tmp files <- liftIO $ filterM doesFileExist $ map (tmp </>) contents - return $ map (fileKey . takeFileName) files + return $ catMaybes $ map (fileKey . takeFileName) files diff --git a/Content.hs b/Content.hs index dc675389f..1a5a80a9f 100644 --- a/Content.hs +++ b/Content.hs @@ -26,6 +26,7 @@ import System.Path import Control.Monad (when, unless, filterM) import System.Posix.Files import System.FilePath +import Data.Maybe import Types import Locations @@ -162,7 +163,7 @@ getKeysPresent' dir = do else do contents <- liftIO $ getDirectoryContents dir files <- liftIO $ filterM present contents - return $ map fileKey files + return $ catMaybes $ map fileKey files where present d = do result <- try $ @@ -0,0 +1,87 @@ +{- git-annex Key data type + - + - Copyright 2011 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Key ( + Key(..), + stubKey, + readKey, + + prop_idempotent_key_read_show +) where + +import Test.QuickCheck +import Utility +import System.Posix.Types + +{- A Key has a unique name, is associated with a key/value backend, + - and may contain other optional metadata. -} +data Key = Key { + keyName :: String, + keyBackendName :: String, + keySize :: Maybe Integer, + keyMtime :: Maybe EpochTime +} deriving (Eq, Ord) + +stubKey :: Key +stubKey = Key { + keyName = "", + keyBackendName = "", + keySize = Nothing, + keyMtime = Nothing +} + +fieldSep :: Char +fieldSep = '-' + +{- Keys show as strings that are suitable for use as filenames. + - The name field is always shown last, separated by doubled fieldSeps, + - and is the only field allowed to contain the fieldSep. -} +instance Show Key where + show Key { keyBackendName = b, keySize = s, keyMtime = m, keyName = n } = + b +++ ('s' ?: s) +++ ('m' ?: m) +++ (fieldSep : n) + where + "" +++ y = y + x +++ "" = x + x +++ y = x ++ fieldSep:y + c ?: (Just v) = c:(show v) + _ ?: _ = "" + +readKey :: String -> Maybe Key +readKey s = if key == Just stubKey then Nothing else key + where + key = startbackend stubKey s + + startbackend k v = sepfield k v addbackend + + sepfield k v a = case span (/= fieldSep) v of + (v', _:r) -> findfields r $ a k v' + _ -> Nothing + + findfields (c:v) (Just k) + | c == fieldSep = Just $ k { keyName = v } + | otherwise = sepfield k v $ addfield c + findfields _ v = v + + addbackend k v = Just k { keyBackendName = v } + addfield 's' k v = Just k { keySize = readMaybe v } + addfield 'm' k v = Just k { keyMtime = readMaybe v } + addfield _ _ _ = Nothing + +-- for quickcheck +instance Arbitrary Key where + arbitrary = do + n <- arbitrary + b <- elements ['A'..'Z'] + return $ Key { + keyName = n, + keyBackendName = [b], + keySize = Nothing, + keyMtime = Nothing + } + +prop_idempotent_key_read_show :: Key -> Bool +prop_idempotent_key_read_show k = Just k == (readKey $ show k) diff --git a/LocationLog.hs b/LocationLog.hs index f778df386..a939af825 100644 --- a/LocationLog.hs +++ b/LocationLog.hs @@ -123,11 +123,6 @@ logNow s u = do now <- getPOSIXTime return $ LogLine now s u -{- Returns the filename of the log file for a given key. -} -logFile :: Git.Repo -> Key -> String -logFile repo key = - gitStateDir repo ++ keyFile key ++ ".log" - {- Returns a list of repository UUIDs that, according to the log, have - the value of a key. -} keyLocations :: Git.Repo -> Key -> IO [UUID] diff --git a/Locations.hs b/Locations.hs index 908d5b74e..b2d31a1bf 100644 --- a/Locations.hs +++ b/Locations.hs @@ -19,6 +19,7 @@ module Locations ( gitAnnexBadDir, gitAnnexUnusedLog, isLinkToAnnex, + logFile, prop_idempotent_fileKey ) where @@ -26,8 +27,12 @@ module Locations ( import System.FilePath import Data.String.Utils import Data.List +import Bits +import Word +import Data.Hash.MD5 import Types +import Key import qualified GitRepo as Git {- Conventions: @@ -62,7 +67,7 @@ objectDir = addTrailingPathSeparator $ annexDir </> "objects" {- Annexed file's location relative to the .git directory. -} annexLocation :: Key -> FilePath -annexLocation key = objectDir </> f </> f +annexLocation key = objectDir </> hashDir key </> f </> f where f = keyFile key @@ -105,6 +110,11 @@ gitAnnexUnusedLog r = gitAnnexDir r </> "unused" isLinkToAnnex :: FilePath -> Bool isLinkToAnnex s = ("/.git/" ++ objectDir) `isInfixOf` s +{- The filename of the log file for a given key. -} +logFile :: Git.Repo -> Key -> String +logFile repo key = + gitStateDir repo ++ hashDir key ++ keyFile key ++ ".log" + {- Converts a key into a filename fragment. - - Escape "/" in the key name, to keep a flat tree of files and avoid @@ -114,17 +124,49 @@ isLinkToAnnex s = ("/.git/" ++ objectDir) `isInfixOf` s - a slash - "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping - is one to one. + - ":" is escaped to "&c", because despite it being 2011, people still care + - about FAT. - -} keyFile :: Key -> FilePath -keyFile key = replace "/" "%" $ replace "%" "&s" $ replace "&" "&a" $ show key +keyFile key = replace "/" "%" $ replace ":" "&c" $ + replace "%" "&s" $ replace "&" "&a" $ show key {- Reverses keyFile, converting a filename fragment (ie, the basename of - the symlink target) into a key. -} -fileKey :: FilePath -> Key -fileKey file = read $ - replace "&a" "&" $ replace "&s" "%" $ replace "%" "/" file +fileKey :: FilePath -> Maybe Key +fileKey file = readKey $ + replace "&a" "&" $ replace "&s" "%" $ + replace "&c" ":" $ replace "%" "/" file {- for quickcheck -} prop_idempotent_fileKey :: String -> Bool -prop_idempotent_fileKey s = k == fileKey (keyFile k) - where k = read $ "test:" ++ s +prop_idempotent_fileKey s = Just k == fileKey (keyFile k) + where k = stubKey { keyName = s, keyBackendName = "test" } + +{- Given a key, generates a short directory name to put it in, + - to do hashing to protect against filesystems that dislike having + - many items in a single directory. -} +hashDir :: Key -> FilePath +hashDir k = addTrailingPathSeparator $ take 2 dir </> drop 2 dir + where + dir = take 4 $ abcd_to_dir $ md5 $ Str $ show k + +abcd_to_dir :: ABCD -> String +abcd_to_dir (ABCD (a,b,c,d)) = concat $ map display_32bits_as_dir [a,b,c,d] + +{- modified version of display_32bits_as_hex from Data.Hash.MD5 + - Copyright (C) 2001 Ian Lynagh + - License: Either BSD or GPL + -} +display_32bits_as_dir :: Word32 -> String +display_32bits_as_dir w = trim $ swap_pairs cs + where + -- Need 32 characters to use. To avoid inaverdently making + -- a real word, use the alphabet without vowels. + chars = ['0'..'9'] ++ "bcdfghjklnmpqrstvwxyzZ" + cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7] + getc n = chars !! (fromIntegral n) + swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs + swap_pairs _ = [] + -- Last 2 will always be 00, so omit. + trim s = take 6 s @@ -13,6 +13,7 @@ SysConfig.hs: configure.hs TestConfig.hs Touch.hs: Touch.hsc hsc2hs $< + perl -i -pe 's/^{-# INCLUDE.*//' $@ $(bins): SysConfig.hs Touch.hs $(GHCMAKE) $@ diff --git a/Remotes.hs b/Remotes.hs index 3c9db314c..8b760ac95 100644 --- a/Remotes.hs +++ b/Remotes.hs @@ -153,7 +153,7 @@ inAnnex r key = if Git.repoIsUrl r checkremote = do showNote ("checking " ++ Git.repoDescribe r ++ "...") inannex <- onRemote r (boolSystem, False) "inannex" - [Param ("--backend=" ++ backendName key), Param (keyName key)] + [Param (show key)] return $ Right inannex {- Cost Ordered list of remotes. -} @@ -272,8 +272,7 @@ rsyncParams :: Git.Repo -> Bool -> Key -> FilePath -> Annex [CommandParam] rsyncParams r sending key file = do Just (shellcmd, shellparams) <- git_annex_shell r (if sending then "sendkey" else "recvkey") - [ Param $ "--backend=" ++ backendName key - , Param $ keyName key + [ Param $ show key -- Command is terminated with "--", because -- rsync will tack on its own options afterwards, -- and they need to be ignored. @@ -8,11 +8,9 @@ module Types ( Annex, Backend, - Key, - genKey, - backendName, - keyName + Key ) where -import BackendTypes +import BackendClass import Annex +import Key diff --git a/Upgrade.hs b/Upgrade.hs index 3c16bcc86..eba75bf58 100644 --- a/Upgrade.hs +++ b/Upgrade.hs @@ -13,6 +13,7 @@ import Control.Monad.State (liftIO) import Control.Monad (filterM, forM_) import System.Posix.Files import System.FilePath +import Data.Maybe import Content import Types @@ -30,10 +31,16 @@ upgrade = do version <- getVersion case version of Just "0" -> upgradeFrom0 + Just "1" -> upgradeFrom1 Nothing -> return True -- repo not initted yet, no version Just v | v == currentVersion -> return True Just _ -> error "this version of git-annex is too old for this git repository!" +upgradeFrom1 :: Annex Bool +upgradeFrom1 = do + showSideAction "Upgrading object directory layout..." + error "upgradeFrom1 TODO FIXME" + upgradeFrom0 :: Annex Bool upgradeFrom0 = do showSideAction "Upgrading object directory layout..." @@ -74,7 +81,7 @@ getKeysPresent0' dir = do else do contents <- liftIO $ getDirectoryContents dir files <- liftIO $ filterM present contents - return $ map fileKey files + return $ catMaybes $ map fileKey files where present d = do result <- try $ diff --git a/Version.hs b/Version.hs index 9e31d3c9e..7fdbd1a49 100644 --- a/Version.hs +++ b/Version.hs @@ -16,7 +16,7 @@ import qualified GitRepo as Git import Locations currentVersion :: String -currentVersion = "1" +currentVersion = "2" versionField :: String versionField = "annex.version" diff --git a/debian/changelog b/debian/changelog index e7017a26d..ac7c854ff 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,15 @@ git-annex (0.24) UNRELEASED; urgency=low + * Reorganized annexed object store. annex.version=2 + * Colons are now avoided in filenames, so bare clones of git repos + can be put on USB thumb drives formatted with vFAT or similar + filesystems. + * Added two levels of hashing to object directory and .git-annex logs, + to improve scalability with enormous numbers of annexed + objects. (With one hundred million annexed objects, each + directory would contain fewer than 1024 files.) + * The setkey, fromkey, and dropkey subcommands have changed how + the key is specified. --backend is no longer used with these. * Add Suggests on graphviz. Closes: #618039 * When adding files to the annex, the symlinks pointing at the annexed content are made to have the same mtime as the original file. diff --git a/doc/bugs/fat_support.mdwn b/doc/bugs/fat_support.mdwn index 2c6c97385..60633c29b 100644 --- a/doc/bugs/fat_support.mdwn +++ b/doc/bugs/fat_support.mdwn @@ -10,3 +10,6 @@ be VFAT formatted: [[!tag wishlist]] +[[Done]]; in annex.version 2 repos, colons are entirely avoided in +filenames. So a bare git clone can be put on VFAT, and git-annex +used to move stuff --to and --from it, for sneakernet. diff --git a/doc/forum/hashing_objects_directories.mdwn b/doc/forum/hashing_objects_directories.mdwn index 715e972ca..5b7708fb5 100644 --- a/doc/forum/hashing_objects_directories.mdwn +++ b/doc/forum/hashing_objects_directories.mdwn @@ -17,3 +17,11 @@ or anything in between to a paranoid Also the use of a colon specifically breaks FAT32 ([[bugs/fat_support]]), must it be a colon or could an extra directory be used? i.e. `.git/annex/objects/SHA1/*/...` `git annex init` could also create all but the last level directory on initialization. I'm thinking `SHA1/1/1, SHA1/1/2, ..., SHA256/f/f, ..., URL/f/f, ..., WORM/f/f` + +> This is done now with a 2-level hash. It also hashes .git-annex/ log +> files which were the worse problem really. Scales to hundreds of millions +> of files with each dir having 1024 or fewer contents. Example: +> +> `me -> .git/annex/objects/71/9t/WORM-s3-m1300247299--me/WORM-s3-m1300247299--me` +> +> --[[Joey]] diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 4998a6491..e559e8cba 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -234,11 +234,11 @@ Many git-annex commands will stage changes for later `git commit` by you. This can be used to manually set up a file to link to a specified key in the key-value backend. How you determine an existing key in the backend - varies. For the URL backend, the key is just a URL to the content. + varies. For the URL backend, the key is based on an URL to the content. Example: - git annex fromkey --backend=URL --key=http://www.archive.org/somefile somefile + git annex fromkey --key=URL--http://www.archive.org/somefile somefile * dropkey [key ...] @@ -248,24 +248,18 @@ Many git-annex commands will stage changes for later `git commit` by you. This can be used to drop content for arbitrary keys, which do not need to have a file in the git repository pointing at them. - A backend will typically need to be specified with --backend. If none - is specified, the first configured backend is used. - Example: - git annex dropkey --backend=SHA1 7da006579dd64330eb2456001fd01948430572f2 + git annex dropkey SHA1-s10-7da006579dd64330eb2456001fd01948430572f2 * setkey file This plumbing-level command sets the annexed data for a key to the content of the specified file, and then removes the file. - A backend will typically need to be specified with --backend. If none - is specified, the first configured backend is used. - Example: - git annex setkey --backend=WORM --key=1287765018:3 /tmp/file + git annex setkey --key=WORM-s3-m1287765018--file /tmp/file # OPTIONS @@ -302,7 +296,10 @@ Many git-annex commands will stage changes for later `git commit` by you. * --backend=name - Specifies which key-value backend to use. + Specifies which key-value backend to use. This can be used when + adding a file to the annex, or migrating a file. Once files + are in the annex, their backend is known and this option is not + necessary. * --key=name diff --git a/doc/internals.mdwn b/doc/internals.mdwn index 3f680dd8f..a133320b4 100644 --- a/doc/internals.mdwn +++ b/doc/internals.mdwn @@ -2,12 +2,15 @@ In the world of git, we're not scared about internal implementation details, and sometimes we like to dive in and tweak things by hand. Here's some documentation to that end. -## `.git/annex/objects/*/*` +## `.git/annex/objects/aa/bb/*/*` This is where locally available file contents are actually stored. Files added to the annex get a symlink checked into git that points to the file content. +First there are two levels of directories used for hashing, to prevent +too many things ending up in any one directory. + Each subdirectory has the name of a key in one of the [[key-value_backends|backends]]. The file inside also has the name of the key. This two-level structure is used because it allows the write bit to be removed @@ -41,10 +44,11 @@ Example: e605dca6-446a-11e0-8b2a-002170d25c55 1 26339d22-446b-11e0-9101-002170d25c55 ? -## `.git-annex/*.log` +## `.git-annex/aa/bb/*.log` The remainder of the log files record [[location_tracking]] information -for file contents. The name of the key is the filename, and the content +for file contents. Again these are placed in two levels of subdirectories +for hashing. The name of the key is the filename, and the content consists of a timestamp, either 1 (present) or 0 (not present), and the UUID of the repository that has or lacks the file content. diff --git a/doc/walkthrough/modifying_annexed_files.mdwn b/doc/walkthrough/modifying_annexed_files.mdwn index 3ad4e82ea..f75b73a24 100644 --- a/doc/walkthrough/modifying_annexed_files.mdwn +++ b/doc/walkthrough/modifying_annexed_files.mdwn @@ -27,7 +27,7 @@ and this symlink is what gets committed to git in the end. add my_cool_big_file ok [master 64cda67] changed an annexed file 2 files changed, 2 insertions(+), 1 deletions(-) - create mode 100644 .git-annex/WORM:1289672605:30:file.log + create mode 100644 .git-annex/WORM-s30-m1289672605--file.log There is one problem with using `git commit` like this: Git wants to first stage the entire contents of the file in its index. That can be slow for diff --git a/doc/walkthrough/moving_file_content_between_repositories.mdwn b/doc/walkthrough/moving_file_content_between_repositories.mdwn index d7150f109..6b3e3f4e8 100644 --- a/doc/walkthrough/moving_file_content_between_repositories.mdwn +++ b/doc/walkthrough/moving_file_content_between_repositories.mdwn @@ -9,5 +9,5 @@ makes it very easy. move my_cool_big_file (moving to usbdrive...) ok # git annex move video/hackity_hack_and_kaxxt.mov --from fileserver move video/hackity_hack_and_kaxxt.mov (moving from fileserver...) - WORM:1274316523:86050597:hackity_hack_and_kax 100% 82MB 199.1KB/s 07:02 + WORM-s86050597-m1274316523--hackity_hack_and_kax 100% 82MB 199.1KB/s 07:02 ok diff --git a/doc/walkthrough/unused_data.mdwn b/doc/walkthrough/unused_data.mdwn index 69a581fe1..9be32577c 100644 --- a/doc/walkthrough/unused_data.mdwn +++ b/doc/walkthrough/unused_data.mdwn @@ -12,8 +12,8 @@ eliminate it to save space. unused (checking for unused data...) Some annexed data is no longer pointed to by any files in the repository. NUMBER KEY - 1 WORM:1289672605:3:file - 2 WORM:1289672605:14:file + 1 WORM-s3-m1289672605--file + 2 WORM-s14-m1289672605--file (To see where data was previously used, try: git log --stat -S'KEY') (To remove unwanted data: git-annex dropunused NUMBER) ok diff --git a/doc/walkthrough/using_ssh_remotes.mdwn b/doc/walkthrough/using_ssh_remotes.mdwn index 6af9e1f47..4c2f830de 100644 --- a/doc/walkthrough/using_ssh_remotes.mdwn +++ b/doc/walkthrough/using_ssh_remotes.mdwn @@ -13,7 +13,7 @@ Now you can get files and they will be transferred (using `rsync` via `ssh`): # git annex get my_cool_big_file get my_cool_big_file (getting UUID for origin...) (copying from origin...) - WORM:1285650548:2159:my_cool_big_file 100% 2159 2.1KB/s 00:00 + WORM-s2159-m1285650548--my_cool_big_file 100% 2159 2.1KB/s 00:00 ok When you drop files, git-annex will ssh over to the remote and make diff --git a/doc/walkthrough/using_the_URL_backend.mdwn b/doc/walkthrough/using_the_URL_backend.mdwn index fe79a6be2..585fd0668 100644 --- a/doc/walkthrough/using_the_URL_backend.mdwn +++ b/doc/walkthrough/using_the_URL_backend.mdwn @@ -5,7 +5,7 @@ Another handy backend is the URL backend, which can fetch file's content from remote URLs. Here's how to set up some files in your repository that use this backend: - # git annex fromkey --backend=URL --key=http://www.archive.org/somefile somefile + # git annex fromkey --key=URL--http://www.archive.org/somefile somefile fromkey somefile ok # git commit -m "added a file from the Internet Archive" @@ -29,7 +29,7 @@ import qualified Backend import qualified GitRepo as Git import qualified Locations import qualified Utility -import qualified BackendTypes +import qualified BackendClass import qualified Types import qualified GitAnnex import qualified LocationLog @@ -38,6 +38,7 @@ import qualified Trust import qualified Remotes import qualified Content import qualified Command.DropUnused +import qualified Key main :: IO () main = do @@ -55,7 +56,7 @@ quickcheck :: Test quickcheck = TestLabel "quickcheck" $ TestList [ qctest "prop_idempotent_deencode" Git.prop_idempotent_deencode , qctest "prop_idempotent_fileKey" Locations.prop_idempotent_fileKey - , qctest "prop_idempotent_key_read_show" BackendTypes.prop_idempotent_key_read_show + , qctest "prop_idempotent_key_read_show" Key.prop_idempotent_key_read_show , qctest "prop_idempotent_shellEscape" Utility.prop_idempotent_shellEscape , qctest "prop_idempotent_shellEscape_multiword" Utility.prop_idempotent_shellEscape_multiword , qctest "prop_parentDir_basics" Utility.prop_parentDir_basics @@ -119,10 +120,10 @@ test_add = "git-annex add" ~: TestList [basic, sha1dup] test_setkey :: Test test_setkey = "git-annex setkey/fromkey" ~: TestCase $ inmainrepo $ do writeFile tmp $ content sha1annexedfile - r <- annexeval $ BackendTypes.getKey backendSHA1 tmp - let sha1 = BackendTypes.keyName $ fromJust r - git_annex "setkey" ["-q", "--backend", "SHA1", "--key", sha1, tmp] @? "setkey failed" - git_annex "fromkey" ["-q", "--backend", "SHA1", "--key", sha1, sha1annexedfile] @? "fromkey failed" + r <- annexeval $ BackendClass.getKey backendSHA1 tmp + let key = show $ fromJust r + git_annex "setkey" ["-q", "--key", key, tmp] @? "setkey failed" + git_annex "fromkey" ["-q", "--key", key, sha1annexedfile] @? "fromkey failed" Utility.boolSystem "git" [Utility.Params "commit -q -a -m commit"] @? "git commit failed" annexed_present sha1annexedfile where @@ -438,7 +439,7 @@ test_unused = "git-annex unused/dropunused" ~: intmpclonerepo $ do checkunused [annexedfilekey, sha1annexedfilekey] -- good opportunity to test dropkey also - git_annex "dropkey" ["-q", "--force", BackendTypes.keyName annexedfilekey] + git_annex "dropkey" ["-q", "--force", show annexedfilekey] @? "dropkey failed" checkunused [sha1annexedfilekey] |