diff options
49 files changed, 804 insertions, 308 deletions
@@ -16,6 +16,7 @@ module Annex ( gitRepo, queue, queueRun, + queueRunAt, setConfig, repoConfig ) where @@ -25,7 +26,7 @@ import Data.Maybe import qualified GitRepo as Git import qualified GitQueue -import qualified BackendTypes +import qualified BackendClass import Utility -- git-annex's monad @@ -34,8 +35,8 @@ type Annex = StateT AnnexState IO -- internal state storage data AnnexState = AnnexState { repo :: Git.Repo - , backends :: [BackendTypes.Backend Annex] - , supportedBackends :: [BackendTypes.Backend Annex] + , backends :: [BackendClass.Backend Annex] + , supportedBackends :: [BackendClass.Backend Annex] , repoqueue :: GitQueue.Queue , quiet :: Bool , force :: Bool @@ -47,7 +48,7 @@ data AnnexState = AnnexState , remotesread :: Bool } deriving (Show) -newState :: Git.Repo -> [BackendTypes.Backend Annex] -> AnnexState +newState :: Git.Repo -> [BackendClass.Backend Annex] -> AnnexState newState gitrepo allbackends = AnnexState { repo = gitrepo , backends = [] @@ -64,7 +65,7 @@ newState gitrepo allbackends = AnnexState } {- Create and returns an Annex state object for the specified git repo. -} -new :: Git.Repo -> [BackendTypes.Backend Annex] -> IO AnnexState +new :: Git.Repo -> [BackendClass.Backend Annex] -> IO AnnexState new gitrepo allbackends = do gitrepo' <- liftIO $ Git.configRead gitrepo return $ newState gitrepo' allbackends @@ -109,6 +110,13 @@ queueRun = do liftIO $ GitQueue.run g q put state { repoqueue = GitQueue.empty } +{- Runs the queue if the specified number of items have been queued. -} +queueRunAt :: Integer -> Annex () +queueRunAt n = do + state <- get + let q = repoqueue state + when (GitQueue.size q >= n) queueRun + {- Changes a git config setting in both internal state and .git/config -} setConfig :: String -> String -> Annex () setConfig k value = do diff --git a/Backend.hs b/Backend.hs index df23e80a3..cd14ce50e 100644 --- a/Backend.hs +++ b/Backend.hs @@ -27,7 +27,8 @@ module Backend ( lookupFile, chooseBackends, keyBackend, - lookupBackendName + lookupBackendName, + maybeLookupBackendName ) where import Control.Monad.State @@ -39,7 +40,8 @@ import Locations import qualified GitRepo as Git import qualified Annex import Types -import qualified BackendTypes as B +import Key +import qualified BackendClass as B import Messages {- List of backends in the order to try them when storing a new key. -} @@ -135,18 +137,19 @@ lookupFile file = do getsymlink = do l <- readSymbolicLink file return $ takeFileName l - makekey bs l = do + makekey bs l = + case fileKey l of + Just k -> makeret k l bs + Nothing -> return Nothing + makeret k l bs = case maybeLookupBackendName bs bname of - Nothing -> do - unless (null kname || null bname || - not (isLinkToAnnex l)) $ - warning skip - return Nothing - Just backend -> return $ Just (k, backend) + Just backend -> return $ Just (k, backend) + Nothing -> do + when (isLinkToAnnex l) $ + warning skip + return Nothing where - k = fileKey l - bname = backendName k - kname = keyName k + bname = keyBackendName k skip = "skipping " ++ file ++ " (unknown backend " ++ bname ++ ")" @@ -164,4 +167,4 @@ chooseBackends fs = do keyBackend :: Key -> Annex (Backend Annex) keyBackend key = do bs <- Annex.getState Annex.supportedBackends - return $ lookupBackendName bs $ backendName key + return $ lookupBackendName bs $ keyBackendName key diff --git a/Backend/File.hs b/Backend/File.hs index d76cd2939..a5e243199 100644 --- a/Backend/File.hs +++ b/Backend/File.hs @@ -18,7 +18,7 @@ import Control.Monad.State import System.Directory import Data.List -import BackendTypes +import BackendClass import LocationLog import Locations import qualified Remotes diff --git a/Backend/SHA.hs b/Backend/SHA.hs index 4eea890ce..056385107 100644 --- a/Backend/SHA.hs +++ b/Backend/SHA.hs @@ -13,9 +13,10 @@ import System.Cmd.Utils import System.IO import System.Directory import Data.Maybe +import System.Posix.Files import qualified Backend.File -import BackendTypes +import BackendClass import Messages import qualified Annex import Locations @@ -23,6 +24,7 @@ import Content import Types import Utility import qualified SysConfig +import Key type SHASize = Int @@ -63,11 +65,16 @@ shaN size file = do where command = "sha" ++ (show size) ++ "sum" --- A key is a checksum of its contents. +{- A key is a checksum of its contents. -} keyValue :: SHASize -> FilePath -> Annex (Maybe Key) keyValue size file = do s <- shaN size file - return $ Just $ Key (shaName size, s) + stat <- liftIO $ getFileStatus file + return $ Just $ stubKey { + keyName = s, + keyBackendName = shaName size, + keySize = Just $ fromIntegral $ fileSize stat + } -- A key's checksum is checked during fsck. checkKeyChecksum :: SHASize -> Key -> Annex Bool diff --git a/Backend/URL.hs b/Backend/URL.hs index 29dc8fefa..02ce3563c 100644 --- a/Backend/URL.hs +++ b/Backend/URL.hs @@ -8,12 +8,12 @@ module Backend.URL (backends) where import Control.Monad.State (liftIO) -import Data.String.Utils import Types -import BackendTypes +import BackendClass import Utility import Messages +import Key backends :: [Backend Annex] backends = [backend] @@ -52,8 +52,8 @@ dummyOk _ = return True downloadUrl :: Key -> FilePath -> Annex Bool downloadUrl key file = do - showNote "downloading" + showNote $ "downloading" showProgress -- make way for curl progress bar liftIO $ boolSystem "curl" [Params "-# -o", File file, File url] where - url = join ":" $ drop 1 $ split ":" $ show key + url = keyName key diff --git a/Backend/WORM.hs b/Backend/WORM.hs index a0d814aa0..a011995da 100644 --- a/Backend/WORM.hs +++ b/Backend/WORM.hs @@ -10,17 +10,17 @@ module Backend.WORM (backends) where import Control.Monad.State import System.FilePath import System.Posix.Files -import System.Posix.Types import System.Directory -import Data.String.Utils +import Data.Maybe import qualified Backend.File -import BackendTypes +import BackendClass import Locations import qualified Annex import Content import Messages import Types +import Key backends :: [Backend Annex] backends = [backend] @@ -32,31 +32,25 @@ backend = Backend.File.backend { fsckKey = Backend.File.checkKey checkKeySize } --- The key is formed from the file size, modification time, and the --- basename of the filename. --- --- That allows multiple files with the same names to have different keys, --- while also allowing a file to be moved around while retaining the --- same key. +{- The key includes the file size, modification time, and the + - basename of the filename. + - + - That allows multiple files with the same names to have different keys, + - while also allowing a file to be moved around while retaining the + - same key. + -} keyValue :: FilePath -> Annex (Maybe Key) keyValue file = do stat <- liftIO $ getFileStatus file - return $ Just $ Key (name backend, key stat) - where - key stat = uniqueid stat ++ sep ++ base - uniqueid stat = show (modificationTime stat) ++ sep ++ - show (fileSize stat) - base = takeFileName file - sep = ":" - -{- Extracts the file size from a key. -} -keySize :: Key -> FileOffset -keySize key = read $ section !! 1 - where - section = split ":" (keyName key) + return $ Just $ Key { + keyName = takeFileName file, + keyBackendName = name backend, + keySize = Just $ fromIntegral $ fileSize stat, + keyMtime = Just $ modificationTime stat + } {- The size of the data for a key is checked against the size encoded in - - the key. Note that the modification time is not checked. -} + - the key's metadata. -} checkKeySize :: Key -> Annex Bool checkKeySize key = do g <- Annex.gitRepo @@ -66,7 +60,7 @@ checkKeySize key = do then return True else do s <- liftIO $ getFileStatus file - if fileSize s == keySize key + if fromIntegral (fileSize s) == fromJust (keySize key) then return True else do dest <- moveBad key diff --git a/BackendClass.hs b/BackendClass.hs new file mode 100644 index 000000000..909ae8f96 --- /dev/null +++ b/BackendClass.hs @@ -0,0 +1,39 @@ +{- git-annex key/value backend data type + - + - Most things should not need this, using Types instead + - + - Copyright 2010 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module BackendClass where + +import Key + +data Backend a = Backend { + -- name of this backend + name :: String, + -- converts a filename to a key + getKey :: FilePath -> a (Maybe Key), + -- stores a file's contents to a key + storeFileKey :: FilePath -> Key -> a Bool, + -- retrieves a key's contents to a file + retrieveKeyFile :: Key -> FilePath -> a Bool, + -- removes a key, optionally checking that enough copies are stored + -- elsewhere + removeKey :: Key -> Maybe Int -> a Bool, + -- checks if a backend is storing the content of a key + hasKey :: Key -> a Bool, + -- called during fsck to check a key + -- (second parameter may be the filename associated with it) + -- (third parameter may be the number of copies that there should + -- be of the key) + fsckKey :: Key -> Maybe FilePath -> Maybe Int -> a Bool +} + +instance Show (Backend a) where + show backend = "Backend { name =\"" ++ name backend ++ "\" }" + +instance Eq (Backend a) where + a == b = name a == name b diff --git a/BackendTypes.hs b/BackendTypes.hs deleted file mode 100644 index c0705a550..000000000 --- a/BackendTypes.hs +++ /dev/null @@ -1,79 +0,0 @@ -{- git-annex key/value backend data types - - - - Most things should not need this, using Types instead - - - - Copyright 2010 Joey Hess <joey@kitenet.net> - - - - Licensed under the GNU GPL version 3 or higher. - -} - -module BackendTypes where - -import Data.String.Utils -import Test.QuickCheck - -type KeyName = String -type BackendName = String -newtype Key = Key (BackendName, KeyName) deriving (Eq, Ord) - -data Backend a = Backend { - -- name of this backend - name :: String, - -- converts a filename to a key - getKey :: FilePath -> a (Maybe Key), - -- stores a file's contents to a key - storeFileKey :: FilePath -> Key -> a Bool, - -- retrieves a key's contents to a file - retrieveKeyFile :: Key -> FilePath -> a Bool, - -- removes a key, optionally checking that enough copies are stored - -- elsewhere - removeKey :: Key -> Maybe Int -> a Bool, - -- checks if a backend is storing the content of a key - hasKey :: Key -> a Bool, - -- called during fsck to check a key - -- (second parameter may be the filename associated with it) - -- (third parameter may be the number of copies that there should - -- be of the key) - fsckKey :: Key -> Maybe FilePath -> Maybe Int -> a Bool -} - -instance Show (Backend a) where - show backend = "Backend { name =\"" ++ name backend ++ "\" }" - -instance Eq (Backend a) where - a == b = name a == name b - --- accessors for the parts of a key -keyName :: Key -> KeyName -keyName (Key (_,k)) = k -backendName :: Key -> BackendName -backendName (Key (b,_)) = b - --- constructs a key in a backend -genKey :: Backend a -> KeyName -> Key -genKey b f = Key (name b,f) - --- show a key to convert it to a string; the string includes the --- name of the backend to avoid collisions between key strings -instance Show Key where - show (Key (b, k)) = b ++ ":" ++ k - -instance Read Key where - readsPrec _ s = [(Key (b,k), "")] - where - l = split ":" s - b = if null l then "" else head l - k = join ":" $ drop 1 l - --- for quickcheck -instance Arbitrary Key where - arbitrary = do - backendname <- arbitrary - keyname <- arbitrary - return $ Key (backendname, keyname) - -prop_idempotent_key_read_show :: Key -> Bool -prop_idempotent_key_read_show k - -- backend names will never contain colons - | ':' `elem` (backendName k) = True - | otherwise = k == (read $ show k) diff --git a/CmdLine.hs b/CmdLine.hs index b8fd6af7c..0698f2f5e 100644 --- a/CmdLine.hs +++ b/CmdLine.hs @@ -99,7 +99,7 @@ startup = do shutdown :: Annex Bool shutdown = do q <- Annex.getState Annex.repoqueue - unless (q == GitQueue.empty) $ do + unless (0 == GitQueue.size q) $ do showSideAction "Recording state in git..." Annex.queueRun diff --git a/Command.hs b/Command.hs index eba7f2cef..41ad884a9 100644 --- a/Command.hs +++ b/Command.hs @@ -22,6 +22,7 @@ import qualified Annex import qualified GitRepo as Git import Locations import Utility +import Key {- A command runs in four stages. - @@ -45,6 +46,8 @@ type CommandCleanup = Annex Bool - functions. -} type CommandSeekStrings = CommandStartString -> CommandSeek type CommandStartString = String -> CommandStart +type CommandSeekKeys = CommandStartKey -> CommandSeek +type CommandStartKey = Key -> CommandStart type BackendFile = (FilePath, Maybe (Backend Annex)) type CommandSeekBackendFiles = CommandStartBackendFile -> CommandSeek type CommandStartBackendFile = BackendFile -> CommandStart @@ -166,8 +169,12 @@ withFilesUnlocked' typechanged a params = do map (\f -> Git.workTree repo ++ "/" ++ f) typechangedfiles unlockedfiles' <- filterFiles unlockedfiles backendPairs a unlockedfiles' -withKeys :: CommandSeekStrings -withKeys a params = return $ map a params +withKeys :: CommandSeekKeys +withKeys a params = return $ map a $ map parse params + where + parse p = case readKey p of + Just k -> k + Nothing -> error "bad key" withTempFile :: CommandSeekStrings withTempFile a params = return $ map a params withNothing :: CommandSeekNothing @@ -228,17 +235,18 @@ paramName = "NAME" paramNothing :: String paramNothing = "" -{- The Key specified by the --key and --backend parameters. -} +{- The Key specified by the --key parameter. -} cmdlineKey :: Annex Key cmdlineKey = do k <- Annex.getState Annex.defaultkey - backends <- Backend.list - return $ genKey (head backends) (keyname' k) + case k of + Nothing -> nokey + Just "" -> nokey + Just kstring -> case readKey kstring of + Nothing -> error "bad key" + Just key -> return key where - keyname' Nothing = badkey - keyname' (Just "") = badkey - keyname' (Just n) = n - badkey = error "please specify the key with --key" + nokey = error "please specify the key with --key" {- Given an original list of files, and an expanded list derived from it, - ensures that the original list's ordering is preserved. diff --git a/Command/DropKey.hs b/Command/DropKey.hs index 8c7566df8..b3cc60961 100644 --- a/Command/DropKey.hs +++ b/Command/DropKey.hs @@ -9,7 +9,6 @@ module Command.DropKey where import Command import qualified Annex -import qualified Backend import LocationLog import Types import Content @@ -22,11 +21,8 @@ command = [Command "dropkey" (paramRepeating paramKey) seek seek :: [CommandSeek] seek = [withKeys start] -{- Drops cached content for a key. -} -start :: CommandStartString -start keyname = do - backends <- Backend.list - let key = genKey (head backends) keyname +start :: CommandStartKey +start key = do present <- inAnnex key force <- Annex.getState Annex.force if not present @@ -34,7 +30,7 @@ start keyname = do else if not force then error "dropkey is can cause data loss; use --force if you're sure you want to do this" else do - showStart "dropkey" keyname + showStart "dropkey" (show key) return $ Just $ perform key perform :: Key -> CommandPerform diff --git a/Command/DropUnused.hs b/Command/DropUnused.hs index 594564cb7..8ed61ba65 100644 --- a/Command/DropUnused.hs +++ b/Command/DropUnused.hs @@ -11,6 +11,7 @@ import Control.Monad (when) import Control.Monad.State (liftIO) import qualified Data.Map as M import System.Directory +import Data.Maybe import Command import Types @@ -19,6 +20,7 @@ import Locations import qualified Annex import qualified Command.Drop import Backend +import Key command :: [Command] command = [Command "dropunused" (paramRepeating paramNumber) seek @@ -55,7 +57,6 @@ readUnusedLog = do return $ M.fromList $ map parse $ lines l else return $ M.empty where - parse line = (head ws, tokey $ unwords $ tail ws) + parse line = (head ws, fromJust $ readKey $ unwords $ tail ws) where ws = words line - tokey s = read s :: Key diff --git a/Command/FromKey.hs b/Command/FromKey.hs index 717d528bc..176d2cd54 100644 --- a/Command/FromKey.hs +++ b/Command/FromKey.hs @@ -16,9 +16,9 @@ import Command import qualified Annex import Utility import qualified Backend -import Types import Content import Messages +import Key command :: [Command] command = [Command "fromkey" paramPath seek diff --git a/Command/InAnnex.hs b/Command/InAnnex.hs index 68ac9a2c6..fa81fc9a4 100644 --- a/Command/InAnnex.hs +++ b/Command/InAnnex.hs @@ -11,9 +11,7 @@ import Control.Monad.State (liftIO) import System.Exit import Command -import Types import Content -import qualified Backend command :: [Command] command = [Command "inannex" (paramRepeating paramKey) seek @@ -22,10 +20,8 @@ command = [Command "inannex" (paramRepeating paramKey) seek seek :: [CommandSeek] seek = [withKeys start] -start :: CommandStartString -start keyname = do - backends <- Backend.list - let key = genKey (head backends) keyname +start :: CommandStartKey +start key = do present <- inAnnex key if present then return Nothing diff --git a/Command/Init.hs b/Command/Init.hs index 661835169..d9ea394a3 100644 --- a/Command/Init.hs +++ b/Command/Init.hs @@ -8,7 +8,7 @@ module Command.Init where import Control.Monad.State (liftIO) -import Control.Monad (when) +import Control.Monad (when, unless) import System.Directory import System.FilePath @@ -74,12 +74,14 @@ gitAttributesWrite repo = do exists <- doesFileExist attributes if not exists then do - safeWriteFile attributes $ attrLine ++ "\n" + safeWriteFile attributes $ unlines attrLines commit else do content <- readFile attributes - when (all (/= attrLine) (lines content)) $ do - appendFile attributes $ attrLine ++ "\n" + let present = lines content + let missing = filter (\l -> not $ l `elem` present) attrLines + unless (null missing) $ do + appendFile attributes $ unlines missing commit where attributes = Git.attributes repo @@ -91,8 +93,11 @@ gitAttributesWrite repo = do , Param attributes ] -attrLine :: String -attrLine = stateDir </> "*.log merge=union" +attrLines :: [String] +attrLines = + [ stateDir </> "*.log merge=union" + , stateDir </> "*/*/*.log merge=union" + ] {- set up a git pre-commit hook, if one is not already present -} gitPreCommitHookWrite :: Git.Repo -> Annex () diff --git a/Command/Move.hs b/Command/Move.hs index 3774ccbe9..2d6c973fe 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -20,7 +20,7 @@ import qualified Remotes import UUID import Messages import Utility - + command :: [Command] command = [Command "move" paramPath seek "move content of files to/from another repository"] @@ -136,8 +136,7 @@ fromCleanup :: Git.Repo -> Bool -> Key -> CommandCleanup fromCleanup src True key = do ok <- Remotes.onRemote src (boolSystem, False) "dropkey" [ Params "--quiet --force" - , Param $ "--backend=" ++ backendName key - , Param $ keyName key + , Param $ show key ] -- better safe than sorry: assume the src dropped the key -- even if it seemed to fail; the failure could have occurred diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs index 8a9673050..c7c37d1e3 100644 --- a/Command/RecvKey.hs +++ b/Command/RecvKey.hs @@ -12,10 +12,8 @@ import Control.Monad.State (liftIO) import System.Exit import Command -import Types import CmdLine import Content -import qualified Backend import RsyncFile command :: [Command] @@ -25,10 +23,8 @@ command = [Command "recvkey" paramKey seek seek :: [CommandSeek] seek = [withKeys start] -start :: CommandStartString -start keyname = do - backends <- Backend.list - let key = genKey (head backends) keyname +start :: CommandStartKey +start key = do present <- inAnnex key when present $ error "key is already present in annex" diff --git a/Command/SendKey.hs b/Command/SendKey.hs index cb883b53a..56974bda9 100644 --- a/Command/SendKey.hs +++ b/Command/SendKey.hs @@ -14,9 +14,7 @@ import System.Exit import Locations import qualified Annex import Command -import Types import Content -import qualified Backend import RsyncFile command :: [Command] @@ -26,10 +24,8 @@ command = [Command "sendkey" paramKey seek seek :: [CommandSeek] seek = [withKeys start] -start :: CommandStartString -start keyname = do - backends <- Backend.list - let key = genKey (head backends) keyname +start :: CommandStartKey +start key = do present <- inAnnex key g <- Annex.gitRepo let file = gitAnnexLocation g key diff --git a/Command/Uninit.hs b/Command/Uninit.hs index e9406ce3a..e8ac1bbd5 100644 --- a/Command/Uninit.hs +++ b/Command/Uninit.hs @@ -60,4 +60,4 @@ gitAttributesUnWrite repo = do when attrexists $ do c <- readFileStrict attributes safeWriteFile attributes $ unlines $ - filter (/= Command.Init.attrLine) $ lines c + filter (\l -> not $ l `elem` Command.Init.attrLines) $ lines c diff --git a/Command/Unused.hs b/Command/Unused.hs index a614ce5d9..52e483d87 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -126,4 +126,4 @@ tmpKeys = do contents <- liftIO $ getDirectoryContents tmp files <- liftIO $ filterM doesFileExist $ map (tmp </>) contents - return $ map (fileKey . takeFileName) files + return $ catMaybes $ map (fileKey . takeFileName) files diff --git a/Command/Upgrade.hs b/Command/Upgrade.hs new file mode 100644 index 000000000..3c9fa3eeb --- /dev/null +++ b/Command/Upgrade.hs @@ -0,0 +1,22 @@ +{- git-annex command + - + - Copyright 2011 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Upgrade where + +import Command + +command :: [Command] +command = [Command "upgrade" paramNothing seek "upgrade repository layout"] + +seek :: [CommandSeek] +seek = [withNothing start] + +start :: CommandStartNothing +start = do + -- The actual upgrading is handled by just running any command, + -- so nothing extra needs to be done. + return $ Just $ return $ Just $ return True diff --git a/Content.hs b/Content.hs index dc675389f..4bd8265c2 100644 --- a/Content.hs +++ b/Content.hs @@ -26,6 +26,7 @@ import System.Path import Control.Monad (when, unless, filterM) import System.Posix.Files import System.FilePath +import Data.Maybe import Types import Locations @@ -160,13 +161,20 @@ getKeysPresent' dir = do if (not exists) then return [] else do - contents <- liftIO $ getDirectoryContents dir - files <- liftIO $ filterM present contents - return $ map fileKey files + -- 2 levels of hashing + levela <- liftIO $ subdirContent dir + levelb <- liftIO $ mapM subdirContent levela + contents <- liftIO $ mapM subdirContent (concat levelb) + files <- liftIO $ filterM present (concat contents) + return $ catMaybes $ map (fileKey . takeFileName) files where present d = do result <- try $ - getFileStatus $ dir ++ "/" ++ d ++ "/" ++ takeFileName d + getFileStatus $ d </> takeFileName d case result of Right s -> return $ isRegularFile s Left _ -> return False + subdirContent d = do + c <- getDirectoryContents d + return $ map (d </>) $ filter notcruft c + notcruft f = f /= "." && f /= ".." diff --git a/GitAnnex.hs b/GitAnnex.hs index da91f6e74..b9c22bdfb 100644 --- a/GitAnnex.hs +++ b/GitAnnex.hs @@ -41,6 +41,7 @@ import qualified Command.Trust import qualified Command.Untrust import qualified Command.Semitrust import qualified Command.Map +import qualified Command.Upgrade cmds :: [Command] cmds = concat @@ -70,6 +71,7 @@ cmds = concat , Command.Whereis.command , Command.Migrate.command , Command.Map.command + , Command.Upgrade.command ] options :: [Option] diff --git a/GitQueue.hs b/GitQueue.hs index 07cf9f62f..097516c19 100644 --- a/GitQueue.hs +++ b/GitQueue.hs @@ -9,6 +9,7 @@ module GitQueue ( Queue, empty, add, + size, run ) where @@ -31,22 +32,28 @@ data Action = Action { {- A queue of actions to perform (in any order) on a git repository, - with lists of files to perform them on. This allows coalescing - similar git commands. -} -type Queue = M.Map Action [FilePath] +data Queue = Queue Integer (M.Map Action [FilePath]) + deriving (Show, Eq) {- Constructor for empty queue. -} empty :: Queue -empty = M.empty +empty = Queue 0 M.empty {- Adds an action to a queue. -} add :: Queue -> String -> [CommandParam] -> FilePath -> Queue -add queue subcommand params file = M.insertWith (++) action [file] queue +add (Queue n m) subcommand params file = Queue (n + 1) m' where action = Action subcommand params + m' = M.insertWith' (++) action [file] m + +{- Number of items in a queue. -} +size :: Queue -> Integer +size (Queue n _) = n {- Runs a queue on a git repository. -} run :: Git.Repo -> Queue -> IO () -run repo queue = do - forM_ (M.toList queue) $ uncurry $ runAction repo +run repo (Queue _ m) = do + forM_ (M.toList m) $ uncurry $ runAction repo return () {- Runs an Action on a list of files in a git repository. @@ -0,0 +1,87 @@ +{- git-annex Key data type + - + - Copyright 2011 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Key ( + Key(..), + stubKey, + readKey, + + prop_idempotent_key_read_show +) where + +import Test.QuickCheck +import Utility +import System.Posix.Types + +{- A Key has a unique name, is associated with a key/value backend, + - and may contain other optional metadata. -} +data Key = Key { + keyName :: String, + keyBackendName :: String, + keySize :: Maybe Integer, + keyMtime :: Maybe EpochTime +} deriving (Eq, Ord) + +stubKey :: Key +stubKey = Key { + keyName = "", + keyBackendName = "", + keySize = Nothing, + keyMtime = Nothing +} + +fieldSep :: Char +fieldSep = '-' + +{- Keys show as strings that are suitable for use as filenames. + - The name field is always shown last, separated by doubled fieldSeps, + - and is the only field allowed to contain the fieldSep. -} +instance Show Key where + show Key { keyBackendName = b, keySize = s, keyMtime = m, keyName = n } = + b +++ ('s' ?: s) +++ ('m' ?: m) +++ (fieldSep : n) + where + "" +++ y = y + x +++ "" = x + x +++ y = x ++ fieldSep:y + c ?: (Just v) = c:(show v) + _ ?: _ = "" + +readKey :: String -> Maybe Key +readKey s = if key == Just stubKey then Nothing else key + where + key = startbackend stubKey s + + startbackend k v = sepfield k v addbackend + + sepfield k v a = case span (/= fieldSep) v of + (v', _:r) -> findfields r $ a k v' + _ -> Nothing + + findfields (c:v) (Just k) + | c == fieldSep = Just $ k { keyName = v } + | otherwise = sepfield k v $ addfield c + findfields _ v = v + + addbackend k v = Just k { keyBackendName = v } + addfield 's' k v = Just k { keySize = readMaybe v } + addfield 'm' k v = Just k { keyMtime = readMaybe v } + addfield _ _ _ = Nothing + +-- for quickcheck +instance Arbitrary Key where + arbitrary = do + n <- arbitrary + b <- elements ['A'..'Z'] + return $ Key { + keyName = n, + keyBackendName = [b], + keySize = Nothing, + keyMtime = Nothing + } + +prop_idempotent_key_read_show :: Key -> Bool +prop_idempotent_key_read_show k = Just k == (readKey $ show k) diff --git a/LocationLog.hs b/LocationLog.hs index f778df386..f1e54432c 100644 --- a/LocationLog.hs +++ b/LocationLog.hs @@ -24,6 +24,8 @@ module LocationLog ( LogStatus(..), logChange, logFile, + readLog, + writeLog, keyLocations ) where @@ -123,11 +125,6 @@ logNow s u = do now <- getPOSIXTime return $ LogLine now s u -{- Returns the filename of the log file for a given key. -} -logFile :: Git.Repo -> Key -> String -logFile repo key = - gitStateDir repo ++ keyFile key ++ ".log" - {- Returns a list of repository UUIDs that, according to the log, have - the value of a key. -} keyLocations :: Git.Repo -> Key -> IO [UUID] diff --git a/Locations.hs b/Locations.hs index 908d5b74e..3cce4c261 100644 --- a/Locations.hs +++ b/Locations.hs @@ -19,6 +19,7 @@ module Locations ( gitAnnexBadDir, gitAnnexUnusedLog, isLinkToAnnex, + logFile, prop_idempotent_fileKey ) where @@ -26,8 +27,12 @@ module Locations ( import System.FilePath import Data.String.Utils import Data.List +import Bits +import Word +import Data.Hash.MD5 import Types +import Key import qualified GitRepo as Git {- Conventions: @@ -62,7 +67,7 @@ objectDir = addTrailingPathSeparator $ annexDir </> "objects" {- Annexed file's location relative to the .git directory. -} annexLocation :: Key -> FilePath -annexLocation key = objectDir </> f </> f +annexLocation key = objectDir </> hashDir key </> f </> f where f = keyFile key @@ -105,6 +110,11 @@ gitAnnexUnusedLog r = gitAnnexDir r </> "unused" isLinkToAnnex :: FilePath -> Bool isLinkToAnnex s = ("/.git/" ++ objectDir) `isInfixOf` s +{- The filename of the log file for a given key. -} +logFile :: Git.Repo -> Key -> String +logFile repo key = + gitStateDir repo ++ hashDir key ++ keyFile key ++ ".log" + {- Converts a key into a filename fragment. - - Escape "/" in the key name, to keep a flat tree of files and avoid @@ -114,17 +124,49 @@ isLinkToAnnex s = ("/.git/" ++ objectDir) `isInfixOf` s - a slash - "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping - is one to one. + - ":" is escaped to "&c", because despite it being 2011, people still care + - about FAT. - -} keyFile :: Key -> FilePath -keyFile key = replace "/" "%" $ replace "%" "&s" $ replace "&" "&a" $ show key +keyFile key = replace "/" "%" $ replace ":" "&c" $ + replace "%" "&s" $ replace "&" "&a" $ show key {- Reverses keyFile, converting a filename fragment (ie, the basename of - the symlink target) into a key. -} -fileKey :: FilePath -> Key -fileKey file = read $ - replace "&a" "&" $ replace "&s" "%" $ replace "%" "/" file +fileKey :: FilePath -> Maybe Key +fileKey file = readKey $ + replace "&a" "&" $ replace "&s" "%" $ + replace "&c" ":" $ replace "%" "/" file {- for quickcheck -} prop_idempotent_fileKey :: String -> Bool -prop_idempotent_fileKey s = k == fileKey (keyFile k) - where k = read $ "test:" ++ s +prop_idempotent_fileKey s = Just k == fileKey (keyFile k) + where k = stubKey { keyName = s, keyBackendName = "test" } + +{- Given a key, generates a short directory name to put it in, + - to do hashing to protect against filesystems that dislike having + - many items in a single directory. -} +hashDir :: Key -> FilePath +hashDir k = addTrailingPathSeparator $ take 2 dir </> drop 2 dir + where + dir = take 4 $ abcd_to_dir $ md5 $ Str $ show k + +abcd_to_dir :: ABCD -> String +abcd_to_dir (ABCD (a,b,c,d)) = concat $ map display_32bits_as_dir [a,b,c,d] + +{- modified version of display_32bits_as_hex from Data.Hash.MD5 + - Copyright (C) 2001 Ian Lynagh + - License: Either BSD or GPL + -} +display_32bits_as_dir :: Word32 -> String +display_32bits_as_dir w = trim $ swap_pairs cs + where + -- Need 32 characters to use. To avoid inaverdently making + -- a real word, use letters that appear less frequently. + chars = ['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF" + cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7] + getc n = chars !! (fromIntegral n) + swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs + swap_pairs _ = [] + -- Last 2 will always be 00, so omit. + trim s = take 6 s @@ -13,6 +13,7 @@ SysConfig.hs: configure.hs TestConfig.hs Touch.hs: Touch.hsc hsc2hs $< + perl -i -pe 's/^{-# INCLUDE.*//' $@ $(bins): SysConfig.hs Touch.hs $(GHCMAKE) $@ diff --git a/Remotes.hs b/Remotes.hs index 3c9db314c..8b760ac95 100644 --- a/Remotes.hs +++ b/Remotes.hs @@ -153,7 +153,7 @@ inAnnex r key = if Git.repoIsUrl r checkremote = do showNote ("checking " ++ Git.repoDescribe r ++ "...") inannex <- onRemote r (boolSystem, False) "inannex" - [Param ("--backend=" ++ backendName key), Param (keyName key)] + [Param (show key)] return $ Right inannex {- Cost Ordered list of remotes. -} @@ -272,8 +272,7 @@ rsyncParams :: Git.Repo -> Bool -> Key -> FilePath -> Annex [CommandParam] rsyncParams r sending key file = do Just (shellcmd, shellparams) <- git_annex_shell r (if sending then "sendkey" else "recvkey") - [ Param $ "--backend=" ++ backendName key - , Param $ keyName key + [ Param $ show key -- Command is terminated with "--", because -- rsync will tack on its own options afterwards, -- and they need to be ignored. @@ -8,11 +8,9 @@ module Types ( Annex, Backend, - Key, - genKey, - backendName, - keyName + Key ) where -import BackendTypes +import BackendClass import Annex +import Key diff --git a/Upgrade.hs b/Upgrade.hs index 3c16bcc86..76dd156f8 100644 --- a/Upgrade.hs +++ b/Upgrade.hs @@ -7,78 +7,17 @@ module Upgrade where -import System.IO.Error (try) -import System.Directory -import Control.Monad.State (liftIO) -import Control.Monad (filterM, forM_) -import System.Posix.Files -import System.FilePath - -import Content import Types -import Locations -import qualified GitRepo as Git -import qualified Annex -import qualified Backend -import Messages import Version -import Utility +import qualified Upgrade.V0 +import qualified Upgrade.V1 {- Uses the annex.version git config setting to automate upgrades. -} upgrade :: Annex Bool upgrade = do version <- getVersion case version of - Just "0" -> upgradeFrom0 - Nothing -> return True -- repo not initted yet, no version - Just v | v == currentVersion -> return True - Just _ -> error "this version of git-annex is too old for this git repository!" - -upgradeFrom0 :: Annex Bool -upgradeFrom0 = do - showSideAction "Upgrading object directory layout..." - g <- Annex.gitRepo - - -- do the reorganisation of the files - let olddir = gitAnnexDir g - keys <- getKeysPresent0' olddir - forM_ keys $ \k -> moveAnnex k $ olddir </> keyFile k - - -- update the symlinks to the files - files <- liftIO $ Git.inRepo g [Git.workTree g] - fixlinks files - Annex.queueRun - - setVersion - - return True - - where - fixlinks [] = return () - fixlinks (f:fs) = do - r <- Backend.lookupFile f - case r of - Nothing -> return () - Just (k, _) -> do - link <- calcGitLink f k - liftIO $ removeFile f - liftIO $ createSymbolicLink link f - Annex.queue "add" [Param "--"] f - fixlinks fs - -getKeysPresent0' :: FilePath -> Annex [Key] -getKeysPresent0' dir = do - exists <- liftIO $ doesDirectoryExist dir - if (not exists) - then return [] - else do - contents <- liftIO $ getDirectoryContents dir - files <- liftIO $ filterM present contents - return $ map fileKey files - where - present d = do - result <- try $ - getFileStatus $ dir ++ "/" ++ takeFileName d - case result of - Right s -> return $ isRegularFile s - Left _ -> return False + "0" -> Upgrade.V0.upgrade + "1" -> Upgrade.V1.upgrade + v | v == currentVersion -> return True + _ -> error "this version of git-annex is too old for this git repository!" diff --git a/Upgrade/V0.hs b/Upgrade/V0.hs new file mode 100644 index 000000000..5ba305817 --- /dev/null +++ b/Upgrade/V0.hs @@ -0,0 +1,63 @@ +{- git-annex v0 -> v1 upgrade support + - + - Copyright 2010 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Upgrade.V0 where + +import System.IO.Error (try) +import System.Directory +import Control.Monad.State (liftIO) +import Control.Monad (filterM, forM_) +import System.Posix.Files +import System.FilePath + +import Content +import Types +import Locations +import qualified Annex +import Messages +import qualified Upgrade.V1 + +upgrade :: Annex Bool +upgrade = do + showSideAction "Upgrading object directory layout v0 to v1..." + g <- Annex.gitRepo + + -- do the reorganisation of the key files + let olddir = gitAnnexDir g + keys <- getKeysPresent0 olddir + forM_ keys $ \k -> moveAnnex k $ olddir </> keyFile0 k + + -- update the symlinks to the key files + -- No longer needed here; V1.upgrade does the same thing + + -- Few people had v0 repos, so go the long way around from 0 -> 1 -> 2 + Upgrade.V1.upgrade + +-- these stayed unchanged between v0 and v1 +keyFile0 :: Key -> FilePath +keyFile0 = Upgrade.V1.keyFile1 +fileKey0 :: FilePath -> Key +fileKey0 = Upgrade.V1.fileKey1 +lookupFile0 :: FilePath -> Annex (Maybe (Key, Backend Annex)) +lookupFile0 = Upgrade.V1.lookupFile1 + +getKeysPresent0 :: FilePath -> Annex [Key] +getKeysPresent0 dir = do + exists <- liftIO $ doesDirectoryExist dir + if (not exists) + then return [] + else do + contents <- liftIO $ getDirectoryContents dir + files <- liftIO $ filterM present contents + return $ map fileKey0 files + where + present d = do + result <- try $ + getFileStatus $ dir ++ "/" ++ takeFileName d + case result of + Right s -> return $ isRegularFile s + Left _ -> return False diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs new file mode 100644 index 000000000..270de5f74 --- /dev/null +++ b/Upgrade/V1.hs @@ -0,0 +1,226 @@ +{- git-annex v1 -> v2 upgrade support + - + - Copyright 2011 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Upgrade.V1 where + +import System.IO.Error (try) +import System.Directory +import Control.Monad.State (liftIO) +import Control.Monad (filterM, forM_, unless) +import System.Posix.Files +import System.FilePath +import Data.String.Utils +import System.Posix.Types +import Data.Maybe + +import Key +import Content +import Types +import Locations +import LocationLog +import qualified Annex +import qualified GitRepo as Git +import Backend +import Messages +import Version +import Utility +import qualified Command.Init + +-- v2 adds hashing of filenames of content and location log files. +-- Key information is encoded in filenames differently, so +-- both content and location log files move around, and symlinks +-- to content need to be changed. +-- +-- When upgrading a v1 key to v2, file size metadata ought to be +-- added to the key (unless it is a WORM key, which encoded +-- mtime:size in v1). This can only be done when the file content +-- is present. Since upgrades need to happen consistently, +-- (so that two repos get changed the same way by the upgrade, and +-- will merge), that metadata cannot be added on upgrade. +-- +-- Note that file size metadata +-- will only be used for detecting situations where git-annex +-- would run out of disk space, so if some keys don't have it, +-- the impact is minor. At least initially. It could be used in the +-- future by smart auto-repo balancing code, etc. +-- +-- Anyway, since v2 plans ahead for other metadata being included +-- in keys, there should probably be a way to update a key. +-- Something similar to the migrate subcommand could be used, +-- and users could then run that at their leisure. + +upgrade :: Annex Bool +upgrade = do + showSideAction "Upgrading object directory layout v1 to v2..." + + g <- Annex.gitRepo + if Git.repoIsLocalBare g + then do + moveContent + setVersion + else do + moveContent + updateSymlinks + moveLocationLogs + + Annex.queueRun + setVersion + + -- add new line to auto-merge hashed location logs + -- this commits, so has to come after the upgrade + liftIO $ Command.Init.gitAttributesWrite g + + return True + +moveContent :: Annex () +moveContent = do + keys <- getKeysPresent1 + forM_ keys move + where + move k = do + g <- Annex.gitRepo + let f = gitAnnexObjectDir g </> keyFile1 k </> keyFile1 k + let d = parentDir f + liftIO $ allowWrite d + liftIO $ allowWrite f + moveAnnex k f + liftIO $ removeDirectory d + +updateSymlinks :: Annex () +updateSymlinks = do + g <- Annex.gitRepo + files <- liftIO $ Git.inRepo g [Git.workTree g] + forM_ files $ fixlink + where + fixlink f = do + r <- lookupFile1 f + case r of + Nothing -> return () + Just (k, _) -> do + link <- calcGitLink f k + liftIO $ removeFile f + liftIO $ createSymbolicLink link f + Annex.queue "add" [Param "--"] f + Annex.queueRunAt 10240 + +moveLocationLogs :: Annex () +moveLocationLogs = do + logkeys <- oldlocationlogs + forM_ logkeys move + where + oldlocationlogs = do + g <- Annex.gitRepo + let dir = gitStateDir g + contents <- liftIO $ getDirectoryContents dir + return $ catMaybes $ map oldlog2key contents + move (l, k) = do + g <- Annex.gitRepo + let dest = logFile g k + let dir = gitStateDir g + let f = dir </> l + liftIO $ createDirectoryIfMissing True (parentDir dest) + -- could just git mv, but this way deals with + -- log files that are not checked into git, + -- as well as merging with already upgraded + -- logs that have been pulled from elsewhere + old <- liftIO $ readLog f + new <- liftIO $ readLog dest + liftIO $ writeLog dest (old++new) + Annex.queue "add" [Param "--"] dest + Annex.queue "add" [Param "--"] f + Annex.queue "rm" [Param "--quiet", Param "-f", Param "--"] f + Annex.queueRunAt 10240 + +oldlog2key :: FilePath -> Maybe (FilePath, Key) +oldlog2key l = + let len = length l - 4 in + if drop len l == ".log" + then let k = readKey1 (take len l) in + if null (keyName k) || null (keyBackendName k) + then Nothing + else Just (l, k) + else Nothing + +-- WORM backend keys: "WORM:mtime:size:filename" +-- all the rest: "backend:key" +readKey1 :: String -> Key +readKey1 v = Key { keyName = n , keyBackendName = b, keySize = s, keyMtime = t } + where + bits = split ":" v + b = head bits + n = join ":" $ drop (if wormy then 3 else 1) bits + t = if wormy + then Just (read (bits !! 1) :: EpochTime) + else Nothing + s = if wormy + then Just (read (bits !! 2) :: Integer) + else Nothing + wormy = b == "WORM" + +showKey1 :: Key -> String +showKey1 Key { keyName = n , keyBackendName = b, keySize = s, keyMtime = t } = + join ":" $ filter (not . null) [b, showifhere t, showifhere s, n] + where + showifhere Nothing = "" + showifhere (Just v) = show v + +keyFile1 :: Key -> FilePath +keyFile1 key = replace "/" "%" $ replace "%" "&s" $ replace "&" "&a" $ showKey1 key + +fileKey1 :: FilePath -> Key +fileKey1 file = readKey1 $ + replace "&a" "&" $ replace "&s" "%" $ replace "%" "/" file + +logFile1 :: Git.Repo -> Key -> String +logFile1 repo key = gitStateDir repo ++ keyFile1 key ++ ".log" + +lookupFile1 :: FilePath -> Annex (Maybe (Key, Backend Annex)) +lookupFile1 file = do + bs <- Annex.getState Annex.supportedBackends + tl <- liftIO $ try getsymlink + case tl of + Left _ -> return Nothing + Right l -> makekey bs l + where + getsymlink = do + l <- readSymbolicLink file + return $ takeFileName l + makekey bs l = do + case maybeLookupBackendName bs bname of + Nothing -> do + unless (null kname || null bname || + not (isLinkToAnnex l)) $ + warning skip + return Nothing + Just backend -> return $ Just (k, backend) + where + k = fileKey1 l + bname = keyBackendName k + kname = keyName k + skip = "skipping " ++ file ++ + " (unknown backend " ++ bname ++ ")" + +getKeysPresent1 :: Annex [Key] +getKeysPresent1 = do + g <- Annex.gitRepo + getKeysPresent1' $ gitAnnexObjectDir g +getKeysPresent1' :: FilePath -> Annex [Key] +getKeysPresent1' dir = do + exists <- liftIO $ doesDirectoryExist dir + if (not exists) + then return [] + else do + contents <- liftIO $ getDirectoryContents dir + files <- liftIO $ filterM present contents + return $ map fileKey1 files + where + present d = do + result <- try $ + getFileStatus $ dir ++ "/" ++ d ++ "/" ++ takeFileName d + case result of + Right s -> return $ isRegularFile s + Left _ -> return False diff --git a/Version.hs b/Version.hs index 9e31d3c9e..5f414e93b 100644 --- a/Version.hs +++ b/Version.hs @@ -16,26 +16,33 @@ import qualified GitRepo as Git import Locations currentVersion :: String -currentVersion = "1" +currentVersion = "2" versionField :: String versionField = "annex.version" -getVersion :: Annex (Maybe String) +getVersion :: Annex String getVersion = do g <- Annex.gitRepo let v = Git.configGet g versionField "" if not $ null v - then return $ Just v + then return v else do -- version 0 was not recorded in .git/config; -- such a repo should have an gitAnnexDir but no - -- gitAnnexObjectDir + -- gitAnnexObjectDir. + -- + -- version 1 may not be recorded if the user + -- forgot to init. Such a repo should have a + -- gitAnnexObjectDir already. d <- liftIO $ doesDirectoryExist $ gitAnnexDir g o <- liftIO $ doesDirectoryExist $ gitAnnexObjectDir g - if d && not o - then return $ Just "0" - else return Nothing -- no version yet + case (d, o) of + (True, False) -> return "0" + (True, True) -> return "1" + _ -> do + setVersion + return currentVersion setVersion :: Annex () setVersion = Annex.setConfig versionField currentVersion diff --git a/debian/NEWS b/debian/NEWS new file mode 100644 index 000000000..df8518cef --- /dev/null +++ b/debian/NEWS @@ -0,0 +1,11 @@ +git-annex (0.20110316) experimental; urgency=low + + This version reorganises the layout of git-annex's files in your repository. + There is an upgrade process to convert a repository from the old git-annex + to this version. While git-annex will attempt to transparently handle + upgrades, you may want to drive the upgrade process by hand. + + See <http://git-annex.branchable.com/upgrades/> or + /usr/share/doc/git-annex/html/upgrades.html + + -- Joey Hess <joeyh@debian.org> Wed, 16 Mar 2011 15:49:15 -0400 diff --git a/debian/changelog b/debian/changelog index f5fc4eebe..47a914812 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,21 @@ +git-annex (0.20110316) experimental; urgency=low + + * New repository format, annex.version=2. + * The first time git-annex is run in an old format repository, it + will automatically upgrade it to the new format, staging all + necessary changes to git. Also added a "git annex upgrade" command. + * Colons are now avoided in filenames, so bare clones of git repos + can be put on USB thumb drives formatted with vFAT or similar + filesystems. + * Added two levels of hashing to object directory and .git-annex logs, + to improve scalability with enormous numbers of annexed + objects. (With one hundred million annexed objects, each + directory would contain fewer than 1024 files.) + * The setkey, fromkey, and dropkey subcommands have changed how + the key is specified. --backend is no longer used with these. + + -- Joey Hess <joeyh@debian.org> Wed, 16 Mar 2011 16:20:23 -0400 + git-annex (0.24) unstable; urgency=low Branched the 0.24 series, which will be maintained for a while to diff --git a/doc/bugs/fat_support.mdwn b/doc/bugs/fat_support.mdwn index 2c6c97385..60633c29b 100644 --- a/doc/bugs/fat_support.mdwn +++ b/doc/bugs/fat_support.mdwn @@ -10,3 +10,6 @@ be VFAT formatted: [[!tag wishlist]] +[[Done]]; in annex.version 2 repos, colons are entirely avoided in +filenames. So a bare git clone can be put on VFAT, and git-annex +used to move stuff --to and --from it, for sneakernet. diff --git a/doc/bugs/free_space_checking.mdwn b/doc/bugs/free_space_checking.mdwn index 34528a7b3..eaa3294d6 100644 --- a/doc/bugs/free_space_checking.mdwn +++ b/doc/bugs/free_space_checking.mdwn @@ -6,3 +6,13 @@ file around. * And, need a way to tell the size of a file before copying it from a remote, to check local disk space. + + As of annex.version 2, this metadata can be available for any type + of backend. Newly added files will always have file size metadata, + while files that used a SHA backend and were added before the upgrade + won't. + + So, need a migration process from eg SHA1 to SHA1+filesize. It will + find files that lack size info, and rename their keys to add the size + info. Users with old repos can run this on them, to get the missing + info recorded. diff --git a/doc/forum/hashing_objects_directories.mdwn b/doc/forum/hashing_objects_directories.mdwn index 715e972ca..5b7708fb5 100644 --- a/doc/forum/hashing_objects_directories.mdwn +++ b/doc/forum/hashing_objects_directories.mdwn @@ -17,3 +17,11 @@ or anything in between to a paranoid Also the use of a colon specifically breaks FAT32 ([[bugs/fat_support]]), must it be a colon or could an extra directory be used? i.e. `.git/annex/objects/SHA1/*/...` `git annex init` could also create all but the last level directory on initialization. I'm thinking `SHA1/1/1, SHA1/1/2, ..., SHA256/f/f, ..., URL/f/f, ..., WORM/f/f` + +> This is done now with a 2-level hash. It also hashes .git-annex/ log +> files which were the worse problem really. Scales to hundreds of millions +> of files with each dir having 1024 or fewer contents. Example: +> +> `me -> .git/annex/objects/71/9t/WORM-s3-m1300247299--me/WORM-s3-m1300247299--me` +> +> --[[Joey]] diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 4998a6491..ee4019068 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -234,11 +234,11 @@ Many git-annex commands will stage changes for later `git commit` by you. This can be used to manually set up a file to link to a specified key in the key-value backend. How you determine an existing key in the backend - varies. For the URL backend, the key is just a URL to the content. + varies. For the URL backend, the key is based on an URL to the content. Example: - git annex fromkey --backend=URL --key=http://www.archive.org/somefile somefile + git annex fromkey --key=URL--http://www.archive.org/somefile somefile * dropkey [key ...] @@ -248,24 +248,24 @@ Many git-annex commands will stage changes for later `git commit` by you. This can be used to drop content for arbitrary keys, which do not need to have a file in the git repository pointing at them. - A backend will typically need to be specified with --backend. If none - is specified, the first configured backend is used. - Example: - git annex dropkey --backend=SHA1 7da006579dd64330eb2456001fd01948430572f2 + git annex dropkey SHA1-s10-7da006579dd64330eb2456001fd01948430572f2 * setkey file This plumbing-level command sets the annexed data for a key to the content of the specified file, and then removes the file. - A backend will typically need to be specified with --backend. If none - is specified, the first configured backend is used. - Example: - git annex setkey --backend=WORM --key=1287765018:3 /tmp/file + git annex setkey --key=WORM-s3-m1287765018--file /tmp/file + +* upgrade + + Upgrades the repository to current layout. Upgrades are done automatically + whenever a newer git annex encounters an old repository; this command + allows explcitly starting an upgrade. # OPTIONS @@ -302,7 +302,10 @@ Many git-annex commands will stage changes for later `git commit` by you. * --backend=name - Specifies which key-value backend to use. + Specifies which key-value backend to use. This can be used when + adding a file to the annex, or migrating a file. Once files + are in the annex, their backend is known and this option is not + necessary. * --key=name diff --git a/doc/internals.mdwn b/doc/internals.mdwn index 3f680dd8f..a133320b4 100644 --- a/doc/internals.mdwn +++ b/doc/internals.mdwn @@ -2,12 +2,15 @@ In the world of git, we're not scared about internal implementation details, and sometimes we like to dive in and tweak things by hand. Here's some documentation to that end. -## `.git/annex/objects/*/*` +## `.git/annex/objects/aa/bb/*/*` This is where locally available file contents are actually stored. Files added to the annex get a symlink checked into git that points to the file content. +First there are two levels of directories used for hashing, to prevent +too many things ending up in any one directory. + Each subdirectory has the name of a key in one of the [[key-value_backends|backends]]. The file inside also has the name of the key. This two-level structure is used because it allows the write bit to be removed @@ -41,10 +44,11 @@ Example: e605dca6-446a-11e0-8b2a-002170d25c55 1 26339d22-446b-11e0-9101-002170d25c55 ? -## `.git-annex/*.log` +## `.git-annex/aa/bb/*.log` The remainder of the log files record [[location_tracking]] information -for file contents. The name of the key is the filename, and the content +for file contents. Again these are placed in two levels of subdirectories +for hashing. The name of the key is the filename, and the content consists of a timestamp, either 1 (present) or 0 (not present), and the UUID of the repository that has or lacks the file content. diff --git a/doc/todo/object_dir_reorg_v2.mdwn b/doc/todo/object_dir_reorg_v2.mdwn index 1c2d2f21b..49666ddc7 100644 --- a/doc/todo/object_dir_reorg_v2.mdwn +++ b/doc/todo/object_dir_reorg_v2.mdwn @@ -19,3 +19,7 @@ all users, so this should be the *last* reorg in the forseeable future. (Probably everything after ",k" should be part of the key, even if it contains the "," separator character. Otherwise an escaping mechanism would be needed.) + +[[done]] now! + +Although [[bugs/free_space_checking]] is not quite there --[[Joey]] diff --git a/doc/upgrades.mdwn b/doc/upgrades.mdwn new file mode 100644 index 000000000..1371dc033 --- /dev/null +++ b/doc/upgrades.mdwn @@ -0,0 +1,67 @@ +Occasionally improvments are made to how git-annex stores its data, +that require an upgrade process to convert repositories made with an older +version to be used by a newer version. It's annoying, it should happen +rarely, but sometimes, it's worth it. + +There's a committment that git-annex will always support upgrades from all +past versions. After all, you may have offline drives from an earlier +git-annex, and might want to use them with a newer git-annex. + +## Upgrade process + +git-annex will automatically notice if it is run in a repository that +needs an upgrade, and perform the upgrade before running whatever it +was asked to do. Or you can use the "git annex upgrade" command to +explicitly do an upgrade. The upgrade can tend to take a while, +if you have a lot of files. + +Each clone of a repository should be individually upgraded. +Until a repository's remotes have been upgraded, git-annex +may refuse to communicate with them. + +Generally, start by upgrading one repository, and then you can commit +the changes git-annex staged during upgrade, and push them out to other +repositories. And then upgrade those other repositories. Doing it this +way avoids git-annex doing some duplicate work during the upgrade. + +The upgrade process is guaranteed to be conflict-free. Unless you +already have git conflicts in your repository or between repositories. +Upgrading a repository with conflicts is not recommended; resolve the +conflicts first before upgrading git-annex. + +Example upgrade process: + + cd localrepo + git pull + git annex upgrade + (Upgrading object directory layout v1 to v2...) + git commit -m "upgrade v1 to v2" + git push + + ssh remote + cd remoterepo + git pull + git annex upgrade + ... + +## Upgrade events, so far + +### v1 -> v2 (git-annex version 0.23 to version 0.20110316) + +Involved adding hashing to .git/annex/ and changing the names of all keys. +Symlinks changed. + +Also, hashing was added to location log files in .git-annex/. +And .gitattributes needed to have another line added to it. + +Handled transparently. + +### v0 -> v1 (git-annex version 0.03 to version 0.04) + +Involved a reogranisation of the layout of .git/annex/. Symlinks changed. + +Handled more or less transparently, although git-annex was just 2 weeks +old at the time, and had few users other than Joey. + +This upgrade is belived to still be supported, but has not been tested +lately. diff --git a/doc/walkthrough/modifying_annexed_files.mdwn b/doc/walkthrough/modifying_annexed_files.mdwn index 3ad4e82ea..f75b73a24 100644 --- a/doc/walkthrough/modifying_annexed_files.mdwn +++ b/doc/walkthrough/modifying_annexed_files.mdwn @@ -27,7 +27,7 @@ and this symlink is what gets committed to git in the end. add my_cool_big_file ok [master 64cda67] changed an annexed file 2 files changed, 2 insertions(+), 1 deletions(-) - create mode 100644 .git-annex/WORM:1289672605:30:file.log + create mode 100644 .git-annex/WORM-s30-m1289672605--file.log There is one problem with using `git commit` like this: Git wants to first stage the entire contents of the file in its index. That can be slow for diff --git a/doc/walkthrough/moving_file_content_between_repositories.mdwn b/doc/walkthrough/moving_file_content_between_repositories.mdwn index d7150f109..6b3e3f4e8 100644 --- a/doc/walkthrough/moving_file_content_between_repositories.mdwn +++ b/doc/walkthrough/moving_file_content_between_repositories.mdwn @@ -9,5 +9,5 @@ makes it very easy. move my_cool_big_file (moving to usbdrive...) ok # git annex move video/hackity_hack_and_kaxxt.mov --from fileserver move video/hackity_hack_and_kaxxt.mov (moving from fileserver...) - WORM:1274316523:86050597:hackity_hack_and_kax 100% 82MB 199.1KB/s 07:02 + WORM-s86050597-m1274316523--hackity_hack_and_kax 100% 82MB 199.1KB/s 07:02 ok diff --git a/doc/walkthrough/unused_data.mdwn b/doc/walkthrough/unused_data.mdwn index 69a581fe1..9be32577c 100644 --- a/doc/walkthrough/unused_data.mdwn +++ b/doc/walkthrough/unused_data.mdwn @@ -12,8 +12,8 @@ eliminate it to save space. unused (checking for unused data...) Some annexed data is no longer pointed to by any files in the repository. NUMBER KEY - 1 WORM:1289672605:3:file - 2 WORM:1289672605:14:file + 1 WORM-s3-m1289672605--file + 2 WORM-s14-m1289672605--file (To see where data was previously used, try: git log --stat -S'KEY') (To remove unwanted data: git-annex dropunused NUMBER) ok diff --git a/doc/walkthrough/using_ssh_remotes.mdwn b/doc/walkthrough/using_ssh_remotes.mdwn index 6af9e1f47..4c2f830de 100644 --- a/doc/walkthrough/using_ssh_remotes.mdwn +++ b/doc/walkthrough/using_ssh_remotes.mdwn @@ -13,7 +13,7 @@ Now you can get files and they will be transferred (using `rsync` via `ssh`): # git annex get my_cool_big_file get my_cool_big_file (getting UUID for origin...) (copying from origin...) - WORM:1285650548:2159:my_cool_big_file 100% 2159 2.1KB/s 00:00 + WORM-s2159-m1285650548--my_cool_big_file 100% 2159 2.1KB/s 00:00 ok When you drop files, git-annex will ssh over to the remote and make diff --git a/doc/walkthrough/using_the_URL_backend.mdwn b/doc/walkthrough/using_the_URL_backend.mdwn index fe79a6be2..585fd0668 100644 --- a/doc/walkthrough/using_the_URL_backend.mdwn +++ b/doc/walkthrough/using_the_URL_backend.mdwn @@ -5,7 +5,7 @@ Another handy backend is the URL backend, which can fetch file's content from remote URLs. Here's how to set up some files in your repository that use this backend: - # git annex fromkey --backend=URL --key=http://www.archive.org/somefile somefile + # git annex fromkey --key=URL--http://www.archive.org/somefile somefile fromkey somefile ok # git commit -m "added a file from the Internet Archive" @@ -29,7 +29,7 @@ import qualified Backend import qualified GitRepo as Git import qualified Locations import qualified Utility -import qualified BackendTypes +import qualified BackendClass import qualified Types import qualified GitAnnex import qualified LocationLog @@ -38,6 +38,7 @@ import qualified Trust import qualified Remotes import qualified Content import qualified Command.DropUnused +import qualified Key main :: IO () main = do @@ -55,7 +56,7 @@ quickcheck :: Test quickcheck = TestLabel "quickcheck" $ TestList [ qctest "prop_idempotent_deencode" Git.prop_idempotent_deencode , qctest "prop_idempotent_fileKey" Locations.prop_idempotent_fileKey - , qctest "prop_idempotent_key_read_show" BackendTypes.prop_idempotent_key_read_show + , qctest "prop_idempotent_key_read_show" Key.prop_idempotent_key_read_show , qctest "prop_idempotent_shellEscape" Utility.prop_idempotent_shellEscape , qctest "prop_idempotent_shellEscape_multiword" Utility.prop_idempotent_shellEscape_multiword , qctest "prop_parentDir_basics" Utility.prop_parentDir_basics @@ -119,10 +120,10 @@ test_add = "git-annex add" ~: TestList [basic, sha1dup] test_setkey :: Test test_setkey = "git-annex setkey/fromkey" ~: TestCase $ inmainrepo $ do writeFile tmp $ content sha1annexedfile - r <- annexeval $ BackendTypes.getKey backendSHA1 tmp - let sha1 = BackendTypes.keyName $ fromJust r - git_annex "setkey" ["-q", "--backend", "SHA1", "--key", sha1, tmp] @? "setkey failed" - git_annex "fromkey" ["-q", "--backend", "SHA1", "--key", sha1, sha1annexedfile] @? "fromkey failed" + r <- annexeval $ BackendClass.getKey backendSHA1 tmp + let key = show $ fromJust r + git_annex "setkey" ["-q", "--key", key, tmp] @? "setkey failed" + git_annex "fromkey" ["-q", "--key", key, sha1annexedfile] @? "fromkey failed" Utility.boolSystem "git" [Utility.Params "commit -q -a -m commit"] @? "git commit failed" annexed_present sha1annexedfile where @@ -438,7 +439,7 @@ test_unused = "git-annex unused/dropunused" ~: intmpclonerepo $ do checkunused [annexedfilekey, sha1annexedfilekey] -- good opportunity to test dropkey also - git_annex "dropkey" ["-q", "--force", BackendTypes.keyName annexedfilekey] + git_annex "dropkey" ["-q", "--force", show annexedfilekey] @? "dropkey failed" checkunused [sha1annexedfilekey] |