aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Annex.hs18
-rw-r--r--Backend.hs29
-rw-r--r--Backend/File.hs2
-rw-r--r--Backend/SHA.hs13
-rw-r--r--Backend/URL.hs8
-rw-r--r--Backend/WORM.hs42
-rw-r--r--BackendClass.hs39
-rw-r--r--BackendTypes.hs79
-rw-r--r--CmdLine.hs2
-rw-r--r--Command.hs26
-rw-r--r--Command/DropKey.hs10
-rw-r--r--Command/DropUnused.hs5
-rw-r--r--Command/FromKey.hs2
-rw-r--r--Command/InAnnex.hs8
-rw-r--r--Command/Init.hs17
-rw-r--r--Command/Move.hs5
-rw-r--r--Command/RecvKey.hs8
-rw-r--r--Command/SendKey.hs8
-rw-r--r--Command/Uninit.hs2
-rw-r--r--Command/Unused.hs2
-rw-r--r--Command/Upgrade.hs22
-rw-r--r--Content.hs16
-rw-r--r--GitAnnex.hs2
-rw-r--r--GitQueue.hs17
-rw-r--r--Key.hs87
-rw-r--r--LocationLog.hs7
-rw-r--r--Locations.hs56
-rw-r--r--Makefile1
-rw-r--r--Remotes.hs5
-rw-r--r--Types.hs8
-rw-r--r--Upgrade.hs73
-rw-r--r--Upgrade/V0.hs63
-rw-r--r--Upgrade/V1.hs226
-rw-r--r--Version.hs21
-rw-r--r--debian/NEWS11
-rw-r--r--debian/changelog18
-rw-r--r--doc/bugs/fat_support.mdwn3
-rw-r--r--doc/bugs/free_space_checking.mdwn10
-rw-r--r--doc/forum/hashing_objects_directories.mdwn8
-rw-r--r--doc/git-annex.mdwn25
-rw-r--r--doc/internals.mdwn10
-rw-r--r--doc/todo/object_dir_reorg_v2.mdwn4
-rw-r--r--doc/upgrades.mdwn67
-rw-r--r--doc/walkthrough/modifying_annexed_files.mdwn2
-rw-r--r--doc/walkthrough/moving_file_content_between_repositories.mdwn2
-rw-r--r--doc/walkthrough/unused_data.mdwn4
-rw-r--r--doc/walkthrough/using_ssh_remotes.mdwn2
-rw-r--r--doc/walkthrough/using_the_URL_backend.mdwn2
-rw-r--r--test.hs15
49 files changed, 804 insertions, 308 deletions
diff --git a/Annex.hs b/Annex.hs
index dd3362b29..608151d82 100644
--- a/Annex.hs
+++ b/Annex.hs
@@ -16,6 +16,7 @@ module Annex (
gitRepo,
queue,
queueRun,
+ queueRunAt,
setConfig,
repoConfig
) where
@@ -25,7 +26,7 @@ import Data.Maybe
import qualified GitRepo as Git
import qualified GitQueue
-import qualified BackendTypes
+import qualified BackendClass
import Utility
-- git-annex's monad
@@ -34,8 +35,8 @@ type Annex = StateT AnnexState IO
-- internal state storage
data AnnexState = AnnexState
{ repo :: Git.Repo
- , backends :: [BackendTypes.Backend Annex]
- , supportedBackends :: [BackendTypes.Backend Annex]
+ , backends :: [BackendClass.Backend Annex]
+ , supportedBackends :: [BackendClass.Backend Annex]
, repoqueue :: GitQueue.Queue
, quiet :: Bool
, force :: Bool
@@ -47,7 +48,7 @@ data AnnexState = AnnexState
, remotesread :: Bool
} deriving (Show)
-newState :: Git.Repo -> [BackendTypes.Backend Annex] -> AnnexState
+newState :: Git.Repo -> [BackendClass.Backend Annex] -> AnnexState
newState gitrepo allbackends = AnnexState
{ repo = gitrepo
, backends = []
@@ -64,7 +65,7 @@ newState gitrepo allbackends = AnnexState
}
{- Create and returns an Annex state object for the specified git repo. -}
-new :: Git.Repo -> [BackendTypes.Backend Annex] -> IO AnnexState
+new :: Git.Repo -> [BackendClass.Backend Annex] -> IO AnnexState
new gitrepo allbackends = do
gitrepo' <- liftIO $ Git.configRead gitrepo
return $ newState gitrepo' allbackends
@@ -109,6 +110,13 @@ queueRun = do
liftIO $ GitQueue.run g q
put state { repoqueue = GitQueue.empty }
+{- Runs the queue if the specified number of items have been queued. -}
+queueRunAt :: Integer -> Annex ()
+queueRunAt n = do
+ state <- get
+ let q = repoqueue state
+ when (GitQueue.size q >= n) queueRun
+
{- Changes a git config setting in both internal state and .git/config -}
setConfig :: String -> String -> Annex ()
setConfig k value = do
diff --git a/Backend.hs b/Backend.hs
index df23e80a3..cd14ce50e 100644
--- a/Backend.hs
+++ b/Backend.hs
@@ -27,7 +27,8 @@ module Backend (
lookupFile,
chooseBackends,
keyBackend,
- lookupBackendName
+ lookupBackendName,
+ maybeLookupBackendName
) where
import Control.Monad.State
@@ -39,7 +40,8 @@ import Locations
import qualified GitRepo as Git
import qualified Annex
import Types
-import qualified BackendTypes as B
+import Key
+import qualified BackendClass as B
import Messages
{- List of backends in the order to try them when storing a new key. -}
@@ -135,18 +137,19 @@ lookupFile file = do
getsymlink = do
l <- readSymbolicLink file
return $ takeFileName l
- makekey bs l = do
+ makekey bs l =
+ case fileKey l of
+ Just k -> makeret k l bs
+ Nothing -> return Nothing
+ makeret k l bs =
case maybeLookupBackendName bs bname of
- Nothing -> do
- unless (null kname || null bname ||
- not (isLinkToAnnex l)) $
- warning skip
- return Nothing
- Just backend -> return $ Just (k, backend)
+ Just backend -> return $ Just (k, backend)
+ Nothing -> do
+ when (isLinkToAnnex l) $
+ warning skip
+ return Nothing
where
- k = fileKey l
- bname = backendName k
- kname = keyName k
+ bname = keyBackendName k
skip = "skipping " ++ file ++
" (unknown backend " ++ bname ++ ")"
@@ -164,4 +167,4 @@ chooseBackends fs = do
keyBackend :: Key -> Annex (Backend Annex)
keyBackend key = do
bs <- Annex.getState Annex.supportedBackends
- return $ lookupBackendName bs $ backendName key
+ return $ lookupBackendName bs $ keyBackendName key
diff --git a/Backend/File.hs b/Backend/File.hs
index d76cd2939..a5e243199 100644
--- a/Backend/File.hs
+++ b/Backend/File.hs
@@ -18,7 +18,7 @@ import Control.Monad.State
import System.Directory
import Data.List
-import BackendTypes
+import BackendClass
import LocationLog
import Locations
import qualified Remotes
diff --git a/Backend/SHA.hs b/Backend/SHA.hs
index 4eea890ce..056385107 100644
--- a/Backend/SHA.hs
+++ b/Backend/SHA.hs
@@ -13,9 +13,10 @@ import System.Cmd.Utils
import System.IO
import System.Directory
import Data.Maybe
+import System.Posix.Files
import qualified Backend.File
-import BackendTypes
+import BackendClass
import Messages
import qualified Annex
import Locations
@@ -23,6 +24,7 @@ import Content
import Types
import Utility
import qualified SysConfig
+import Key
type SHASize = Int
@@ -63,11 +65,16 @@ shaN size file = do
where
command = "sha" ++ (show size) ++ "sum"
--- A key is a checksum of its contents.
+{- A key is a checksum of its contents. -}
keyValue :: SHASize -> FilePath -> Annex (Maybe Key)
keyValue size file = do
s <- shaN size file
- return $ Just $ Key (shaName size, s)
+ stat <- liftIO $ getFileStatus file
+ return $ Just $ stubKey {
+ keyName = s,
+ keyBackendName = shaName size,
+ keySize = Just $ fromIntegral $ fileSize stat
+ }
-- A key's checksum is checked during fsck.
checkKeyChecksum :: SHASize -> Key -> Annex Bool
diff --git a/Backend/URL.hs b/Backend/URL.hs
index 29dc8fefa..02ce3563c 100644
--- a/Backend/URL.hs
+++ b/Backend/URL.hs
@@ -8,12 +8,12 @@
module Backend.URL (backends) where
import Control.Monad.State (liftIO)
-import Data.String.Utils
import Types
-import BackendTypes
+import BackendClass
import Utility
import Messages
+import Key
backends :: [Backend Annex]
backends = [backend]
@@ -52,8 +52,8 @@ dummyOk _ = return True
downloadUrl :: Key -> FilePath -> Annex Bool
downloadUrl key file = do
- showNote "downloading"
+ showNote $ "downloading"
showProgress -- make way for curl progress bar
liftIO $ boolSystem "curl" [Params "-# -o", File file, File url]
where
- url = join ":" $ drop 1 $ split ":" $ show key
+ url = keyName key
diff --git a/Backend/WORM.hs b/Backend/WORM.hs
index a0d814aa0..a011995da 100644
--- a/Backend/WORM.hs
+++ b/Backend/WORM.hs
@@ -10,17 +10,17 @@ module Backend.WORM (backends) where
import Control.Monad.State
import System.FilePath
import System.Posix.Files
-import System.Posix.Types
import System.Directory
-import Data.String.Utils
+import Data.Maybe
import qualified Backend.File
-import BackendTypes
+import BackendClass
import Locations
import qualified Annex
import Content
import Messages
import Types
+import Key
backends :: [Backend Annex]
backends = [backend]
@@ -32,31 +32,25 @@ backend = Backend.File.backend {
fsckKey = Backend.File.checkKey checkKeySize
}
--- The key is formed from the file size, modification time, and the
--- basename of the filename.
---
--- That allows multiple files with the same names to have different keys,
--- while also allowing a file to be moved around while retaining the
--- same key.
+{- The key includes the file size, modification time, and the
+ - basename of the filename.
+ -
+ - That allows multiple files with the same names to have different keys,
+ - while also allowing a file to be moved around while retaining the
+ - same key.
+ -}
keyValue :: FilePath -> Annex (Maybe Key)
keyValue file = do
stat <- liftIO $ getFileStatus file
- return $ Just $ Key (name backend, key stat)
- where
- key stat = uniqueid stat ++ sep ++ base
- uniqueid stat = show (modificationTime stat) ++ sep ++
- show (fileSize stat)
- base = takeFileName file
- sep = ":"
-
-{- Extracts the file size from a key. -}
-keySize :: Key -> FileOffset
-keySize key = read $ section !! 1
- where
- section = split ":" (keyName key)
+ return $ Just $ Key {
+ keyName = takeFileName file,
+ keyBackendName = name backend,
+ keySize = Just $ fromIntegral $ fileSize stat,
+ keyMtime = Just $ modificationTime stat
+ }
{- The size of the data for a key is checked against the size encoded in
- - the key. Note that the modification time is not checked. -}
+ - the key's metadata. -}
checkKeySize :: Key -> Annex Bool
checkKeySize key = do
g <- Annex.gitRepo
@@ -66,7 +60,7 @@ checkKeySize key = do
then return True
else do
s <- liftIO $ getFileStatus file
- if fileSize s == keySize key
+ if fromIntegral (fileSize s) == fromJust (keySize key)
then return True
else do
dest <- moveBad key
diff --git a/BackendClass.hs b/BackendClass.hs
new file mode 100644
index 000000000..909ae8f96
--- /dev/null
+++ b/BackendClass.hs
@@ -0,0 +1,39 @@
+{- git-annex key/value backend data type
+ -
+ - Most things should not need this, using Types instead
+ -
+ - Copyright 2010 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module BackendClass where
+
+import Key
+
+data Backend a = Backend {
+ -- name of this backend
+ name :: String,
+ -- converts a filename to a key
+ getKey :: FilePath -> a (Maybe Key),
+ -- stores a file's contents to a key
+ storeFileKey :: FilePath -> Key -> a Bool,
+ -- retrieves a key's contents to a file
+ retrieveKeyFile :: Key -> FilePath -> a Bool,
+ -- removes a key, optionally checking that enough copies are stored
+ -- elsewhere
+ removeKey :: Key -> Maybe Int -> a Bool,
+ -- checks if a backend is storing the content of a key
+ hasKey :: Key -> a Bool,
+ -- called during fsck to check a key
+ -- (second parameter may be the filename associated with it)
+ -- (third parameter may be the number of copies that there should
+ -- be of the key)
+ fsckKey :: Key -> Maybe FilePath -> Maybe Int -> a Bool
+}
+
+instance Show (Backend a) where
+ show backend = "Backend { name =\"" ++ name backend ++ "\" }"
+
+instance Eq (Backend a) where
+ a == b = name a == name b
diff --git a/BackendTypes.hs b/BackendTypes.hs
deleted file mode 100644
index c0705a550..000000000
--- a/BackendTypes.hs
+++ /dev/null
@@ -1,79 +0,0 @@
-{- git-annex key/value backend data types
- -
- - Most things should not need this, using Types instead
- -
- - Copyright 2010 Joey Hess <joey@kitenet.net>
- -
- - Licensed under the GNU GPL version 3 or higher.
- -}
-
-module BackendTypes where
-
-import Data.String.Utils
-import Test.QuickCheck
-
-type KeyName = String
-type BackendName = String
-newtype Key = Key (BackendName, KeyName) deriving (Eq, Ord)
-
-data Backend a = Backend {
- -- name of this backend
- name :: String,
- -- converts a filename to a key
- getKey :: FilePath -> a (Maybe Key),
- -- stores a file's contents to a key
- storeFileKey :: FilePath -> Key -> a Bool,
- -- retrieves a key's contents to a file
- retrieveKeyFile :: Key -> FilePath -> a Bool,
- -- removes a key, optionally checking that enough copies are stored
- -- elsewhere
- removeKey :: Key -> Maybe Int -> a Bool,
- -- checks if a backend is storing the content of a key
- hasKey :: Key -> a Bool,
- -- called during fsck to check a key
- -- (second parameter may be the filename associated with it)
- -- (third parameter may be the number of copies that there should
- -- be of the key)
- fsckKey :: Key -> Maybe FilePath -> Maybe Int -> a Bool
-}
-
-instance Show (Backend a) where
- show backend = "Backend { name =\"" ++ name backend ++ "\" }"
-
-instance Eq (Backend a) where
- a == b = name a == name b
-
--- accessors for the parts of a key
-keyName :: Key -> KeyName
-keyName (Key (_,k)) = k
-backendName :: Key -> BackendName
-backendName (Key (b,_)) = b
-
--- constructs a key in a backend
-genKey :: Backend a -> KeyName -> Key
-genKey b f = Key (name b,f)
-
--- show a key to convert it to a string; the string includes the
--- name of the backend to avoid collisions between key strings
-instance Show Key where
- show (Key (b, k)) = b ++ ":" ++ k
-
-instance Read Key where
- readsPrec _ s = [(Key (b,k), "")]
- where
- l = split ":" s
- b = if null l then "" else head l
- k = join ":" $ drop 1 l
-
--- for quickcheck
-instance Arbitrary Key where
- arbitrary = do
- backendname <- arbitrary
- keyname <- arbitrary
- return $ Key (backendname, keyname)
-
-prop_idempotent_key_read_show :: Key -> Bool
-prop_idempotent_key_read_show k
- -- backend names will never contain colons
- | ':' `elem` (backendName k) = True
- | otherwise = k == (read $ show k)
diff --git a/CmdLine.hs b/CmdLine.hs
index b8fd6af7c..0698f2f5e 100644
--- a/CmdLine.hs
+++ b/CmdLine.hs
@@ -99,7 +99,7 @@ startup = do
shutdown :: Annex Bool
shutdown = do
q <- Annex.getState Annex.repoqueue
- unless (q == GitQueue.empty) $ do
+ unless (0 == GitQueue.size q) $ do
showSideAction "Recording state in git..."
Annex.queueRun
diff --git a/Command.hs b/Command.hs
index eba7f2cef..41ad884a9 100644
--- a/Command.hs
+++ b/Command.hs
@@ -22,6 +22,7 @@ import qualified Annex
import qualified GitRepo as Git
import Locations
import Utility
+import Key
{- A command runs in four stages.
-
@@ -45,6 +46,8 @@ type CommandCleanup = Annex Bool
- functions. -}
type CommandSeekStrings = CommandStartString -> CommandSeek
type CommandStartString = String -> CommandStart
+type CommandSeekKeys = CommandStartKey -> CommandSeek
+type CommandStartKey = Key -> CommandStart
type BackendFile = (FilePath, Maybe (Backend Annex))
type CommandSeekBackendFiles = CommandStartBackendFile -> CommandSeek
type CommandStartBackendFile = BackendFile -> CommandStart
@@ -166,8 +169,12 @@ withFilesUnlocked' typechanged a params = do
map (\f -> Git.workTree repo ++ "/" ++ f) typechangedfiles
unlockedfiles' <- filterFiles unlockedfiles
backendPairs a unlockedfiles'
-withKeys :: CommandSeekStrings
-withKeys a params = return $ map a params
+withKeys :: CommandSeekKeys
+withKeys a params = return $ map a $ map parse params
+ where
+ parse p = case readKey p of
+ Just k -> k
+ Nothing -> error "bad key"
withTempFile :: CommandSeekStrings
withTempFile a params = return $ map a params
withNothing :: CommandSeekNothing
@@ -228,17 +235,18 @@ paramName = "NAME"
paramNothing :: String
paramNothing = ""
-{- The Key specified by the --key and --backend parameters. -}
+{- The Key specified by the --key parameter. -}
cmdlineKey :: Annex Key
cmdlineKey = do
k <- Annex.getState Annex.defaultkey
- backends <- Backend.list
- return $ genKey (head backends) (keyname' k)
+ case k of
+ Nothing -> nokey
+ Just "" -> nokey
+ Just kstring -> case readKey kstring of
+ Nothing -> error "bad key"
+ Just key -> return key
where
- keyname' Nothing = badkey
- keyname' (Just "") = badkey
- keyname' (Just n) = n
- badkey = error "please specify the key with --key"
+ nokey = error "please specify the key with --key"
{- Given an original list of files, and an expanded list derived from it,
- ensures that the original list's ordering is preserved.
diff --git a/Command/DropKey.hs b/Command/DropKey.hs
index 8c7566df8..b3cc60961 100644
--- a/Command/DropKey.hs
+++ b/Command/DropKey.hs
@@ -9,7 +9,6 @@ module Command.DropKey where
import Command
import qualified Annex
-import qualified Backend
import LocationLog
import Types
import Content
@@ -22,11 +21,8 @@ command = [Command "dropkey" (paramRepeating paramKey) seek
seek :: [CommandSeek]
seek = [withKeys start]
-{- Drops cached content for a key. -}
-start :: CommandStartString
-start keyname = do
- backends <- Backend.list
- let key = genKey (head backends) keyname
+start :: CommandStartKey
+start key = do
present <- inAnnex key
force <- Annex.getState Annex.force
if not present
@@ -34,7 +30,7 @@ start keyname = do
else if not force
then error "dropkey is can cause data loss; use --force if you're sure you want to do this"
else do
- showStart "dropkey" keyname
+ showStart "dropkey" (show key)
return $ Just $ perform key
perform :: Key -> CommandPerform
diff --git a/Command/DropUnused.hs b/Command/DropUnused.hs
index 594564cb7..8ed61ba65 100644
--- a/Command/DropUnused.hs
+++ b/Command/DropUnused.hs
@@ -11,6 +11,7 @@ import Control.Monad (when)
import Control.Monad.State (liftIO)
import qualified Data.Map as M
import System.Directory
+import Data.Maybe
import Command
import Types
@@ -19,6 +20,7 @@ import Locations
import qualified Annex
import qualified Command.Drop
import Backend
+import Key
command :: [Command]
command = [Command "dropunused" (paramRepeating paramNumber) seek
@@ -55,7 +57,6 @@ readUnusedLog = do
return $ M.fromList $ map parse $ lines l
else return $ M.empty
where
- parse line = (head ws, tokey $ unwords $ tail ws)
+ parse line = (head ws, fromJust $ readKey $ unwords $ tail ws)
where
ws = words line
- tokey s = read s :: Key
diff --git a/Command/FromKey.hs b/Command/FromKey.hs
index 717d528bc..176d2cd54 100644
--- a/Command/FromKey.hs
+++ b/Command/FromKey.hs
@@ -16,9 +16,9 @@ import Command
import qualified Annex
import Utility
import qualified Backend
-import Types
import Content
import Messages
+import Key
command :: [Command]
command = [Command "fromkey" paramPath seek
diff --git a/Command/InAnnex.hs b/Command/InAnnex.hs
index 68ac9a2c6..fa81fc9a4 100644
--- a/Command/InAnnex.hs
+++ b/Command/InAnnex.hs
@@ -11,9 +11,7 @@ import Control.Monad.State (liftIO)
import System.Exit
import Command
-import Types
import Content
-import qualified Backend
command :: [Command]
command = [Command "inannex" (paramRepeating paramKey) seek
@@ -22,10 +20,8 @@ command = [Command "inannex" (paramRepeating paramKey) seek
seek :: [CommandSeek]
seek = [withKeys start]
-start :: CommandStartString
-start keyname = do
- backends <- Backend.list
- let key = genKey (head backends) keyname
+start :: CommandStartKey
+start key = do
present <- inAnnex key
if present
then return Nothing
diff --git a/Command/Init.hs b/Command/Init.hs
index 661835169..d9ea394a3 100644
--- a/Command/Init.hs
+++ b/Command/Init.hs
@@ -8,7 +8,7 @@
module Command.Init where
import Control.Monad.State (liftIO)
-import Control.Monad (when)
+import Control.Monad (when, unless)
import System.Directory
import System.FilePath
@@ -74,12 +74,14 @@ gitAttributesWrite repo = do
exists <- doesFileExist attributes
if not exists
then do
- safeWriteFile attributes $ attrLine ++ "\n"
+ safeWriteFile attributes $ unlines attrLines
commit
else do
content <- readFile attributes
- when (all (/= attrLine) (lines content)) $ do
- appendFile attributes $ attrLine ++ "\n"
+ let present = lines content
+ let missing = filter (\l -> not $ l `elem` present) attrLines
+ unless (null missing) $ do
+ appendFile attributes $ unlines missing
commit
where
attributes = Git.attributes repo
@@ -91,8 +93,11 @@ gitAttributesWrite repo = do
, Param attributes
]
-attrLine :: String
-attrLine = stateDir </> "*.log merge=union"
+attrLines :: [String]
+attrLines =
+ [ stateDir </> "*.log merge=union"
+ , stateDir </> "*/*/*.log merge=union"
+ ]
{- set up a git pre-commit hook, if one is not already present -}
gitPreCommitHookWrite :: Git.Repo -> Annex ()
diff --git a/Command/Move.hs b/Command/Move.hs
index 3774ccbe9..2d6c973fe 100644
--- a/Command/Move.hs
+++ b/Command/Move.hs
@@ -20,7 +20,7 @@ import qualified Remotes
import UUID
import Messages
import Utility
-
+
command :: [Command]
command = [Command "move" paramPath seek
"move content of files to/from another repository"]
@@ -136,8 +136,7 @@ fromCleanup :: Git.Repo -> Bool -> Key -> CommandCleanup
fromCleanup src True key = do
ok <- Remotes.onRemote src (boolSystem, False) "dropkey"
[ Params "--quiet --force"
- , Param $ "--backend=" ++ backendName key
- , Param $ keyName key
+ , Param $ show key
]
-- better safe than sorry: assume the src dropped the key
-- even if it seemed to fail; the failure could have occurred
diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs
index 8a9673050..c7c37d1e3 100644
--- a/Command/RecvKey.hs
+++ b/Command/RecvKey.hs
@@ -12,10 +12,8 @@ import Control.Monad.State (liftIO)
import System.Exit
import Command
-import Types
import CmdLine
import Content
-import qualified Backend
import RsyncFile
command :: [Command]
@@ -25,10 +23,8 @@ command = [Command "recvkey" paramKey seek
seek :: [CommandSeek]
seek = [withKeys start]
-start :: CommandStartString
-start keyname = do
- backends <- Backend.list
- let key = genKey (head backends) keyname
+start :: CommandStartKey
+start key = do
present <- inAnnex key
when present $
error "key is already present in annex"
diff --git a/Command/SendKey.hs b/Command/SendKey.hs
index cb883b53a..56974bda9 100644
--- a/Command/SendKey.hs
+++ b/Command/SendKey.hs
@@ -14,9 +14,7 @@ import System.Exit
import Locations
import qualified Annex
import Command
-import Types
import Content
-import qualified Backend
import RsyncFile
command :: [Command]
@@ -26,10 +24,8 @@ command = [Command "sendkey" paramKey seek
seek :: [CommandSeek]
seek = [withKeys start]
-start :: CommandStartString
-start keyname = do
- backends <- Backend.list
- let key = genKey (head backends) keyname
+start :: CommandStartKey
+start key = do
present <- inAnnex key
g <- Annex.gitRepo
let file = gitAnnexLocation g key
diff --git a/Command/Uninit.hs b/Command/Uninit.hs
index e9406ce3a..e8ac1bbd5 100644
--- a/Command/Uninit.hs
+++ b/Command/Uninit.hs
@@ -60,4 +60,4 @@ gitAttributesUnWrite repo = do
when attrexists $ do
c <- readFileStrict attributes
safeWriteFile attributes $ unlines $
- filter (/= Command.Init.attrLine) $ lines c
+ filter (\l -> not $ l `elem` Command.Init.attrLines) $ lines c
diff --git a/Command/Unused.hs b/Command/Unused.hs
index a614ce5d9..52e483d87 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -126,4 +126,4 @@ tmpKeys = do
contents <- liftIO $ getDirectoryContents tmp
files <- liftIO $ filterM doesFileExist $
map (tmp </>) contents
- return $ map (fileKey . takeFileName) files
+ return $ catMaybes $ map (fileKey . takeFileName) files
diff --git a/Command/Upgrade.hs b/Command/Upgrade.hs
new file mode 100644
index 000000000..3c9fa3eeb
--- /dev/null
+++ b/Command/Upgrade.hs
@@ -0,0 +1,22 @@
+{- git-annex command
+ -
+ - Copyright 2011 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Command.Upgrade where
+
+import Command
+
+command :: [Command]
+command = [Command "upgrade" paramNothing seek "upgrade repository layout"]
+
+seek :: [CommandSeek]
+seek = [withNothing start]
+
+start :: CommandStartNothing
+start = do
+ -- The actual upgrading is handled by just running any command,
+ -- so nothing extra needs to be done.
+ return $ Just $ return $ Just $ return True
diff --git a/Content.hs b/Content.hs
index dc675389f..4bd8265c2 100644
--- a/Content.hs
+++ b/Content.hs
@@ -26,6 +26,7 @@ import System.Path
import Control.Monad (when, unless, filterM)
import System.Posix.Files
import System.FilePath
+import Data.Maybe
import Types
import Locations
@@ -160,13 +161,20 @@ getKeysPresent' dir = do
if (not exists)
then return []
else do
- contents <- liftIO $ getDirectoryContents dir
- files <- liftIO $ filterM present contents
- return $ map fileKey files
+ -- 2 levels of hashing
+ levela <- liftIO $ subdirContent dir
+ levelb <- liftIO $ mapM subdirContent levela
+ contents <- liftIO $ mapM subdirContent (concat levelb)
+ files <- liftIO $ filterM present (concat contents)
+ return $ catMaybes $ map (fileKey . takeFileName) files
where
present d = do
result <- try $
- getFileStatus $ dir ++ "/" ++ d ++ "/" ++ takeFileName d
+ getFileStatus $ d </> takeFileName d
case result of
Right s -> return $ isRegularFile s
Left _ -> return False
+ subdirContent d = do
+ c <- getDirectoryContents d
+ return $ map (d </>) $ filter notcruft c
+ notcruft f = f /= "." && f /= ".."
diff --git a/GitAnnex.hs b/GitAnnex.hs
index da91f6e74..b9c22bdfb 100644
--- a/GitAnnex.hs
+++ b/GitAnnex.hs
@@ -41,6 +41,7 @@ import qualified Command.Trust
import qualified Command.Untrust
import qualified Command.Semitrust
import qualified Command.Map
+import qualified Command.Upgrade
cmds :: [Command]
cmds = concat
@@ -70,6 +71,7 @@ cmds = concat
, Command.Whereis.command
, Command.Migrate.command
, Command.Map.command
+ , Command.Upgrade.command
]
options :: [Option]
diff --git a/GitQueue.hs b/GitQueue.hs
index 07cf9f62f..097516c19 100644
--- a/GitQueue.hs
+++ b/GitQueue.hs
@@ -9,6 +9,7 @@ module GitQueue (
Queue,
empty,
add,
+ size,
run
) where
@@ -31,22 +32,28 @@ data Action = Action {
{- A queue of actions to perform (in any order) on a git repository,
- with lists of files to perform them on. This allows coalescing
- similar git commands. -}
-type Queue = M.Map Action [FilePath]
+data Queue = Queue Integer (M.Map Action [FilePath])
+ deriving (Show, Eq)
{- Constructor for empty queue. -}
empty :: Queue
-empty = M.empty
+empty = Queue 0 M.empty
{- Adds an action to a queue. -}
add :: Queue -> String -> [CommandParam] -> FilePath -> Queue
-add queue subcommand params file = M.insertWith (++) action [file] queue
+add (Queue n m) subcommand params file = Queue (n + 1) m'
where
action = Action subcommand params
+ m' = M.insertWith' (++) action [file] m
+
+{- Number of items in a queue. -}
+size :: Queue -> Integer
+size (Queue n _) = n
{- Runs a queue on a git repository. -}
run :: Git.Repo -> Queue -> IO ()
-run repo queue = do
- forM_ (M.toList queue) $ uncurry $ runAction repo
+run repo (Queue _ m) = do
+ forM_ (M.toList m) $ uncurry $ runAction repo
return ()
{- Runs an Action on a list of files in a git repository.
diff --git a/Key.hs b/Key.hs
new file mode 100644
index 000000000..f52aea31b
--- /dev/null
+++ b/Key.hs
@@ -0,0 +1,87 @@
+{- git-annex Key data type
+ -
+ - Copyright 2011 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Key (
+ Key(..),
+ stubKey,
+ readKey,
+
+ prop_idempotent_key_read_show
+) where
+
+import Test.QuickCheck
+import Utility
+import System.Posix.Types
+
+{- A Key has a unique name, is associated with a key/value backend,
+ - and may contain other optional metadata. -}
+data Key = Key {
+ keyName :: String,
+ keyBackendName :: String,
+ keySize :: Maybe Integer,
+ keyMtime :: Maybe EpochTime
+} deriving (Eq, Ord)
+
+stubKey :: Key
+stubKey = Key {
+ keyName = "",
+ keyBackendName = "",
+ keySize = Nothing,
+ keyMtime = Nothing
+}
+
+fieldSep :: Char
+fieldSep = '-'
+
+{- Keys show as strings that are suitable for use as filenames.
+ - The name field is always shown last, separated by doubled fieldSeps,
+ - and is the only field allowed to contain the fieldSep. -}
+instance Show Key where
+ show Key { keyBackendName = b, keySize = s, keyMtime = m, keyName = n } =
+ b +++ ('s' ?: s) +++ ('m' ?: m) +++ (fieldSep : n)
+ where
+ "" +++ y = y
+ x +++ "" = x
+ x +++ y = x ++ fieldSep:y
+ c ?: (Just v) = c:(show v)
+ _ ?: _ = ""
+
+readKey :: String -> Maybe Key
+readKey s = if key == Just stubKey then Nothing else key
+ where
+ key = startbackend stubKey s
+
+ startbackend k v = sepfield k v addbackend
+
+ sepfield k v a = case span (/= fieldSep) v of
+ (v', _:r) -> findfields r $ a k v'
+ _ -> Nothing
+
+ findfields (c:v) (Just k)
+ | c == fieldSep = Just $ k { keyName = v }
+ | otherwise = sepfield k v $ addfield c
+ findfields _ v = v
+
+ addbackend k v = Just k { keyBackendName = v }
+ addfield 's' k v = Just k { keySize = readMaybe v }
+ addfield 'm' k v = Just k { keyMtime = readMaybe v }
+ addfield _ _ _ = Nothing
+
+-- for quickcheck
+instance Arbitrary Key where
+ arbitrary = do
+ n <- arbitrary
+ b <- elements ['A'..'Z']
+ return $ Key {
+ keyName = n,
+ keyBackendName = [b],
+ keySize = Nothing,
+ keyMtime = Nothing
+ }
+
+prop_idempotent_key_read_show :: Key -> Bool
+prop_idempotent_key_read_show k = Just k == (readKey $ show k)
diff --git a/LocationLog.hs b/LocationLog.hs
index f778df386..f1e54432c 100644
--- a/LocationLog.hs
+++ b/LocationLog.hs
@@ -24,6 +24,8 @@ module LocationLog (
LogStatus(..),
logChange,
logFile,
+ readLog,
+ writeLog,
keyLocations
) where
@@ -123,11 +125,6 @@ logNow s u = do
now <- getPOSIXTime
return $ LogLine now s u
-{- Returns the filename of the log file for a given key. -}
-logFile :: Git.Repo -> Key -> String
-logFile repo key =
- gitStateDir repo ++ keyFile key ++ ".log"
-
{- Returns a list of repository UUIDs that, according to the log, have
- the value of a key. -}
keyLocations :: Git.Repo -> Key -> IO [UUID]
diff --git a/Locations.hs b/Locations.hs
index 908d5b74e..3cce4c261 100644
--- a/Locations.hs
+++ b/Locations.hs
@@ -19,6 +19,7 @@ module Locations (
gitAnnexBadDir,
gitAnnexUnusedLog,
isLinkToAnnex,
+ logFile,
prop_idempotent_fileKey
) where
@@ -26,8 +27,12 @@ module Locations (
import System.FilePath
import Data.String.Utils
import Data.List
+import Bits
+import Word
+import Data.Hash.MD5
import Types
+import Key
import qualified GitRepo as Git
{- Conventions:
@@ -62,7 +67,7 @@ objectDir = addTrailingPathSeparator $ annexDir </> "objects"
{- Annexed file's location relative to the .git directory. -}
annexLocation :: Key -> FilePath
-annexLocation key = objectDir </> f </> f
+annexLocation key = objectDir </> hashDir key </> f </> f
where
f = keyFile key
@@ -105,6 +110,11 @@ gitAnnexUnusedLog r = gitAnnexDir r </> "unused"
isLinkToAnnex :: FilePath -> Bool
isLinkToAnnex s = ("/.git/" ++ objectDir) `isInfixOf` s
+{- The filename of the log file for a given key. -}
+logFile :: Git.Repo -> Key -> String
+logFile repo key =
+ gitStateDir repo ++ hashDir key ++ keyFile key ++ ".log"
+
{- Converts a key into a filename fragment.
-
- Escape "/" in the key name, to keep a flat tree of files and avoid
@@ -114,17 +124,49 @@ isLinkToAnnex s = ("/.git/" ++ objectDir) `isInfixOf` s
- a slash
- "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping
- is one to one.
+ - ":" is escaped to "&c", because despite it being 2011, people still care
+ - about FAT.
- -}
keyFile :: Key -> FilePath
-keyFile key = replace "/" "%" $ replace "%" "&s" $ replace "&" "&a" $ show key
+keyFile key = replace "/" "%" $ replace ":" "&c" $
+ replace "%" "&s" $ replace "&" "&a" $ show key
{- Reverses keyFile, converting a filename fragment (ie, the basename of
- the symlink target) into a key. -}
-fileKey :: FilePath -> Key
-fileKey file = read $
- replace "&a" "&" $ replace "&s" "%" $ replace "%" "/" file
+fileKey :: FilePath -> Maybe Key
+fileKey file = readKey $
+ replace "&a" "&" $ replace "&s" "%" $
+ replace "&c" ":" $ replace "%" "/" file
{- for quickcheck -}
prop_idempotent_fileKey :: String -> Bool
-prop_idempotent_fileKey s = k == fileKey (keyFile k)
- where k = read $ "test:" ++ s
+prop_idempotent_fileKey s = Just k == fileKey (keyFile k)
+ where k = stubKey { keyName = s, keyBackendName = "test" }
+
+{- Given a key, generates a short directory name to put it in,
+ - to do hashing to protect against filesystems that dislike having
+ - many items in a single directory. -}
+hashDir :: Key -> FilePath
+hashDir k = addTrailingPathSeparator $ take 2 dir </> drop 2 dir
+ where
+ dir = take 4 $ abcd_to_dir $ md5 $ Str $ show k
+
+abcd_to_dir :: ABCD -> String
+abcd_to_dir (ABCD (a,b,c,d)) = concat $ map display_32bits_as_dir [a,b,c,d]
+
+{- modified version of display_32bits_as_hex from Data.Hash.MD5
+ - Copyright (C) 2001 Ian Lynagh
+ - License: Either BSD or GPL
+ -}
+display_32bits_as_dir :: Word32 -> String
+display_32bits_as_dir w = trim $ swap_pairs cs
+ where
+ -- Need 32 characters to use. To avoid inaverdently making
+ -- a real word, use letters that appear less frequently.
+ chars = ['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF"
+ cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7]
+ getc n = chars !! (fromIntegral n)
+ swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs
+ swap_pairs _ = []
+ -- Last 2 will always be 00, so omit.
+ trim s = take 6 s
diff --git a/Makefile b/Makefile
index c381ae986..c60e19b31 100644
--- a/Makefile
+++ b/Makefile
@@ -13,6 +13,7 @@ SysConfig.hs: configure.hs TestConfig.hs
Touch.hs: Touch.hsc
hsc2hs $<
+ perl -i -pe 's/^{-# INCLUDE.*//' $@
$(bins): SysConfig.hs Touch.hs
$(GHCMAKE) $@
diff --git a/Remotes.hs b/Remotes.hs
index 3c9db314c..8b760ac95 100644
--- a/Remotes.hs
+++ b/Remotes.hs
@@ -153,7 +153,7 @@ inAnnex r key = if Git.repoIsUrl r
checkremote = do
showNote ("checking " ++ Git.repoDescribe r ++ "...")
inannex <- onRemote r (boolSystem, False) "inannex"
- [Param ("--backend=" ++ backendName key), Param (keyName key)]
+ [Param (show key)]
return $ Right inannex
{- Cost Ordered list of remotes. -}
@@ -272,8 +272,7 @@ rsyncParams :: Git.Repo -> Bool -> Key -> FilePath -> Annex [CommandParam]
rsyncParams r sending key file = do
Just (shellcmd, shellparams) <- git_annex_shell r
(if sending then "sendkey" else "recvkey")
- [ Param $ "--backend=" ++ backendName key
- , Param $ keyName key
+ [ Param $ show key
-- Command is terminated with "--", because
-- rsync will tack on its own options afterwards,
-- and they need to be ignored.
diff --git a/Types.hs b/Types.hs
index 0890efd5e..503e27d31 100644
--- a/Types.hs
+++ b/Types.hs
@@ -8,11 +8,9 @@
module Types (
Annex,
Backend,
- Key,
- genKey,
- backendName,
- keyName
+ Key
) where
-import BackendTypes
+import BackendClass
import Annex
+import Key
diff --git a/Upgrade.hs b/Upgrade.hs
index 3c16bcc86..76dd156f8 100644
--- a/Upgrade.hs
+++ b/Upgrade.hs
@@ -7,78 +7,17 @@
module Upgrade where
-import System.IO.Error (try)
-import System.Directory
-import Control.Monad.State (liftIO)
-import Control.Monad (filterM, forM_)
-import System.Posix.Files
-import System.FilePath
-
-import Content
import Types
-import Locations
-import qualified GitRepo as Git
-import qualified Annex
-import qualified Backend
-import Messages
import Version
-import Utility
+import qualified Upgrade.V0
+import qualified Upgrade.V1
{- Uses the annex.version git config setting to automate upgrades. -}
upgrade :: Annex Bool
upgrade = do
version <- getVersion
case version of
- Just "0" -> upgradeFrom0
- Nothing -> return True -- repo not initted yet, no version
- Just v | v == currentVersion -> return True
- Just _ -> error "this version of git-annex is too old for this git repository!"
-
-upgradeFrom0 :: Annex Bool
-upgradeFrom0 = do
- showSideAction "Upgrading object directory layout..."
- g <- Annex.gitRepo
-
- -- do the reorganisation of the files
- let olddir = gitAnnexDir g
- keys <- getKeysPresent0' olddir
- forM_ keys $ \k -> moveAnnex k $ olddir </> keyFile k
-
- -- update the symlinks to the files
- files <- liftIO $ Git.inRepo g [Git.workTree g]
- fixlinks files
- Annex.queueRun
-
- setVersion
-
- return True
-
- where
- fixlinks [] = return ()
- fixlinks (f:fs) = do
- r <- Backend.lookupFile f
- case r of
- Nothing -> return ()
- Just (k, _) -> do
- link <- calcGitLink f k
- liftIO $ removeFile f
- liftIO $ createSymbolicLink link f
- Annex.queue "add" [Param "--"] f
- fixlinks fs
-
-getKeysPresent0' :: FilePath -> Annex [Key]
-getKeysPresent0' dir = do
- exists <- liftIO $ doesDirectoryExist dir
- if (not exists)
- then return []
- else do
- contents <- liftIO $ getDirectoryContents dir
- files <- liftIO $ filterM present contents
- return $ map fileKey files
- where
- present d = do
- result <- try $
- getFileStatus $ dir ++ "/" ++ takeFileName d
- case result of
- Right s -> return $ isRegularFile s
- Left _ -> return False
+ "0" -> Upgrade.V0.upgrade
+ "1" -> Upgrade.V1.upgrade
+ v | v == currentVersion -> return True
+ _ -> error "this version of git-annex is too old for this git repository!"
diff --git a/Upgrade/V0.hs b/Upgrade/V0.hs
new file mode 100644
index 000000000..5ba305817
--- /dev/null
+++ b/Upgrade/V0.hs
@@ -0,0 +1,63 @@
+{- git-annex v0 -> v1 upgrade support
+ -
+ - Copyright 2010 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Upgrade.V0 where
+
+import System.IO.Error (try)
+import System.Directory
+import Control.Monad.State (liftIO)
+import Control.Monad (filterM, forM_)
+import System.Posix.Files
+import System.FilePath
+
+import Content
+import Types
+import Locations
+import qualified Annex
+import Messages
+import qualified Upgrade.V1
+
+upgrade :: Annex Bool
+upgrade = do
+ showSideAction "Upgrading object directory layout v0 to v1..."
+ g <- Annex.gitRepo
+
+ -- do the reorganisation of the key files
+ let olddir = gitAnnexDir g
+ keys <- getKeysPresent0 olddir
+ forM_ keys $ \k -> moveAnnex k $ olddir </> keyFile0 k
+
+ -- update the symlinks to the key files
+ -- No longer needed here; V1.upgrade does the same thing
+
+ -- Few people had v0 repos, so go the long way around from 0 -> 1 -> 2
+ Upgrade.V1.upgrade
+
+-- these stayed unchanged between v0 and v1
+keyFile0 :: Key -> FilePath
+keyFile0 = Upgrade.V1.keyFile1
+fileKey0 :: FilePath -> Key
+fileKey0 = Upgrade.V1.fileKey1
+lookupFile0 :: FilePath -> Annex (Maybe (Key, Backend Annex))
+lookupFile0 = Upgrade.V1.lookupFile1
+
+getKeysPresent0 :: FilePath -> Annex [Key]
+getKeysPresent0 dir = do
+ exists <- liftIO $ doesDirectoryExist dir
+ if (not exists)
+ then return []
+ else do
+ contents <- liftIO $ getDirectoryContents dir
+ files <- liftIO $ filterM present contents
+ return $ map fileKey0 files
+ where
+ present d = do
+ result <- try $
+ getFileStatus $ dir ++ "/" ++ takeFileName d
+ case result of
+ Right s -> return $ isRegularFile s
+ Left _ -> return False
diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs
new file mode 100644
index 000000000..270de5f74
--- /dev/null
+++ b/Upgrade/V1.hs
@@ -0,0 +1,226 @@
+{- git-annex v1 -> v2 upgrade support
+ -
+ - Copyright 2011 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Upgrade.V1 where
+
+import System.IO.Error (try)
+import System.Directory
+import Control.Monad.State (liftIO)
+import Control.Monad (filterM, forM_, unless)
+import System.Posix.Files
+import System.FilePath
+import Data.String.Utils
+import System.Posix.Types
+import Data.Maybe
+
+import Key
+import Content
+import Types
+import Locations
+import LocationLog
+import qualified Annex
+import qualified GitRepo as Git
+import Backend
+import Messages
+import Version
+import Utility
+import qualified Command.Init
+
+-- v2 adds hashing of filenames of content and location log files.
+-- Key information is encoded in filenames differently, so
+-- both content and location log files move around, and symlinks
+-- to content need to be changed.
+--
+-- When upgrading a v1 key to v2, file size metadata ought to be
+-- added to the key (unless it is a WORM key, which encoded
+-- mtime:size in v1). This can only be done when the file content
+-- is present. Since upgrades need to happen consistently,
+-- (so that two repos get changed the same way by the upgrade, and
+-- will merge), that metadata cannot be added on upgrade.
+--
+-- Note that file size metadata
+-- will only be used for detecting situations where git-annex
+-- would run out of disk space, so if some keys don't have it,
+-- the impact is minor. At least initially. It could be used in the
+-- future by smart auto-repo balancing code, etc.
+--
+-- Anyway, since v2 plans ahead for other metadata being included
+-- in keys, there should probably be a way to update a key.
+-- Something similar to the migrate subcommand could be used,
+-- and users could then run that at their leisure.
+
+upgrade :: Annex Bool
+upgrade = do
+ showSideAction "Upgrading object directory layout v1 to v2..."
+
+ g <- Annex.gitRepo
+ if Git.repoIsLocalBare g
+ then do
+ moveContent
+ setVersion
+ else do
+ moveContent
+ updateSymlinks
+ moveLocationLogs
+
+ Annex.queueRun
+ setVersion
+
+ -- add new line to auto-merge hashed location logs
+ -- this commits, so has to come after the upgrade
+ liftIO $ Command.Init.gitAttributesWrite g
+
+ return True
+
+moveContent :: Annex ()
+moveContent = do
+ keys <- getKeysPresent1
+ forM_ keys move
+ where
+ move k = do
+ g <- Annex.gitRepo
+ let f = gitAnnexObjectDir g </> keyFile1 k </> keyFile1 k
+ let d = parentDir f
+ liftIO $ allowWrite d
+ liftIO $ allowWrite f
+ moveAnnex k f
+ liftIO $ removeDirectory d
+
+updateSymlinks :: Annex ()
+updateSymlinks = do
+ g <- Annex.gitRepo
+ files <- liftIO $ Git.inRepo g [Git.workTree g]
+ forM_ files $ fixlink
+ where
+ fixlink f = do
+ r <- lookupFile1 f
+ case r of
+ Nothing -> return ()
+ Just (k, _) -> do
+ link <- calcGitLink f k
+ liftIO $ removeFile f
+ liftIO $ createSymbolicLink link f
+ Annex.queue "add" [Param "--"] f
+ Annex.queueRunAt 10240
+
+moveLocationLogs :: Annex ()
+moveLocationLogs = do
+ logkeys <- oldlocationlogs
+ forM_ logkeys move
+ where
+ oldlocationlogs = do
+ g <- Annex.gitRepo
+ let dir = gitStateDir g
+ contents <- liftIO $ getDirectoryContents dir
+ return $ catMaybes $ map oldlog2key contents
+ move (l, k) = do
+ g <- Annex.gitRepo
+ let dest = logFile g k
+ let dir = gitStateDir g
+ let f = dir </> l
+ liftIO $ createDirectoryIfMissing True (parentDir dest)
+ -- could just git mv, but this way deals with
+ -- log files that are not checked into git,
+ -- as well as merging with already upgraded
+ -- logs that have been pulled from elsewhere
+ old <- liftIO $ readLog f
+ new <- liftIO $ readLog dest
+ liftIO $ writeLog dest (old++new)
+ Annex.queue "add" [Param "--"] dest
+ Annex.queue "add" [Param "--"] f
+ Annex.queue "rm" [Param "--quiet", Param "-f", Param "--"] f
+ Annex.queueRunAt 10240
+
+oldlog2key :: FilePath -> Maybe (FilePath, Key)
+oldlog2key l =
+ let len = length l - 4 in
+ if drop len l == ".log"
+ then let k = readKey1 (take len l) in
+ if null (keyName k) || null (keyBackendName k)
+ then Nothing
+ else Just (l, k)
+ else Nothing
+
+-- WORM backend keys: "WORM:mtime:size:filename"
+-- all the rest: "backend:key"
+readKey1 :: String -> Key
+readKey1 v = Key { keyName = n , keyBackendName = b, keySize = s, keyMtime = t }
+ where
+ bits = split ":" v
+ b = head bits
+ n = join ":" $ drop (if wormy then 3 else 1) bits
+ t = if wormy
+ then Just (read (bits !! 1) :: EpochTime)
+ else Nothing
+ s = if wormy
+ then Just (read (bits !! 2) :: Integer)
+ else Nothing
+ wormy = b == "WORM"
+
+showKey1 :: Key -> String
+showKey1 Key { keyName = n , keyBackendName = b, keySize = s, keyMtime = t } =
+ join ":" $ filter (not . null) [b, showifhere t, showifhere s, n]
+ where
+ showifhere Nothing = ""
+ showifhere (Just v) = show v
+
+keyFile1 :: Key -> FilePath
+keyFile1 key = replace "/" "%" $ replace "%" "&s" $ replace "&" "&a" $ showKey1 key
+
+fileKey1 :: FilePath -> Key
+fileKey1 file = readKey1 $
+ replace "&a" "&" $ replace "&s" "%" $ replace "%" "/" file
+
+logFile1 :: Git.Repo -> Key -> String
+logFile1 repo key = gitStateDir repo ++ keyFile1 key ++ ".log"
+
+lookupFile1 :: FilePath -> Annex (Maybe (Key, Backend Annex))
+lookupFile1 file = do
+ bs <- Annex.getState Annex.supportedBackends
+ tl <- liftIO $ try getsymlink
+ case tl of
+ Left _ -> return Nothing
+ Right l -> makekey bs l
+ where
+ getsymlink = do
+ l <- readSymbolicLink file
+ return $ takeFileName l
+ makekey bs l = do
+ case maybeLookupBackendName bs bname of
+ Nothing -> do
+ unless (null kname || null bname ||
+ not (isLinkToAnnex l)) $
+ warning skip
+ return Nothing
+ Just backend -> return $ Just (k, backend)
+ where
+ k = fileKey1 l
+ bname = keyBackendName k
+ kname = keyName k
+ skip = "skipping " ++ file ++
+ " (unknown backend " ++ bname ++ ")"
+
+getKeysPresent1 :: Annex [Key]
+getKeysPresent1 = do
+ g <- Annex.gitRepo
+ getKeysPresent1' $ gitAnnexObjectDir g
+getKeysPresent1' :: FilePath -> Annex [Key]
+getKeysPresent1' dir = do
+ exists <- liftIO $ doesDirectoryExist dir
+ if (not exists)
+ then return []
+ else do
+ contents <- liftIO $ getDirectoryContents dir
+ files <- liftIO $ filterM present contents
+ return $ map fileKey1 files
+ where
+ present d = do
+ result <- try $
+ getFileStatus $ dir ++ "/" ++ d ++ "/" ++ takeFileName d
+ case result of
+ Right s -> return $ isRegularFile s
+ Left _ -> return False
diff --git a/Version.hs b/Version.hs
index 9e31d3c9e..5f414e93b 100644
--- a/Version.hs
+++ b/Version.hs
@@ -16,26 +16,33 @@ import qualified GitRepo as Git
import Locations
currentVersion :: String
-currentVersion = "1"
+currentVersion = "2"
versionField :: String
versionField = "annex.version"
-getVersion :: Annex (Maybe String)
+getVersion :: Annex String
getVersion = do
g <- Annex.gitRepo
let v = Git.configGet g versionField ""
if not $ null v
- then return $ Just v
+ then return v
else do
-- version 0 was not recorded in .git/config;
-- such a repo should have an gitAnnexDir but no
- -- gitAnnexObjectDir
+ -- gitAnnexObjectDir.
+ --
+ -- version 1 may not be recorded if the user
+ -- forgot to init. Such a repo should have a
+ -- gitAnnexObjectDir already.
d <- liftIO $ doesDirectoryExist $ gitAnnexDir g
o <- liftIO $ doesDirectoryExist $ gitAnnexObjectDir g
- if d && not o
- then return $ Just "0"
- else return Nothing -- no version yet
+ case (d, o) of
+ (True, False) -> return "0"
+ (True, True) -> return "1"
+ _ -> do
+ setVersion
+ return currentVersion
setVersion :: Annex ()
setVersion = Annex.setConfig versionField currentVersion
diff --git a/debian/NEWS b/debian/NEWS
new file mode 100644
index 000000000..df8518cef
--- /dev/null
+++ b/debian/NEWS
@@ -0,0 +1,11 @@
+git-annex (0.20110316) experimental; urgency=low
+
+ This version reorganises the layout of git-annex's files in your repository.
+ There is an upgrade process to convert a repository from the old git-annex
+ to this version. While git-annex will attempt to transparently handle
+ upgrades, you may want to drive the upgrade process by hand.
+
+ See <http://git-annex.branchable.com/upgrades/> or
+ /usr/share/doc/git-annex/html/upgrades.html
+
+ -- Joey Hess <joeyh@debian.org> Wed, 16 Mar 2011 15:49:15 -0400
diff --git a/debian/changelog b/debian/changelog
index f5fc4eebe..47a914812 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,21 @@
+git-annex (0.20110316) experimental; urgency=low
+
+ * New repository format, annex.version=2.
+ * The first time git-annex is run in an old format repository, it
+ will automatically upgrade it to the new format, staging all
+ necessary changes to git. Also added a "git annex upgrade" command.
+ * Colons are now avoided in filenames, so bare clones of git repos
+ can be put on USB thumb drives formatted with vFAT or similar
+ filesystems.
+ * Added two levels of hashing to object directory and .git-annex logs,
+ to improve scalability with enormous numbers of annexed
+ objects. (With one hundred million annexed objects, each
+ directory would contain fewer than 1024 files.)
+ * The setkey, fromkey, and dropkey subcommands have changed how
+ the key is specified. --backend is no longer used with these.
+
+ -- Joey Hess <joeyh@debian.org> Wed, 16 Mar 2011 16:20:23 -0400
+
git-annex (0.24) unstable; urgency=low
Branched the 0.24 series, which will be maintained for a while to
diff --git a/doc/bugs/fat_support.mdwn b/doc/bugs/fat_support.mdwn
index 2c6c97385..60633c29b 100644
--- a/doc/bugs/fat_support.mdwn
+++ b/doc/bugs/fat_support.mdwn
@@ -10,3 +10,6 @@ be VFAT formatted:
[[!tag wishlist]]
+[[Done]]; in annex.version 2 repos, colons are entirely avoided in
+filenames. So a bare git clone can be put on VFAT, and git-annex
+used to move stuff --to and --from it, for sneakernet.
diff --git a/doc/bugs/free_space_checking.mdwn b/doc/bugs/free_space_checking.mdwn
index 34528a7b3..eaa3294d6 100644
--- a/doc/bugs/free_space_checking.mdwn
+++ b/doc/bugs/free_space_checking.mdwn
@@ -6,3 +6,13 @@ file around.
* And, need a way to tell the size of a file before copying it from
a remote, to check local disk space.
+
+ As of annex.version 2, this metadata can be available for any type
+ of backend. Newly added files will always have file size metadata,
+ while files that used a SHA backend and were added before the upgrade
+ won't.
+
+ So, need a migration process from eg SHA1 to SHA1+filesize. It will
+ find files that lack size info, and rename their keys to add the size
+ info. Users with old repos can run this on them, to get the missing
+ info recorded.
diff --git a/doc/forum/hashing_objects_directories.mdwn b/doc/forum/hashing_objects_directories.mdwn
index 715e972ca..5b7708fb5 100644
--- a/doc/forum/hashing_objects_directories.mdwn
+++ b/doc/forum/hashing_objects_directories.mdwn
@@ -17,3 +17,11 @@ or anything in between to a paranoid
Also the use of a colon specifically breaks FAT32 ([[bugs/fat_support]]), must it be a colon or could an extra directory be used? i.e. `.git/annex/objects/SHA1/*/...`
`git annex init` could also create all but the last level directory on initialization. I'm thinking `SHA1/1/1, SHA1/1/2, ..., SHA256/f/f, ..., URL/f/f, ..., WORM/f/f`
+
+> This is done now with a 2-level hash. It also hashes .git-annex/ log
+> files which were the worse problem really. Scales to hundreds of millions
+> of files with each dir having 1024 or fewer contents. Example:
+>
+> `me -> .git/annex/objects/71/9t/WORM-s3-m1300247299--me/WORM-s3-m1300247299--me`
+>
+> --[[Joey]]
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 4998a6491..ee4019068 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -234,11 +234,11 @@ Many git-annex commands will stage changes for later `git commit` by you.
This can be used to manually set up a file to link to a specified key
in the key-value backend. How you determine an existing key in the backend
- varies. For the URL backend, the key is just a URL to the content.
+ varies. For the URL backend, the key is based on an URL to the content.
Example:
- git annex fromkey --backend=URL --key=http://www.archive.org/somefile somefile
+ git annex fromkey --key=URL--http://www.archive.org/somefile somefile
* dropkey [key ...]
@@ -248,24 +248,24 @@ Many git-annex commands will stage changes for later `git commit` by you.
This can be used to drop content for arbitrary keys, which do not need
to have a file in the git repository pointing at them.
- A backend will typically need to be specified with --backend. If none
- is specified, the first configured backend is used.
-
Example:
- git annex dropkey --backend=SHA1 7da006579dd64330eb2456001fd01948430572f2
+ git annex dropkey SHA1-s10-7da006579dd64330eb2456001fd01948430572f2
* setkey file
This plumbing-level command sets the annexed data for a key to the content of
the specified file, and then removes the file.
- A backend will typically need to be specified with --backend. If none
- is specified, the first configured backend is used.
-
Example:
- git annex setkey --backend=WORM --key=1287765018:3 /tmp/file
+ git annex setkey --key=WORM-s3-m1287765018--file /tmp/file
+
+* upgrade
+
+ Upgrades the repository to current layout. Upgrades are done automatically
+ whenever a newer git annex encounters an old repository; this command
+ allows explcitly starting an upgrade.
# OPTIONS
@@ -302,7 +302,10 @@ Many git-annex commands will stage changes for later `git commit` by you.
* --backend=name
- Specifies which key-value backend to use.
+ Specifies which key-value backend to use. This can be used when
+ adding a file to the annex, or migrating a file. Once files
+ are in the annex, their backend is known and this option is not
+ necessary.
* --key=name
diff --git a/doc/internals.mdwn b/doc/internals.mdwn
index 3f680dd8f..a133320b4 100644
--- a/doc/internals.mdwn
+++ b/doc/internals.mdwn
@@ -2,12 +2,15 @@ In the world of git, we're not scared about internal implementation
details, and sometimes we like to dive in and tweak things by hand. Here's
some documentation to that end.
-## `.git/annex/objects/*/*`
+## `.git/annex/objects/aa/bb/*/*`
This is where locally available file contents are actually stored.
Files added to the annex get a symlink checked into git that points
to the file content.
+First there are two levels of directories used for hashing, to prevent
+too many things ending up in any one directory.
+
Each subdirectory has the name of a key in one of the
[[key-value_backends|backends]]. The file inside also has the name of the key.
This two-level structure is used because it allows the write bit to be removed
@@ -41,10 +44,11 @@ Example:
e605dca6-446a-11e0-8b2a-002170d25c55 1
26339d22-446b-11e0-9101-002170d25c55 ?
-## `.git-annex/*.log`
+## `.git-annex/aa/bb/*.log`
The remainder of the log files record [[location_tracking]] information
-for file contents. The name of the key is the filename, and the content
+for file contents. Again these are placed in two levels of subdirectories
+for hashing. The name of the key is the filename, and the content
consists of a timestamp, either 1 (present) or 0 (not present), and
the UUID of the repository that has or lacks the file content.
diff --git a/doc/todo/object_dir_reorg_v2.mdwn b/doc/todo/object_dir_reorg_v2.mdwn
index 1c2d2f21b..49666ddc7 100644
--- a/doc/todo/object_dir_reorg_v2.mdwn
+++ b/doc/todo/object_dir_reorg_v2.mdwn
@@ -19,3 +19,7 @@ all users, so this should be the *last* reorg in the forseeable future.
(Probably everything after ",k" should be part of the key, even if it
contains the "," separator character. Otherwise an escaping mechanism
would be needed.)
+
+[[done]] now!
+
+Although [[bugs/free_space_checking]] is not quite there --[[Joey]]
diff --git a/doc/upgrades.mdwn b/doc/upgrades.mdwn
new file mode 100644
index 000000000..1371dc033
--- /dev/null
+++ b/doc/upgrades.mdwn
@@ -0,0 +1,67 @@
+Occasionally improvments are made to how git-annex stores its data,
+that require an upgrade process to convert repositories made with an older
+version to be used by a newer version. It's annoying, it should happen
+rarely, but sometimes, it's worth it.
+
+There's a committment that git-annex will always support upgrades from all
+past versions. After all, you may have offline drives from an earlier
+git-annex, and might want to use them with a newer git-annex.
+
+## Upgrade process
+
+git-annex will automatically notice if it is run in a repository that
+needs an upgrade, and perform the upgrade before running whatever it
+was asked to do. Or you can use the "git annex upgrade" command to
+explicitly do an upgrade. The upgrade can tend to take a while,
+if you have a lot of files.
+
+Each clone of a repository should be individually upgraded.
+Until a repository's remotes have been upgraded, git-annex
+may refuse to communicate with them.
+
+Generally, start by upgrading one repository, and then you can commit
+the changes git-annex staged during upgrade, and push them out to other
+repositories. And then upgrade those other repositories. Doing it this
+way avoids git-annex doing some duplicate work during the upgrade.
+
+The upgrade process is guaranteed to be conflict-free. Unless you
+already have git conflicts in your repository or between repositories.
+Upgrading a repository with conflicts is not recommended; resolve the
+conflicts first before upgrading git-annex.
+
+Example upgrade process:
+
+ cd localrepo
+ git pull
+ git annex upgrade
+ (Upgrading object directory layout v1 to v2...)
+ git commit -m "upgrade v1 to v2"
+ git push
+
+ ssh remote
+ cd remoterepo
+ git pull
+ git annex upgrade
+ ...
+
+## Upgrade events, so far
+
+### v1 -> v2 (git-annex version 0.23 to version 0.20110316)
+
+Involved adding hashing to .git/annex/ and changing the names of all keys.
+Symlinks changed.
+
+Also, hashing was added to location log files in .git-annex/.
+And .gitattributes needed to have another line added to it.
+
+Handled transparently.
+
+### v0 -> v1 (git-annex version 0.03 to version 0.04)
+
+Involved a reogranisation of the layout of .git/annex/. Symlinks changed.
+
+Handled more or less transparently, although git-annex was just 2 weeks
+old at the time, and had few users other than Joey.
+
+This upgrade is belived to still be supported, but has not been tested
+lately.
diff --git a/doc/walkthrough/modifying_annexed_files.mdwn b/doc/walkthrough/modifying_annexed_files.mdwn
index 3ad4e82ea..f75b73a24 100644
--- a/doc/walkthrough/modifying_annexed_files.mdwn
+++ b/doc/walkthrough/modifying_annexed_files.mdwn
@@ -27,7 +27,7 @@ and this symlink is what gets committed to git in the end.
add my_cool_big_file ok
[master 64cda67] changed an annexed file
2 files changed, 2 insertions(+), 1 deletions(-)
- create mode 100644 .git-annex/WORM:1289672605:30:file.log
+ create mode 100644 .git-annex/WORM-s30-m1289672605--file.log
There is one problem with using `git commit` like this: Git wants to first
stage the entire contents of the file in its index. That can be slow for
diff --git a/doc/walkthrough/moving_file_content_between_repositories.mdwn b/doc/walkthrough/moving_file_content_between_repositories.mdwn
index d7150f109..6b3e3f4e8 100644
--- a/doc/walkthrough/moving_file_content_between_repositories.mdwn
+++ b/doc/walkthrough/moving_file_content_between_repositories.mdwn
@@ -9,5 +9,5 @@ makes it very easy.
move my_cool_big_file (moving to usbdrive...) ok
# git annex move video/hackity_hack_and_kaxxt.mov --from fileserver
move video/hackity_hack_and_kaxxt.mov (moving from fileserver...)
- WORM:1274316523:86050597:hackity_hack_and_kax 100% 82MB 199.1KB/s 07:02
+ WORM-s86050597-m1274316523--hackity_hack_and_kax 100% 82MB 199.1KB/s 07:02
ok
diff --git a/doc/walkthrough/unused_data.mdwn b/doc/walkthrough/unused_data.mdwn
index 69a581fe1..9be32577c 100644
--- a/doc/walkthrough/unused_data.mdwn
+++ b/doc/walkthrough/unused_data.mdwn
@@ -12,8 +12,8 @@ eliminate it to save space.
unused (checking for unused data...)
Some annexed data is no longer pointed to by any files in the repository.
NUMBER KEY
- 1 WORM:1289672605:3:file
- 2 WORM:1289672605:14:file
+ 1 WORM-s3-m1289672605--file
+ 2 WORM-s14-m1289672605--file
(To see where data was previously used, try: git log --stat -S'KEY')
(To remove unwanted data: git-annex dropunused NUMBER)
ok
diff --git a/doc/walkthrough/using_ssh_remotes.mdwn b/doc/walkthrough/using_ssh_remotes.mdwn
index 6af9e1f47..4c2f830de 100644
--- a/doc/walkthrough/using_ssh_remotes.mdwn
+++ b/doc/walkthrough/using_ssh_remotes.mdwn
@@ -13,7 +13,7 @@ Now you can get files and they will be transferred (using `rsync` via `ssh`):
# git annex get my_cool_big_file
get my_cool_big_file (getting UUID for origin...) (copying from origin...)
- WORM:1285650548:2159:my_cool_big_file 100% 2159 2.1KB/s 00:00
+ WORM-s2159-m1285650548--my_cool_big_file 100% 2159 2.1KB/s 00:00
ok
When you drop files, git-annex will ssh over to the remote and make
diff --git a/doc/walkthrough/using_the_URL_backend.mdwn b/doc/walkthrough/using_the_URL_backend.mdwn
index fe79a6be2..585fd0668 100644
--- a/doc/walkthrough/using_the_URL_backend.mdwn
+++ b/doc/walkthrough/using_the_URL_backend.mdwn
@@ -5,7 +5,7 @@ Another handy backend is the URL backend, which can fetch file's content
from remote URLs. Here's how to set up some files in your repository
that use this backend:
- # git annex fromkey --backend=URL --key=http://www.archive.org/somefile somefile
+ # git annex fromkey --key=URL--http://www.archive.org/somefile somefile
fromkey somefile ok
# git commit -m "added a file from the Internet Archive"
diff --git a/test.hs b/test.hs
index 31960bb2e..49f7f2ab9 100644
--- a/test.hs
+++ b/test.hs
@@ -29,7 +29,7 @@ import qualified Backend
import qualified GitRepo as Git
import qualified Locations
import qualified Utility
-import qualified BackendTypes
+import qualified BackendClass
import qualified Types
import qualified GitAnnex
import qualified LocationLog
@@ -38,6 +38,7 @@ import qualified Trust
import qualified Remotes
import qualified Content
import qualified Command.DropUnused
+import qualified Key
main :: IO ()
main = do
@@ -55,7 +56,7 @@ quickcheck :: Test
quickcheck = TestLabel "quickcheck" $ TestList
[ qctest "prop_idempotent_deencode" Git.prop_idempotent_deencode
, qctest "prop_idempotent_fileKey" Locations.prop_idempotent_fileKey
- , qctest "prop_idempotent_key_read_show" BackendTypes.prop_idempotent_key_read_show
+ , qctest "prop_idempotent_key_read_show" Key.prop_idempotent_key_read_show
, qctest "prop_idempotent_shellEscape" Utility.prop_idempotent_shellEscape
, qctest "prop_idempotent_shellEscape_multiword" Utility.prop_idempotent_shellEscape_multiword
, qctest "prop_parentDir_basics" Utility.prop_parentDir_basics
@@ -119,10 +120,10 @@ test_add = "git-annex add" ~: TestList [basic, sha1dup]
test_setkey :: Test
test_setkey = "git-annex setkey/fromkey" ~: TestCase $ inmainrepo $ do
writeFile tmp $ content sha1annexedfile
- r <- annexeval $ BackendTypes.getKey backendSHA1 tmp
- let sha1 = BackendTypes.keyName $ fromJust r
- git_annex "setkey" ["-q", "--backend", "SHA1", "--key", sha1, tmp] @? "setkey failed"
- git_annex "fromkey" ["-q", "--backend", "SHA1", "--key", sha1, sha1annexedfile] @? "fromkey failed"
+ r <- annexeval $ BackendClass.getKey backendSHA1 tmp
+ let key = show $ fromJust r
+ git_annex "setkey" ["-q", "--key", key, tmp] @? "setkey failed"
+ git_annex "fromkey" ["-q", "--key", key, sha1annexedfile] @? "fromkey failed"
Utility.boolSystem "git" [Utility.Params "commit -q -a -m commit"] @? "git commit failed"
annexed_present sha1annexedfile
where
@@ -438,7 +439,7 @@ test_unused = "git-annex unused/dropunused" ~: intmpclonerepo $ do
checkunused [annexedfilekey, sha1annexedfilekey]
-- good opportunity to test dropkey also
- git_annex "dropkey" ["-q", "--force", BackendTypes.keyName annexedfilekey]
+ git_annex "dropkey" ["-q", "--force", show annexedfilekey]
@? "dropkey failed"
checkunused [sha1annexedfilekey]