summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Annex.hs10
-rw-r--r--Backend.hs26
-rw-r--r--Backend/File.hs2
-rw-r--r--Backend/SHA.hs13
-rw-r--r--Backend/URL.hs8
-rw-r--r--Backend/WORM.hs42
-rw-r--r--BackendClass.hs39
-rw-r--r--BackendTypes.hs79
-rw-r--r--Command.hs26
-rw-r--r--Command/DropKey.hs10
-rw-r--r--Command/DropUnused.hs5
-rw-r--r--Command/FromKey.hs2
-rw-r--r--Command/InAnnex.hs8
-rw-r--r--Command/Move.hs5
-rw-r--r--Command/RecvKey.hs8
-rw-r--r--Command/SendKey.hs8
-rw-r--r--Command/Unused.hs2
-rw-r--r--Content.hs3
-rw-r--r--Key.hs87
-rw-r--r--LocationLog.hs5
-rw-r--r--Locations.hs56
-rw-r--r--Makefile1
-rw-r--r--Remotes.hs5
-rw-r--r--Types.hs8
-rw-r--r--Upgrade.hs9
-rw-r--r--Version.hs2
-rw-r--r--debian/changelog10
-rw-r--r--doc/bugs/fat_support.mdwn3
-rw-r--r--doc/forum/hashing_objects_directories.mdwn8
-rw-r--r--doc/git-annex.mdwn19
-rw-r--r--doc/internals.mdwn10
-rw-r--r--doc/walkthrough/modifying_annexed_files.mdwn2
-rw-r--r--doc/walkthrough/moving_file_content_between_repositories.mdwn2
-rw-r--r--doc/walkthrough/unused_data.mdwn4
-rw-r--r--doc/walkthrough/using_ssh_remotes.mdwn2
-rw-r--r--doc/walkthrough/using_the_URL_backend.mdwn2
-rw-r--r--test.hs15
37 files changed, 327 insertions, 219 deletions
diff --git a/Annex.hs b/Annex.hs
index dd3362b29..f8cfd0ec9 100644
--- a/Annex.hs
+++ b/Annex.hs
@@ -25,7 +25,7 @@ import Data.Maybe
import qualified GitRepo as Git
import qualified GitQueue
-import qualified BackendTypes
+import qualified BackendClass
import Utility
-- git-annex's monad
@@ -34,8 +34,8 @@ type Annex = StateT AnnexState IO
-- internal state storage
data AnnexState = AnnexState
{ repo :: Git.Repo
- , backends :: [BackendTypes.Backend Annex]
- , supportedBackends :: [BackendTypes.Backend Annex]
+ , backends :: [BackendClass.Backend Annex]
+ , supportedBackends :: [BackendClass.Backend Annex]
, repoqueue :: GitQueue.Queue
, quiet :: Bool
, force :: Bool
@@ -47,7 +47,7 @@ data AnnexState = AnnexState
, remotesread :: Bool
} deriving (Show)
-newState :: Git.Repo -> [BackendTypes.Backend Annex] -> AnnexState
+newState :: Git.Repo -> [BackendClass.Backend Annex] -> AnnexState
newState gitrepo allbackends = AnnexState
{ repo = gitrepo
, backends = []
@@ -64,7 +64,7 @@ newState gitrepo allbackends = AnnexState
}
{- Create and returns an Annex state object for the specified git repo. -}
-new :: Git.Repo -> [BackendTypes.Backend Annex] -> IO AnnexState
+new :: Git.Repo -> [BackendClass.Backend Annex] -> IO AnnexState
new gitrepo allbackends = do
gitrepo' <- liftIO $ Git.configRead gitrepo
return $ newState gitrepo' allbackends
diff --git a/Backend.hs b/Backend.hs
index df23e80a3..e1f8f388b 100644
--- a/Backend.hs
+++ b/Backend.hs
@@ -39,7 +39,8 @@ import Locations
import qualified GitRepo as Git
import qualified Annex
import Types
-import qualified BackendTypes as B
+import Key
+import qualified BackendClass as B
import Messages
{- List of backends in the order to try them when storing a new key. -}
@@ -135,18 +136,19 @@ lookupFile file = do
getsymlink = do
l <- readSymbolicLink file
return $ takeFileName l
- makekey bs l = do
+ makekey bs l =
+ case fileKey l of
+ Just k -> makeret k l bs
+ Nothing -> return Nothing
+ makeret k l bs =
case maybeLookupBackendName bs bname of
- Nothing -> do
- unless (null kname || null bname ||
- not (isLinkToAnnex l)) $
- warning skip
- return Nothing
- Just backend -> return $ Just (k, backend)
+ Just backend -> return $ Just (k, backend)
+ Nothing -> do
+ when (isLinkToAnnex l) $
+ warning skip
+ return Nothing
where
- k = fileKey l
- bname = backendName k
- kname = keyName k
+ bname = keyBackendName k
skip = "skipping " ++ file ++
" (unknown backend " ++ bname ++ ")"
@@ -164,4 +166,4 @@ chooseBackends fs = do
keyBackend :: Key -> Annex (Backend Annex)
keyBackend key = do
bs <- Annex.getState Annex.supportedBackends
- return $ lookupBackendName bs $ backendName key
+ return $ lookupBackendName bs $ keyBackendName key
diff --git a/Backend/File.hs b/Backend/File.hs
index d76cd2939..a5e243199 100644
--- a/Backend/File.hs
+++ b/Backend/File.hs
@@ -18,7 +18,7 @@ import Control.Monad.State
import System.Directory
import Data.List
-import BackendTypes
+import BackendClass
import LocationLog
import Locations
import qualified Remotes
diff --git a/Backend/SHA.hs b/Backend/SHA.hs
index 4eea890ce..056385107 100644
--- a/Backend/SHA.hs
+++ b/Backend/SHA.hs
@@ -13,9 +13,10 @@ import System.Cmd.Utils
import System.IO
import System.Directory
import Data.Maybe
+import System.Posix.Files
import qualified Backend.File
-import BackendTypes
+import BackendClass
import Messages
import qualified Annex
import Locations
@@ -23,6 +24,7 @@ import Content
import Types
import Utility
import qualified SysConfig
+import Key
type SHASize = Int
@@ -63,11 +65,16 @@ shaN size file = do
where
command = "sha" ++ (show size) ++ "sum"
--- A key is a checksum of its contents.
+{- A key is a checksum of its contents. -}
keyValue :: SHASize -> FilePath -> Annex (Maybe Key)
keyValue size file = do
s <- shaN size file
- return $ Just $ Key (shaName size, s)
+ stat <- liftIO $ getFileStatus file
+ return $ Just $ stubKey {
+ keyName = s,
+ keyBackendName = shaName size,
+ keySize = Just $ fromIntegral $ fileSize stat
+ }
-- A key's checksum is checked during fsck.
checkKeyChecksum :: SHASize -> Key -> Annex Bool
diff --git a/Backend/URL.hs b/Backend/URL.hs
index 29dc8fefa..02ce3563c 100644
--- a/Backend/URL.hs
+++ b/Backend/URL.hs
@@ -8,12 +8,12 @@
module Backend.URL (backends) where
import Control.Monad.State (liftIO)
-import Data.String.Utils
import Types
-import BackendTypes
+import BackendClass
import Utility
import Messages
+import Key
backends :: [Backend Annex]
backends = [backend]
@@ -52,8 +52,8 @@ dummyOk _ = return True
downloadUrl :: Key -> FilePath -> Annex Bool
downloadUrl key file = do
- showNote "downloading"
+ showNote $ "downloading"
showProgress -- make way for curl progress bar
liftIO $ boolSystem "curl" [Params "-# -o", File file, File url]
where
- url = join ":" $ drop 1 $ split ":" $ show key
+ url = keyName key
diff --git a/Backend/WORM.hs b/Backend/WORM.hs
index a0d814aa0..a011995da 100644
--- a/Backend/WORM.hs
+++ b/Backend/WORM.hs
@@ -10,17 +10,17 @@ module Backend.WORM (backends) where
import Control.Monad.State
import System.FilePath
import System.Posix.Files
-import System.Posix.Types
import System.Directory
-import Data.String.Utils
+import Data.Maybe
import qualified Backend.File
-import BackendTypes
+import BackendClass
import Locations
import qualified Annex
import Content
import Messages
import Types
+import Key
backends :: [Backend Annex]
backends = [backend]
@@ -32,31 +32,25 @@ backend = Backend.File.backend {
fsckKey = Backend.File.checkKey checkKeySize
}
--- The key is formed from the file size, modification time, and the
--- basename of the filename.
---
--- That allows multiple files with the same names to have different keys,
--- while also allowing a file to be moved around while retaining the
--- same key.
+{- The key includes the file size, modification time, and the
+ - basename of the filename.
+ -
+ - That allows multiple files with the same names to have different keys,
+ - while also allowing a file to be moved around while retaining the
+ - same key.
+ -}
keyValue :: FilePath -> Annex (Maybe Key)
keyValue file = do
stat <- liftIO $ getFileStatus file
- return $ Just $ Key (name backend, key stat)
- where
- key stat = uniqueid stat ++ sep ++ base
- uniqueid stat = show (modificationTime stat) ++ sep ++
- show (fileSize stat)
- base = takeFileName file
- sep = ":"
-
-{- Extracts the file size from a key. -}
-keySize :: Key -> FileOffset
-keySize key = read $ section !! 1
- where
- section = split ":" (keyName key)
+ return $ Just $ Key {
+ keyName = takeFileName file,
+ keyBackendName = name backend,
+ keySize = Just $ fromIntegral $ fileSize stat,
+ keyMtime = Just $ modificationTime stat
+ }
{- The size of the data for a key is checked against the size encoded in
- - the key. Note that the modification time is not checked. -}
+ - the key's metadata. -}
checkKeySize :: Key -> Annex Bool
checkKeySize key = do
g <- Annex.gitRepo
@@ -66,7 +60,7 @@ checkKeySize key = do
then return True
else do
s <- liftIO $ getFileStatus file
- if fileSize s == keySize key
+ if fromIntegral (fileSize s) == fromJust (keySize key)
then return True
else do
dest <- moveBad key
diff --git a/BackendClass.hs b/BackendClass.hs
new file mode 100644
index 000000000..909ae8f96
--- /dev/null
+++ b/BackendClass.hs
@@ -0,0 +1,39 @@
+{- git-annex key/value backend data type
+ -
+ - Most things should not need this, using Types instead
+ -
+ - Copyright 2010 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module BackendClass where
+
+import Key
+
+data Backend a = Backend {
+ -- name of this backend
+ name :: String,
+ -- converts a filename to a key
+ getKey :: FilePath -> a (Maybe Key),
+ -- stores a file's contents to a key
+ storeFileKey :: FilePath -> Key -> a Bool,
+ -- retrieves a key's contents to a file
+ retrieveKeyFile :: Key -> FilePath -> a Bool,
+ -- removes a key, optionally checking that enough copies are stored
+ -- elsewhere
+ removeKey :: Key -> Maybe Int -> a Bool,
+ -- checks if a backend is storing the content of a key
+ hasKey :: Key -> a Bool,
+ -- called during fsck to check a key
+ -- (second parameter may be the filename associated with it)
+ -- (third parameter may be the number of copies that there should
+ -- be of the key)
+ fsckKey :: Key -> Maybe FilePath -> Maybe Int -> a Bool
+}
+
+instance Show (Backend a) where
+ show backend = "Backend { name =\"" ++ name backend ++ "\" }"
+
+instance Eq (Backend a) where
+ a == b = name a == name b
diff --git a/BackendTypes.hs b/BackendTypes.hs
deleted file mode 100644
index c0705a550..000000000
--- a/BackendTypes.hs
+++ /dev/null
@@ -1,79 +0,0 @@
-{- git-annex key/value backend data types
- -
- - Most things should not need this, using Types instead
- -
- - Copyright 2010 Joey Hess <joey@kitenet.net>
- -
- - Licensed under the GNU GPL version 3 or higher.
- -}
-
-module BackendTypes where
-
-import Data.String.Utils
-import Test.QuickCheck
-
-type KeyName = String
-type BackendName = String
-newtype Key = Key (BackendName, KeyName) deriving (Eq, Ord)
-
-data Backend a = Backend {
- -- name of this backend
- name :: String,
- -- converts a filename to a key
- getKey :: FilePath -> a (Maybe Key),
- -- stores a file's contents to a key
- storeFileKey :: FilePath -> Key -> a Bool,
- -- retrieves a key's contents to a file
- retrieveKeyFile :: Key -> FilePath -> a Bool,
- -- removes a key, optionally checking that enough copies are stored
- -- elsewhere
- removeKey :: Key -> Maybe Int -> a Bool,
- -- checks if a backend is storing the content of a key
- hasKey :: Key -> a Bool,
- -- called during fsck to check a key
- -- (second parameter may be the filename associated with it)
- -- (third parameter may be the number of copies that there should
- -- be of the key)
- fsckKey :: Key -> Maybe FilePath -> Maybe Int -> a Bool
-}
-
-instance Show (Backend a) where
- show backend = "Backend { name =\"" ++ name backend ++ "\" }"
-
-instance Eq (Backend a) where
- a == b = name a == name b
-
--- accessors for the parts of a key
-keyName :: Key -> KeyName
-keyName (Key (_,k)) = k
-backendName :: Key -> BackendName
-backendName (Key (b,_)) = b
-
--- constructs a key in a backend
-genKey :: Backend a -> KeyName -> Key
-genKey b f = Key (name b,f)
-
--- show a key to convert it to a string; the string includes the
--- name of the backend to avoid collisions between key strings
-instance Show Key where
- show (Key (b, k)) = b ++ ":" ++ k
-
-instance Read Key where
- readsPrec _ s = [(Key (b,k), "")]
- where
- l = split ":" s
- b = if null l then "" else head l
- k = join ":" $ drop 1 l
-
--- for quickcheck
-instance Arbitrary Key where
- arbitrary = do
- backendname <- arbitrary
- keyname <- arbitrary
- return $ Key (backendname, keyname)
-
-prop_idempotent_key_read_show :: Key -> Bool
-prop_idempotent_key_read_show k
- -- backend names will never contain colons
- | ':' `elem` (backendName k) = True
- | otherwise = k == (read $ show k)
diff --git a/Command.hs b/Command.hs
index eba7f2cef..41ad884a9 100644
--- a/Command.hs
+++ b/Command.hs
@@ -22,6 +22,7 @@ import qualified Annex
import qualified GitRepo as Git
import Locations
import Utility
+import Key
{- A command runs in four stages.
-
@@ -45,6 +46,8 @@ type CommandCleanup = Annex Bool
- functions. -}
type CommandSeekStrings = CommandStartString -> CommandSeek
type CommandStartString = String -> CommandStart
+type CommandSeekKeys = CommandStartKey -> CommandSeek
+type CommandStartKey = Key -> CommandStart
type BackendFile = (FilePath, Maybe (Backend Annex))
type CommandSeekBackendFiles = CommandStartBackendFile -> CommandSeek
type CommandStartBackendFile = BackendFile -> CommandStart
@@ -166,8 +169,12 @@ withFilesUnlocked' typechanged a params = do
map (\f -> Git.workTree repo ++ "/" ++ f) typechangedfiles
unlockedfiles' <- filterFiles unlockedfiles
backendPairs a unlockedfiles'
-withKeys :: CommandSeekStrings
-withKeys a params = return $ map a params
+withKeys :: CommandSeekKeys
+withKeys a params = return $ map a $ map parse params
+ where
+ parse p = case readKey p of
+ Just k -> k
+ Nothing -> error "bad key"
withTempFile :: CommandSeekStrings
withTempFile a params = return $ map a params
withNothing :: CommandSeekNothing
@@ -228,17 +235,18 @@ paramName = "NAME"
paramNothing :: String
paramNothing = ""
-{- The Key specified by the --key and --backend parameters. -}
+{- The Key specified by the --key parameter. -}
cmdlineKey :: Annex Key
cmdlineKey = do
k <- Annex.getState Annex.defaultkey
- backends <- Backend.list
- return $ genKey (head backends) (keyname' k)
+ case k of
+ Nothing -> nokey
+ Just "" -> nokey
+ Just kstring -> case readKey kstring of
+ Nothing -> error "bad key"
+ Just key -> return key
where
- keyname' Nothing = badkey
- keyname' (Just "") = badkey
- keyname' (Just n) = n
- badkey = error "please specify the key with --key"
+ nokey = error "please specify the key with --key"
{- Given an original list of files, and an expanded list derived from it,
- ensures that the original list's ordering is preserved.
diff --git a/Command/DropKey.hs b/Command/DropKey.hs
index 8c7566df8..b3cc60961 100644
--- a/Command/DropKey.hs
+++ b/Command/DropKey.hs
@@ -9,7 +9,6 @@ module Command.DropKey where
import Command
import qualified Annex
-import qualified Backend
import LocationLog
import Types
import Content
@@ -22,11 +21,8 @@ command = [Command "dropkey" (paramRepeating paramKey) seek
seek :: [CommandSeek]
seek = [withKeys start]
-{- Drops cached content for a key. -}
-start :: CommandStartString
-start keyname = do
- backends <- Backend.list
- let key = genKey (head backends) keyname
+start :: CommandStartKey
+start key = do
present <- inAnnex key
force <- Annex.getState Annex.force
if not present
@@ -34,7 +30,7 @@ start keyname = do
else if not force
then error "dropkey is can cause data loss; use --force if you're sure you want to do this"
else do
- showStart "dropkey" keyname
+ showStart "dropkey" (show key)
return $ Just $ perform key
perform :: Key -> CommandPerform
diff --git a/Command/DropUnused.hs b/Command/DropUnused.hs
index 594564cb7..8ed61ba65 100644
--- a/Command/DropUnused.hs
+++ b/Command/DropUnused.hs
@@ -11,6 +11,7 @@ import Control.Monad (when)
import Control.Monad.State (liftIO)
import qualified Data.Map as M
import System.Directory
+import Data.Maybe
import Command
import Types
@@ -19,6 +20,7 @@ import Locations
import qualified Annex
import qualified Command.Drop
import Backend
+import Key
command :: [Command]
command = [Command "dropunused" (paramRepeating paramNumber) seek
@@ -55,7 +57,6 @@ readUnusedLog = do
return $ M.fromList $ map parse $ lines l
else return $ M.empty
where
- parse line = (head ws, tokey $ unwords $ tail ws)
+ parse line = (head ws, fromJust $ readKey $ unwords $ tail ws)
where
ws = words line
- tokey s = read s :: Key
diff --git a/Command/FromKey.hs b/Command/FromKey.hs
index 717d528bc..176d2cd54 100644
--- a/Command/FromKey.hs
+++ b/Command/FromKey.hs
@@ -16,9 +16,9 @@ import Command
import qualified Annex
import Utility
import qualified Backend
-import Types
import Content
import Messages
+import Key
command :: [Command]
command = [Command "fromkey" paramPath seek
diff --git a/Command/InAnnex.hs b/Command/InAnnex.hs
index 68ac9a2c6..fa81fc9a4 100644
--- a/Command/InAnnex.hs
+++ b/Command/InAnnex.hs
@@ -11,9 +11,7 @@ import Control.Monad.State (liftIO)
import System.Exit
import Command
-import Types
import Content
-import qualified Backend
command :: [Command]
command = [Command "inannex" (paramRepeating paramKey) seek
@@ -22,10 +20,8 @@ command = [Command "inannex" (paramRepeating paramKey) seek
seek :: [CommandSeek]
seek = [withKeys start]
-start :: CommandStartString
-start keyname = do
- backends <- Backend.list
- let key = genKey (head backends) keyname
+start :: CommandStartKey
+start key = do
present <- inAnnex key
if present
then return Nothing
diff --git a/Command/Move.hs b/Command/Move.hs
index 3774ccbe9..2d6c973fe 100644
--- a/Command/Move.hs
+++ b/Command/Move.hs
@@ -20,7 +20,7 @@ import qualified Remotes
import UUID
import Messages
import Utility
-
+
command :: [Command]
command = [Command "move" paramPath seek
"move content of files to/from another repository"]
@@ -136,8 +136,7 @@ fromCleanup :: Git.Repo -> Bool -> Key -> CommandCleanup
fromCleanup src True key = do
ok <- Remotes.onRemote src (boolSystem, False) "dropkey"
[ Params "--quiet --force"
- , Param $ "--backend=" ++ backendName key
- , Param $ keyName key
+ , Param $ show key
]
-- better safe than sorry: assume the src dropped the key
-- even if it seemed to fail; the failure could have occurred
diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs
index 8a9673050..c7c37d1e3 100644
--- a/Command/RecvKey.hs
+++ b/Command/RecvKey.hs
@@ -12,10 +12,8 @@ import Control.Monad.State (liftIO)
import System.Exit
import Command
-import Types
import CmdLine
import Content
-import qualified Backend
import RsyncFile
command :: [Command]
@@ -25,10 +23,8 @@ command = [Command "recvkey" paramKey seek
seek :: [CommandSeek]
seek = [withKeys start]
-start :: CommandStartString
-start keyname = do
- backends <- Backend.list
- let key = genKey (head backends) keyname
+start :: CommandStartKey
+start key = do
present <- inAnnex key
when present $
error "key is already present in annex"
diff --git a/Command/SendKey.hs b/Command/SendKey.hs
index cb883b53a..56974bda9 100644
--- a/Command/SendKey.hs
+++ b/Command/SendKey.hs
@@ -14,9 +14,7 @@ import System.Exit
import Locations
import qualified Annex
import Command
-import Types
import Content
-import qualified Backend
import RsyncFile
command :: [Command]
@@ -26,10 +24,8 @@ command = [Command "sendkey" paramKey seek
seek :: [CommandSeek]
seek = [withKeys start]
-start :: CommandStartString
-start keyname = do
- backends <- Backend.list
- let key = genKey (head backends) keyname
+start :: CommandStartKey
+start key = do
present <- inAnnex key
g <- Annex.gitRepo
let file = gitAnnexLocation g key
diff --git a/Command/Unused.hs b/Command/Unused.hs
index a614ce5d9..52e483d87 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -126,4 +126,4 @@ tmpKeys = do
contents <- liftIO $ getDirectoryContents tmp
files <- liftIO $ filterM doesFileExist $
map (tmp </>) contents
- return $ map (fileKey . takeFileName) files
+ return $ catMaybes $ map (fileKey . takeFileName) files
diff --git a/Content.hs b/Content.hs
index dc675389f..1a5a80a9f 100644
--- a/Content.hs
+++ b/Content.hs
@@ -26,6 +26,7 @@ import System.Path
import Control.Monad (when, unless, filterM)
import System.Posix.Files
import System.FilePath
+import Data.Maybe
import Types
import Locations
@@ -162,7 +163,7 @@ getKeysPresent' dir = do
else do
contents <- liftIO $ getDirectoryContents dir
files <- liftIO $ filterM present contents
- return $ map fileKey files
+ return $ catMaybes $ map fileKey files
where
present d = do
result <- try $
diff --git a/Key.hs b/Key.hs
new file mode 100644
index 000000000..f52aea31b
--- /dev/null
+++ b/Key.hs
@@ -0,0 +1,87 @@
+{- git-annex Key data type
+ -
+ - Copyright 2011 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Key (
+ Key(..),
+ stubKey,
+ readKey,
+
+ prop_idempotent_key_read_show
+) where
+
+import Test.QuickCheck
+import Utility
+import System.Posix.Types
+
+{- A Key has a unique name, is associated with a key/value backend,
+ - and may contain other optional metadata. -}
+data Key = Key {
+ keyName :: String,
+ keyBackendName :: String,
+ keySize :: Maybe Integer,
+ keyMtime :: Maybe EpochTime
+} deriving (Eq, Ord)
+
+stubKey :: Key
+stubKey = Key {
+ keyName = "",
+ keyBackendName = "",
+ keySize = Nothing,
+ keyMtime = Nothing
+}
+
+fieldSep :: Char
+fieldSep = '-'
+
+{- Keys show as strings that are suitable for use as filenames.
+ - The name field is always shown last, separated by doubled fieldSeps,
+ - and is the only field allowed to contain the fieldSep. -}
+instance Show Key where
+ show Key { keyBackendName = b, keySize = s, keyMtime = m, keyName = n } =
+ b +++ ('s' ?: s) +++ ('m' ?: m) +++ (fieldSep : n)
+ where
+ "" +++ y = y
+ x +++ "" = x
+ x +++ y = x ++ fieldSep:y
+ c ?: (Just v) = c:(show v)
+ _ ?: _ = ""
+
+readKey :: String -> Maybe Key
+readKey s = if key == Just stubKey then Nothing else key
+ where
+ key = startbackend stubKey s
+
+ startbackend k v = sepfield k v addbackend
+
+ sepfield k v a = case span (/= fieldSep) v of
+ (v', _:r) -> findfields r $ a k v'
+ _ -> Nothing
+
+ findfields (c:v) (Just k)
+ | c == fieldSep = Just $ k { keyName = v }
+ | otherwise = sepfield k v $ addfield c
+ findfields _ v = v
+
+ addbackend k v = Just k { keyBackendName = v }
+ addfield 's' k v = Just k { keySize = readMaybe v }
+ addfield 'm' k v = Just k { keyMtime = readMaybe v }
+ addfield _ _ _ = Nothing
+
+-- for quickcheck
+instance Arbitrary Key where
+ arbitrary = do
+ n <- arbitrary
+ b <- elements ['A'..'Z']
+ return $ Key {
+ keyName = n,
+ keyBackendName = [b],
+ keySize = Nothing,
+ keyMtime = Nothing
+ }
+
+prop_idempotent_key_read_show :: Key -> Bool
+prop_idempotent_key_read_show k = Just k == (readKey $ show k)
diff --git a/LocationLog.hs b/LocationLog.hs
index f778df386..a939af825 100644
--- a/LocationLog.hs
+++ b/LocationLog.hs
@@ -123,11 +123,6 @@ logNow s u = do
now <- getPOSIXTime
return $ LogLine now s u
-{- Returns the filename of the log file for a given key. -}
-logFile :: Git.Repo -> Key -> String
-logFile repo key =
- gitStateDir repo ++ keyFile key ++ ".log"
-
{- Returns a list of repository UUIDs that, according to the log, have
- the value of a key. -}
keyLocations :: Git.Repo -> Key -> IO [UUID]
diff --git a/Locations.hs b/Locations.hs
index 908d5b74e..b2d31a1bf 100644
--- a/Locations.hs
+++ b/Locations.hs
@@ -19,6 +19,7 @@ module Locations (
gitAnnexBadDir,
gitAnnexUnusedLog,
isLinkToAnnex,
+ logFile,
prop_idempotent_fileKey
) where
@@ -26,8 +27,12 @@ module Locations (
import System.FilePath
import Data.String.Utils
import Data.List
+import Bits
+import Word
+import Data.Hash.MD5
import Types
+import Key
import qualified GitRepo as Git
{- Conventions:
@@ -62,7 +67,7 @@ objectDir = addTrailingPathSeparator $ annexDir </> "objects"
{- Annexed file's location relative to the .git directory. -}
annexLocation :: Key -> FilePath
-annexLocation key = objectDir </> f </> f
+annexLocation key = objectDir </> hashDir key </> f </> f
where
f = keyFile key
@@ -105,6 +110,11 @@ gitAnnexUnusedLog r = gitAnnexDir r </> "unused"
isLinkToAnnex :: FilePath -> Bool
isLinkToAnnex s = ("/.git/" ++ objectDir) `isInfixOf` s
+{- The filename of the log file for a given key. -}
+logFile :: Git.Repo -> Key -> String
+logFile repo key =
+ gitStateDir repo ++ hashDir key ++ keyFile key ++ ".log"
+
{- Converts a key into a filename fragment.
-
- Escape "/" in the key name, to keep a flat tree of files and avoid
@@ -114,17 +124,49 @@ isLinkToAnnex s = ("/.git/" ++ objectDir) `isInfixOf` s
- a slash
- "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping
- is one to one.
+ - ":" is escaped to "&c", because despite it being 2011, people still care
+ - about FAT.
- -}
keyFile :: Key -> FilePath
-keyFile key = replace "/" "%" $ replace "%" "&s" $ replace "&" "&a" $ show key
+keyFile key = replace "/" "%" $ replace ":" "&c" $
+ replace "%" "&s" $ replace "&" "&a" $ show key
{- Reverses keyFile, converting a filename fragment (ie, the basename of
- the symlink target) into a key. -}
-fileKey :: FilePath -> Key
-fileKey file = read $
- replace "&a" "&" $ replace "&s" "%" $ replace "%" "/" file
+fileKey :: FilePath -> Maybe Key
+fileKey file = readKey $
+ replace "&a" "&" $ replace "&s" "%" $
+ replace "&c" ":" $ replace "%" "/" file
{- for quickcheck -}
prop_idempotent_fileKey :: String -> Bool
-prop_idempotent_fileKey s = k == fileKey (keyFile k)
- where k = read $ "test:" ++ s
+prop_idempotent_fileKey s = Just k == fileKey (keyFile k)
+ where k = stubKey { keyName = s, keyBackendName = "test" }
+
+{- Given a key, generates a short directory name to put it in,
+ - to do hashing to protect against filesystems that dislike having
+ - many items in a single directory. -}
+hashDir :: Key -> FilePath
+hashDir k = addTrailingPathSeparator $ take 2 dir </> drop 2 dir
+ where
+ dir = take 4 $ abcd_to_dir $ md5 $ Str $ show k
+
+abcd_to_dir :: ABCD -> String
+abcd_to_dir (ABCD (a,b,c,d)) = concat $ map display_32bits_as_dir [a,b,c,d]
+
+{- modified version of display_32bits_as_hex from Data.Hash.MD5
+ - Copyright (C) 2001 Ian Lynagh
+ - License: Either BSD or GPL
+ -}
+display_32bits_as_dir :: Word32 -> String
+display_32bits_as_dir w = trim $ swap_pairs cs
+ where
+ -- Need 32 characters to use. To avoid inaverdently making
+ -- a real word, use the alphabet without vowels.
+ chars = ['0'..'9'] ++ "bcdfghjklnmpqrstvwxyzZ"
+ cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7]
+ getc n = chars !! (fromIntegral n)
+ swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs
+ swap_pairs _ = []
+ -- Last 2 will always be 00, so omit.
+ trim s = take 6 s
diff --git a/Makefile b/Makefile
index c381ae986..c60e19b31 100644
--- a/Makefile
+++ b/Makefile
@@ -13,6 +13,7 @@ SysConfig.hs: configure.hs TestConfig.hs
Touch.hs: Touch.hsc
hsc2hs $<
+ perl -i -pe 's/^{-# INCLUDE.*//' $@
$(bins): SysConfig.hs Touch.hs
$(GHCMAKE) $@
diff --git a/Remotes.hs b/Remotes.hs
index 3c9db314c..8b760ac95 100644
--- a/Remotes.hs
+++ b/Remotes.hs
@@ -153,7 +153,7 @@ inAnnex r key = if Git.repoIsUrl r
checkremote = do
showNote ("checking " ++ Git.repoDescribe r ++ "...")
inannex <- onRemote r (boolSystem, False) "inannex"
- [Param ("--backend=" ++ backendName key), Param (keyName key)]
+ [Param (show key)]
return $ Right inannex
{- Cost Ordered list of remotes. -}
@@ -272,8 +272,7 @@ rsyncParams :: Git.Repo -> Bool -> Key -> FilePath -> Annex [CommandParam]
rsyncParams r sending key file = do
Just (shellcmd, shellparams) <- git_annex_shell r
(if sending then "sendkey" else "recvkey")
- [ Param $ "--backend=" ++ backendName key
- , Param $ keyName key
+ [ Param $ show key
-- Command is terminated with "--", because
-- rsync will tack on its own options afterwards,
-- and they need to be ignored.
diff --git a/Types.hs b/Types.hs
index 0890efd5e..503e27d31 100644
--- a/Types.hs
+++ b/Types.hs
@@ -8,11 +8,9 @@
module Types (
Annex,
Backend,
- Key,
- genKey,
- backendName,
- keyName
+ Key
) where
-import BackendTypes
+import BackendClass
import Annex
+import Key
diff --git a/Upgrade.hs b/Upgrade.hs
index 3c16bcc86..eba75bf58 100644
--- a/Upgrade.hs
+++ b/Upgrade.hs
@@ -13,6 +13,7 @@ import Control.Monad.State (liftIO)
import Control.Monad (filterM, forM_)
import System.Posix.Files
import System.FilePath
+import Data.Maybe
import Content
import Types
@@ -30,10 +31,16 @@ upgrade = do
version <- getVersion
case version of
Just "0" -> upgradeFrom0
+ Just "1" -> upgradeFrom1
Nothing -> return True -- repo not initted yet, no version
Just v | v == currentVersion -> return True
Just _ -> error "this version of git-annex is too old for this git repository!"
+upgradeFrom1 :: Annex Bool
+upgradeFrom1 = do
+ showSideAction "Upgrading object directory layout..."
+ error "upgradeFrom1 TODO FIXME"
+
upgradeFrom0 :: Annex Bool
upgradeFrom0 = do
showSideAction "Upgrading object directory layout..."
@@ -74,7 +81,7 @@ getKeysPresent0' dir = do
else do
contents <- liftIO $ getDirectoryContents dir
files <- liftIO $ filterM present contents
- return $ map fileKey files
+ return $ catMaybes $ map fileKey files
where
present d = do
result <- try $
diff --git a/Version.hs b/Version.hs
index 9e31d3c9e..7fdbd1a49 100644
--- a/Version.hs
+++ b/Version.hs
@@ -16,7 +16,7 @@ import qualified GitRepo as Git
import Locations
currentVersion :: String
-currentVersion = "1"
+currentVersion = "2"
versionField :: String
versionField = "annex.version"
diff --git a/debian/changelog b/debian/changelog
index e7017a26d..ac7c854ff 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,5 +1,15 @@
git-annex (0.24) UNRELEASED; urgency=low
+ * Reorganized annexed object store. annex.version=2
+ * Colons are now avoided in filenames, so bare clones of git repos
+ can be put on USB thumb drives formatted with vFAT or similar
+ filesystems.
+ * Added two levels of hashing to object directory and .git-annex logs,
+ to improve scalability with enormous numbers of annexed
+ objects. (With one hundred million annexed objects, each
+ directory would contain fewer than 1024 files.)
+ * The setkey, fromkey, and dropkey subcommands have changed how
+ the key is specified. --backend is no longer used with these.
* Add Suggests on graphviz. Closes: #618039
* When adding files to the annex, the symlinks pointing at the annexed
content are made to have the same mtime as the original file.
diff --git a/doc/bugs/fat_support.mdwn b/doc/bugs/fat_support.mdwn
index 2c6c97385..60633c29b 100644
--- a/doc/bugs/fat_support.mdwn
+++ b/doc/bugs/fat_support.mdwn
@@ -10,3 +10,6 @@ be VFAT formatted:
[[!tag wishlist]]
+[[Done]]; in annex.version 2 repos, colons are entirely avoided in
+filenames. So a bare git clone can be put on VFAT, and git-annex
+used to move stuff --to and --from it, for sneakernet.
diff --git a/doc/forum/hashing_objects_directories.mdwn b/doc/forum/hashing_objects_directories.mdwn
index 715e972ca..5b7708fb5 100644
--- a/doc/forum/hashing_objects_directories.mdwn
+++ b/doc/forum/hashing_objects_directories.mdwn
@@ -17,3 +17,11 @@ or anything in between to a paranoid
Also the use of a colon specifically breaks FAT32 ([[bugs/fat_support]]), must it be a colon or could an extra directory be used? i.e. `.git/annex/objects/SHA1/*/...`
`git annex init` could also create all but the last level directory on initialization. I'm thinking `SHA1/1/1, SHA1/1/2, ..., SHA256/f/f, ..., URL/f/f, ..., WORM/f/f`
+
+> This is done now with a 2-level hash. It also hashes .git-annex/ log
+> files which were the worse problem really. Scales to hundreds of millions
+> of files with each dir having 1024 or fewer contents. Example:
+>
+> `me -> .git/annex/objects/71/9t/WORM-s3-m1300247299--me/WORM-s3-m1300247299--me`
+>
+> --[[Joey]]
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 4998a6491..e559e8cba 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -234,11 +234,11 @@ Many git-annex commands will stage changes for later `git commit` by you.
This can be used to manually set up a file to link to a specified key
in the key-value backend. How you determine an existing key in the backend
- varies. For the URL backend, the key is just a URL to the content.
+ varies. For the URL backend, the key is based on an URL to the content.
Example:
- git annex fromkey --backend=URL --key=http://www.archive.org/somefile somefile
+ git annex fromkey --key=URL--http://www.archive.org/somefile somefile
* dropkey [key ...]
@@ -248,24 +248,18 @@ Many git-annex commands will stage changes for later `git commit` by you.
This can be used to drop content for arbitrary keys, which do not need
to have a file in the git repository pointing at them.
- A backend will typically need to be specified with --backend. If none
- is specified, the first configured backend is used.
-
Example:
- git annex dropkey --backend=SHA1 7da006579dd64330eb2456001fd01948430572f2
+ git annex dropkey SHA1-s10-7da006579dd64330eb2456001fd01948430572f2
* setkey file
This plumbing-level command sets the annexed data for a key to the content of
the specified file, and then removes the file.
- A backend will typically need to be specified with --backend. If none
- is specified, the first configured backend is used.
-
Example:
- git annex setkey --backend=WORM --key=1287765018:3 /tmp/file
+ git annex setkey --key=WORM-s3-m1287765018--file /tmp/file
# OPTIONS
@@ -302,7 +296,10 @@ Many git-annex commands will stage changes for later `git commit` by you.
* --backend=name
- Specifies which key-value backend to use.
+ Specifies which key-value backend to use. This can be used when
+ adding a file to the annex, or migrating a file. Once files
+ are in the annex, their backend is known and this option is not
+ necessary.
* --key=name
diff --git a/doc/internals.mdwn b/doc/internals.mdwn
index 3f680dd8f..a133320b4 100644
--- a/doc/internals.mdwn
+++ b/doc/internals.mdwn
@@ -2,12 +2,15 @@ In the world of git, we're not scared about internal implementation
details, and sometimes we like to dive in and tweak things by hand. Here's
some documentation to that end.
-## `.git/annex/objects/*/*`
+## `.git/annex/objects/aa/bb/*/*`
This is where locally available file contents are actually stored.
Files added to the annex get a symlink checked into git that points
to the file content.
+First there are two levels of directories used for hashing, to prevent
+too many things ending up in any one directory.
+
Each subdirectory has the name of a key in one of the
[[key-value_backends|backends]]. The file inside also has the name of the key.
This two-level structure is used because it allows the write bit to be removed
@@ -41,10 +44,11 @@ Example:
e605dca6-446a-11e0-8b2a-002170d25c55 1
26339d22-446b-11e0-9101-002170d25c55 ?
-## `.git-annex/*.log`
+## `.git-annex/aa/bb/*.log`
The remainder of the log files record [[location_tracking]] information
-for file contents. The name of the key is the filename, and the content
+for file contents. Again these are placed in two levels of subdirectories
+for hashing. The name of the key is the filename, and the content
consists of a timestamp, either 1 (present) or 0 (not present), and
the UUID of the repository that has or lacks the file content.
diff --git a/doc/walkthrough/modifying_annexed_files.mdwn b/doc/walkthrough/modifying_annexed_files.mdwn
index 3ad4e82ea..f75b73a24 100644
--- a/doc/walkthrough/modifying_annexed_files.mdwn
+++ b/doc/walkthrough/modifying_annexed_files.mdwn
@@ -27,7 +27,7 @@ and this symlink is what gets committed to git in the end.
add my_cool_big_file ok
[master 64cda67] changed an annexed file
2 files changed, 2 insertions(+), 1 deletions(-)
- create mode 100644 .git-annex/WORM:1289672605:30:file.log
+ create mode 100644 .git-annex/WORM-s30-m1289672605--file.log
There is one problem with using `git commit` like this: Git wants to first
stage the entire contents of the file in its index. That can be slow for
diff --git a/doc/walkthrough/moving_file_content_between_repositories.mdwn b/doc/walkthrough/moving_file_content_between_repositories.mdwn
index d7150f109..6b3e3f4e8 100644
--- a/doc/walkthrough/moving_file_content_between_repositories.mdwn
+++ b/doc/walkthrough/moving_file_content_between_repositories.mdwn
@@ -9,5 +9,5 @@ makes it very easy.
move my_cool_big_file (moving to usbdrive...) ok
# git annex move video/hackity_hack_and_kaxxt.mov --from fileserver
move video/hackity_hack_and_kaxxt.mov (moving from fileserver...)
- WORM:1274316523:86050597:hackity_hack_and_kax 100% 82MB 199.1KB/s 07:02
+ WORM-s86050597-m1274316523--hackity_hack_and_kax 100% 82MB 199.1KB/s 07:02
ok
diff --git a/doc/walkthrough/unused_data.mdwn b/doc/walkthrough/unused_data.mdwn
index 69a581fe1..9be32577c 100644
--- a/doc/walkthrough/unused_data.mdwn
+++ b/doc/walkthrough/unused_data.mdwn
@@ -12,8 +12,8 @@ eliminate it to save space.
unused (checking for unused data...)
Some annexed data is no longer pointed to by any files in the repository.
NUMBER KEY
- 1 WORM:1289672605:3:file
- 2 WORM:1289672605:14:file
+ 1 WORM-s3-m1289672605--file
+ 2 WORM-s14-m1289672605--file
(To see where data was previously used, try: git log --stat -S'KEY')
(To remove unwanted data: git-annex dropunused NUMBER)
ok
diff --git a/doc/walkthrough/using_ssh_remotes.mdwn b/doc/walkthrough/using_ssh_remotes.mdwn
index 6af9e1f47..4c2f830de 100644
--- a/doc/walkthrough/using_ssh_remotes.mdwn
+++ b/doc/walkthrough/using_ssh_remotes.mdwn
@@ -13,7 +13,7 @@ Now you can get files and they will be transferred (using `rsync` via `ssh`):
# git annex get my_cool_big_file
get my_cool_big_file (getting UUID for origin...) (copying from origin...)
- WORM:1285650548:2159:my_cool_big_file 100% 2159 2.1KB/s 00:00
+ WORM-s2159-m1285650548--my_cool_big_file 100% 2159 2.1KB/s 00:00
ok
When you drop files, git-annex will ssh over to the remote and make
diff --git a/doc/walkthrough/using_the_URL_backend.mdwn b/doc/walkthrough/using_the_URL_backend.mdwn
index fe79a6be2..585fd0668 100644
--- a/doc/walkthrough/using_the_URL_backend.mdwn
+++ b/doc/walkthrough/using_the_URL_backend.mdwn
@@ -5,7 +5,7 @@ Another handy backend is the URL backend, which can fetch file's content
from remote URLs. Here's how to set up some files in your repository
that use this backend:
- # git annex fromkey --backend=URL --key=http://www.archive.org/somefile somefile
+ # git annex fromkey --key=URL--http://www.archive.org/somefile somefile
fromkey somefile ok
# git commit -m "added a file from the Internet Archive"
diff --git a/test.hs b/test.hs
index 31960bb2e..49f7f2ab9 100644
--- a/test.hs
+++ b/test.hs
@@ -29,7 +29,7 @@ import qualified Backend
import qualified GitRepo as Git
import qualified Locations
import qualified Utility
-import qualified BackendTypes
+import qualified BackendClass
import qualified Types
import qualified GitAnnex
import qualified LocationLog
@@ -38,6 +38,7 @@ import qualified Trust
import qualified Remotes
import qualified Content
import qualified Command.DropUnused
+import qualified Key
main :: IO ()
main = do
@@ -55,7 +56,7 @@ quickcheck :: Test
quickcheck = TestLabel "quickcheck" $ TestList
[ qctest "prop_idempotent_deencode" Git.prop_idempotent_deencode
, qctest "prop_idempotent_fileKey" Locations.prop_idempotent_fileKey
- , qctest "prop_idempotent_key_read_show" BackendTypes.prop_idempotent_key_read_show
+ , qctest "prop_idempotent_key_read_show" Key.prop_idempotent_key_read_show
, qctest "prop_idempotent_shellEscape" Utility.prop_idempotent_shellEscape
, qctest "prop_idempotent_shellEscape_multiword" Utility.prop_idempotent_shellEscape_multiword
, qctest "prop_parentDir_basics" Utility.prop_parentDir_basics
@@ -119,10 +120,10 @@ test_add = "git-annex add" ~: TestList [basic, sha1dup]
test_setkey :: Test
test_setkey = "git-annex setkey/fromkey" ~: TestCase $ inmainrepo $ do
writeFile tmp $ content sha1annexedfile
- r <- annexeval $ BackendTypes.getKey backendSHA1 tmp
- let sha1 = BackendTypes.keyName $ fromJust r
- git_annex "setkey" ["-q", "--backend", "SHA1", "--key", sha1, tmp] @? "setkey failed"
- git_annex "fromkey" ["-q", "--backend", "SHA1", "--key", sha1, sha1annexedfile] @? "fromkey failed"
+ r <- annexeval $ BackendClass.getKey backendSHA1 tmp
+ let key = show $ fromJust r
+ git_annex "setkey" ["-q", "--key", key, tmp] @? "setkey failed"
+ git_annex "fromkey" ["-q", "--key", key, sha1annexedfile] @? "fromkey failed"
Utility.boolSystem "git" [Utility.Params "commit -q -a -m commit"] @? "git commit failed"
annexed_present sha1annexedfile
where
@@ -438,7 +439,7 @@ test_unused = "git-annex unused/dropunused" ~: intmpclonerepo $ do
checkunused [annexedfilekey, sha1annexedfilekey]
-- good opportunity to test dropkey also
- git_annex "dropkey" ["-q", "--force", BackendTypes.keyName annexedfilekey]
+ git_annex "dropkey" ["-q", "--force", show annexedfilekey]
@? "dropkey failed"
checkunused [sha1annexedfilekey]