diff options
author | Joey Hess <joey@kitenet.net> | 2010-10-19 19:38:24 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2010-10-19 19:38:24 -0400 |
commit | c397e5a0f3a041a8cc5038c0f628a330b45fdcae (patch) | |
tree | f645b54215ef60c43220f4eec03d822a9db9bb07 | |
parent | 54db046c5ae2e7fce73fcd5ce4da278b5f8b445c (diff) | |
parent | 972639d85c663855dd0c7476b732dcb319efdb2e (diff) |
Merge branch 'master' of /home/joey/src/git-annex
56 files changed, 2685 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..13deb526a --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +build/* +git-annex +git-annex.1 +doc/.ikiwiki +html diff --git a/Annex.hs b/Annex.hs new file mode 100644 index 000000000..b68e51355 --- /dev/null +++ b/Annex.hs @@ -0,0 +1,77 @@ +{- git-annex monad -} + +module Annex ( + new, + run, + gitRepo, + gitRepoChange, + backends, + backendsChange, + supportedBackends, + flagIsSet, + flagChange, + Flag(..) +) where + +import Control.Monad.State + +import qualified GitRepo as Git +import Types +import qualified TypeInternals as Internals + +{- Create and returns an Annex state object for the specified git repo. + -} +new :: Git.Repo -> [Backend] -> IO AnnexState +new gitrepo allbackends = do + let s = Internals.AnnexState { + Internals.repo = gitrepo, + Internals.backends = [], + Internals.supportedBackends = allbackends, + Internals.flags = [] + } + (_,s') <- Annex.run s (prep gitrepo) + return s' + where + prep gitrepo = do + -- read git config and update state + gitrepo' <- liftIO $ Git.configRead gitrepo + Annex.gitRepoChange gitrepo' + +-- performs an action in the Annex monad +run state action = runStateT (action) state + +-- Annex monad state accessors +gitRepo :: Annex Git.Repo +gitRepo = do + state <- get + return (Internals.repo state) +gitRepoChange :: Git.Repo -> Annex () +gitRepoChange r = do + state <- get + put state { Internals.repo = r } + return () +backends :: Annex [Backend] +backends = do + state <- get + return (Internals.backends state) +backendsChange :: [Backend] -> Annex () +backendsChange b = do + state <- get + put state { Internals.backends = b } + return () +supportedBackends :: Annex [Backend] +supportedBackends = do + state <- get + return (Internals.supportedBackends state) +flagIsSet :: Flag -> Annex Bool +flagIsSet flag = do + state <- get + return $ elem flag $ Internals.flags state +flagChange :: Flag -> Bool -> Annex () +flagChange flag set = do + state <- get + let f = filter (/= flag) $ Internals.flags state + if (set) + then put state { Internals.flags = (flag:f) } + else put state { Internals.flags = f } + return () diff --git a/Backend.hs b/Backend.hs new file mode 100644 index 000000000..a427234d7 --- /dev/null +++ b/Backend.hs @@ -0,0 +1,116 @@ +{- git-annex key-value storage backends + - + - git-annex uses a key-value abstraction layer to allow files contents to be + - stored in different ways. In theory, any key-value storage system could be + - used to store the file contents, and git-annex would then retrieve them + - as needed and put them in `.git/annex/`. + - + - When a file is annexed, a key is generated from its content and/or metadata. + - This key can later be used to retrieve the file's content (its value). This + - key generation must be stable for a given file content, name, and size. + - + - Multiple pluggable backends are supported, and more than one can be used + - to store different files' contents in a given repository. + - -} + +module Backend ( + storeFileKey, + retrieveKeyFile, + removeKey, + hasKey, + lookupFile +) where + +import Control.Monad.State +import Control.Exception +import System.Directory +import System.FilePath +import Data.String.Utils +import System.Posix.Files + +import Locations +import qualified GitRepo as Git +import qualified Annex +import Utility +import Types +import qualified TypeInternals as Internals + +{- List of backends in the order to try them when storing a new key. -} +backendList :: Annex [Backend] +backendList = do + l <- Annex.backends + if (0 < length l) + then return l + else do + all <- Annex.supportedBackends + g <- Annex.gitRepo + let l = parseBackendList all $ Git.configGet g "annex.backends" "" + Annex.backendsChange l + return l + where + parseBackendList all s = + if (length s == 0) + then all + else map (lookupBackendName all) $ words s + +{- Looks up a backend in the list of supportedBackends -} +lookupBackendName :: [Backend] -> String -> Backend +lookupBackendName all s = + if ((length matches) /= 1) + then error $ "unknown backend " ++ s + else matches !! 0 + where matches = filter (\b -> s == Internals.name b) all + +{- Attempts to store a file in one of the backends. -} +storeFileKey :: FilePath -> Annex (Maybe (Key, Backend)) +storeFileKey file = do + g <- Annex.gitRepo + let relfile = Git.relative g file + b <- backendList + storeFileKey' b file relfile +storeFileKey' [] _ _ = return Nothing +storeFileKey' (b:bs) file relfile = do + try <- (Internals.getKey b) relfile + case (try) of + Nothing -> nextbackend + Just key -> do + stored <- (Internals.storeFileKey b) file key + if (not stored) + then nextbackend + else do + return $ Just (key, b) + where + nextbackend = storeFileKey' bs file relfile + +{- Attempts to retrieve an key from one of the backends, saving it to + - a specified location. -} +retrieveKeyFile :: Backend -> Key -> FilePath -> Annex Bool +retrieveKeyFile backend key dest = (Internals.retrieveKeyFile backend) key dest + +{- Removes a key from a backend. -} +removeKey :: Backend -> Key -> Annex Bool +removeKey backend key = (Internals.removeKey backend) key + +{- Checks if a backend has its key. -} +hasKey :: Key -> Annex Bool +hasKey key = do + all <- Annex.supportedBackends + (Internals.hasKey (lookupBackendName all $ backendName key)) key + +{- Looks up the key and backend corresponding to an annexed file, + - by examining what the file symlinks to. -} +lookupFile :: FilePath -> Annex (Maybe (Key, Backend)) +lookupFile file = do + all <- Annex.supportedBackends + result <- liftIO $ (try (lookup all)::IO (Either SomeException (Maybe (Key, Backend)))) + case (result) of + Left err -> return Nothing + Right succ -> return succ + where + lookup all = do + l <- readSymbolicLink file + return $ Just $ pair all $ takeFileName l + pair all file = (k, b) + where + k = fileKey file + b = lookupBackendName all $ backendName k diff --git a/Backend/File.hs b/Backend/File.hs new file mode 100644 index 000000000..4ea25daa7 --- /dev/null +++ b/Backend/File.hs @@ -0,0 +1,150 @@ +{- git-annex pseudo-backend + - + - This backend does not really do any independant data storage, + - it relies on the file contents in .git/annex/ in this repo, + - and other accessible repos. + - + - This is an abstract backend; getKey has to be implemented to complete + - it. + -} + +module Backend.File (backend) where + +import Control.Monad.State +import System.IO +import System.Cmd +import System.Cmd.Utils +import Control.Exception +import List +import Maybe + +import TypeInternals +import LocationLog +import Locations +import qualified Remotes +import qualified GitRepo as Git +import Utility +import Core +import qualified Annex +import UUID +import qualified Backend + +backend = Backend { + name = mustProvide, + getKey = mustProvide, + storeFileKey = dummyStore, + retrieveKeyFile = copyKeyFile, + removeKey = checkRemoveKey, + hasKey = checkKeyFile +} + +mustProvide = error "must provide this field" + +{- Storing a key is a no-op. -} +dummyStore :: FilePath -> Key -> Annex (Bool) +dummyStore file key = return True + +{- Just check if the .git/annex/ file for the key exists. -} +checkKeyFile :: Key -> Annex Bool +checkKeyFile k = inAnnex k + +{- Try to find a copy of the file in one of the remotes, + - and copy it over to this one. -} +copyKeyFile :: Key -> FilePath -> Annex (Bool) +copyKeyFile key file = do + remotes <- Remotes.withKey key + if (0 == length remotes) + then do + showNote "not available" + showLocations key + return False + else trycopy remotes remotes + where + trycopy full [] = do + showNote "not available" + showTriedRemotes full + showLocations key + return False + trycopy full (r:rs) = do + -- annexLocation needs the git config to have been + -- read for a remote, so do that now, + -- if it hasn't been already + result <- Remotes.tryGitConfigRead r + case (result) of + Left err -> trycopy full rs + Right r' -> do + showNote $ "copying from " ++ (Git.repoDescribe r ) ++ "..." + liftIO $ copyFromRemote r' key file + +{- Tries to copy a file from a remote. -} +copyFromRemote :: Git.Repo -> Key -> FilePath -> IO Bool +copyFromRemote r key file = do + if (Git.repoIsLocal r) + then getlocal + else getremote + where + getlocal = boolSystem "cp" ["-a", location, file] + getremote = return False -- TODO implement get from remote + location = annexLocation r key + +showLocations :: Key -> Annex () +showLocations key = do + g <- Annex.gitRepo + u <- getUUID g + uuids <- liftIO $ keyLocations g key + let uuidsf = filter (\v -> v /= u) uuids + ppuuids <- prettyPrintUUIDs uuidsf + if (0 < length uuidsf) + then showLongNote $ "Try making some of these repositories available:\n" ++ ppuuids + else showLongNote $ "No other repository is known to contain the file." + +showTriedRemotes remotes = + showLongNote $ "I was unable to access these remotes: " ++ + (Remotes.list remotes) + +{- Checks remotes to verify that enough copies of a key exist to allow + - for a key to be safely removed (with no data loss), and fails with an + - error if not. -} +checkRemoveKey :: Key -> Annex (Bool) +checkRemoveKey key = do + force <- Annex.flagIsSet Force + if (force) + then return True + else do + g <- Annex.gitRepo + remotes <- Remotes.withKey key + let numcopies = read $ Git.configGet g config "1" + if (numcopies > length remotes) + then notEnoughCopies numcopies (length remotes) [] + else findcopies numcopies 0 remotes [] + where + config = "annex.numcopies" + findcopies need have [] bad = + if (have >= need) + then return True + else notEnoughCopies need have bad + findcopies need have (r:rs) bad = do + all <- Annex.supportedBackends + result <- liftIO $ ((try $ remoteHasKey r all)::IO (Either SomeException Bool)) + case (result) of + Right True -> findcopies need (have+1) rs bad + Right False -> findcopies need have rs bad + Left _ -> findcopies need have rs (r:bad) + remoteHasKey r all = do + -- To check if a remote has a key, construct a new + -- Annex monad and query its backend. + a <- Annex.new r all + (result, _) <- Annex.run a (Backend.hasKey key) + return result + notEnoughCopies need have bad = do + unsafe + showLongNote $ + "Could only verify the existence of " ++ + (show have) ++ " out of " ++ (show need) ++ + " necessary copies" + if (0 /= length bad) then showTriedRemotes bad else return () + showLocations key + hint + return False + unsafe = showNote "unsafe" + hint = showLongNote $ "(Use --force to override this check, or adjust annex.numcopies.)" diff --git a/Backend/SHA1.hs b/Backend/SHA1.hs new file mode 100644 index 000000000..caece6b78 --- /dev/null +++ b/Backend/SHA1.hs @@ -0,0 +1,16 @@ +{- git-annex "SHA1" backend + - -} + +module Backend.SHA1 (backend) where + +import qualified Backend.File +import TypeInternals + +backend = Backend.File.backend { + name = "SHA1", + getKey = keyValue +} + +-- checksum the file to get its key +keyValue :: FilePath -> Annex (Maybe Key) +keyValue k = error "SHA1 keyValue unimplemented" -- TODO diff --git a/Backend/URL.hs b/Backend/URL.hs new file mode 100644 index 000000000..c9b6ab6df --- /dev/null +++ b/Backend/URL.hs @@ -0,0 +1,47 @@ +{- git-annex "URL" backend + - -} + +module Backend.URL (backend) where + +import Control.Exception +import Control.Monad.State (liftIO) +import Data.String.Utils +import System.Cmd +import System.Cmd.Utils +import System.Exit + +import TypeInternals +import Core + +backend = Backend { + name = "URL", + getKey = keyValue, + storeFileKey = dummyStore, + retrieveKeyFile = downloadUrl, + removeKey = dummyOk, + hasKey = dummyOk +} + +-- cannot generate url from filename +keyValue :: FilePath -> Annex (Maybe Key) +keyValue file = return Nothing + +-- cannot change url contents +dummyStore :: FilePath -> Key -> Annex Bool +dummyStore file url = return False + +-- allow keys to be removed; presumably they can always be downloaded again +dummyOk :: Key -> Annex Bool +dummyOk url = return True + +downloadUrl :: Key -> FilePath -> Annex Bool +downloadUrl key file = do + showNote "downloading" + liftIO $ putStrLn "" -- make way for curl progress bar + result <- liftIO $ (try curl::IO (Either SomeException ())) + case result of + Left err -> return False + Right succ -> return True + where + curl = safeSystem "curl" ["-#", "-o", file, url] + url = join ":" $ drop 1 $ split ":" $ show key diff --git a/Backend/WORM.hs b/Backend/WORM.hs new file mode 100644 index 000000000..0588ddaf8 --- /dev/null +++ b/Backend/WORM.hs @@ -0,0 +1,35 @@ +{- git-annex "WORM" backend -- Write Once, Read Many + - -} + +module Backend.WORM (backend) where + +import Control.Monad.State +import System.FilePath +import System.Posix.Files +import qualified Data.ByteString.Lazy.Char8 as B + +import qualified Backend.File +import TypeInternals +import Utility + +backend = Backend.File.backend { + name = "WORM", + getKey = keyValue +} + +-- The key is formed from the file size, modification time, and the +-- basename of the filename. +-- +-- That allows multiple files with the same names to have different keys, +-- while also allowing a file to be moved around while retaining the +-- same key. +keyValue :: FilePath -> Annex (Maybe Key) +keyValue file = do + stat <- liftIO $ getFileStatus file + return $ Just $ Key ((name backend), key stat) + where + key stat = uniqueid stat ++ sep ++ base + uniqueid stat = (show $ modificationTime stat) ++ sep ++ + (show $ fileSize stat) + base = takeFileName file + sep = ":" diff --git a/BackendList.hs b/BackendList.hs new file mode 100644 index 000000000..25f3ae5ea --- /dev/null +++ b/BackendList.hs @@ -0,0 +1,14 @@ +{- git-annex backend list + - -} + +module BackendList (allBackends) where + +-- When adding a new backend, import it here and add it to the list. +import qualified Backend.WORM +import qualified Backend.SHA1 +import qualified Backend.URL +allBackends = + [ Backend.WORM.backend + , Backend.SHA1.backend + , Backend.URL.backend + ] diff --git a/Commands.hs b/Commands.hs new file mode 100644 index 000000000..2addf714e --- /dev/null +++ b/Commands.hs @@ -0,0 +1,235 @@ +{- git-annex command line -} + +module Commands (parseCmd) where + +import System.Console.GetOpt +import Control.Monad.State (liftIO) +import System.Posix.Files +import System.Directory +import System.Path +import Data.String.Utils +import List +import IO + +import qualified GitRepo as Git +import qualified Annex +import Utility +import Locations +import qualified Backend +import UUID +import LocationLog +import Types +import Core +import qualified Remotes +import qualified TypeInternals + +data CmdWants = FilesInGit | FilesNotInGit | RepoName | SingleString +data Command = Command { + cmdname :: String, + cmdaction :: (String -> Annex ()), + cmdwants :: CmdWants, + cmddesc :: String +} + +cmds :: [Command] +cmds = [ + (Command "add" addCmd FilesNotInGit + "add files to annex") + , (Command "get" getCmd FilesInGit + "make content of annexed files available") + , (Command "drop" dropCmd FilesInGit + "indicate content of files not currently wanted") + , (Command "unannex" unannexCmd FilesInGit + "undo accidential add command") + , (Command "init" initCmd SingleString + "initialize git-annex with repository description") + , (Command "fix" fixCmd FilesInGit + "fix up files' symlinks to point to annexed content") + ] + +options = [ + Option ['f'] ["force"] (NoArg Force) "allow actions that may lose annexed data" + ] + +header = "Usage: git-annex " ++ (join "|" $ map cmdname cmds) ++ " [path ...]" + +usage = usageInfo header options ++ "\nSubcommands:\n" ++ cmddescs + where + cmddescs = unlines $ map (\c -> indent $ showcmd c) cmds + showcmd c = + (cmdname c) ++ + (take (10 - (length (cmdname c))) $ repeat ' ') ++ + (cmddesc c) + indent l = " " ++ l + +{- Finds the type of parameters a command wants, from among the passed + - parameter list. -} +findWanted :: CmdWants -> [String] -> Git.Repo -> IO [String] +findWanted FilesNotInGit params repo = do + files <- mapM (Git.notInRepo repo) params + return $ foldl (++) [] files +findWanted FilesInGit params repo = do + files <- mapM (Git.inRepo repo) params + return $ foldl (++) [] files +findWanted SingleString params _ = do + return $ [unwords params] +findWanted RepoName params _ = do + return $ params + +{- Parses command line and returns a list of flags and a list of + - actions to be run in the Annex monad. -} +parseCmd :: [String] -> AnnexState -> IO ([Flag], [Annex ()]) +parseCmd argv state = do + (flags, params) <- getopt + case (length params) of + 0 -> error usage + _ -> case (lookupCmd (params !! 0)) of + [] -> error usage + [Command _ action want _] -> do + f <- findWanted want (drop 1 params) + (TypeInternals.repo state) + return (flags, map action $ filter notstate f) + where + -- never include files from the state directory + notstate f = stateLoc /= take (length stateLoc) f + getopt = case getOpt Permute options argv of + (flags, params, []) -> return (flags, params) + (_, _, errs) -> ioError (userError (concat errs ++ usage)) + lookupCmd cmd = filter (\c -> cmd == cmdname c) cmds + +{- Annexes a file, storing it in a backend, and then moving it into + - the annex directory and setting up the symlink pointing to its content. -} +addCmd :: FilePath -> Annex () +addCmd file = inBackend file $ do + s <- liftIO $ getSymbolicLinkStatus file + if ((isSymbolicLink s) || (not $ isRegularFile s)) + then return () + else do + showStart "add" file + g <- Annex.gitRepo + stored <- Backend.storeFileKey file + case (stored) of + Nothing -> showEndFail + Just (key, backend) -> do + logStatus key ValuePresent + setup g key + where + setup g key = do + let dest = annexLocation g key + liftIO $ createDirectoryIfMissing True (parentDir dest) + liftIO $ renameFile file dest + link <- calcGitLink file key + liftIO $ createSymbolicLink link file + liftIO $ Git.run g ["add", file] + showEndOk + +{- Undo addCmd. -} +unannexCmd :: FilePath -> Annex () +unannexCmd file = notinBackend file $ \(key, backend) -> do + showStart "unannex" file + Annex.flagChange Force True -- force backend to always remove + Backend.removeKey backend key + logStatus key ValueMissing + g <- Annex.gitRepo + let src = annexLocation g key + moveout g src + where + moveout g src = do + liftIO $ removeFile file + liftIO $ Git.run g ["rm", "--quiet", file] + -- git rm deletes empty directories; + -- put them back + liftIO $ createDirectoryIfMissing True (parentDir file) + liftIO $ renameFile src file + showEndOk + +{- Gets an annexed file from one of the backends. -} +getCmd :: FilePath -> Annex () +getCmd file = notinBackend file $ \(key, backend) -> do + inannex <- inAnnex key + if (inannex) + then return () + else do + showStart "get" file + g <- Annex.gitRepo + let dest = annexLocation g key + let tmp = (annexTmpLocation g) ++ (keyFile key) + liftIO $ createDirectoryIfMissing True (parentDir tmp) + success <- Backend.retrieveKeyFile backend key tmp + if (success) + then do + liftIO $ renameFile tmp dest + logStatus key ValuePresent + showEndOk + else do + showEndFail + +{- Indicates a file's content is not wanted anymore, and should be removed + - if it's safe to do so. -} +dropCmd :: FilePath -> Annex () +dropCmd file = notinBackend file $ \(key, backend) -> do + inbackend <- Backend.hasKey key + if (not inbackend) + then return () -- no-op + else do + showStart "drop" file + success <- Backend.removeKey backend key + if (success) + then do + cleanup key + showEndOk + else showEndFail + where + cleanup key = do + logStatus key ValueMissing + inannex <- inAnnex key + if (inannex) + then do + g <- Annex.gitRepo + let loc = annexLocation g key + liftIO $ removeFile loc + return () + else return () + +{- Fixes the symlink to an annexed file. -} +fixCmd :: String -> Annex () +fixCmd file = notinBackend file $ \(key, backend) -> do + link <- calcGitLink file key + l <- liftIO $ readSymbolicLink file + if (link == l) + then return () + else do + showStart "fix" file + liftIO $ createDirectoryIfMissing True (parentDir file) + liftIO $ removeFile file + liftIO $ createSymbolicLink link file + g <- Annex.gitRepo + liftIO $ Git.run g ["add", file] + showEndOk + +{- Stores description for the repository. -} +initCmd :: String -> Annex () +initCmd description = do + if (0 == length description) + then error $ + "please specify a description of this repository\n" ++ + usage + else do + g <- Annex.gitRepo + u <- getUUID g + describeUUID u description + log <- uuidLog + liftIO $ Git.run g ["add", log] + liftIO $ putStrLn "description set" + +-- helpers +inBackend file a = do + r <- Backend.lookupFile file + case (r) of + Just v -> return () + Nothing -> a +notinBackend file a = do + r <- Backend.lookupFile file + case (r) of + Just v -> a v + Nothing -> return () diff --git a/Core.hs b/Core.hs new file mode 100644 index 000000000..8dc4bff6f --- /dev/null +++ b/Core.hs @@ -0,0 +1,109 @@ +{- git-annex core functions -} + +module Core where + +import Maybe +import System.IO +import System.Directory +import Control.Monad.State (liftIO) +import System.Path +import Data.String.Utils + +import Types +import Locations +import LocationLog +import UUID +import qualified GitRepo as Git +import qualified Annex +import Utility + +{- Sets up a git repo for git-annex. -} +startup :: [Flag] -> Annex () +startup flags = do + mapM (\f -> Annex.flagChange f True) flags + g <- Annex.gitRepo + liftIO $ gitAttributes g + prepUUID + +{- When git-annex is done, it runs this. -} +shutdown :: Annex () +shutdown = do + g <- Annex.gitRepo + + liftIO $ Git.run g ["add", gitStateDir g] + + -- clean up any files left in the temp directory + let tmp = annexTmpLocation g + exists <- liftIO $ doesDirectoryExist tmp + if (exists) + then liftIO $ removeDirectoryRecursive $ tmp + else return () + +{- configure git to use union merge driver on state files, if it is not + - already -} +gitAttributes :: Git.Repo -> IO () +gitAttributes repo = do + exists <- doesFileExist attributes + if (not exists) + then do + writeFile attributes $ attrLine ++ "\n" + commit + else do + content <- readFile attributes + if (all (/= attrLine) (lines content)) + then do + appendFile attributes $ attrLine ++ "\n" + commit + else return () + where + attrLine = stateLoc ++ "*.log merge=union" + attributes = Git.attributes repo + commit = do + Git.run repo ["add", attributes] + Git.run repo ["commit", "-m", "git-annex setup", + attributes] + +{- Checks if a given key is currently present in the annexLocation -} +inAnnex :: Key -> Annex Bool +inAnnex key = do + g <- Annex.gitRepo + liftIO $ doesFileExist $ annexLocation g key + +{- Calculates the relative path to use to link a file to a key. -} +calcGitLink :: FilePath -> Key -> Annex FilePath +calcGitLink file key = do + g <- Annex.gitRepo + cwd <- liftIO $ getCurrentDirectory + let absfile = case (absNormPath cwd file) of + Just f -> f + Nothing -> error $ "unable to normalize " ++ file + return $ (relPathDirToDir (parentDir absfile) (Git.workTree g)) ++ + annexLocationRelative g key + +{- Updates the LocationLog when a key's presence changes. -} +logStatus :: Key -> LogStatus -> Annex () +logStatus key status = do + g <- Annex.gitRepo + u <- getUUID g + liftIO $ logChange g key u status + +{- Output logging -} +showStart :: String -> String -> Annex () +showStart command file = do + liftIO $ putStr $ command ++ " " ++ file + liftIO $ hFlush stdout +showNote :: String -> Annex () +showNote s = do + liftIO $ putStr $ " (" ++ s ++ ")" + liftIO $ hFlush stdout +showLongNote :: String -> Annex () +showLongNote s = do + liftIO $ putStr $ "\n" ++ (indent s) + where + indent s = join "\n" $ map (\l -> " " ++ l) $ lines s +showEndOk :: Annex () +showEndOk = do + liftIO $ putStrLn " ok" +showEndFail :: Annex () +showEndFail = do + liftIO $ putStrLn "\nfailed" diff --git a/GitRepo.hs b/GitRepo.hs new file mode 100644 index 000000000..5b0e68cd6 --- /dev/null +++ b/GitRepo.hs @@ -0,0 +1,267 @@ +{- git repository handling + - + - This is written to be completely independant of git-annex and should be + - suitable for other uses. + - + -} + +module GitRepo ( + Repo, + repoFromCwd, + repoFromPath, + repoFromUrl, + repoIsLocal, + repoIsRemote, + repoDescribe, + workTree, + dir, + relative, + configGet, + configMap, + configRead, + run, + pipeRead, + attributes, + remotes, + remotesAdd, + repoRemoteName, + inRepo, + notInRepo +) where + +import Directory +import System +import System.Directory +import System.Posix.Directory +import System.Path +import System.Cmd +import System.Cmd.Utils +import System.IO +import IO (bracket_) +import Data.String.Utils +import Data.Map as Map hiding (map, split) +import Network.URI +import Maybe + +import Utility + +{- A git repository can be on local disk or remote. Not to be confused + - with a git repo's configured remotes, some of which may be on local + - disk. -} +data Repo = + LocalRepo { + top :: FilePath, + config :: Map String String, + remotes :: [Repo], + -- remoteName holds the name used for this repo in remotes + remoteName :: Maybe String + } | RemoteRepo { + url :: String, + top :: FilePath, + config :: Map String String, + remotes :: [Repo], + remoteName :: Maybe String + } deriving (Show, Read, Eq) + +{- Local Repo constructor. -} +repoFromPath :: FilePath -> Repo +repoFromPath dir = + LocalRepo { + top = dir, + config = Map.empty, + remotes = [], + remoteName = Nothing + } + +{- Remote Repo constructor. Throws exception on invalid url. -} +repoFromUrl :: String -> Repo +repoFromUrl url = + RemoteRepo { + url = url, + top = path url, + config = Map.empty, + remotes = [], + remoteName = Nothing + } + where path url = uriPath $ fromJust $ parseURI url + +{- User-visible description of a git repo. -} +repoDescribe repo = + if (isJust $ remoteName repo) + then fromJust $ remoteName repo + else if (repoIsLocal repo) + then top repo + else url repo + +{- Constructs and returns an updated version of a repo with + - different remotes list. -} +remotesAdd :: Repo -> [Repo] -> Repo +remotesAdd repo rs = repo { remotes = rs } + +{- Returns the name of the remote that corresponds to the repo, if + - it is a remote. Otherwise, "" -} +repoRemoteName r = + if (isJust $ remoteName r) + then fromJust $ remoteName r + else "" + +{- Some code needs to vary between remote and local repos, or bare and + - non-bare, these functions help with that. -} +repoIsLocal repo = case (repo) of + LocalRepo {} -> True + RemoteRepo {} -> False +repoIsRemote repo = not $ repoIsLocal repo +assertlocal repo action = + if (repoIsLocal repo) + then action + else error $ "acting on remote git repo " ++ (repoDescribe repo) ++ + " not supported" +bare :: Repo -> Bool +bare repo = + if (member b (config repo)) + then ("true" == fromJust (Map.lookup b (config repo))) + else error $ "it is not known if git repo " ++ (repoDescribe repo) ++ + " is a bare repository; config not read" + where + b = "core.bare" + +{- Path to a repository's gitattributes file. -} +attributes :: Repo -> String +attributes repo = assertlocal repo $ do + if (bare repo) + then (top repo) ++ "/info/.gitattributes" + else (top repo) ++ "/.gitattributes" + +{- Path to a repository's .git directory, relative to its topdir. -} +dir :: Repo -> String +dir repo = assertlocal repo $ + if (bare repo) + then "" + else ".git" + +{- Path to a repository's --work-tree. -} +workTree :: Repo -> FilePath +workTree repo = top repo + +{- Given a relative or absolute filename in a repository, calculates the + - name to use to refer to the file relative to a git repository's top. + - This is the same form displayed and used by git. -} +relative :: Repo -> String -> String +relative repo file = drop (length absrepo) absfile + where + -- normalize both repo and file, so that repo + -- will be substring of file + absrepo = case (absNormPath "/" (top repo)) of + Just f -> f ++ "/" + Nothing -> error $ "bad repo" ++ (top repo) + absfile = case (secureAbsNormPath absrepo file) of + Just f -> f + Nothing -> error $ file ++ " is not located inside git repository " ++ absrepo + +{- Constructs a git command line operating on the specified repo. -} +gitCommandLine :: Repo -> [String] -> [String] +gitCommandLine repo params = assertlocal repo $ + -- force use of specified repo via --git-dir and --work-tree + ["--git-dir="++(top repo)++"/"++(dir repo), "--work-tree="++(top repo)] ++ params + +{- Runs git in the specified repo. -} +run :: Repo -> [String] -> IO () +run repo params = assertlocal repo $ do + r <- safeSystem "git" (gitCommandLine repo params) + return () + +{- Runs a git subcommand and returns its output. -} +pipeRead :: Repo -> [String] -> IO String +pipeRead repo params = assertlocal repo $ do + pOpen ReadFromPipe "git" (gitCommandLine repo params) $ \h -> do + ret <- hGetContentsStrict h + return ret + +{- Passed a location, recursively scans for all files that + - are checked into git at that location. -} +inRepo :: Repo -> FilePath -> IO [FilePath] +inRepo repo location = do + s <- pipeRead repo ["ls-files", "--cached", "--exclude-standard", location] + return $ lines s + +{- Passed a location, recursively scans for all files that are not checked + - into git, and not gitignored. -} +notInRepo :: Repo -> FilePath -> IO [FilePath] +notInRepo repo location = do + s <- pipeRead repo ["ls-files", "--others", "--exclude-standard", location] + return $ lines s + +{- Runs git config and populates a repo with its config. -} +configRead :: Repo -> IO Repo +configRead repo = assertlocal repo $ do + {- Cannot use pipeRead because it relies on the config having + been already read. Instead, chdir to the repo. -} + cwd <- getCurrentDirectory + bracket_ (changeWorkingDirectory (top repo)) + (\_ -> changeWorkingDirectory cwd) $ + pOpen ReadFromPipe "git" ["config", "--list"] $ \h -> do + val <- hGetContentsStrict h + let r = repo { config = configParse val } + return r { remotes = configRemotes r } + +{- Calculates a list of a repo's configured remotes, by parsing its config. -} +configRemotes :: Repo -> [Repo] +configRemotes repo = map construct remotes + where + remotes = toList $ filter $ config repo + filter = filterWithKey (\k _ -> isremote k) + isremote k = (startswith "remote." k) && (endswith ".url" k) + remotename k = (split "." k) !! 1 + construct (k,v) = (gen v) { remoteName = Just $ remotename k } + gen v = if (isURI v) + then repoFromUrl v + else repoFromPath v + +{- Parses git config --list output into a config map. -} +configParse :: String -> Map.Map String String +configParse s = Map.fromList $ map pair $ lines s + where + pair l = (key l, val l) + key l = (keyval l) !! 0 + val l = join sep $ drop 1 $ keyval l + keyval l = split sep l :: [String] + sep = "=" + +{- Returns a single git config setting, or a default value if not set. -} +configGet :: Repo -> String -> String -> String +configGet repo key defaultValue = + Map.findWithDefault defaultValue key (config repo) + +{- Access to raw config Map -} +configMap :: Repo -> Map String String +configMap repo = config repo + +{- Finds the current git repository, which may be in a parent directory. -} +repoFromCwd :: IO Repo +repoFromCwd = do + cwd <- getCurrentDirectory + top <- seekUp cwd isRepoTop + case top of + (Just dir) -> return $ repoFromPath dir + Nothing -> error "Not in a git repository." + +seekUp :: String -> (String -> IO Bool) -> IO (Maybe String) +seekUp dir want = do + ok <- want dir + if ok + then return (Just dir) + else case (parentDir dir) of + "" -> return Nothing + d -> seekUp d want + +isRepoTop dir = do + r <- isRepo dir + b <- isBareRepo dir + return (r || b) + where + isRepo dir = gitSignature dir ".git" ".git/config" + isBareRepo dir = gitSignature dir "objects" "config" + gitSignature dir subdir file = do + s <- (doesDirectoryExist (dir ++ "/" ++ subdir)) + f <- (doesFileExist (dir ++ "/" ++ file)) + return (s && f) diff --git a/INSTALL b/INSTALL new file mode 100644 index 000000000..f024541c1 --- /dev/null +++ b/INSTALL @@ -0,0 +1 @@ +See doc/install.mdwn for installation instructions. diff --git a/LocationLog.hs b/LocationLog.hs new file mode 100644 index 000000000..785b3330d --- /dev/null +++ b/LocationLog.hs @@ -0,0 +1,160 @@ +{- git-annex location log + - + - git-annex keeps track of on which repository it last saw a value. + - This can be useful when using it for archiving with offline storage. + - When you indicate you --want a file, git-annex will tell you which + - repositories have the value. + - + - Location tracking information is stored in `.git-annex/key.log`. + - Repositories record their UUID and the date when they --get or --drop + - a value. + - + - A line of the log will look like: "date N UUID" + - Where N=1 when the repo has the file, and 0 otherwise. + - + - Git is configured to use a union merge for this file, + - so the lines may be in arbitrary order, but it will never conflict. + -} + +module LocationLog ( + LogStatus(..), + logChange, + keyLocations +) where + +import Data.Time.Clock.POSIX +import Data.Time +import System.Locale +import qualified Data.Map as Map +import System.IO +import System.Directory +import Data.Char + +import qualified GitRepo as Git +import Utility +import UUID +import Types +import Locations + +data LogLine = LogLine { + date :: POSIXTime, + status :: LogStatus, + uuid :: UUID +} deriving (Eq) + +data LogStatus = ValuePresent | ValueMissing | Undefined + deriving (Eq) + +instance Show LogStatus where + show ValuePresent = "1" + show ValueMissing = "0" + show Undefined = "undefined" + +instance Read LogStatus where + readsPrec _ "1" = [(ValuePresent, "")] + readsPrec _ "0" = [(ValueMissing, "")] + readsPrec _ _ = [(Undefined, "")] + +instance Show LogLine where + show (LogLine date status uuid) = unwords + [(show date), (show status), uuid] + +instance Read LogLine where + -- This parser is robust in that even unparsable log lines are + -- read without an exception being thrown. + -- Such lines have a status of Undefined. + readsPrec _ string = + if (length w == 3) + then case (pdate) of + Just v -> good v + Nothing -> undefined + else undefined + where + w = words string + date = w !! 0 + status = read $ w !! 1 + uuid = w !! 2 + pdate = (parseTime defaultTimeLocale "%s%Qs" date) :: Maybe UTCTime + + good v = ret $ LogLine (utcTimeToPOSIXSeconds v) status uuid + undefined = ret $ LogLine (0) Undefined "" + ret v = [(v, "")] + +{- Log a change in the presence of a key's value in a repository. -} +logChange :: Git.Repo -> Key -> UUID -> LogStatus -> IO () +logChange repo key uuid status = do + log <- logNow status uuid + ls <- readLog logfile + writeLog logfile (compactLog $ log:ls) + where + logfile = logFile repo key + +{- Reads a log file. + - Note that the LogLines returned may be in any order. -} +readLog :: FilePath -> IO [LogLine] +readLog file = do + exists <- doesFileExist file + if exists + then do + s <- withFileLocked file ReadMode $ \h -> + hGetContentsStrict h + -- filter out any unparsable lines + return $ filter (\l -> (status l) /= Undefined ) + $ map read $ lines s + else do + return [] + +{- Adds a LogLine to a log file -} +appendLog :: FilePath -> LogLine -> IO () +appendLog file line = do + createDirectoryIfMissing True (parentDir file) + withFileLocked file AppendMode $ \h -> + hPutStrLn h $ show line + +{- Writes a set of lines to a log file -} +writeLog :: FilePath -> [LogLine] -> IO () +writeLog file lines = do + createDirectoryIfMissing True (parentDir file) + withFileLocked file WriteMode $ \h -> + hPutStr h $ unlines $ map show lines + +{- Generates a new LogLine with the current date. -} +logNow :: LogStatus -> UUID -> IO LogLine +logNow status uuid = do + now <- getPOSIXTime + return $ LogLine now status uuid + +{- Returns the filename of the log file for a given key. -} +logFile :: Git.Repo -> Key -> String +logFile repo key = + (gitStateDir repo) ++ (Git.relative repo (keyFile key)) ++ ".log" + +{- Returns a list of repository UUIDs that, according to the log, have + - the value of a key. -} +keyLocations :: Git.Repo -> Key -> IO [UUID] +keyLocations thisrepo key = do + lines <- readLog $ logFile thisrepo key + return $ map uuid (filterPresent lines) + +{- Filters the list of LogLines to find ones where the value + - is (or should still be) present. -} +filterPresent :: [LogLine] -> [LogLine] +filterPresent lines = filter (\l -> ValuePresent == status l) $ compactLog lines + +{- Compacts a set of logs, returning a subset that contains the current + - status. -} +compactLog :: [LogLine] -> [LogLine] +compactLog lines = compactLog' Map.empty lines +compactLog' map [] = Map.elems map +compactLog' map (l:ls) = compactLog' (mapLog map l) ls + +{- Inserts a log into a map of logs, if the log has better (ie, newer) + - information about a repo than the other logs in the map -} +mapLog map log = + if (better) + then Map.insert (uuid log) log map + else map + where + better = case (Map.lookup (uuid log) map) of + Just l -> (date l <= date log) + Nothing -> True diff --git a/Locations.hs b/Locations.hs new file mode 100644 index 000000000..18d416eb4 --- /dev/null +++ b/Locations.hs @@ -0,0 +1,62 @@ +{- git-annex file locations + -} + +module Locations ( + gitStateDir, + stateLoc, + keyFile, + fileKey, + annexLocation, + annexLocationRelative, + annexTmpLocation +) where + +import Data.String.Utils + +import Types +import qualified TypeInternals as Internals +import qualified GitRepo as Git + +{- Long-term, cross-repo state is stored in files inside the .git-annex + - directory, in the git repository's working tree. -} +stateLoc = ".git-annex/" +gitStateDir :: Git.Repo -> FilePath +gitStateDir repo = (Git.workTree repo) ++ "/" ++ stateLoc + +{- An annexed file's content is stored in + - /path/to/repo/.git/annex/<key>, where <key> is of the form + - <backend:fragment> + - + - That allows deriving the key and backend by looking at the symlink to it. + -} +annexLocation :: Git.Repo -> Key -> FilePath +annexLocation r key = + (Git.workTree r) ++ "/" ++ (annexLocationRelative r key) + +{- Annexed file's location relative to git's working tree. -} +annexLocationRelative :: Git.Repo -> Key -> FilePath +annexLocationRelative r key = Git.dir r ++ "/annex/" ++ (keyFile key) + +{- .git-annex/tmp is used for temp files + -} +annexTmpLocation :: Git.Repo -> FilePath +annexTmpLocation r = (Git.workTree r) ++ "/" ++ Git.dir r ++ "/annex/tmp/" + +{- Converts a key into a filename fragment. + - + - Escape "/" in the key name, to keep a flat tree of files and avoid + - issues with keys containing "/../" or ending with "/" etc. + - + - "/" is escaped to "%" because it's short and rarely used, and resembles + - a slash + - "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping + - is one to one. + - -} +keyFile :: Key -> FilePath +keyFile key = replace "/" "%" $ replace "%" "&s" $ replace "&" "&a" $ show key + +{- Reverses keyFile, converting a filename fragment (ie, the basename of + - the symlink target) into a key. -} +fileKey :: FilePath -> Key +fileKey file = read $ + replace "&a" "&" $ replace "&s" "%" $ replace "%" "/" file diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..d35e82ad5 --- /dev/null +++ b/Makefile @@ -0,0 +1,29 @@ +all: git-annex docs + +git-annex: + mkdir -p build + ghc -odir build -hidir build --make git-annex + +install: + install -d $(DESTDIR)/usr/bin + install git-annex $(DESTDIR)/usr/bin + +# If ikiwiki is available, build static html docs suitable for being +# shipped in the software package. +ifeq ($(shell which ikiwiki),) +IKIWIKI=echo "** ikiwiki not found, skipping building docs" >&2 +else +IKIWIKI=ikiwiki +endif + +docs: + ./mdwn2man git-annex 1 doc/git-annex.mdwn > git-annex.1 + $(IKIWIKI) doc html -v --wikiname git-annex --plugin=goodstuff \ + --no-usedirs --disable-plugin=openid --plugin=sidebar \ + --underlaydir=/dev/null + +clean: + rm -rf build git-annex git-annex.1 + rm -rf doc/.ikiwiki html + +.PHONY: git-annex diff --git a/Remotes.hs b/Remotes.hs new file mode 100644 index 000000000..a0894f418 --- /dev/null +++ b/Remotes.hs @@ -0,0 +1,112 @@ +{- git-annex remote repositories -} + +module Remotes ( + list, + withKey, + tryGitConfigRead +) where + +import Control.Exception +import Control.Monad.State (liftIO) +import qualified Data.Map as Map +import Data.String.Utils +import Data.Either.Utils +import List +import Maybe + +import Types +import qualified GitRepo as Git +import qualified Annex +import LocationLog +import Locations +import UUID + +{- Human visible list of remotes. -} +list :: [Git.Repo] -> String +list remotes = join " " $ map Git.repoDescribe remotes + +{- Cost ordered list of remotes that the LocationLog indicate may have a key. -} +withKey :: Key -> Annex [Git.Repo] +withKey key = do + g <- Annex.gitRepo + uuids <- liftIO $ keyLocations g key + allremotes <- remotesByCost + -- This only uses cached data, so may not include new remotes + -- or remotes whose uuid has changed (eg by a different drive being + -- mounted at their location). So unless it happens to find all + -- remotes, try harder, loading the remotes' configs. + remotes <- reposByUUID allremotes uuids + remotesread <- Annex.flagIsSet RemotesRead + if ((length allremotes /= length remotes) && not remotesread) + then tryharder allremotes uuids + else return remotes + where + tryharder allremotes uuids = do + -- more expensive; read each remote's config + eitherremotes <- mapM tryGitConfigRead allremotes + let allremotes' = map fromEither eitherremotes + remotes' <- reposByUUID allremotes' uuids + Annex.flagChange RemotesRead True + return remotes' + +{- Cost Ordered list of remotes. -} +remotesByCost :: Annex [Git.Repo] +remotesByCost = do + g <- Annex.gitRepo + reposByCost $ Git.remotes g + +{- Orders a list of git repos by cost. -} +reposByCost :: [Git.Repo] -> Annex [Git.Repo] +reposByCost l = do + costpairs <- mapM costpair l + return $ fst $ unzip $ sortBy bycost $ costpairs + where + costpair r = do + cost <- repoCost r + return (r, cost) + bycost (_, c1) (_, c2) = compare c1 c2 + +{- Calculates cost for a repo. + - + - The default cost is 100 for local repositories, and 200 for remote + - repositories; it can also be configured by remote.<name>.annex-cost + -} +repoCost :: Git.Repo -> Annex Int +repoCost r = do + g <- Annex.gitRepo + if ((length $ config g r) > 0) + then return $ read $ config g r + else if (Git.repoIsLocal r) + then return 100 + else return 200 + where + config g r = Git.configGet g (configkey r) "" + configkey r = "remote." ++ (Git.repoRemoteName r) ++ ".annex-cost" + +{- The git configs for the git repo's remotes is not read on startup + - because reading it may be expensive. This function tries to read the + - config for a specified remote, and updates state. If successful, it + - returns the updated git repo. -} +tryGitConfigRead :: Git.Repo -> Annex (Either Git.Repo Git.Repo) +tryGitConfigRead r = do + if (Map.null $ Git.configMap r) + then do + -- configRead can fail due to IO error or + -- for other reasons; catch all possible exceptions + result <- liftIO $ (try (Git.configRead r)::IO (Either SomeException (Git.Repo))) + case (result) of + Left err -> return $ Left r + Right r' -> do + g <- Annex.gitRepo + let l = Git.remotes g + let g' = Git.remotesAdd g $ + exchange l r' + Annex.gitRepoChange g' + return $ Right r' + else return $ Right r -- config already read + where + exchange [] new = [] + exchange (old:ls) new = + if ((Git.repoRemoteName old) == (Git.repoRemoteName new)) + then new:(exchange ls new) + else old:(exchange ls new) diff --git a/TypeInternals.hs b/TypeInternals.hs new file mode 100644 index 000000000..4a9d2653e --- /dev/null +++ b/TypeInternals.hs @@ -0,0 +1,72 @@ +{- git-annex internal data types + - + - Most things should not need this, using Types and/or Annex instead. + -} + +module TypeInternals where + +import Control.Monad.State (StateT) +import Data.String.Utils + +import qualified GitRepo as Git + +data Flag = + Force | -- command-line flags + RemotesRead -- indicates that remote repo configs have been read + deriving (Eq, Read, Show) + +-- git-annex's runtime state type doesn't really belong here, +-- but it uses Backend, so has to be here to avoid a depends loop. +data AnnexState = AnnexState { + repo :: Git.Repo, + backends :: [Backend], + supportedBackends :: [Backend], + flags :: [Flag] +} deriving (Show) + +-- git-annex's monad +type Annex = StateT AnnexState IO + +-- annexed filenames are mapped through a backend into keys +type KeyFrag = String +type BackendName = String +data Key = Key (BackendName, KeyFrag) deriving (Eq) + +-- show a key to convert it to a string; the string includes the +-- name of the backend to avoid collisions between key strings +instance Show Key where + show (Key (b, k)) = b ++ ":" ++ k + +instance Read Key where + readsPrec _ s = [((Key (b,k)) ,"")] + where + l = split ":" s + b = l !! 0 + k = join ":" $ drop 1 l + +-- pulls the backend name out +backendName :: Key -> BackendName +backendName (Key (b,k)) = b + +-- pulls the key fragment out +keyFrag :: Key -> KeyFrag +keyFrag (Key (b,k)) = k + +-- this structure represents a key-value backend +data Backend = Backend { + -- name of this backend + name :: String, + -- converts a filename to a key + getKey :: FilePath -> Annex (Maybe Key), + -- stores a file's contents to a key + storeFileKey :: FilePath -> Key -> Annex Bool, + -- retrieves a key's contents to a file + retrieveKeyFile :: Key -> FilePath -> Annex Bool, + -- removes a key + removeKey :: Key -> Annex Bool, + -- checks if a backend is storing the content of a key + hasKey :: Key -> Annex Bool +} + +instance Show Backend where + show backend = "Backend { name =\"" ++ (name backend) ++ "\" }" diff --git a/Types.hs b/Types.hs new file mode 100644 index 000000000..2284d9267 --- /dev/null +++ b/Types.hs @@ -0,0 +1,13 @@ +{- git-annex abstract data types -} + +module Types ( + Annex, + AnnexState, + Backend, + Key, + backendName, + keyFrag, + Flag(..), +) where + +import TypeInternals diff --git a/UUID.hs b/UUID.hs new file mode 100644 index 000000000..47d305c4f --- /dev/null +++ b/UUID.hs @@ -0,0 +1,140 @@ +{- git-annex uuids + - + - Each git repository used by git-annex has an annex.uuid setting that + - uniquely identifies that repository. + - + -} + +module UUID ( + UUID, + getUUID, + prepUUID, + genUUID, + reposByUUID, + prettyPrintUUIDs, + describeUUID, + uuidLog +) where + +import Control.Monad.State +import Maybe +import List +import System.Cmd.Utils +import System.IO +import System.Directory +import qualified Data.Map as M + +import qualified GitRepo as Git +import Types +import Locations +import qualified Annex +import Utility + +type UUID = String + +configkey="annex.uuid" + +{- Generates a UUID. There is a library for this, but it's not packaged, + - so use the command line tool. -} +genUUID :: IO UUID +genUUID = liftIO $ pOpen ReadFromPipe "uuid" ["-m"] $ \h -> hGetLine h + +{- Looks up a repo's UUID. May return "" if none is known. + - + - UUIDs of remotes are cached in git config, using keys named + - remote.<name>.annex-uuid + - + - -} +getUUID :: Git.Repo -> Annex UUID +getUUID r = do + g <- Annex.gitRepo + + let c = cached r g + let u = uncached r + + if (c /= u && u /= "") + then do + updatecache g r u + return u + else return c + where + uncached r = Git.configGet r "annex.uuid" "" + cached r g = Git.configGet g (cachekey r) "" + updatecache g r u = do + if (g /= r) + then setConfig (cachekey r) u + else return () + cachekey r = "remote." ++ (Git.repoRemoteName r) ++ ".annex-uuid" + +{- Make sure that the repo has an annex.uuid setting. -} +prepUUID :: Annex () +prepUUID = do + g <- Annex.gitRepo + u <- getUUID g + if ("" == u) + then do + uuid <- liftIO $ genUUID + setConfig configkey uuid + else return () + +{- Changes a git config setting in both internal state and .git/config -} +setConfig :: String -> String -> Annex () +setConfig key value = do + g <- Annex.gitRepo + liftIO $ Git.run g ["config", key, value] + -- re-read git config and update the repo's state + g' <- liftIO $ Git.configRead g + Annex.gitRepoChange g' + return () + +{- Filters a list of repos to ones that have listed UUIDs. -} +reposByUUID :: [Git.Repo] -> [UUID] -> Annex [Git.Repo] +reposByUUID repos uuids = do + filterM match repos + where + match r = do + u <- getUUID r + return $ isJust $ elemIndex u uuids + +{- Pretty-prints a list of UUIDs -} +prettyPrintUUIDs :: [UUID] -> Annex String +prettyPrintUUIDs uuids = do + m <- uuidMap + return $ unwords $ map (\u -> "\t"++(prettify m u)++"\n") uuids + where + prettify m u = + if (0 < (length $ findlog m u)) + then u ++ " -- " ++ (findlog m u) + else u + findlog m u = M.findWithDefault "" u m + +{- Records a description for a uuid in the uuidLog. -} +describeUUID :: UUID -> String -> Annex () +describeUUID uuid desc = do + m <- uuidMap + let m' = M.insert uuid desc m + log <- uuidLog + liftIO $ createDirectoryIfMissing True (parentDir log) + liftIO $ withFileLocked log WriteMode (\h -> hPutStr h $ serialize m') + where + serialize m = unlines $ map (\(u, d) -> u++" "++d) $ M.toList m + +{- Read and parse the uuidLog into a Map -} +uuidMap :: Annex (M.Map UUID String) +uuidMap = do + log <- uuidLog + s <- liftIO $ catch + (withFileLocked log ReadMode $ \h -> hGetContentsStrict h) + (\error -> return "") + return $ M.fromList $ map (\l -> pair l) $ lines s + where + pair l = + if (1 < (length $ words l)) + then ((words l) !! 0, unwords $ drop 1 $ words l) + else ("", "") + +{- Filename of uuid.log. -} +uuidLog :: Annex String +uuidLog = do + g <- Annex.gitRepo + return $ (gitStateDir g) ++ "uuid.log" diff --git a/Utility.hs b/Utility.hs new file mode 100644 index 000000000..09b973002 --- /dev/null +++ b/Utility.hs @@ -0,0 +1,110 @@ +{- git-annex utility functions + -} + +module Utility ( + withFileLocked, + hGetContentsStrict, + parentDir, + relPathCwdToDir, + relPathDirToDir, + boolSystem +) where + +import System.IO +import System.Cmd +import System.Exit +import System.Posix.Signals +import Data.Typeable +import System.Posix.IO +import Data.String.Utils +import System.Path +import System.IO.HVFS +import System.FilePath +import System.Directory + +{- Let's just say that Haskell makes reading/writing a file with + - file locking excessively difficult. -} +withFileLocked file mode action = do + -- TODO: find a way to use bracket here + handle <- openFile file mode + lockfd <- handleToFd handle -- closes handle + waitToSetLock lockfd (lockType mode, AbsoluteSeek, 0, 0) + handle' <- fdToHandle lockfd + ret <- action handle' + hClose handle' + return ret + where + lockType ReadMode = ReadLock + lockType _ = WriteLock + +{- A version of hgetContents that is not lazy. Ensures file is + - all read before it gets closed. -} +hGetContentsStrict h = hGetContents h >>= \s -> length s `seq` return s + +{- Returns the parent directory of a path. Parent of / is "" -} +parentDir :: String -> String +parentDir dir = + if length dirs > 0 + then slash ++ (join s $ take ((length dirs) - 1) dirs) + else "" + where + dirs = filter (\x -> length x > 0) $ + split s dir + slash = if (not $ isAbsolute dir) then "" else s + s = [pathSeparator] + +{- Constructs a relative path from the CWD to a directory. + - + - For example, assuming CWD is /tmp/foo/bar: + - relPathCwdToDir "/tmp/foo" == "../" + - relPathCwdToDir "/tmp/foo/bar" == "" + - relPathCwdToDir "/tmp/foo/bar" == "" + -} +relPathCwdToDir :: FilePath -> IO FilePath +relPathCwdToDir dir = do + cwd <- getCurrentDirectory + let absdir = abs cwd dir + return $ relPathDirToDir cwd absdir + where + -- absolute, normalized form of the directory + abs cwd dir = + case (absNormPath cwd dir) of + Just d -> d + Nothing -> error $ "unable to normalize " ++ dir + +{- Constructs a relative path from one directory to another. + - + - Both directories must be absolute, and normalized (eg with absNormpath). + - + - The path will end with "/", unless it is empty. + -} +relPathDirToDir :: FilePath -> FilePath -> FilePath +relPathDirToDir from to = + if (0 < length path) + then addTrailingPathSeparator path + else "" + where + s = [pathSeparator] + pfrom = split s from + pto = split s to + common = map fst $ filter same $ zip pfrom pto + same (c,d) = c == d + uncommon = drop numcommon pto + dotdots = take ((length pfrom) - numcommon) $ repeat ".." + numcommon = length $ common + path = join s $ dotdots ++ uncommon + +{- Run a system command, and returns True or False + - if it succeeded or failed. + - + - An error is thrown if the command exits due to SIGINT, + - to propigate ctrl-c. + -} +boolSystem :: FilePath -> [String] -> IO Bool +boolSystem command params = do + r <- rawSystem command params + case r of + ExitSuccess -> return True + ExitFailure e -> if Just e == cast sigINT + then error $ command ++ "interrupted" + else return False diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 000000000..998754777 --- /dev/null +++ b/debian/changelog @@ -0,0 +1,5 @@ +git-annex (0.01) UNRELEASED; urgency=low + + * First release + + -- Joey Hess <joeyh@debian.org> Thu, 09 Sep 2010 08:24:58 -0400 diff --git a/debian/compat b/debian/compat new file mode 100644 index 000000000..7f8f011eb --- /dev/null +++ b/debian/compat @@ -0,0 +1 @@ +7 diff --git a/debian/control b/debian/control new file mode 100644 index 000000000..83bc8c82b --- /dev/null +++ b/debian/control @@ -0,0 +1,26 @@ +Source: git-annex +Section: utils +Priority: optional +Build-Depends: debhelper (>= 7.0.50), ghc6, libghc6-missingh-dev, ikiwiki +Maintainer: Joey Hess <joeyh@debian.org> +Standards-Version: 3.9.1 +Vcs-Git: git://git.kitenet.net/git-annex +Homepage: http://git-annex.branchable.com/ + +Package: git-annex +Architecture: any +Section: utils +Depends: ${misc:Depends}, ${shlibs:Depends}, git | git-core, uuid +Description: manage files with git, without checking their contents into git + git-annex allows managing files with git, without checking the file + contents into git. While that may seem paradoxical, it is useful when + dealing with files larger than git can currently easily handle, whether due + to limitations in memory, checksumming time, or disk space. + . + Even without file content tracking, being able to manage files with git, + move files around and delete files with versioned directory trees, and use + branches and distributed clones, are all very handy reasons to use git. And + annexed files can co-exist in the same git repository with regularly + versioned files, which is convenient for maintaining documents, Makefiles, + etc that are associated with annexed files but that benefit from full + revision control. diff --git a/debian/copyright b/debian/copyright new file mode 100644 index 000000000..5d0ae13c8 --- /dev/null +++ b/debian/copyright @@ -0,0 +1,5 @@ +Files: * +Copyright: © 2010 Joey Hess <joey@kitenet.net> +License: GPL-2+ + The full text of the GPL is distributed as doc/GPL in this package's + source, or in /usr/share/common-licenses/GPL on Debian systems. diff --git a/debian/docs b/debian/docs new file mode 100644 index 000000000..1936cc1d4 --- /dev/null +++ b/debian/docs @@ -0,0 +1 @@ +html diff --git a/debian/manpages b/debian/manpages new file mode 100644 index 000000000..ca34203aa --- /dev/null +++ b/debian/manpages @@ -0,0 +1 @@ +git-annex.1 diff --git a/debian/rules b/debian/rules new file mode 100755 index 000000000..e0a209a72 --- /dev/null +++ b/debian/rules @@ -0,0 +1,7 @@ +#!/usr/bin/make -f +%: + dh $@ + +# Not intended for use by anyone except the author. +announcedir: + @echo ${HOME}/src/joeywiki/code/git-annex/news diff --git a/doc/GPL b/doc/GPL new file mode 100644 index 000000000..d159169d1 --- /dev/null +++ b/doc/GPL @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/doc/backends.mdwn b/doc/backends.mdwn new file mode 100644 index 000000000..d3ccaec49 --- /dev/null +++ b/doc/backends.mdwn @@ -0,0 +1,21 @@ +git-annex uses a key-value abstraction layer to allow file contents to be +stored in different ways. In theory, any key-value storage system could be +used to store file contents. + +When a file is annexed, a key is generated from its content and/or metadata. +The file checked into git symlinks to the key. This key can later be used +to retrieve the file's content (its value). + +Multiple pluggable backends are supported, and more than one can be used +to store different files' contents in a given repository. + +* `WORM` ("Write Once, Read Many") This backend stores the file's content + only in `.git/annex/`, and assumes that any file with the same basename, + size, and modification time has the same content. So with this backend, + files can be moved around, but should never be added to or changed. + This is the default, and the least expensive backend. +* `SHA1` -- This backend stores the file's content in + `.git/annex/`, with a name based on its sha1 checksum. This backend allows + modifications of files to be tracked. Its need to generate checksums + can make it slower for large files. +* `URL` -- This backend downloads the file's content from an external URL. diff --git a/doc/bugs.mdwn b/doc/bugs.mdwn new file mode 100644 index 000000000..2786e5bf7 --- /dev/null +++ b/doc/bugs.mdwn @@ -0,0 +1,4 @@ +This is git-annex's bug list. Link bugs to [[bugs/done]] when done. + +[[!inline pages="./bugs/* and !./bugs/done and !link(done) +and !*/Discussion" actions=yes postform=yes show=0 archive=yes]] diff --git a/doc/bugs/backendchecksum.mdwn b/doc/bugs/backendchecksum.mdwn new file mode 100644 index 000000000..40ff868c2 --- /dev/null +++ b/doc/bugs/backendchecksum.mdwn @@ -0,0 +1 @@ +This backend is not finished. diff --git a/doc/bugs/branching.mdwn b/doc/bugs/branching.mdwn new file mode 100644 index 000000000..21996ecc0 --- /dev/null +++ b/doc/bugs/branching.mdwn @@ -0,0 +1,36 @@ +The use of `.git-annex` to store logs means that if a repo has branches +and the user switched between them, git-annex will see different logs in +the different branches, and so may miss info about what remotes have which +files (though it can re-learn). + +An alternative would be to store the log data directly in the git repo +as `pristine-tar` does. Problem with that approach is that git won't merge +conflicting changes to log files if they are not in the currently checked +out branch. + +It would be possible to use a branch with a tree like this, to avoid +conflicts: + +key/uuid/time/status + +As long as new files are only added, and old timestamped files deleted, +there would be no conflicts. + +A related problem though is the size of the tree objects git needs to +commit. Having the logs in a separate branch doesn't help with that. +As more keys are added, the tree object size will increase, and git will +take longer and longer to commit, and use more space. One way to deal with +this is simply by splitting the logs amoung subdirectories. Git then can +reuse trees for most directories. (Check: Does it still have to build +dup trees in memory?) + +Another approach would be to have git-annex *delete* old logs. Keep logs +for the currently available files, or something like that. If other log +info is needed, look back through history to find the first occurance of a +log. Maybe even look at other branches -- so if the logs were on master, +a new empty branch could be made and git-annex would still know where to +get keys in that branch. + +Would have to be careful about conflicts when deleting and bringing back +files with the same name. And would need to avoid expensive searching thru +all history to try to find an old log file. diff --git a/doc/bugs/done.mdwn b/doc/bugs/done.mdwn new file mode 100644 index 000000000..a35d42719 --- /dev/null +++ b/doc/bugs/done.mdwn @@ -0,0 +1,4 @@ +recently fixed [[bugs]] + +[[!inline pages="./* and link(./done) and !*/Discussion" sort=mtime show=10 +archive=yes]] diff --git a/doc/bugs/dotdot_problem.mdwn b/doc/bugs/dotdot_problem.mdwn new file mode 100644 index 000000000..9d247a9c0 --- /dev/null +++ b/doc/bugs/dotdot_problem.mdwn @@ -0,0 +1,2 @@ +cannot "git annex ../foo" (GitRepo.relative is buggy and +git-ls-files also refuses w/o --full-name, which would need other changes) diff --git a/doc/bugs/file_copy_progress_bar.mdwn b/doc/bugs/file_copy_progress_bar.mdwn new file mode 100644 index 000000000..cd4ea33b7 --- /dev/null +++ b/doc/bugs/file_copy_progress_bar.mdwn @@ -0,0 +1,3 @@ +Find a way to copy a file with a progress bar, while still preserving +stat. Easiest way might be to use pv and fix up the permissions etc +after? diff --git a/doc/bugs/free_space_checking.mdwn b/doc/bugs/free_space_checking.mdwn new file mode 100644 index 000000000..34528a7b3 --- /dev/null +++ b/doc/bugs/free_space_checking.mdwn @@ -0,0 +1,8 @@ +Should check that there is enough free space before trying to copy a +file around. + +* Need a way to tell how much free space is available on the disk containing + a given repository. + +* And, need a way to tell the size of a file before copying it from + a remote, to check local disk space. diff --git a/doc/bugs/fsck.mdwn b/doc/bugs/fsck.mdwn new file mode 100644 index 000000000..308a1cb63 --- /dev/null +++ b/doc/bugs/fsck.mdwn @@ -0,0 +1 @@ +add a git annex fsck that finds keys that have no referring file diff --git a/doc/bugs/gitrm.mdwn b/doc/bugs/gitrm.mdwn new file mode 100644 index 000000000..d771aa32a --- /dev/null +++ b/doc/bugs/gitrm.mdwn @@ -0,0 +1,2 @@ +how to handle git rm file? (should try to drop keys that have no +referring file, if it seems safe..) diff --git a/doc/bugs/network_remotes.mdwn b/doc/bugs/network_remotes.mdwn new file mode 100644 index 000000000..be43ee20b --- /dev/null +++ b/doc/bugs/network_remotes.mdwn @@ -0,0 +1,3 @@ +Support for remote git repositories (ssh:// specifically can be made to +work, although the other end probably needs to have git-annex +installed..) diff --git a/doc/bugs/pushpull.mdwn b/doc/bugs/pushpull.mdwn new file mode 100644 index 000000000..47da2107f --- /dev/null +++ b/doc/bugs/pushpull.mdwn @@ -0,0 +1,2 @@ +--push/--pull should take a reponame and files, and push those files + to that repo; dropping them from the current repo diff --git a/doc/bugs/symlink_farming_commit_hook.mdwn b/doc/bugs/symlink_farming_commit_hook.mdwn new file mode 100644 index 000000000..af03beb70 --- /dev/null +++ b/doc/bugs/symlink_farming_commit_hook.mdwn @@ -0,0 +1,12 @@ +TODO: implement below + +git-annex does use a lot of symlinks. Specicially, relative symlinks, +that are checked into git. To allow you to move those around without +annoyance, git-annex can run as a post-commit hook. This way, you can `git mv` +a symlink to an annexed file, and as soon as you commit, it will be fixed +up. + +`git annex init` tries to set up a post-commit hook that is itself a symlink +back to git-annex. If you want to have your own shell script in the post-commit +hook, just make it call `git annex` with no parameters. git-annex will detect +when it's run from a git hook and do the necessary fixups. diff --git a/doc/bugs/using_url_backend.mdwn b/doc/bugs/using_url_backend.mdwn new file mode 100644 index 000000000..a0d447c6e --- /dev/null +++ b/doc/bugs/using_url_backend.mdwn @@ -0,0 +1,9 @@ +There is no way to `git annex add` a file using the URL [[backend|backends]]. + +For now, we have to manually make the symlink. Something like this: + + ln -s .git/annex/URL:http:%%www.example.com%foo.tar.gz + +Note the escaping of slashes. + +A `git annex register <url>` command could do this.. diff --git a/doc/contact.mdwn b/doc/contact.mdwn new file mode 100644 index 000000000..1238ca040 --- /dev/null +++ b/doc/contact.mdwn @@ -0,0 +1,4 @@ +Joey Hess <joey@kitenet.net> is the author of git-annex. + +The [VCS-home mailing list](http://lists.madduck.net/listinfo/vcs-home) +is a good place to discuss it. diff --git a/doc/copies.mdwn b/doc/copies.mdwn new file mode 100644 index 000000000..ff66f4e8a --- /dev/null +++ b/doc/copies.mdwn @@ -0,0 +1,30 @@ +The WORM and SHA1 key-value [[backends|backend]] store data inside +your git repository's `.git` directory, not in some external data store. + +It's important that data not get lost by an ill-considered `git annex drop` +command. So, then using those backends, git-annex can be configured to try +to keep N copies of a file's content available across all repositories. By +default, N is 1; it is configured by annex.numcopies. + +`git annex drop` attempts to check with other git remotes, to check that N +copies of the file exist. If enough repositories cannot be verified to have +it, it will retain the file content to avoid data loss. + +For example, consider three repositories: Server, Laptop, and USB. Both Server +and USB have a copy of a file, and N=1. If on Laptop, you `git annex get +$file`, this will transfer it from either Server or USB (depending on which +is available), and there are now 3 copies of the file. + +Suppose you want to free up space on Laptop again, and you `git annex drop` the file +there. If USB is connected, or Server can be contacted, git-annex can check +that it still has a copy of the file, and the content is removed from +Laptop. But if USB is currently disconnected, and Server also cannot be +contacted, it can't verify that it is safe to drop the file, and will +refuse to do so. + +With N=2, in order to drop the file content from Laptop, it would need access +to both USB and Server. + +Note that different repositories can be configured with different values of +N. So just because Laptop has N=2, this does not prevent the number of +copies falling to 1, when USB and Server have N=1. diff --git a/doc/download.mdwn b/doc/download.mdwn new file mode 100644 index 000000000..664f46ed9 --- /dev/null +++ b/doc/download.mdwn @@ -0,0 +1,7 @@ +The main git repository for git-annex is `git://git.kitenet.net/git-annex` +[[gitweb](http://git.kitenet.net/?p=git-annex;a=summary)] + +There are no binary packages yet, but you can build Debian packages from +the source tree with `dpkg-buildpackage`. + +Next: [[install]] diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn new file mode 100644 index 000000000..09b245497 --- /dev/null +++ b/doc/git-annex.mdwn @@ -0,0 +1,148 @@ +# NAME + +git-annex - manage files with git, without checking their contents in + +# SYNOPSIS + +git annex subcommand [path ...] + +# DESCRIPTION + +git-annex allows managing files with git, without checking the file +contents into git. While that may seem paradoxical, it is useful when +dealing with files larger than git can currently easily handle, whether due +to limitations in memory, checksumming time, or disk space. + +Even without file content tracking, being able to manage files with git, +move files around and delete files with versioned directory trees, and use +branches and distributed clones, are all very handy reasons to use git. And +annexed files can co-exist in the same git repository with regularly +versioned files, which is convenient for maintaining documents, Makefiles, +etc that are associated with annexed files but that benefit from full +revision control. + +When a file is annexed, its content is moved into a key-value store, and +a symlink is made that points to the content. These symlinks are checked into +git and versioned like regular files. You can move them around, delete +them, and so on. Pushing to another git repository will make git-annex +there aware of the annexed file, and it can be used to retrieve its +content from the key-value store. + +# EXAMPLES + + # git annex get video/hackity_hack_and_kaxxt.mov + get video/_why_hackity_hack_and_kaxxt.mov (not available) + I was unable to access these remotes: server + Try making some of these repositories available: + 5863d8c0-d9a9-11df-adb2-af51e6559a49 -- my home file server + 58d84e8a-d9ae-11df-a1aa-ab9aa8c00826 -- portable USB drive + ca20064c-dbb5-11df-b2fe-002170d25c55 -- backup SATA drive + failed + # sudo mount /media/usb + # git remote add usbdrive /media/usb + # git annex get video/hackity_hack_and_kaxxt.mov + get video/hackity_hack_and_kaxxt.mov (copying from usbdrive...) ok + # git commit -a -m "got a video I want to rewatch on the plane" + + # git annex add iso + add iso/Debian_5.0.iso ok + # git commit -a -m "saving Debian CD for later" + + # git annex push usbdrive iso + error: push not yet implemented! + # git annex drop iso + drop iso/Debian_5.0.iso ok + # git commit -a -m "freed up space" + +# SUBCOMMANDS + +Like many git commands, git-annex can be passed a path that +is either a file or a directory. In the latter case it acts on all relevant +files in the directory. + +Many git-annex subcommands will stage changes for later `git commit` by you. + +* add [path ...] + + Adds files in the path to the annex. Files that are already checked into + git, or that git has been configured to ignore will be silently skipped. + +* get [path ...] + + Makes the content of annexed files available in this repository. Depending + on the backend used, this will involve copying them from another repository, + or downloading them, or transferring them from some kind of key-value store. + +* drop [path ...] + + Drops the content of annexed files from this repository. + + git-annex may refuse to drop a content if the backend does not think + it is safe to do so. + +* unannex [path ...] + + Use this to undo an accidental add command. This is not the command you + should use if you intentionally annexed a file and don't want its contents + any more. In that case you should use `git annex drop` instead, and you + can also `git rm` the file. + +* init description + + Initializes git-annex with a descripotion of the git repository. + This is an optional, but recommended step. + +* fix [path ...] + + Fixes up symlinks that have become broken to again point to annexed content. + This is useful to run if you have been moving the symlinks around. + +# OPTIONS + +* --force + + Force unsafe actions, such as dropping a file's content when no other + source of it can be verified to still exist. Use with care. + +## CONFIGURATION + +Like other git commands, git-annex is configured via `.git/config`. + +* `annex.uuid` -- a unique UUID for this repository (automatically set) +* `annex.numcopies` -- number of copies of files to keep across all + repositories (default: 1) +* `annex.backends` -- space-separated list of names of + the key-value backends to use. The first listed is used to store + new files. (default: "WORM SHA1 URL") +* `remote.<name>.annex-cost` -- When determining which repository to + transfer annexed files from or to, ones with lower costs are preferred. + The default cost is 100 for local repositories, and 200 for remote + repositories. Note that other factors may be configured when pushing + files to repositories, in particular, whether the repository is on + a filesystem with sufficient free space. +* `remote.<name>.annex-uuid` -- git-annex caches UUIDs of repositories + here. + +# FILES + +These files are used, in your git repository: + +`.git/annex/` contains the annexed file contents that are currently +available. Annexed files in your git repository symlink to that content. + +`.git-annex/uuid.log` is used to map between repository UUID and +decscriptions. You may edit it. + +`.git-annex/*.log` is where git-annex records its content tracking +information. These files should be committed to git. + +`.git-annex/.gitattributes` is configured to use git's union merge driver +to avoid conflicts when merging files in the `.git-annex` directory. + +# AUTHOR + +Joey Hess <joey@ikiwiki.info> + +<http://git-annex.branchable.com/> + +Warning: this page is automatically made into a man page via [mdwn2man](http://git.ikiwiki.info/?p=ikiwiki;a=blob;f=mdwn2man;hb=HEAD). Edit with care diff --git a/doc/index.mdwn b/doc/index.mdwn new file mode 100644 index 000000000..b3a871627 --- /dev/null +++ b/doc/index.mdwn @@ -0,0 +1,50 @@ +git-annex allows managing files with git, without checking the file +contents into git. While that may seem paradoxical, it is useful when +dealing with files larger than git can currently easily handle, whether due +to limitations in memory, checksumming time, or disk space. + +Even without file content tracking, being able to manage files with git, +move files around and delete files with versioned directory trees, and use +branches and distributed clones, are all very handy reasons to use git. And +annexed files can co-exist in the same git repository with regularly +versioned files, which is convenient for maintaining documents, Makefiles, +etc that are associated with annexed files but that benefit from full +revision control. + +[[!sidebar content=""" +* **[[download]]** +* [[install]] +* [[news]] +* [[bugs]] +* [[contact]] +"""]] + + +## sample use cases + +<table> +<tr> +<td>[[!inline feeds=no template=bare pages=use_case/bob]]</td> +<td>[[!inline feeds=no template=bare pages=use_case/alice]]</td> +</tr> +</table> + +If that describes you, or if you're some from column A and some from column +B, then git-annex may be the tool you've been looking for to expand from +keeping all your small important files in git, to managing your large +files with git. + +## documentation + +* [[git-annex man page|git-annex]] +* [[key-value backends|backends]] for data storage +* [[location_tracking]] reminds you where git-annex has seen files +* git-annex prevents accidential data loss by [[tracking copies|copies]] + of your files +* [[what git annex is not|not]] +* git-annex is Free Software, licensed under the [[GPL]]. + +---- + +git-annex's wiki is powered by [Ikiwiki](http://ikiwiki.info/) and +hosted by [Branchable](http://branchable.com/). diff --git a/doc/install.mdwn b/doc/install.mdwn new file mode 100644 index 000000000..cc6fb6fb3 --- /dev/null +++ b/doc/install.mdwn @@ -0,0 +1,7 @@ +To build and use git-annex, you will need: + +* The Haskell Platform: <http://haskell.org/platform/> +* MissingH: <http://github.com/jgoerzen/missingh/wiki> +* uuid: <http://www.ossp.org/pkg/lib/uuid/> + +Then just [[download]] git-annex and run: `make; make install` diff --git a/doc/location_tracking.mdwn b/doc/location_tracking.mdwn new file mode 100644 index 000000000..a7d5c150b --- /dev/null +++ b/doc/location_tracking.mdwn @@ -0,0 +1,28 @@ +git-annex keeps track of in which repositories it last saw a file's content. +This location tracking information is stored in `.git-annex/$key.log`. +Repositories record their UUID and the date when they get or drop +a file's content. (Git is configured to use a union merge for this file, +so the lines may be in arbitrary order, but it will never conflict.) + +This location tracking information is useful if you have multiple +repositories, and not all are always accessible. For example, perhaps one +is on a home file server, and you are away from home. Then git-annex can +tell you what git remote it needs access to in order to get a file: + + # git annex get myfile + get myfile(not available) + I was unable to access these remotes: home + +Another way the location tracking comes in handy is if you put repositories +on removable USB drives, that might be archived away offline in a safe +place. In this sort of case, you probably don't have a git remotes +configured for every USB drive. So git-annex may have to resort to talking +about repository UUIDs. If you have previously used "git annex init" +to attach descriptions to those repositories, it will include their +descriptions to help you with finding them: + + # git annex get myfile + get myfile (not available) + Try making some of these repositories available: + c0a28e06-d7ef-11df-885c-775af44f8882 -- USB archive drive 1 + e1938fee-d95b-11df-96cc-002170d25c55 diff --git a/doc/news.mdwn b/doc/news.mdwn new file mode 100644 index 000000000..d0ff1ca2c --- /dev/null +++ b/doc/news.mdwn @@ -0,0 +1,5 @@ +This is where announcements of new releases, features, and other news is +posted. git-annex users are recommended to subscribe to this page's RSS +feed. + +[[!inline pages="./news/* and !*/Discussion" rootpage="news" show="30"]] diff --git a/doc/not.mdwn b/doc/not.mdwn new file mode 100644 index 000000000..affcb57f1 --- /dev/null +++ b/doc/not.mdwn @@ -0,0 +1,16 @@ +[[!meta title="what git-annex is not"]] + +* git-annex is not a backup system. It may be a useful component of an + [[archival|use_case/bob]] system, or a way to deliver files to a backup + system. + + For a backup system that uses git, take a look at + [bup](http://github.com/apenwarr/bup). + +* git-annex is not unison, but if you're finding unison's checksumming + too slow, or its strict mirroring of everything to both places too + limiting, then git-annex could be a useful alternative. + +* git-annex is not some flaky script that was quickly thrown together. + I wrote it in Haskell because I wanted it to be solid and to compile + down to a binary. diff --git a/doc/templates/bare.tmpl b/doc/templates/bare.tmpl new file mode 100644 index 000000000..2d476b716 --- /dev/null +++ b/doc/templates/bare.tmpl @@ -0,0 +1 @@ +<TMPL_VAR CONTENT> diff --git a/doc/use_case/Alice.mdwn b/doc/use_case/Alice.mdwn new file mode 100644 index 000000000..c42eb3a74 --- /dev/null +++ b/doc/use_case/Alice.mdwn @@ -0,0 +1,18 @@ +### The Nomad + +Alice is always on the move, often with her trusty netbook and a small +handheld terabyte USB drive, or a smaller USB keydrive. She has a server +out there on the net. All these things can have different files on them, +but Alice no longer has to deal with the tedious process of keeping them +manually in sync. + +When she has 1 bar on her cell, Alice queues up interesting files on her +server for later. At a coffee shop, she has git-annex download them to her +USB drive. High in the sky or in a remote cabin, she catches up on +podcasts, videos, and games, first letting git-annex copy them from +her USB drive to the netbook (this saves battery power). + +When she's done, she tells git-annex which to keep and which to remove. +They're all removed from her netbook to save space, and Alice knowns +that next time she syncs up to the net, her changes will be synced back +to her server. diff --git a/doc/use_case/Bob.mdwn b/doc/use_case/Bob.mdwn new file mode 100644 index 000000000..a5dc01b37 --- /dev/null +++ b/doc/use_case/Bob.mdwn @@ -0,0 +1,18 @@ +### The Archivist + +Bob has many drives to archive his data, most of them kept offline, in a +safe place. + +With git-annex, Bob has a single directory tree that includes all +his files, even if their content is being stored offline. He can +reorganize his files using that tree, committing new versions to git, +without worry about accidentially deleting anything. + +When Bob needs access to some files, git-annex can tell him which drive(s) +they're on, and easily make them available. Indeed, every drive knows what +is on every other drive. + +Run in a cron job, git-annex adds new files to achival drives at night. It +also helps Bob keep track of intentional, and unintentional copies of +files, and logs information he can use to decide when it's time to duplicate +the content of old drives. diff --git a/git-annex.hs b/git-annex.hs new file mode 100644 index 000000000..71a21379d --- /dev/null +++ b/git-annex.hs @@ -0,0 +1,47 @@ +{- git-annex main program -} + +import IO (try) +import System.IO +import System.Environment + +import qualified Annex +import Types +import Core +import Commands +import qualified GitRepo as Git +import BackendList + +main = do + args <- getArgs + gitrepo <- Git.repoFromCwd + state <- Annex.new gitrepo allBackends + (flags, actions) <- parseCmd args state + tryRun state $ [startup flags] ++ actions ++ [shutdown] + +{- Runs a list of Annex actions. Catches IO errors and continues + - (but explicitly thrown errors terminate the whole command). + - Propigates an overall error status at the end. + - + - This runs in the IO monad, not in the Annex monad. It seems that + - exceptions can only be caught in the IO monad, not in a stacked monad; + - or more likely I missed an easy way to do it. So, I have to laboriously + - thread AnnexState through this function. + -} +tryRun :: AnnexState -> [Annex ()] -> IO () +tryRun state actions = tryRun' state 0 actions +tryRun' state errnum (a:as) = do + result <- try $ Annex.run state a + case (result) of + Left err -> do + showErr err + tryRun' state (errnum + 1) as + Right (_,state') -> tryRun' state' errnum as +tryRun' state errnum [] = do + if (errnum > 0) + then error $ (show errnum) ++ " failed" + else return () + +{- Exception pretty-printing. -} +showErr e = do + hPutStrLn stderr $ "git-annex: " ++ (show e) + return () diff --git a/mdwn2man b/mdwn2man new file mode 100755 index 000000000..c21253945 --- /dev/null +++ b/mdwn2man @@ -0,0 +1,43 @@ +#!/usr/bin/perl +# Warning: hack + +my $prog=shift; +my $section=shift; + +print ".TH $prog $section\n"; + +while (<>) { + s{(\\?)\[\[([^\s\|\]]+)(\|[^\s\]]+)?\]\]}{$1 ? "[[$2]]" : $2}eg; + s/\`//g; + s/^\s*\./\\&./g; + if (/^#\s/) { + s/^#\s/.SH /; + <>; # blank; + } + s/^ +//; + s/^\t/ /; + s/-/\\-/g; + s/^Warning:.*//g; + s/^$/.PP\n/; + s/^\*\s+(.*)/.IP "$1"/; + next if $_ eq ".PP\n" && $skippara; + if (/^.IP /) { + $inlist=1; + $spippara=0; + } + elsif (/.SH/) { + $skippara=0; + $inlist=0; + } + elsif (/^\./) { + $skippara=1; + } + else { + $skippara=0; + } + if ($inlist && $_ eq ".PP\n") { + $_=".IP\n"; + } + + print $_; +} |