From cfe21e85e7fba61ac588e210f2a9b75f8d081f42 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 4 Oct 2011 00:40:47 -0400 Subject: rename --- Annex/Branch.hs | 336 +++++++++++++++++++++++++++++++++++++++++++++++++++++ Annex/CatFile.hs | 24 ++++ Annex/Common.hs | 13 +++ Annex/Content.hs | 237 +++++++++++++++++++++++++++++++++++++ Annex/Exception.hs | 2 +- Annex/Queue.hs | 42 +++++++ Annex/Version.hs | 46 ++++++++ 7 files changed, 699 insertions(+), 1 deletion(-) create mode 100644 Annex/Branch.hs create mode 100644 Annex/CatFile.hs create mode 100644 Annex/Common.hs create mode 100644 Annex/Content.hs create mode 100644 Annex/Queue.hs create mode 100644 Annex/Version.hs (limited to 'Annex') diff --git a/Annex/Branch.hs b/Annex/Branch.hs new file mode 100644 index 000000000..c6db9deca --- /dev/null +++ b/Annex/Branch.hs @@ -0,0 +1,336 @@ +{- management of the git-annex branch + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Branch ( + create, + update, + get, + change, + commit, + files, + refExists, + hasOrigin, + hasSomeBranch, + name +) where + +import System.IO.Binary +import System.Exit +import qualified Data.ByteString.Lazy.Char8 as L + +import Annex.Common +import Annex.Exception +import Types.BranchState +import qualified Git +import qualified Git.UnionMerge +import qualified Annex +import Annex.CatFile + +type GitRef = String + +{- Name of the branch that is used to store git-annex's information. -} +name :: GitRef +name = "git-annex" + +{- Fully qualified name of the branch. -} +fullname :: GitRef +fullname = "refs/heads/" ++ name + +{- Branch's name in origin. -} +originname :: GitRef +originname = "origin/" ++ name + +{- A separate index file for the branch. -} +index :: Git.Repo -> FilePath +index g = gitAnnexDir g "index" + +{- Populates the branch's index file with the current branch contents. + - + - Usually, this is only done when the index doesn't yet exist, and + - the index is used to build up changes to be commited to the branch, + - and merge in changes from other branches. + -} +genIndex :: Git.Repo -> IO () +genIndex g = Git.UnionMerge.ls_tree g fullname >>= Git.UnionMerge.update_index g + +{- Runs an action using the branch's index file. -} +withIndex :: Annex a -> Annex a +withIndex = withIndex' False +withIndex' :: Bool -> Annex a -> Annex a +withIndex' bootstrapping a = do + g <- gitRepo + let f = index g + + bracketIO (Git.useIndex f) id $ do + unlessM (liftIO $ doesFileExist f) $ do + unless bootstrapping create + liftIO $ createDirectoryIfMissing True $ takeDirectory f + unless bootstrapping $ liftIO $ genIndex g + a + +withIndexUpdate :: Annex a -> Annex a +withIndexUpdate a = update >> withIndex a + +getState :: Annex BranchState +getState = Annex.getState Annex.branchstate + +setState :: BranchState -> Annex () +setState state = Annex.changeState $ \s -> s { Annex.branchstate = state } + +setCache :: FilePath -> String -> Annex () +setCache file content = do + state <- getState + setState state { cachedFile = Just file, cachedContent = content } + +invalidateCache :: Annex () +invalidateCache = do + state <- getState + setState state { cachedFile = Nothing, cachedContent = "" } + +getCache :: FilePath -> Annex (Maybe String) +getCache file = getState >>= go + where + go state + | cachedFile state == Just file = + return $ Just $ cachedContent state + | otherwise = return Nothing + +{- Creates the branch, if it does not already exist. -} +create :: Annex () +create = unlessM hasBranch $ do + g <- gitRepo + e <- hasOrigin + if e + then liftIO $ Git.run g "branch" [Param name, Param originname] + else withIndex' True $ + liftIO $ Git.commit g "branch created" fullname [] + +{- Stages the journal, and commits staged changes to the branch. -} +commit :: String -> Annex () +commit message = do + fs <- getJournalFiles + when (not $ null fs) $ lockJournal $ do + stageJournalFiles fs + g <- gitRepo + withIndex $ liftIO $ Git.commit g message fullname [fullname] + +{- Ensures that the branch is up-to-date; should be called before + - data is read from it. Runs only once per git-annex run. -} +update :: Annex () +update = do + state <- getState + unless (branchUpdated state) $ do + -- check what needs updating before taking the lock + fs <- getJournalFiles + refs <- filterM checkref =<< siblingBranches + unless (null fs && null refs) $ withIndex $ lockJournal $ do + {- Before refs are merged into the index, it's + - important to first stage the journal into the + - index. Otherwise, any changes in the journal + - would later get staged, and might overwrite + - changes made during the merge. + - + - It would be cleaner to handle the merge by + - updating the journal, not the index, with changes + - from the branches. + -} + unless (null fs) $ stageJournalFiles fs + mapM_ mergeref refs + g <- gitRepo + liftIO $ Git.commit g "update" fullname (fullname:refs) + Annex.changeState $ \s -> s { Annex.branchstate = state { branchUpdated = True } } + invalidateCache + where + checkref ref = do + g <- gitRepo + -- checking with log to see if there have been changes + -- is less expensive than always merging + diffs <- liftIO $ Git.pipeRead g [ + Param "log", + Param (name++".."++ref), + Params "--oneline -n1" + ] + return $ not $ L.null diffs + mergeref ref = do + showSideAction $ "merging " ++ + Git.refDescribe ref ++ " into " ++ name + {- By passing only one ref, it is actually + - merged into the index, preserving any + - changes that may already be staged. + - + - However, any changes in the git-annex + - branch that are *not* reflected in the + - index will be removed. So, documentation + - advises users not to directly modify the + - branch. + -} + g <- gitRepo + liftIO $ Git.UnionMerge.merge g [ref] + return $ Just ref + +{- Checks if a git ref exists. -} +refExists :: GitRef -> Annex Bool +refExists ref = do + g <- gitRepo + liftIO $ Git.runBool g "show-ref" + [Param "--verify", Param "-q", Param ref] + +{- Does the main git-annex branch exist? -} +hasBranch :: Annex Bool +hasBranch = refExists fullname + +{- Does origin/git-annex exist? -} +hasOrigin :: Annex Bool +hasOrigin = refExists originname + +{- Does the git-annex branch or a foo/git-annex branch exist? -} +hasSomeBranch :: Annex Bool +hasSomeBranch = not . null <$> siblingBranches + +{- List of all git-annex branches, including the main one and any + - from remotes. -} +siblingBranches :: Annex [String] +siblingBranches = do + g <- gitRepo + r <- liftIO $ Git.pipeRead g [Param "show-ref", Param name] + return $ map (last . words . L.unpack) (L.lines r) + +{- Applies a function to modifiy the content of a file. -} +change :: FilePath -> (String -> String) -> Annex () +change file a = lockJournal $ get file >>= return . a >>= set file + +{- Records new content of a file into the journal. -} +set :: FilePath -> String -> Annex () +set file content = do + setJournalFile file content + setCache file content + +{- Gets the content of a file on the branch, or content from the journal, or + - staged in the index. + - + - Returns an empty string if the file doesn't exist yet. -} +get :: FilePath -> Annex String +get file = do + cached <- getCache file + case cached of + Just content -> return content + Nothing -> do + j <- getJournalFile file + case j of + Just content -> do + setCache file content + return content + Nothing -> withIndexUpdate $ do + content <- catFile fullname file + setCache file content + return content + +{- Lists all files on the branch. There may be duplicates in the list. -} +files :: Annex [FilePath] +files = withIndexUpdate $ do + g <- gitRepo + bfiles <- liftIO $ Git.pipeNullSplit g + [Params "ls-tree --name-only -r -z", Param fullname] + jfiles <- getJournalledFiles + return $ jfiles ++ bfiles + +{- Records content for a file in the branch to the journal. + - + - Using the journal, rather than immediatly staging content to the index + - avoids git needing to rewrite the index after every change. -} +setJournalFile :: FilePath -> String -> Annex () +setJournalFile file content = do + g <- gitRepo + liftIO $ catch (write g) $ const $ do + createDirectoryIfMissing True $ gitAnnexJournalDir g + createDirectoryIfMissing True $ gitAnnexTmpDir g + write g + where + -- journal file is written atomically + write g = do + let jfile = journalFile g file + let tmpfile = gitAnnexTmpDir g takeFileName jfile + writeBinaryFile tmpfile content + renameFile tmpfile jfile + +{- Gets any journalled content for a file in the branch. -} +getJournalFile :: FilePath -> Annex (Maybe String) +getJournalFile file = do + g <- gitRepo + liftIO $ catch (liftM Just . readFileStrict $ journalFile g file) + (const $ return Nothing) + +{- List of files that have updated content in the journal. -} +getJournalledFiles :: Annex [FilePath] +getJournalledFiles = map fileJournal <$> getJournalFiles + +{- List of existing journal files. -} +getJournalFiles :: Annex [FilePath] +getJournalFiles = do + g <- gitRepo + fs <- liftIO $ catch (getDirectoryContents $ gitAnnexJournalDir g) + (const $ return []) + return $ filter (`notElem` [".", ".."]) fs + +{- Stages the specified journalfiles. -} +stageJournalFiles :: [FilePath] -> Annex () +stageJournalFiles fs = do + g <- gitRepo + withIndex $ liftIO $ do + let dir = gitAnnexJournalDir g + let paths = map (dir ) fs + -- inject all the journal files directly into git + -- in one quick command + (pid, fromh, toh) <- hPipeBoth "git" $ toCommand $ + Git.gitCommandLine g [Param "hash-object", Param "-w", Param "--stdin-paths"] + _ <- forkProcess $ do + hPutStr toh $ unlines paths + hClose toh + exitSuccess + hClose toh + s <- hGetContents fromh + -- update the index, also in just one command + Git.UnionMerge.update_index g $ + index_lines (lines s) $ map fileJournal fs + hClose fromh + forceSuccess pid + mapM_ removeFile paths + where + index_lines shas = map genline . zip shas + genline (sha, file) = Git.UnionMerge.update_index_line sha file + +{- Produces a filename to use in the journal for a file on the branch. + - + - The journal typically won't have a lot of files in it, so the hashing + - used in the branch is not necessary, and all the files are put directly + - in the journal directory. + -} +journalFile :: Git.Repo -> FilePath -> FilePath +journalFile repo file = gitAnnexJournalDir repo concatMap mangle file + where + mangle '/' = "_" + mangle '_' = "__" + mangle c = [c] + +{- Converts a journal file (relative to the journal dir) back to the + - filename on the branch. -} +fileJournal :: FilePath -> FilePath +fileJournal = replace "//" "_" . replace "_" "/" + +{- Runs an action that modifies the journal, using locking to avoid + - contention with other git-annex processes. -} +lockJournal :: Annex a -> Annex a +lockJournal a = do + g <- gitRepo + let file = gitAnnexJournalLock g + bracketIO (lock file) unlock a + where + lock file = do + l <- createFile file stdFileMode + waitToSetLock l (WriteLock, AbsoluteSeek, 0, 0) + return l + unlock = closeFd diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs new file mode 100644 index 000000000..4f98815f8 --- /dev/null +++ b/Annex/CatFile.hs @@ -0,0 +1,24 @@ +{- git cat-file interface, with handle automatically stored in the Annex monad + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.CatFile ( + catFile +) where + +import Annex.Common +import qualified Git.CatFile +import qualified Annex + +catFile :: String -> FilePath -> Annex String +catFile branch file = maybe startup go =<< Annex.getState Annex.catfilehandle + where + startup = do + g <- gitRepo + h <- liftIO $ Git.CatFile.catFileStart g + Annex.changeState $ \s -> s { Annex.catfilehandle = Just h } + go h + go h = liftIO $ Git.CatFile.catFile h branch file diff --git a/Annex/Common.hs b/Annex/Common.hs new file mode 100644 index 000000000..ca7b1bff7 --- /dev/null +++ b/Annex/Common.hs @@ -0,0 +1,13 @@ +module Annex.Common ( + module Common, + module Types, + module Annex, + module Locations, + module Messages, +) where + +import Common +import Types +import Annex (gitRepo) +import Locations +import Messages diff --git a/Annex/Content.hs b/Annex/Content.hs new file mode 100644 index 000000000..a3fa79da8 --- /dev/null +++ b/Annex/Content.hs @@ -0,0 +1,237 @@ +{- git-annex file content managing + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Content ( + inAnnex, + calcGitLink, + logStatus, + getViaTmp, + getViaTmpUnchecked, + withTmp, + checkDiskSpace, + moveAnnex, + removeAnnex, + fromAnnex, + moveBad, + getKeysPresent, + saveState +) where + +import Annex.Common +import LocationLog +import UUID +import qualified Git +import qualified Annex +import qualified Annex.Queue +import qualified Annex.Branch +import Utility.StatFS +import Utility.FileMode +import Types.Key +import Utility.DataUnits +import Config + +{- Checks if a given key is currently present in the gitAnnexLocation. -} +inAnnex :: Key -> Annex Bool +inAnnex key = do + g <- gitRepo + when (Git.repoIsUrl g) $ error "inAnnex cannot check remote repo" + liftIO $ doesFileExist $ gitAnnexLocation g key + +{- Calculates the relative path to use to link a file to a key. -} +calcGitLink :: FilePath -> Key -> Annex FilePath +calcGitLink file key = do + g <- gitRepo + cwd <- liftIO getCurrentDirectory + let absfile = fromMaybe whoops $ absNormPath cwd file + return $ relPathDirToFile (parentDir absfile) + (Git.workTree g) ".git" annexLocation key + where + whoops = error $ "unable to normalize " ++ file + +{- Updates the LocationLog when a key's presence changes in the current + - repository. -} +logStatus :: Key -> LogStatus -> Annex () +logStatus key status = do + g <- gitRepo + u <- getUUID g + logChange g key u status + +{- Runs an action, passing it a temporary filename to download, + - and if the action succeeds, moves the temp file into + - the annex as a key's content. -} +getViaTmp :: Key -> (FilePath -> Annex Bool) -> Annex Bool +getViaTmp key action = do + g <- gitRepo + let tmp = gitAnnexTmpLocation g key + + -- Check that there is enough free disk space. + -- When the temp file already exists, count the space + -- it is using as free. + e <- liftIO $ doesFileExist tmp + if e + then do + stat <- liftIO $ getFileStatus tmp + checkDiskSpace' (fromIntegral $ fileSize stat) key + else checkDiskSpace key + + when e $ liftIO $ allowWrite tmp + + getViaTmpUnchecked key action + +prepTmp :: Key -> Annex FilePath +prepTmp key = do + g <- gitRepo + let tmp = gitAnnexTmpLocation g key + liftIO $ createDirectoryIfMissing True (parentDir tmp) + return tmp + +{- Like getViaTmp, but does not check that there is enough disk space + - for the incoming key. For use when the key content is already on disk + - and not being copied into place. -} +getViaTmpUnchecked :: Key -> (FilePath -> Annex Bool) -> Annex Bool +getViaTmpUnchecked key action = do + tmp <- prepTmp key + success <- action tmp + if success + then do + moveAnnex key tmp + logStatus key InfoPresent + return True + else do + -- the tmp file is left behind, in case caller wants + -- to resume its transfer + return False + +{- Creates a temp file, runs an action on it, and cleans up the temp file. -} +withTmp :: Key -> (FilePath -> Annex a) -> Annex a +withTmp key action = do + tmp <- prepTmp key + res <- action tmp + liftIO $ whenM (doesFileExist tmp) $ liftIO $ removeFile tmp + return res + +{- Checks that there is disk space available to store a given key, + - throwing an error if not. -} +checkDiskSpace :: Key -> Annex () +checkDiskSpace = checkDiskSpace' 0 + +checkDiskSpace' :: Integer -> Key -> Annex () +checkDiskSpace' adjustment key = do + g <- gitRepo + r <- getConfig g "diskreserve" "" + let reserve = fromMaybe megabyte $ readSize dataUnits r + stats <- liftIO $ getFileSystemStats (gitAnnexDir g) + case (stats, keySize key) of + (Nothing, _) -> return () + (_, Nothing) -> return () + (Just (FileSystemStats { fsStatBytesAvailable = have }), Just need) -> + when (need + reserve > have + adjustment) $ + needmorespace (need + reserve - have - adjustment) + where + megabyte :: Integer + megabyte = 1000000 + needmorespace n = unlessM (Annex.getState Annex.force) $ + error $ "not enough free space, need " ++ + roughSize storageUnits True n ++ + " more (use --force to override this check or adjust annex.diskreserve)" + +{- Moves a file into .git/annex/objects/ + - + - What if the key there already has content? This could happen for + - various reasons; perhaps the same content is being annexed again. + - Perhaps there has been a hash collision generating the keys. + - + - The current strategy is to assume that in this case it's safe to delete + - one of the two copies of the content; and the one already in the annex + - is left there, assuming it's the original, canonical copy. + - + - I considered being more paranoid, and checking that both files had + - the same content. Decided against it because A) users explicitly choose + - a backend based on its hashing properties and so if they're dealing + - with colliding files it's their own fault and B) adding such a check + - would not catch all cases of colliding keys. For example, perhaps + - a remote has a key; if it's then added again with different content then + - the overall system now has two different peices of content for that + - key, and one of them will probably get deleted later. So, adding the + - check here would only raise expectations that git-annex cannot truely + - meet. + -} +moveAnnex :: Key -> FilePath -> Annex () +moveAnnex key src = do + g <- gitRepo + let dest = gitAnnexLocation g key + let dir = parentDir dest + e <- liftIO $ doesFileExist dest + if e + then liftIO $ removeFile src + else liftIO $ do + createDirectoryIfMissing True dir + allowWrite dir -- in case the directory already exists + renameFile src dest + preventWrite dest + preventWrite dir + +withObjectLoc :: Key -> ((FilePath, FilePath) -> Annex a) -> Annex a +withObjectLoc key a = do + g <- gitRepo + let file = gitAnnexLocation g key + let dir = parentDir file + a (dir, file) + +{- Removes a key's file from .git/annex/objects/ -} +removeAnnex :: Key -> Annex () +removeAnnex key = withObjectLoc key $ \(dir, file) -> liftIO $ do + allowWrite dir + removeFile file + removeDirectory dir + +{- Moves a key's file out of .git/annex/objects/ -} +fromAnnex :: Key -> FilePath -> Annex () +fromAnnex key dest = withObjectLoc key $ \(dir, file) -> liftIO $ do + allowWrite dir + allowWrite file + renameFile file dest + removeDirectory dir + +{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and + - returns the file it was moved to. -} +moveBad :: Key -> Annex FilePath +moveBad key = do + g <- gitRepo + let src = gitAnnexLocation g key + let dest = gitAnnexBadDir g takeFileName src + liftIO $ do + createDirectoryIfMissing True (parentDir dest) + allowWrite (parentDir src) + renameFile src dest + removeDirectory (parentDir src) + logStatus key InfoMissing + return dest + +{- List of keys whose content exists in .git/annex/objects/ -} +getKeysPresent :: Annex [Key] +getKeysPresent = do + g <- gitRepo + getKeysPresent' $ gitAnnexObjectDir g +getKeysPresent' :: FilePath -> Annex [Key] +getKeysPresent' dir = do + exists <- liftIO $ doesDirectoryExist dir + if not exists + then return [] + else liftIO $ do + -- 2 levels of hashing + levela <- dirContents dir + levelb <- mapM dirContents levela + contents <- mapM dirContents (concat levelb) + let files = concat contents + return $ mapMaybe (fileKey . takeFileName) files + +{- Things to do to record changes to content. -} +saveState :: Annex () +saveState = do + Annex.Queue.flush False + Annex.Branch.commit "update" diff --git a/Annex/Exception.hs b/Annex/Exception.hs index 549ef4fd5..7ea8fb89a 100644 --- a/Annex/Exception.hs +++ b/Annex/Exception.hs @@ -15,7 +15,7 @@ import Control.Exception.Control (handle) import Control.Monad.IO.Control (liftIOOp) import Control.Exception hiding (handle, throw) -import AnnexCommon +import Annex.Common {- Runs an Annex action, with setup and cleanup both in the IO monad. -} bracketIO :: IO c -> (c -> IO b) -> Annex a -> Annex a diff --git a/Annex/Queue.hs b/Annex/Queue.hs new file mode 100644 index 000000000..8d0a32bec --- /dev/null +++ b/Annex/Queue.hs @@ -0,0 +1,42 @@ +{- git-annex command queue + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Queue ( + add, + flush, + flushWhenFull +) where + +import Annex.Common +import Annex +import qualified Git.Queue + +{- Adds a git command to the queue. -} +add :: String -> [CommandParam] -> [FilePath] -> Annex () +add command params files = do + q <- getState repoqueue + store $ Git.Queue.add q command params files + +{- Runs the queue if it is full. Should be called periodically. -} +flushWhenFull :: Annex () +flushWhenFull = do + q <- getState repoqueue + when (Git.Queue.full q) $ flush False + +{- Runs (and empties) the queue. -} +flush :: Bool -> Annex () +flush silent = do + q <- getState repoqueue + unless (0 == Git.Queue.size q) $ do + unless silent $ + showSideAction "Recording state in git" + g <- gitRepo + q' <- liftIO $ Git.Queue.flush g q + store q' + +store :: Git.Queue.Queue -> Annex () +store q = changeState $ \s -> s { repoqueue = q } diff --git a/Annex/Version.hs b/Annex/Version.hs new file mode 100644 index 000000000..e501dbf2e --- /dev/null +++ b/Annex/Version.hs @@ -0,0 +1,46 @@ +{- git-annex repository versioning + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Version where + +import Annex.Common +import qualified Git +import Config + +type Version = String + +defaultVersion :: Version +defaultVersion = "3" + +supportedVersions :: [Version] +supportedVersions = [defaultVersion] + +upgradableVersions :: [Version] +upgradableVersions = ["0", "1", "2"] + +versionField :: String +versionField = "annex.version" + +getVersion :: Annex (Maybe Version) +getVersion = do + g <- gitRepo + let v = Git.configGet g versionField "" + if not $ null v + then return $ Just v + else return Nothing + +setVersion :: Annex () +setVersion = setConfig versionField defaultVersion + +checkVersion :: Version -> Annex () +checkVersion v + | v `elem` supportedVersions = return () + | v `elem` upgradableVersions = err "Upgrade this repository: git-annex upgrade" + | otherwise = err "Upgrade git-annex." + where + err msg = error $ "Repository version " ++ v ++ + " is not supported. " ++ msg -- cgit v1.2.3