summaryrefslogtreecommitdiff
path: root/Annex/Branch.hs
diff options
context:
space:
mode:
Diffstat (limited to 'Annex/Branch.hs')
-rw-r--r--Annex/Branch.hs336
1 files changed, 336 insertions, 0 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs
new file mode 100644
index 000000000..c6db9deca
--- /dev/null
+++ b/Annex/Branch.hs
@@ -0,0 +1,336 @@
+{- management of the git-annex branch
+ -
+ - Copyright 2011 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Annex.Branch (
+ create,
+ update,
+ get,
+ change,
+ commit,
+ files,
+ refExists,
+ hasOrigin,
+ hasSomeBranch,
+ name
+) where
+
+import System.IO.Binary
+import System.Exit
+import qualified Data.ByteString.Lazy.Char8 as L
+
+import Annex.Common
+import Annex.Exception
+import Types.BranchState
+import qualified Git
+import qualified Git.UnionMerge
+import qualified Annex
+import Annex.CatFile
+
+type GitRef = String
+
+{- Name of the branch that is used to store git-annex's information. -}
+name :: GitRef
+name = "git-annex"
+
+{- Fully qualified name of the branch. -}
+fullname :: GitRef
+fullname = "refs/heads/" ++ name
+
+{- Branch's name in origin. -}
+originname :: GitRef
+originname = "origin/" ++ name
+
+{- A separate index file for the branch. -}
+index :: Git.Repo -> FilePath
+index g = gitAnnexDir g </> "index"
+
+{- Populates the branch's index file with the current branch contents.
+ -
+ - Usually, this is only done when the index doesn't yet exist, and
+ - the index is used to build up changes to be commited to the branch,
+ - and merge in changes from other branches.
+ -}
+genIndex :: Git.Repo -> IO ()
+genIndex g = Git.UnionMerge.ls_tree g fullname >>= Git.UnionMerge.update_index g
+
+{- Runs an action using the branch's index file. -}
+withIndex :: Annex a -> Annex a
+withIndex = withIndex' False
+withIndex' :: Bool -> Annex a -> Annex a
+withIndex' bootstrapping a = do
+ g <- gitRepo
+ let f = index g
+
+ bracketIO (Git.useIndex f) id $ do
+ unlessM (liftIO $ doesFileExist f) $ do
+ unless bootstrapping create
+ liftIO $ createDirectoryIfMissing True $ takeDirectory f
+ unless bootstrapping $ liftIO $ genIndex g
+ a
+
+withIndexUpdate :: Annex a -> Annex a
+withIndexUpdate a = update >> withIndex a
+
+getState :: Annex BranchState
+getState = Annex.getState Annex.branchstate
+
+setState :: BranchState -> Annex ()
+setState state = Annex.changeState $ \s -> s { Annex.branchstate = state }
+
+setCache :: FilePath -> String -> Annex ()
+setCache file content = do
+ state <- getState
+ setState state { cachedFile = Just file, cachedContent = content }
+
+invalidateCache :: Annex ()
+invalidateCache = do
+ state <- getState
+ setState state { cachedFile = Nothing, cachedContent = "" }
+
+getCache :: FilePath -> Annex (Maybe String)
+getCache file = getState >>= go
+ where
+ go state
+ | cachedFile state == Just file =
+ return $ Just $ cachedContent state
+ | otherwise = return Nothing
+
+{- Creates the branch, if it does not already exist. -}
+create :: Annex ()
+create = unlessM hasBranch $ do
+ g <- gitRepo
+ e <- hasOrigin
+ if e
+ then liftIO $ Git.run g "branch" [Param name, Param originname]
+ else withIndex' True $
+ liftIO $ Git.commit g "branch created" fullname []
+
+{- Stages the journal, and commits staged changes to the branch. -}
+commit :: String -> Annex ()
+commit message = do
+ fs <- getJournalFiles
+ when (not $ null fs) $ lockJournal $ do
+ stageJournalFiles fs
+ g <- gitRepo
+ withIndex $ liftIO $ Git.commit g message fullname [fullname]
+
+{- Ensures that the branch is up-to-date; should be called before
+ - data is read from it. Runs only once per git-annex run. -}
+update :: Annex ()
+update = do
+ state <- getState
+ unless (branchUpdated state) $ do
+ -- check what needs updating before taking the lock
+ fs <- getJournalFiles
+ refs <- filterM checkref =<< siblingBranches
+ unless (null fs && null refs) $ withIndex $ lockJournal $ do
+ {- Before refs are merged into the index, it's
+ - important to first stage the journal into the
+ - index. Otherwise, any changes in the journal
+ - would later get staged, and might overwrite
+ - changes made during the merge.
+ -
+ - It would be cleaner to handle the merge by
+ - updating the journal, not the index, with changes
+ - from the branches.
+ -}
+ unless (null fs) $ stageJournalFiles fs
+ mapM_ mergeref refs
+ g <- gitRepo
+ liftIO $ Git.commit g "update" fullname (fullname:refs)
+ Annex.changeState $ \s -> s { Annex.branchstate = state { branchUpdated = True } }
+ invalidateCache
+ where
+ checkref ref = do
+ g <- gitRepo
+ -- checking with log to see if there have been changes
+ -- is less expensive than always merging
+ diffs <- liftIO $ Git.pipeRead g [
+ Param "log",
+ Param (name++".."++ref),
+ Params "--oneline -n1"
+ ]
+ return $ not $ L.null diffs
+ mergeref ref = do
+ showSideAction $ "merging " ++
+ Git.refDescribe ref ++ " into " ++ name
+ {- By passing only one ref, it is actually
+ - merged into the index, preserving any
+ - changes that may already be staged.
+ -
+ - However, any changes in the git-annex
+ - branch that are *not* reflected in the
+ - index will be removed. So, documentation
+ - advises users not to directly modify the
+ - branch.
+ -}
+ g <- gitRepo
+ liftIO $ Git.UnionMerge.merge g [ref]
+ return $ Just ref
+
+{- Checks if a git ref exists. -}
+refExists :: GitRef -> Annex Bool
+refExists ref = do
+ g <- gitRepo
+ liftIO $ Git.runBool g "show-ref"
+ [Param "--verify", Param "-q", Param ref]
+
+{- Does the main git-annex branch exist? -}
+hasBranch :: Annex Bool
+hasBranch = refExists fullname
+
+{- Does origin/git-annex exist? -}
+hasOrigin :: Annex Bool
+hasOrigin = refExists originname
+
+{- Does the git-annex branch or a foo/git-annex branch exist? -}
+hasSomeBranch :: Annex Bool
+hasSomeBranch = not . null <$> siblingBranches
+
+{- List of all git-annex branches, including the main one and any
+ - from remotes. -}
+siblingBranches :: Annex [String]
+siblingBranches = do
+ g <- gitRepo
+ r <- liftIO $ Git.pipeRead g [Param "show-ref", Param name]
+ return $ map (last . words . L.unpack) (L.lines r)
+
+{- Applies a function to modifiy the content of a file. -}
+change :: FilePath -> (String -> String) -> Annex ()
+change file a = lockJournal $ get file >>= return . a >>= set file
+
+{- Records new content of a file into the journal. -}
+set :: FilePath -> String -> Annex ()
+set file content = do
+ setJournalFile file content
+ setCache file content
+
+{- Gets the content of a file on the branch, or content from the journal, or
+ - staged in the index.
+ -
+ - Returns an empty string if the file doesn't exist yet. -}
+get :: FilePath -> Annex String
+get file = do
+ cached <- getCache file
+ case cached of
+ Just content -> return content
+ Nothing -> do
+ j <- getJournalFile file
+ case j of
+ Just content -> do
+ setCache file content
+ return content
+ Nothing -> withIndexUpdate $ do
+ content <- catFile fullname file
+ setCache file content
+ return content
+
+{- Lists all files on the branch. There may be duplicates in the list. -}
+files :: Annex [FilePath]
+files = withIndexUpdate $ do
+ g <- gitRepo
+ bfiles <- liftIO $ Git.pipeNullSplit g
+ [Params "ls-tree --name-only -r -z", Param fullname]
+ jfiles <- getJournalledFiles
+ return $ jfiles ++ bfiles
+
+{- Records content for a file in the branch to the journal.
+ -
+ - Using the journal, rather than immediatly staging content to the index
+ - avoids git needing to rewrite the index after every change. -}
+setJournalFile :: FilePath -> String -> Annex ()
+setJournalFile file content = do
+ g <- gitRepo
+ liftIO $ catch (write g) $ const $ do
+ createDirectoryIfMissing True $ gitAnnexJournalDir g
+ createDirectoryIfMissing True $ gitAnnexTmpDir g
+ write g
+ where
+ -- journal file is written atomically
+ write g = do
+ let jfile = journalFile g file
+ let tmpfile = gitAnnexTmpDir g </> takeFileName jfile
+ writeBinaryFile tmpfile content
+ renameFile tmpfile jfile
+
+{- Gets any journalled content for a file in the branch. -}
+getJournalFile :: FilePath -> Annex (Maybe String)
+getJournalFile file = do
+ g <- gitRepo
+ liftIO $ catch (liftM Just . readFileStrict $ journalFile g file)
+ (const $ return Nothing)
+
+{- List of files that have updated content in the journal. -}
+getJournalledFiles :: Annex [FilePath]
+getJournalledFiles = map fileJournal <$> getJournalFiles
+
+{- List of existing journal files. -}
+getJournalFiles :: Annex [FilePath]
+getJournalFiles = do
+ g <- gitRepo
+ fs <- liftIO $ catch (getDirectoryContents $ gitAnnexJournalDir g)
+ (const $ return [])
+ return $ filter (`notElem` [".", ".."]) fs
+
+{- Stages the specified journalfiles. -}
+stageJournalFiles :: [FilePath] -> Annex ()
+stageJournalFiles fs = do
+ g <- gitRepo
+ withIndex $ liftIO $ do
+ let dir = gitAnnexJournalDir g
+ let paths = map (dir </>) fs
+ -- inject all the journal files directly into git
+ -- in one quick command
+ (pid, fromh, toh) <- hPipeBoth "git" $ toCommand $
+ Git.gitCommandLine g [Param "hash-object", Param "-w", Param "--stdin-paths"]
+ _ <- forkProcess $ do
+ hPutStr toh $ unlines paths
+ hClose toh
+ exitSuccess
+ hClose toh
+ s <- hGetContents fromh
+ -- update the index, also in just one command
+ Git.UnionMerge.update_index g $
+ index_lines (lines s) $ map fileJournal fs
+ hClose fromh
+ forceSuccess pid
+ mapM_ removeFile paths
+ where
+ index_lines shas = map genline . zip shas
+ genline (sha, file) = Git.UnionMerge.update_index_line sha file
+
+{- Produces a filename to use in the journal for a file on the branch.
+ -
+ - The journal typically won't have a lot of files in it, so the hashing
+ - used in the branch is not necessary, and all the files are put directly
+ - in the journal directory.
+ -}
+journalFile :: Git.Repo -> FilePath -> FilePath
+journalFile repo file = gitAnnexJournalDir repo </> concatMap mangle file
+ where
+ mangle '/' = "_"
+ mangle '_' = "__"
+ mangle c = [c]
+
+{- Converts a journal file (relative to the journal dir) back to the
+ - filename on the branch. -}
+fileJournal :: FilePath -> FilePath
+fileJournal = replace "//" "_" . replace "_" "/"
+
+{- Runs an action that modifies the journal, using locking to avoid
+ - contention with other git-annex processes. -}
+lockJournal :: Annex a -> Annex a
+lockJournal a = do
+ g <- gitRepo
+ let file = gitAnnexJournalLock g
+ bracketIO (lock file) unlock a
+ where
+ lock file = do
+ l <- createFile file stdFileMode
+ waitToSetLock l (WriteLock, AbsoluteSeek, 0, 0)
+ return l
+ unlock = closeFd