diff options
author | Joey Hess <joeyh@debian.org> | 2013-11-27 18:41:44 -0400 |
---|---|---|
committer | Joey Hess <joeyh@debian.org> | 2013-11-27 18:41:44 -0400 |
commit | 2e6d39d426f6b08f236d6071e671a9dcfc799d91 (patch) | |
tree | 1618fd9e34a30409ee0937cb4b3861ec3b5e7bba /Annex |
git-annex (5.20131127) unstable; urgency=low
* webapp: Detect when upgrades are available, and upgrade if the user
desires.
(Only when git-annex is installed using the prebuilt binaries
from git-annex upstream, not from eg Debian.)
* assistant: Detect when the git-annex binary is modified or replaced,
and either prompt the user to restart the program, or automatically
restart it.
* annex.autoupgrade configures both the above upgrade behaviors.
* Added support for quvi 0.9. Slightly suboptimal due to limitations in its
interface compared with the old version.
* Bug fix: annex.version did not get set on automatic upgrade to v5 direct
mode repo, so the upgrade was performed repeatedly, slowing commands down.
* webapp: Fix bug that broke switching between local repositories
that use the new guarded direct mode.
* Android: Fix stripping of the git-annex binary.
* Android: Make terminal app show git-annex version number.
* Android: Re-enable XMPP support.
* reinject: Allow to be used in direct mode.
* Futher improvements to git repo repair. Has now been tested in tens
of thousands of intentionally damaged repos, and successfully
repaired them all.
* Allow use of --unused in bare repository.
# imported from the archive
Diffstat (limited to 'Annex')
-rw-r--r-- | Annex/Branch.hs | 533 | ||||
-rw-r--r-- | Annex/Branch/Transitions.hs | 53 | ||||
-rw-r--r-- | Annex/BranchState.hs | 43 | ||||
-rw-r--r-- | Annex/CatFile.hs | 139 | ||||
-rw-r--r-- | Annex/CheckAttr.hs | 35 | ||||
-rw-r--r-- | Annex/CheckIgnore.hs | 32 | ||||
-rw-r--r-- | Annex/Content.hs | 529 | ||||
-rw-r--r-- | Annex/Content/Direct.hs | 259 | ||||
-rw-r--r-- | Annex/Direct.hs | 306 | ||||
-rw-r--r-- | Annex/Direct/Fixup.hs | 31 | ||||
-rw-r--r-- | Annex/Environment.hs | 65 | ||||
-rw-r--r-- | Annex/Exception.hs | 46 | ||||
-rw-r--r-- | Annex/FileMatcher.hs | 102 | ||||
-rw-r--r-- | Annex/Hook.hs | 42 | ||||
-rw-r--r-- | Annex/Journal.hs | 128 | ||||
-rw-r--r-- | Annex/Link.hs | 105 | ||||
-rw-r--r-- | Annex/LockPool.hs | 56 | ||||
-rw-r--r-- | Annex/Path.hs | 34 | ||||
-rw-r--r-- | Annex/Perms.hs | 125 | ||||
-rw-r--r-- | Annex/Queue.hs | 62 | ||||
-rw-r--r-- | Annex/Quvi.hs | 20 | ||||
-rw-r--r-- | Annex/ReplaceFile.hs | 39 | ||||
-rw-r--r-- | Annex/Ssh.hs | 198 | ||||
-rw-r--r-- | Annex/TaggedPush.hs | 61 | ||||
-rw-r--r-- | Annex/UUID.hs | 96 | ||||
-rw-r--r-- | Annex/Url.hs | 27 | ||||
-rw-r--r-- | Annex/Version.hs | 47 | ||||
-rw-r--r-- | Annex/Wanted.hs | 32 |
28 files changed, 3245 insertions, 0 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs new file mode 100644 index 000000000..9838af25f --- /dev/null +++ b/Annex/Branch.hs @@ -0,0 +1,533 @@ +{- management of the git-annex branch + - + - Copyright 2011-2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.Branch ( + fullname, + name, + hasOrigin, + hasSibling, + siblingBranches, + create, + update, + forceUpdate, + updateTo, + get, + change, + commit, + forceCommit, + files, + withIndex, + performTransitions, +) where + +import qualified Data.ByteString.Lazy.Char8 as L +import qualified Data.Set as S +import qualified Data.Map as M +import qualified Control.Exception as E + +import Common.Annex +import Annex.BranchState +import Annex.Journal +import qualified Git +import qualified Git.Command +import qualified Git.Ref +import qualified Git.Sha +import qualified Git.Branch +import qualified Git.UnionMerge +import qualified Git.UpdateIndex +import Git.HashObject +import Git.Types +import Git.FilePath +import Annex.CatFile +import Annex.Perms +import qualified Annex +import Utility.Env +import Logs +import Logs.Transitions +import Logs.Trust.Pure +import Annex.ReplaceFile +import qualified Annex.Queue +import Annex.Branch.Transitions +import Annex.Exception + +{- Name of the branch that is used to store git-annex's information. -} +name :: Git.Ref +name = Git.Ref "git-annex" + +{- Fully qualified name of the branch. -} +fullname :: Git.Ref +fullname = Git.Ref $ "refs/heads/" ++ show name + +{- Branch's name in origin. -} +originname :: Git.Ref +originname = Git.Ref $ "origin/" ++ show name + +{- Does origin/git-annex exist? -} +hasOrigin :: Annex Bool +hasOrigin = inRepo $ Git.Ref.exists originname + +{- Does the git-annex branch or a sibling foo/git-annex branch exist? -} +hasSibling :: Annex Bool +hasSibling = not . null <$> siblingBranches + +{- List of git-annex (refs, branches), including the main one and any + - from remotes. Duplicate refs are filtered out. -} +siblingBranches :: Annex [(Git.Ref, Git.Branch)] +siblingBranches = inRepo $ Git.Ref.matchingUniq [name] + +{- Creates the branch, if it does not already exist. -} +create :: Annex () +create = void getBranch + +{- Returns the ref of the branch, creating it first if necessary. -} +getBranch :: Annex Git.Ref +getBranch = maybe (hasOrigin >>= go >>= use) return =<< branchsha + where + go True = do + inRepo $ Git.Command.run + [Param "branch", Param $ show name, Param $ show originname] + fromMaybe (error $ "failed to create " ++ show name) + <$> branchsha + go False = withIndex' True $ + inRepo $ Git.Branch.commit "branch created" fullname [] + use sha = do + setIndexSha sha + return sha + branchsha = inRepo $ Git.Ref.sha fullname + +{- Ensures that the branch and index are up-to-date; should be + - called before data is read from it. Runs only once per git-annex run. -} +update :: Annex () +update = runUpdateOnce $ void $ updateTo =<< siblingBranches + +{- Forces an update even if one has already been run. -} +forceUpdate :: Annex Bool +forceUpdate = updateTo =<< siblingBranches + +{- Merges the specified Refs into the index, if they have any changes not + - already in it. The Branch names are only used in the commit message; + - it's even possible that the provided Branches have not been updated to + - point to the Refs yet. + - + - The branch is fast-forwarded if possible, otherwise a merge commit is + - made. + - + - Before Refs are merged into the index, it's important to first stage the + - journal into the index. Otherwise, any changes in the journal would + - later get staged, and might overwrite changes made during the merge. + - This is only done if some of the Refs do need to be merged. + - + - Also handles performing any Transitions that have not yet been + - performed, in either the local branch, or the Refs. + - + - Returns True if any refs were merged in, False otherwise. + -} +updateTo :: [(Git.Ref, Git.Branch)] -> Annex Bool +updateTo pairs = do + -- ensure branch exists, and get its current ref + branchref <- getBranch + dirty <- journalDirty + ignoredrefs <- getIgnoredRefs + (refs, branches) <- unzip <$> filterM (isnewer ignoredrefs) pairs + if null refs + {- Even when no refs need to be merged, the index + - may still be updated if the branch has gotten ahead + - of the index. -} + then whenM (needUpdateIndex branchref) $ lockJournal $ \jl -> do + forceUpdateIndex jl branchref + {- When there are journalled changes + - as well as the branch being updated, + - a commit needs to be done. -} + when dirty $ + go branchref True [] [] jl + else lockJournal $ go branchref dirty refs branches + return $ not $ null refs + where + isnewer ignoredrefs (r, _) + | S.member r ignoredrefs = return False + | otherwise = inRepo $ Git.Branch.changed fullname r + go branchref dirty refs branches jl = withIndex $ do + cleanjournal <- if dirty then stageJournal jl else return noop + let merge_desc = if null branches + then "update" + else "merging " ++ + unwords (map Git.Ref.describe branches) ++ + " into " ++ show name + localtransitions <- parseTransitionsStrictly "local" + <$> getLocal transitionsLog + unless (null branches) $ do + showSideAction merge_desc + mergeIndex jl refs + let commitrefs = nub $ fullname:refs + unlessM (handleTransitions jl localtransitions commitrefs) $ do + ff <- if dirty + then return False + else inRepo $ Git.Branch.fastForward fullname refs + if ff + then updateIndex jl branchref + else commitIndex jl branchref merge_desc commitrefs + liftIO cleanjournal + +{- Gets the content of a file, which may be in the journal, or in the index + - (and committed to the branch). + - + - Updates the branch if necessary, to ensure the most up-to-date available + - content is returned. + - + - Returns an empty string if the file doesn't exist yet. -} +get :: FilePath -> Annex String +get file = do + update + getLocal file + +{- Like get, but does not merge the branch, so the info returned may not + - reflect changes in remotes. + - (Changing the value this returns, and then merging is always the + - same as using get, and then changing its value.) -} +getLocal :: FilePath -> Annex String +getLocal file = go =<< getJournalFileStale file + where + go (Just journalcontent) = return journalcontent + go Nothing = getRaw file + +getRaw :: FilePath -> Annex String +getRaw file = withIndex $ L.unpack <$> catFile fullname file + +{- Applies a function to modifiy the content of a file. + - + - Note that this does not cause the branch to be merged, it only + - modifes the current content of the file on the branch. + -} +change :: FilePath -> (String -> String) -> Annex () +change file a = lockJournal $ \jl -> a <$> getLocal file >>= set jl file + +{- Records new content of a file into the journal -} +set :: JournalLocked -> FilePath -> String -> Annex () +set = setJournalFile + +{- Stages the journal, and commits staged changes to the branch. -} +commit :: String -> Annex () +commit = whenM journalDirty . forceCommit + +{- Commits the current index to the branch even without any journalleda + - changes. -} +forceCommit :: String -> Annex () +forceCommit message = lockJournal $ \jl -> do + cleanjournal <- stageJournal jl + ref <- getBranch + withIndex $ commitIndex jl ref message [fullname] + liftIO cleanjournal + +{- Commits the staged changes in the index to the branch. + - + - Ensures that the branch's index file is first updated to the state + - of the branch at branchref, before running the commit action. This + - is needed because the branch may have had changes pushed to it, that + - are not yet reflected in the index. + - + - Also safely handles a race that can occur if a change is being pushed + - into the branch at the same time. When the race happens, the commit will + - be made on top of the newly pushed change, but without the index file + - being updated to include it. The result is that the newly pushed + - change is reverted. This race is detected and another commit made + - to fix it. + - + - The branchref value can have been obtained using getBranch at any + - previous point, though getting it a long time ago makes the race + - more likely to occur. + -} +commitIndex :: JournalLocked -> Git.Ref -> String -> [Git.Ref] -> Annex () +commitIndex jl branchref message parents = do + showStoringStateAction + commitIndex' jl branchref message parents +commitIndex' :: JournalLocked -> Git.Ref -> String -> [Git.Ref] -> Annex () +commitIndex' jl branchref message parents = do + updateIndex jl branchref + committedref <- inRepo $ Git.Branch.commit message fullname parents + setIndexSha committedref + parentrefs <- commitparents <$> catObject committedref + when (racedetected branchref parentrefs) $ do + liftIO $ print ("race detected", branchref, parentrefs, "committing", (branchref, parents)) + fixrace committedref parentrefs + where + -- look for "parent ref" lines and return the refs + commitparents = map (Git.Ref . snd) . filter isparent . + map (toassoc . L.unpack) . L.lines + toassoc = separate (== ' ') + isparent (k,_) = k == "parent" + + {- The race can be detected by checking the commit's + - parent, which will be the newly pushed branch, + - instead of the expected ref that the index was updated to. -} + racedetected expectedref parentrefs + | expectedref `elem` parentrefs = False -- good parent + | otherwise = True -- race! + + {- To recover from the race, union merge the lost refs + - into the index, and recommit on top of the bad commit. -} + fixrace committedref lostrefs = do + mergeIndex jl lostrefs + commitIndex jl committedref racemessage [committedref] + + racemessage = message ++ " (recovery from race)" + +{- Lists all files on the branch. There may be duplicates in the list. -} +files :: Annex [FilePath] +files = do + update + (++) + <$> branchFiles + <*> getJournalledFilesStale + +{- Files in the branch, not including any from journalled changes, + - and without updating the branch. -} +branchFiles :: Annex [FilePath] +branchFiles = withIndex $ inRepo $ Git.Command.pipeNullSplitZombie + [ Params "ls-tree --name-only -r -z" + , Param $ show fullname + ] + +{- Populates the branch's index file with the current branch contents. + - + - This is only done when the index doesn't yet exist, and the index + - is used to build up changes to be commited to the branch, and merge + - in changes from other branches. + -} +genIndex :: Git.Repo -> IO () +genIndex g = Git.UpdateIndex.streamUpdateIndex g + [Git.UpdateIndex.lsTree fullname g] + +{- Merges the specified refs into the index. + - Any changes staged in the index will be preserved. -} +mergeIndex :: JournalLocked -> [Git.Ref] -> Annex () +mergeIndex jl branches = do + prepareModifyIndex jl + h <- catFileHandle + inRepo $ \g -> Git.UnionMerge.mergeIndex h g branches + +{- Removes any stale git lock file, to avoid git falling over when + - updating the index. + - + - Since all modifications of the index are performed inside this module, + - and only when the journal is locked, the fact that the journal has to be + - locked when this is called ensures that no other process is currently + - modifying the index. So any index.lock file must be stale, caused + - by git running when the system crashed, or the repository's disk was + - removed, etc. + -} +prepareModifyIndex :: JournalLocked -> Annex () +prepareModifyIndex _jl = do + index <- fromRepo gitAnnexIndex + void $ liftIO $ tryIO $ removeFile $ index ++ ".lock" + +{- Runs an action using the branch's index file. -} +withIndex :: Annex a -> Annex a +withIndex = withIndex' False +withIndex' :: Bool -> Annex a -> Annex a +withIndex' bootstrapping a = do + f <- fromRepo gitAnnexIndex + g <- gitRepo +#ifdef __ANDROID__ + {- This should not be necessary on Android, but there is some + - weird getEnvironment breakage. See + - https://github.com/neurocyte/ghc-android/issues/7 + - Use getEnv to get some key environment variables that + - git expects to have. -} + let keyenv = words "USER PATH GIT_EXEC_PATH HOSTNAME HOME" + let getEnvPair k = maybe Nothing (\v -> Just (k, v)) <$> getEnv k + e <- liftIO $ catMaybes <$> forM keyenv getEnvPair +#else + e <- liftIO getEnvironment +#endif + let g' = g { gitEnv = Just $ ("GIT_INDEX_FILE", f):e } + + r <- tryAnnex $ do + Annex.changeState $ \s -> s { Annex.repo = g' } + checkIndexOnce $ unlessM (liftIO $ doesFileExist f) $ do + unless bootstrapping create + createAnnexDirectory $ takeDirectory f + unless bootstrapping $ inRepo genIndex + a + Annex.changeState $ \s -> s { Annex.repo = (Annex.repo s) { gitEnv = gitEnv g} } + either E.throw return r + +{- Updates the branch's index to reflect the current contents of the branch. + - Any changes staged in the index will be preserved. + - + - Compares the ref stored in the lock file with the current + - ref of the branch to see if an update is needed. + -} +updateIndex :: JournalLocked -> Git.Ref -> Annex () +updateIndex jl branchref = whenM (needUpdateIndex branchref) $ + forceUpdateIndex jl branchref + +forceUpdateIndex :: JournalLocked -> Git.Ref -> Annex () +forceUpdateIndex jl branchref = do + withIndex $ mergeIndex jl [fullname] + setIndexSha branchref + +{- Checks if the index needs to be updated. -} +needUpdateIndex :: Git.Ref -> Annex Bool +needUpdateIndex branchref = do + f <- fromRepo gitAnnexIndexStatus + committedref <- Git.Ref . firstLine <$> + liftIO (catchDefaultIO "" $ readFileStrict f) + return (committedref /= branchref) + +{- Record that the branch's index has been updated to correspond to a + - given ref of the branch. -} +setIndexSha :: Git.Ref -> Annex () +setIndexSha ref = do + f <- fromRepo gitAnnexIndexStatus + liftIO $ writeFile f $ show ref ++ "\n" + setAnnexFilePerm f + +{- Stages the journal into the index and returns an action that will + - clean up the staged journal files, which should only be run once + - the index has been committed to the branch. + - + - Before staging, this removes any existing git index file lock. + - This is safe to do because stageJournal is the only thing that + - modifies this index file, and only one can run at a time, because + - the journal is locked. So any existing git index file lock must be + - stale, and the journal must contain any data that was in the process + - of being written to the index file when it crashed. + -} +stageJournal :: JournalLocked -> Annex (IO ()) +stageJournal jl = withIndex $ do + prepareModifyIndex jl + g <- gitRepo + let dir = gitAnnexJournalDir g + fs <- getJournalFiles jl + liftIO $ do + h <- hashObjectStart g + Git.UpdateIndex.streamUpdateIndex g + [genstream dir h fs] + hashObjectStop h + return $ liftIO $ mapM_ (removeFile . (dir </>)) fs + where + genstream dir h fs streamer = forM_ fs $ \file -> do + let path = dir </> file + sha <- hashFile h path + streamer $ Git.UpdateIndex.updateIndexLine + sha FileBlob (asTopFilePath $ fileJournal file) + +{- This is run after the refs have been merged into the index, + - but before the result is committed to the branch. + - (Which is why it's passed the contents of the local branches's + - transition log before that merge took place.) + - + - When the refs contain transitions that have not yet been done locally, + - the transitions are performed on the index, and a new branch + - is created from the result. + - + - When there are transitions recorded locally that have not been done + - to the remote refs, the transitions are performed in the index, + - and committed to the existing branch. In this case, the untransitioned + - remote refs cannot be merged into the branch (since transitions + - throw away history), so they are added to the list of refs to ignore, + - to avoid re-merging content from them again. + -} +handleTransitions :: JournalLocked -> Transitions -> [Git.Ref] -> Annex Bool +handleTransitions jl localts refs = do + m <- M.fromList <$> mapM getreftransition refs + let remotets = M.elems m + if all (localts ==) remotets + then return False + else do + let allts = combineTransitions (localts:remotets) + let (transitionedrefs, untransitionedrefs) = + partition (\r -> M.lookup r m == Just allts) refs + performTransitionsLocked jl allts (localts /= allts) transitionedrefs + ignoreRefs untransitionedrefs + return True + where + getreftransition ref = do + ts <- parseTransitionsStrictly "remote" . L.unpack + <$> catFile ref transitionsLog + return (ref, ts) + +ignoreRefs :: [Git.Ref] -> Annex () +ignoreRefs rs = do + old <- getIgnoredRefs + let s = S.unions [old, S.fromList rs] + f <- fromRepo gitAnnexIgnoredRefs + replaceFile f $ \tmp -> liftIO $ writeFile tmp $ + unlines $ map show $ S.elems s + +getIgnoredRefs :: Annex (S.Set Git.Ref) +getIgnoredRefs = S.fromList . mapMaybe Git.Sha.extractSha . lines <$> content + where + content = do + f <- fromRepo gitAnnexIgnoredRefs + liftIO $ catchDefaultIO "" $ readFile f + +{- Performs the specified transitions on the contents of the index file, + - commits it to the branch, or creates a new branch. + -} +performTransitions :: Transitions -> Bool -> [Ref] -> Annex () +performTransitions ts neednewlocalbranch transitionedrefs = lockJournal $ \jl -> + performTransitionsLocked jl ts neednewlocalbranch transitionedrefs +performTransitionsLocked :: JournalLocked -> Transitions -> Bool -> [Ref] -> Annex () +performTransitionsLocked jl ts neednewlocalbranch transitionedrefs = do + -- For simplicity & speed, we're going to use the Annex.Queue to + -- update the git-annex branch, while it usually holds changes + -- for the head branch. Flush any such changes. + Annex.Queue.flush + withIndex $ do + prepareModifyIndex jl + run $ mapMaybe getTransitionCalculator $ transitionList ts + Annex.Queue.flush + if neednewlocalbranch + then do + committedref <- inRepo $ Git.Branch.commit message fullname transitionedrefs + setIndexSha committedref + else do + ref <- getBranch + commitIndex jl ref message (nub $ fullname:transitionedrefs) + where + message + | neednewlocalbranch && null transitionedrefs = "new branch for transition " ++ tdesc + | otherwise = "continuing transition " ++ tdesc + tdesc = show $ map describeTransition $ transitionList ts + + {- The changes to make to the branch are calculated and applied to + - the branch directly, rather than going through the journal, + - which would be innefficient. (And the journal is not designed + - to hold changes to every file in the branch at once.) + - + - When a file in the branch is changed by transition code, + - that value is remembered and fed into the code for subsequent + - transitions. + -} + run [] = noop + run changers = do + trustmap <- calcTrustMap <$> getRaw trustLog + fs <- branchFiles + hasher <- inRepo hashObjectStart + forM_ fs $ \f -> do + content <- getRaw f + apply changers hasher f content trustmap + liftIO $ hashObjectStop hasher + apply [] _ _ _ _ = return () + apply (changer:rest) hasher file content trustmap = + case changer file content trustmap of + RemoveFile -> do + Annex.Queue.addUpdateIndex + =<< inRepo (Git.UpdateIndex.unstageFile file) + -- File is deleted; can't run any other + -- transitions on it. + return () + ChangeFile content' -> do + sha <- inRepo $ hashObject BlobObject content' + Annex.Queue.addUpdateIndex $ Git.UpdateIndex.pureStreamer $ + Git.UpdateIndex.updateIndexLine sha FileBlob (asTopFilePath file) + apply rest hasher file content' trustmap + PreserveFile -> + apply rest hasher file content trustmap diff --git a/Annex/Branch/Transitions.hs b/Annex/Branch/Transitions.hs new file mode 100644 index 000000000..90002de62 --- /dev/null +++ b/Annex/Branch/Transitions.hs @@ -0,0 +1,53 @@ +{- git-annex branch transitions + - + - Copyright 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Branch.Transitions ( + FileTransition(..), + getTransitionCalculator +) where + +import Logs +import Logs.Transitions +import Logs.UUIDBased as UUIDBased +import Logs.Presence.Pure as Presence +import Types.TrustLevel +import Types.UUID + +import qualified Data.Map as M + +data FileTransition + = ChangeFile String + | RemoveFile + | PreserveFile + +type TransitionCalculator = FilePath -> String -> TrustMap -> FileTransition + +getTransitionCalculator :: Transition -> Maybe TransitionCalculator +getTransitionCalculator ForgetGitHistory = Nothing +getTransitionCalculator ForgetDeadRemotes = Just dropDead + +dropDead :: FilePath -> String -> TrustMap -> FileTransition +dropDead f content trustmap = case getLogVariety f of + Just UUIDBasedLog -> ChangeFile $ + UUIDBased.showLog id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLog Just content + Just (PresenceLog _) -> + let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content + in if null newlog + then RemoveFile + else ChangeFile $ Presence.showLog newlog + Nothing -> PreserveFile + +dropDeadFromUUIDBasedLog :: TrustMap -> UUIDBased.Log String -> UUIDBased.Log String +dropDeadFromUUIDBasedLog trustmap = M.filterWithKey $ notDead trustmap . const + +{- Presence logs can contain UUIDs or other values. Any line that matches + - a dead uuid is dropped; any other values are passed through. -} +dropDeadFromPresenceLog :: TrustMap -> [Presence.LogLine] -> [Presence.LogLine] +dropDeadFromPresenceLog trustmap = filter $ notDead trustmap (toUUID . Presence.info) + +notDead :: TrustMap -> (v -> UUID) -> v -> Bool +notDead trustmap a v = M.findWithDefault SemiTrusted (a v) trustmap /= DeadTrusted diff --git a/Annex/BranchState.hs b/Annex/BranchState.hs new file mode 100644 index 000000000..9b2f9a04c --- /dev/null +++ b/Annex/BranchState.hs @@ -0,0 +1,43 @@ +{- git-annex branch state management + - + - Runtime state about the git-annex branch. + - + - Copyright 2011-2012 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.BranchState where + +import Common.Annex +import Types.BranchState +import qualified Annex + +getState :: Annex BranchState +getState = Annex.getState Annex.branchstate + +setState :: BranchState -> Annex () +setState state = Annex.changeState $ \s -> s { Annex.branchstate = state } + +changeState :: (BranchState -> BranchState) -> Annex () +changeState changer = setState =<< changer <$> getState + +{- Runs an action to check that the index file exists, if it's not been + - checked before in this run of git-annex. -} +checkIndexOnce :: Annex () -> Annex () +checkIndexOnce a = unlessM (indexChecked <$> getState) $ do + a + changeState $ \s -> s { indexChecked = True } + +{- Runs an action to update the branch, if it's not been updated before + - in this run of git-annex. -} +runUpdateOnce :: Annex () -> Annex () +runUpdateOnce a = unlessM (branchUpdated <$> getState) $ do + a + disableUpdate + +{- Avoids updating the branch. A useful optimisation when the branch + - is known to have not changed, or git-annex won't be relying on info + - from it. -} +disableUpdate :: Annex () +disableUpdate = changeState $ \s -> s { branchUpdated = True } diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs new file mode 100644 index 000000000..812d032c6 --- /dev/null +++ b/Annex/CatFile.hs @@ -0,0 +1,139 @@ +{- git cat-file interface, with handle automatically stored in the Annex monad + - + - Copyright 2011-2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.CatFile ( + catFile, + catObject, + catTree, + catObjectDetails, + catFileHandle, + catKey, + catKeyFile, + catKeyFileHEAD, +) where + +import qualified Data.ByteString.Lazy as L +import qualified Data.Map as M +import System.PosixCompat.Types + +import Common.Annex +import qualified Git +import qualified Git.CatFile +import qualified Annex +import Git.Types +import Git.FilePath +import Git.FileMode +import qualified Git.Ref + +catFile :: Git.Branch -> FilePath -> Annex L.ByteString +catFile branch file = do + h <- catFileHandle + liftIO $ Git.CatFile.catFile h branch file + +catObject :: Git.Ref -> Annex L.ByteString +catObject ref = do + h <- catFileHandle + liftIO $ Git.CatFile.catObject h ref + +catTree :: Git.Ref -> Annex [(FilePath, FileMode)] +catTree ref = do + h <- catFileHandle + liftIO $ Git.CatFile.catTree h ref + +catObjectDetails :: Git.Ref -> Annex (Maybe (L.ByteString, Sha, ObjectType)) +catObjectDetails ref = do + h <- catFileHandle + liftIO $ Git.CatFile.catObjectDetails h ref + +{- There can be multiple index files, and a different cat-file is needed + - for each. This is selected by setting GIT_INDEX_FILE in the gitEnv. -} +catFileHandle :: Annex Git.CatFile.CatFileHandle +catFileHandle = do + m <- Annex.getState Annex.catfilehandles + indexfile <- fromMaybe "" . maybe Nothing (lookup "GIT_INDEX_FILE") + <$> fromRepo gitEnv + case M.lookup indexfile m of + Just h -> return h + Nothing -> do + h <- inRepo Git.CatFile.catFileStart + let m' = M.insert indexfile h m + Annex.changeState $ \s -> s { Annex.catfilehandles = m' } + return h + +{- From the Sha or Ref of a symlink back to the key. + - + - Requires a mode witness, to guarantee that the file is a symlink. + -} +catKey :: Ref -> FileMode -> Annex (Maybe Key) +catKey = catKey' True + +catKey' :: Bool -> Ref -> FileMode -> Annex (Maybe Key) +catKey' modeguaranteed ref mode + | isSymLink mode = do + l <- fromInternalGitPath . encodeW8 . L.unpack <$> get + return $ if isLinkToAnnex l + then fileKey $ takeFileName l + else Nothing + | otherwise = return Nothing + where + -- If the mode is not guaranteed to be correct, avoid + -- buffering the whole file content, which might be large. + -- 8192 is enough if it really is a symlink. + get + | modeguaranteed = catObject ref + | otherwise = L.take 8192 <$> catObject ref + +{- Looks up the file mode corresponding to the Ref using the running + - cat-file. + - + - Currently this always has to look in HEAD, because cat-file --batch + - does not offer a way to specify that we want to look up a tree object + - in the index. So if the index has a file staged not as a symlink, + - and it is a symlink in head, the wrong mode is gotten. + - Also, we have to assume the file is a symlink if it's not yet committed + - to HEAD. For these reasons, modeguaranteed is not set. + -} +catKeyChecked :: Bool -> Ref -> Annex (Maybe Key) +catKeyChecked needhead ref@(Ref r) = + catKey' False ref =<< findmode <$> catTree treeref + where + pathparts = split "/" r + dir = intercalate "/" $ take (length pathparts - 1) pathparts + file = fromMaybe "" $ lastMaybe pathparts + treeref = Ref $ if needhead then "HEAD" ++ dir ++ "/" else dir ++ "/" + findmode = fromMaybe symLinkMode . headMaybe . + map snd . filter (\p -> fst p == file) + +{- From a file in the repository back to the key. + - + - Ideally, this should reflect the key that's staged in the index, + - not the key that's committed to HEAD. Unfortunately, git cat-file + - does not refresh the index file after it's started up, so things + - newly staged in the index won't show up. It does, however, notice + - when branches change. + - + - For command-line git-annex use, that doesn't matter. It's perfectly + - reasonable for things staged in the index after the currently running + - git-annex process to not be noticed by it. However, we do want to see + - what's in the index, since it may have uncommitted changes not in HEAD> + - + - For the assistant, this is much more of a problem, since it commits + - files and then needs to be able to immediately look up their keys. + - OTOH, the assistant doesn't keep changes staged in the index for very + - long at all before committing them -- and it won't look at the keys + - of files until after committing them. + - + - So, this gets info from the index, unless running as a daemon. + -} +catKeyFile :: FilePath -> Annex (Maybe Key) +catKeyFile f = ifM (Annex.getState Annex.daemon) + ( catKeyFileHEAD f + , catKeyChecked True $ Git.Ref.fileRef f + ) + +catKeyFileHEAD :: FilePath -> Annex (Maybe Key) +catKeyFileHEAD f = catKeyChecked False $ Git.Ref.fileFromRef Git.Ref.headRef f diff --git a/Annex/CheckAttr.hs b/Annex/CheckAttr.hs new file mode 100644 index 000000000..8eed9e804 --- /dev/null +++ b/Annex/CheckAttr.hs @@ -0,0 +1,35 @@ +{- git check-attr interface, with handle automatically stored in the Annex monad + - + - Copyright 2012 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.CheckAttr ( + checkAttr, + checkAttrHandle +) where + +import Common.Annex +import qualified Git.CheckAttr as Git +import qualified Annex + +{- All gitattributes used by git-annex. -} +annexAttrs :: [Git.Attr] +annexAttrs = + [ "annex.backend" + , "annex.numcopies" + ] + +checkAttr :: Git.Attr -> FilePath -> Annex String +checkAttr attr file = do + h <- checkAttrHandle + liftIO $ Git.checkAttr h attr file + +checkAttrHandle :: Annex Git.CheckAttrHandle +checkAttrHandle = maybe startup return =<< Annex.getState Annex.checkattrhandle + where + startup = do + h <- inRepo $ Git.checkAttrStart annexAttrs + Annex.changeState $ \s -> s { Annex.checkattrhandle = Just h } + return h diff --git a/Annex/CheckIgnore.hs b/Annex/CheckIgnore.hs new file mode 100644 index 000000000..d45e652bc --- /dev/null +++ b/Annex/CheckIgnore.hs @@ -0,0 +1,32 @@ +{- git check-ignore interface, with handle automatically stored in + - the Annex monad + - + - Copyright 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.CheckIgnore ( + checkIgnored, + checkIgnoreHandle +) where + +import Common.Annex +import qualified Git.CheckIgnore as Git +import qualified Annex + +checkIgnored :: FilePath -> Annex Bool +checkIgnored file = go =<< checkIgnoreHandle + where + go Nothing = return False + go (Just h) = liftIO $ Git.checkIgnored h file + +checkIgnoreHandle :: Annex (Maybe Git.CheckIgnoreHandle) +checkIgnoreHandle = maybe startup return =<< Annex.getState Annex.checkignorehandle + where + startup = do + v <- inRepo Git.checkIgnoreStart + when (isNothing v) $ + warning "The installed version of git is too old for .gitignores to be honored by git-annex." + Annex.changeState $ \s -> s { Annex.checkignorehandle = Just v } + return v diff --git a/Annex/Content.hs b/Annex/Content.hs new file mode 100644 index 000000000..62f1b1ccb --- /dev/null +++ b/Annex/Content.hs @@ -0,0 +1,529 @@ +{- git-annex file content managing + - + - Copyright 2010-2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.Content ( + inAnnex, + inAnnexSafe, + inAnnexCheck, + lockContent, + getViaTmp, + getViaTmpChecked, + getViaTmpUnchecked, + withTmp, + checkDiskSpace, + moveAnnex, + sendAnnex, + prepSendAnnex, + removeAnnex, + fromAnnex, + moveBad, + getKeysPresent, + saveState, + downloadUrl, + preseedTmp, + freezeContent, + thawContent, + dirKeys, + withObjectLoc, +) where + +import System.IO.Unsafe (unsafeInterleaveIO) +import System.PosixCompat.Files + +import Common.Annex +import Logs.Location +import qualified Git +import qualified Annex +import qualified Annex.Queue +import qualified Annex.Branch +import Utility.DiskFree +import Utility.FileMode +import qualified Annex.Url as Url +import Types.Key +import Utility.DataUnits +import Utility.CopyFile +import Config +import Git.SharedRepository +import Annex.Perms +import Annex.Link +import Annex.Content.Direct +import Annex.ReplaceFile +import Annex.Exception + +{- Checks if a given key's content is currently present. -} +inAnnex :: Key -> Annex Bool +inAnnex key = inAnnexCheck key $ liftIO . doesFileExist + +{- Runs an arbitrary check on a key's content. -} +inAnnexCheck :: Key -> (FilePath -> Annex Bool) -> Annex Bool +inAnnexCheck key check = inAnnex' id False check key + +{- Generic inAnnex, handling both indirect and direct mode. + - + - In direct mode, at least one of the associated files must pass the + - check. Additionally, the file must be unmodified. + -} +inAnnex' :: (a -> Bool) -> a -> (FilePath -> Annex a) -> Key -> Annex a +inAnnex' isgood bad check key = withObjectLoc key checkindirect checkdirect + where + checkindirect loc = do + whenM (fromRepo Git.repoIsUrl) $ + error "inAnnex cannot check remote repo" + check loc + checkdirect [] = return bad + checkdirect (loc:locs) = do + r <- check loc + if isgood r + then ifM (goodContent key loc) + ( return r + , checkdirect locs + ) + else checkdirect locs + +{- A safer check; the key's content must not only be present, but + - is not in the process of being removed. -} +inAnnexSafe :: Key -> Annex (Maybe Bool) +inAnnexSafe = inAnnex' (fromMaybe False) (Just False) go + where + go f = liftIO $ openforlock f >>= check +#ifndef mingw32_HOST_OS + openforlock f = catchMaybeIO $ + openFd f ReadOnly Nothing defaultFileFlags +#else + openforlock _ = return $ Just () +#endif + check Nothing = return is_missing +#ifndef mingw32_HOST_OS + check (Just h) = do + v <- getLock h (ReadLock, AbsoluteSeek, 0, 0) + closeFd h + return $ case v of + Just _ -> is_locked + Nothing -> is_unlocked +#else + check (Just _) = return is_unlocked +#endif +#ifndef mingw32_HOST_OS + is_locked = Nothing +#endif + is_unlocked = Just True + is_missing = Just False + +{- Content is exclusively locked while running an action that might remove + - it. (If the content is not present, no locking is done.) -} +lockContent :: Key -> Annex a -> Annex a +#ifndef mingw32_HOST_OS +lockContent key a = do + file <- calcRepo $ gitAnnexLocation key + bracketIO (openforlock file >>= lock) unlock (const a) + where + {- Since files are stored with the write bit disabled, have + - to fiddle with permissions to open for an exclusive lock. -} + openforlock f = catchMaybeIO $ ifM (doesFileExist f) + ( withModifiedFileMode f + (`unionFileModes` ownerWriteMode) + open + , open + ) + where + open = openFd f ReadWrite Nothing defaultFileFlags + lock Nothing = return Nothing + lock (Just fd) = do + v <- tryIO $ setLock fd (WriteLock, AbsoluteSeek, 0, 0) + case v of + Left _ -> error "content is locked" + Right _ -> return $ Just fd + unlock Nothing = noop + unlock (Just l) = closeFd l +#else +lockContent _key a = a -- no locking for Windows! +#endif + +{- Runs an action, passing it a temporary filename to get, + - and if the action succeeds, moves the temp file into + - the annex as a key's content. -} +getViaTmp :: Key -> (FilePath -> Annex Bool) -> Annex Bool +getViaTmp = getViaTmpChecked (return True) + +{- Like getViaTmp, but does not check that there is enough disk space + - for the incoming key. For use when the key content is already on disk + - and not being copied into place. -} +getViaTmpUnchecked :: Key -> (FilePath -> Annex Bool) -> Annex Bool +getViaTmpUnchecked = finishGetViaTmp (return True) + +getViaTmpChecked :: Annex Bool -> Key -> (FilePath -> Annex Bool) -> Annex Bool +getViaTmpChecked check key action = do + tmp <- fromRepo $ gitAnnexTmpLocation key + + -- Check that there is enough free disk space. + -- When the temp file already exists, count the space + -- it is using as free. + e <- liftIO $ doesFileExist tmp + alreadythere <- if e + then fromIntegral . fileSize <$> liftIO (getFileStatus tmp) + else return 0 + ifM (checkDiskSpace Nothing key alreadythere) + ( do + when e $ thawContent tmp + finishGetViaTmp check key action + , return False + ) + +finishGetViaTmp :: Annex Bool -> Key -> (FilePath -> Annex Bool) -> Annex Bool +finishGetViaTmp check key action = do + tmpfile <- prepTmp key + ifM (action tmpfile <&&> check) + ( do + moveAnnex key tmpfile + logStatus key InfoPresent + return True + , do + -- the tmp file is left behind, in case caller wants + -- to resume its transfer + return False + ) + +prepTmp :: Key -> Annex FilePath +prepTmp key = do + tmp <- fromRepo $ gitAnnexTmpLocation key + createAnnexDirectory (parentDir tmp) + return tmp + +{- Creates a temp file, runs an action on it, and cleans up the temp file. -} +withTmp :: Key -> (FilePath -> Annex a) -> Annex a +withTmp key action = do + tmp <- prepTmp key + res <- action tmp + liftIO $ nukeFile tmp + return res + +{- Checks that there is disk space available to store a given key, + - in a destination (or the annex) printing a warning if not. -} +checkDiskSpace :: Maybe FilePath -> Key -> Integer -> Annex Bool +checkDiskSpace destination key alreadythere = do + reserve <- annexDiskReserve <$> Annex.getGitConfig + free <- liftIO . getDiskFree =<< dir + force <- Annex.getState Annex.force + case (free, keySize key) of + (Just have, Just need) -> do + let ok = (need + reserve <= have + alreadythere) || force + unless ok $ + needmorespace (need + reserve - have - alreadythere) + return ok + _ -> return True + where + dir = maybe (fromRepo gitAnnexDir) return destination + needmorespace n = + warning $ "not enough free space, need " ++ + roughSize storageUnits True n ++ + " more" ++ forcemsg + forcemsg = " (use --force to override this check or adjust annex.diskreserve)" + +{- Moves a key's content into .git/annex/objects/ + - + - In direct mode, moves it to the associated file, or files. + - + - What if the key there already has content? This could happen for + - various reasons; perhaps the same content is being annexed again. + - Perhaps there has been a hash collision generating the keys. + - + - The current strategy is to assume that in this case it's safe to delete + - one of the two copies of the content; and the one already in the annex + - is left there, assuming it's the original, canonical copy. + - + - I considered being more paranoid, and checking that both files had + - the same content. Decided against it because A) users explicitly choose + - a backend based on its hashing properties and so if they're dealing + - with colliding files it's their own fault and B) adding such a check + - would not catch all cases of colliding keys. For example, perhaps + - a remote has a key; if it's then added again with different content then + - the overall system now has two different peices of content for that + - key, and one of them will probably get deleted later. So, adding the + - check here would only raise expectations that git-annex cannot truely + - meet. + -} +moveAnnex :: Key -> FilePath -> Annex () +moveAnnex key src = withObjectLoc key storeobject storedirect + where + storeobject dest = ifM (liftIO $ doesFileExist dest) + ( alreadyhave + , modifyContent dest $ do + liftIO $ moveFile src dest + freezeContent dest + ) + storeindirect = storeobject =<< calcRepo (gitAnnexLocation key) + + {- In direct mode, the associated file's content may be locally + - modified. In that case, it's preserved. However, the content + - we're moving into the annex may be the only extant copy, so + - it's important we not lose it. So, when the key's content + - cannot be moved to any associated file, it's stored in indirect + - mode. + -} + storedirect = storedirect' storeindirect + storedirect' fallback [] = fallback + storedirect' fallback (f:fs) = do + thawContent src + v <- isAnnexLink f + if Just key == v + then do + updateInodeCache key src + replaceFile f $ liftIO . moveFile src + chmodContent f + forM_ fs $ + addContentWhenNotPresent key f + else ifM (goodContent key f) + ( storedirect' alreadyhave fs + , storedirect' fallback fs + ) + + alreadyhave = liftIO $ removeFile src + +{- Runs an action to transfer an object's content. + - + - In direct mode, it's possible for the file to change as it's being sent. + - If this happens, runs the rollback action and returns False. The + - rollback action should remove the data that was transferred. + -} +sendAnnex :: Key -> Annex () -> (FilePath -> Annex Bool) -> Annex Bool +sendAnnex key rollback sendobject = go =<< prepSendAnnex key + where + go Nothing = return False + go (Just (f, checksuccess)) = do + r <- sendobject f + ifM checksuccess + ( return r + , do + rollback + return False + ) + +{- Returns a file that contains an object's content, + - and an check to run after the transfer is complete. + - + - In direct mode, it's possible for the file to change as it's being sent, + - and the check detects this case and returns False. + - + - Note that the returned check action is, in some cases, run in the + - Annex monad of the remote that is receiving the object, rather than + - the sender. So it cannot rely on Annex state. + -} +prepSendAnnex :: Key -> Annex (Maybe (FilePath, Annex Bool)) +prepSendAnnex key = withObjectLoc key indirect direct + where + indirect f = return $ Just (f, return True) + direct [] = return Nothing + direct (f:fs) = do + cache <- recordedInodeCache key + -- check that we have a good file + ifM (sameInodeCache f cache) + ( return $ Just (f, sameInodeCache f cache) + , direct fs + ) + +{- Performs an action, passing it the location to use for a key's content. + - + - In direct mode, the associated files will be passed. But, if there are + - no associated files for a key, the indirect mode action will be + - performed instead. -} +withObjectLoc :: Key -> (FilePath -> Annex a) -> ([FilePath] -> Annex a) -> Annex a +withObjectLoc key indirect direct = ifM isDirect + ( do + fs <- associatedFiles key + if null fs + then goindirect + else direct fs + , goindirect + ) + where + goindirect = indirect =<< calcRepo (gitAnnexLocation key) + +cleanObjectLoc :: Key -> Annex () -> Annex () +cleanObjectLoc key cleaner = do + file <- calcRepo $ gitAnnexLocation key + void $ tryAnnexIO $ thawContentDir file + cleaner + liftIO $ removeparents file (3 :: Int) + where + removeparents _ 0 = noop + removeparents file n = do + let dir = parentDir file + maybe noop (const $ removeparents dir (n-1)) + <=< catchMaybeIO $ removeDirectory dir + +{- Removes a key's file from .git/annex/objects/ + - + - In direct mode, deletes the associated files or files, and replaces + - them with symlinks. -} +removeAnnex :: Key -> Annex () +removeAnnex key = withObjectLoc key remove removedirect + where + remove file = cleanObjectLoc key $ do + liftIO $ nukeFile file + removeInodeCache key + removedirect fs = do + cache <- recordedInodeCache key + removeInodeCache key + mapM_ (resetfile cache) fs + resetfile cache f = whenM (sameInodeCache f cache) $ do + l <- inRepo $ gitAnnexLink f key + top <- fromRepo Git.repoPath + cwd <- liftIO getCurrentDirectory + let top' = fromMaybe top $ absNormPath cwd top + let l' = relPathDirToFile top' (fromMaybe l $ absNormPath top' l) + replaceFile f $ makeAnnexLink l' + +{- Moves a key's file out of .git/annex/objects/ -} +fromAnnex :: Key -> FilePath -> Annex () +fromAnnex key dest = cleanObjectLoc key $ do + file <- calcRepo $ gitAnnexLocation key + thawContent file + liftIO $ moveFile file dest + +{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and + - returns the file it was moved to. -} +moveBad :: Key -> Annex FilePath +moveBad key = do + src <- calcRepo $ gitAnnexLocation key + bad <- fromRepo gitAnnexBadDir + let dest = bad </> takeFileName src + createAnnexDirectory (parentDir dest) + cleanObjectLoc key $ + liftIO $ moveFile src dest + logStatus key InfoMissing + return dest + +{- List of keys whose content exists in the annex. -} +getKeysPresent :: Annex [Key] +getKeysPresent = do + direct <- isDirect + dir <- fromRepo gitAnnexObjectDir + liftIO $ traverse direct (2 :: Int) dir + where + traverse direct depth dir = do + contents <- catchDefaultIO [] (dirContents dir) + if depth == 0 + then do + contents' <- filterM (present direct) contents + let keys = mapMaybe (fileKey . takeFileName) contents' + continue keys [] + else do + let deeper = traverse direct (depth - 1) + continue [] (map deeper contents) + continue keys [] = return keys + continue keys (a:as) = do + {- Force lazy traversal with unsafeInterleaveIO. -} + morekeys <- unsafeInterleaveIO a + continue (morekeys++keys) as + + {- In indirect mode, look for the key. In direct mode, + - the inode cache file is only present when a key's content + - is present. -} + present False d = doesFileExist $ contentfile d + present True d = doesFileExist $ contentfile d ++ ".cache" + contentfile d = d </> takeFileName d + +{- Things to do to record changes to content when shutting down. + - + - It's acceptable to avoid committing changes to the branch, + - especially if performing a short-lived action. + -} +saveState :: Bool -> Annex () +saveState nocommit = doSideAction $ do + Annex.Queue.flush + unless nocommit $ + whenM (annexAlwaysCommit <$> Annex.getGitConfig) $ + Annex.Branch.commit "update" + +{- Downloads content from any of a list of urls. -} +downloadUrl :: [Url.URLString] -> FilePath -> Annex Bool +downloadUrl urls file = go =<< annexWebDownloadCommand <$> Annex.getGitConfig + where + go Nothing = do + opts <- map Param . annexWebOptions <$> Annex.getGitConfig + headers <- getHttpHeaders + anyM (\u -> Url.withUserAgent $ Url.download u headers opts file) urls + go (Just basecmd) = liftIO $ anyM (downloadcmd basecmd) urls + downloadcmd basecmd url = + boolSystem "sh" [Param "-c", Param $ gencmd url basecmd] + <&&> doesFileExist file + gencmd url = massReplace + [ ("%file", shellEscape file) + , ("%url", shellEscape url) + ] + +{- Copies a key's content, when present, to a temp file. + - This is used to speed up some rsyncs. -} +preseedTmp :: Key -> FilePath -> Annex Bool +preseedTmp key file = go =<< inAnnex key + where + go False = return False + go True = do + ok <- copy + when ok $ thawContent file + return ok + copy = ifM (liftIO $ doesFileExist file) + ( return True + , do + s <- calcRepo $ gitAnnexLocation key + liftIO $ copyFileExternal s file + ) + +{- Blocks writing to an annexed file, and modifies file permissions to + - allow reading it, per core.sharedRepository setting. -} +freezeContent :: FilePath -> Annex () +freezeContent file = unlessM crippledFileSystem $ + liftIO . go =<< fromRepo getSharedRepository + where + go GroupShared = modifyFileMode file $ + removeModes writeModes . + addModes [ownerReadMode, groupReadMode] + go AllShared = modifyFileMode file $ + removeModes writeModes . + addModes readModes + go _ = modifyFileMode file $ + removeModes writeModes . + addModes [ownerReadMode] + +{- Adjusts read mode of annexed file per core.sharedRepository setting. -} +chmodContent :: FilePath -> Annex () +chmodContent file = unlessM crippledFileSystem $ + liftIO . go =<< fromRepo getSharedRepository + where + go GroupShared = modifyFileMode file $ + addModes [ownerReadMode, groupReadMode] + go AllShared = modifyFileMode file $ + addModes readModes + go _ = modifyFileMode file $ + addModes [ownerReadMode] + +{- Allows writing to an annexed file that freezeContent was called on + - before. -} +thawContent :: FilePath -> Annex () +thawContent file = unlessM crippledFileSystem $ + liftIO . go =<< fromRepo getSharedRepository + where + go GroupShared = groupWriteRead file + go AllShared = groupWriteRead file + go _ = allowWrite file + +{- Finds files directly inside a directory like gitAnnexBadDir + - (not in subdirectories) and returns the corresponding keys. -} +dirKeys :: (Git.Repo -> FilePath) -> Annex [Key] +dirKeys dirspec = do + dir <- fromRepo dirspec + ifM (liftIO $ doesDirectoryExist dir) + ( do + contents <- liftIO $ getDirectoryContents dir + files <- liftIO $ filterM doesFileExist $ + map (dir </>) contents + return $ mapMaybe (fileKey . takeFileName) files + , return [] + ) + diff --git a/Annex/Content/Direct.hs b/Annex/Content/Direct.hs new file mode 100644 index 000000000..a5d71288b --- /dev/null +++ b/Annex/Content/Direct.hs @@ -0,0 +1,259 @@ +{- git-annex file content managing for direct mode + - + - Copyright 2012-2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Content.Direct ( + associatedFiles, + associatedFilesRelative, + removeAssociatedFile, + removeAssociatedFileUnchecked, + removeAssociatedFiles, + addAssociatedFile, + goodContent, + recordedInodeCache, + updateInodeCache, + addInodeCache, + writeInodeCache, + compareInodeCaches, + compareInodeCachesWith, + sameInodeCache, + elemInodeCaches, + sameFileStatus, + removeInodeCache, + toInodeCache, + inodesChanged, + createInodeSentinalFile, + addContentWhenNotPresent, +) where + +import Common.Annex +import qualified Annex +import Annex.Perms +import qualified Git +import Utility.Tmp +import Logs.Location +import Utility.InodeCache +import Utility.CopyFile +import Annex.ReplaceFile +import Annex.Link + +{- Absolute FilePaths of Files in the tree that are associated with a key. -} +associatedFiles :: Key -> Annex [FilePath] +associatedFiles key = do + files <- associatedFilesRelative key + top <- fromRepo Git.repoPath + return $ map (top </>) files + +{- List of files in the tree that are associated with a key, relative to + - the top of the repo. -} +associatedFilesRelative :: Key -> Annex [FilePath] +associatedFilesRelative key = do + mapping <- calcRepo $ gitAnnexMapping key + liftIO $ catchDefaultIO [] $ do + h <- openFile mapping ReadMode + fileEncoding h + lines <$> hGetContents h + +{- Changes the associated files information for a key, applying a + - transformation to the list. Returns new associatedFiles value. -} +changeAssociatedFiles :: Key -> ([FilePath] -> [FilePath]) -> Annex [FilePath] +changeAssociatedFiles key transform = do + mapping <- calcRepo $ gitAnnexMapping key + files <- associatedFilesRelative key + let files' = transform files + when (files /= files') $ do + modifyContent mapping $ + liftIO $ viaTmp write mapping $ unlines files' + top <- fromRepo Git.repoPath + return $ map (top </>) files' + where + write file content = do + h <- openFile file WriteMode + fileEncoding h + hPutStr h content + hClose h + +{- Removes the list of associated files. -} +removeAssociatedFiles :: Key -> Annex () +removeAssociatedFiles key = do + mapping <- calcRepo $ gitAnnexMapping key + modifyContent mapping $ + liftIO $ nukeFile mapping + +{- Removes an associated file. Returns new associatedFiles value. + - Checks if this was the last copy of the object, and updates location + - log. -} +removeAssociatedFile :: Key -> FilePath -> Annex [FilePath] +removeAssociatedFile key file = do + fs <- removeAssociatedFileUnchecked key file + when (null fs) $ + logStatus key InfoMissing + return fs + +{- Removes an associated file. Returns new associatedFiles value. -} +removeAssociatedFileUnchecked :: Key -> FilePath -> Annex [FilePath] +removeAssociatedFileUnchecked key file = do + file' <- normaliseAssociatedFile file + changeAssociatedFiles key $ filter (/= file') + +{- Adds an associated file. Returns new associatedFiles value. -} +addAssociatedFile :: Key -> FilePath -> Annex [FilePath] +addAssociatedFile key file = do + file' <- normaliseAssociatedFile file + changeAssociatedFiles key $ \files -> + if file' `elem` files + then files + else file':files + +{- Associated files are always stored relative to the top of the repository. + - The input FilePath is relative to the CWD, or is absolute. -} +normaliseAssociatedFile :: FilePath -> Annex FilePath +normaliseAssociatedFile file = do + top <- fromRepo Git.repoPath + liftIO $ relPathDirToFile top <$> absPath file + +{- Checks if a file in the tree, associated with a key, has not been modified. + - + - To avoid needing to fsck the file's content, which can involve an + - expensive checksum, this relies on a cache that contains the file's + - expected mtime and inode. + -} +goodContent :: Key -> FilePath -> Annex Bool +goodContent key file = sameInodeCache file =<< recordedInodeCache key + +{- Gets the recorded inode cache for a key. + - + - A key can be associated with multiple files, so may return more than + - one. -} +recordedInodeCache :: Key -> Annex [InodeCache] +recordedInodeCache key = withInodeCacheFile key $ \f -> + liftIO $ catchDefaultIO [] $ + mapMaybe readInodeCache . lines <$> readFileStrict f + +{- Caches an inode for a file. + - + - Anything else already cached is preserved. + -} +updateInodeCache :: Key -> FilePath -> Annex () +updateInodeCache key file = maybe noop (addInodeCache key) + =<< liftIO (genInodeCache file) + +{- Adds another inode to the cache for a key. -} +addInodeCache :: Key -> InodeCache -> Annex () +addInodeCache key cache = do + oldcaches <- recordedInodeCache key + unlessM (elemInodeCaches cache oldcaches) $ + writeInodeCache key (cache:oldcaches) + +{- Writes inode cache for a key. -} +writeInodeCache :: Key -> [InodeCache] -> Annex () +writeInodeCache key caches = withInodeCacheFile key $ \f -> + modifyContent f $ + liftIO $ writeFile f $ + unlines $ map showInodeCache caches + +{- Removes an inode cache. -} +removeInodeCache :: Key -> Annex () +removeInodeCache key = withInodeCacheFile key $ \f -> + modifyContent f $ + liftIO $ nukeFile f + +withInodeCacheFile :: Key -> (FilePath -> Annex a) -> Annex a +withInodeCacheFile key a = a =<< calcRepo (gitAnnexInodeCache key) + +{- Checks if a InodeCache matches the current version of a file. -} +sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool +sameInodeCache _ [] = return False +sameInodeCache file old = go =<< liftIO (genInodeCache file) + where + go Nothing = return False + go (Just curr) = elemInodeCaches curr old + +{- Checks if a FileStatus matches the recorded InodeCache of a file. -} +sameFileStatus :: Key -> FileStatus -> Annex Bool +sameFileStatus key status = do + old <- recordedInodeCache key + let curr = toInodeCache status + case (old, curr) of + (_, Just c) -> elemInodeCaches c old + ([], Nothing) -> return True + _ -> return False + +{- If the inodes have changed, only the size and mtime are compared. -} +compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool +compareInodeCaches x y + | compareStrong x y = return True + | otherwise = ifM inodesChanged + ( return $ compareWeak x y + , return False + ) + +elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool +elemInodeCaches _ [] = return False +elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l) + ( return True + , elemInodeCaches c ls + ) + +compareInodeCachesWith :: Annex InodeComparisonType +compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly ) + +{- Copies the contentfile to the associated file, if the associated + - file has no content. If the associated file does have content, + - even if the content differs, it's left unchanged. -} +addContentWhenNotPresent :: Key -> FilePath -> FilePath -> Annex () +addContentWhenNotPresent key contentfile associatedfile = do + v <- isAnnexLink associatedfile + when (Just key == v) $ + replaceFile associatedfile $ + liftIO . void . copyFileExternal contentfile + updateInodeCache key associatedfile + +{- Some filesystems get new inodes each time they are mounted. + - In order to work on such a filesystem, a sentinal file is used to detect + - when the inodes have changed. + - + - If the sentinal file does not exist, we have to assume that the + - inodes have changed. + -} +inodesChanged :: Annex Bool +inodesChanged = maybe calc return =<< Annex.getState Annex.inodeschanged + where + calc = do + scache <- liftIO . genInodeCache + =<< fromRepo gitAnnexInodeSentinal + scached <- readInodeSentinalFile + let changed = case (scache, scached) of + (Just c1, Just c2) -> not $ compareStrong c1 c2 + _ -> True + Annex.changeState $ \s -> s { Annex.inodeschanged = Just changed } + return changed + +readInodeSentinalFile :: Annex (Maybe InodeCache) +readInodeSentinalFile = do + sentinalcachefile <- fromRepo gitAnnexInodeSentinalCache + liftIO $ catchDefaultIO Nothing $ + readInodeCache <$> readFile sentinalcachefile + +writeInodeSentinalFile :: Annex () +writeInodeSentinalFile = do + sentinalfile <- fromRepo gitAnnexInodeSentinal + createAnnexDirectory (parentDir sentinalfile) + sentinalcachefile <- fromRepo gitAnnexInodeSentinalCache + liftIO $ writeFile sentinalfile "" + liftIO $ maybe noop (writeFile sentinalcachefile . showInodeCache) + =<< genInodeCache sentinalfile + +{- The sentinal file is only created when first initializing a repository. + - If there are any annexed objects in the repository already, creating + - the file would invalidate their inode caches. -} +createInodeSentinalFile :: Annex () +createInodeSentinalFile = + unlessM (alreadyexists <||> hasobjects) + writeInodeSentinalFile + where + alreadyexists = isJust <$> readInodeSentinalFile + hasobjects = liftIO . doesDirectoryExist =<< fromRepo gitAnnexObjectDir diff --git a/Annex/Direct.hs b/Annex/Direct.hs new file mode 100644 index 000000000..3fa5f9362 --- /dev/null +++ b/Annex/Direct.hs @@ -0,0 +1,306 @@ +{- git-annex direct mode + - + - Copyright 2012 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Direct where + +import Common.Annex +import qualified Annex +import qualified Git +import qualified Git.LsFiles +import qualified Git.Merge +import qualified Git.DiffTree as DiffTree +import qualified Git.Config +import qualified Git.Ref +import qualified Git.Branch +import Git.Sha +import Git.FilePath +import Git.Types +import Config +import Annex.CatFile +import qualified Annex.Queue +import Logs.Location +import Backend +import Types.KeySource +import Annex.Content +import Annex.Content.Direct +import Annex.Link +import Utility.InodeCache +import Utility.CopyFile +import Annex.Perms +import Annex.ReplaceFile +import Annex.Exception + +{- Uses git ls-files to find files that need to be committed, and stages + - them into the index. Returns True if some changes were staged. -} +stageDirect :: Annex Bool +stageDirect = do + Annex.Queue.flush + top <- fromRepo Git.repoPath + (l, cleanup) <- inRepo $ Git.LsFiles.stagedOthersDetails [top] + forM_ l go + void $ liftIO cleanup + staged <- Annex.Queue.size + Annex.Queue.flush + return $ staged /= 0 + where + {- Determine what kind of modified or deleted file this is, as + - efficiently as we can, by getting any key that's associated + - with it in git, as well as its stat info. -} + go (file, Just sha, Just mode) = do + shakey <- catKey sha mode + mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file + filekey <- isAnnexLink file + case (shakey, filekey, mstat, toInodeCache =<< mstat) of + (_, Just key, _, _) + | shakey == filekey -> noop + {- A changed symlink. -} + | otherwise -> stageannexlink file key + (Just key, _, _, Just cache) -> do + {- All direct mode files will show as + - modified, so compare the cache to see if + - it really was. -} + oldcache <- recordedInodeCache key + case oldcache of + [] -> modifiedannexed file key cache + _ -> unlessM (elemInodeCaches cache oldcache) $ + modifiedannexed file key cache + (Just key, _, Nothing, _) -> deletedannexed file key + (Nothing, _, Nothing, _) -> deletegit file + (_, _, Just _, _) -> addgit file + go _ = noop + + modifiedannexed file oldkey cache = do + void $ removeAssociatedFile oldkey file + void $ addDirect file cache + + deletedannexed file key = do + void $ removeAssociatedFile key file + deletegit file + + stageannexlink file key = do + l <- inRepo $ gitAnnexLink file key + stageSymlink file =<< hashSymlink l + void $ addAssociatedFile key file + + addgit file = Annex.Queue.addCommand "add" [Param "-f"] [file] + + deletegit file = Annex.Queue.addCommand "rm" [Param "-f"] [file] + +{- Adds a file to the annex in direct mode. Can fail, if the file is + - modified or deleted while it's being added. -} +addDirect :: FilePath -> InodeCache -> Annex Bool +addDirect file cache = do + showStart "add" file + let source = KeySource + { keyFilename = file + , contentLocation = file + , inodeCache = Just cache + } + got =<< genKey source =<< chooseBackend file + where + got Nothing = do + showEndFail + return False + got (Just (key, _)) = ifM (sameInodeCache file [cache]) + ( do + l <- inRepo $ gitAnnexLink file key + stageSymlink file =<< hashSymlink l + addInodeCache key cache + void $ addAssociatedFile key file + logStatus key InfoPresent + showEndOk + return True + , do + showEndFail + return False + ) + +{- In direct mode, git merge would usually refuse to do anything, since it + - sees present direct mode files as type changed files. To avoid this, + - merge is run with the work tree set to a temp directory. + - + - This should only be used once any changes to the real working tree have + - already been committed, because it overwrites files in the working tree. + -} +mergeDirect :: FilePath -> Git.Ref -> Git.Repo -> IO Bool +mergeDirect d branch g = do + whenM (doesDirectoryExist d) $ + removeDirectoryRecursive d + createDirectoryIfMissing True d + let g' = g { location = Local { gitdir = Git.localGitDir g, worktree = Just d } } + Git.Merge.mergeNonInteractive branch g' + +{- Cleans up after a direct mode merge. The merge must have been committed, + - and the commit sha passed in, along with the old sha of the tree + - before the merge. Uses git diff-tree to find files that changed between + - the two shas, and applies those changes to the work tree. + - + - There are really only two types of changes: An old item can be deleted, + - or a new item added. Two passes are made, first deleting and then + - adding. This is to handle cases where eg, a file is deleted and a + - directory is added. The diff-tree output may list these in the opposite + - order, but we cannot really add the directory until the file with the + - same name is remvoed. + -} +mergeDirectCleanup :: FilePath -> Git.Ref -> Git.Ref -> Annex () +mergeDirectCleanup d oldsha newsha = do + (items, cleanup) <- inRepo $ DiffTree.diffTreeRecursive oldsha newsha + makeabs <- flip fromTopFilePath <$> gitRepo + let fsitems = zip (map (makeabs . DiffTree.file) items) items + forM_ fsitems $ + go DiffTree.srcsha DiffTree.srcmode moveout moveout_raw + forM_ fsitems $ + go DiffTree.dstsha DiffTree.dstmode movein movein_raw + void $ liftIO cleanup + liftIO $ removeDirectoryRecursive d + where + go getsha getmode a araw (f, item) + | getsha item == nullSha = noop + | otherwise = void $ + tryAnnex . maybe (araw f) (\k -> void $ a k f) + =<< catKey (getsha item) (getmode item) + + moveout k f = removeDirect k f + + {- Files deleted by the merge are removed from the work tree. + - Empty work tree directories are removed, per git behavior. -} + moveout_raw f = liftIO $ do + nukeFile f + void $ tryIO $ removeDirectory $ parentDir f + + {- If the file is already present, with the right content for the + - key, it's left alone. Otherwise, create the symlink and then + - if possible, replace it with the content. -} + movein k f = unlessM (goodContent k f) $ do + l <- inRepo $ gitAnnexLink f k + replaceFile f $ makeAnnexLink l + toDirect k f + + {- Any new, modified, or renamed files were written to the temp + - directory by the merge, and are moved to the real work tree. -} + movein_raw f = liftIO $ do + createDirectoryIfMissing True $ parentDir f + void $ tryIO $ rename (d </> f) f + +{- If possible, converts a symlink in the working tree into a direct + - mode file. If the content is not available, leaves the symlink + - unchanged. -} +toDirect :: Key -> FilePath -> Annex () +toDirect k f = fromMaybe noop =<< toDirectGen k f + +toDirectGen :: Key -> FilePath -> Annex (Maybe (Annex ())) +toDirectGen k f = do + loc <- calcRepo $ gitAnnexLocation k + ifM (liftIO $ doesFileExist loc) + ( return $ Just $ fromindirect loc + , do + {- Copy content from another direct file. -} + absf <- liftIO $ absPath f + dlocs <- filterM (goodContent k) =<< + filterM (\l -> isNothing <$> getAnnexLinkTarget l) =<< + (filter (/= absf) <$> addAssociatedFile k f) + case dlocs of + [] -> return Nothing + (dloc:_) -> return $ Just $ fromdirect dloc + ) + where + fromindirect loc = do + {- Move content from annex to direct file. -} + updateInodeCache k loc + void $ addAssociatedFile k f + modifyContent loc $ do + thawContent loc + replaceFile f $ liftIO . moveFile loc + fromdirect loc = do + replaceFile f $ + liftIO . void . copyFileExternal loc + updateInodeCache k f + +{- Removes a direct mode file, while retaining its content in the annex + - (unless its content has already been changed). -} +removeDirect :: Key -> FilePath -> Annex () +removeDirect k f = do + void $ removeAssociatedFileUnchecked k f + unlessM (inAnnex k) $ + ifM (goodContent k f) + ( moveAnnex k f + , logStatus k InfoMissing + ) + liftIO $ do + nukeFile f + void $ tryIO $ removeDirectory $ parentDir f + +{- Called when a direct mode file has been changed. Its old content may be + - lost. -} +changedDirect :: Key -> FilePath -> Annex () +changedDirect oldk f = do + locs <- removeAssociatedFile oldk f + whenM (pure (null locs) <&&> not <$> inAnnex oldk) $ + logStatus oldk InfoMissing + +{- Enable/disable direct mode. -} +setDirect :: Bool -> Annex () +setDirect wantdirect = do + if wantdirect + then do + switchHEAD + setbare + else do + setbare + switchHEADBack + setConfig (annexConfig "direct") val + Annex.changeGitConfig $ \c -> c { annexDirect = wantdirect } + where + val = Git.Config.boolConfig wantdirect + setbare = setConfig (ConfigKey Git.Config.coreBare) val + +{- Since direct mode sets core.bare=true, incoming pushes could change + - the currently checked out branch. To avoid this problem, HEAD + - is changed to a internal ref that nothing is going to push to. + - + - For refs/heads/master, use refs/heads/annex/direct/master; + - this way things that show HEAD (eg shell prompts) will + - hopefully show just "master". -} +directBranch :: Ref -> Ref +directBranch orighead = case split "/" $ show orighead of + ("refs":"heads":"annex":"direct":_) -> orighead + ("refs":"heads":rest) -> + Ref $ "refs/heads/annex/direct/" ++ intercalate "/" rest + _ -> Ref $ "refs/heads/" ++ show (Git.Ref.base orighead) + +{- Converts a directBranch back to the original branch. + - + - Any other ref is left unchanged. + -} +fromDirectBranch :: Ref -> Ref +fromDirectBranch directhead = case split "/" $ show directhead of + ("refs":"heads":"annex":"direct":rest) -> + Ref $ "refs/heads/" ++ intercalate "/" rest + _ -> directhead + +switchHEAD :: Annex () +switchHEAD = maybe noop switch =<< inRepo Git.Branch.currentUnsafe + where + switch orighead = do + let newhead = directBranch orighead + maybe noop (inRepo . Git.Branch.update newhead) + =<< inRepo (Git.Ref.sha orighead) + inRepo $ Git.Branch.checkout newhead + +switchHEADBack :: Annex () +switchHEADBack = maybe noop switch =<< inRepo Git.Branch.currentUnsafe + where + switch currhead = do + let orighead = fromDirectBranch currhead + v <- inRepo $ Git.Ref.sha currhead + case v of + Just headsha + | orighead /= currhead -> do + inRepo $ Git.Branch.update orighead headsha + inRepo $ Git.Branch.checkout orighead + inRepo $ Git.Branch.delete currhead + _ -> inRepo $ Git.Branch.checkout orighead diff --git a/Annex/Direct/Fixup.hs b/Annex/Direct/Fixup.hs new file mode 100644 index 000000000..13485242a --- /dev/null +++ b/Annex/Direct/Fixup.hs @@ -0,0 +1,31 @@ +{- git-annex direct mode guard fixup + - + - Copyright 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Direct.Fixup where + +import Git.Types +import Git.Config +import qualified Git.Construct as Construct +import Utility.Path +import Utility.SafeCommand + +{- Direct mode repos have core.bare=true, but are not really bare. + - Fix up the Repo to be a non-bare repo, and arrange for git commands + - run by git-annex to be passed parameters that override this setting. -} +fixupDirect :: Repo -> IO Repo +fixupDirect r@(Repo { location = l@(Local { gitdir = d, worktree = Nothing }) }) = do + let r' = r + { location = l { worktree = Just (parentDir d) } + , gitGlobalOpts = gitGlobalOpts r ++ + [ Param "-c" + , Param $ coreBare ++ "=" ++ boolConfig False + ] + } + -- Recalc now that the worktree is correct. + rs' <- Construct.fromRemotes r' + return $ r' { remotes = rs' } +fixupDirect r = return r diff --git a/Annex/Environment.hs b/Annex/Environment.hs new file mode 100644 index 000000000..f22c5f2d4 --- /dev/null +++ b/Annex/Environment.hs @@ -0,0 +1,65 @@ +{- git-annex environment + - + - Copyright 2012, 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.Environment where + +import Common.Annex +import Utility.UserInfo +import qualified Git.Config +import Config +import Annex.Exception + +#ifndef mingw32_HOST_OS +import Utility.Env +#endif + +{- Checks that the system's environment allows git to function. + - Git requires a GECOS username, or suitable git configuration, or + - environment variables. + - + - Git also requires the system have a hostname containing a dot. + - Otherwise, it tries various methods to find a FQDN, and will fail if it + - does not. To avoid replicating that code here, which would break if its + - methods change, this function does not check the hostname is valid. + - Instead, code that commits can use ensureCommit. + -} +checkEnvironment :: Annex () +checkEnvironment = do + gitusername <- fromRepo $ Git.Config.getMaybe "user.name" + when (isNothing gitusername || gitusername == Just "") $ + liftIO checkEnvironmentIO + +checkEnvironmentIO :: IO () +checkEnvironmentIO = +#ifdef mingw32_HOST_OS + noop +#else + whenM (null <$> myUserGecos) $ do + username <- myUserName + ensureEnv "GIT_AUTHOR_NAME" username + ensureEnv "GIT_COMMITTER_NAME" username + where +#ifndef __ANDROID__ + -- existing environment is not overwritten + ensureEnv var val = void $ setEnv var val False +#else + -- Environment setting is broken on Android, so this is dealt with + -- in runshell instead. + ensureEnv _ _ = noop +#endif +#endif + +{- Runs an action that commits to the repository, and if it fails, + - sets user.email to a dummy value and tries the action again. -} +ensureCommit :: Annex a -> Annex a +ensureCommit a = either retry return =<< tryAnnex a + where + retry _ = do + setConfig (ConfigKey "user.email") =<< liftIO myUserName + a diff --git a/Annex/Exception.hs b/Annex/Exception.hs new file mode 100644 index 000000000..91347583e --- /dev/null +++ b/Annex/Exception.hs @@ -0,0 +1,46 @@ +{- exception handling in the git-annex monad + - + - Note that when an Annex action fails and the exception is handled + - by these functions, any changes the action has made to the + - AnnexState are retained. This works because the Annex monad + - internally stores the AnnexState in a MVar. + - + - Copyright 2011-2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE PackageImports #-} + +module Annex.Exception ( + bracketIO, + tryAnnex, + tryAnnexIO, + throwAnnex, + catchAnnex, +) where + +import qualified "MonadCatchIO-transformers" Control.Monad.CatchIO as M +import Control.Exception + +import Common.Annex + +{- Runs an Annex action, with setup and cleanup both in the IO monad. -} +bracketIO :: IO v -> (v -> IO b) -> (v -> Annex a) -> Annex a +bracketIO setup cleanup = M.bracket (liftIO setup) (liftIO . cleanup) + +{- try in the Annex monad -} +tryAnnex :: Annex a -> Annex (Either SomeException a) +tryAnnex = M.try + +{- try in the Annex monad, but only catching IO exceptions -} +tryAnnexIO :: Annex a -> Annex (Either IOException a) +tryAnnexIO = M.try + +{- throw in the Annex monad -} +throwAnnex :: Exception e => e -> Annex a +throwAnnex = M.throw + +{- catch in the Annex monad -} +catchAnnex :: Exception e => Annex a -> (e -> Annex a) -> Annex a +catchAnnex = M.catch diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs new file mode 100644 index 000000000..cded857a2 --- /dev/null +++ b/Annex/FileMatcher.hs @@ -0,0 +1,102 @@ +{- git-annex file matching + - + - Copyright 2012, 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.FileMatcher where + +import qualified Data.Map as M + +import Common.Annex +import Limit +import Utility.Matcher +import Types.Group +import Types.Limit +import Logs.Group +import Logs.Remote +import Annex.UUID +import qualified Annex +import Types.FileMatcher +import Git.FilePath +import Types.Remote (RemoteConfig) + +import Data.Either +import qualified Data.Set as S + +type FileMatcher = Matcher MatchFiles + +checkFileMatcher :: FileMatcher -> FilePath -> Annex Bool +checkFileMatcher matcher file = checkFileMatcher' matcher file S.empty True + +checkFileMatcher' :: FileMatcher -> FilePath -> AssumeNotPresent -> Bool -> Annex Bool +checkFileMatcher' matcher file notpresent def + | isEmpty matcher = return def + | otherwise = do + matchfile <- getTopFilePath <$> inRepo (toTopFilePath file) + let fi = FileInfo + { matchFile = matchfile + , relFile = file + } + matchMrun matcher $ \a -> a notpresent fi + +matchAll :: FileMatcher +matchAll = generate [] + +parsedToMatcher :: [Either String (Token MatchFiles)] -> Either String FileMatcher +parsedToMatcher parsed = case partitionEithers parsed of + ([], vs) -> Right $ generate vs + (es, _) -> Left $ unwords $ map ("Parse failure: " ++) es + +exprParser :: GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token MatchFiles)] +exprParser groupmap configmap mu expr = + map parse $ tokenizeMatcher expr + where + parse = parseToken + (limitPresent mu) + (limitInDir preferreddir) + groupmap + preferreddir = fromMaybe "public" $ + M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu + +parseToken :: MkLimit -> MkLimit -> GroupMap -> String -> Either String (Token MatchFiles) +parseToken checkpresent checkpreferreddir groupmap t + | t `elem` tokens = Right $ token t + | t == "present" = use checkpresent + | t == "inpreferreddir" = use checkpreferreddir + | otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $ + M.fromList + [ ("include", limitInclude) + , ("exclude", limitExclude) + , ("copies", limitCopies) + , ("inbackend", limitInBackend) + , ("largerthan", limitSize (>)) + , ("smallerthan", limitSize (<)) + , ("inallgroup", limitInAllGroup groupmap) + ] + where + (k, v) = separate (== '=') t + use a = Operation <$> a v + +{- This is really dumb tokenization; there's no support for quoted values. + - Open and close parens are always treated as standalone tokens; + - otherwise tokens must be separated by whitespace. -} +tokenizeMatcher :: String -> [String] +tokenizeMatcher = filter (not . null ) . concatMap splitparens . words + where + splitparens = segmentDelim (`elem` "()") + +{- Generates a matcher for files large enough (or meeting other criteria) + - to be added to the annex, rather than directly to git. -} +largeFilesMatcher :: Annex FileMatcher +largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig + where + go Nothing = return matchAll + go (Just expr) = do + gm <- groupMap + rc <- readRemoteLog + u <- getUUID + either badexpr return $ + parsedToMatcher $ exprParser gm rc (Just u) expr + badexpr e = error $ "bad annex.largefiles configuration: " ++ e diff --git a/Annex/Hook.hs b/Annex/Hook.hs new file mode 100644 index 000000000..7301a0958 --- /dev/null +++ b/Annex/Hook.hs @@ -0,0 +1,42 @@ +{- git-annex git hooks + - + - Note that it's important that the scripts not change, otherwise + - removing old hooks using an old version of the script would fail. + - + - Copyright 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Hook where + +import Common.Annex +import qualified Git.Hook as Git +import Utility.Shell +import Config + +preCommitHook :: Git.Hook +preCommitHook = Git.Hook "pre-commit" (mkHookScript "git annex pre-commit .") + +mkHookScript :: String -> String +mkHookScript s = unlines + [ shebang_local + , "# automatically configured by git-annex" + , s + ] + +hookWrite :: Git.Hook -> Annex () +hookWrite h = + -- cannot have git hooks in a crippled filesystem (no execute bit) + unlessM crippledFileSystem $ + unlessM (inRepo $ Git.hookWrite h) $ + hookWarning h "already exists, not configuring" + +hookUnWrite :: Git.Hook -> Annex () +hookUnWrite h = unlessM (inRepo $ Git.hookUnWrite h) $ + hookWarning h "contents modified; not deleting. Edit it to remove call to git annex." + +hookWarning :: Git.Hook -> String -> Annex () +hookWarning h msg = do + r <- gitRepo + warning $ Git.hookName h ++ " hook (" ++ Git.hookFile h r ++ ") " ++ msg diff --git a/Annex/Journal.hs b/Annex/Journal.hs new file mode 100644 index 000000000..8b88ab2fb --- /dev/null +++ b/Annex/Journal.hs @@ -0,0 +1,128 @@ +{- management of the git-annex journal + - + - The journal is used to queue up changes before they are committed to the + - git-annex branch. Among other things, it ensures that if git-annex is + - interrupted, its recorded data is not lost. + - + - Copyright 2011-2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.Journal where + +import System.IO.Binary + +import Common.Annex +import Annex.Exception +import qualified Git +import Annex.Perms + +{- Records content for a file in the branch to the journal. + - + - Using the journal, rather than immediatly staging content to the index + - avoids git needing to rewrite the index after every change. + - + - The file in the journal is updated atomically, which allows + - getJournalFileStale to always return a consistent journal file + - content, although possibly not the most current one. + -} +setJournalFile :: JournalLocked -> FilePath -> String -> Annex () +setJournalFile _jl file content = do + createAnnexDirectory =<< fromRepo gitAnnexJournalDir + createAnnexDirectory =<< fromRepo gitAnnexTmpDir + -- journal file is written atomically + jfile <- fromRepo $ journalFile file + tmp <- fromRepo gitAnnexTmpDir + let tmpfile = tmp </> takeFileName jfile + liftIO $ do + writeBinaryFile tmpfile content + moveFile tmpfile jfile + +{- Gets any journalled content for a file in the branch. -} +getJournalFile :: JournalLocked -> FilePath -> Annex (Maybe String) +getJournalFile _jl = getJournalFileStale + +{- Without locking, this is not guaranteed to be the most recent + - version of the file in the journal, so should not be used as a basis for + - changes. -} +getJournalFileStale :: FilePath -> Annex (Maybe String) +getJournalFileStale file = inRepo $ \g -> catchMaybeIO $ + readFileStrict $ journalFile file g + +{- List of files that have updated content in the journal. -} +getJournalledFiles :: JournalLocked -> Annex [FilePath] +getJournalledFiles jl = map fileJournal <$> getJournalFiles jl + +getJournalledFilesStale :: Annex [FilePath] +getJournalledFilesStale = map fileJournal <$> getJournalFilesStale + +{- List of existing journal files. -} +getJournalFiles :: JournalLocked -> Annex [FilePath] +getJournalFiles _jl = getJournalFilesStale + +{- List of existing journal files, but without locking, may miss new ones + - just being added, or may have false positives if the journal is staged + - as it is run. -} +getJournalFilesStale :: Annex [FilePath] +getJournalFilesStale = do + g <- gitRepo + fs <- liftIO $ catchDefaultIO [] $ + getDirectoryContents $ gitAnnexJournalDir g + return $ filter (`notElem` [".", ".."]) fs + +{- Checks if there are changes in the journal. -} +journalDirty :: Annex Bool +journalDirty = not . null <$> getJournalFilesStale + +{- Produces a filename to use in the journal for a file on the branch. + - + - The journal typically won't have a lot of files in it, so the hashing + - used in the branch is not necessary, and all the files are put directly + - in the journal directory. + -} +journalFile :: FilePath -> Git.Repo -> FilePath +journalFile file repo = gitAnnexJournalDir repo </> concatMap mangle file + where + mangle c + | c == pathSeparator = "_" + | c == '_' = "__" + | otherwise = [c] + +{- Converts a journal file (relative to the journal dir) back to the + - filename on the branch. -} +fileJournal :: FilePath -> FilePath +fileJournal = replace [pathSeparator, pathSeparator] "_" . + replace "_" [pathSeparator] + +{- Sentinal value, only produced by lockJournal; required + - as a parameter by things that need to ensure the journal is + - locked. -} +data JournalLocked = ProduceJournalLocked + +{- Runs an action that modifies the journal, using locking to avoid + - contention with other git-annex processes. -} +lockJournal :: (JournalLocked -> Annex a) -> Annex a +lockJournal a = do + lockfile <- fromRepo gitAnnexJournalLock + createAnnexDirectory $ takeDirectory lockfile + mode <- annexFileMode + bracketIO (lock lockfile mode) unlock (const $ a ProduceJournalLocked) + where +#ifndef mingw32_HOST_OS + lock lockfile mode = do + l <- noUmask mode $ createFile lockfile mode + waitToSetLock l (WriteLock, AbsoluteSeek, 0, 0) + return l +#else + lock lockfile _mode = do + writeFile lockfile "" + return lockfile +#endif +#ifndef mingw32_HOST_OS + unlock = closeFd +#else + unlock = removeFile +#endif diff --git a/Annex/Link.hs b/Annex/Link.hs new file mode 100644 index 000000000..30d8c2ae8 --- /dev/null +++ b/Annex/Link.hs @@ -0,0 +1,105 @@ +{- git-annex links to content + - + - On file systems that support them, symlinks are used. + - + - On other filesystems, git instead stores the symlink target in a regular + - file. + - + - Copyright 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Link where + +import Common.Annex +import qualified Annex +import qualified Git.HashObject +import qualified Git.UpdateIndex +import qualified Annex.Queue +import Git.Types +import Git.FilePath + +type LinkTarget = String + +{- Checks if a file is a link to a key. -} +isAnnexLink :: FilePath -> Annex (Maybe Key) +isAnnexLink file = maybe Nothing (fileKey . takeFileName) <$> getAnnexLinkTarget file + +{- Gets the link target of a symlink. + - + - On a filesystem that does not support symlinks, fall back to getting the + - link target by looking inside the file. + - + - Returns Nothing if the file is not a symlink, or not a link to annex + - content. + -} +getAnnexLinkTarget :: FilePath -> Annex (Maybe LinkTarget) +getAnnexLinkTarget file = ifM (coreSymlinks <$> Annex.getGitConfig) + ( check readSymbolicLink $ + return Nothing + , check readSymbolicLink $ + check probefilecontent $ + return Nothing + ) + where + check getlinktarget fallback = do + v <- liftIO $ catchMaybeIO $ getlinktarget file + case v of + Just l + | isLinkToAnnex (fromInternalGitPath l) -> return v + | otherwise -> return Nothing + Nothing -> fallback + + probefilecontent f = do + h <- openFile f ReadMode + fileEncoding h + -- The first 8k is more than enough to read; link + -- files are small. + s <- take 8192 <$> hGetContents h + -- If we got the full 8k, the file is too large + if length s == 8192 + then do + hClose h + return "" + else do + hClose h + -- If there are any NUL or newline + -- characters, or whitespace, we + -- certianly don't have a link to a + -- git-annex key. + return $ if any (`elem` s) "\0\n\r \t" + then "" + else s + +{- Creates a link on disk. + - + - On a filesystem that does not support symlinks, writes the link target + - to a file. Note that git will only treat the file as a symlink if + - it's staged as such, so use addAnnexLink when adding a new file or + - modified link to git. + -} +makeAnnexLink :: LinkTarget -> FilePath -> Annex () +makeAnnexLink linktarget file = ifM (coreSymlinks <$> Annex.getGitConfig) + ( liftIO $ do + void $ tryIO $ removeFile file + createSymbolicLink linktarget file + , liftIO $ writeFile file linktarget + ) + +{- Creates a link on disk, and additionally stages it in git. -} +addAnnexLink :: LinkTarget -> FilePath -> Annex () +addAnnexLink linktarget file = do + makeAnnexLink linktarget file + stageSymlink file =<< hashSymlink linktarget + +{- Injects a symlink target into git, returning its Sha. -} +hashSymlink :: LinkTarget -> Annex Sha +hashSymlink linktarget = inRepo $ Git.HashObject.hashObject BlobObject $ + toInternalGitPath linktarget + +{- Stages a symlink to the annex, using a Sha of its target. -} +stageSymlink :: FilePath -> Sha -> Annex () +stageSymlink file sha = + Annex.Queue.addUpdateIndex =<< + inRepo (Git.UpdateIndex.stageSymlink file sha) diff --git a/Annex/LockPool.hs b/Annex/LockPool.hs new file mode 100644 index 000000000..a9a0f3101 --- /dev/null +++ b/Annex/LockPool.hs @@ -0,0 +1,56 @@ +{- git-annex lock pool + - + - Copyright 2012 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.LockPool where + +import qualified Data.Map as M +import System.Posix.Types (Fd) + +import Common.Annex +import Annex +#ifndef mingw32_HOST_OS +import Annex.Perms +#endif + +{- Create a specified lock file, and takes a shared lock. -} +lockFile :: FilePath -> Annex () +lockFile file = go =<< fromPool file + where + go (Just _) = noop -- already locked + go Nothing = do +#ifndef mingw32_HOST_OS + mode <- annexFileMode + fd <- liftIO $ noUmask mode $ + openFd file ReadOnly (Just mode) defaultFileFlags + liftIO $ waitToSetLock fd (ReadLock, AbsoluteSeek, 0, 0) +#else + liftIO $ writeFile file "" + let fd = 0 +#endif + changePool $ M.insert file fd + +unlockFile :: FilePath -> Annex () +unlockFile file = maybe noop go =<< fromPool file + where + go fd = do +#ifndef mingw32_HOST_OS + liftIO $ closeFd fd +#endif + changePool $ M.delete file + +getPool :: Annex (M.Map FilePath Fd) +getPool = getState lockpool + +fromPool :: FilePath -> Annex (Maybe Fd) +fromPool file = M.lookup file <$> getPool + +changePool :: (M.Map FilePath Fd -> M.Map FilePath Fd) -> Annex () +changePool a = do + m <- getPool + changeState $ \s -> s { lockpool = a m } diff --git a/Annex/Path.hs b/Annex/Path.hs new file mode 100644 index 000000000..a8c4907b2 --- /dev/null +++ b/Annex/Path.hs @@ -0,0 +1,34 @@ +{- git-annex program path + - + - Copyright 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.Path where + +import Common +import Config.Files +import System.Environment + +{- A fully qualified path to the currently running git-annex program. + - + - getExecutablePath is available since ghc 7.4.2. On OSs it supports + - well, it returns the complete path to the program. But, on other OSs, + - it might return just the basename. + -} +programPath :: IO (Maybe FilePath) +programPath = do +#if MIN_VERSION_base(4,6,0) + exe <- getExecutablePath + p <- if isAbsolute exe + then return exe + else readProgramFile +#else + p <- readProgramFile +#endif + -- In case readProgramFile returned just the command name, + -- fall back to finding it in PATH. + searchPath p diff --git a/Annex/Perms.hs b/Annex/Perms.hs new file mode 100644 index 000000000..e3a2fa65a --- /dev/null +++ b/Annex/Perms.hs @@ -0,0 +1,125 @@ +{- git-annex file permissions + - + - Copyright 2012 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Perms ( + setAnnexFilePerm, + setAnnexDirPerm, + annexFileMode, + createAnnexDirectory, + noUmask, + createContentDir, + freezeContentDir, + thawContentDir, + modifyContent, +) where + +import Common.Annex +import Utility.FileMode +import Git.SharedRepository +import qualified Annex +import Annex.Exception +import Config + +import System.Posix.Types + +withShared :: (SharedRepository -> Annex a) -> Annex a +withShared a = maybe startup a =<< Annex.getState Annex.shared + where + startup = do + shared <- fromRepo getSharedRepository + Annex.changeState $ \s -> s { Annex.shared = Just shared } + a shared + +setAnnexFilePerm :: FilePath -> Annex () +setAnnexFilePerm = setAnnexPerm False + +setAnnexDirPerm :: FilePath -> Annex () +setAnnexDirPerm = setAnnexPerm True + +{- Sets appropriate file mode for a file or directory in the annex, + - other than the content files and content directory. Normally, + - use the default mode, but with core.sharedRepository set, + - allow the group to write, etc. -} +setAnnexPerm :: Bool -> FilePath -> Annex () +setAnnexPerm isdir file = unlessM crippledFileSystem $ + withShared $ liftIO . go + where + go GroupShared = modifyFileMode file $ addModes $ + groupSharedModes ++ + if isdir then [ ownerExecuteMode, groupExecuteMode ] else [] + go AllShared = modifyFileMode file $ addModes $ + readModes ++ + [ ownerWriteMode, groupWriteMode ] ++ + if isdir then executeModes else [] + go _ = noop + +{- Gets the appropriate mode to use for creating a file in the annex + - (other than content files, which are locked down more). -} +annexFileMode :: Annex FileMode +annexFileMode = withShared $ return . go + where + go GroupShared = sharedmode + go AllShared = combineModes (sharedmode:readModes) + go _ = stdFileMode + sharedmode = combineModes groupSharedModes + +{- Creates a directory inside the gitAnnexDir, including any parent + - directories. Makes directories with appropriate permissions. -} +createAnnexDirectory :: FilePath -> Annex () +createAnnexDirectory dir = traverse dir [] =<< top + where + top = parentDir <$> fromRepo gitAnnexDir + traverse d below stop + | d `equalFilePath` stop = done + | otherwise = ifM (liftIO $ doesDirectoryExist d) + ( done + , traverse (parentDir d) (d:below) stop + ) + where + done = forM_ below $ \p -> do + liftIO $ createDirectoryIfMissing True p + setAnnexDirPerm p + +{- Blocks writing to the directory an annexed file is in, to prevent the + - file accidentially being deleted. However, if core.sharedRepository + - is set, this is not done, since the group must be allowed to delete the + - file. + -} +freezeContentDir :: FilePath -> Annex () +freezeContentDir file = unlessM crippledFileSystem $ + liftIO . go =<< fromRepo getSharedRepository + where + dir = parentDir file + go GroupShared = groupWriteRead dir + go AllShared = groupWriteRead dir + go _ = preventWrite dir + +thawContentDir :: FilePath -> Annex () +thawContentDir file = unlessM crippledFileSystem $ + liftIO $ allowWrite $ parentDir file + +{- Makes the directory tree to store an annexed file's content, + - with appropriate permissions on each level. -} +createContentDir :: FilePath -> Annex () +createContentDir dest = do + unlessM (liftIO $ doesDirectoryExist dir) $ + createAnnexDirectory dir + -- might have already existed with restricted perms + unlessM crippledFileSystem $ + liftIO $ allowWrite dir + where + dir = parentDir dest + +{- Creates the content directory for a file if it doesn't already exist, + - or thaws it if it does, then runs an action to modify the file, and + - finally, freezes the content directory. -} +modifyContent :: FilePath -> Annex a -> Annex a +modifyContent f a = do + createContentDir f -- also thaws it + v <- tryAnnex a + freezeContentDir f + either throwAnnex return v diff --git a/Annex/Queue.hs b/Annex/Queue.hs new file mode 100644 index 000000000..a5ef60037 --- /dev/null +++ b/Annex/Queue.hs @@ -0,0 +1,62 @@ +{- git-annex command queue + - + - Copyright 2011, 2012 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Queue ( + addCommand, + addUpdateIndex, + flush, + flushWhenFull, + size +) where + +import Common.Annex +import Annex hiding (new) +import qualified Git.Queue +import qualified Git.UpdateIndex + +{- Adds a git command to the queue. -} +addCommand :: String -> [CommandParam] -> [FilePath] -> Annex () +addCommand command params files = do + q <- get + store <=< inRepo $ Git.Queue.addCommand command params files q + +{- Adds an update-index stream to the queue. -} +addUpdateIndex :: Git.UpdateIndex.Streamer -> Annex () +addUpdateIndex streamer = do + q <- get + store <=< inRepo $ Git.Queue.addUpdateIndex streamer q + +{- Runs the queue if it is full. Should be called periodically. -} +flushWhenFull :: Annex () +flushWhenFull = do + q <- get + when (Git.Queue.full q) flush + +{- Runs (and empties) the queue. -} +flush :: Annex () +flush = do + q <- get + unless (0 == Git.Queue.size q) $ do + showStoringStateAction + q' <- inRepo $ Git.Queue.flush q + store q' + +{- Gets the size of the queue. -} +size :: Annex Int +size = Git.Queue.size <$> get + +get :: Annex Git.Queue.Queue +get = maybe new return =<< getState repoqueue + +new :: Annex Git.Queue.Queue +new = do + q <- Git.Queue.new . annexQueueSize <$> getGitConfig + store q + return q + +store :: Git.Queue.Queue -> Annex () +store q = changeState $ \s -> s { repoqueue = Just q } diff --git a/Annex/Quvi.hs b/Annex/Quvi.hs new file mode 100644 index 000000000..b0725bae7 --- /dev/null +++ b/Annex/Quvi.hs @@ -0,0 +1,20 @@ +{- quvi options for git-annex + - + - Copyright 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE Rank2Types #-} + +module Annex.Quvi where + +import Common.Annex +import qualified Annex +import Utility.Quvi +import Utility.Url + +withQuviOptions :: forall a. Query a -> [CommandParam] -> URLString -> Annex a +withQuviOptions a ps url = do + opts <- map Param . annexQuviOptions <$> Annex.getGitConfig + liftIO $ a (ps++opts) url diff --git a/Annex/ReplaceFile.hs b/Annex/ReplaceFile.hs new file mode 100644 index 000000000..dd93b471c --- /dev/null +++ b/Annex/ReplaceFile.hs @@ -0,0 +1,39 @@ +{- git-annex file replacing + - + - Copyright 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.ReplaceFile where + +import Common.Annex +import Annex.Perms +import Annex.Exception + +{- Replaces a possibly already existing file with a new version, + - atomically, by running an action. + - + - The action is passed a temp file, which it can write to, and once + - done the temp file is moved into place. + - + - The action can throw an IO exception, in which case the temp file + - will be deleted, and the existing file will be preserved. + - + - Throws an IO exception when it was unable to replace the file. + -} +replaceFile :: FilePath -> (FilePath -> Annex ()) -> Annex () +replaceFile file a = do + tmpdir <- fromRepo gitAnnexTmpDir + void $ createAnnexDirectory tmpdir + bracketIO (setup tmpdir) nukeFile $ \tmpfile -> do + a tmpfile + liftIO $ catchIO (rename tmpfile file) (fallback tmpfile) + where + setup tmpdir = do + (tmpfile, h) <- openTempFileWithDefaultPermissions tmpdir "tmp" + hClose h + return tmpfile + fallback tmpfile _ = do + createDirectoryIfMissing True $ parentDir file + rename tmpfile file diff --git a/Annex/Ssh.hs b/Annex/Ssh.hs new file mode 100644 index 000000000..8553ee797 --- /dev/null +++ b/Annex/Ssh.hs @@ -0,0 +1,198 @@ +{- git-annex ssh interface, with connection caching + - + - Copyright 2012,2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.Ssh ( + sshCachingOptions, + sshCleanup, + sshCacheDir, + sshReadPort, +) where + +import qualified Data.Map as M +import Data.Hash.MD5 +import System.Process (cwd) + +import Common.Annex +import Annex.LockPool +import qualified Build.SysConfig as SysConfig +import qualified Annex +import Config +import Utility.Env +#ifndef mingw32_HOST_OS +import Annex.Perms +#endif + +{- Generates parameters to ssh to a given host (or user@host) on a given + - port, with connection caching. -} +sshCachingOptions :: (String, Maybe Integer) -> [CommandParam] -> Annex [CommandParam] +sshCachingOptions (host, port) opts = go =<< sshInfo (host, port) + where + go (Nothing, params) = ret params + go (Just socketfile, params) = do + cleanstale + liftIO $ createDirectoryIfMissing True $ parentDir socketfile + lockFile $ socket2lock socketfile + ret params + ret ps = return $ ps ++ opts ++ portParams port ++ [Param "-T"] + -- If the lock pool is empty, this is the first ssh of this + -- run. There could be stale ssh connections hanging around + -- from a previous git-annex run that was interrupted. + cleanstale = whenM (not . any isLock . M.keys <$> getPool) + sshCleanup + +{- Returns a filename to use for a ssh connection caching socket, and + - parameters to enable ssh connection caching. -} +sshInfo :: (String, Maybe Integer) -> Annex (Maybe FilePath, [CommandParam]) +sshInfo (host, port) = go =<< sshCacheDir + where + go Nothing = return (Nothing, []) + go (Just dir) = do + r <- liftIO $ bestSocketPath $ dir </> hostport2socket host port + return $ case r of + Nothing -> (Nothing, []) + Just socketfile -> (Just socketfile, sshConnectionCachingParams socketfile) + +{- Given an absolute path to use for a socket file, + - returns whichever is shorter of that or the relative path to the same + - file. + - + - If no path can be constructed that is a valid socket, returns Nothing. -} +bestSocketPath :: FilePath -> IO (Maybe FilePath) +bestSocketPath abssocketfile = do + relsocketfile <- liftIO $ relPathCwdToFile abssocketfile + let socketfile = if length abssocketfile <= length relsocketfile + then abssocketfile + else relsocketfile + return $ if valid_unix_socket_path (socketfile ++ sshgarbage) + then Just socketfile + else Nothing + where + -- ssh appends a 16 char extension to the socket when setting it + -- up, which needs to be taken into account when checking + -- that a valid socket was constructed. + sshgarbage = take (1+16) $ repeat 'X' + +sshConnectionCachingParams :: FilePath -> [CommandParam] +sshConnectionCachingParams socketfile = + [ Param "-S", Param socketfile + , Params "-o ControlMaster=auto -o ControlPersist=yes" + ] + +{- ssh connection caching creates sockets, so will not work on a + - crippled filesystem. A GIT_ANNEX_TMP_DIR can be provided to use + - a different filesystem. -} +sshCacheDir :: Annex (Maybe FilePath) +sshCacheDir + | SysConfig.sshconnectioncaching = ifM crippledFileSystem + ( maybe (return Nothing) usetmpdir =<< gettmpdir + , ifM (fromMaybe True . annexSshCaching <$> Annex.getGitConfig) + ( Just <$> fromRepo gitAnnexSshDir + , return Nothing + ) + ) + | otherwise = return Nothing + where + gettmpdir = liftIO $ getEnv "GIT_ANNEX_TMP_DIR" + usetmpdir tmpdir = liftIO $ catchMaybeIO $ do + createDirectoryIfMissing True tmpdir + return tmpdir + +portParams :: Maybe Integer -> [CommandParam] +portParams Nothing = [] +portParams (Just port) = [Param "-p", Param $ show port] + +{- Stop any unused ssh processes. -} +sshCleanup :: Annex () +sshCleanup = go =<< sshCacheDir + where + go Nothing = noop + go (Just dir) = do + sockets <- liftIO $ filter (not . isLock) + <$> catchDefaultIO [] (dirContents dir) + forM_ sockets cleanup + cleanup socketfile = do +#ifndef mingw32_HOST_OS + -- Drop any shared lock we have, and take an + -- exclusive lock, without blocking. If the lock + -- succeeds, nothing is using this ssh, and it can + -- be stopped. + let lockfile = socket2lock socketfile + unlockFile lockfile + mode <- annexFileMode + fd <- liftIO $ noUmask mode $ + openFd lockfile ReadWrite (Just mode) defaultFileFlags + v <- liftIO $ tryIO $ + setLock fd (WriteLock, AbsoluteSeek, 0, 0) + case v of + Left _ -> noop + Right _ -> stopssh socketfile + liftIO $ closeFd fd +#else + stopssh socketfile +#endif + stopssh socketfile = do + let (dir, base) = splitFileName socketfile + let params = sshConnectionCachingParams base + -- "ssh -O stop" is noisy on stderr even with -q + void $ liftIO $ catchMaybeIO $ + withQuietOutput createProcessSuccess $ + (proc "ssh" $ toCommand $ + [ Params "-O stop" + ] ++ params ++ [Param "any"]) + { cwd = Just dir } + -- Cannot remove the lock file; other processes may + -- be waiting on our exclusive lock to use it. + +{- This needs to be as short as possible, due to limitations on the length + - of the path to a socket file. At the same time, it needs to be unique + - for each host. + -} +hostport2socket :: String -> Maybe Integer -> FilePath +hostport2socket host Nothing = hostport2socket' host +hostport2socket host (Just port) = hostport2socket' $ host ++ "!" ++ show port +hostport2socket' :: String -> FilePath +hostport2socket' s + | length s > lengthofmd5s = md5s (Str s) + | otherwise = s + where + lengthofmd5s = 32 + +socket2lock :: FilePath -> FilePath +socket2lock socket = socket ++ lockExt + +isLock :: FilePath -> Bool +isLock f = lockExt `isSuffixOf` f + +lockExt :: String +lockExt = ".lock" + +{- This is the size of the sun_path component of sockaddr_un, which + - is the limit to the total length of the filename of a unix socket. + - + - On Linux, this is 108. On OSX, 104. TODO: Probe + -} +sizeof_sockaddr_un_sun_path :: Int +sizeof_sockaddr_un_sun_path = 100 + +{- Note that this looks at the true length of the path in bytes, as it will + - appear on disk. -} +valid_unix_socket_path :: FilePath -> Bool +valid_unix_socket_path f = length (decodeW8 f) < sizeof_sockaddr_un_sun_path + +{- Parses the SSH port, and returns the other OpenSSH options. If + - several ports are found, the last one takes precedence. -} +sshReadPort :: [String] -> (Maybe Integer, [String]) +sshReadPort params = (port, reverse args) + where + (port,args) = aux (Nothing, []) params + aux (p,ps) [] = (p,ps) + aux (_,ps) ("-p":p:rest) = aux (readPort p, ps) rest + aux (p,ps) (q:rest) | "-p" `isPrefixOf` q = aux (readPort $ drop 2 q, ps) rest + | otherwise = aux (p,q:ps) rest + readPort p = fmap fst $ listToMaybe $ reads p diff --git a/Annex/TaggedPush.hs b/Annex/TaggedPush.hs new file mode 100644 index 000000000..039dc0e17 --- /dev/null +++ b/Annex/TaggedPush.hs @@ -0,0 +1,61 @@ +{- git-annex tagged pushes + - + - Copyright 2012 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.TaggedPush where + +import Common.Annex +import qualified Remote +import qualified Annex.Branch +import qualified Git +import qualified Git.Ref +import qualified Git.Command +import qualified Git.Branch +import Utility.Base64 + +{- Converts a git branch into a branch that is tagged with a UUID, typically + - the UUID of the repo that will be pushing it, and possibly with other + - information. + - + - Pushing to branches on the remote that have our uuid in them is ugly, + - but it reserves those branches for pushing by us, and so our pushes will + - never conflict with other pushes. + - + - To avoid cluttering up the branch display, the branch is put under + - refs/synced/, rather than the usual refs/remotes/ + - + - Both UUIDs and Base64 encoded data are always legal to be used in git + - refs, per git-check-ref-format. + -} +toTaggedBranch :: UUID -> Maybe String -> Git.Branch -> Git.Branch +toTaggedBranch u info b = Git.Ref $ intercalate "/" $ catMaybes + [ Just "refs/synced" + , Just $ fromUUID u + , toB64 <$> info + , Just $ show $ Git.Ref.base b + ] + +fromTaggedBranch :: Git.Branch -> Maybe (UUID, Maybe String) +fromTaggedBranch b = case split "/" $ show b of + ("refs":"synced":u:info:_base) -> + Just (toUUID u, fromB64Maybe info) + ("refs":"synced":u:_base) -> + Just (toUUID u, Nothing) + _ -> Nothing + where + +taggedPush :: UUID -> Maybe String -> Git.Ref -> Remote -> Git.Repo -> IO Bool +taggedPush u info branch remote = Git.Command.runBool + [ Param "push" + , Param $ Remote.name remote + {- Using forcePush here is safe because we "own" the tagged branch + - we're pushing; it has no other writers. Ensures it is pushed + - even if it has been rewritten by a transition. -} + , Param $ Git.Branch.forcePush $ refspec Annex.Branch.name + , Param $ refspec branch + ] + where + refspec b = show b ++ ":" ++ show (toTaggedBranch u info b) diff --git a/Annex/UUID.hs b/Annex/UUID.hs new file mode 100644 index 000000000..4e274503b --- /dev/null +++ b/Annex/UUID.hs @@ -0,0 +1,96 @@ +{- git-annex uuids + - + - Each git repository used by git-annex has an annex.uuid setting that + - uniquely identifies that repository. + - + - UUIDs of remotes are cached in git config, using keys named + - remote.<name>.annex-uuid + - + - Copyright 2010-2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.UUID ( + getUUID, + getRepoUUID, + getUncachedUUID, + prepUUID, + genUUID, + genUUIDInNameSpace, + gCryptNameSpace, + removeRepoUUID, + storeUUID, + setUUID, +) where + +import Common.Annex +import qualified Git +import qualified Git.Config +import Config + +import qualified Data.UUID as U +import qualified Data.UUID.V5 as U5 +import System.Random +import Data.Bits.Utils + +configkey :: ConfigKey +configkey = annexConfig "uuid" + +{- Generates a random UUID, that does not include the MAC address. -} +genUUID :: IO UUID +genUUID = UUID . show <$> (randomIO :: IO U.UUID) + +{- Generates a UUID from a given string, using a namespace. + - Given the same namespace, the same string will always result + - in the same UUID. -} +genUUIDInNameSpace :: U.UUID -> String -> UUID +genUUIDInNameSpace namespace = UUID . show . U5.generateNamed namespace . s2w8 + +{- Namespace used for UUIDs derived from git-remote-gcrypt ids. -} +gCryptNameSpace :: U.UUID +gCryptNameSpace = U5.generateNamed U5.namespaceURL $ + s2w8 "http://git-annex.branchable.com/design/gcrypt/" + +{- Get current repository's UUID. -} +getUUID :: Annex UUID +getUUID = getRepoUUID =<< gitRepo + +{- Looks up a repo's UUID, caching it in .git/config if it's not already. -} +getRepoUUID :: Git.Repo -> Annex UUID +getRepoUUID r = do + c <- toUUID <$> getConfig cachekey "" + let u = getUncachedUUID r + + if c /= u && u /= NoUUID + then do + updatecache u + return u + else return c + where + updatecache u = do + g <- gitRepo + when (g /= r) $ storeUUID cachekey u + cachekey = remoteConfig r "uuid" + +removeRepoUUID :: Annex () +removeRepoUUID = unsetConfig configkey + +getUncachedUUID :: Git.Repo -> UUID +getUncachedUUID = toUUID . Git.Config.get key "" + where + (ConfigKey key) = configkey + +{- Make sure that the repo has an annex.uuid setting. -} +prepUUID :: Annex () +prepUUID = whenM ((==) NoUUID <$> getUUID) $ + storeUUID configkey =<< liftIO genUUID + +storeUUID :: ConfigKey -> UUID -> Annex () +storeUUID configfield = setConfig configfield . fromUUID + +{- Only sets the configkey in the Repo; does not change .git/config -} +setUUID :: Git.Repo -> UUID -> IO Git.Repo +setUUID r u = do + let s = show configkey ++ "=" ++ fromUUID u + Git.Config.store s r diff --git a/Annex/Url.hs b/Annex/Url.hs new file mode 100644 index 000000000..0401ffe07 --- /dev/null +++ b/Annex/Url.hs @@ -0,0 +1,27 @@ +{- Url downloading, with git-annex user agent. + - + - Copyright 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Url ( + module U, + withUserAgent, + getUserAgent, +) where + +import Common.Annex +import qualified Annex +import Utility.Url as U +import qualified Build.SysConfig as SysConfig + +defaultUserAgent :: U.UserAgent +defaultUserAgent = "git-annex/" ++ SysConfig.packageversion + +getUserAgent :: Annex (Maybe U.UserAgent) +getUserAgent = Annex.getState $ + Just . fromMaybe defaultUserAgent . Annex.useragent + +withUserAgent :: (Maybe U.UserAgent -> IO a) -> Annex a +withUserAgent a = liftIO . a =<< getUserAgent diff --git a/Annex/Version.hs b/Annex/Version.hs new file mode 100644 index 000000000..2b4a49fd2 --- /dev/null +++ b/Annex/Version.hs @@ -0,0 +1,47 @@ +{- git-annex repository versioning + - + - Copyright 2010,2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.Version where + +import Common.Annex +import Config +import qualified Annex + +type Version = String + +defaultVersion :: Version +defaultVersion = "3" + +directModeVersion :: Version +directModeVersion = "5" + +supportedVersions :: [Version] +supportedVersions = [defaultVersion, directModeVersion] + +upgradableVersions :: [Version] +#ifndef mingw32_HOST_OS +upgradableVersions = ["0", "1", "2", "4"] +#else +upgradableVersions = ["2", "4"] +#endif + +autoUpgradeableVersions :: [Version] +autoUpgradeableVersions = ["4"] + +versionField :: ConfigKey +versionField = annexConfig "version" + +getVersion :: Annex (Maybe Version) +getVersion = annexVersion <$> Annex.getGitConfig + +setVersion :: Version -> Annex () +setVersion = setConfig versionField + +removeVersion :: Annex () +removeVersion = unsetConfig versionField diff --git a/Annex/Wanted.hs b/Annex/Wanted.hs new file mode 100644 index 000000000..04dcc1c1c --- /dev/null +++ b/Annex/Wanted.hs @@ -0,0 +1,32 @@ +{- git-annex checking whether content is wanted + - + - Copyright 2012 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Wanted where + +import Common.Annex +import Logs.PreferredContent +import Annex.UUID + +import qualified Data.Set as S + +{- Check if a file is preferred content for the local repository. -} +wantGet :: Bool -> AssociatedFile -> Annex Bool +wantGet def Nothing = return def +wantGet def (Just file) = isPreferredContent Nothing S.empty file def + +{- Check if a file is preferred content for a remote. -} +wantSend :: Bool -> AssociatedFile -> UUID -> Annex Bool +wantSend def Nothing _ = return def +wantSend def (Just file) to = isPreferredContent (Just to) S.empty file def + +{- Check if a file can be dropped, maybe from a remote. + - Don't drop files that are preferred content. -} +wantDrop :: Bool -> Maybe UUID -> AssociatedFile -> Annex Bool +wantDrop def _ Nothing = return $ not def +wantDrop def from (Just file) = do + u <- maybe getUUID (return . id) from + not <$> isPreferredContent (Just u) (S.singleton u) file def |