From 2e6d39d426f6b08f236d6071e671a9dcfc799d91 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 27 Nov 2013 18:41:44 -0400 Subject: git-annex (5.20131127) unstable; urgency=low * webapp: Detect when upgrades are available, and upgrade if the user desires. (Only when git-annex is installed using the prebuilt binaries from git-annex upstream, not from eg Debian.) * assistant: Detect when the git-annex binary is modified or replaced, and either prompt the user to restart the program, or automatically restart it. * annex.autoupgrade configures both the above upgrade behaviors. * Added support for quvi 0.9. Slightly suboptimal due to limitations in its interface compared with the old version. * Bug fix: annex.version did not get set on automatic upgrade to v5 direct mode repo, so the upgrade was performed repeatedly, slowing commands down. * webapp: Fix bug that broke switching between local repositories that use the new guarded direct mode. * Android: Fix stripping of the git-annex binary. * Android: Make terminal app show git-annex version number. * Android: Re-enable XMPP support. * reinject: Allow to be used in direct mode. * Futher improvements to git repo repair. Has now been tested in tens of thousands of intentionally damaged repos, and successfully repaired them all. * Allow use of --unused in bare repository. # imported from the archive --- Command/Sync.hs | 450 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 450 insertions(+) create mode 100644 Command/Sync.hs (limited to 'Command/Sync.hs') diff --git a/Command/Sync.hs b/Command/Sync.hs new file mode 100644 index 000000000..c41f46f8a --- /dev/null +++ b/Command/Sync.hs @@ -0,0 +1,450 @@ +{- git-annex command + - + - Copyright 2011 Joachim Breitner + - Copyright 2011,2012 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Sync where + +import Common.Annex +import Command +import qualified Remote +import qualified Annex +import qualified Annex.Branch +import qualified Annex.Queue +import Annex.Direct +import Annex.CatFile +import Annex.Link +import qualified Git.Command +import qualified Git.LsFiles as LsFiles +import qualified Git.Merge +import qualified Git.Branch +import qualified Git.Ref +import qualified Git +import Git.Types (BlobType(..)) +import qualified Types.Remote +import qualified Remote.Git +import Types.Key +import Config +import Annex.ReplaceFile +import Git.FileMode + +import qualified Data.Set as S +import Data.Hash.MD5 +import Control.Concurrent.MVar + +def :: [Command] +def = [command "sync" (paramOptional (paramRepeating paramRemote)) + [seek] SectionCommon "synchronize local repository with remotes"] + +-- syncing involves several operations, any of which can independently fail +seek :: CommandSeek +seek rs = do + prepMerge + + -- There may not be a branch checked out until after the commit, + -- or perhaps after it gets merged from the remote. + -- So only look it up once it's needed, and if once there is a + -- branch, cache it. + mvar <- liftIO newEmptyMVar + let getbranch = ifM (liftIO $ isEmptyMVar mvar) + ( do + branch <- inRepo Git.Branch.current + when (isJust branch) $ + liftIO $ putMVar mvar branch + return branch + , liftIO $ readMVar mvar + ) + let withbranch a = a =<< getbranch + + remotes <- syncRemotes rs + return $ concat + [ [ commit ] + , [ withbranch mergeLocal ] + , [ withbranch (pullRemote remote) | remote <- remotes ] + , [ mergeAnnex ] + , [ withbranch pushLocal ] + , [ withbranch (pushRemote remote) | remote <- remotes ] + ] + +{- Merging may delete the current directory, so go to the top + - of the repo. -} +prepMerge :: Annex () +prepMerge = liftIO . setCurrentDirectory =<< fromRepo Git.repoPath + +syncBranch :: Git.Ref -> Git.Ref +syncBranch = Git.Ref.under "refs/heads/synced" . fromDirectBranch + +remoteBranch :: Remote -> Git.Ref -> Git.Ref +remoteBranch remote = Git.Ref.underBase $ "refs/remotes/" ++ Remote.name remote + +syncRemotes :: [String] -> Annex [Remote] +syncRemotes rs = ifM (Annex.getState Annex.fast) ( nub <$> pickfast , wanted ) + where + pickfast = (++) <$> listed <*> (good =<< fastest <$> available) + wanted + | null rs = good =<< concat . Remote.byCost <$> available + | otherwise = listed + listed = do + l <- catMaybes <$> mapM (Remote.byName . Just) rs + let s = filter (not . Remote.syncableRemote) l + unless (null s) $ + error $ "cannot sync special remotes: " ++ + unwords (map Types.Remote.name s) + return l + available = filter Remote.syncableRemote + . filter (remoteAnnexSync . Types.Remote.gitconfig) + <$> Remote.remoteList + good = filterM $ Remote.Git.repoAvail . Types.Remote.repo + fastest = fromMaybe [] . headMaybe . Remote.byCost + +commit :: CommandStart +commit = next $ next $ ifM isDirect + ( do + void stageDirect + runcommit [] + , runcommit [Param "-a"] + ) + where + runcommit ps = do + showStart "commit" "" + showOutput + Annex.Branch.commit "update" + -- Commit will fail when the tree is clean, so ignore failure. + let params = Param "commit" : ps ++ + [Param "-m", Param "git-annex automatic sync"] + _ <- inRepo $ tryIO . Git.Command.runQuiet params + return True + +mergeLocal :: Maybe Git.Ref -> CommandStart +mergeLocal Nothing = stop +mergeLocal (Just branch) = go =<< needmerge + where + syncbranch = syncBranch branch + needmerge = ifM isBareRepo + ( return False + , do + unlessM (inRepo $ Git.Ref.exists syncbranch) $ + inRepo $ updateBranch syncbranch + inRepo $ Git.Branch.changed branch syncbranch + ) + go False = stop + go True = do + showStart "merge" $ Git.Ref.describe syncbranch + next $ next $ mergeFrom syncbranch + +pushLocal :: Maybe Git.Ref -> CommandStart +pushLocal Nothing = stop +pushLocal (Just branch) = do + -- Update the sync branch to match the new state of the branch + inRepo $ updateBranch $ syncBranch branch + -- In direct mode, we're operating on some special direct mode + -- branch, rather than the intended branch, so update the indended + -- branch. + whenM isDirect $ + inRepo $ updateBranch $ fromDirectBranch branch + stop + +updateBranch :: Git.Ref -> Git.Repo -> IO () +updateBranch syncbranch g = + unlessM go $ error $ "failed to update " ++ show syncbranch + where + go = Git.Command.runBool + [ Param "branch" + , Param "-f" + , Param $ show $ Git.Ref.base syncbranch + ] g + +pullRemote :: Remote -> Maybe Git.Ref -> CommandStart +pullRemote remote branch = do + showStart "pull" (Remote.name remote) + next $ do + showOutput + stopUnless fetch $ + next $ mergeRemote remote branch + where + fetch = inRepo $ Git.Command.runBool + [Param "fetch", Param $ Remote.name remote] + +{- The remote probably has both a master and a synced/master branch. + - Which to merge from? Well, the master has whatever latest changes + - were committed (or pushed changes, if this is a bare remote), + - while the synced/master may have changes that some + - other remote synced to this remote. So, merge them both. -} +mergeRemote :: Remote -> Maybe Git.Ref -> CommandCleanup +mergeRemote remote b = case b of + Nothing -> do + branch <- inRepo Git.Branch.currentUnsafe + and <$> mapM merge (branchlist branch) + Just _ -> and <$> (mapM merge =<< tomerge (branchlist b)) + where + merge = mergeFrom . remoteBranch remote + tomerge branches = filterM (changed remote) branches + branchlist Nothing = [] + branchlist (Just branch) = [branch, syncBranch branch] + +pushRemote :: Remote -> Maybe Git.Ref -> CommandStart +pushRemote _remote Nothing = stop +pushRemote remote (Just branch) = go =<< needpush + where + needpush = anyM (newer remote) [syncBranch branch, Annex.Branch.name] + go False = stop + go True = do + showStart "push" (Remote.name remote) + next $ next $ do + showOutput + ok <- inRepo $ pushBranch remote branch + unless ok $ do + warning $ unwords [ "Pushing to " ++ Remote.name remote ++ " failed." ] + showLongNote "(non-fast-forward problems can be solved by setting receive.denyNonFastforwards to false in the remote's git config)" + return ok + +{- Pushes a regular branch like master to a remote. Also pushes the git-annex + - branch. + - + - If the remote is a bare git repository, it's best to push the regular + - branch directly to it, so that cloning/pulling will get it. + - On the other hand, if it's not bare, pushing to the checked out branch + - will fail, and this is why we push to its syncBranch. + - + - Git offers no way to tell if a remote is bare or not, so both methods + - are tried. + - + - The direct push is likely to spew an ugly error message, so stderr is + - elided. Since git progress display goes to stderr too, the sync push + - is done first, and actually sends the data. Then the direct push is + - tried, with stderr discarded, to update the branch ref on the remote. + - + - The sync push forces the update of the remote synced/git-annex branch. + - This is necessary if a transition has rewritten the git-annex branch. + - Normally any changes to the git-annex branch get pulled and merged before + - this push, so this forcing is unlikely to overwrite new data pushed + - in from another repository that is also syncing. + - + - But overwriting of data on synced/git-annex can happen, in a race. + - The only difference caused by using a forced push in that case is that + - the last repository to push wins the race, rather than the first to push. + - + - The sync push will fail to overwrite if receive.denyNonFastforwards is + - set on the remote. + -} +pushBranch :: Remote -> Git.Ref -> Git.Repo -> IO Bool +pushBranch remote branch g = tryIO (directpush g) `after` syncpush g + where + syncpush = Git.Command.runBool $ pushparams + [ Git.Branch.forcePush $ refspec Annex.Branch.name + , refspec branch + ] + directpush = Git.Command.runQuiet $ pushparams + [show $ Git.Ref.base $ fromDirectBranch branch] + pushparams branches = + [ Param "push" + , Param $ Remote.name remote + ] ++ map Param branches + refspec b = concat + [ show $ Git.Ref.base b + , ":" + , show $ Git.Ref.base $ syncBranch b + ] + +mergeAnnex :: CommandStart +mergeAnnex = do + void Annex.Branch.forceUpdate + stop + +{- Merges from a branch into the current branch. -} +mergeFrom :: Git.Ref -> Annex Bool +mergeFrom branch = do + showOutput + ifM isDirect + ( maybe go godirect =<< inRepo Git.Branch.current + , go + ) + where + go = runmerge $ inRepo $ Git.Merge.mergeNonInteractive branch + godirect currbranch = do + old <- inRepo $ Git.Ref.sha currbranch + d <- fromRepo gitAnnexMergeDir + r <- runmerge $ inRepo $ mergeDirect d branch + new <- inRepo $ Git.Ref.sha currbranch + case (old, new) of + (Just oldsha, Just newsha) -> + mergeDirectCleanup d oldsha newsha + _ -> noop + return r + runmerge a = ifM a + ( return True + , resolveMerge + ) + +{- Resolves a conflicted merge. It's important that any conflicts be + - resolved in a way that itself avoids later merge conflicts, since + - multiple repositories may be doing this concurrently. + - + - Only annexed files are resolved; other files are left for the user to + - handle. + - + - This uses the Keys pointed to by the files to construct new + - filenames. So when both sides modified file foo, + - it will be deleted, and replaced with files foo.variant-A and + - foo.variant-B. + - + - On the other hand, when one side deleted foo, and the other modified it, + - it will be deleted, and the modified version stored as file + - foo.variant-A (or B). + - + - It's also possible that one side has foo as an annexed file, and + - the other as a directory or non-annexed file. The annexed file + - is renamed to resolve the merge, and the other object is preserved as-is. + -} +resolveMerge :: Annex Bool +resolveMerge = do + top <- fromRepo Git.repoPath + (fs, cleanup) <- inRepo (LsFiles.unmerged [top]) + mergedfs <- catMaybes <$> mapM resolveMerge' fs + let merged = not (null mergedfs) + void $ liftIO cleanup + + (deleted, cleanup2) <- inRepo (LsFiles.deleted [top]) + unless (null deleted) $ + Annex.Queue.addCommand "rm" [Params "--quiet -f --"] deleted + void $ liftIO cleanup2 + + when merged $ do + unlessM isDirect $ + cleanConflictCruft mergedfs top + Annex.Queue.flush + void $ inRepo $ Git.Command.runBool + [ Param "commit" + , Param "-m" + , Param "git-annex automatic merge conflict fix" + ] + showLongNote "Merge conflict was automatically resolved; you may want to examine the result." + return merged + +resolveMerge' :: LsFiles.Unmerged -> Annex (Maybe FilePath) +resolveMerge' u + | issymlink LsFiles.valUs && issymlink LsFiles.valThem = do + kus <- getKey LsFiles.valUs + kthem <- getKey LsFiles.valThem + case (kus, kthem) of + -- Both sides of conflict are annexed files + (Just keyUs, Just keyThem) -> do + removeoldfile keyUs + if keyUs == keyThem + then makelink keyUs + else do + makelink keyUs + makelink keyThem + return $ Just file + -- Our side is annexed, other side is not. + (Just keyUs, Nothing) -> do + ifM isDirect + -- Move newly added non-annexed object + -- out of direct mode merge directory. + ( do + removeoldfile keyUs + makelink keyUs + d <- fromRepo gitAnnexMergeDir + liftIO $ rename (d file) file + -- cleaup tree after git merge + , do + unstageoldfile + makelink keyUs + ) + return $ Just file + -- Our side is not annexed, other side is. + (Nothing, Just keyThem) -> do + makelink keyThem + unstageoldfile + return $ Just file + -- Neither side is annexed; cannot resolve. + (Nothing, Nothing) -> return Nothing + | otherwise = return Nothing + where + file = LsFiles.unmergedFile u + issymlink select = select (LsFiles.unmergedBlobType u) `elem` [Just SymlinkBlob, Nothing] + makelink key = do + let dest = mergeFile file key + l <- inRepo $ gitAnnexLink dest key + replaceFile dest $ makeAnnexLink l + stageSymlink dest =<< hashSymlink l + whenM isDirect $ + toDirect key dest + removeoldfile keyUs = do + ifM isDirect + ( removeDirect keyUs file + , liftIO $ nukeFile file + ) + Annex.Queue.addCommand "rm" [Params "--quiet -f --"] [file] + unstageoldfile = Annex.Queue.addCommand "rm" [Params "--quiet -f --cached --"] [file] + getKey select = case select (LsFiles.unmergedSha u) of + Nothing -> return Nothing + Just sha -> catKey sha symLinkMode + +{- git-merge moves conflicting files away to files + - named something like f~HEAD or f~branch, but the + - exact name chosen can vary. Once the conflict is resolved, + - this cruft can be deleted. To avoid deleting legitimate + - files that look like this, only delete files that are + - A) not staged in git and B) look like git-annex symlinks. + -} +cleanConflictCruft :: [FilePath] -> FilePath -> Annex () +cleanConflictCruft resolvedfs top = do + (fs, cleanup) <- inRepo $ LsFiles.notInRepo False [top] + mapM_ clean fs + void $ liftIO cleanup + where + clean f + | matchesresolved f = whenM (isJust <$> isAnnexLink f) $ + liftIO $ nukeFile f + | otherwise = noop + s = S.fromList resolvedfs + matchesresolved f = S.member (base f) s + base f = reverse $ drop 1 $ dropWhile (/= '~') $ reverse f + +{- The filename to use when resolving a conflicted merge of a file, + - that points to a key. + - + - Something derived from the key needs to be included in the filename, + - but rather than exposing the whole key to the user, a very weak hash + - is used. There is a very real, although still unlikely, chance of + - conflicts using this hash. + - + - In the event that there is a conflict with the filename generated + - for some other key, that conflict will itself be handled by the + - conflicted merge resolution code. That case is detected, and the full + - key is used in the filename. + -} +mergeFile :: FilePath -> Key -> FilePath +mergeFile file key + | doubleconflict = go $ key2file key + | otherwise = go $ shortHash $ key2file key + where + varmarker = ".variant-" + doubleconflict = varmarker `isInfixOf` file + go v = takeDirectory file + dropExtension (takeFileName file) + ++ varmarker ++ v + ++ takeExtension file + +shortHash :: String -> String +shortHash = take 4 . md5s . md5FilePath + +changed :: Remote -> Git.Ref -> Annex Bool +changed remote b = do + let r = remoteBranch remote b + ifM (inRepo $ Git.Ref.exists r) + ( inRepo $ Git.Branch.changed b r + , return False + ) + +newer :: Remote -> Git.Ref -> Annex Bool +newer remote b = do + let r = remoteBranch remote b + ifM (inRepo $ Git.Ref.exists r) + ( inRepo $ Git.Branch.changed r b + , return True + ) -- cgit v1.2.3