diff options
14 files changed, 307 insertions, 7 deletions
diff --git a/Command/Sync.hs b/Command/Sync.hs index 912ce944c..b2bf24d55 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -15,15 +15,22 @@ import Command import qualified Remote import qualified Annex import qualified Annex.Branch +import qualified Annex.Queue +import Annex.Content +import Annex.CatFile import qualified Git.Command +import qualified Git.LsFiles as LsFiles import qualified Git.Merge import qualified Git.Branch import qualified Git.Ref import qualified Git +import Git.Types (BlobType(..)) import qualified Types.Remote import qualified Remote.Git import qualified Data.Map as M +import qualified Data.ByteString.Lazy as L +import Data.Hash.MD5 def :: [Command] def = [command "sync" (paramOptional (paramRepeating paramRemote)) @@ -168,10 +175,104 @@ mergeAnnex = do Annex.Branch.forceUpdate stop -mergeFrom :: Git.Ref -> CommandCleanup +mergeFrom :: Git.Ref -> Annex Bool mergeFrom branch = do showOutput - inRepo $ Git.Merge.mergeNonInteractive branch + ok <- inRepo $ Git.Merge.mergeNonInteractive branch + if ok + then return ok + else resolveMerge + +{- Resolves a conflicted merge. It's important that any conflicts be + - resolved in a way that itself avoids later merge conflicts, since + - multiple repositories may be doing this concurrently. + - + - Only annexed files are resolved; other files are left for the user to + - handle. + - + - This uses the Keys pointed to by the files to construct new + - filenames. So when both sides modified file foo, + - it will be deleted, and replaced with files foo.KEYA and foo.KEYB. + - + - On the other hand, when one side deleted foo, and the other modified it, + - it will be deleted, and the modified version stored as file + - foo.KEYA (or KEYB). 
+ -} +resolveMerge :: Annex Bool +resolveMerge = do + top <- fromRepo Git.repoPath + merged <- (\rs -> not (null rs) && and rs) <$> (mapM resolveMerge' =<< inRepo (LsFiles.unmerged [top])) -- no unmerged files means the merge failed for some other reason, so nothing was resolved + when merged $ do + Annex.Queue.flush + void $ inRepo $ Git.Command.runBool "commit" + [Param "-m", Param "git-annex automatic merge conflict fix"] + return merged + +resolveMerge' :: LsFiles.Unmerged -> Annex Bool +resolveMerge' u + | issymlink LsFiles.valUs && issymlink LsFiles.valThem = + withKey LsFiles.valUs $ \keyUs -> + withKey LsFiles.valThem $ \keyThem -> go keyUs keyThem + | otherwise = return False + where + go keyUs keyThem + | keyUs == keyThem = do + makelink keyUs + return True + | otherwise = do + liftIO $ nukeFile file + Annex.Queue.addCommand "rm" [Params "--quiet -f --"] [file] + makelink keyUs + makelink keyThem + return True + file = LsFiles.unmergedFile u + issymlink select = any (select (LsFiles.unmergedBlobType u) ==) + [Just SymlinkBlob, Nothing] + makelink (Just key) = do + let dest = mergeFile file key + l <- calcGitLink dest key + liftIO $ do + nukeFile dest + createSymbolicLink l dest + Annex.Queue.addCommand "add" [Param "--force", Param "--"] [dest] + makelink _ = noop + withKey select a = do + let msha = select $ LsFiles.unmergedSha u + case msha of + Nothing -> a Nothing + Just sha -> do + key <- fileKey . takeFileName + . encodeW8 . L.unpack + <$> catObject sha + maybe (return False) (a . Just) key + +{- The filename to use when resolving a conflicted merge of a file, + - that points to a key. + - + - Something derived from the key needs to be included in the filename, + - but rather than exposing the whole key to the user, a very weak hash + - is used. There is a very real, although still unlikely, chance of + - conflicts using this hash. + - + - In the event that there is a conflict with the filename generated + - for some other key, that conflict will itself be handled by the + - conflicted merge resolution code.
That case is detected (the filename already contains the variant marker), and the full + - key is used in the filename. + -} +mergeFile :: FilePath -> Key -> FilePath +mergeFile file key + | doubleconflict = go $ show key + | otherwise = go $ shortHash $ show key + where + varmarker = ".variant-" + doubleconflict = varmarker `isInfixOf` takeFileName file + go v = takeDirectory file + </> dropExtension (takeFileName file) + ++ varmarker ++ v + ++ takeExtension file + +shortHash :: String -> String +shortHash = take 4 . md5s . encodeFilePath changed :: Remote -> Git.Ref -> Annex Bool changed remote b = do diff --git a/Git/Config.hs b/Git/Config.hs index dab1cdf5e..c9e4f9a2d 100644 --- a/Git/Config.hs +++ b/Git/Config.hs @@ -54,6 +54,10 @@ read' repo = go repo {- Reads git config from a handle and populates a repo with it. -} hRead :: Repo -> Handle -> IO Repo hRead repo h = do + -- We use the FileSystemEncoding when reading from git-config, + -- because it can contain arbitrary filepaths (and other strings) + -- in any encoding. + fileEncoding h val <- hGetContentsStrict h store val repo diff --git a/Git/LsFiles.hs b/Git/LsFiles.hs index 06d4b9f44..321913334 100644 --- a/Git/LsFiles.hs +++ b/Git/LsFiles.hs @@ -1,6 +1,6 @@ {- git ls-files interface - - - Copyright 2010 Joey Hess <joey@kitenet.net> + - Copyright 2010,2012 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} @@ -13,11 +13,16 @@ module Git.LsFiles ( changedUnstaged, typeChanged, typeChangedStaged, + Conflicting(..), + Unmerged(..), + unmerged, ) where import Common import Git import Git.Command +import Git.Types +import Git.Sha {- Scans for files that are checked into git at the specified locations. -} inRepo :: [FilePath] -> Repo -> IO [FilePath] @@ -75,3 +80,76 @@ typeChanged' ps l repo = do where prefix = [Params "diff --name-only --diff-filter=T -z"] suffix = Param "--" : map File l + +{- An item in conflict has two possible values. + - Either can be Nothing, when that side deleted the file.
-} +data Conflicting v = Conflicting + { valUs :: Maybe v + , valThem :: Maybe v + } deriving (Show) + +data Unmerged = Unmerged + { unmergedFile :: FilePath + , unmergedBlobType :: Conflicting BlobType + , unmergedSha :: Conflicting Sha + } deriving (Show) + +{- Returns a list of the files in the specified locations that have + - unresolved merge conflicts. + - + - ls-files outputs multiple lines per conflicting file, each with its own + - stage number: + - 1 = old version, can be ignored + - 2 = us + - 3 = them + - If a line is omitted, that side deleted the file. + -} +unmerged :: [FilePath] -> Repo -> IO [Unmerged] +unmerged l repo = reduceUnmerged [] . catMaybes . map parseUnmerged <$> list repo + where + files = map File l + list = pipeNullSplit $ Params "ls-files --unmerged -z --" : files + +data InternalUnmerged = InternalUnmerged + { isus :: Bool + , ifile :: FilePath + , iblobtype :: Maybe BlobType + , isha :: Maybe Sha + } deriving (Show) + +parseUnmerged :: String -> Maybe InternalUnmerged +parseUnmerged s + | null file || length ws < 3 = Nothing + | otherwise = do + stage <- readish (ws !! 2) :: Maybe Int + unless (stage == 2 || stage == 3) $ + fail undefined -- skip stage 1 + blobtype <- readBlobType (ws !! 0) + sha <- extractSha (ws !! 
1) + return $ InternalUnmerged (stage == 2) file (Just blobtype) (Just sha) + where + (metadata, file) = separate (== '\t') s + ws = words metadata + +reduceUnmerged :: [Unmerged] -> [InternalUnmerged] -> [Unmerged] +reduceUnmerged c [] = c +reduceUnmerged c (i:is) = reduceUnmerged (new:c) rest + where + (rest, sibi) = findsib i is + (blobtypeA, blobtypeB, shaA, shaB) + | isus i = (iblobtype i, iblobtype sibi, isha i, isha sibi) + | otherwise = (iblobtype sibi, iblobtype i, isha sibi, isha i) + new = Unmerged + { unmergedFile = ifile i + , unmergedBlobType = Conflicting blobtypeA blobtypeB + , unmergedSha = Conflicting shaA shaB + } + findsib templatei [] = ([], deleted templatei) + findsib templatei (l:ls) + | ifile l == ifile templatei = (ls, l) + | otherwise = (l:ls, deleted templatei) + deleted templatei = templatei + { isus = not (isus templatei) + , iblobtype = Nothing + , isha = Nothing + } diff --git a/Git/Types.hs b/Git/Types.hs index 1df6e343b..0c37427c7 100644 --- a/Git/Types.hs +++ b/Git/Types.hs @@ -51,6 +51,7 @@ type Tag = Ref {- Types of objects that can be stored in git. -} data ObjectType = BlobObject | CommitObject | TreeObject + deriving (Eq) instance Show ObjectType where show BlobObject = "blob" @@ -65,9 +66,16 @@ readObjectType _ = Nothing {- Types of blobs. -} data BlobType = FileBlob | ExecutableBlob | SymlinkBlob + deriving (Eq) {- Git uses magic numbers to denote the type of a blob. 
-} instance Show BlobType where show FileBlob = "100644" show ExecutableBlob = "100755" show SymlinkBlob = "120000" + +readBlobType :: String -> Maybe BlobType +readBlobType "100644" = Just FileBlob +readBlobType "100755" = Just ExecutableBlob +readBlobType "120000" = Just SymlinkBlob +readBlobType _ = Nothing diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs index 0987f9131..504147e1d 100644 --- a/Git/UnionMerge.hs +++ b/Git/UnionMerge.hs @@ -10,8 +10,7 @@ module Git.UnionMerge ( mergeIndex ) where -import qualified Data.Text.Lazy as L -import qualified Data.Text.Lazy.Encoding as L +import qualified Data.ByteString.Lazy as L import qualified Data.Set as S import Common @@ -79,10 +78,14 @@ mergeFile info file h repo = case filter (/= nullSha) [Ref asha, Ref bsha] of =<< calcMerge . zip shas <$> mapM getcontents shas where [_colonmode, _bmode, asha, bsha, _status] = words info - getcontents s = map L.unpack . L.lines . - L.decodeUtf8 <$> catObject h s use sha = return $ Just $ updateIndexLine sha FileBlob $ asTopFilePath file + -- We don't know how the file is encoded, but need to + -- split it into lines to union merge. Using the + -- FileSystemEncoding for this is a hack, but ensures there + -- are no decoding errors. Note that this works because + -- streamUpdateIndex sets fileEncoding on its write handle. + getcontents s = lines . encodeW8 . L.unpack <$> catObject h s {- Calculates a union merge between a list of refs, with contents. - diff --git a/debian/changelog b/debian/changelog index 59eb63b64..46afb6e4d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -5,6 +5,11 @@ git-annex (3.20120625) UNRELEASED; urgency=low which disables the watch command. * Avoid ugly failure mode when moving content from a local repository that is not available. + * Got rid of the last place that did utf8 decoding. + * Accept arbitrarily encoded repository filepaths etc when reading + git config output. 
This fixes support for remotes with unusual characters + in their names. + * sync: Automatically resolves merge conflicts. -- Joey Hess <joeyh@debian.org> Mon, 25 Jun 2012 11:38:12 -0400 diff --git a/doc/bugs/git-annex:_Cannot_decode_byte___39____92__xfc__39__.mdwn b/doc/bugs/git-annex:_Cannot_decode_byte___39____92__xfc__39__.mdwn new file mode 100644 index 000000000..862259422 --- /dev/null +++ b/doc/bugs/git-annex:_Cannot_decode_byte___39____92__xfc__39__.mdwn @@ -0,0 +1,34 @@ +What steps will reproduce the problem? + + alip@hayalet /tmp/aaa (git)-[master] % git annex init aaa + init aaa ok + (Recording state in git...) + alip@hayalet /tmp/aaa (git)-[master] % git remote add çüş /tmp/çüş + alip@hayalet /tmp/aaa (git)-[master] % git annex sync --debug + git ["--git-dir=/tmp/aaa/.git","--work-tree=/tmp/aaa","symbolic-ref","HEAD"] + git ["--git-dir=/tmp/aaa/.git","--work-tree=/tmp/aaa","show-ref","git-annex"] + git ["--git-dir=/tmp/aaa/.git","--work-tree=/tmp/aaa","show-ref","--hash","refs/heads/git-annex"] + git ["--git-dir=/tmp/aaa/.git","--work-tree=/tmp/aaa","log","refs/heads/git-annex..bc45cd9c2cb7c9b0c7a12a4c0210fe6a262abac9","--oneline","-n1"] + git ["--git-dir=/tmp/aaa/.git","--work-tree=/tmp/aaa","log","refs/heads/git-annex..9220bfedd1e13b2d791c918e2d59901af353825f","--oneline","-n1"] + (merging origin/git-annex into git-annex...) + git ["--git-dir=/tmp/aaa/.git","--work-tree=/tmp/aaa","cat-file","--batch"] + git ["--git-dir=/tmp/aaa/.git","--work-tree=/tmp/aaa","update-index","-z","--index-info"] + git ["--git-dir=/tmp/aaa/.git","--work-tree=/tmp/aaa","diff-index","--raw","-z","-r","--no-renames","-l0","--cached","9220bfedd1e13b2d791c918e2d59901af353825f"] + git-annex: Cannot decode byte '\xfc': Data.Text.Encoding.decodeUtf8: Invalid UTF-8 stream + 1 alip@hayalet /tmp/aaa (git)-[master] % + +What is the expected output? What do you see instead? + +Syncing a repository under a path with utf-8 characters in its name fails. 
+ +What version of git-annex are you using? On what operating system? + +git-annex version: 3.20120624 + +On Exherbo, linux-3.4 + +Please provide any additional information below. + +'\xfc' is valid UTF-8: 'LATIN SMALL LETTER U WITH DIAERESIS' + +> closing as non-reproducible and presumably fixed. [[done]] --[[Joey]] diff --git a/doc/bugs/git-annex:_Cannot_decode_byte___39____92__xfc__39__/comment_1_f1a7352b04f395e06e0094c1f51b6fff._comment b/doc/bugs/git-annex:_Cannot_decode_byte___39____92__xfc__39__/comment_1_f1a7352b04f395e06e0094c1f51b6fff._comment new file mode 100644 index 000000000..28faa7b45 --- /dev/null +++ b/doc/bugs/git-annex:_Cannot_decode_byte___39____92__xfc__39__/comment_1_f1a7352b04f395e06e0094c1f51b6fff._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joeyh.name/" + ip="4.153.2.25" + subject="comment 1" + date="2012-06-27T02:48:31Z" + content=""" +I don't think this has to do with the path name of the repository containing utf-8 at all. + +Your recipe for reproducing this depends on some pre-existing repository that I don't know how to set up to reproduce this bug. All I can guess is that, based on the \"decodeUtf8\" in the error message, it's coming from the one part of the code that still uses that, the union merger. + + +"""]] diff --git a/doc/bugs/git-annex:_Cannot_decode_byte___39____92__xfc__39__/comment_2_c1890067079cd99667f31cbb4d2e4545._comment b/doc/bugs/git-annex:_Cannot_decode_byte___39____92__xfc__39__/comment_2_c1890067079cd99667f31cbb4d2e4545._comment new file mode 100644 index 000000000..3486be733 --- /dev/null +++ b/doc/bugs/git-annex:_Cannot_decode_byte___39____92__xfc__39__/comment_2_c1890067079cd99667f31cbb4d2e4545._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joeyh.name/" + ip="4.153.2.25" + subject="comment 2" + date="2012-06-27T03:08:13Z" + content=""" +Since I can't reproduce it I am not sure, but it may be fixed by the commits I've just made. 
+"""]] diff --git a/doc/bugs/git-annex:_Cannot_decode_byte___39____92__xfc__39__/comment_3_213c96085c60c8e52cd803df07240158._comment b/doc/bugs/git-annex:_Cannot_decode_byte___39____92__xfc__39__/comment_3_213c96085c60c8e52cd803df07240158._comment new file mode 100644 index 000000000..48a382029 --- /dev/null +++ b/doc/bugs/git-annex:_Cannot_decode_byte___39____92__xfc__39__/comment_3_213c96085c60c8e52cd803df07240158._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkWzAq6TusMi9zI3FLkDOETRIAUTtmGZVg" + nickname="Ali" + subject="comment 3" + date="2012-06-27T12:56:37Z" + content=""" +Yes, the problem is fixed. + +The repository was a normal git repository with path /tmp/çüş (git init) +and with annex description \"çüş\" (git annex init çüş) + +afaict, i can't reproduce the problem anymore either :-) +"""]] diff --git a/doc/forum/Wishlist:_getting_the_disk_used_by_a_subtree_of_files.mdwn b/doc/forum/Wishlist:_getting_the_disk_used_by_a_subtree_of_files.mdwn new file mode 100644 index 000000000..7bdd93654 --- /dev/null +++ b/doc/forum/Wishlist:_getting_the_disk_used_by_a_subtree_of_files.mdwn @@ -0,0 +1,10 @@ +I'm not sure if this _feature_ exists already wrapped or provided as a recipe for users or not yet. But it would be nice to be able to do a + + git annex du [PATH] + +Such that the output that git annex would return is the total disk used locally in the PATH and the theoretical disk used by the PATH if it was fully populated locally. e.g. + + $ git annex du FSL0001_ANALYSIS + $ Local: 1000kb, Annex: 2000kb + +or something along the lines of that? 
diff --git a/doc/forum/Wishlist:_getting_the_disk_used_by_a_subtree_of_files/comment_1_7abb1155081a23ce4829ee69b2064541._comment b/doc/forum/Wishlist:_getting_the_disk_used_by_a_subtree_of_files/comment_1_7abb1155081a23ce4829ee69b2064541._comment new file mode 100644 index 000000000..bff5b2ea7 --- /dev/null +++ b/doc/forum/Wishlist:_getting_the_disk_used_by_a_subtree_of_files/comment_1_7abb1155081a23ce4829ee69b2064541._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="http://joeyh.name/" + ip="4.153.2.25" + subject="comment 1" + date="2012-06-27T12:36:08Z" + content=""" +Use `du -L` for the disk space used locally. The other number is not currently available, but it would be nice to have. I also sometimes would like to have data on which backends are used how much, so making this `git annex status --subdir` is tempting. Unfortunately, its current implementation scans `.git/annex/objects` +and not the disk tree (better for accurate numbers due to copies), so it would not be a very easy thing to add. Not massively hard, but not something I can pound out before I start work today.. +"""]] diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 965a07f0d..85a5a18f0 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -135,6 +135,11 @@ subdirectories). commands to do each of those steps by hand, or if you don't want to worry about the details, you can use sync. + Merge conflicts are automatically resolved by sync. When two conflicting + versions of a file have been committed, both will be added to the tree, + under different filenames. For example, file "foo" would be replaced + with "foo.variant-XXXX" and "foo.variant-YYYY". + Note that syncing with a remote will not update the remote's working tree with changes made to the local repository.
However, those changes are pushed to the remote, so can be merged into its working tree diff --git a/doc/install/OSX/comment_11_707a1a27a15b2de8dfc8d1a30420ab4c._comment b/doc/install/OSX/comment_11_707a1a27a15b2de8dfc8d1a30420ab4c._comment new file mode 100644 index 000000000..69a4f9128 --- /dev/null +++ b/doc/install/OSX/comment_11_707a1a27a15b2de8dfc8d1a30420ab4c._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkwR9uOA38yi5kEUvcEWNtRiZwpxXskayE" + nickname="Agustin" + subject="comment 11" + date="2012-06-27T08:54:52Z" + content=""" +Hi @joey! Perfect!... I'll do that then! + +Thanks for your time man! +"""]] |