diff options
-rw-r--r-- | Annex/CatFile.hs | 5 | ||||
-rw-r--r-- | Annex/Content.hs | 181 | ||||
-rw-r--r-- | Annex/Content/Direct.hs | 158 | ||||
-rw-r--r-- | Annex/Direct.hs | 105 | ||||
-rw-r--r-- | Backend.hs | 6 | ||||
-rw-r--r-- | Command/SendKey.hs | 5 | ||||
-rw-r--r-- | Command/Sync.hs | 55 | ||||
-rw-r--r-- | Config.hs | 5 | ||||
-rw-r--r-- | Git/DiffTree.hs | 64 | ||||
-rw-r--r-- | Git/LsFiles.hs | 18 | ||||
-rw-r--r-- | Git/LsTree.hs | 3 | ||||
-rw-r--r-- | Locations.hs | 17 | ||||
-rw-r--r-- | Logs/Location.hs | 8 | ||||
-rw-r--r-- | Remote.hs | 2 | ||||
-rw-r--r-- | Remote/Git.hs | 23 | ||||
-rw-r--r-- | doc/design/assistant/desymlink.mdwn | 13 | ||||
-rw-r--r-- | doc/direct_mode.mdwn | 63 | ||||
-rw-r--r-- | doc/git-annex.mdwn | 6 |
18 files changed, 653 insertions, 84 deletions
diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs index 98d1a219f..161554f29 100644 --- a/Annex/CatFile.hs +++ b/Annex/CatFile.hs @@ -9,6 +9,7 @@ module Annex.CatFile ( catFile, catObject, catObjectDetails, + catKey, catFileHandle ) where @@ -42,3 +43,7 @@ catFileHandle = maybe startup return =<< Annex.getState Annex.catfilehandle h <- inRepo Git.CatFile.catFileStart Annex.changeState $ \s -> s { Annex.catfilehandle = Just h } return h + +{- From the Sha of a symlink back to the key. -} +catKey :: Sha -> Annex (Maybe Key) +catKey sha = fileKey . takeFileName . encodeW8 . L.unpack <$> catObject sha diff --git a/Annex/Content.hs b/Annex/Content.hs index 887729fee..5c902e8a9 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -1,6 +1,6 @@ {- git-annex file content managing - - - Copyright 2010 Joey Hess <joey@kitenet.net> + - Copyright 2010,2012 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} @@ -10,12 +10,12 @@ module Annex.Content ( inAnnexSafe, lockContent, calcGitLink, - logStatus, getViaTmp, getViaTmpUnchecked, withTmp, checkDiskSpace, moveAnnex, + sendAnnex, removeAnnex, fromAnnex, moveBad, @@ -32,7 +32,6 @@ import System.IO.Unsafe (unsafeInterleaveIO) import Common.Annex import Logs.Location -import Annex.UUID import qualified Git import qualified Git.Config import qualified Annex @@ -48,21 +47,40 @@ import Config import Annex.Exception import Git.SharedRepository import Annex.Perms +import Annex.Content.Direct {- Checks if a given key's content is currently present. -} inAnnex :: Key -> Annex Bool -inAnnex = inAnnex' doesFileExist -inAnnex' :: (FilePath -> IO a) -> Key -> Annex a -inAnnex' a key = do - whenM (fromRepo Git.repoIsUrl) $ - error "inAnnex cannot check remote repo" - inRepo $ \g -> gitAnnexLocation key g >>= a +inAnnex = inAnnex' id False $ liftIO . doesFileExist + +{- Generic inAnnex, handling both indirect and direct mode. + - + - In direct mode, at least one of the associated files must pass the + - check. Additionally, the file must be unmodified. + -} +inAnnex' :: (a -> Bool) -> a -> (FilePath -> Annex a) -> Key -> Annex a +inAnnex' isgood bad check key = withObjectLoc key checkindirect checkdirect + where + checkindirect loc = do + whenM (fromRepo Git.repoIsUrl) $ + error "inAnnex cannot check remote repo" + check loc + checkdirect [] = return bad + checkdirect (loc:locs) = do + r <- check loc + if isgood r + then ifM (goodContent key loc) + ( return r + , checkdirect locs + ) + else checkdirect locs {- A safer check; the key's content must not only be present, but - is not in the process of being removed. -} inAnnexSafe :: Key -> Annex (Maybe Bool) -inAnnexSafe = inAnnex' $ \f -> openforlock f >>= check +inAnnexSafe = inAnnex' (maybe False id) (Just False) go where + go f = liftIO $ openforlock f >>= check openforlock f = catchMaybeIO $ openFd f ReadOnly Nothing defaultFileFlags check Nothing = return is_missing @@ -112,13 +130,6 @@ calcGitLink file key = do where whoops = error $ "unable to normalize " ++ file -{- Updates the Logs.Location when a key's presence changes in the current - - repository. -} -logStatus :: Key -> LogStatus -> Annex () -logStatus key status = do - u <- getUUID - logChange key u status - {- Runs an action, passing it a temporary filename to get, - and if the action succeeds, moves the temp file into - the annex as a key's content. -} @@ -151,10 +162,10 @@ prepTmp key = do - and not being copied into place. -} getViaTmpUnchecked :: Key -> (FilePath -> Annex Bool) -> Annex Bool getViaTmpUnchecked key action = do - tmp <- prepTmp key - ifM (action tmp) + tmpfile <- prepTmp key + ifM (action tmpfile) ( do - moveAnnex key tmp + moveAnnex key tmpfile logStatus key InfoPresent return True , do @@ -194,7 +205,9 @@ checkDiskSpace destination key alreadythere = do " more" ++ forcemsg forcemsg = " (use --force to override this check or adjust annex.diskreserve)" -{- Moves a file into .git/annex/objects/ +{- Moves a key's content into .git/annex/objects/ + - + - In direct mode, moves it to the associated file, or files. - - What if the key there already has content? This could happen for - various reasons; perhaps the same content is being annexed again. @@ -216,22 +229,85 @@ checkDiskSpace destination key alreadythere = do - meet. -} moveAnnex :: Key -> FilePath -> Annex () -moveAnnex key src = do - dest <- inRepo $ gitAnnexLocation key - ifM (liftIO $ doesFileExist dest) - ( liftIO $ removeFile src - , do - createContentDir dest - liftIO $ moveFile src dest - freezeContent dest - freezeContentDir dest - ) +moveAnnex key src = withObjectLoc key storeobject storedirect + where + storeobject dest = do + ifM (liftIO $ doesFileExist dest) + ( liftIO $ removeFile src + , do + createContentDir dest + liftIO $ moveFile src dest + freezeContent dest + freezeContentDir dest + ) + storedirect [] = storeobject =<< inRepo (gitAnnexLocation key) + storedirect (dest:fs) = do + updateCache key src + thawContent src + liftIO $ replaceFile dest $ moveFile src + liftIO $ forM_ fs $ \f -> replaceFile f $ createLink dest + +{- Replaces any existing file with a new version, by running an action. + - First, makes sure the file is deleted. Or, if it didn't already exist, + - makes sure the parent directory exists. -} +replaceFile :: FilePath -> (FilePath -> IO ()) -> IO () +replaceFile file a = do + r <- tryIO $ removeFile file + case r of + Left _ -> createDirectoryIfMissing True (parentDir file) + _ -> noop + a file + +{- Runs an action to transfer an object's content. + - + - In direct mode, it's possible for the file to change as it's being sent. + - If this happens, returns False. Currently, an arbitrary amount of bad + - data may be sent when this occurs. The send is not retried even if + - another file is known to have the same content; the action may not be + - idempotent. + - + - Since objects changing as they're transferred is a somewhat unusual + - situation, and since preventing writes to the file would be expensive, + - annoying or both, we instead detect the situation after the affect, + - and fail. Thus, it's up to the caller to detect a failure and take + - appropriate action. Such as, for example, ensuring that the bad + - data that was sent does not get installed into the annex it's being + - sent to. + -} +sendAnnex :: Key -> (FilePath -> Annex Bool) -> Annex Bool +sendAnnex key a = withObjectLoc key sendobject senddirect + where + sendobject = a + senddirect [] = return False + senddirect (f:fs) = do + cache <- recordedCache key + -- check that we have a good file + ifM (compareCache f cache) + ( do + r <- sendobject f + -- see if file changed while it was being sent + ok <- compareCache f cache + return (r && ok) + , senddirect fs + ) + +{- Performs an action, passing it the location to use for a key's content. + - + - In direct mode, the associated files will be passed. But, if there are + - no associated files for a key, the indirect mode action will be + - performed instead. -} +withObjectLoc :: Key -> (FilePath -> Annex a) -> ([FilePath] -> Annex a) -> Annex a +withObjectLoc key indirect direct = ifM isDirect + ( do + fs <- associatedFiles key + if null fs + then goindirect + else direct fs + , goindirect + ) + where + goindirect = indirect =<< inRepo (gitAnnexLocation key) -withObjectLoc :: Key -> ((FilePath, FilePath) -> Annex a) -> Annex a -withObjectLoc key a = do - file <- inRepo $ gitAnnexLocation key - let dir = parentDir file - a (dir, file) cleanObjectLoc :: Key -> Annex () cleanObjectLoc key = do @@ -244,18 +320,35 @@ cleanObjectLoc key = do maybe noop (const $ removeparents dir (n-1)) <=< catchMaybeIO $ removeDirectory dir -{- Removes a key's file from .git/annex/objects/ -} +{- Removes a key's file from .git/annex/objects/ + - + - In direct mode, deletes the associated files or files, and replaces + - them with symlinks. -} removeAnnex :: Key -> Annex () -removeAnnex key = withObjectLoc key $ \(dir, file) -> do - liftIO $ do - allowWrite dir - removeFile file - cleanObjectLoc key +removeAnnex key = withObjectLoc key remove removedirect + where + remove file = do + liftIO $ do + allowWrite $ parentDir file + removeFile file + cleanObjectLoc key + removedirect fs = do + removeCache key + mapM_ resetfile fs + resetfile f = do + l <- calcGitLink f key + top <- fromRepo Git.repoPath + cwd <- liftIO getCurrentDirectory + let top' = fromMaybe top $ absNormPath cwd top + let l' = relPathDirToFile top' (fromMaybe l $ absNormPath top' l) + liftIO $ replaceFile f $ const $ + createSymbolicLink l' f {- Moves a key's file out of .git/annex/objects/ -} fromAnnex :: Key -> FilePath -> Annex () -fromAnnex key dest = withObjectLoc key $ \(dir, file) -> do - liftIO $ allowWrite dir +fromAnnex key dest = do + file <- inRepo $ gitAnnexLocation key + liftIO $ allowWrite $ parentDir file thawContent file liftIO $ moveFile file dest cleanObjectLoc key diff --git a/Annex/Content/Direct.hs b/Annex/Content/Direct.hs new file mode 100644 index 000000000..f6a564bf0 --- /dev/null +++ b/Annex/Content/Direct.hs @@ -0,0 +1,158 @@ +{- git-annex file content managing for direct mode + - + - Copyright 2010,2012 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Content.Direct ( + associatedFiles, + removeAssociatedFile, + addAssociatedFile, + updateAssociatedFiles, + goodContent, + updateCache, + recordedCache, + compareCache, + writeCache, + removeCache, + genCache, + toCache, + Cache +) where + +import Common.Annex +import qualified Git +import qualified Git.DiffTree as DiffTree +import Git.Sha +import Annex.CatFile +import Utility.TempFile +import Utility.FileMode +import Logs.Location + +import System.Posix.Types + +{- Files in the tree that are associated with a key. -} +associatedFiles :: Key -> Annex [FilePath] +associatedFiles key = do + files <- associatedFilesRelative key + top <- fromRepo Git.repoPath + return $ map (top </>) files + +{- List of files in the tree that are associated with a key, relative to + - the top of the repo. -} +associatedFilesRelative :: Key -> Annex [FilePath] +associatedFilesRelative key = do + mapping <- inRepo $ gitAnnexMapping key + liftIO $ catchDefaultIO [] $ lines <$> readFile mapping + +{- Changes the associated files information for a key, applying a + - transformation to the list. Returns a copy of the new info. -} +changeAssociatedFiles :: Key -> ([FilePath] -> [FilePath]) -> Annex [FilePath] +changeAssociatedFiles key transform = do + mapping <- inRepo $ gitAnnexMapping key + files <- associatedFilesRelative key + let files' = transform files + when (files /= files') $ + liftIO $ viaTmp writeFile mapping $ unlines files' + return files' + +removeAssociatedFile :: Key -> FilePath -> Annex [FilePath] +removeAssociatedFile key file = do + fs <- changeAssociatedFiles key $ filter (/= file) + when (null fs) $ + logStatus key InfoMissing + return fs + +addAssociatedFile :: Key -> FilePath -> Annex [FilePath] +addAssociatedFile key file = changeAssociatedFiles key $ \files -> + if file `elem` files + then files + else file:files + +{- Uses git diff-tree to find files changed between two tree Shas, and + - updates the associated file mappings, efficiently. -} +updateAssociatedFiles :: Git.Sha -> Git.Sha -> Annex () +updateAssociatedFiles oldsha newsha = do + (items, cleanup) <- inRepo $ DiffTree.diffTree oldsha newsha + forM_ items update + void $ liftIO $ cleanup + where + update item = do + go DiffTree.dstsha DiffTree.dstmode addAssociatedFile + go DiffTree.srcsha DiffTree.srcmode removeAssociatedFile + where + go getsha getmode a = + when (getsha item /= nullSha && isSymLink (getmode item)) $ do + key <- catKey (getsha item) + maybe noop (\k -> void $ a k $ DiffTree.file item) key + +{- Checks if a file in the tree, associated with a key, has not been modified. + - + - To avoid needing to fsck the file's content, which can involve an + - expensive checksum, this relies on a cache that contains the file's + - expected mtime and inode. + -} +goodContent :: Key -> FilePath -> Annex Bool +goodContent key file = do + old <- recordedCache key + compareCache file old + +{- Gets the recorded cache for a key. -} +recordedCache :: Key -> Annex (Maybe Cache) +recordedCache key = withCacheFile key $ \cachefile -> + catchDefaultIO Nothing $ readCache <$> readFile cachefile + +{- Compares a cache with the current cache for a file. -} +compareCache :: FilePath -> Maybe Cache -> Annex Bool +compareCache file old = do + curr <- liftIO $ genCache file + return $ isJust curr && curr == old + +{- Stores a cache of attributes for a file that is associated with a key. -} +updateCache :: Key -> FilePath -> Annex () +updateCache key file = maybe noop (writeCache key) =<< liftIO (genCache file) + +{- Writes a cache for a key. -} +writeCache :: Key -> Cache -> Annex () +writeCache key cache = withCacheFile key $ \cachefile -> do + createDirectoryIfMissing True (parentDir cachefile) + writeFile cachefile $ showCache cache + +{- Removes a cache. -} +removeCache :: Key -> Annex () +removeCache key = withCacheFile key nukeFile + +{- Cache a file's inode, size, and modification time to determine if it's + - been changed. -} +data Cache = Cache FileID FileOffset EpochTime + deriving (Eq, Show) + +showCache :: Cache -> String +showCache (Cache inode size mtime) = unwords + [ show inode + , show size + , show mtime + ] + +readCache :: String -> Maybe Cache +readCache s = case words s of + (inode:size:mtime:_) -> Cache + <$> readish inode + <*> readish size + <*> readish mtime + _ -> Nothing + +genCache :: FilePath -> IO (Maybe Cache) +genCache f = catchDefaultIO Nothing $ toCache <$> getFileStatus f + +toCache :: FileStatus -> Maybe Cache +toCache s + | isRegularFile s = Just $ Cache + (fileID s) + (fileSize s) + (modificationTime s) + | otherwise = Nothing + +withCacheFile :: Key -> (FilePath -> IO a) -> Annex a +withCacheFile key a = liftIO . a =<< inRepo (gitAnnexCache key) diff --git a/Annex/Direct.hs b/Annex/Direct.hs new file mode 100644 index 000000000..12984687e --- /dev/null +++ b/Annex/Direct.hs @@ -0,0 +1,105 @@ +{- git-annex direct mode + - + - Copyright 2012 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Direct where + +import Common.Annex +import qualified Git +import qualified Git.LsFiles +import qualified Git.UpdateIndex +import qualified Git.HashObject +import qualified Annex.Queue +import Git.Types +import Annex.CatFile +import Logs.Location +import Backend +import Types.KeySource +import Annex.Content +import Annex.Content.Direct + +{- Uses git ls-files to find files that need to be committed, and stages + - them into the index. Returns True if some changes were staged. -} +stageDirect :: Annex Bool +stageDirect = do + Annex.Queue.flush + top <- fromRepo Git.repoPath + (l, cleanup) <- inRepo $ Git.LsFiles.stagedDetails [top] + forM_ l go + void $ liftIO cleanup + staged <- Annex.Queue.size + Annex.Queue.flush + return $ staged /= 0 + where + {- Determine what kind of modified or deleted file this is, as + - efficiently as we can, by getting any key that's associated + - with it in git, as well as its stat info. -} + go (file, Just sha) = do + mkey <- catKey sha + mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file + case (mkey, mstat, toCache =<< mstat) of + (Just key, _, Just cache) -> do + {- All direct mode files will show as + - modified, so compare the cache to see if + - it really was. -} + oldcache <- recordedCache key + when (oldcache /= Just cache) $ + modifiedannexed file key cache + (Just key, Nothing, _) -> deletedannexed file key + (Nothing, Nothing, _) -> deletegit file + (_, Just _, _) -> addgit file + go (file, Nothing) = do + mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file + case (mstat, toCache =<< mstat) of + (Nothing, _) -> noop + (Just stat, Just cache) + | isSymbolicLink stat -> addgit file + | otherwise -> void $ addDirect file cache + (Just stat, Nothing) + | isSymbolicLink stat -> addgit file + | otherwise -> noop + + modifiedannexed file oldkey cache = do + void $ removeAssociatedFile oldkey file + void $ addDirect file cache + + deletedannexed file key = do + void $ removeAssociatedFile key file + deletegit file + + addgit file = Annex.Queue.addCommand "add" [Param "-f"] [file] + + deletegit file = Annex.Queue.addCommand "rm" [Param "-f"] [file] + +{- Adds a file to the annex in direct mode. Can fail, if the file is + - modified or deleted while it's being added. -} +addDirect :: FilePath -> Cache -> Annex Bool +addDirect file cache = do + showStart "add" file + let source = KeySource + { keyFilename = file + , contentLocation = file + } + got =<< genKey source =<< chooseBackend file + where + got Nothing = do + showEndFail + return False + got (Just (key, _)) = ifM (compareCache file $ Just cache) + ( do + link <- calcGitLink file key + sha <- inRepo $ Git.HashObject.hashObject BlobObject link + Annex.Queue.addUpdateIndex =<< + inRepo (Git.UpdateIndex.stageSymlink file sha) + writeCache key cache + void $ addAssociatedFile key file + logStatus key InfoPresent + showEndOk + return True + , do + showEndFail + return False + ) diff --git a/Backend.hs b/Backend.hs index b66e6130e..1e3d8f94f 100644 --- a/Backend.hs +++ b/Backend.hs @@ -52,8 +52,7 @@ orderedList = do parseBackendList s = map lookupBackendName $ words s {- Generates a key for a file, trying each backend in turn until one - - accepts it. - -} + - accepts it. -} genKey :: KeySource -> Maybe Backend -> Annex (Maybe (Key, Backend)) genKey source trybackend = do bs <- orderedList @@ -94,8 +93,7 @@ lookupFile file = do return Nothing {- Looks up the backend that should be used for a file. - - That can be configured on a per-file basis in the gitattributes file. - -} + - That can be configured on a per-file basis in the gitattributes file. -} chooseBackend :: FilePath -> Annex (Maybe Backend) chooseBackend f = Annex.getState Annex.forcebackend >>= go where diff --git a/Command/SendKey.hs b/Command/SendKey.hs index ccbfa9030..82c159f66 100644 --- a/Command/SendKey.hs +++ b/Command/SendKey.hs @@ -23,9 +23,8 @@ seek = [withKeys start] start :: Key -> CommandStart start key = ifM (inAnnex key) - ( fieldTransfer Upload key $ \_p -> do - file <- inRepo $ gitAnnexLocation key - liftIO $ rsyncServerSend file + ( fieldTransfer Upload key $ \_p -> + sendAnnex key $ liftIO . rsyncServerSend , do warning "requested key is not present" liftIO exitFailure diff --git a/Command/Sync.hs b/Command/Sync.hs index f7410112e..7e3769864 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -15,6 +15,8 @@ import qualified Annex import qualified Annex.Branch import qualified Annex.Queue import Annex.Content +import Annex.Content.Direct +import Annex.Direct import Annex.CatFile import qualified Git.Command import qualified Git.LsFiles as LsFiles @@ -28,7 +30,6 @@ import qualified Remote.Git import Types.Key import Config -import qualified Data.ByteString.Lazy as L import Data.Hash.MD5 def :: [Command] @@ -78,14 +79,20 @@ syncRemotes rs = ifM (Annex.getState Annex.fast) ( nub <$> pickfast , wanted ) fastest = fromMaybe [] . headMaybe . Remote.byCost commit :: CommandStart -commit = do - showStart "commit" "" - next $ next $ do +commit = next $ next $ do + Annex.Branch.commit "update" + ifM isDirect + ( ifM stageDirect + ( runcommit [] , return True ) + , runcommit [Param "-a"] + ) + where + runcommit ps = do + showStart "commit" "" showOutput - Annex.Branch.commit "update" -- Commit will fail when the tree is clean, so ignore failure. - _ <- inRepo $ Git.Command.runBool "commit" - [Param "-a", Param "-m", Param "git-annex automatic sync"] + _ <- inRepo $ Git.Command.runBool "commit" $ ps ++ + [Param "-m", Param "git-annex automatic sync"] return True mergeLocal :: Git.Ref -> CommandStart @@ -172,13 +179,31 @@ mergeAnnex = do void $ Annex.Branch.forceUpdate stop +{- Merges from a branch into the current branch. + - + - In direct mode, updates associated files mappings for the files that + - were changed by the merge. -} mergeFrom :: Git.Ref -> Annex Bool -mergeFrom branch = do - showOutput - ok <- inRepo $ Git.Merge.mergeNonInteractive branch - if ok - then return ok - else resolveMerge +mergeFrom branch = ifM isDirect + ( maybe go godirect =<< inRepo Git.Branch.current + , go + ) + where + go = do + showOutput + ok <- inRepo $ Git.Merge.mergeNonInteractive branch + if ok + then return ok + else resolveMerge + godirect currbranch = do + old <- inRepo $ Git.Ref.sha currbranch + r <- go + new <- inRepo $ Git.Ref.sha currbranch + case (old, new) of + (Just oldsha, Just newsha) -> do + updateAssociatedFiles oldsha newsha + _ -> noop + return r {- Resolves a conflicted merge. It's important that any conflicts be - resolved in a way that itself avoids later merge conflicts, since @@ -240,9 +265,7 @@ resolveMerge' u case msha of Nothing -> a Nothing Just sha -> do - key <- fileKey . takeFileName - . encodeW8 . L.unpack - <$> catObject sha + key <- catKey sha maybe (return False) (a . Just) key {- The filename to use when resolving a conflicted merge of a file, @@ -116,6 +116,11 @@ getDiskReserve = fromMaybe megabyte . readSize dataUnits where megabyte = 1000000 +{- Gets annex.direct setting. -} +isDirect :: Annex Bool +isDirect = fromMaybe False . Git.Config.isTrue <$> + getConfig (annexConfig "direct") "" + {- Gets annex.httpheaders or annex.httpheaders-command setting, - splitting it into lines. -} getHttpHeaders :: Annex [String] diff --git a/Git/DiffTree.hs b/Git/DiffTree.hs new file mode 100644 index 000000000..7281255f5 --- /dev/null +++ b/Git/DiffTree.hs @@ -0,0 +1,64 @@ +{- git diff-tree interface + - + - Copyright 2012 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.DiffTree ( + DiffTreeItem(..), + diffTree, + parseDiffTree +) where + +import Numeric +import System.Posix.Types + +import Common +import Git +import Git.Sha +import Git.Command +import qualified Git.Filename + +data DiffTreeItem = DiffTreeItem + { srcmode :: FileMode + , dstmode :: FileMode + , srcsha :: Sha -- nullSha if file was added + , dstsha :: Sha -- nullSha if file was deleted + , status :: String + , file :: FilePath + } deriving Show + +{- Diffs two tree Refs. -} +diffTree :: Ref -> Ref -> Repo -> IO ([DiffTreeItem], IO Bool) +diffTree src dst repo = do + (diff, cleanup) <- pipeNullSplit [Params "diff-tree -z --raw --no-renames -l0", Param (show src), Param (show dst)] repo + return (parseDiffTree diff, cleanup) + +{- Parses diff-tree output. -} +parseDiffTree :: [String] -> [DiffTreeItem] +parseDiffTree l = go l [] + where + go [] c = c + go (info:f:rest) c = go rest (mk info f : c) + go (s:[]) _ = error $ "diff-tree parse error " ++ s + + mk info f = DiffTreeItem + { srcmode = readmode srcm + , dstmode = readmode dstm + , srcsha = fromMaybe (error "bad srcsha") $ extractSha ssha + , dstsha = fromMaybe (error "bad dstsha") $ extractSha dsha + , status = s + , file = Git.Filename.decode f + } + where + readmode = fst . Prelude.head . readOct + + -- info = :<srcmode> SP <dstmode> SP <srcsha> SP <dstsha> SP <status> + -- All fields are fixed, so we can pull them out of + -- specific positions in the line. + (srcm, past_srcm) = splitAt 7 $ drop 1 info + (dstm, past_dstm) = splitAt 7 past_srcm + (ssha, past_ssha) = splitAt shaSize past_dstm + (dsha, past_dsha) = splitAt shaSize $ drop 1 past_ssha + s = drop 1 past_dsha diff --git a/Git/LsFiles.hs b/Git/LsFiles.hs index 6d42d77ed..45e105a3b 100644 --- a/Git/LsFiles.hs +++ b/Git/LsFiles.hs @@ -10,7 +10,7 @@ module Git.LsFiles ( notInRepo, staged, stagedNotDeleted, - changedUnstaged, + stagedDetails, typeChanged, typeChangedStaged, Conflicting(..), @@ -53,6 +53,22 @@ staged' ps l = pipeNullSplit $ prefix ++ ps ++ suffix prefix = [Params "diff --cached --name-only -z"] suffix = Param "--" : map File l +{- Returns details about files that are staged in the index + - (including the Sha of their staged contents), + - as well as files not yet in git. -} +stagedDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha)], IO Bool) +stagedDetails l repo = do + (ls, cleanup) <- pipeNullSplit params repo + return (map parse ls, cleanup) + where + params = [Params "ls-files --others --exclude-standard --stage -z --"] ++ + map File l + parse s + | null file = (s, Nothing) + | otherwise = (file, extractSha $ take shaSize $ drop 7 metadata) + where + (metadata, file) = separate (== '\t') s + {- Returns a list of files that have unstaged changes. -} changedUnstaged :: [FilePath] -> Repo -> IO ([FilePath], IO Bool) changedUnstaged l = pipeNullSplit params diff --git a/Git/LsTree.hs b/Git/LsTree.hs index 611793c40..c61ae7fab 100644 --- a/Git/LsTree.hs +++ b/Git/LsTree.hs @@ -19,6 +19,7 @@ import System.Posix.Types import Common import Git import Git.Command +import Git.Sha import qualified Git.Filename data TreeItem = TreeItem @@ -53,5 +54,5 @@ parseLsTree l = TreeItem -- specific positions in the line. (m, past_m) = splitAt 7 l (t, past_t) = splitAt 4 past_m - (s, past_s) = splitAt 40 $ Prelude.tail past_t + (s, past_s) = splitAt shaSize $ Prelude.tail past_t f = Prelude.tail past_s diff --git a/Locations.hs b/Locations.hs index db97bbec7..cfe9bd27d 100644 --- a/Locations.hs +++ b/Locations.hs @@ -11,6 +11,8 @@ module Locations ( keyPaths, keyPath, gitAnnexLocation, + gitAnnexMapping, + gitAnnexCache, annexLocations, annexLocation, gitAnnexDir, @@ -107,6 +109,21 @@ gitAnnexLocation key r check locs@(l:_) = fromMaybe l <$> firstM doesFileExist locs check [] = error "internal" +{- File that maps from a key to the file(s) in the git repository. + - Used in direct mode. -} +gitAnnexMapping :: Key -> Git.Repo -> IO FilePath +gitAnnexMapping key r = do + loc <- gitAnnexLocation key r + return $ loc ++ ".map" + +{- File that caches information about a key's content, used to determine + - if a file has changed. + - Used in direct mode. -} +gitAnnexCache :: Key -> Git.Repo -> IO FilePath +gitAnnexCache key r = do + loc <- gitAnnexLocation key r + return $ loc ++ ".cache" + {- The annex directory of a repository. -} gitAnnexDir :: Git.Repo -> FilePath gitAnnexDir r = addTrailingPathSeparator $ Git.localGitDir r </> annexDir diff --git a/Logs/Location.hs b/Logs/Location.hs index 4273710fc..0f57b6663 100644 --- a/Logs/Location.hs +++ b/Logs/Location.hs @@ -15,6 +15,7 @@ module Logs.Location ( LogStatus(..), + logStatus, logChange, loggedLocations, loggedKeys, @@ -26,6 +27,13 @@ module Logs.Location ( import Common.Annex import qualified Annex.Branch import Logs.Presence +import Annex.UUID + +{- Log a change in the presence of a key's value in current repository. -} +logStatus :: Key -> LogStatus -> Annex () +logStatus key status = do + u <- getUUID + logChange key u status {- Log a change in the presence of a key's value in a repository. -} logChange :: Key -> UUID -> LogStatus -> Annex () @@ -51,7 +51,7 @@ import qualified Annex import Annex.UUID import Logs.UUID import Logs.Trust -import Logs.Location +import Logs.Location hiding (logStatus) import Remote.List import qualified Git diff --git a/Remote/Git.hs b/Remote/Git.hs index 0933a1cae..952fbf29f 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -37,6 +37,7 @@ import Config import Init import Types.Key import qualified Fields +import Logs.Location import Control.Concurrent import Control.Concurrent.MSampleVar @@ -243,7 +244,7 @@ dropKey r key whenM (Annex.Content.inAnnex key) $ do Annex.Content.lockContent key $ Annex.Content.removeAnnex key - Annex.Content.logStatus key InfoMissing + logStatus key InfoMissing Annex.Content.saveState True return True | Git.repoIsHttp r = error "dropping from http repo not supported" @@ -262,9 +263,9 @@ copyFromRemote r key file dest -- run copy from perspective of remote liftIO $ onLocal r $ do ensureInitialized - loc <- inRepo $ gitAnnexLocation key - upload u key file noRetry $ - rsyncOrCopyFile params loc dest + Annex.Content.sendAnnex key $ \object -> + upload u key file noRetry $ + rsyncOrCopyFile params object dest | Git.repoIsSsh r = feedprogressback $ \feeder -> rsyncHelper (Just feeder) =<< rsyncParamsRemote r True key dest file @@ -324,8 +325,12 @@ copyFromRemoteCheap r key file {- Tries to copy a key's content to a remote's annex. -} copyToRemote :: Git.Repo -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool copyToRemote r key file p - | not $ Git.repoIsUrl r = guardUsable r False $ commitOnCleanup r $ do - keysrc <- inRepo $ gitAnnexLocation key + | not $ Git.repoIsUrl r = guardUsable r False $ commitOnCleanup r $ copylocal + | Git.repoIsSsh r = commitOnCleanup r $ Annex.Content.sendAnnex key $ \object -> + rsyncHelper (Just p) =<< rsyncParamsRemote r False key object file + | otherwise = error "copying to non-ssh repo not supported" + where + copylocal = Annex.Content.sendAnnex key $ \object -> do params <- rsyncParams r u <- getUUID -- run copy from perspective of remote @@ -336,12 +341,8 @@ copyToRemote r key file p download u key file noRetry $ Annex.Content.saveState True `after` Annex.Content.getViaTmp key - (\d -> rsyncOrCopyFile params keysrc d p) + (\d -> rsyncOrCopyFile params object d p) ) - | Git.repoIsSsh r = commitOnCleanup r $ do - keysrc <- inRepo $ gitAnnexLocation key - rsyncHelper (Just p) =<< rsyncParamsRemote r False key keysrc file - | otherwise = error "copying to non-ssh repo not supported" rsyncHelper :: Maybe MeterUpdate -> [CommandParam] -> Annex Bool rsyncHelper callback params = do diff --git a/doc/design/assistant/desymlink.mdwn b/doc/design/assistant/desymlink.mdwn index 2f49c8304..5498b3691 100644 --- a/doc/design/assistant/desymlink.mdwn +++ b/doc/design/assistant/desymlink.mdwn @@ -43,11 +43,12 @@ is converted to a real file when it becomes present. ## concrete design -* Enable with annex.nosymlink or such config option. -* Use .git/ for the git repo, but `.git/annex/objects` won't be used. +* Enable with annex.direct +* Use .git/ for the git repo, but `.git/annex/objects` won't be used + for object storage. * `git status` and similar will show all files as type changed, and `git commit` would be a very bad idea. Just don't support users running - git commands that affect the repository in this mode. + git commands that affect the repository in this mode. Probably. * However, `git status` and similar also will show deleted and new files, which will be helpful for the assistant to use when starting up. * Cache the mtime, size etc of files, and use this to detect when they've been @@ -61,6 +62,8 @@ is converted to a real file when it becomes present. can map to multiple files. And that when a file is deleted or moved, the mapping needs to be updated. * May need a reverse mapping, from files in the tree to keys? TBD + (Needed to make things like `git annex drop` that want to map from the + file back to the key work.) * The existing watch code detects when a file gets closed, and in this mode, it could be a new file, or a modified file, or an unchanged file. For a modified file, can compare mtime, size, etc, to see if it needs @@ -73,6 +76,10 @@ is converted to a real file when it becomes present. to files in this remote would not be noticed and committed, unless a git-annex command were added to do so. Getting it basically working as a remote would be a good 1st step. +* It could also be used without the assistant as a repository that + the user uses directly. Would need some git-annex commands + to merge changes into the repo, update caches, and commit changes. + This could all be done by "git annex sync". ## TODO diff --git a/doc/direct_mode.mdwn b/doc/direct_mode.mdwn new file mode 100644 index 000000000..095f15d5a --- /dev/null +++ b/doc/direct_mode.mdwn @@ -0,0 +1,63 @@ +Normally, git-annex repositories consist of symlinks that are checked into +git, and in turn point at the content of large files that is stored in +`.git/annex/objects/`. Direct mode is an experimental mode that gets rid of +the symlinks. + +The advantage of direct mode is that you can access files directly, +including modifying them. The disadvantage is that most regular git +commands cannot safely be used, and only a subset of git-annex commands +can be used. + +## make a direct mode repository + +To make a repository using direct mode, either make a fresh clone of an +existing repository, or start a new repository. Then configure direct mode: +`git config annex.direct true` + +You're strongly encouraged to tell git-annex that direct mode repositories +cannot be trusted to retain the content of a file (because it can be +deleted or modified at any time). To do so: `git annex untrust .` + +## use a direct mode repository + +The main command that's used in direct mode repositories is +`git annex sync`. This automatically adds new files, commits all +changed files to git, pushes them out, pulls down any changes, etc. + +You can also run `git annex get` to transfer the content of files into your +direct mode repository. Or if the direct mode repository is a remote of +some other, regular git-annex repository, you can use commands like `git +annex copy` and `git annex move` to transfer the contents of files to the +direct mode repository. + +You can use `git commit --staged`. (But not `git commit -a` .. It'll commit +whole large files into git!) + +You can use `git log` and other git query commands. + +## what doesn't work in direct mode + +Don't use `git annex add` -- it thinks all direct mode files are unlocked, +and locks them. + +In general git-annex commands will only work in direct mode repositories on +files whose content is not present. That's because such files are still +represented as symlinks, which git-annex commands know how to operate on. +So, `git annex get` works, but `git annex drop` and `git annex move` don't, +and things like `git annex fsck` and `git annex status` show incomplete +information. + +It's technically possible to make all git-annex commands work in direct +mode repositories, so this might change. Check back to this page to see +current status about what works and what doesn't. + +As for git commands, you can probably use some git working tree +manipulation commands, like `git checkout` and `git revert` in useful +ways... But beware, these commands can replace files that are present in +your repository with broken symlinks. If that file was the only copy you +had of something, it'll be lost. + +This is one reason it's wise to make git-annex untrust your direct mode +repositories. Still, you can lose data using these sort of git commands, so +use extreme caution. With direct mode, you're operating without large +swathes of git-annex's carefully constructed safety net. diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 5a69fe496..2fbfc5b16 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -776,6 +776,12 @@ Here are all the supported configuration settings. to close it. On Mac OSX, this defaults to 1 second, to work around a bad interaction with software there. +* `annex.direct` + + Set to true to enable an (experimental) mode where files in the repository + are accessed directly, rather than through symlinks. Note that many git + and git-annex commands will not work with such a repository. + * `remote.<name>.annex-cost` When determining which repository to |