summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Annex/CatFile.hs5
-rw-r--r--Annex/Content.hs181
-rw-r--r--Annex/Content/Direct.hs158
-rw-r--r--Annex/Direct.hs105
-rw-r--r--Backend.hs6
-rw-r--r--Command/SendKey.hs5
-rw-r--r--Command/Sync.hs55
-rw-r--r--Config.hs5
-rw-r--r--Git/DiffTree.hs64
-rw-r--r--Git/LsFiles.hs18
-rw-r--r--Git/LsTree.hs3
-rw-r--r--Locations.hs17
-rw-r--r--Logs/Location.hs8
-rw-r--r--Remote.hs2
-rw-r--r--Remote/Git.hs23
-rw-r--r--doc/design/assistant/desymlink.mdwn13
-rw-r--r--doc/direct_mode.mdwn63
-rw-r--r--doc/git-annex.mdwn6
18 files changed, 653 insertions, 84 deletions
diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs
index 98d1a219f..161554f29 100644
--- a/Annex/CatFile.hs
+++ b/Annex/CatFile.hs
@@ -9,6 +9,7 @@ module Annex.CatFile (
catFile,
catObject,
catObjectDetails,
+ catKey,
catFileHandle
) where
@@ -42,3 +43,7 @@ catFileHandle = maybe startup return =<< Annex.getState Annex.catfilehandle
h <- inRepo Git.CatFile.catFileStart
Annex.changeState $ \s -> s { Annex.catfilehandle = Just h }
return h
+
+{- From the Sha of a symlink back to the key. -}
+catKey :: Sha -> Annex (Maybe Key)
+catKey sha = fileKey . takeFileName . encodeW8 . L.unpack <$> catObject sha
diff --git a/Annex/Content.hs b/Annex/Content.hs
index 887729fee..5c902e8a9 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -1,6 +1,6 @@
{- git-annex file content managing
-
- - Copyright 2010 Joey Hess <joey@kitenet.net>
+ - Copyright 2010,2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -10,12 +10,12 @@ module Annex.Content (
inAnnexSafe,
lockContent,
calcGitLink,
- logStatus,
getViaTmp,
getViaTmpUnchecked,
withTmp,
checkDiskSpace,
moveAnnex,
+ sendAnnex,
removeAnnex,
fromAnnex,
moveBad,
@@ -32,7 +32,6 @@ import System.IO.Unsafe (unsafeInterleaveIO)
import Common.Annex
import Logs.Location
-import Annex.UUID
import qualified Git
import qualified Git.Config
import qualified Annex
@@ -48,21 +47,40 @@ import Config
import Annex.Exception
import Git.SharedRepository
import Annex.Perms
+import Annex.Content.Direct
{- Checks if a given key's content is currently present. -}
inAnnex :: Key -> Annex Bool
-inAnnex = inAnnex' doesFileExist
-inAnnex' :: (FilePath -> IO a) -> Key -> Annex a
-inAnnex' a key = do
- whenM (fromRepo Git.repoIsUrl) $
- error "inAnnex cannot check remote repo"
- inRepo $ \g -> gitAnnexLocation key g >>= a
+inAnnex = inAnnex' id False $ liftIO . doesFileExist
+
+{- Generic inAnnex, handling both indirect and direct mode.
+ -
+ - In direct mode, at least one of the associated files must pass the
+ - check. Additionally, the file must be unmodified.
+ -}
+inAnnex' :: (a -> Bool) -> a -> (FilePath -> Annex a) -> Key -> Annex a
+inAnnex' isgood bad check key = withObjectLoc key checkindirect checkdirect
+ where
+ checkindirect loc = do
+ whenM (fromRepo Git.repoIsUrl) $
+ error "inAnnex cannot check remote repo"
+ check loc
+ checkdirect [] = return bad
+ checkdirect (loc:locs) = do
+ r <- check loc
+ if isgood r
+ then ifM (goodContent key loc)
+ ( return r
+ , checkdirect locs
+ )
+ else checkdirect locs
{- A safer check; the key's content must not only be present, but
- is not in the process of being removed. -}
inAnnexSafe :: Key -> Annex (Maybe Bool)
-inAnnexSafe = inAnnex' $ \f -> openforlock f >>= check
+inAnnexSafe = inAnnex' (maybe False id) (Just False) go
where
+ go f = liftIO $ openforlock f >>= check
openforlock f = catchMaybeIO $
openFd f ReadOnly Nothing defaultFileFlags
check Nothing = return is_missing
@@ -112,13 +130,6 @@ calcGitLink file key = do
where
whoops = error $ "unable to normalize " ++ file
-{- Updates the Logs.Location when a key's presence changes in the current
- - repository. -}
-logStatus :: Key -> LogStatus -> Annex ()
-logStatus key status = do
- u <- getUUID
- logChange key u status
-
{- Runs an action, passing it a temporary filename to get,
- and if the action succeeds, moves the temp file into
- the annex as a key's content. -}
@@ -151,10 +162,10 @@ prepTmp key = do
- and not being copied into place. -}
getViaTmpUnchecked :: Key -> (FilePath -> Annex Bool) -> Annex Bool
getViaTmpUnchecked key action = do
- tmp <- prepTmp key
- ifM (action tmp)
+ tmpfile <- prepTmp key
+ ifM (action tmpfile)
( do
- moveAnnex key tmp
+ moveAnnex key tmpfile
logStatus key InfoPresent
return True
, do
@@ -194,7 +205,9 @@ checkDiskSpace destination key alreadythere = do
" more" ++ forcemsg
forcemsg = " (use --force to override this check or adjust annex.diskreserve)"
-{- Moves a file into .git/annex/objects/
+{- Moves a key's content into .git/annex/objects/
+ -
+ - In direct mode, moves it to the associated file, or files.
-
- What if the key there already has content? This could happen for
- various reasons; perhaps the same content is being annexed again.
@@ -216,22 +229,85 @@ checkDiskSpace destination key alreadythere = do
- meet.
-}
moveAnnex :: Key -> FilePath -> Annex ()
-moveAnnex key src = do
- dest <- inRepo $ gitAnnexLocation key
- ifM (liftIO $ doesFileExist dest)
- ( liftIO $ removeFile src
- , do
- createContentDir dest
- liftIO $ moveFile src dest
- freezeContent dest
- freezeContentDir dest
- )
+moveAnnex key src = withObjectLoc key storeobject storedirect
+ where
+ storeobject dest = do
+ ifM (liftIO $ doesFileExist dest)
+ ( liftIO $ removeFile src
+ , do
+ createContentDir dest
+ liftIO $ moveFile src dest
+ freezeContent dest
+ freezeContentDir dest
+ )
+ storedirect [] = storeobject =<< inRepo (gitAnnexLocation key)
+ storedirect (dest:fs) = do
+ updateCache key src
+ thawContent src
+ liftIO $ replaceFile dest $ moveFile src
+ liftIO $ forM_ fs $ \f -> replaceFile f $ createLink dest
+
+{- Replaces any existing file with a new version, by running an action.
+ - First, makes sure the file is deleted. Or, if it didn't already exist,
+ - makes sure the parent directory exists. -}
+replaceFile :: FilePath -> (FilePath -> IO ()) -> IO ()
+replaceFile file a = do
+ r <- tryIO $ removeFile file
+ case r of
+ Left _ -> createDirectoryIfMissing True (parentDir file)
+ _ -> noop
+ a file
+
+{- Runs an action to transfer an object's content.
+ -
+ - In direct mode, it's possible for the file to change as it's being sent.
+ - If this happens, returns False. Currently, an arbitrary amount of bad
+ - data may be sent when this occurs. The send is not retried even if
+ - another file is known to have the same content; the action may not be
+ - idempotent.
+ -
+ - Since objects changing as they're transferred is a somewhat unusual
+ - situation, and since preventing writes to the file would be expensive,
+ - annoying or both, we instead detect the situation after the affect,
+ - and fail. Thus, it's up to the caller to detect a failure and take
+ - appropriate action. Such as, for example, ensuring that the bad
+ - data that was sent does not get installed into the annex it's being
+ - sent to.
+ -}
+sendAnnex :: Key -> (FilePath -> Annex Bool) -> Annex Bool
+sendAnnex key a = withObjectLoc key sendobject senddirect
+ where
+ sendobject = a
+ senddirect [] = return False
+ senddirect (f:fs) = do
+ cache <- recordedCache key
+ -- check that we have a good file
+ ifM (compareCache f cache)
+ ( do
+ r <- sendobject f
+ -- see if file changed while it was being sent
+ ok <- compareCache f cache
+ return (r && ok)
+ , senddirect fs
+ )
+
+{- Performs an action, passing it the location to use for a key's content.
+ -
+ - In direct mode, the associated files will be passed. But, if there are
+ - no associated files for a key, the indirect mode action will be
+ - performed instead. -}
+withObjectLoc :: Key -> (FilePath -> Annex a) -> ([FilePath] -> Annex a) -> Annex a
+withObjectLoc key indirect direct = ifM isDirect
+ ( do
+ fs <- associatedFiles key
+ if null fs
+ then goindirect
+ else direct fs
+ , goindirect
+ )
+ where
+ goindirect = indirect =<< inRepo (gitAnnexLocation key)
-withObjectLoc :: Key -> ((FilePath, FilePath) -> Annex a) -> Annex a
-withObjectLoc key a = do
- file <- inRepo $ gitAnnexLocation key
- let dir = parentDir file
- a (dir, file)
cleanObjectLoc :: Key -> Annex ()
cleanObjectLoc key = do
@@ -244,18 +320,35 @@ cleanObjectLoc key = do
maybe noop (const $ removeparents dir (n-1))
<=< catchMaybeIO $ removeDirectory dir
-{- Removes a key's file from .git/annex/objects/ -}
+{- Removes a key's file from .git/annex/objects/
+ -
+ - In direct mode, deletes the associated files or files, and replaces
+ - them with symlinks. -}
removeAnnex :: Key -> Annex ()
-removeAnnex key = withObjectLoc key $ \(dir, file) -> do
- liftIO $ do
- allowWrite dir
- removeFile file
- cleanObjectLoc key
+removeAnnex key = withObjectLoc key remove removedirect
+ where
+ remove file = do
+ liftIO $ do
+ allowWrite $ parentDir file
+ removeFile file
+ cleanObjectLoc key
+ removedirect fs = do
+ removeCache key
+ mapM_ resetfile fs
+ resetfile f = do
+ l <- calcGitLink f key
+ top <- fromRepo Git.repoPath
+ cwd <- liftIO getCurrentDirectory
+ let top' = fromMaybe top $ absNormPath cwd top
+ let l' = relPathDirToFile top' (fromMaybe l $ absNormPath top' l)
+ liftIO $ replaceFile f $ const $
+ createSymbolicLink l' f
{- Moves a key's file out of .git/annex/objects/ -}
fromAnnex :: Key -> FilePath -> Annex ()
-fromAnnex key dest = withObjectLoc key $ \(dir, file) -> do
- liftIO $ allowWrite dir
+fromAnnex key dest = do
+ file <- inRepo $ gitAnnexLocation key
+ liftIO $ allowWrite $ parentDir file
thawContent file
liftIO $ moveFile file dest
cleanObjectLoc key
diff --git a/Annex/Content/Direct.hs b/Annex/Content/Direct.hs
new file mode 100644
index 000000000..f6a564bf0
--- /dev/null
+++ b/Annex/Content/Direct.hs
@@ -0,0 +1,158 @@
+{- git-annex file content managing for direct mode
+ -
+ - Copyright 2010,2012 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Annex.Content.Direct (
+ associatedFiles,
+ removeAssociatedFile,
+ addAssociatedFile,
+ updateAssociatedFiles,
+ goodContent,
+ updateCache,
+ recordedCache,
+ compareCache,
+ writeCache,
+ removeCache,
+ genCache,
+ toCache,
+ Cache
+) where
+
+import Common.Annex
+import qualified Git
+import qualified Git.DiffTree as DiffTree
+import Git.Sha
+import Annex.CatFile
+import Utility.TempFile
+import Utility.FileMode
+import Logs.Location
+
+import System.Posix.Types
+
+{- Files in the tree that are associated with a key. -}
+associatedFiles :: Key -> Annex [FilePath]
+associatedFiles key = do
+ files <- associatedFilesRelative key
+ top <- fromRepo Git.repoPath
+ return $ map (top </>) files
+
+{- List of files in the tree that are associated with a key, relative to
+ - the top of the repo. -}
+associatedFilesRelative :: Key -> Annex [FilePath]
+associatedFilesRelative key = do
+ mapping <- inRepo $ gitAnnexMapping key
+ liftIO $ catchDefaultIO [] $ lines <$> readFile mapping
+
+{- Changes the associated files information for a key, applying a
+ - transformation to the list. Returns a copy of the new info. -}
+changeAssociatedFiles :: Key -> ([FilePath] -> [FilePath]) -> Annex [FilePath]
+changeAssociatedFiles key transform = do
+ mapping <- inRepo $ gitAnnexMapping key
+ files <- associatedFilesRelative key
+ let files' = transform files
+ when (files /= files') $
+ liftIO $ viaTmp writeFile mapping $ unlines files'
+ return files'
+
+removeAssociatedFile :: Key -> FilePath -> Annex [FilePath]
+removeAssociatedFile key file = do
+ fs <- changeAssociatedFiles key $ filter (/= file)
+ when (null fs) $
+ logStatus key InfoMissing
+ return fs
+
+addAssociatedFile :: Key -> FilePath -> Annex [FilePath]
+addAssociatedFile key file = changeAssociatedFiles key $ \files ->
+ if file `elem` files
+ then files
+ else file:files
+
+{- Uses git diff-tree to find files changed between two tree Shas, and
+ - updates the associated file mappings, efficiently. -}
+updateAssociatedFiles :: Git.Sha -> Git.Sha -> Annex ()
+updateAssociatedFiles oldsha newsha = do
+ (items, cleanup) <- inRepo $ DiffTree.diffTree oldsha newsha
+ forM_ items update
+ void $ liftIO $ cleanup
+ where
+ update item = do
+ go DiffTree.dstsha DiffTree.dstmode addAssociatedFile
+ go DiffTree.srcsha DiffTree.srcmode removeAssociatedFile
+ where
+ go getsha getmode a =
+ when (getsha item /= nullSha && isSymLink (getmode item)) $ do
+ key <- catKey (getsha item)
+ maybe noop (\k -> void $ a k $ DiffTree.file item) key
+
+{- Checks if a file in the tree, associated with a key, has not been modified.
+ -
+ - To avoid needing to fsck the file's content, which can involve an
+ - expensive checksum, this relies on a cache that contains the file's
+ - expected mtime and inode.
+ -}
+goodContent :: Key -> FilePath -> Annex Bool
+goodContent key file = do
+ old <- recordedCache key
+ compareCache file old
+
+{- Gets the recorded cache for a key. -}
+recordedCache :: Key -> Annex (Maybe Cache)
+recordedCache key = withCacheFile key $ \cachefile ->
+ catchDefaultIO Nothing $ readCache <$> readFile cachefile
+
+{- Compares a cache with the current cache for a file. -}
+compareCache :: FilePath -> Maybe Cache -> Annex Bool
+compareCache file old = do
+ curr <- liftIO $ genCache file
+ return $ isJust curr && curr == old
+
+{- Stores a cache of attributes for a file that is associated with a key. -}
+updateCache :: Key -> FilePath -> Annex ()
+updateCache key file = maybe noop (writeCache key) =<< liftIO (genCache file)
+
+{- Writes a cache for a key. -}
+writeCache :: Key -> Cache -> Annex ()
+writeCache key cache = withCacheFile key $ \cachefile -> do
+ createDirectoryIfMissing True (parentDir cachefile)
+ writeFile cachefile $ showCache cache
+
+{- Removes a cache. -}
+removeCache :: Key -> Annex ()
+removeCache key = withCacheFile key nukeFile
+
+{- Cache a file's inode, size, and modification time to determine if it's
+ - been changed. -}
+data Cache = Cache FileID FileOffset EpochTime
+ deriving (Eq, Show)
+
+showCache :: Cache -> String
+showCache (Cache inode size mtime) = unwords
+ [ show inode
+ , show size
+ , show mtime
+ ]
+
+readCache :: String -> Maybe Cache
+readCache s = case words s of
+ (inode:size:mtime:_) -> Cache
+ <$> readish inode
+ <*> readish size
+ <*> readish mtime
+ _ -> Nothing
+
+genCache :: FilePath -> IO (Maybe Cache)
+genCache f = catchDefaultIO Nothing $ toCache <$> getFileStatus f
+
+toCache :: FileStatus -> Maybe Cache
+toCache s
+ | isRegularFile s = Just $ Cache
+ (fileID s)
+ (fileSize s)
+ (modificationTime s)
+ | otherwise = Nothing
+
+withCacheFile :: Key -> (FilePath -> IO a) -> Annex a
+withCacheFile key a = liftIO . a =<< inRepo (gitAnnexCache key)
diff --git a/Annex/Direct.hs b/Annex/Direct.hs
new file mode 100644
index 000000000..12984687e
--- /dev/null
+++ b/Annex/Direct.hs
@@ -0,0 +1,105 @@
+{- git-annex direct mode
+ -
+ - Copyright 2012 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Annex.Direct where
+
+import Common.Annex
+import qualified Git
+import qualified Git.LsFiles
+import qualified Git.UpdateIndex
+import qualified Git.HashObject
+import qualified Annex.Queue
+import Git.Types
+import Annex.CatFile
+import Logs.Location
+import Backend
+import Types.KeySource
+import Annex.Content
+import Annex.Content.Direct
+
+{- Uses git ls-files to find files that need to be committed, and stages
+ - them into the index. Returns True if some changes were staged. -}
+stageDirect :: Annex Bool
+stageDirect = do
+ Annex.Queue.flush
+ top <- fromRepo Git.repoPath
+ (l, cleanup) <- inRepo $ Git.LsFiles.stagedDetails [top]
+ forM_ l go
+ void $ liftIO cleanup
+ staged <- Annex.Queue.size
+ Annex.Queue.flush
+ return $ staged /= 0
+ where
+ {- Determine what kind of modified or deleted file this is, as
+ - efficiently as we can, by getting any key that's associated
+ - with it in git, as well as its stat info. -}
+ go (file, Just sha) = do
+ mkey <- catKey sha
+ mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file
+ case (mkey, mstat, toCache =<< mstat) of
+ (Just key, _, Just cache) -> do
+ {- All direct mode files will show as
+ - modified, so compare the cache to see if
+ - it really was. -}
+ oldcache <- recordedCache key
+ when (oldcache /= Just cache) $
+ modifiedannexed file key cache
+ (Just key, Nothing, _) -> deletedannexed file key
+ (Nothing, Nothing, _) -> deletegit file
+ (_, Just _, _) -> addgit file
+ go (file, Nothing) = do
+ mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file
+ case (mstat, toCache =<< mstat) of
+ (Nothing, _) -> noop
+ (Just stat, Just cache)
+ | isSymbolicLink stat -> addgit file
+ | otherwise -> void $ addDirect file cache
+ (Just stat, Nothing)
+ | isSymbolicLink stat -> addgit file
+ | otherwise -> noop
+
+ modifiedannexed file oldkey cache = do
+ void $ removeAssociatedFile oldkey file
+ void $ addDirect file cache
+
+ deletedannexed file key = do
+ void $ removeAssociatedFile key file
+ deletegit file
+
+ addgit file = Annex.Queue.addCommand "add" [Param "-f"] [file]
+
+ deletegit file = Annex.Queue.addCommand "rm" [Param "-f"] [file]
+
+{- Adds a file to the annex in direct mode. Can fail, if the file is
+ - modified or deleted while it's being added. -}
+addDirect :: FilePath -> Cache -> Annex Bool
+addDirect file cache = do
+ showStart "add" file
+ let source = KeySource
+ { keyFilename = file
+ , contentLocation = file
+ }
+ got =<< genKey source =<< chooseBackend file
+ where
+ got Nothing = do
+ showEndFail
+ return False
+ got (Just (key, _)) = ifM (compareCache file $ Just cache)
+ ( do
+ link <- calcGitLink file key
+ sha <- inRepo $ Git.HashObject.hashObject BlobObject link
+ Annex.Queue.addUpdateIndex =<<
+ inRepo (Git.UpdateIndex.stageSymlink file sha)
+ writeCache key cache
+ void $ addAssociatedFile key file
+ logStatus key InfoPresent
+ showEndOk
+ return True
+ , do
+ showEndFail
+ return False
+ )
diff --git a/Backend.hs b/Backend.hs
index b66e6130e..1e3d8f94f 100644
--- a/Backend.hs
+++ b/Backend.hs
@@ -52,8 +52,7 @@ orderedList = do
parseBackendList s = map lookupBackendName $ words s
{- Generates a key for a file, trying each backend in turn until one
- - accepts it.
- -}
+ - accepts it. -}
genKey :: KeySource -> Maybe Backend -> Annex (Maybe (Key, Backend))
genKey source trybackend = do
bs <- orderedList
@@ -94,8 +93,7 @@ lookupFile file = do
return Nothing
{- Looks up the backend that should be used for a file.
- - That can be configured on a per-file basis in the gitattributes file.
- -}
+ - That can be configured on a per-file basis in the gitattributes file. -}
chooseBackend :: FilePath -> Annex (Maybe Backend)
chooseBackend f = Annex.getState Annex.forcebackend >>= go
where
diff --git a/Command/SendKey.hs b/Command/SendKey.hs
index ccbfa9030..82c159f66 100644
--- a/Command/SendKey.hs
+++ b/Command/SendKey.hs
@@ -23,9 +23,8 @@ seek = [withKeys start]
start :: Key -> CommandStart
start key = ifM (inAnnex key)
- ( fieldTransfer Upload key $ \_p -> do
- file <- inRepo $ gitAnnexLocation key
- liftIO $ rsyncServerSend file
+ ( fieldTransfer Upload key $ \_p ->
+ sendAnnex key $ liftIO . rsyncServerSend
, do
warning "requested key is not present"
liftIO exitFailure
diff --git a/Command/Sync.hs b/Command/Sync.hs
index f7410112e..7e3769864 100644
--- a/Command/Sync.hs
+++ b/Command/Sync.hs
@@ -15,6 +15,8 @@ import qualified Annex
import qualified Annex.Branch
import qualified Annex.Queue
import Annex.Content
+import Annex.Content.Direct
+import Annex.Direct
import Annex.CatFile
import qualified Git.Command
import qualified Git.LsFiles as LsFiles
@@ -28,7 +30,6 @@ import qualified Remote.Git
import Types.Key
import Config
-import qualified Data.ByteString.Lazy as L
import Data.Hash.MD5
def :: [Command]
@@ -78,14 +79,20 @@ syncRemotes rs = ifM (Annex.getState Annex.fast) ( nub <$> pickfast , wanted )
fastest = fromMaybe [] . headMaybe . Remote.byCost
commit :: CommandStart
-commit = do
- showStart "commit" ""
- next $ next $ do
+commit = next $ next $ do
+ Annex.Branch.commit "update"
+ ifM isDirect
+ ( ifM stageDirect
+ ( runcommit [] , return True )
+ , runcommit [Param "-a"]
+ )
+ where
+ runcommit ps = do
+ showStart "commit" ""
showOutput
- Annex.Branch.commit "update"
-- Commit will fail when the tree is clean, so ignore failure.
- _ <- inRepo $ Git.Command.runBool "commit"
- [Param "-a", Param "-m", Param "git-annex automatic sync"]
+ _ <- inRepo $ Git.Command.runBool "commit" $ ps ++
+ [Param "-m", Param "git-annex automatic sync"]
return True
mergeLocal :: Git.Ref -> CommandStart
@@ -172,13 +179,31 @@ mergeAnnex = do
void $ Annex.Branch.forceUpdate
stop
+{- Merges from a branch into the current branch.
+ -
+ - In direct mode, updates associated files mappings for the files that
+ - were changed by the merge. -}
mergeFrom :: Git.Ref -> Annex Bool
-mergeFrom branch = do
- showOutput
- ok <- inRepo $ Git.Merge.mergeNonInteractive branch
- if ok
- then return ok
- else resolveMerge
+mergeFrom branch = ifM isDirect
+ ( maybe go godirect =<< inRepo Git.Branch.current
+ , go
+ )
+ where
+ go = do
+ showOutput
+ ok <- inRepo $ Git.Merge.mergeNonInteractive branch
+ if ok
+ then return ok
+ else resolveMerge
+ godirect currbranch = do
+ old <- inRepo $ Git.Ref.sha currbranch
+ r <- go
+ new <- inRepo $ Git.Ref.sha currbranch
+ case (old, new) of
+ (Just oldsha, Just newsha) -> do
+ updateAssociatedFiles oldsha newsha
+ _ -> noop
+ return r
{- Resolves a conflicted merge. It's important that any conflicts be
- resolved in a way that itself avoids later merge conflicts, since
@@ -240,9 +265,7 @@ resolveMerge' u
case msha of
Nothing -> a Nothing
Just sha -> do
- key <- fileKey . takeFileName
- . encodeW8 . L.unpack
- <$> catObject sha
+ key <- catKey sha
maybe (return False) (a . Just) key
{- The filename to use when resolving a conflicted merge of a file,
diff --git a/Config.hs b/Config.hs
index d6f240f18..248a169ad 100644
--- a/Config.hs
+++ b/Config.hs
@@ -116,6 +116,11 @@ getDiskReserve = fromMaybe megabyte . readSize dataUnits
where
megabyte = 1000000
+{- Gets annex.direct setting. -}
+isDirect :: Annex Bool
+isDirect = fromMaybe False . Git.Config.isTrue <$>
+ getConfig (annexConfig "direct") ""
+
{- Gets annex.httpheaders or annex.httpheaders-command setting,
- splitting it into lines. -}
getHttpHeaders :: Annex [String]
diff --git a/Git/DiffTree.hs b/Git/DiffTree.hs
new file mode 100644
index 000000000..7281255f5
--- /dev/null
+++ b/Git/DiffTree.hs
@@ -0,0 +1,64 @@
+{- git diff-tree interface
+ -
+ - Copyright 2012 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Git.DiffTree (
+ DiffTreeItem(..),
+ diffTree,
+ parseDiffTree
+) where
+
+import Numeric
+import System.Posix.Types
+
+import Common
+import Git
+import Git.Sha
+import Git.Command
+import qualified Git.Filename
+
+data DiffTreeItem = DiffTreeItem
+ { srcmode :: FileMode
+ , dstmode :: FileMode
+ , srcsha :: Sha -- nullSha if file was added
+ , dstsha :: Sha -- nullSha if file was deleted
+ , status :: String
+ , file :: FilePath
+ } deriving Show
+
+{- Diffs two tree Refs. -}
+diffTree :: Ref -> Ref -> Repo -> IO ([DiffTreeItem], IO Bool)
+diffTree src dst repo = do
+ (diff, cleanup) <- pipeNullSplit [Params "diff-tree -z --raw --no-renames -l0", Param (show src), Param (show dst)] repo
+ return (parseDiffTree diff, cleanup)
+
+{- Parses diff-tree output. -}
+parseDiffTree :: [String] -> [DiffTreeItem]
+parseDiffTree l = go l []
+ where
+ go [] c = c
+ go (info:f:rest) c = go rest (mk info f : c)
+ go (s:[]) _ = error $ "diff-tree parse error " ++ s
+
+ mk info f = DiffTreeItem
+ { srcmode = readmode srcm
+ , dstmode = readmode dstm
+ , srcsha = fromMaybe (error "bad srcsha") $ extractSha ssha
+ , dstsha = fromMaybe (error "bad dstsha") $ extractSha dsha
+ , status = s
+ , file = Git.Filename.decode f
+ }
+ where
+ readmode = fst . Prelude.head . readOct
+
+ -- info = :<srcmode> SP <dstmode> SP <srcsha> SP <dstsha> SP <status>
+ -- All fields are fixed, so we can pull them out of
+ -- specific positions in the line.
+ (srcm, past_srcm) = splitAt 7 $ drop 1 info
+ (dstm, past_dstm) = splitAt 7 past_srcm
+ (ssha, past_ssha) = splitAt shaSize past_dstm
+ (dsha, past_dsha) = splitAt shaSize $ drop 1 past_ssha
+ s = drop 1 past_dsha
diff --git a/Git/LsFiles.hs b/Git/LsFiles.hs
index 6d42d77ed..45e105a3b 100644
--- a/Git/LsFiles.hs
+++ b/Git/LsFiles.hs
@@ -10,7 +10,7 @@ module Git.LsFiles (
notInRepo,
staged,
stagedNotDeleted,
- changedUnstaged,
+ stagedDetails,
typeChanged,
typeChangedStaged,
Conflicting(..),
@@ -53,6 +53,22 @@ staged' ps l = pipeNullSplit $ prefix ++ ps ++ suffix
prefix = [Params "diff --cached --name-only -z"]
suffix = Param "--" : map File l
+{- Returns details about files that are staged in the index
+ - (including the Sha of their staged contents),
+ - as well as files not yet in git. -}
+stagedDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha)], IO Bool)
+stagedDetails l repo = do
+ (ls, cleanup) <- pipeNullSplit params repo
+ return (map parse ls, cleanup)
+ where
+ params = [Params "ls-files --others --exclude-standard --stage -z --"] ++
+ map File l
+ parse s
+ | null file = (s, Nothing)
+ | otherwise = (file, extractSha $ take shaSize $ drop 7 metadata)
+ where
+ (metadata, file) = separate (== '\t') s
+
{- Returns a list of files that have unstaged changes. -}
changedUnstaged :: [FilePath] -> Repo -> IO ([FilePath], IO Bool)
changedUnstaged l = pipeNullSplit params
diff --git a/Git/LsTree.hs b/Git/LsTree.hs
index 611793c40..c61ae7fab 100644
--- a/Git/LsTree.hs
+++ b/Git/LsTree.hs
@@ -19,6 +19,7 @@ import System.Posix.Types
import Common
import Git
import Git.Command
+import Git.Sha
import qualified Git.Filename
data TreeItem = TreeItem
@@ -53,5 +54,5 @@ parseLsTree l = TreeItem
-- specific positions in the line.
(m, past_m) = splitAt 7 l
(t, past_t) = splitAt 4 past_m
- (s, past_s) = splitAt 40 $ Prelude.tail past_t
+ (s, past_s) = splitAt shaSize $ Prelude.tail past_t
f = Prelude.tail past_s
diff --git a/Locations.hs b/Locations.hs
index db97bbec7..cfe9bd27d 100644
--- a/Locations.hs
+++ b/Locations.hs
@@ -11,6 +11,8 @@ module Locations (
keyPaths,
keyPath,
gitAnnexLocation,
+ gitAnnexMapping,
+ gitAnnexCache,
annexLocations,
annexLocation,
gitAnnexDir,
@@ -107,6 +109,21 @@ gitAnnexLocation key r
check locs@(l:_) = fromMaybe l <$> firstM doesFileExist locs
check [] = error "internal"
+{- File that maps from a key to the file(s) in the git repository.
+ - Used in direct mode. -}
+gitAnnexMapping :: Key -> Git.Repo -> IO FilePath
+gitAnnexMapping key r = do
+ loc <- gitAnnexLocation key r
+ return $ loc ++ ".map"
+
+{- File that caches information about a key's content, used to determine
+ - if a file has changed.
+ - Used in direct mode. -}
+gitAnnexCache :: Key -> Git.Repo -> IO FilePath
+gitAnnexCache key r = do
+ loc <- gitAnnexLocation key r
+ return $ loc ++ ".cache"
+
{- The annex directory of a repository. -}
gitAnnexDir :: Git.Repo -> FilePath
gitAnnexDir r = addTrailingPathSeparator $ Git.localGitDir r </> annexDir
diff --git a/Logs/Location.hs b/Logs/Location.hs
index 4273710fc..0f57b6663 100644
--- a/Logs/Location.hs
+++ b/Logs/Location.hs
@@ -15,6 +15,7 @@
module Logs.Location (
LogStatus(..),
+ logStatus,
logChange,
loggedLocations,
loggedKeys,
@@ -26,6 +27,13 @@ module Logs.Location (
import Common.Annex
import qualified Annex.Branch
import Logs.Presence
+import Annex.UUID
+
+{- Log a change in the presence of a key's value in current repository. -}
+logStatus :: Key -> LogStatus -> Annex ()
+logStatus key status = do
+ u <- getUUID
+ logChange key u status
{- Log a change in the presence of a key's value in a repository. -}
logChange :: Key -> UUID -> LogStatus -> Annex ()
diff --git a/Remote.hs b/Remote.hs
index 721b64edb..c4291a997 100644
--- a/Remote.hs
+++ b/Remote.hs
@@ -51,7 +51,7 @@ import qualified Annex
import Annex.UUID
import Logs.UUID
import Logs.Trust
-import Logs.Location
+import Logs.Location hiding (logStatus)
import Remote.List
import qualified Git
diff --git a/Remote/Git.hs b/Remote/Git.hs
index c30988cbf..db73247a1 100644
--- a/Remote/Git.hs
+++ b/Remote/Git.hs
@@ -37,6 +37,7 @@ import Config
import Init
import Types.Key
import qualified Fields
+import Logs.Location
import Control.Concurrent
import Control.Concurrent.MSampleVar
@@ -243,7 +244,7 @@ dropKey r key
whenM (Annex.Content.inAnnex key) $ do
Annex.Content.lockContent key $
Annex.Content.removeAnnex key
- Annex.Content.logStatus key InfoMissing
+ logStatus key InfoMissing
Annex.Content.saveState True
return True
| Git.repoIsHttp r = error "dropping from http repo not supported"
@@ -262,9 +263,9 @@ copyFromRemote r key file dest
-- run copy from perspective of remote
liftIO $ onLocal r $ do
ensureInitialized
- loc <- inRepo $ gitAnnexLocation key
- upload u key file noRetry $
- rsyncOrCopyFile params loc dest
+ Annex.Content.sendAnnex key $ \object ->
+ upload u key file noRetry $
+ rsyncOrCopyFile params object dest
| Git.repoIsSsh r = feedprogressback $ \feeder ->
rsyncHelper (Just feeder)
=<< rsyncParamsRemote r True key dest file
@@ -324,8 +325,12 @@ copyFromRemoteCheap r key file
{- Tries to copy a key's content to a remote's annex. -}
copyToRemote :: Git.Repo -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
copyToRemote r key file p
- | not $ Git.repoIsUrl r = guardUsable r False $ commitOnCleanup r $ do
- keysrc <- inRepo $ gitAnnexLocation key
+ | not $ Git.repoIsUrl r = guardUsable r False $ commitOnCleanup r $ copylocal
+ | Git.repoIsSsh r = commitOnCleanup r $ Annex.Content.sendAnnex key $ \object ->
+ rsyncHelper (Just p) =<< rsyncParamsRemote r False key object file
+ | otherwise = error "copying to non-ssh repo not supported"
+ where
+ copylocal = Annex.Content.sendAnnex key $ \object -> do
params <- rsyncParams r
u <- getUUID
-- run copy from perspective of remote
@@ -336,12 +341,8 @@ copyToRemote r key file p
download u key file noRetry $
Annex.Content.saveState True `after`
Annex.Content.getViaTmp key
- (\d -> rsyncOrCopyFile params keysrc d p)
+ (\d -> rsyncOrCopyFile params object d p)
)
- | Git.repoIsSsh r = commitOnCleanup r $ do
- keysrc <- inRepo $ gitAnnexLocation key
- rsyncHelper (Just p) =<< rsyncParamsRemote r False key keysrc file
- | otherwise = error "copying to non-ssh repo not supported"
rsyncHelper :: Maybe MeterUpdate -> [CommandParam] -> Annex Bool
rsyncHelper callback params = do
diff --git a/doc/design/assistant/desymlink.mdwn b/doc/design/assistant/desymlink.mdwn
index 2f49c8304..5498b3691 100644
--- a/doc/design/assistant/desymlink.mdwn
+++ b/doc/design/assistant/desymlink.mdwn
@@ -43,11 +43,12 @@ is converted to a real file when it becomes present.
## concrete design
-* Enable with annex.nosymlink or such config option.
-* Use .git/ for the git repo, but `.git/annex/objects` won't be used.
+* Enable with annex.direct
+* Use .git/ for the git repo, but `.git/annex/objects` won't be used
+ for object storage.
* `git status` and similar will show all files as type changed, and
`git commit` would be a very bad idea. Just don't support users running
- git commands that affect the repository in this mode.
+ git commands that affect the repository in this mode. Probably.
* However, `git status` and similar also will show deleted and new files,
which will be helpful for the assistant to use when starting up.
* Cache the mtime, size etc of files, and use this to detect when they've been
@@ -61,6 +62,8 @@ is converted to a real file when it becomes present.
can map to multiple files. And that when a file is deleted or moved, the
mapping needs to be updated.
* May need a reverse mapping, from files in the tree to keys? TBD
+ (Needed to make things like `git annex drop` that want to map from the
+ file back to the key work.)
* The existing watch code detects when a file gets closed, and in this
mode, it could be a new file, or a modified file, or an unchanged file.
For a modified file, can compare mtime, size, etc, to see if it needs
@@ -73,6 +76,10 @@ is converted to a real file when it becomes present.
to files in this remote would not be noticed and committed, unless
a git-annex command were added to do so.
Getting it basically working as a remote would be a good 1st step.
+* It could also be used without the assistant as a repository that
+ the user uses directly. Would need some git-annex commands
+ to merge changes into the repo, update caches, and commit changes.
+ This could all be done by "git annex sync".
## TODO
diff --git a/doc/direct_mode.mdwn b/doc/direct_mode.mdwn
new file mode 100644
index 000000000..095f15d5a
--- /dev/null
+++ b/doc/direct_mode.mdwn
@@ -0,0 +1,63 @@
+Normally, git-annex repositories consist of symlinks that are checked into
+git, and in turn point at the content of large files that is stored in
+`.git/annex/objects/`. Direct mode is an experimental mode that gets rid of
+the symlinks.
+
+The advantage of direct mode is that you can access files directly,
+including modifying them. The disadvantage is that most regular git
+commands cannot safely be used, and only a subset of git-annex commands
+can be used.
+
+## make a direct mode repository
+
+To make a repository using direct mode, either make a fresh clone of an
+existing repository, or start a new repository. Then configure direct mode:
+`git config annex.direct true`
+
+You're strongly encouraged to tell git-annex that direct mode repositories
+cannot be trusted to retain the content of a file (because it can be
+deleted or modified at any time). To do so: `git annex untrust .`
+
+## use a direct mode repository
+
+The main command that's used in direct mode repositories is
+`git annex sync`. This automatically adds new files, commits all
+changed files to git, pushes them out, pulls down any changes, etc.
+
+You can also run `git annex get` to transfer the content of files into your
+direct mode repository. Or if the direct mode repository is a remote of
+some other, regular git-annex repository, you can use commands like `git
+annex copy` and `git annex move` to transfer the contents of files to the
+direct mode repository.
+
+You can use `git commit --staged`. (But not `git commit -a` .. It'll commit
+whole large files into git!)
+
+You can use `git log` and other git query commands.
+
+## what doesn't work in direct mode
+
+Don't use `git annex add` -- it thinks all direct mode files are unlocked,
+and locks them.
+
+In general git-annex commands will only work in direct mode repositories on
+files whose content is not present. That's because such files are still
+represented as symlinks, which git-annex commands know how to operate on.
+So, `git annex get` works, but `git annex drop` and `git annex move` don't,
+and things like `git annex fsck` and `git annex status` show incomplete
+information.
+
+It's technically possible to make all git-annex commands work in direct
+mode repositories, so this might change. Check back to this page to see
+current status about what works and what doesn't.
+
+As for git commands, you can probably use some git working tree
+manipulation commands, like `git checkout` and `git revert` in useful
+ways... But beware, these commands can replace files that are present in
+your repository with broken symlinks. If that file was the only copy you
+had of something, it'll be lost.
+
+This is one reason it's wise to make git-annex untrust your direct mode
+repositories. Still, you can lose data using these sort of git commands, so
+use extreme caution. With direct mode, you're operating without large
+swathes of git-annex's carefully constructed safety net.
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 5a69fe496..2fbfc5b16 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -776,6 +776,12 @@ Here are all the supported configuration settings.
to close it. On Mac OSX, this defaults to 1 second, to work around
a bad interaction with software there.
+* `annex.direct`
+
+ Set to true to enable an (experimental) mode where files in the repository
+ are accessed directly, rather than through symlinks. Note that many git
+ and git-annex commands will not work with such a repository.
+
* `remote.<name>.annex-cost`
When determining which repository to