summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2012-12-07 17:28:23 -0400
committerGravatar Joey Hess <joey@kitenet.net>2012-12-07 17:29:55 -0400
commit560b644a52971a7e4706c775982ec29e03ca3ab2 (patch)
tree24f067dffecb1ef18643fbbb1977f9900aef163c
parent420038580f6d14b0e5a7b1d41b9806c275c4824e (diff)
support for checking presence of objects in direct mode
Also for dropping objects in direct mode. Checking presence reliably needs a cache of mtime, size, and inode. This way, if a file is modified, keys that point to it are no longer present. Also, the code for restoring the symlink when removing objects is unnecessarily messy. calcGitLink was generating links starting with "../../remote/.git/", when running "git annex move --from remote". I put in a workaround, but calcGitLink should probably be fixed. There is not yet support for getting objects from repositories in direct mode; it still looks for content in .git/annex/objects, and there's no once place I can change to fix that. Also, getting objects from direct mode repositories is problematic since the can be changed while the object is being transferred. It probably needs to quarantine it first.
-rw-r--r--Annex/Content.hs139
-rw-r--r--Annex/Content/Direct.hs78
-rw-r--r--Locations.hs9
-rw-r--r--doc/design/assistant/desymlink.mdwn13
4 files changed, 181 insertions, 58 deletions
diff --git a/Annex/Content.hs b/Annex/Content.hs
index f66fd51ef..3dfb4d864 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -1,6 +1,6 @@
{- git-annex file content managing
-
- - Copyright 2010 Joey Hess <joey@kitenet.net>
+ - Copyright 2010,2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -48,21 +48,57 @@ import Config
import Annex.Exception
import Git.SharedRepository
import Annex.Perms
+import Annex.Content.Direct
+
+{- Performs an action, passing it the location to use for a key's content.
+ -
+ - In direct mode, the associated files will be passed. But, if there are
+ - no associated files for a key, the indirect mode action will be
+ - performed instead. -}
+withObjectLoc :: Key -> (FilePath -> Annex a) -> ([FilePath] -> Annex a) -> Annex a
+withObjectLoc key indirect direct = ifM isDirect
+ ( do
+ fs <- associatedFiles key
+ if null fs
+ then goindirect
+ else direct fs
+ , goindirect
+ )
+ where
+ goindirect = indirect =<< inRepo (gitAnnexLocation key)
{- Checks if a given key's content is currently present. -}
inAnnex :: Key -> Annex Bool
-inAnnex = inAnnex' doesFileExist
-inAnnex' :: (FilePath -> IO a) -> Key -> Annex a
-inAnnex' a key = do
- whenM (fromRepo Git.repoIsUrl) $
- error "inAnnex cannot check remote repo"
- inRepo $ \g -> gitAnnexLocation key g >>= a
+inAnnex = inAnnex' id False $ liftIO . doesFileExist
+
+{- Generic inAnnex, handling both indirect and direct mode.
+ -
+ - In direct mode, at least one of the associated files must pass the
+ - check. Additionally, the file must be unmodified.
+ -}
+inAnnex' :: (a -> Bool) -> a -> (FilePath -> Annex a) -> Key -> Annex a
+inAnnex' isgood bad check key = withObjectLoc key checkindirect checkdirect
+ where
+ checkindirect loc = do
+ whenM (fromRepo Git.repoIsUrl) $
+ error "inAnnex cannot check remote repo"
+ check loc
+ checkdirect [] = return bad
+ checkdirect (loc:locs) = do
+ r <- check loc
+ if isgood r
+ then ifM (unmodifed key loc)
+ ( return r
+ , checkdirect locs
+ )
+ else checkdirect locs
{- A safer check; the key's content must not only be present, but
- is not in the process of being removed. -}
inAnnexSafe :: Key -> Annex (Maybe Bool)
-inAnnexSafe = inAnnex' $ \f -> openforlock f >>= check
+inAnnexSafe = inAnnex' (maybe False id) (Just False) go
where
+ go f = liftIO $ openforlock f >>= check
openforlock f = catchMaybeIO $
openFd f ReadOnly Nothing defaultFileFlags
check Nothing = return is_missing
@@ -195,6 +231,7 @@ checkDiskSpace destination key alreadythere = do
forcemsg = " (use --force to override this check or adjust annex.diskreserve)"
{- Moves a key's content into .git/annex/objects/
+ -
- In direct mode, moves it to the associated file, or files.
-
- What if the key there already has content? This could happen for
@@ -217,13 +254,9 @@ checkDiskSpace destination key alreadythere = do
- meet.
-}
moveAnnex :: Key -> FilePath -> Annex ()
-moveAnnex key src = ifM isDirect
- ( storefiles =<< associatedFiles key
- , storeobject
- )
+moveAnnex key src = withObjectLoc key storeobject storedirect
where
- storeobject = do
- dest <- inRepo $ gitAnnexLocation key
+ storeobject dest = do
ifM (liftIO $ doesFileExist dest)
( liftIO $ removeFile src
, do
@@ -232,41 +265,22 @@ moveAnnex key src = ifM isDirect
freezeContent dest
freezeContentDir dest
)
- storefiles [] = storeobject
- storefiles (dest:fs) = do
+ storedirect [] = storeobject =<< inRepo (gitAnnexLocation key)
+ storedirect (dest:fs) = do
thawContent src
- liftIO $ replacefile dest $ moveFile src
- liftIO $ forM_ fs $ \f -> replacefile f $ createLink dest
- replacefile file a = do
- {- Remove any symlink or existing file. -}
- r <- tryIO $ removeFile file
- {- Only need to create parent directory if file did not exist. -}
- case r of
- Left _ -> createDirectoryIfMissing True (parentDir file)
- _ -> noop
- a file
+ liftIO $ replaceFile dest $ moveFile src
+ liftIO $ forM_ fs $ \f -> replaceFile f $ createLink dest
-{- Files in the tree that are associated with a key.
- - For use in direct mode.
- -
- - When no known associated files exist, returns the gitAnnexLocation. -}
-associatedFiles :: Key -> Annex [FilePath]
-associatedFiles key = do
- mapping <- inRepo $ gitAnnexMapping key
- files <- liftIO $ catchDefaultIO [] $ lines <$> readFile mapping
- if null files
- then do
- l <- inRepo $ gitAnnexLocation key
- return [l]
- else do
- top <- fromRepo Git.repoPath
- return $ map (top </>) files
-
-withObjectLoc :: Key -> ((FilePath, FilePath) -> Annex a) -> Annex a
-withObjectLoc key a = do
- file <- inRepo $ gitAnnexLocation key
- let dir = parentDir file
- a (dir, file)
+{- Replaces any existing file with a new version, by running an action.
+ - First, makes sure the file is deleted. Or, if it didn't already exist,
+ - makes sure the parent directory exists. -}
+replaceFile :: FilePath -> (FilePath -> IO ()) -> IO ()
+replaceFile file a = do
+ r <- tryIO $ removeFile file
+ case r of
+ Left _ -> createDirectoryIfMissing True (parentDir file)
+ _ -> noop
+ a file
cleanObjectLoc :: Key -> Annex ()
cleanObjectLoc key = do
@@ -279,18 +293,33 @@ cleanObjectLoc key = do
maybe noop (const $ removeparents dir (n-1))
<=< catchMaybeIO $ removeDirectory dir
-{- Removes a key's file from .git/annex/objects/ -}
+{- Removes a key's file from .git/annex/objects/
+ -
+ - In direct mode, deletes the associated files or files, and replaces
+ - them with symlinks. -}
removeAnnex :: Key -> Annex ()
-removeAnnex key = withObjectLoc key $ \(dir, file) -> do
- liftIO $ do
- allowWrite dir
- removeFile file
- cleanObjectLoc key
+removeAnnex key = withObjectLoc key remove removedirect
+ where
+ remove file = do
+ liftIO $ do
+ allowWrite $ parentDir file
+ removeFile file
+ cleanObjectLoc key
+ removedirect fs = mapM_ resetfile fs
+ resetfile f = do
+ l <- calcGitLink f key
+ top <- fromRepo Git.repoPath
+ cwd <- liftIO getCurrentDirectory
+ let top' = fromMaybe top $ absNormPath cwd top
+ let l' = relPathDirToFile top' (fromMaybe l $ absNormPath top' l)
+ liftIO $ replaceFile f $ const $
+ createSymbolicLink l' f
{- Moves a key's file out of .git/annex/objects/ -}
fromAnnex :: Key -> FilePath -> Annex ()
-fromAnnex key dest = withObjectLoc key $ \(dir, file) -> do
- liftIO $ allowWrite dir
+fromAnnex key dest = do
+ file <- inRepo $ gitAnnexLocation key
+ liftIO $ allowWrite $ parentDir file
thawContent file
liftIO $ moveFile file dest
cleanObjectLoc key
diff --git a/Annex/Content/Direct.hs b/Annex/Content/Direct.hs
new file mode 100644
index 000000000..e23c6512c
--- /dev/null
+++ b/Annex/Content/Direct.hs
@@ -0,0 +1,78 @@
+{- git-annex file content managing for direct mode
+ -
+ - Copyright 2010,2012 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Annex.Content.Direct (
+ associatedFiles,
+ unmodifed,
+ getCache,
+ showCache,
+) where
+
+import Common.Annex
+import qualified Git
+
+import System.Posix.Types
+
+{- Files in the tree that are associated with a key.
+ -
+ - When no known associated files exist, returns the gitAnnexLocation. -}
+associatedFiles :: Key -> Annex [FilePath]
+associatedFiles key = do
+ mapping <- inRepo $ gitAnnexMapping key
+ files <- liftIO $ catchDefaultIO [] $ lines <$> readFile mapping
+ if null files
+ then do
+ l <- inRepo $ gitAnnexLocation key
+ return [l]
+ else do
+ top <- fromRepo Git.repoPath
+ return $ map (top </>) files
+
+{- Checks if a file in the tree, associated with a key, has not been modified.
+ -
+ - To avoid needing to fsck the file's content, which can involve an
+ - expensive checksum, this relies on a cache that contains the file's
+ - expected mtime and inode.
+ -}
+unmodifed :: Key -> FilePath -> Annex Bool
+unmodifed key file = do
+ cachefile <- inRepo $ gitAnnexCache key
+ liftIO $ do
+ curr <- getCache file
+ old <- catchDefaultIO Nothing $ readCache <$> readFile cachefile
+ return $ isJust curr && curr == old
+
+{- Cache a file's inode, size, and modification time to determine if it's
+ - been changed. -}
+data Cache = Cache FileID FileOffset EpochTime
+ deriving (Eq)
+
+showCache :: Cache -> String
+showCache (Cache inode size mtime) = unwords
+ [ show inode
+ , show size
+ , show mtime
+ ]
+
+readCache :: String -> Maybe Cache
+readCache s = case words s of
+ (inode:size:mtime:_) -> Cache
+ <$> readish inode
+ <*> readish size
+ <*> readish mtime
+ _ -> Nothing
+
+getCache :: FilePath -> IO (Maybe Cache)
+getCache f = catchDefaultIO Nothing $ toCache <$> getFileStatus f
+
+toCache :: FileStatus -> Maybe Cache
+toCache s
+ | isRegularFile s = Just $ Cache
+ (fileID s)
+ (fileSize s)
+ (modificationTime s)
+ | otherwise = Nothing
diff --git a/Locations.hs b/Locations.hs
index 36172d621..cfe9bd27d 100644
--- a/Locations.hs
+++ b/Locations.hs
@@ -12,6 +12,7 @@ module Locations (
keyPath,
gitAnnexLocation,
gitAnnexMapping,
+ gitAnnexCache,
annexLocations,
annexLocation,
gitAnnexDir,
@@ -115,6 +116,14 @@ gitAnnexMapping key r = do
loc <- gitAnnexLocation key r
return $ loc ++ ".map"
+{- File that caches information about a key's content, used to determine
+ - if a file has changed.
+ - Used in direct mode. -}
+gitAnnexCache :: Key -> Git.Repo -> IO FilePath
+gitAnnexCache key r = do
+ loc <- gitAnnexLocation key r
+ return $ loc ++ ".cache"
+
{- The annex directory of a repository. -}
gitAnnexDir :: Git.Repo -> FilePath
gitAnnexDir r = addTrailingPathSeparator $ Git.localGitDir r </> annexDir
diff --git a/doc/design/assistant/desymlink.mdwn b/doc/design/assistant/desymlink.mdwn
index df36c036f..13a51890d 100644
--- a/doc/design/assistant/desymlink.mdwn
+++ b/doc/design/assistant/desymlink.mdwn
@@ -43,11 +43,12 @@ is converted to a real file when it becomes present.
## concrete design
-* Enable with annex.nosymlink or such config option.
-* Use .git/ for the git repo, but `.git/annex/objects` won't be used.
+* Enable with annex.direct
+* Use .git/ for the git repo, but `.git/annex/objects` won't be used
+ for object storage.
* `git status` and similar will show all files as type changed, and
`git commit` would be a very bad idea. Just don't support users running
- git commands that affect the repository in this mode.
+ git commands that affect the repository in this mode. Probably.
* However, `git status` and similar also will show deleted and new files,
which will be helpful for the assistant to use when starting up.
* Cache the mtime, size etc of files, and use this to detect when they've been
@@ -61,6 +62,8 @@ is converted to a real file when it becomes present.
can map to multiple files. And that when a file is deleted or moved, the
mapping needs to be updated.
* May need a reverse mapping, from files in the tree to keys? TBD
+ (Needed to make things like `git annex drop` that want to map from the
+ file back to the key work.)
* The existing watch code detects when a file gets closed, and in this
mode, it could be a new file, or a modified file, or an unchanged file.
For a modified file, can compare mtime, size, etc, to see if it needs
@@ -73,3 +76,7 @@ is converted to a real file when it becomes present.
to files in this remote would not be noticed and committed, unless
a git-annex command were added to do so.
Getting it basically working as a remote would be a good 1st step.
+* It could also be used without the assistant as a repository that
+ the user uses directly. Would need some git-annex commands
+ to merge changes into the repo, update caches, and commit changes.
+ This could all be done by "git annex sync".