summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2016-01-25 16:16:18 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2016-01-25 16:16:18 -0400
commit2d0d0b1b401cdcd9c6c1c530826a61bfc3349d12 (patch)
tree5cab04dc2cfa5d887244a4f31191158f7914a445
parent4fcd04b876f4fc4f3738d80ef66b29a76871aa2d (diff)
matchexpression: New plumbing command to check if a preferred content expression matches some data.
-rw-r--r--CmdLine/GitAnnex.hs2
-rw-r--r--Command/MatchExpression.hs75
-rw-r--r--Limit.hs24
-rw-r--r--Limit/Wanted.hs1
-rw-r--r--Types/FileMatcher.hs13
-rw-r--r--Utility/FileSize.hs6
-rw-r--r--debian/changelog2
-rw-r--r--doc/bugs/treatment_of_largefiles_is_not_working_for_addurl_--fast___40__or_--relaxed__41__/comment_4_00dfd040f4d8b9f1ed765ee38dbc67b9._comment16
-rw-r--r--doc/git-annex-matchexpression.mdwn51
-rw-r--r--doc/git-annex.mdwn7
10 files changed, 186 insertions, 11 deletions
diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs
index 0383dada3..ec35285c4 100644
--- a/CmdLine/GitAnnex.hs
+++ b/CmdLine/GitAnnex.hs
@@ -27,6 +27,7 @@ import qualified Command.Fsck
import qualified Command.LookupKey
import qualified Command.ContentLocation
import qualified Command.ExamineKey
+import qualified Command.MatchExpression
import qualified Command.FromKey
import qualified Command.RegisterUrl
import qualified Command.SetKey
@@ -166,6 +167,7 @@ cmds testoptparser testrunner =
, Command.LookupKey.cmd
, Command.ContentLocation.cmd
, Command.ExamineKey.cmd
+ , Command.MatchExpression.cmd
, Command.FromKey.cmd
, Command.RegisterUrl.cmd
, Command.SetKey.cmd
diff --git a/Command/MatchExpression.hs b/Command/MatchExpression.hs
new file mode 100644
index 000000000..062a46b55
--- /dev/null
+++ b/Command/MatchExpression.hs
@@ -0,0 +1,75 @@
+{- git-annex command
+ -
+ - Copyright 2016 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Command.MatchExpression where
+
+import Command
+import Annex.FileMatcher
+import Types.FileMatcher
+import Utility.DataUnits
+import Utility.Matcher
+import Annex.UUID
+import Logs.Group
+
+import qualified Data.Map as M
+import qualified Data.Set as S
+
+cmd :: Command
+cmd = noCommit $
+ command "matchexpression" SectionPlumbing
+ "checks if a preferred content expression matches"
+ paramExpression
+ (seek <$$> optParser)
+
+data MatchExpressionOptions = MatchExpressionOptions
+ { matchexpr :: String
+ , matchinfo :: MatchInfo
+ }
+
+optParser :: CmdParamsDesc -> Parser MatchExpressionOptions
+optParser desc = MatchExpressionOptions
+ <$> argument str (metavar desc)
+ <*> (addkeysize <$> dataparser)
+ where
+ dataparser = MatchingInfo
+ <$> optinfo "file" (strOption
+ ( long "file" <> metavar paramFile
+ <> help "specify filename to match against"
+ ))
+ <*> optinfo "key" (option (str >>= parseKey)
+ ( long "key" <> metavar paramKey
+ <> help "specify key to match against"
+ ))
+ <*> optinfo "size" (option (str >>= maybe (fail "parse error") return . readSize dataUnits)
+ ( long "size" <> metavar paramSize
+ <> help "specify size to match against"
+ ))
+ optinfo datadesc mk = (Right <$> mk)
+ <|> (pure $ Left $ missingdata datadesc)
+ missingdata datadesc = bail $ "cannot match this expression without " ++ datadesc ++ " data"
+ -- When a key is provided, use its size.
+ addkeysize i@(MatchingInfo f (Right k) _) = case keySize k of
+ Just sz -> MatchingInfo f (Right k) (Right sz)
+ Nothing -> i
+ addkeysize i = i
+
+seek :: MatchExpressionOptions -> CommandSeek
+seek o = do
+ u <- getUUID
+ case parsedToMatcher $ exprParser matchAll matchAll groupMap M.empty (Just u) (matchexpr o) of
+ Left e -> liftIO $ bail $ "bad expression: " ++ e
+ Right matcher -> ifM (checkmatcher matcher)
+ ( liftIO exitSuccess
+ , liftIO exitFailure
+ )
+ where
+ checkmatcher matcher = matchMrun matcher $ \a -> a S.empty (matchinfo o)
+
+bail :: String -> IO a
+bail s = do
+ hPutStrLn stderr s
+ exitWith $ ExitFailure 42
diff --git a/Limit.hs b/Limit.hs
index c4bab311a..79335d3b3 100644
--- a/Limit.hs
+++ b/Limit.hs
@@ -73,21 +73,22 @@ addInclude :: String -> Annex ()
addInclude = addLimit . limitInclude
limitInclude :: MkLimit Annex
-limitInclude glob = Right $ const $ return . matchGlobFile glob
+limitInclude glob = Right $ const $ matchGlobFile glob
{- Add a limit to skip files that match the glob. -}
addExclude :: String -> Annex ()
addExclude = addLimit . limitExclude
limitExclude :: MkLimit Annex
-limitExclude glob = Right $ const $ return . not . matchGlobFile glob
+limitExclude glob = Right $ const $ not <$$> matchGlobFile glob
-matchGlobFile :: String -> MatchInfo -> Bool
+matchGlobFile :: String -> MatchInfo -> Annex Bool
matchGlobFile glob = go
where
cglob = compileGlob glob CaseSensative -- memoized
- go (MatchingKey _) = False
- go (MatchingFile fi) = matchGlob cglob (matchFile fi)
+ go (MatchingKey _) = pure False
+ go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
+ go (MatchingInfo af _ _) = matchGlob cglob <$> getInfo af
{- Adds a limit to skip files not believed to be present
- in a specfied repository. Optionally on a prior date. -}
@@ -133,8 +134,10 @@ matchPresent u _ = checkKey $ \key -> do
limitInDir :: FilePath -> MkLimit Annex
limitInDir dir = const $ Right $ const go
where
- go (MatchingFile fi) = return $ elem dir $ splitPath $ takeDirectory $ matchFile fi
+ go (MatchingFile fi) = checkf $ matchFile fi
go (MatchingKey _) = return False
+ go (MatchingInfo af _ _) = checkf =<< getInfo af
+ checkf = return . elem dir . splitPath . takeDirectory
{- Adds a limit to skip files not believed to have the specified number
- of copies. -}
@@ -177,8 +180,9 @@ limitLackingCopies approx want = case readish want of
NumCopies numcopies <- if approx
then approxNumCopies
else case mi of
- MatchingKey _ -> approxNumCopies
MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi
+ MatchingKey _ -> approxNumCopies
+ MatchingInfo _ _ _ -> approxNumCopies
us <- filter (`S.notMember` notpresent)
<$> (trustExclude UnTrusted =<< Remote.keyLocations key)
return $ numcopies - length us >= needed
@@ -192,6 +196,9 @@ limitLackingCopies approx want = case readish want of
limitUnused :: MatchFiles Annex
limitUnused _ (MatchingFile _) = return False
limitUnused _ (MatchingKey k) = S.member k <$> unusedKeys
+limitUnused _ (MatchingInfo _ ak _) = do
+ k <- getInfo ak
+ S.member k <$> unusedKeys
{- Limit that matches any version of any file. -}
limitAnything :: MatchFiles Annex
@@ -240,6 +247,8 @@ limitSize vs s = case readSize dataUnits s of
where
go sz _ (MatchingFile fi) = lookupFileKey fi >>= check fi sz
go sz _ (MatchingKey key) = checkkey sz key
+ go sz _ (MatchingInfo _ _ as) =
+ getInfo as >>= \sz' -> return (Just sz' `vs` Just sz)
checkkey sz key = return $ keySize key `vs` Just sz
check _ sz (Just key) = checkkey sz key
check fi sz Nothing = do
@@ -281,3 +290,4 @@ lookupFileKey = lookupFile . currFile
checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool
checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a
checkKey a (MatchingKey k) = a k
+checkKey a (MatchingInfo _ ak _) = a =<< getInfo ak
diff --git a/Limit/Wanted.hs b/Limit/Wanted.hs
index 237cb7ae0..c11e24b7d 100644
--- a/Limit/Wanted.hs
+++ b/Limit/Wanted.hs
@@ -21,3 +21,4 @@ addWantDrop = addLimit $ Right $ const $ checkWant $ wantDrop False Nothing Noth
checkWant :: (Maybe FilePath -> Annex Bool) -> MatchInfo -> Annex Bool
checkWant a (MatchingFile fi) = a (Just $ matchFile fi)
checkWant _ (MatchingKey _) = return False
+checkWant _ (MatchingInfo {}) = return False
diff --git a/Types/FileMatcher.hs b/Types/FileMatcher.hs
index 377bba72a..43f05efb6 100644
--- a/Types/FileMatcher.hs
+++ b/Types/FileMatcher.hs
@@ -1,6 +1,6 @@
{- git-annex file matcher types
-
- - Copyright 2013 Joey Hess <id@joeyh.name>
+ - Copyright 2013-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -10,13 +10,16 @@ module Types.FileMatcher where
import Types.UUID (UUID)
import Types.Key (Key)
import Utility.Matcher (Matcher, Token)
+import Utility.FileSize
+import Control.Monad.IO.Class
import qualified Data.Map as M
import qualified Data.Set as S
data MatchInfo
= MatchingFile FileInfo
| MatchingKey Key
+ | MatchingInfo (OptInfo FilePath) (OptInfo Key) (OptInfo FileSize)
data FileInfo = FileInfo
{ currFile :: FilePath
@@ -25,6 +28,14 @@ data FileInfo = FileInfo
-- ^ filepath to match on; may be relative to top of repo or cwd
}
+type OptInfo a = Either (IO a) a
+
+-- If the OptInfo is not available, accessing it may result in eg an
+-- exception being thrown.
+getInfo :: MonadIO m => OptInfo a -> m a
+getInfo (Right i) = pure i
+getInfo (Left e) = liftIO e
+
type FileMatcherMap a = M.Map UUID (Utility.Matcher.Matcher (S.Set UUID -> MatchInfo -> a Bool))
type MkLimit a = String -> Either String (MatchFiles a)
diff --git a/Utility/FileSize.hs b/Utility/FileSize.hs
index 1055754cb..5f89cff2c 100644
--- a/Utility/FileSize.hs
+++ b/Utility/FileSize.hs
@@ -13,13 +13,15 @@ import Control.Exception (bracket)
import System.IO
#endif
+type FileSize = Integer
+
{- Gets the size of a file.
-
- This is better than using fileSize, because on Windows that returns a
- FileOffset which maxes out at 2 gb.
- See https://github.com/jystic/unix-compat/issues/16
-}
-getFileSize :: FilePath -> IO Integer
+getFileSize :: FilePath -> IO FileSize
#ifndef mingw32_HOST_OS
getFileSize f = fmap (fromIntegral . fileSize) (getFileStatus f)
#else
@@ -27,7 +29,7 @@ getFileSize f = bracket (openFile f ReadMode) hClose hFileSize
#endif
{- Gets the size of the file, when its FileStatus is already known. -}
-getFileSize' :: FilePath -> FileStatus -> IO Integer
+getFileSize' :: FilePath -> FileStatus -> IO FileSize
#ifndef mingw32_HOST_OS
getFileSize' _ s = return $ fromIntegral $ fileSize s
#else
diff --git a/debian/changelog b/debian/changelog
index 1b3b0f67c..e59f3f218 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -16,6 +16,8 @@ git-annex (6.20160115) UNRELEASED; urgency=medium
* assistant: Use udisks2 dbus events to detect when disks are mounted,
instead of relying on gnome/kde stuff that is not stable.
* Fix build with QuickCheck 2.8.2
+ * matchexpression: New plumbing command to check if a preferred content
+ expression matches some data.
-- Joey Hess <id@joeyh.name> Fri, 15 Jan 2016 14:05:01 -0400
diff --git a/doc/bugs/treatment_of_largefiles_is_not_working_for_addurl_--fast___40__or_--relaxed__41__/comment_4_00dfd040f4d8b9f1ed765ee38dbc67b9._comment b/doc/bugs/treatment_of_largefiles_is_not_working_for_addurl_--fast___40__or_--relaxed__41__/comment_4_00dfd040f4d8b9f1ed765ee38dbc67b9._comment
new file mode 100644
index 000000000..465da0707
--- /dev/null
+++ b/doc/bugs/treatment_of_largefiles_is_not_working_for_addurl_--fast___40__or_--relaxed__41__/comment_4_00dfd040f4d8b9f1ed765ee38dbc67b9._comment
@@ -0,0 +1,16 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 4"""
+ date="2016-01-25T20:15:05Z"
+ content="""
+Implemented the matchexpression command.
+
+ time for x in $(seq 1 100); do git annex matchexpression "include=*.png and largerthan=100mb" --file=foo.png --size=10mb --debug; done
+
+ real 0m5.167s
+ user 0m2.688s
+ sys 0m1.860s
+
+Don't know if that's fast enough or if it will need further optimisation
+or a --batch option..
+"""]]
diff --git a/doc/git-annex-matchexpression.mdwn b/doc/git-annex-matchexpression.mdwn
new file mode 100644
index 000000000..c148487bf
--- /dev/null
+++ b/doc/git-annex-matchexpression.mdwn
@@ -0,0 +1,51 @@
+# NAME
+
+git-annex matchexpression - checks if a preferred content expression matches
+
+# SYNOPSIS
+
+git annex matchexpression `expression [data]`
+
+# DESCRIPTION
+
+This plumbing-level command is given a prefferred content expression,
+and some data, and checks if the expression matches the data. It exits 0 if
+it matches, and 1 if not. If not enough data was provided, it displays an
+error and exits with special code 42.
+
+For example, this will exit 0:
+
+ git annex matchexpression "include=*.png and largerthan=1mb" --file=foo.png --size=10mb
+
+# OPTIONS
+
+* `--file=`
+
+ Provide the filename to match against. Note that the file does not have
+ to actually exist on disk.
+
+* `--size=`
+
+ Tell what the size of the file is. The size can be specified with any
+ commonly used units, for example, "0.5 gb" or "100 KiloBytes".
+
+* `--key=`
+
+ Tell what key is being matched against. This is needed for
+ matching expressions like "copies=N" and "metadata=tag=foo" and
+ "present", which all need to look up the information on file for a key.
+
+ Many keys have a known size, and so --size is not needed when specifying
+ such a key.
+
+# SEE ALSO
+
+[[git-annex]](1)
+
+[[git-annex-preferred-content]](1)
+
+# AUTHOR
+
+Joey Hess <id@joeyh.name>
+
+Warning: Automatically converted into a man page by mdwn2man. Edit with care.
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 329fb8932..91ae78559 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -535,6 +535,12 @@ subdirectories).
See [[git-annex-examinekey]](1) for details.
+* `matchexpression`
+
+ Checks if a preferred content expression matches provided data.
+
+ See [[git-annex-matchexpression]](1) for details.
+
* `fromkey [key file]`
Manually set up a file in the git repository to link to a specified key.
@@ -553,7 +559,6 @@ subdirectories).
See [[git-annex-setkey]](1) for details.
-
* `dropkey [key ...]`
Drops annexed content for specified keys.