From 2d0d0b1b401cdcd9c6c1c530826a61bfc3349d12 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 25 Jan 2016 16:16:18 -0400 Subject: matchexpression: New plumbing command to check if a preferred content expression matches some data. --- CmdLine/GitAnnex.hs | 2 + Command/MatchExpression.hs | 75 ++++++++++++++++++++++ Limit.hs | 24 +++++-- Limit/Wanted.hs | 1 + Types/FileMatcher.hs | 13 +++- Utility/FileSize.hs | 6 +- debian/changelog | 2 + ...ent_4_00dfd040f4d8b9f1ed765ee38dbc67b9._comment | 16 +++++ doc/git-annex-matchexpression.mdwn | 51 +++++++++++++++ doc/git-annex.mdwn | 7 +- 10 files changed, 186 insertions(+), 11 deletions(-) create mode 100644 Command/MatchExpression.hs create mode 100644 doc/bugs/treatment_of_largefiles_is_not_working_for_addurl_--fast___40__or_--relaxed__41__/comment_4_00dfd040f4d8b9f1ed765ee38dbc67b9._comment create mode 100644 doc/git-annex-matchexpression.mdwn diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index 0383dada3..ec35285c4 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -27,6 +27,7 @@ import qualified Command.Fsck import qualified Command.LookupKey import qualified Command.ContentLocation import qualified Command.ExamineKey +import qualified Command.MatchExpression import qualified Command.FromKey import qualified Command.RegisterUrl import qualified Command.SetKey @@ -166,6 +167,7 @@ cmds testoptparser testrunner = , Command.LookupKey.cmd , Command.ContentLocation.cmd , Command.ExamineKey.cmd + , Command.MatchExpression.cmd , Command.FromKey.cmd , Command.RegisterUrl.cmd , Command.SetKey.cmd diff --git a/Command/MatchExpression.hs b/Command/MatchExpression.hs new file mode 100644 index 000000000..062a46b55 --- /dev/null +++ b/Command/MatchExpression.hs @@ -0,0 +1,75 @@ +{- git-annex command + - + - Copyright 2016 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.MatchExpression where + +import Command +import Annex.FileMatcher +import Types.FileMatcher +import Utility.DataUnits +import Utility.Matcher +import Annex.UUID +import Logs.Group + +import qualified Data.Map as M +import qualified Data.Set as S + +cmd :: Command +cmd = noCommit $ + command "matchexpression" SectionPlumbing + "checks if a preferred content expression matches" + paramExpression + (seek <$$> optParser) + +data MatchExpressionOptions = MatchExpressionOptions + { matchexpr :: String + , matchinfo :: MatchInfo + } + +optParser :: CmdParamsDesc -> Parser MatchExpressionOptions +optParser desc = MatchExpressionOptions + <$> argument str (metavar desc) + <*> (addkeysize <$> dataparser) + where + dataparser = MatchingInfo + <$> optinfo "file" (strOption + ( long "file" <> metavar paramFile + <> help "specify filename to match against" + )) + <*> optinfo "key" (option (str >>= parseKey) + ( long "key" <> metavar paramKey + <> help "specify key to match against" + )) + <*> optinfo "size" (option (str >>= maybe (fail "parse error") return . readSize dataUnits) + ( long "size" <> metavar paramSize + <> help "specify size to match against" + )) + optinfo datadesc mk = (Right <$> mk) + <|> (pure $ Left $ missingdata datadesc) + missingdata datadesc = bail $ "cannot match this expression without " ++ datadesc ++ " data" + -- When a key is provided, use its size. + addkeysize i@(MatchingInfo f (Right k) _) = case keySize k of + Just sz -> MatchingInfo f (Right k) (Right sz) + Nothing -> i + addkeysize i = i + +seek :: MatchExpressionOptions -> CommandSeek +seek o = do + u <- getUUID + case parsedToMatcher $ exprParser matchAll matchAll groupMap M.empty (Just u) (matchexpr o) of + Left e -> liftIO $ bail $ "bad expression: " ++ e + Right matcher -> ifM (checkmatcher matcher) + ( liftIO exitSuccess + , liftIO exitFailure + ) + where + checkmatcher matcher = matchMrun matcher $ \a -> a S.empty (matchinfo o) + +bail :: String -> IO a +bail s = do + hPutStrLn stderr s + exitWith $ ExitFailure 42 diff --git a/Limit.hs b/Limit.hs index c4bab311a..79335d3b3 100644 --- a/Limit.hs +++ b/Limit.hs @@ -73,21 +73,22 @@ addInclude :: String -> Annex () addInclude = addLimit . limitInclude limitInclude :: MkLimit Annex -limitInclude glob = Right $ const $ return . matchGlobFile glob +limitInclude glob = Right $ const $ matchGlobFile glob {- Add a limit to skip files that match the glob. -} addExclude :: String -> Annex () addExclude = addLimit . limitExclude limitExclude :: MkLimit Annex -limitExclude glob = Right $ const $ return . not . matchGlobFile glob +limitExclude glob = Right $ const $ not <$$> matchGlobFile glob -matchGlobFile :: String -> MatchInfo -> Bool +matchGlobFile :: String -> MatchInfo -> Annex Bool matchGlobFile glob = go where cglob = compileGlob glob CaseSensative -- memoized - go (MatchingKey _) = False - go (MatchingFile fi) = matchGlob cglob (matchFile fi) + go (MatchingKey _) = pure False + go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi) + go (MatchingInfo af _ _) = matchGlob cglob <$> getInfo af {- Adds a limit to skip files not believed to be present - in a specfied repository. Optionally on a prior date. -} @@ -133,8 +134,10 @@ matchPresent u _ = checkKey $ \key -> do limitInDir :: FilePath -> MkLimit Annex limitInDir dir = const $ Right $ const go where - go (MatchingFile fi) = return $ elem dir $ splitPath $ takeDirectory $ matchFile fi + go (MatchingFile fi) = checkf $ matchFile fi go (MatchingKey _) = return False + go (MatchingInfo af _ _) = checkf =<< getInfo af + checkf = return . elem dir . splitPath . takeDirectory {- Adds a limit to skip files not believed to have the specified number - of copies. -} @@ -177,8 +180,9 @@ limitLackingCopies approx want = case readish want of NumCopies numcopies <- if approx then approxNumCopies else case mi of - MatchingKey _ -> approxNumCopies MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi + MatchingKey _ -> approxNumCopies + MatchingInfo _ _ _ -> approxNumCopies us <- filter (`S.notMember` notpresent) <$> (trustExclude UnTrusted =<< Remote.keyLocations key) return $ numcopies - length us >= needed @@ -192,6 +196,9 @@ limitLackingCopies approx want = case readish want of limitUnused :: MatchFiles Annex limitUnused _ (MatchingFile _) = return False limitUnused _ (MatchingKey k) = S.member k <$> unusedKeys +limitUnused _ (MatchingInfo _ ak _) = do + k <- getInfo ak + S.member k <$> unusedKeys {- Limit that matches any version of any file. -} limitAnything :: MatchFiles Annex @@ -240,6 +247,8 @@ limitSize vs s = case readSize dataUnits s of where go sz _ (MatchingFile fi) = lookupFileKey fi >>= check fi sz go sz _ (MatchingKey key) = checkkey sz key + go sz _ (MatchingInfo _ _ as) = + getInfo as >>= \sz' -> return (Just sz' `vs` Just sz) checkkey sz key = return $ keySize key `vs` Just sz check _ sz (Just key) = checkkey sz key check fi sz Nothing = do @@ -281,3 +290,4 @@ lookupFileKey = lookupFile . currFile checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a checkKey a (MatchingKey k) = a k +checkKey a (MatchingInfo _ ak _) = a =<< getInfo ak diff --git a/Limit/Wanted.hs b/Limit/Wanted.hs index 237cb7ae0..c11e24b7d 100644 --- a/Limit/Wanted.hs +++ b/Limit/Wanted.hs @@ -21,3 +21,4 @@ addWantDrop = addLimit $ Right $ const $ checkWant $ wantDrop False Nothing Noth checkWant :: (Maybe FilePath -> Annex Bool) -> MatchInfo -> Annex Bool checkWant a (MatchingFile fi) = a (Just $ matchFile fi) checkWant _ (MatchingKey _) = return False +checkWant _ (MatchingInfo {}) = return False diff --git a/Types/FileMatcher.hs b/Types/FileMatcher.hs index 377bba72a..43f05efb6 100644 --- a/Types/FileMatcher.hs +++ b/Types/FileMatcher.hs @@ -1,6 +1,6 @@ {- git-annex file matcher types - - - Copyright 2013 Joey Hess + - Copyright 2013-2016 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -10,13 +10,16 @@ module Types.FileMatcher where import Types.UUID (UUID) import Types.Key (Key) import Utility.Matcher (Matcher, Token) +import Utility.FileSize +import Control.Monad.IO.Class import qualified Data.Map as M import qualified Data.Set as S data MatchInfo = MatchingFile FileInfo | MatchingKey Key + | MatchingInfo (OptInfo FilePath) (OptInfo Key) (OptInfo FileSize) data FileInfo = FileInfo { currFile :: FilePath @@ -25,6 +28,14 @@ data FileInfo = FileInfo -- ^ filepath to match on; may be relative to top of repo or cwd } +type OptInfo a = Either (IO a) a + +-- If the OptInfo is not available, accessing it may result in eg an +-- exception being thrown. +getInfo :: MonadIO m => OptInfo a -> m a +getInfo (Right i) = pure i +getInfo (Left e) = liftIO e + type FileMatcherMap a = M.Map UUID (Utility.Matcher.Matcher (S.Set UUID -> MatchInfo -> a Bool)) type MkLimit a = String -> Either String (MatchFiles a) diff --git a/Utility/FileSize.hs b/Utility/FileSize.hs index 1055754cb..5f89cff2c 100644 --- a/Utility/FileSize.hs +++ b/Utility/FileSize.hs @@ -13,13 +13,15 @@ import Control.Exception (bracket) import System.IO #endif +type FileSize = Integer + {- Gets the size of a file. - - This is better than using fileSize, because on Windows that returns a - FileOffset which maxes out at 2 gb. - See https://github.com/jystic/unix-compat/issues/16 -} -getFileSize :: FilePath -> IO Integer +getFileSize :: FilePath -> IO FileSize #ifndef mingw32_HOST_OS getFileSize f = fmap (fromIntegral . fileSize) (getFileStatus f) #else @@ -27,7 +29,7 @@ getFileSize f = bracket (openFile f ReadMode) hClose hFileSize #endif {- Gets the size of the file, when its FileStatus is already known. -} -getFileSize' :: FilePath -> FileStatus -> IO Integer +getFileSize' :: FilePath -> FileStatus -> IO FileSize #ifndef mingw32_HOST_OS getFileSize' _ s = return $ fromIntegral $ fileSize s #else diff --git a/debian/changelog b/debian/changelog index 1b3b0f67c..e59f3f218 100644 --- a/debian/changelog +++ b/debian/changelog @@ -16,6 +16,8 @@ git-annex (6.20160115) UNRELEASED; urgency=medium * assistant: Use udisks2 dbus events to detect when disks are mounted, instead of relying on gnome/kde stuff that is not stable. * Fix build with QuickCheck 2.8.2 + * matchexpression: New plumbing command to check if a preferred content + expression matches some data. -- Joey Hess Fri, 15 Jan 2016 14:05:01 -0400 diff --git a/doc/bugs/treatment_of_largefiles_is_not_working_for_addurl_--fast___40__or_--relaxed__41__/comment_4_00dfd040f4d8b9f1ed765ee38dbc67b9._comment b/doc/bugs/treatment_of_largefiles_is_not_working_for_addurl_--fast___40__or_--relaxed__41__/comment_4_00dfd040f4d8b9f1ed765ee38dbc67b9._comment new file mode 100644 index 000000000..465da0707 --- /dev/null +++ b/doc/bugs/treatment_of_largefiles_is_not_working_for_addurl_--fast___40__or_--relaxed__41__/comment_4_00dfd040f4d8b9f1ed765ee38dbc67b9._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2016-01-25T20:15:05Z" + content=""" +Implemented the matchexpression command. + + time for x in $(seq 1 100); do git annex matchexpression "include=*.png and largerthan=100mb" --file=foo.png --size=10mb --debug; done + + real 0m5.167s + user 0m2.688s + sys 0m1.860s + +Don't know if that's fast enough or if it will need further optimisation +or a --batch option.. +"""]] diff --git a/doc/git-annex-matchexpression.mdwn b/doc/git-annex-matchexpression.mdwn new file mode 100644 index 000000000..c148487bf --- /dev/null +++ b/doc/git-annex-matchexpression.mdwn @@ -0,0 +1,51 @@ +# NAME + +git-annex matchexpression - checks if a preferred content expression matches + +# SYNOPSIS + +git annex matchexpression `expression [data]` + +# DESCRIPTION + +This plumbing-level command is given a prefferred content expression, +and some data, and checks if the expression matches the data. It exits 0 if +it matches, and 1 if not. If not enough data was provided, it displays an +error and exits with special code 42. + +For example, this will exit 0: + + git annex matchexpression "include=*.png and largerthan=1mb" --file=foo.png --size=10mb + +# OPTIONS + +* `--file=` + + Provide the filename to match against. Note that the file does not have + to actually exist on disk. + +* `--size=` + + Tell what the size of the file is. The size can be specified with any + commonly used units, for example, "0.5 gb" or "100 KiloBytes". + +* `--key=` + + Tell what key is being matched against. This is needed for + matching expressions like "copies=N" and "metadata=tag=foo" and + "present", which all need to look up the information on file for a key. + + Many keys have a known size, and so --size is not needed when specifying + such a key. + +# SEE ALSO + +[[git-annex]](1) + +[[git-annex-preferred-content]](1) + +# AUTHOR + +Joey Hess + +Warning: Automatically converted into a man page by mdwn2man. Edit with care. diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 329fb8932..91ae78559 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -535,6 +535,12 @@ subdirectories). See [[git-annex-examinekey]](1) for details. +* `matchexpression` + + Checks if a preferred content expression matches provided data. + + See [[git-annex-matchexpression]](1) for details. + * `fromkey [key file]` Manually set up a file in the git repository to link to a specified key. @@ -553,7 +559,6 @@ subdirectories). See [[git-annex-setkey]](1) for details. - * `dropkey [key ...]` Drops annexed content for specified keys. -- cgit v1.2.3