diff options
-rw-r--r-- | Annex/FileMatcher.hs | 17 | ||||
-rw-r--r-- | Logs/PreferredContent.hs | 32 | ||||
-rw-r--r-- | debian/changelog | 2 | ||||
-rw-r--r-- | doc/preferred_content.mdwn | 149 | ||||
-rw-r--r-- | doc/preferred_content/standard_groups.mdwn | 115 |
5 files changed, 166 insertions, 149 deletions
diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs index 750795280..524ae3c7e 100644 --- a/Annex/FileMatcher.hs +++ b/Annex/FileMatcher.hs @@ -56,23 +56,26 @@ parsedToMatcher parsed = case partitionEithers parsed of ([], vs) -> Right $ generate vs (es, _) -> Left $ unwords $ map ("Parse failure: " ++) es -exprParser :: GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token MatchFiles)] -exprParser groupmap configmap mu expr = +exprParser :: FileMatcher -> GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token MatchFiles)] +exprParser matchstandard groupmap configmap mu expr = map parse $ tokenizeMatcher expr where - parse = parseToken + parse = parseToken + matchstandard (limitPresent mu) (limitInDir preferreddir) groupmap preferreddir = fromMaybe "public" $ M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu -parseToken :: MkLimit -> MkLimit -> GroupMap -> String -> Either String (Token MatchFiles) -parseToken checkpresent checkpreferreddir groupmap t +parseToken :: FileMatcher -> MkLimit -> MkLimit -> GroupMap -> String -> Either String (Token MatchFiles) +parseToken matchstandard checkpresent checkpreferreddir groupmap t | t `elem` tokens = Right $ token t + | t == "standard" = Right $ Operation $ \notpresent mi -> + matchMrun matchstandard $ \a -> a notpresent mi | t == "present" = use checkpresent | t == "inpreferreddir" = use checkpreferreddir - | t == "unused" = Right (Operation limitUnused) + | t == "unused" = Right $ Operation limitUnused | otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $ M.fromList [ ("include", limitInclude) @@ -109,5 +112,5 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig rc <- readRemoteLog u <- getUUID either badexpr return $ - parsedToMatcher $ exprParser gm rc (Just u) expr + parsedToMatcher $ exprParser matchAll gm rc (Just u) expr badexpr e = error $ "bad annex.largefiles configuration: " ++ e diff --git 
a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs index 4b25ea094..2bc5f08d6 100644 --- a/Logs/PreferredContent.hs +++ b/Logs/PreferredContent.hs @@ -67,29 +67,25 @@ preferredContentMapLoad = do - versions of git-annex may add new features. Instead, parse errors - result in a Matcher that will always succeed. -} makeMatcher :: GroupMap -> M.Map UUID RemoteConfig -> UUID -> PreferredContentExpression -> FileMatcher -makeMatcher groupmap configmap u expr - | expr == "standard" = standardMatcher groupmap configmap u - | null (lefts tokens) = Utility.Matcher.generate $ rights tokens - | otherwise = matchAll +makeMatcher groupmap configmap u = go True where - tokens = exprParser groupmap configmap (Just u) expr - -{- Standard matchers are pre-defined for some groups. If none is defined, - - or a repository is in multiple groups with standard matchers, match all. -} -standardMatcher :: GroupMap -> M.Map UUID RemoteConfig -> UUID -> FileMatcher -standardMatcher groupmap configmap u = - maybe matchAll (makeMatcher groupmap configmap u . preferredContent) $ - getStandardGroup =<< u `M.lookup` groupsByUUID groupmap + go expandstandard expr + | null (lefts tokens) = Utility.Matcher.generate $ rights tokens + | otherwise = matchAll + where + tokens = exprParser matchstandard groupmap configmap (Just u) expr + matchstandard + | expandstandard = maybe matchAll (go False . 
preferredContent) $ + getStandardGroup =<< u `M.lookup` groupsByUUID groupmap + | otherwise = matchAll {- Checks if an expression can be parsed, if not returns Just error -} checkPreferredContentExpression :: PreferredContentExpression -> Maybe String -checkPreferredContentExpression expr - | expr == "standard" = Nothing - | otherwise = case parsedToMatcher tokens of - Left e -> Just e - Right _ -> Nothing +checkPreferredContentExpression expr = case parsedToMatcher tokens of + Left e -> Just e + Right _ -> Nothing where - tokens = exprParser emptyGroupMap M.empty Nothing expr + tokens = exprParser matchAll emptyGroupMap M.empty Nothing expr {- Puts a UUID in a standard group, and sets its preferred content to use - the standard expression for that group, unless something is already set. -} diff --git a/debian/changelog b/debian/changelog index e3cb63d77..60c6f1030 100644 --- a/debian/changelog +++ b/debian/changelog @@ -17,6 +17,8 @@ git-annex (5.20140307) UNRELEASED; urgency=medium * Fix ssh connection caching stop method to work with openssh 6.5p1, which broke the old method. * Better workaround for problem umasks when eg, setting up ssh keys. + * "standard" can now be used as a first-class keyword in preferred content + expressions. For example "standard or (include=otherdir/*)" -- Joey Hess <joeyh@debian.org> Thu, 06 Mar 2014 16:17:01 -0400 diff --git a/doc/preferred_content.mdwn b/doc/preferred_content.mdwn index 0bc8aa35c..d67cba635 100644 --- a/doc/preferred_content.mdwn +++ b/doc/preferred_content.mdwn @@ -18,6 +18,20 @@ If a file matches, it's preferred to have its content stored in the repository. If it doesn't, it's preferred to drop its content from the repository (if there are enough copies elsewhere). +Rather than writing your own preferred content expression, you can use +several canned ones included in git-annex that are tuned to cover different +common use cases. 
You do this by putting a repository in a group, +and simply setting its preferred content to "standard" to match whatever +is standard for that group. See [[standard_groups]]. + +To check at the command line which files are matched by preferred content +settings, you can use the --want-get and --want-drop options. + +For example, "git annex find --want-get --not --in ." will find all the +files that "git annex get --auto" will want to get, and "git annex find +--want-drop --in ." will find all the files that "git annex drop --auto" +will want to drop. + The expressions are very similar to the matching options documented on the [[git-annex]] man page. At the command line, you can use those options in commands like this: @@ -86,130 +100,17 @@ The name of the directory can be configured using (If no directory name is configured, it uses "public" by default.) -## testing preferred content settings - -To check at the command line which files are matched by preferred content -settings, you can use the --want-get and --want-drop options. - -For example, "git annex find --want-get --not --in ." will find all the -files that "git annex get --auto" will want to get, and "git annex find ---want-drop --in ." will find all the files that "git annex drop --auto" -will want to drop. - -## standard expressions - -git-annex comes with some standard preferred content expressions, that can -be used with repositories that are in some pre-defined groups. To make a -repository use one of these, just set its preferred content expression -to "standard", and put it in one of these groups. - -(Note that most of these standard expressions also make the repository -prefer any content that is only currently available on untrusted and -dead repositories. So if an untrusted repository gets connected, -any repository that can will back it up.) - -### client - -All content is preferred, unless it's for a file in a "archive" directory, -which has reached an archive repository, or is unused. 
- -`(((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) and not unused) or roughlylackingcopies=1` - -### transfer - -Use for repositories that are used to transfer data between other -repositories, but do not need to retain data themselves. For -example, a repository on a server, or in the cloud, or a small -USB drive used in a sneakernet. - -The preferred content expression for these causes them to get and retain -data until all clients have a copy. - -`not (inallgroup=client and copies=client:2) and ($client)` - -(Where $client is a copy of the preferred content expression used for -clients.) - -The "copies=client:2" part of the above handles the case where -there is only one client repository. It makes a transfer repository -speculatively prefer content in this case, even though it as of yet -has nowhere to transfer it to. Presumably, another client repository -will be added later. - -### backup - -All content is preferred. - -`include=* or unused` - -### incremental backup - -Only prefers content that's not already backed up to another backup -or incremental backup repository. - -`((include=* or unused) and (not copies=backup:1) and (not copies=incrementalbackup:1)) or approxlackingcopies=1` - -### small archive - -Only prefers content that's located in an "archive" directory, and -only if it's not already been archived somewhere else. - -`((include=*/archive/* or include=archive/*) and not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1` - -### full archive - -All content is preferred, unless it's already been archived somewhere else. - -`(not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1` - -Note that if you want to archive multiple copies (not a bad idea!), -you should instead configure all your archive repositories with a -version of the above preferred content expression with a larger -number of copies. 
- -### source - -Use for repositories where files are often added, but that do not need to -retain files for local use. For example, a repository on a camera, where -it's desirable to remove photos as soon as they're transferred elsewhere. - -The preferred content expression for these causes them to only retain -data until a copy has been sent to some other repository. - -`not (copies=1)` - -### manual - -This gives you nearly full manual control over what content is stored in the -repository. This allows using the [[assistant]] without it trying to keep a -local copy of every file. Instead, you can manually run `git annex get`, -`git annex drop`, etc to manage content. Only content that is present -is preferred. - -The exception to this manual control is that content that a client -repository would not want is not preferred. So, files in archive -directories are not preferred once their content has -reached an archive repository. - -`present and ($client)` - -(Where $client is a copy of the preferred content expression used for -clients.) - -### public - -This is used for publishing information to a repository that can be -publically accessed. Only files in a directory with a particular name -will be published. (The directory can be located anywhere in the -repository.) - -The name of the directory can be configured using -`git annex enableremote $remote preferreddir=$dirname` +### difference: "standard" -### unwanted +git-annex comes with some standard preferred content expressions, that +can be used with repositories that are in some pre-defined groups, +as listed in [[standard_groups]]. -Use for repositories that you don't want to exist. This will result -in any content on them being moved away to other repositories. (Works -best when the unwanted repository is also marked as untrusted or dead.) 
+When a repository is in exactly one such group, you can use the "standard" +keyword in its preferred content expression, to match whatever content +the group prefers to have. (If a repository is put into multiple standard +groups, "standard" will match anything, so don't do that!) -`exclude=*` +Most often, the whole preferred content expression is simply "standard". +But, you can do more complicated things, for example: +"`standard or include=otherdir/*`" diff --git a/doc/preferred_content/standard_groups.mdwn b/doc/preferred_content/standard_groups.mdwn new file mode 100644 index 000000000..7f2afd446 --- /dev/null +++ b/doc/preferred_content/standard_groups.mdwn @@ -0,0 +1,115 @@ +git-annex comes with some pre-defined [[preferred_content]] settings, that can +be used with repositories that are in special groups. To make a +repository use one of these, just set its preferred content expression +to "standard", and put it in one of these groups. + +(Note that most of these standard expressions also make the repository +prefer any content that is only currently available on untrusted and +dead repositories. So if an untrusted repository gets connected, +any repository that can will back it up.) + +### client + +All content is preferred, unless it's for a file in an "archive" directory, +which has reached an archive repository, or is unused. + +`(((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) and not unused) or roughlylackingcopies=1` + +### transfer + +Use for repositories that are used to transfer data between other +repositories, but do not need to retain data themselves. For +example, a repository on a server, or in the cloud, or a small +USB drive used in a sneakernet. + +The preferred content expression for these causes them to get and retain +data until all clients have a copy. 
+ +`not (inallgroup=client and copies=client:2) and ($client)` + +(Where $client is a copy of the preferred content expression used for +clients.) + +The "copies=client:2" part of the above handles the case where +there is only one client repository. It makes a transfer repository +speculatively prefer content in this case, even though it as of yet +has nowhere to transfer it to. Presumably, another client repository +will be added later. + +### backup + +All content is preferred. + +`include=* or unused` + +### incremental backup + +Only prefers content that's not already backed up to another backup +or incremental backup repository. + +`((include=* or unused) and (not copies=backup:1) and (not copies=incrementalbackup:1)) or approxlackingcopies=1` + +### small archive + +Only prefers content that's located in an "archive" directory, and +only if it's not already been archived somewhere else. + +`((include=*/archive/* or include=archive/*) and not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1` + +### full archive + +All content is preferred, unless it's already been archived somewhere else. + +`(not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1` + +Note that if you want to archive multiple copies (not a bad idea!), +you should instead configure all your archive repositories with a +version of the above preferred content expression with a larger +number of copies. + +### source + +Use for repositories where files are often added, but that do not need to +retain files for local use. For example, a repository on a camera, where +it's desirable to remove photos as soon as they're transferred elsewhere. + +The preferred content expression for these causes them to only retain +data until a copy has been sent to some other repository. + +`not (copies=1)` + +### manual + +This gives you nearly full manual control over what content is stored in the +repository. 
This allows using the [[assistant]] without it trying to keep a +local copy of every file. Instead, you can manually run `git annex get`, +`git annex drop`, etc to manage content. Only content that is present +is preferred. + +The exception to this manual control is that content that a client +repository would not want is not preferred. So, files in archive +directories are not preferred once their content has +reached an archive repository. + +`present and ($client)` + +(Where $client is a copy of the preferred content expression used for +clients.) + +### public + +This is used for publishing information to a repository that can be +publicly accessed. Only files in a directory with a particular name +will be published. (The directory can be located anywhere in the +repository.) + +The name of the directory can be configured using +`git annex enableremote $remote preferreddir=$dirname` + +### unwanted + +Use for repositories that you don't want to exist. This will result +in any content on them being moved away to other repositories. (Works +best when the unwanted repository is also marked as untrusted or dead.) + +`exclude=*` |