From 083ab36d08569dd64a7ed94cdfd49753964c7bdd Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 16 Feb 2014 17:39:54 -0400 Subject: filter branches (incomplete) Promosing work toward metadata driven filter branches. A few methods to construct them are stubbed out; all the data types and pure code seems good. This commit was sponsored by Walter Somerville. --- Annex/View.hs | 220 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ Git/Sha.hs | 4 + Test.hs | 2 + Types/MetaData.hs | 5 ++ Utility/Path.hs | 15 ++++ 5 files changed, 246 insertions(+) create mode 100644 Annex/View.hs diff --git a/Annex/View.hs b/Annex/View.hs new file mode 100644 index 000000000..620e71f0e --- /dev/null +++ b/Annex/View.hs @@ -0,0 +1,220 @@ +{- metadata based branch views + - + - Copyright 2014 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.View where + +import Common.Annex +import Logs.MetaData +import Types.MetaData +import qualified Git.Types as Git +import qualified Git.Ref +import qualified Git.DiffTree +import qualified Git.Branch +import qualified Git.Index +import Git.Sha (nullSha) +import Utility.QuickCheck + +import qualified Data.Set as S +import Data.Char + +#ifdef WITH_TDFA +import Text.Regex.TDFA +import Text.Regex.TDFA.String +#endif + +type View = [(MetaField, ViewFilter)] + +data ViewFilter + = FilterValues (S.Set MetaValue) +#ifdef WITH_TDFA + | FilterGlob String Regex +#endif + +instance Show ViewFilter where + show (FilterValues s) = show s + show (FilterGlob s _) = s + +instance Eq ViewFilter where + FilterValues x == FilterValues y = x == y + FilterGlob x _ == FilterGlob y _ = x == y + _ == _ = False + +instance Arbitrary ViewFilter where + arbitrary = do + size <- arbitrarySizedBoundedIntegral `suchThat` (< 100) + FilterValues . S.fromList <$> vector size + +{- Can a ViewFilter match multiple different MetaValues? -} +multiValue :: ViewFilter -> Bool +multiValue (FilterValues s) = S.size s > 1 +#ifdef WITH_TDFA +multiValue (FilterGlob _ _) = True +#endif + +type FileView = FilePath +type MkFileView = FilePath -> FileView + +{- Checks if metadata matches a filter, and if so returns the value, + - or values that match. -} +matchFilter :: MetaData -> MetaField -> ViewFilter -> Maybe [MetaValue] +matchFilter metadata metafield (FilterValues s) = nonEmptyList $ + S.intersection s (currentMetaDataValues metafield metadata) +#ifdef WITH_TDFA +matchFilter metadata metafield (FilterGlob _ r) = nonEmptyList $ + S.filter matching (currentMetaDataValues metafield metadata) + where + matching = either (const False) (const True) . execute r . fromMetaValue +#endif + +nonEmptyList :: S.Set a -> Maybe [a] +nonEmptyList s + | S.null s = Nothing + | otherwise = Just $ S.toList s + +{- Converts a filepath used in a reference branch to the + - filename that will be used in the view. + - + - No two filenames from the same branch should yeild the same result, + - so all directory structure needs to be included in the output file + - in some way. However, the branch's directory structure is not relevant + - in the view. + - + - So, from dir/subdir/file.foo, generate file(dir)(subdir).foo + -} +fileViewFromReference :: MkFileView +fileViewFromReference f = base ++ concatMap (\d -> "(" ++ d ++ ")") dirs ++ concat extensions + where + (path, basefile) = splitFileName f + dirs = filter (/= ".") $ map dropTrailingPathSeparator (splitPath path) + (base, extensions) = splitShortExtensions basefile + +{- Generates views for a file from a branch, based on its metadata + - and the filename used in the branch. + - + - Note that a file may appear multiple times in a view, when it + - has multiple matching values for a MetaField used in the View. + -} +fileViews :: View -> MkFileView -> FilePath -> MetaData -> Maybe [FileView] +fileViews view mkfileview file metadata + | any isNothing matches = Nothing + | otherwise = Just $ map ( mkfileview file) $ + pathProduct $ map (map fromMetaValue) $ visible matches + where + matches :: [Maybe [MetaValue]] + matches = map (uncurry $ matchFilter metadata) view + visible :: [Maybe [MetaValue]] -> [[MetaValue]] + visible = map (fromJust . snd) . + filter (multiValue . fst) . + zip (map snd view) + +pathProduct :: [[FilePath]] -> [FilePath] +pathProduct [] = [] +pathProduct (l:ls) = foldl combinel l ls + where + combinel xs ys = [combine x y | x <- xs, y <- ys] + +{- Extracts the metadata from a fileview, based on the view that was used + - to construct it. -} +fromView :: View -> FileView -> MetaData +fromView view f = foldr (uncurry updateMetaData) newMetaData (zip fields values) + where + visible = filter (multiValue . snd) view + fields = map fst visible + paths = splitDirectories $ dropFileName f + values = map toMetaValue paths + +{- Generates a git branch name for a View. + - + - There is no guarantee that each view gets a unique branch name, + - but the branch name is used to express the view as well as possible. + -} +branchView :: View -> Git.Branch +branchView view + | null name = Git.Ref "refs/views" + | otherwise = Git.Ref $ "refs/views/" ++ name + where + name = intercalate "/" $ map branchbit view + branchbit b@(_metafield, viewfilter) + | multiValue viewfilter = branchbit' b + | otherwise = "(" ++ branchbit' b ++ ")" + branchbit' (metafield, viewfilter) + | metafield == tagMetaField = branchvals viewfilter + | otherwise = concat + [ forcelegal (fromMetaField metafield) + , "=" + , branchvals viewfilter + ] + branchvals (FilterValues set) = forcelegal $ + intercalate "," $ map fromMetaValue $ S.toList set +#ifdef WITH_TDFA + branchvals (FilterGlob glob _) = forcelegal $ + replace "*" "ANY" $ replace "?" "_" glob +#endif + forcelegal s + | Git.Ref.legal True s = s + | otherwise = map (\c -> if isAlphaNum c then c else '_') s + +prop_branchView_legal :: View -> Bool +prop_branchView_legal = Git.Ref.legal False . show . branchView + +{- Applies a view to the currently checked out branch, generating a new + - branch for the view. + -} +applyView :: View -> Annex Git.Branch +applyView = applyView' fileViewFromReference + +{- Generates a new branch for a View, which must be a more specific + - version of the View originally used to generate the currently + - checked out branch. + -} +refineView :: View -> Annex Git.Branch +refineView = applyView' id + +{- Go through each file in the currently checked out branch. + - If the file is not annexed, skip it, unless it's a dotfile in the top. + - Look up the metadata of annexed files, and generate any FileViews, + - and stage them into the (temporary) index. + -} +applyView' :: MkFileView -> View -> Annex Git.Branch +applyView' mkfileview view = genViewBranch view $ do + error "TODO" + +{- Applies a view to the reference branch, generating a new branch + - for the View. + - + - This needs to work incrementally, to quickly update the view branch + - when the reference branch is changed. So, it works based on an + - old version of the reference branch, uses diffTree to find the + - changes, and applies those changes to the view branch. + -} +updateView :: View -> Git.Ref -> Git.Ref -> Annex Git.Branch +updateView view ref oldref = genViewBranch view $ do + (diffs, cleanup) <- inRepo $ Git.DiffTree.diffTree oldref ref + forM_ diffs go + void $ liftIO cleanup + where + go diff + | Git.DiffTree.dstsha diff == nullSha = error "TODO delete file" + | otherwise = error "TODO add file" + +{- Generates a branch for a view. This is done by creating a temporary + - index file, which starts off empty. An action is run to stage the files + - that will be in the branch. Then a commit is made, to the view branch. + - The view branch is not checked out, but entering it will display the + - view. -} +genViewBranch :: View -> Annex () -> Annex Git.Branch +genViewBranch view a = withTempIndex $ do + a + let branch = branchView view + void $ inRepo $ Git.Branch.commit True (show branch) branch [] + return branch + +{- -} +withTempIndex :: Annex a -> Annex a +withTempIndex a = error "TODO" diff --git a/Git/Sha.hs b/Git/Sha.hs index ee1b6d669..cbb66ea2d 100644 --- a/Git/Sha.hs +++ b/Git/Sha.hs @@ -37,3 +37,7 @@ shaSize = 40 nullSha :: Ref nullSha = Ref $ replicate shaSize '0' + +{- Git's magic empty tree. -} +emptyTree :: Ref +emptyTree = Ref "4b825dc642cb6eb9a060e54bf8d69288fbee4904" diff --git a/Test.hs b/Test.hs index 5e5d4b340..c5d047875 100644 --- a/Test.hs +++ b/Test.hs @@ -54,6 +54,7 @@ import qualified Config.Cost import qualified Crypto import qualified Annex.Init import qualified Annex.CatFile +import qualified Annex.View import qualified Utility.Path import qualified Utility.FileMode import qualified Build.SysConfig @@ -147,6 +148,7 @@ properties = localOption (QuickCheckTests 1000) $ testGroup "QuickCheck" , testProperty "prop_duration_roundtrips" Utility.HumanTime.prop_duration_roundtrips , testProperty "prop_metadata_sane" Types.MetaData.prop_metadata_sane , testProperty "prop_metadata_serialize" Types.MetaData.prop_metadata_serialize + , testProperty "prop_branchView_legal" Annex.View.prop_branchView_legal ] {- These tests set up the test environment, but also test some basic parts diff --git a/Types/MetaData.hs b/Types/MetaData.hs index 151f456c0..d8184a768 100644 --- a/Types/MetaData.hs +++ b/Types/MetaData.hs @@ -16,6 +16,8 @@ module Types.MetaData ( deserialize, MetaSerializable, toMetaField, + mkMetaField, + tagMetaField, fromMetaField, toMetaValue, mkMetaValue, @@ -225,6 +227,9 @@ mkMetaField f = maybe (Left $ badField f) Right (toMetaField f) badField :: String -> String badField f = "Illegal metadata field name, \"" ++ f ++ "\"" +tagMetaField :: MetaField +tagMetaField = MetaField "tag" + {- Avoid putting too many fields in the map; extremely large maps make - the seriaization test slow due to the sheer amount of data. - It's unlikely that more than 100 fields of metadata will be used. -} diff --git a/Utility/Path.hs b/Utility/Path.hs index 2bcd110d8..e22d0c3f7 100644 --- a/Utility/Path.hs +++ b/Utility/Path.hs @@ -277,3 +277,18 @@ sanitizeFilePath = map sanitize | c == '.' = c | isSpace c || isPunctuation c || isSymbol c || isControl c || c == '/' = '_' | otherwise = c + +{- Similar to splitExtensions, but knows that some things in FilePaths + - after a dot are too long to be extensions. -} +splitShortExtensions :: FilePath -> (FilePath, [String]) +splitShortExtensions = splitShortExtensions' 5 -- enough for ".jpeg" +splitShortExtensions' :: Int -> FilePath -> (FilePath, [String]) +splitShortExtensions' maxextension = go [] + where + go c f + | len > 0 && len <= maxextension && not (null base) = + go (ext:c) base + | otherwise = (f, c) + where + (base, ext) = splitExtension f + len = length ext -- cgit v1.2.3