summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2014-02-16 17:39:54 -0400
committerGravatar Joey Hess <joey@kitenet.net>2014-02-16 17:39:54 -0400
commit083ab36d08569dd64a7ed94cdfd49753964c7bdd (patch)
tree27403db8f2304ada025dd8911d03df5801603861
parent16af107d2d573046ab09af964257cf3573889974 (diff)
filter branches (incomplete)
Promosing work toward metadata driven filter branches. A few methods to construct them are stubbed out; all the data types and pure code seems good. This commit was sponsored by Walter Somerville.
-rw-r--r--Annex/View.hs220
-rw-r--r--Git/Sha.hs4
-rw-r--r--Test.hs2
-rw-r--r--Types/MetaData.hs5
-rw-r--r--Utility/Path.hs15
5 files changed, 246 insertions, 0 deletions
diff --git a/Annex/View.hs b/Annex/View.hs
new file mode 100644
index 000000000..620e71f0e
--- /dev/null
+++ b/Annex/View.hs
@@ -0,0 +1,220 @@
+{- metadata based branch views
+ -
+ - Copyright 2014 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE CPP #-}
+
+module Annex.View where
+
+import Common.Annex
+import Logs.MetaData
+import Types.MetaData
+import qualified Git.Types as Git
+import qualified Git.Ref
+import qualified Git.DiffTree
+import qualified Git.Branch
+import qualified Git.Index
+import Git.Sha (nullSha)
+import Utility.QuickCheck
+
+import qualified Data.Set as S
+import Data.Char
+
+#ifdef WITH_TDFA
+import Text.Regex.TDFA
+import Text.Regex.TDFA.String
+#endif
+
+type View = [(MetaField, ViewFilter)]
+
+data ViewFilter
+ = FilterValues (S.Set MetaValue)
+#ifdef WITH_TDFA
+ | FilterGlob String Regex
+#endif
+
+instance Show ViewFilter where
+ show (FilterValues s) = show s
+ show (FilterGlob s _) = s
+
+instance Eq ViewFilter where
+ FilterValues x == FilterValues y = x == y
+ FilterGlob x _ == FilterGlob y _ = x == y
+ _ == _ = False
+
+instance Arbitrary ViewFilter where
+ arbitrary = do
+ size <- arbitrarySizedBoundedIntegral `suchThat` (< 100)
+ FilterValues . S.fromList <$> vector size
+
+{- Can a ViewFilter match multiple different MetaValues? -}
+multiValue :: ViewFilter -> Bool
+multiValue (FilterValues s) = S.size s > 1
+#ifdef WITH_TDFA
+multiValue (FilterGlob _ _) = True
+#endif
+
+type FileView = FilePath
+type MkFileView = FilePath -> FileView
+
+{- Checks if metadata matches a filter, and if so returns the value,
+ - or values that match. -}
+matchFilter :: MetaData -> MetaField -> ViewFilter -> Maybe [MetaValue]
+matchFilter metadata metafield (FilterValues s) = nonEmptyList $
+ S.intersection s (currentMetaDataValues metafield metadata)
+#ifdef WITH_TDFA
+matchFilter metadata metafield (FilterGlob _ r) = nonEmptyList $
+ S.filter matching (currentMetaDataValues metafield metadata)
+ where
+ matching = either (const False) (const True) . execute r . fromMetaValue
+#endif
+
+nonEmptyList :: S.Set a -> Maybe [a]
+nonEmptyList s
+ | S.null s = Nothing
+ | otherwise = Just $ S.toList s
+
+{- Converts a filepath used in a reference branch to the
+ - filename that will be used in the view.
+ -
+ - No two filenames from the same branch should yeild the same result,
+ - so all directory structure needs to be included in the output file
+ - in some way. However, the branch's directory structure is not relevant
+ - in the view.
+ -
+ - So, from dir/subdir/file.foo, generate file(dir)(subdir).foo
+ -}
+fileViewFromReference :: MkFileView
+fileViewFromReference f = base ++ concatMap (\d -> "(" ++ d ++ ")") dirs ++ concat extensions
+ where
+ (path, basefile) = splitFileName f
+ dirs = filter (/= ".") $ map dropTrailingPathSeparator (splitPath path)
+ (base, extensions) = splitShortExtensions basefile
+
+{- Generates views for a file from a branch, based on its metadata
+ - and the filename used in the branch.
+ -
+ - Note that a file may appear multiple times in a view, when it
+ - has multiple matching values for a MetaField used in the View.
+ -}
+fileViews :: View -> MkFileView -> FilePath -> MetaData -> Maybe [FileView]
+fileViews view mkfileview file metadata
+ | any isNothing matches = Nothing
+ | otherwise = Just $ map (</> mkfileview file) $
+ pathProduct $ map (map fromMetaValue) $ visible matches
+ where
+ matches :: [Maybe [MetaValue]]
+ matches = map (uncurry $ matchFilter metadata) view
+ visible :: [Maybe [MetaValue]] -> [[MetaValue]]
+ visible = map (fromJust . snd) .
+ filter (multiValue . fst) .
+ zip (map snd view)
+
+pathProduct :: [[FilePath]] -> [FilePath]
+pathProduct [] = []
+pathProduct (l:ls) = foldl combinel l ls
+ where
+ combinel xs ys = [combine x y | x <- xs, y <- ys]
+
+{- Extracts the metadata from a fileview, based on the view that was used
+ - to construct it. -}
+fromView :: View -> FileView -> MetaData
+fromView view f = foldr (uncurry updateMetaData) newMetaData (zip fields values)
+ where
+ visible = filter (multiValue . snd) view
+ fields = map fst visible
+ paths = splitDirectories $ dropFileName f
+ values = map toMetaValue paths
+
+{- Generates a git branch name for a View.
+ -
+ - There is no guarantee that each view gets a unique branch name,
+ - but the branch name is used to express the view as well as possible.
+ -}
+branchView :: View -> Git.Branch
+branchView view
+ | null name = Git.Ref "refs/views"
+ | otherwise = Git.Ref $ "refs/views/" ++ name
+ where
+ name = intercalate "/" $ map branchbit view
+ branchbit b@(_metafield, viewfilter)
+ | multiValue viewfilter = branchbit' b
+ | otherwise = "(" ++ branchbit' b ++ ")"
+ branchbit' (metafield, viewfilter)
+ | metafield == tagMetaField = branchvals viewfilter
+ | otherwise = concat
+ [ forcelegal (fromMetaField metafield)
+ , "="
+ , branchvals viewfilter
+ ]
+ branchvals (FilterValues set) = forcelegal $
+ intercalate "," $ map fromMetaValue $ S.toList set
+#ifdef WITH_TDFA
+ branchvals (FilterGlob glob _) = forcelegal $
+ replace "*" "ANY" $ replace "?" "_" glob
+#endif
+ forcelegal s
+ | Git.Ref.legal True s = s
+ | otherwise = map (\c -> if isAlphaNum c then c else '_') s
+
+prop_branchView_legal :: View -> Bool
+prop_branchView_legal = Git.Ref.legal False . show . branchView
+
+{- Applies a view to the currently checked out branch, generating a new
+ - branch for the view.
+ -}
+applyView :: View -> Annex Git.Branch
+applyView = applyView' fileViewFromReference
+
+{- Generates a new branch for a View, which must be a more specific
+ - version of the View originally used to generate the currently
+ - checked out branch.
+ -}
+refineView :: View -> Annex Git.Branch
+refineView = applyView' id
+
+{- Go through each file in the currently checked out branch.
+ - If the file is not annexed, skip it, unless it's a dotfile in the top.
+ - Look up the metadata of annexed files, and generate any FileViews,
+ - and stage them into the (temporary) index.
+ -}
+applyView' :: MkFileView -> View -> Annex Git.Branch
+applyView' mkfileview view = genViewBranch view $ do
+ error "TODO"
+
+{- Applies a view to the reference branch, generating a new branch
+ - for the View.
+ -
+ - This needs to work incrementally, to quickly update the view branch
+ - when the reference branch is changed. So, it works based on an
+ - old version of the reference branch, uses diffTree to find the
+ - changes, and applies those changes to the view branch.
+ -}
+updateView :: View -> Git.Ref -> Git.Ref -> Annex Git.Branch
+updateView view ref oldref = genViewBranch view $ do
+ (diffs, cleanup) <- inRepo $ Git.DiffTree.diffTree oldref ref
+ forM_ diffs go
+ void $ liftIO cleanup
+ where
+ go diff
+ | Git.DiffTree.dstsha diff == nullSha = error "TODO delete file"
+ | otherwise = error "TODO add file"
+
+{- Generates a branch for a view. This is done by creating a temporary
+ - index file, which starts off empty. An action is run to stage the files
+ - that will be in the branch. Then a commit is made, to the view branch.
+ - The view branch is not checked out, but entering it will display the
+ - view. -}
+genViewBranch :: View -> Annex () -> Annex Git.Branch
+genViewBranch view a = withTempIndex $ do
+ a
+ let branch = branchView view
+ void $ inRepo $ Git.Branch.commit True (show branch) branch []
+ return branch
+
+{- -}
+withTempIndex :: Annex a -> Annex a
+withTempIndex a = error "TODO"
diff --git a/Git/Sha.hs b/Git/Sha.hs
index ee1b6d669..cbb66ea2d 100644
--- a/Git/Sha.hs
+++ b/Git/Sha.hs
@@ -37,3 +37,7 @@ shaSize = 40
nullSha :: Ref
nullSha = Ref $ replicate shaSize '0'
+
+{- Git's magic empty tree. -}
+emptyTree :: Ref
+emptyTree = Ref "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
diff --git a/Test.hs b/Test.hs
index 5e5d4b340..c5d047875 100644
--- a/Test.hs
+++ b/Test.hs
@@ -54,6 +54,7 @@ import qualified Config.Cost
import qualified Crypto
import qualified Annex.Init
import qualified Annex.CatFile
+import qualified Annex.View
import qualified Utility.Path
import qualified Utility.FileMode
import qualified Build.SysConfig
@@ -147,6 +148,7 @@ properties = localOption (QuickCheckTests 1000) $ testGroup "QuickCheck"
, testProperty "prop_duration_roundtrips" Utility.HumanTime.prop_duration_roundtrips
, testProperty "prop_metadata_sane" Types.MetaData.prop_metadata_sane
, testProperty "prop_metadata_serialize" Types.MetaData.prop_metadata_serialize
+ , testProperty "prop_branchView_legal" Annex.View.prop_branchView_legal
]
{- These tests set up the test environment, but also test some basic parts
diff --git a/Types/MetaData.hs b/Types/MetaData.hs
index 151f456c0..d8184a768 100644
--- a/Types/MetaData.hs
+++ b/Types/MetaData.hs
@@ -16,6 +16,8 @@ module Types.MetaData (
deserialize,
MetaSerializable,
toMetaField,
+ mkMetaField,
+ tagMetaField,
fromMetaField,
toMetaValue,
mkMetaValue,
@@ -225,6 +227,9 @@ mkMetaField f = maybe (Left $ badField f) Right (toMetaField f)
badField :: String -> String
badField f = "Illegal metadata field name, \"" ++ f ++ "\""
+tagMetaField :: MetaField
+tagMetaField = MetaField "tag"
+
{- Avoid putting too many fields in the map; extremely large maps make
- the seriaization test slow due to the sheer amount of data.
- It's unlikely that more than 100 fields of metadata will be used. -}
diff --git a/Utility/Path.hs b/Utility/Path.hs
index 2bcd110d8..e22d0c3f7 100644
--- a/Utility/Path.hs
+++ b/Utility/Path.hs
@@ -277,3 +277,18 @@ sanitizeFilePath = map sanitize
| c == '.' = c
| isSpace c || isPunctuation c || isSymbol c || isControl c || c == '/' = '_'
| otherwise = c
+
+{- Similar to splitExtensions, but knows that some things in FilePaths
+ - after a dot are too long to be extensions. -}
+splitShortExtensions :: FilePath -> (FilePath, [String])
+splitShortExtensions = splitShortExtensions' 5 -- enough for ".jpeg"
+splitShortExtensions' :: Int -> FilePath -> (FilePath, [String])
+splitShortExtensions' maxextension = go []
+ where
+ go c f
+ | len > 0 && len <= maxextension && not (null base) =
+ go (ext:c) base
+ | otherwise = (f, c)
+ where
+ (base, ext) = splitExtension f
+ len = length ext