diff options
-rw-r--r-- | CmdLine/GitAnnex.hs | 6 | ||||
-rw-r--r-- | Command/Benchmark.hs | 106 | ||||
-rw-r--r-- | Common.hs | 2 | ||||
-rw-r--r-- | Database/Keys.hs | 5 | ||||
-rw-r--r-- | Database/Keys/SQL.hs | 7 | ||||
-rw-r--r-- | doc/git-annex.mdwn | 5 | ||||
-rw-r--r-- | doc/todo/smudge.mdwn | 8 | ||||
-rw-r--r-- | git-annex.cabal | 8 |
8 files changed, 135 insertions, 12 deletions
diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index ba7689f70..0383dada3 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -115,6 +115,9 @@ import qualified Command.Test import qualified Command.FuzzTest import qualified Command.TestRemote #endif +#ifdef WITH_BENCHMARK +import qualified Command.Benchmark +#endif #ifdef WITH_EKG import System.Remote.Monitoring #endif @@ -221,6 +224,9 @@ cmds testoptparser testrunner = , Command.FuzzTest.cmd , Command.TestRemote.cmd #endif +#ifdef WITH_BENCHMARK + , Command.Benchmark.cmd +#endif ] run :: Parser TestOptions -> Maybe TestRunner -> [String] -> IO () diff --git a/Command/Benchmark.hs b/Command/Benchmark.hs new file mode 100644 index 000000000..d4585fdb8 --- /dev/null +++ b/Command/Benchmark.hs @@ -0,0 +1,106 @@ +{- git-annex benchmark + - + - Copyright 2016 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module Command.Benchmark where + +import Command +import Database.Types +import qualified Database.Keys.SQL as SQL +import qualified Database.Queue as H +import Utility.Tmp +import Git.FilePath + +import Criterion.Main +import Criterion.Internal (runAndAnalyse) +import Criterion.Monad +import Control.Monad.IO.Class (liftIO) +import Control.Monad +import Control.DeepSeq +import System.FilePath +import System.Random + +cmd :: Command +cmd = noRepo (withParams benchmark) $ + dontCheck repoExists $ + command "benchmark" SectionTesting + "run benchmarks" + paramNothing + (withParams (liftIO . benchmark)) + +benchmark :: CmdParams -> IO () +benchmark _ = withTmpDirIn "." "benchmark" $ \tmpdir -> do + -- benchmark different sizes of databases + dbs <- mapM (benchDb tmpdir) + [ 1000 + , 10000 + -- , 100000 + ] + -- can't use Criterion's defaultMain here because it looks at + -- command-line parameters + withConfig defaultConfig $ runAndAnalyse (const True) $ + bgroup "keys database" $ flip concatMap dbs $ \db -> + [ getAssociatedFilesHitBench db + , getAssociatedFilesMissBench db + , getAssociatedKeyHitBench db + , getAssociatedKeyMissBench db + ] + +getAssociatedFilesHitBench :: BenchDb -> Benchmark +getAssociatedFilesHitBench ( BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (hit)") $ nfIO $ do + n <- getStdRandom (randomR (1,num)) + SQL.getAssociatedFiles (keyN n) (SQL.ReadHandle h) + +getAssociatedFilesMissBench :: BenchDb -> Benchmark +getAssociatedFilesMissBench ( BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (miss)") $ nfIO $ + SQL.getAssociatedFiles keyMiss (SQL.ReadHandle h) + +getAssociatedKeyHitBench :: BenchDb -> Benchmark +getAssociatedKeyHitBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (hit)") $ nfIO $ do + n <- getStdRandom (randomR (1,num)) + SQL.getAssociatedKey (fileN n) (SQL.ReadHandle h) + +getAssociatedKeyMissBench :: BenchDb -> Benchmark +getAssociatedKeyMissBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (miss)") $ nfIO $ + SQL.getAssociatedKey fileMiss (SQL.ReadHandle h) + +populateAssociatedFiles :: H.DbQueue -> Int -> IO () +populateAssociatedFiles h num = do + forM_ [1..num] $ \n -> + SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h) + H.flushDbQueue h + +keyN :: Int -> SKey +keyN n = SKey ("key" ++ show n) + +fileN :: Int -> TopFilePath +fileN n = asTopFilePath ("file" ++ show n) + +keyMiss :: SKey +keyMiss = keyN 0 -- 0 is never stored + +fileMiss :: TopFilePath +fileMiss = fileN 0 -- 0 is never stored + +data BenchDb = BenchDb H.DbQueue Int + +benchDb :: FilePath -> Int -> IO BenchDb +benchDb tmpdir num = do + putStrLn $ "setting up database with " ++ show num + H.initDb f SQL.createTables + h <- H.openDbQueue f SQL.containedTable + populateAssociatedFiles h num + return (BenchDb h num) + where + f = tmpdir </> "db" ++ show num + +instance NFData TopFilePath where + rnf = rnf . getTopFilePath + +instance NFData SKey where + rnf (SKey s) = rnf s @@ -5,7 +5,7 @@ module Common (module X) where import Control.Monad as X import Control.Monad.IfElse as X import Control.Applicative as X -import "mtl" Control.Monad.State.Strict as X (liftIO) +import Control.Monad.IO.Class as X (liftIO) import Data.Maybe as X import Data.List as X hiding (head, tail, init, last) diff --git a/Database/Keys.hs b/Database/Keys.hs index a711ba7ca..faf34132a 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -127,14 +127,13 @@ openDb createdb _ = withExclusiveLock gitAnnexKeysDbLock $ do (False, True) -> do liftIO $ do createDirectoryIfMissing True dbdir - H.initDb db $ void $ - runMigrationSilent SQL.migrateKeysDb + H.initDb db SQL.createTables setAnnexDirPerm dbdir setAnnexFilePerm db open db (False, False) -> return DbEmpty where - open db = liftIO $ DbOpen <$> H.openDbQueue db "content" + open db = liftIO $ DbOpen <$> H.openDbQueue db SQL.containedTable addAssociatedFile :: Key -> TopFilePath -> Annex () addAssociatedFile k f = runWriterIO $ SQL.addAssociatedFile (toSKey k) f diff --git a/Database/Keys/SQL.hs b/Database/Keys/SQL.hs index 22bcb86a1..6862b15d9 100644 --- a/Database/Keys/SQL.hs +++ b/Database/Keys/SQL.hs @@ -13,6 +13,7 @@ module Database.Keys.SQL where import Database.Types +import Database.Handle import qualified Database.Queue as H import Utility.InodeCache import Git.FilePath @@ -33,6 +34,12 @@ Content KeyCacheIndex key cache |] +containedTable :: TableName +containedTable = "content" + +createTables :: SqlPersistM () +createTables = void $ runMigrationSilent migrateKeysDb + newtype ReadHandle = ReadHandle H.DbQueue readDb :: SqlPersistM a -> ReadHandle -> IO a diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 299428d1e..329fb8932 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -672,6 +672,11 @@ subdirectories). See [[git-annex-fuzztest]](1) for details. +* `benchmark` + + This runs git-annex's built-in benchmarks, if it was built with + benchmarking support. + # COMMON OPTIONS These common options are accepted by all git-annex commands, and diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 6498863e4..5f3d521bf 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -32,14 +32,6 @@ git-annex should use smudge/clean filters. when pushing changes committed in such a repo. Ideally, should avoid committing implicit unlocks, or should prevent such commits leaking out in pushes. -* Optimisation: See if the database schema can be improved to speed things - up. Are there enough indexes? getAssociatedKey in particular does a - reverse lookup and might benefit from an index. -* Optimisation: Reads from the Keys database avoid doing anything if the - database doesn't exist. This makes v5 repos, or v6 with all locked files - faster. However, if a v6 repo unlocks and then re-locks a file, its - database will exist, and so this optimisation will no longer apply. - Could try to detect when the database is empty, and remove it or avoid reads. * Eventually (but not yet), make v6 the default for new repositories. Note that the assistant forces repos into direct mode; that will need to diff --git a/git-annex.cabal b/git-annex.cabal index 5c09a0c61..289e7a255 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -88,6 +88,10 @@ Flag EKG Description: Enable use of EKG to monitor git-annex as it runs (at http://localhost:4242/) Default: False +Flag Benchmark + Description: Enable benchmarking + Default: False + Flag network-uri Description: Get Network.URI from the network-uri package Default: True @@ -260,6 +264,10 @@ Executable git-annex Build-Depends: ekg GHC-Options: -with-rtsopts=-T CPP-Options: -DWITH_EKG + + if flag(Benchmark) + Build-Depends: criterion, deepseq + CPP-Options: -DWITH_BENCHMARK source-repository head type: git |