summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CmdLine/GitAnnex.hs6
-rw-r--r--Command/Benchmark.hs106
-rw-r--r--Common.hs2
-rw-r--r--Database/Keys.hs5
-rw-r--r--Database/Keys/SQL.hs7
-rw-r--r--doc/git-annex.mdwn5
-rw-r--r--doc/todo/smudge.mdwn8
-rw-r--r--git-annex.cabal8
8 files changed, 135 insertions, 12 deletions
diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs
index ba7689f70..0383dada3 100644
--- a/CmdLine/GitAnnex.hs
+++ b/CmdLine/GitAnnex.hs
@@ -115,6 +115,9 @@ import qualified Command.Test
import qualified Command.FuzzTest
import qualified Command.TestRemote
#endif
+#ifdef WITH_BENCHMARK
+import qualified Command.Benchmark
+#endif
#ifdef WITH_EKG
import System.Remote.Monitoring
#endif
@@ -221,6 +224,9 @@ cmds testoptparser testrunner =
, Command.FuzzTest.cmd
, Command.TestRemote.cmd
#endif
+#ifdef WITH_BENCHMARK
+ , Command.Benchmark.cmd
+#endif
]
run :: Parser TestOptions -> Maybe TestRunner -> [String] -> IO ()
diff --git a/Command/Benchmark.hs b/Command/Benchmark.hs
new file mode 100644
index 000000000..d4585fdb8
--- /dev/null
+++ b/Command/Benchmark.hs
@@ -0,0 +1,106 @@
+{- git-annex benchmark
+ -
+ - Copyright 2016 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+module Command.Benchmark where
+
+import Command
+import Database.Types
+import qualified Database.Keys.SQL as SQL
+import qualified Database.Queue as H
+import Utility.Tmp
+import Git.FilePath
+
+import Criterion.Main
+import Criterion.Internal (runAndAnalyse)
+import Criterion.Monad
+import Control.Monad.IO.Class (liftIO)
+import Control.Monad
+import Control.DeepSeq
+import System.FilePath
+import System.Random
+
+cmd :: Command
+cmd = noRepo (withParams benchmark) $
+ dontCheck repoExists $
+ command "benchmark" SectionTesting
+ "run benchmarks"
+ paramNothing
+ (withParams (liftIO . benchmark))
+
+benchmark :: CmdParams -> IO ()
+benchmark _ = withTmpDirIn "." "benchmark" $ \tmpdir -> do
+ -- benchmark different sizes of databases
+ dbs <- mapM (benchDb tmpdir)
+ [ 1000
+ , 10000
+ -- , 100000
+ ]
+ -- can't use Criterion's defaultMain here because it looks at
+ -- command-line parameters
+ withConfig defaultConfig $ runAndAnalyse (const True) $
+ bgroup "keys database" $ flip concatMap dbs $ \db ->
+ [ getAssociatedFilesHitBench db
+ , getAssociatedFilesMissBench db
+ , getAssociatedKeyHitBench db
+ , getAssociatedKeyMissBench db
+ ]
+
+getAssociatedFilesHitBench :: BenchDb -> Benchmark
+getAssociatedFilesHitBench ( BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (hit)") $ nfIO $ do
+ n <- getStdRandom (randomR (1,num))
+ SQL.getAssociatedFiles (keyN n) (SQL.ReadHandle h)
+
+getAssociatedFilesMissBench :: BenchDb -> Benchmark
+getAssociatedFilesMissBench ( BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (miss)") $ nfIO $
+ SQL.getAssociatedFiles keyMiss (SQL.ReadHandle h)
+
+getAssociatedKeyHitBench :: BenchDb -> Benchmark
+getAssociatedKeyHitBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (hit)") $ nfIO $ do
+ n <- getStdRandom (randomR (1,num))
+ SQL.getAssociatedKey (fileN n) (SQL.ReadHandle h)
+
+getAssociatedKeyMissBench :: BenchDb -> Benchmark
+getAssociatedKeyMissBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (miss)") $ nfIO $
+ SQL.getAssociatedKey fileMiss (SQL.ReadHandle h)
+
+populateAssociatedFiles :: H.DbQueue -> Int -> IO ()
+populateAssociatedFiles h num = do
+ forM_ [1..num] $ \n ->
+ SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h)
+ H.flushDbQueue h
+
+keyN :: Int -> SKey
+keyN n = SKey ("key" ++ show n)
+
+fileN :: Int -> TopFilePath
+fileN n = asTopFilePath ("file" ++ show n)
+
+keyMiss :: SKey
+keyMiss = keyN 0 -- 0 is never stored
+
+fileMiss :: TopFilePath
+fileMiss = fileN 0 -- 0 is never stored
+
+data BenchDb = BenchDb H.DbQueue Int
+
+benchDb :: FilePath -> Int -> IO BenchDb
+benchDb tmpdir num = do
+ putStrLn $ "setting up database with " ++ show num
+ H.initDb f SQL.createTables
+ h <- H.openDbQueue f SQL.containedTable
+ populateAssociatedFiles h num
+ return (BenchDb h num)
+ where
+ f = tmpdir </> "db" ++ show num
+
+instance NFData TopFilePath where
+ rnf = rnf . getTopFilePath
+
+instance NFData SKey where
+ rnf (SKey s) = rnf s
diff --git a/Common.hs b/Common.hs
index 8272043c2..2b8b9a805 100644
--- a/Common.hs
+++ b/Common.hs
@@ -5,7 +5,7 @@ module Common (module X) where
import Control.Monad as X
import Control.Monad.IfElse as X
import Control.Applicative as X
-import "mtl" Control.Monad.State.Strict as X (liftIO)
+import Control.Monad.IO.Class as X (liftIO)
import Data.Maybe as X
import Data.List as X hiding (head, tail, init, last)
diff --git a/Database/Keys.hs b/Database/Keys.hs
index a711ba7ca..faf34132a 100644
--- a/Database/Keys.hs
+++ b/Database/Keys.hs
@@ -127,14 +127,13 @@ openDb createdb _ = withExclusiveLock gitAnnexKeysDbLock $ do
(False, True) -> do
liftIO $ do
createDirectoryIfMissing True dbdir
- H.initDb db $ void $
- runMigrationSilent SQL.migrateKeysDb
+ H.initDb db SQL.createTables
setAnnexDirPerm dbdir
setAnnexFilePerm db
open db
(False, False) -> return DbEmpty
where
- open db = liftIO $ DbOpen <$> H.openDbQueue db "content"
+ open db = liftIO $ DbOpen <$> H.openDbQueue db SQL.containedTable
addAssociatedFile :: Key -> TopFilePath -> Annex ()
addAssociatedFile k f = runWriterIO $ SQL.addAssociatedFile (toSKey k) f
diff --git a/Database/Keys/SQL.hs b/Database/Keys/SQL.hs
index 22bcb86a1..6862b15d9 100644
--- a/Database/Keys/SQL.hs
+++ b/Database/Keys/SQL.hs
@@ -13,6 +13,7 @@
module Database.Keys.SQL where
import Database.Types
+import Database.Handle
import qualified Database.Queue as H
import Utility.InodeCache
import Git.FilePath
@@ -33,6 +34,12 @@ Content
KeyCacheIndex key cache
|]
+containedTable :: TableName
+containedTable = "content"
+
+createTables :: SqlPersistM ()
+createTables = void $ runMigrationSilent migrateKeysDb
+
newtype ReadHandle = ReadHandle H.DbQueue
readDb :: SqlPersistM a -> ReadHandle -> IO a
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 299428d1e..329fb8932 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -672,6 +672,11 @@ subdirectories).
See [[git-annex-fuzztest]](1) for details.
+* `benchmark`
+
+ This runs git-annex's built-in benchmarks, if it was built with
+ benchmarking support.
+
# COMMON OPTIONS
These common options are accepted by all git-annex commands, and
diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn
index 6498863e4..5f3d521bf 100644
--- a/doc/todo/smudge.mdwn
+++ b/doc/todo/smudge.mdwn
@@ -32,14 +32,6 @@ git-annex should use smudge/clean filters.
when pushing changes committed in such a repo. Ideally, should avoid
committing implicit unlocks, or should prevent such commits leaking out
in pushes.
-* Optimisation: See if the database schema can be improved to speed things
- up. Are there enough indexes? getAssociatedKey in particular does a
- reverse lookup and might benefit from an index.
-* Optimisation: Reads from the Keys database avoid doing anything if the
- database doesn't exist. This makes v5 repos, or v6 with all locked files
- faster. However, if a v6 repo unlocks and then re-locks a file, its
- database will exist, and so this optimisation will no longer apply.
- Could try to detect when the database is empty, and remove it or avoid reads.
* Eventually (but not yet), make v6 the default for new repositories.
Note that the assistant forces repos into direct mode; that will need to
diff --git a/git-annex.cabal b/git-annex.cabal
index 5c09a0c61..289e7a255 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -88,6 +88,10 @@ Flag EKG
Description: Enable use of EKG to monitor git-annex as it runs (at http://localhost:4242/)
Default: False
+Flag Benchmark
+ Description: Enable benchmarking
+ Default: False
+
Flag network-uri
Description: Get Network.URI from the network-uri package
Default: True
@@ -260,6 +264,10 @@ Executable git-annex
Build-Depends: ekg
GHC-Options: -with-rtsopts=-T
CPP-Options: -DWITH_EKG
+
+ if flag(Benchmark)
+ Build-Depends: criterion, deepseq
+ CPP-Options: -DWITH_BENCHMARK
source-repository head
type: git