aboutsummaryrefslogtreecommitdiff
path: root/Command/Benchmark.hs
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2016-01-12 13:01:44 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2016-01-12 13:07:03 -0400
commit17cf39db4fb4985ad1230417f537dadce8272d38 (patch)
tree89e5588428255072f88307fc3f324ffc84db7299 /Command/Benchmark.hs
parentfcdf8b0475b39d5132e74978479cb541c276ccfe (diff)
add database benchmark
The benchmark shows that the database access is quite fast indeed! And, it scales linearly with the number of keys, with one exception, getAssociatedKey.

Based on this benchmark, I don't think I need to worry about optimising for cases where all files are locked and the database is mostly empty. In those cases, database access will be misses, and according to this benchmark, should add only 50 milliseconds to runtime.

(NB: There may be some overhead to getting the database opened and locking the handle that this benchmark doesn't see.)

joey@darkstar:~/src/git-annex>./git-annex benchmark
setting up database with 1000
setting up database with 10000

benchmarking keys database/getAssociatedFiles from 1000 (hit)
time 62.77 μs (62.70 μs .. 62.85 μs)
1.000 R² (1.000 R² .. 1.000 R²)
mean 62.81 μs (62.76 μs .. 62.88 μs)
std dev 201.6 ns (157.5 ns .. 259.5 ns)

benchmarking keys database/getAssociatedFiles from 1000 (miss)
time 50.02 μs (49.97 μs .. 50.07 μs)
1.000 R² (1.000 R² .. 1.000 R²)
mean 50.09 μs (50.04 μs .. 50.17 μs)
std dev 206.7 ns (133.8 ns .. 295.3 ns)

benchmarking keys database/getAssociatedKey from 1000 (hit)
time 211.2 μs (210.5 μs .. 212.3 μs)
1.000 R² (0.999 R² .. 1.000 R²)
mean 211.0 μs (210.7 μs .. 212.0 μs)
std dev 1.685 μs (334.4 ns .. 3.517 μs)

benchmarking keys database/getAssociatedKey from 1000 (miss)
time 173.5 μs (172.7 μs .. 174.2 μs)
1.000 R² (0.999 R² .. 1.000 R²)
mean 173.7 μs (173.0 μs .. 175.5 μs)
std dev 3.833 μs (1.858 μs .. 6.617 μs)
variance introduced by outliers: 16% (moderately inflated)

benchmarking keys database/getAssociatedFiles from 10000 (hit)
time 64.01 μs (63.84 μs .. 64.18 μs)
1.000 R² (1.000 R² .. 1.000 R²)
mean 64.85 μs (64.34 μs .. 66.02 μs)
std dev 2.433 μs (547.6 ns .. 4.652 μs)
variance introduced by outliers: 40% (moderately inflated)

benchmarking keys database/getAssociatedFiles from 10000 (miss)
time 50.33 μs (50.28 μs .. 50.39 μs)
1.000 R² (1.000 R² .. 1.000 R²)
mean 50.32 μs (50.26 μs .. 50.38 μs)
std dev 202.7 ns (167.6 ns .. 252.0 ns)

benchmarking keys database/getAssociatedKey from 10000 (hit)
time 1.142 ms (1.139 ms .. 1.146 ms)
1.000 R² (1.000 R² .. 1.000 R²)
mean 1.142 ms (1.140 ms .. 1.144 ms)
std dev 7.142 μs (4.994 μs .. 10.98 μs)

benchmarking keys database/getAssociatedKey from 10000 (miss)
time 1.094 ms (1.092 ms .. 1.096 ms)
1.000 R² (1.000 R² .. 1.000 R²)
mean 1.095 ms (1.095 ms .. 1.097 ms)
std dev 4.277 μs (2.591 μs .. 7.228 μs)
Diffstat (limited to 'Command/Benchmark.hs')
-rw-r--r--Command/Benchmark.hs106
1 files changed, 106 insertions, 0 deletions
diff --git a/Command/Benchmark.hs b/Command/Benchmark.hs
new file mode 100644
index 000000000..d4585fdb8
--- /dev/null
+++ b/Command/Benchmark.hs
@@ -0,0 +1,106 @@
+{- git-annex benchmark
+ -
+ - Copyright 2016 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+module Command.Benchmark where
+
+import Command
+import Database.Types
+import qualified Database.Keys.SQL as SQL
+import qualified Database.Queue as H
+import Utility.Tmp
+import Git.FilePath
+
+import Criterion.Main
+import Criterion.Internal (runAndAnalyse)
+import Criterion.Monad
+import Control.Monad.IO.Class (liftIO)
+import Control.Monad
+import Control.DeepSeq
+import System.FilePath
+import System.Random
+
+{- The "benchmark" command, listed in SectionTesting.
+ -
+ - The same benchmark action is wired up twice: once as the argument to
+ - noRepo, and once (lifted with liftIO) as the regular command action,
+ - which is wrapped in dontCheck repoExists.
+ -}
+cmd :: Command
+cmd = noRepo outsideRepo $ dontCheck repoExists insideRepo
+  where
+    outsideRepo = withParams benchmark
+    insideRepo = command "benchmark" SectionTesting
+        "run benchmarks"
+        paramNothing
+        (withParams (liftIO . benchmark))
+
+{- Runs the keys database benchmarks. The command parameters are ignored.
+ -
+ - A database of each size in 'sizes' is set up with benchDb inside a
+ - temporary directory created under the current directory, and then the
+ - hit and miss benchmarks are run against every database.
+ -
+ - NOTE(review): the queues opened by benchDb are not explicitly closed
+ - in this block; confirm that is acceptable when the temporary
+ - directory is (presumably) cleaned up by withTmpDirIn.
+ -}
+benchmark :: CmdParams -> IO ()
+benchmark _ = withTmpDirIn "." "benchmark" run
+  where
+    -- benchmark different sizes of databases
+    -- (a 100000 entry database is deliberately left out)
+    sizes :: [Int]
+    sizes = [1000, 10000]
+    run tmpdir = do
+        dbs <- mapM (benchDb tmpdir) sizes
+        -- can't use Criterion's defaultMain here because it looks at
+        -- command-line parameters
+        withConfig defaultConfig $ runAndAnalyse (const True) $
+            bgroup "keys database" (concatMap benchesFor dbs)
+    -- the four benchmarks for one database, in reporting order
+    benchesFor db = map ($ db)
+        [ getAssociatedFilesHitBench
+        , getAssociatedFilesMissBench
+        , getAssociatedKeyHitBench
+        , getAssociatedKeyMissBench
+        ]
+
+{- Benchmarks SQL.getAssociatedFiles for a key picked at random from the
+ - range 1..num, which is the range populateAssociatedFiles stores.
+ - The random number generation is part of the measured action.
+ -}
+getAssociatedFilesHitBench :: BenchDb -> Benchmark
+getAssociatedFilesHitBench (BenchDb h num) = bench label (nfIO lookupStored)
+  where
+    label = "getAssociatedFiles from " ++ show num ++ " (hit)"
+    lookupStored = do
+        n <- getStdRandom (randomR (1, num))
+        SQL.getAssociatedFiles (keyN n) (SQL.ReadHandle h)
+
+{- Benchmarks SQL.getAssociatedFiles for keyMiss, a key that
+ - populateAssociatedFiles never stores.
+ -}
+getAssociatedFilesMissBench :: BenchDb -> Benchmark
+getAssociatedFilesMissBench (BenchDb h num) = bench label (nfIO lookupAbsent)
+  where
+    label = "getAssociatedFiles from " ++ show num ++ " (miss)"
+    lookupAbsent = SQL.getAssociatedFiles keyMiss (SQL.ReadHandle h)
+
+{- Benchmarks SQL.getAssociatedKey for a file picked at random from the
+ - range 1..num, which is the range populateAssociatedFiles stores.
+ - The random number generation is part of the measured action.
+ -}
+getAssociatedKeyHitBench :: BenchDb -> Benchmark
+getAssociatedKeyHitBench (BenchDb h num) = bench label (nfIO lookupStored)
+  where
+    label = "getAssociatedKey from " ++ show num ++ " (hit)"
+    lookupStored = do
+        n <- getStdRandom (randomR (1, num))
+        SQL.getAssociatedKey (fileN n) (SQL.ReadHandle h)
+
+{- Benchmarks SQL.getAssociatedKey for fileMiss, a file that
+ - populateAssociatedFiles never stores.
+ -}
+getAssociatedKeyMissBench :: BenchDb -> Benchmark
+getAssociatedKeyMissBench (BenchDb h num) = bench label (nfIO lookupAbsent)
+  where
+    label = "getAssociatedKey from " ++ show num ++ " (miss)"
+    lookupAbsent = SQL.getAssociatedKey fileMiss (SQL.ReadHandle h)
+
+{- Adds an associated file entry (keyN n, fileN n) for every n in
+ - 1..num through the queue's write handle, then flushes the queue.
+ -}
+populateAssociatedFiles :: H.DbQueue -> Int -> IO ()
+populateAssociatedFiles h num = mapM_ addEntry [1..num] >> H.flushDbQueue h
+  where
+    addEntry n = SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h)
+
+{- The benchmark key for number n: "key" followed by the number. -}
+keyN :: Int -> SKey
+keyN = SKey . ("key" ++) . show
+
+{- The benchmark file for number n: "file" followed by the number,
+ - converted with asTopFilePath.
+ -}
+fileN :: Int -> TopFilePath
+fileN = asTopFilePath . ("file" ++) . show
+
+{- A key that is guaranteed to be absent from the benchmark databases:
+ - populateAssociatedFiles only stores the numbers 1..num.
+ -}
+keyMiss :: SKey
+keyMiss = keyN 0 -- 0 is never stored
+
+{- A file that is guaranteed to be absent from the benchmark databases:
+ - populateAssociatedFiles only stores the numbers 1..num.
+ -}
+fileMiss :: TopFilePath
+fileMiss = fileN 0 -- 0 is never stored
+
+{- A database queue opened for benchmarking, paired with the number of
+ - entries that benchDb populated it with.
+ -}
+data BenchDb = BenchDb H.DbQueue Int
+
+{- Sets up a benchmark database holding num entries.
+ -
+ - The database file is named "db" followed by num and lives inside
+ - tmpdir. It is initialized with SQL.createTables, opened as a queue,
+ - and filled by populateAssociatedFiles. A progress line is printed to
+ - stdout first.
+ -}
+benchDb :: FilePath -> Int -> IO BenchDb
+benchDb tmpdir num = do
+    putStrLn ("setting up database with " ++ show num)
+    H.initDb dbfile SQL.createTables
+    queue <- H.openDbQueue dbfile SQL.containedTable
+    populateAssociatedFiles queue num
+    return $ BenchDb queue num
+  where
+    dbfile = tmpdir </> ("db" ++ show num)
+
+{- Orphan instance (the orphan warning is disabled by the OPTIONS_GHC
+ - pragma in this file). Forces a TopFilePath by forcing the path that
+ - getTopFilePath extracts from it; presumably needed so nfIO can fully
+ - evaluate the getAssociatedFiles results.
+ -}
+instance NFData TopFilePath where
+    rnf path = rnf (getTopFilePath path)
+
+{- Orphan instance (the orphan warning is disabled by the OPTIONS_GHC
+ - pragma in this file). Forces an SKey by forcing the value it wraps;
+ - presumably needed so nfIO can fully evaluate the getAssociatedKey
+ - results.
+ -}
+instance NFData SKey where
+    rnf key = case key of
+        SKey s -> rnf s