aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CmdLine/GitAnnex.hs6
-rw-r--r--Command/Benchmark.hs106
-rw-r--r--Common.hs2
-rw-r--r--Database/Keys.hs123
-rw-r--r--Database/Keys/SQL.hs108
-rw-r--r--doc/bugs/cannot_add_a_files_with_an_accent_in_it/comment_1_d465d7f88c8f11b4b636fba56711d745._comment23
-rw-r--r--doc/git-annex.mdwn5
-rw-r--r--doc/todo/smudge.mdwn8
-rw-r--r--git-annex.cabal8
9 files changed, 281 insertions, 108 deletions
diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs
index ba7689f70..0383dada3 100644
--- a/CmdLine/GitAnnex.hs
+++ b/CmdLine/GitAnnex.hs
@@ -115,6 +115,9 @@ import qualified Command.Test
import qualified Command.FuzzTest
import qualified Command.TestRemote
#endif
+#ifdef WITH_BENCHMARK
+import qualified Command.Benchmark
+#endif
#ifdef WITH_EKG
import System.Remote.Monitoring
#endif
@@ -221,6 +224,9 @@ cmds testoptparser testrunner =
, Command.FuzzTest.cmd
, Command.TestRemote.cmd
#endif
+#ifdef WITH_BENCHMARK
+ , Command.Benchmark.cmd
+#endif
]
run :: Parser TestOptions -> Maybe TestRunner -> [String] -> IO ()
diff --git a/Command/Benchmark.hs b/Command/Benchmark.hs
new file mode 100644
index 000000000..d4585fdb8
--- /dev/null
+++ b/Command/Benchmark.hs
@@ -0,0 +1,106 @@
+{- git-annex benchmark
+ -
+ - Copyright 2016 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+module Command.Benchmark where
+
+import Command
+import Database.Types
+import qualified Database.Keys.SQL as SQL
+import qualified Database.Queue as H
+import Utility.Tmp
+import Git.FilePath
+
+import Criterion.Main
+import Criterion.Internal (runAndAnalyse)
+import Criterion.Monad
+import Control.Monad.IO.Class (liftIO)
+import Control.Monad
+import Control.DeepSeq
+import System.FilePath
+import System.Random
+
+cmd :: Command
+cmd = noRepo (withParams benchmark) $
+ dontCheck repoExists $
+ command "benchmark" SectionTesting
+ "run benchmarks"
+ paramNothing
+ (withParams (liftIO . benchmark))
+
+benchmark :: CmdParams -> IO ()
+benchmark _ = withTmpDirIn "." "benchmark" $ \tmpdir -> do
+ -- benchmark different sizes of databases
+ dbs <- mapM (benchDb tmpdir)
+ [ 1000
+ , 10000
+ -- , 100000
+ ]
+ -- can't use Criterion's defaultMain here because it looks at
+ -- command-line parameters
+ withConfig defaultConfig $ runAndAnalyse (const True) $
+ bgroup "keys database" $ flip concatMap dbs $ \db ->
+ [ getAssociatedFilesHitBench db
+ , getAssociatedFilesMissBench db
+ , getAssociatedKeyHitBench db
+ , getAssociatedKeyMissBench db
+ ]
+
+getAssociatedFilesHitBench :: BenchDb -> Benchmark
+getAssociatedFilesHitBench ( BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (hit)") $ nfIO $ do
+ n <- getStdRandom (randomR (1,num))
+ SQL.getAssociatedFiles (keyN n) (SQL.ReadHandle h)
+
+getAssociatedFilesMissBench :: BenchDb -> Benchmark
+getAssociatedFilesMissBench ( BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (miss)") $ nfIO $
+ SQL.getAssociatedFiles keyMiss (SQL.ReadHandle h)
+
+getAssociatedKeyHitBench :: BenchDb -> Benchmark
+getAssociatedKeyHitBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (hit)") $ nfIO $ do
+ n <- getStdRandom (randomR (1,num))
+ SQL.getAssociatedKey (fileN n) (SQL.ReadHandle h)
+
+getAssociatedKeyMissBench :: BenchDb -> Benchmark
+getAssociatedKeyMissBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (miss)") $ nfIO $
+ SQL.getAssociatedKey fileMiss (SQL.ReadHandle h)
+
+populateAssociatedFiles :: H.DbQueue -> Int -> IO ()
+populateAssociatedFiles h num = do
+ forM_ [1..num] $ \n ->
+ SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h)
+ H.flushDbQueue h
+
+keyN :: Int -> SKey
+keyN n = SKey ("key" ++ show n)
+
+fileN :: Int -> TopFilePath
+fileN n = asTopFilePath ("file" ++ show n)
+
+keyMiss :: SKey
+keyMiss = keyN 0 -- 0 is never stored
+
+fileMiss :: TopFilePath
+fileMiss = fileN 0 -- 0 is never stored
+
+data BenchDb = BenchDb H.DbQueue Int
+
+benchDb :: FilePath -> Int -> IO BenchDb
+benchDb tmpdir num = do
+ putStrLn $ "setting up database with " ++ show num
+ H.initDb f SQL.createTables
+ h <- H.openDbQueue f SQL.containedTable
+ populateAssociatedFiles h num
+ return (BenchDb h num)
+ where
+ f = tmpdir </> "db" ++ show num
+
+instance NFData TopFilePath where
+ rnf = rnf . getTopFilePath
+
+instance NFData SKey where
+ rnf (SKey s) = rnf s
diff --git a/Common.hs b/Common.hs
index 8272043c2..2b8b9a805 100644
--- a/Common.hs
+++ b/Common.hs
@@ -5,7 +5,7 @@ module Common (module X) where
import Control.Monad as X
import Control.Monad.IfElse as X
import Control.Applicative as X
-import "mtl" Control.Monad.State.Strict as X (liftIO)
+import Control.Monad.IO.Class as X (liftIO)
import Data.Maybe as X
import Data.List as X hiding (head, tail, init, last)
diff --git a/Database/Keys.hs b/Database/Keys.hs
index 4c4c65850..faf34132a 100644
--- a/Database/Keys.hs
+++ b/Database/Keys.hs
@@ -21,10 +21,9 @@ module Database.Keys (
addInodeCaches,
getInodeCaches,
removeInodeCaches,
- AssociatedId,
- ContentId,
) where
+import qualified Database.Keys.SQL as SQL
import Database.Types
import Database.Keys.Handle
import qualified Database.Queue as H
@@ -42,24 +41,7 @@ import Git.Ref
import Git.FilePath
import Annex.CatFile
-import Database.Persist.TH
import Database.Esqueleto hiding (Key)
-import Data.Time.Clock
-
-share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase|
-Associated
- key SKey
- file FilePath
- KeyFileIndex key file
-Content
- key SKey
- cache SInodeCache
- KeyCacheIndex key cache
-|]
-
-newtype ReadHandle = ReadHandle H.DbQueue
-
-type Reader v = ReadHandle -> Annex v
{- Runs an action that reads from the database.
-
@@ -72,7 +54,7 @@ type Reader v = ReadHandle -> Annex v
-
- Any queued writes will be flushed before the read.
-}
-runReader :: Monoid v => Reader v -> Annex v
+runReader :: Monoid v => (SQL.ReadHandle -> Annex v) -> Annex v
runReader a = do
h <- getDbHandle
withDbState h go
@@ -80,50 +62,39 @@ runReader a = do
go DbEmpty = return (mempty, DbEmpty)
go st@(DbOpen qh) = do
liftIO $ H.flushDbQueue qh
- v <- a (ReadHandle qh)
+ v <- a (SQL.ReadHandle qh)
return (v, st)
go DbClosed = do
st' <- openDb False DbClosed
v <- case st' of
- (DbOpen qh) -> a (ReadHandle qh)
+ (DbOpen qh) -> a (SQL.ReadHandle qh)
_ -> return mempty
return (v, st')
-readDb :: SqlPersistM a -> ReadHandle -> Annex a
-readDb a (ReadHandle h) = liftIO $ H.queryDbQueue h a
-
-newtype WriteHandle = WriteHandle H.DbQueue
-
-type Writer = WriteHandle -> Annex ()
+runReaderIO :: Monoid v => (SQL.ReadHandle -> IO v) -> Annex v
+runReaderIO a = runReader (liftIO . a)
{- Runs an action that writes to the database. Typically this is used to
- queue changes, which will be flushed at a later point.
-
- The database is created if it doesn't exist yet. -}
-runWriter :: Writer -> Annex ()
+runWriter :: (SQL.WriteHandle -> Annex ()) -> Annex ()
runWriter a = do
h <- getDbHandle
withDbState h go
where
go st@(DbOpen qh) = do
- v <- a (WriteHandle qh)
+ v <- a (SQL.WriteHandle qh)
return (v, st)
go st = do
st' <- openDb True st
v <- case st' of
- DbOpen qh -> a (WriteHandle qh)
+ DbOpen qh -> a (SQL.WriteHandle qh)
_ -> error "internal"
return (v, st')
-queueDb :: SqlPersistM () -> WriteHandle -> Annex ()
-queueDb a (WriteHandle h) = liftIO $ H.queueDb h checkcommit a
- where
- -- commit queue after 1000 changes or 5 minutes, whichever comes first
- checkcommit sz lastcommittime
- | sz > 1000 = return True
- | otherwise = do
- now <- getCurrentTime
- return $ diffUTCTime lastcommittime now > 300
+runWriterIO :: (SQL.WriteHandle -> IO ()) -> Annex ()
+runWriterIO a = runWriter (liftIO . a)
{- Gets the handle cached in Annex state; creates a new one if it's not yet
- available, but doesn't open the database. -}
@@ -156,60 +127,30 @@ openDb createdb _ = withExclusiveLock gitAnnexKeysDbLock $ do
(False, True) -> do
liftIO $ do
createDirectoryIfMissing True dbdir
- H.initDb db $ void $
- runMigrationSilent migrateKeysDb
+ H.initDb db SQL.createTables
setAnnexDirPerm dbdir
setAnnexFilePerm db
open db
(False, False) -> return DbEmpty
where
- open db = liftIO $ DbOpen <$> H.openDbQueue db "content"
+ open db = liftIO $ DbOpen <$> H.openDbQueue db SQL.containedTable
addAssociatedFile :: Key -> TopFilePath -> Annex ()
-addAssociatedFile k f = runWriter $ addAssociatedFile' k f
-
-addAssociatedFile' :: Key -> TopFilePath -> Writer
-addAssociatedFile' k f = queueDb $ do
- -- If the same file was associated with a different key before,
- -- remove that.
- delete $ from $ \r -> do
- where_ (r ^. AssociatedFile ==. val (getTopFilePath f) &&. not_ (r ^. AssociatedKey ==. val sk))
- void $ insertUnique $ Associated sk (getTopFilePath f)
- where
- sk = toSKey k
+addAssociatedFile k f = runWriterIO $ SQL.addAssociatedFile (toSKey k) f
{- Note that the files returned were once associated with the key, but
- some of them may not be any longer. -}
getAssociatedFiles :: Key -> Annex [TopFilePath]
-getAssociatedFiles = runReader . getAssociatedFiles' . toSKey
-
-getAssociatedFiles' :: SKey -> Reader [TopFilePath]
-getAssociatedFiles' sk = readDb $ do
- l <- select $ from $ \r -> do
- where_ (r ^. AssociatedKey ==. val sk)
- return (r ^. AssociatedFile)
- return $ map (asTopFilePath . unValue) l
+getAssociatedFiles = runReaderIO . SQL.getAssociatedFiles . toSKey
{- Gets any keys that are on record as having a particular associated file.
- (Should be one or none but the database doesn't enforce that.) -}
getAssociatedKey :: TopFilePath -> Annex [Key]
-getAssociatedKey = runReader . getAssociatedKey'
-
-getAssociatedKey' :: TopFilePath -> Reader [Key]
-getAssociatedKey' f = readDb $ do
- l <- select $ from $ \r -> do
- where_ (r ^. AssociatedFile ==. val (getTopFilePath f))
- return (r ^. AssociatedKey)
- return $ map (fromSKey . unValue) l
+getAssociatedKey = map fromSKey <$$> runReaderIO . SQL.getAssociatedKey
removeAssociatedFile :: Key -> TopFilePath -> Annex ()
-removeAssociatedFile k = runWriter . removeAssociatedFile' (toSKey k)
+removeAssociatedFile k = runWriterIO . SQL.removeAssociatedFile (toSKey k)
-removeAssociatedFile' :: SKey -> TopFilePath -> Writer
-removeAssociatedFile' sk f = queueDb $
- delete $ from $ \r -> do
- where_ (r ^. AssociatedKey ==. val sk &&. r ^. AssociatedFile ==. val (getTopFilePath f))
-
{- Find all unlocked associated files. This is expensive, and so normally
- the associated files are updated incrementally when changes are noticed. -}
scanAssociatedFiles :: Annex ()
@@ -224,12 +165,12 @@ scanAssociatedFiles = whenM (isJust <$> inRepo Git.Branch.current) $
=<< catKey (Git.LsTree.sha i)
liftIO $ void cleanup
where
- dropallassociated = queueDb $
- delete $ from $ \(_r :: SqlExpr (Entity Associated)) ->
+ dropallassociated h = liftIO $ flip SQL.queueDb h $
+ delete $ from $ \(_r :: SqlExpr (Entity SQL.Associated)) ->
return ()
isregfile i = Git.Types.toBlobType (Git.LsTree.mode i) == Just Git.Types.FileBlob
- add h i k = flip queueDb h $
- void $ insertUnique $ Associated
+ add h i k = liftIO $ flip SQL.queueDb h $
+ void $ insertUnique $ SQL.Associated
(toSKey k)
(getTopFilePath $ Git.LsTree.file i)
@@ -239,28 +180,12 @@ storeInodeCaches k fs = withTSDelta $ \d ->
addInodeCaches k . catMaybes =<< liftIO (mapM (`genInodeCache` d) fs)
addInodeCaches :: Key -> [InodeCache] -> Annex ()
-addInodeCaches k is = runWriter $ addInodeCaches' (toSKey k) is
-
-addInodeCaches' :: SKey -> [InodeCache] -> Writer
-addInodeCaches' sk is = queueDb $
- forM_ is $ \i -> insertUnique $ Content sk (toSInodeCache i)
+addInodeCaches k is = runWriterIO $ SQL.addInodeCaches (toSKey k) is
{- A key may have multiple InodeCaches; one for the annex object, and one
- for each pointer file that is a copy of it. -}
getInodeCaches :: Key -> Annex [InodeCache]
-getInodeCaches = runReader . getInodeCaches' . toSKey
-
-getInodeCaches' :: SKey -> Reader [InodeCache]
-getInodeCaches' sk = readDb $ do
- l <- select $ from $ \r -> do
- where_ (r ^. ContentKey ==. val sk)
- return (r ^. ContentCache)
- return $ map (fromSInodeCache . unValue) l
+getInodeCaches = runReaderIO . SQL.getInodeCaches . toSKey
removeInodeCaches :: Key -> Annex ()
-removeInodeCaches = runWriter . removeInodeCaches' . toSKey
-
-removeInodeCaches' :: SKey -> Writer
-removeInodeCaches' sk = queueDb $
- delete $ from $ \r -> do
- where_ (r ^. ContentKey ==. val sk)
+removeInodeCaches = runWriterIO . SQL.removeInodeCaches . toSKey
diff --git a/Database/Keys/SQL.hs b/Database/Keys/SQL.hs
new file mode 100644
index 000000000..79230b60d
--- /dev/null
+++ b/Database/Keys/SQL.hs
@@ -0,0 +1,108 @@
+{- Sqlite database of information about Keys
+ -
+ - Copyright 2015-2016 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
+{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
+{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE RankNTypes, ScopedTypeVariables #-}
+
+module Database.Keys.SQL where
+
+import Database.Types
+import Database.Handle
+import qualified Database.Queue as H
+import Utility.InodeCache
+import Git.FilePath
+
+import Database.Persist.TH
+import Database.Esqueleto hiding (Key)
+import Data.Time.Clock
+import Control.Monad
+
+share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase|
+Associated
+ key SKey
+ file FilePath
+ KeyFileIndex key file
+ FileKeyIndex file key
+Content
+ key SKey
+ cache SInodeCache
+ KeyCacheIndex key cache
+|]
+
+containedTable :: TableName
+containedTable = "content"
+
+createTables :: SqlPersistM ()
+createTables = void $ runMigrationSilent migrateKeysDb
+
+newtype ReadHandle = ReadHandle H.DbQueue
+
+readDb :: SqlPersistM a -> ReadHandle -> IO a
+readDb a (ReadHandle h) = H.queryDbQueue h a
+
+newtype WriteHandle = WriteHandle H.DbQueue
+
+queueDb :: SqlPersistM () -> WriteHandle -> IO ()
+queueDb a (WriteHandle h) = H.queueDb h checkcommit a
+ where
+ -- commit queue after 1000 changes or 5 minutes, whichever comes first
+ checkcommit sz lastcommittime
+ | sz > 1000 = return True
+ | otherwise = do
+ now <- getCurrentTime
+ return $ diffUTCTime lastcommittime now > 300
+
+addAssociatedFile :: SKey -> TopFilePath -> WriteHandle -> IO ()
+addAssociatedFile sk f = queueDb $ do
+ -- If the same file was associated with a different key before,
+ -- remove that.
+ delete $ from $ \r -> do
+ where_ (r ^. AssociatedFile ==. val (getTopFilePath f) &&. not_ (r ^. AssociatedKey ==. val sk))
+ void $ insertUnique $ Associated sk (getTopFilePath f)
+
+{- Note that the files returned were once associated with the key, but
+ - some of them may not be any longer. -}
+getAssociatedFiles :: SKey -> ReadHandle -> IO [TopFilePath]
+getAssociatedFiles sk = readDb $ do
+ l <- select $ from $ \r -> do
+ where_ (r ^. AssociatedKey ==. val sk)
+ return (r ^. AssociatedFile)
+ return $ map (asTopFilePath . unValue) l
+
+{- Gets any keys that are on record as having a particular associated file.
+ - (Should be one or none but the database doesn't enforce that.) -}
+getAssociatedKey :: TopFilePath -> ReadHandle -> IO [SKey]
+getAssociatedKey f = readDb $ do
+ l <- select $ from $ \r -> do
+ where_ (r ^. AssociatedFile ==. val (getTopFilePath f))
+ return (r ^. AssociatedKey)
+ return $ map unValue l
+
+removeAssociatedFile :: SKey -> TopFilePath -> WriteHandle -> IO ()
+removeAssociatedFile sk f = queueDb $
+ delete $ from $ \r -> do
+ where_ (r ^. AssociatedKey ==. val sk &&. r ^. AssociatedFile ==. val (getTopFilePath f))
+
+addInodeCaches :: SKey -> [InodeCache] -> WriteHandle -> IO ()
+addInodeCaches sk is = queueDb $
+ forM_ is $ \i -> insertUnique $ Content sk (toSInodeCache i)
+
+{- A key may have multiple InodeCaches; one for the annex object, and one
+ - for each pointer file that is a copy of it. -}
+getInodeCaches :: SKey -> ReadHandle -> IO [InodeCache]
+getInodeCaches sk = readDb $ do
+ l <- select $ from $ \r -> do
+ where_ (r ^. ContentKey ==. val sk)
+ return (r ^. ContentCache)
+ return $ map (fromSInodeCache . unValue) l
+
+removeInodeCaches :: SKey -> WriteHandle -> IO ()
+removeInodeCaches sk = queueDb $
+ delete $ from $ \r -> do
+ where_ (r ^. ContentKey ==. val sk)
diff --git a/doc/bugs/cannot_add_a_files_with_an_accent_in_it/comment_1_d465d7f88c8f11b4b636fba56711d745._comment b/doc/bugs/cannot_add_a_files_with_an_accent_in_it/comment_1_d465d7f88c8f11b4b636fba56711d745._comment
new file mode 100644
index 000000000..0d47449a0
--- /dev/null
+++ b/doc/bugs/cannot_add_a_files_with_an_accent_in_it/comment_1_d465d7f88c8f11b4b636fba56711d745._comment
@@ -0,0 +1,23 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 1"""
+ date="2016-01-11T16:46:42Z"
+ content="""
+I can't even type an e with an accent in it into the bash prompt on OSX.
+However, I copied over such a file, and had no problem adding it:
+
+ bash-3.2$ ls
+ e??
+ bash-3.2$ git annex add .
+ add ë ok
+ bash-3.2$ git annex status
+ A ë
+
+I also tried tab-completing a file named "testë" and that also works:
+
+ oberon:gd joeyh$ git annex add teste\314\210
+ add testë ok
+ (recording state in git...)
+
+So, I can't reproduce your problem.
+"""]]
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 299428d1e..329fb8932 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -672,6 +672,11 @@ subdirectories).
See [[git-annex-fuzztest]](1) for details.
+* `benchmark`
+
+ This runs git-annex's built-in benchmarks, if it was built with
+ benchmarking support.
+
# COMMON OPTIONS
These common options are accepted by all git-annex commands, and
diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn
index 6498863e4..5f3d521bf 100644
--- a/doc/todo/smudge.mdwn
+++ b/doc/todo/smudge.mdwn
@@ -32,14 +32,6 @@ git-annex should use smudge/clean filters.
when pushing changes committed in such a repo. Ideally, should avoid
committing implicit unlocks, or should prevent such commits leaking out
in pushes.
-* Optimisation: See if the database schema can be improved to speed things
- up. Are there enough indexes? getAssociatedKey in particular does a
- reverse lookup and might benefit from an index.
-* Optimisation: Reads from the Keys database avoid doing anything if the
- database doesn't exist. This makes v5 repos, or v6 with all locked files
- faster. However, if a v6 repo unlocks and then re-locks a file, its
- database will exist, and so this optimisation will no longer apply.
- Could try to detect when the database is empty, and remove it or avoid reads.
* Eventually (but not yet), make v6 the default for new repositories.
Note that the assistant forces repos into direct mode; that will need to
diff --git a/git-annex.cabal b/git-annex.cabal
index 5c09a0c61..289e7a255 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -88,6 +88,10 @@ Flag EKG
Description: Enable use of EKG to monitor git-annex as it runs (at http://localhost:4242/)
Default: False
+Flag Benchmark
+ Description: Enable benchmarking
+ Default: False
+
Flag network-uri
Description: Get Network.URI from the network-uri package
Default: True
@@ -260,6 +264,10 @@ Executable git-annex
Build-Depends: ekg
GHC-Options: -with-rtsopts=-T
CPP-Options: -DWITH_EKG
+
+ if flag(Benchmark)
+ Build-Depends: criterion, deepseq
+ CPP-Options: -DWITH_BENCHMARK
source-repository head
type: git