diff options
-rw-r--r-- | Command/Fsck.hs | 57 | ||||
-rw-r--r-- | Database/Fsck.hs | 71 | ||||
-rw-r--r-- | Database/Handle.hs | 63 | ||||
-rw-r--r-- | Database/Types.hs | 27 | ||||
-rw-r--r-- | Footype.hs | 20 | ||||
-rw-r--r-- | Locations.hs | 5 | ||||
-rw-r--r-- | foo.hs | 74 | ||||
-rw-r--r-- | fooes.hs | 42 | ||||
-rw-r--r-- | git-annex.cabal | 3 |
9 files changed, 185 insertions, 177 deletions
diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 49b49407a..4890b33bd 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010-2013 Joey Hess <id@joeyh.name> + - Copyright 2010-2015 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -26,13 +26,13 @@ import Logs.Trust import Config.NumCopies import Annex.UUID import Utility.DataUnits -import Utility.FileMode import Config import Types.Key import Types.CleanupActions import Utility.HumanTime import Git.FilePath import Utility.PID +import qualified Database.Fsck as FsckDb import Data.Time.Clock.POSIX import Data.Time @@ -72,6 +72,7 @@ seek ps = do (\k -> startKey i k =<< getNumCopies) (withFilesInGit $ whenAnnexed $ start from i) ps + withFsckDb i (liftIO . FsckDb.closeDb) getIncremental :: Annex Incremental getIncremental = do @@ -82,15 +83,17 @@ getIncremental = do case (i, starti, morei) of (False, False, False) -> return NonIncremental (False, True, False) -> startIncremental - (False ,False, True) -> ContIncremental <$> getStartTime + (False ,False, True) -> contIncremental (True, False, False) -> - maybe startIncremental (return . ContIncremental . Just) + maybe startIncremental (const contIncremental) =<< getStartTime _ -> error "Specify only one of --incremental, --more, or --incremental-schedule" where startIncremental = do recordStartTime - return StartIncremental + FsckDb.newPass + StartIncremental <$> FsckDb.openDb + contIncremental = ContIncremental <$> FsckDb.openDb checkschedule Nothing = error "bad --incremental-schedule value" checkschedule (Just delta) = do @@ -415,8 +418,7 @@ badContentRemote remote key = do return $ (if ok then "dropped from " else "failed to drop from ") ++ Remote.name remote -data Incremental = StartIncremental | ContIncremental (Maybe EpochTime) | NonIncremental - deriving (Eq, Show) +data Incremental = StartIncremental FsckDb.DbHandle | ContIncremental FsckDb.DbHandle | NonIncremental runFsck :: Incremental -> FilePath -> Key -> Annex Bool -> CommandStart runFsck inc file key a = ifM (needFsck inc key) @@ -425,48 +427,23 @@ runFsck inc file key a = ifM (needFsck inc key) next $ do ok <- a when ok $ - recordFsckTime key + recordFsckTime inc key next $ return ok , stop ) {- Check if a key needs to be fscked, with support for incremental fscks. -} needFsck :: Incremental -> Key -> Annex Bool -needFsck (ContIncremental Nothing) _ = return True -needFsck (ContIncremental starttime) key = do - fscktime <- getFsckTime key - return $ fscktime < starttime +needFsck (ContIncremental h) key = not <$> FsckDb.inDb h key needFsck _ _ = return True -{- To record the time that a key was last fscked, without - - modifying its mtime, we set the timestamp of its parent directory. - - Each annexed file is the only thing in its directory, so this is fine. - - - - To record that the file was fscked, the directory's sticky bit is set. - - (None of the normal unix behaviors of the sticky bit should matter, so - - we can reuse this permission bit.) - - - - Note that this relies on the parent directory being deleted when a file - - is dropped. That way, if it's later added back, the fsck record - - won't still be present. - -} -recordFsckTime :: Key -> Annex () -recordFsckTime key = do - parent <- parentDir <$> calcRepo (gitAnnexLocation key) - liftIO $ void $ tryIO $ do - touchFile parent -#ifndef mingw32_HOST_OS - setSticky parent -#endif +withFsckDb :: Incremental -> (FsckDb.DbHandle -> Annex ()) -> Annex () +withFsckDb (ContIncremental h) a = a h +withFsckDb (StartIncremental h) a = a h +withFsckDb NonIncremental _ = noop -getFsckTime :: Key -> Annex (Maybe EpochTime) -getFsckTime key = do - parent <- parentDir <$> calcRepo (gitAnnexLocation key) - liftIO $ catchDefaultIO Nothing $ do - s <- getFileStatus parent - return $ if isSticky $ fileMode s - then Just $ modificationTime s - else Nothing +recordFsckTime :: Incremental -> Key -> Annex () +recordFsckTime inc key = withFsckDb inc $ \h -> FsckDb.addDb h key {- Records the start time of an incremental fsck. - diff --git a/Database/Fsck.hs b/Database/Fsck.hs new file mode 100644 index 000000000..f03a4c009 --- /dev/null +++ b/Database/Fsck.hs @@ -0,0 +1,71 @@ +{- Sqlite database used for incremental fsck. + - + - Copyright 2015 Joey Hess <id@joeyh.name> + -: + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-} +{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-} + +module Database.Fsck ( + newPass, + openDb, + H.closeDb, + H.DbHandle, + addDb, + inDb, + FsckedId, +) where + +import Database.Types +import qualified Database.Handle as H +import Locations +import Utility.Directory +import Annex +import Types.Key +import Annex.Perms + +import Database.Persist.TH +import Database.Esqueleto hiding (Key) +import Control.Monad +import Control.Monad.IfElse +import Control.Monad.IO.Class (liftIO) +import System.Directory + +{- Each key stored in the database has already been fscked as part + - of the latest incremental fsck pass. -} +share [mkPersist sqlSettings, mkMigrate "migrateFsck"] [persistLowerCase| +Fscked + key SKey + UniqueKey key + deriving Show +|] + +{- The database is removed when starting a new incremental fsck pass. -} +newPass :: Annex () +newPass = liftIO. nukeFile =<< fromRepo gitAnnexFsckDb + +{- Opens the database, creating it atomically if it doesn't exist yet. -} +openDb :: Annex H.DbHandle +openDb = do + db <- fromRepo gitAnnexFsckDb + unlessM (liftIO $ doesFileExist db) $ do + let newdb = db ++ ".new" + h <- liftIO $ H.openDb newdb + void $ liftIO $ H.runDb h $ + runMigrationSilent migrateFsck + liftIO $ H.closeDb h + setAnnexFilePerm newdb + liftIO $ renameFile newdb db + liftIO $ H.openDb db + +addDb :: H.DbHandle -> Key -> Annex () +addDb h = void . liftIO . H.runDb h . insert . Fscked . toSKey + +inDb :: H.DbHandle -> Key -> Annex Bool +inDb h k = liftIO $ H.runDb h $ do + r <- select $ from $ \r -> do + where_ (r ^. FsckedKey ==. val (toSKey k)) + return (r ^. FsckedKey) + return $ not $ null r diff --git a/Database/Handle.hs b/Database/Handle.hs new file mode 100644 index 000000000..c39dcfd2b --- /dev/null +++ b/Database/Handle.hs @@ -0,0 +1,63 @@ +{- Persistent sqlite database handles. + - + - Copyright 2015 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Database.Handle ( + DbHandle, + openDb, + closeDb, + runDb, +) where + +import Utility.Exception + +import Database.Persist.Sqlite (runSqlite) +import Database.Esqueleto hiding (Key) +import Control.Monad.IO.Class (liftIO) +import Control.Concurrent +import Control.Concurrent.Async +import Control.Exception (throwIO) +import qualified Data.Text as T + +{- A DbHandle is a reference to a worker thread that communicates with + - the database. It has a MVar which Jobs are submitted to. -} +data DbHandle = DbHandle (Async ()) (MVar Job) + +data Job = Job (SqlPersistM ()) | CloseJob + +openDb :: FilePath -> IO DbHandle +openDb db = do + jobs <- newEmptyMVar + worker <- async (workerThread db jobs) + return $ DbHandle worker jobs + +workerThread :: FilePath -> MVar Job -> IO () +workerThread db jobs = runSqlite (T.pack db) go + where + go = do + job <- liftIO $ takeMVar jobs + case job of + Job a -> a >> go + CloseJob -> return () + +closeDb :: DbHandle -> IO () +closeDb (DbHandle worker jobs) = do + putMVar jobs CloseJob + wait worker + +{- Runs an action using the DbHandle. + - + - Note that the action is not run by the calling thread, but by a + - worker thread. Exceptions are propigated to the calling thread. + - + - Note that only one action can be run at a time against a given DbHandle. + - If called concurrently, this will block until it is able to run. + -} +runDb :: DbHandle -> SqlPersistM a -> IO a +runDb (DbHandle _ jobs) a = do + res <- newEmptyMVar + putMVar jobs $ Job $ liftIO . putMVar res =<< tryNonAsync a + either throwIO return =<< takeMVar res diff --git a/Database/Types.hs b/Database/Types.hs new file mode 100644 index 000000000..dee56832b --- /dev/null +++ b/Database/Types.hs @@ -0,0 +1,27 @@ +{- types for SQL databases + - + - Copyright 2015 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE TemplateHaskell #-} + +module Database.Types where + +import Database.Persist.TH +import Data.Maybe + +import Types.Key + +-- A serialized Key +newtype SKey = SKey String + deriving (Show, Read) + +toSKey :: Key -> SKey +toSKey = SKey . key2file + +fromSKey :: SKey -> Key +fromSKey (SKey s) = fromMaybe (error $ "bad serialied key " ++ s) (file2key s) + +derivePersistField "SKey" diff --git a/Footype.hs b/Footype.hs deleted file mode 100644 index 04d6fead6..000000000 --- a/Footype.hs +++ /dev/null @@ -1,20 +0,0 @@ -{-# LANGUAGE QuasiQuotes, TypeFamilies, GeneralizedNewtypeDeriving, TemplateHaskell, - OverloadedStrings, GADTs, FlexibleContexts #-} - -module Footype where - -import Database.Persist hiding (Key) -import Database.Persist.TH -import Database.Persist.Sqlite hiding (Key) -import Control.Monad.IO.Class (liftIO) -import Control.Monad -import Data.Time.Clock - -import Types.Key -import Types.UUID -import Types.MetaData - --- has to be in a separate file from foo.hs for silly reasons -derivePersistField "Key" -derivePersistField "UUID" -derivePersistField "MetaField" diff --git a/Locations.hs b/Locations.hs index 02995ee4a..0cd57aa98 100644 --- a/Locations.hs +++ b/Locations.hs @@ -57,6 +57,7 @@ module Locations ( gitAnnexSshDir, gitAnnexRemotesDir, gitAnnexAssistantDefaultDir, + gitAnnexFsckDb, isLinkToAnnex, HashLevels(..), hashDirMixed, @@ -340,6 +341,10 @@ gitAnnexRemotesDir r = addTrailingPathSeparator $ gitAnnexDir r </> "remotes" gitAnnexAssistantDefaultDir :: FilePath gitAnnexAssistantDefaultDir = "annex" +{- Database used to record fsck info. -} +gitAnnexFsckDb :: Git.Repo -> FilePath +gitAnnexFsckDb r = gitAnnexDir r </> "fsck.db" + {- Checks a symlink target to see if it appears to point to annexed content. - - We only look at paths inside the .git directory, and not at the .git diff --git a/foo.hs b/foo.hs deleted file mode 100644 index 047c0b03f..000000000 --- a/foo.hs +++ /dev/null @@ -1,74 +0,0 @@ -{-# LANGUAGE QuasiQuotes, TypeFamilies, GeneralizedNewtypeDeriving, TemplateHaskell, - OverloadedStrings, GADTs, FlexibleContexts #-} -import Database.Persist hiding (Key) -import Database.Persist.TH -import Database.Persist.Sqlite hiding (Key) -import Control.Monad.IO.Class (liftIO) -import Control.Monad -import Data.Time.Clock -import Data.Maybe - -import Types.Key -import Types.UUID -import Types.MetaData -import Footype - -data RemoteFsckTime - -share [mkPersist sqlSettings, mkSave "entityDefs", mkMigrate "migrateAll"] [persistLowerCase| -CachedKey - key Key - KeyOutdex key - deriving Show - -AssociatedFiles - keyId CachedKeyId Eq - associatedFile FilePath - KeyIdOutdex keyId associatedFile - deriving Show - -CachedMetaField - field MetaField - FieldOutdex field - deriving Show - -CachedMetaData - keyId CachedKeyId Eq - fieldId CachedMetaFieldId Eq - metaValue String - deriving Show - -LastFscked - keyId CachedKeyId Eq - localFscked Int Maybe -|] - -main :: IO () -main = query - -query :: IO () -query = runSqlite "foo.db" $ do - forM_ [1..1000] $ \i -> do - Just k <- getBy $ KeyOutdex (fromJust $ file2key $ "WORM--" ++ show i) - selectList [AssociatedFilesKeyId ==. entityKey k] [] - -query2 :: IO () -query2 = runSqlite "foo.db" $ do - forM_ [1..1] $ \i -> do - Just f <- getBy $ FieldOutdex (fromJust $ toMetaField "tag") - liftIO $ print f - fs <- selectList [CachedMetaDataFieldId ==. entityKey f] [] - liftIO $ print $ length fs - -populate :: IO () -populate = runSqlite "foo.db" $ do - runMigration migrateAll - t <- insert $ CachedMetaField (fromJust $ toMetaField "tag") - f <- insert $ CachedMetaField (fromJust $ toMetaField "foo") - forM_ [1..30000] $ \i -> do - k <- insert $ CachedKey (fromJust $ file2key $ "WORM--" ++ show i) - liftIO $ print k - insert $ AssociatedFiles k (show i) - insert $ AssociatedFiles k ("and" ++ show (i + 1)) - insert $ CachedMetaData k f (show i) - insert $ CachedMetaData k t "bar" diff --git a/fooes.hs b/fooes.hs deleted file mode 100644 index cc7a0f8c3..000000000 --- a/fooes.hs +++ /dev/null @@ -1,42 +0,0 @@ -{-# LANGUAGE QuasiQuotes, TypeFamilies, GeneralizedNewtypeDeriving, TemplateHaskell, - OverloadedStrings, GADTs, FlexibleContexts #-} -import Database.Persist.TH -import Database.Persist.Sqlite (runSqlite) -import Control.Monad.IO.Class (liftIO) -import Control.Monad -import Database.Esqueleto hiding (Key) - -share [mkPersist sqlSettings, mkMigrate "migrateAll"] [persistLowerCase| -CachedKey - key String - UniqueKey key - deriving Show - -AssociatedFiles - key CachedKeyId Eq - file FilePath - UniqueKeyFile key file - deriving Show -|] - -main :: IO () -main = runSqlite "foo.db" $ do - runMigration migrateAll - if True then populate else return () - query - -populate = do - forM_ [1..30000] $ \i -> do - --delete $ from $ \f -> do - -- where_ (f ^. CachedKeyKey ==. val (show i)) - k <- insert $ CachedKey (show i) - liftIO $ print ("stored", k) - insert $ AssociatedFiles k ("file" ++show (i+1)) - --insert $ AssociatedFiles k ("otherfile" ++show (i+2)) - -query = forM_ [1..1000] $ \i -> do - r <- select $ from $ \(k, f) -> do - where_ (k ^. CachedKeyKey ==. val (show i)) - where_ (f ^. AssociatedFilesKey ==. k ^. CachedKeyId) - return (f ^. AssociatedFilesFile) - liftIO $ print ("got", r) diff --git a/git-annex.cabal b/git-annex.cabal index 986ba6be8..f9d1be316 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -110,7 +110,8 @@ Executable git-annex IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance, SafeSemaphore, uuid, random, dlist, unix-compat, async, stm (>= 2.3), data-default, case-insensitive, http-conduit, http-types, - cryptohash (>= 0.10.0) + cryptohash (>= 0.10.0), + esqueleto, persistent-sqlite, persistent, persistent-template CC-Options: -Wall GHC-Options: -Wall Extensions: PackageImports |