diff options
-rw-r--r-- | Assistant.hs | 2 | ||||
-rw-r--r-- | Assistant/Alert.hs | 19 | ||||
-rw-r--r-- | Assistant/Threads/SanityChecker.hs | 108 | ||||
-rw-r--r-- | Assistant/Threads/WebApp.hs | 1 | ||||
-rw-r--r-- | Assistant/TransferQueue.hs | 2 | ||||
-rw-r--r-- | Assistant/Types/Alert.hs | 1 | ||||
-rw-r--r-- | Git/Command.hs | 11 | ||||
-rw-r--r-- | Logs/Unused.hs | 1 | ||||
-rw-r--r-- | Types/GitConfig.hs | 5 | ||||
-rw-r--r-- | Types/StandardGroups.hs | 2 | ||||
-rw-r--r-- | Utility/DiskFree.hs | 28 | ||||
-rw-r--r-- | Utility/libdiskfree.c | 37 | ||||
-rw-r--r-- | debian/changelog | 10 | ||||
-rw-r--r-- | doc/git-annex.mdwn | 13 | ||||
-rw-r--r-- | doc/preferred_content.mdwn | 4 | ||||
-rw-r--r-- | doc/todo/Limit_file_revision_history.mdwn | 7 |
16 files changed, 219 insertions, 32 deletions
diff --git a/Assistant.hs b/Assistant.hs index d4786f99a..800a3ef78 100644 --- a/Assistant.hs +++ b/Assistant.hs @@ -145,7 +145,7 @@ startDaemon assistant foreground startdelay cannotrun listenhost startbrowser = , assist $ transferPollerThread , assist $ transfererThread , assist $ daemonStatusThread - , assist $ sanityCheckerDailyThread + , assist $ sanityCheckerDailyThread urlrenderer , assist $ sanityCheckerHourlyThread , assist $ problemFixerThread urlrenderer #ifdef WITH_CLIBS diff --git a/Assistant/Alert.hs b/Assistant/Alert.hs index 055e66de5..b5ee706b8 100644 --- a/Assistant/Alert.hs +++ b/Assistant/Alert.hs @@ -260,6 +260,25 @@ upgradeFailedAlert :: String -> Alert upgradeFailedAlert msg = (errorAlert msg []) { alertHeader = Just $ fromString "Upgrade failed." } +unusedFilesAlert :: [AlertButton] -> String -> Alert +unusedFilesAlert buttons message = Alert + { alertHeader = Just $ fromString $ unwords + [ "Old and deleted files are piling up --" + , message + ] + , alertIcon = Just InfoIcon + , alertPriority = High + , alertButtons = buttons + , alertClosable = True + , alertClass = Message + , alertMessageRender = renderData + , alertCounter = 0 + , alertBlockDisplay = True + , alertName = Just UnusedFilesAlert + , alertCombiner = Just $ fullCombiner $ \new _old -> new + , alertData = [] + } + brokenRepositoryAlert :: [AlertButton] -> Alert brokenRepositoryAlert = errorAlert "Serious problems have been detected with your repository. This needs your immediate attention!" diff --git a/Assistant/Threads/SanityChecker.hs b/Assistant/Threads/SanityChecker.hs index 8aa691cdc..07d46ff84 100644 --- a/Assistant/Threads/SanityChecker.hs +++ b/Assistant/Threads/SanityChecker.hs @@ -5,6 +5,8 @@ - Licensed under the GNU GPL version 3 or higher. -} +{-# LANGUAGE CPP #-} + module Assistant.Threads.SanityChecker ( sanityCheckerStartupThread, sanityCheckerDailyThread, @@ -16,7 +18,10 @@ import Assistant.DaemonStatus import Assistant.Alert import Assistant.Repair import Assistant.Ssh +import Assistant.TransferQueue +import Assistant.Types.UrlRenderer import qualified Annex.Branch +import qualified Git import qualified Git.LsFiles import qualified Git.Command import qualified Git.Config @@ -25,12 +30,26 @@ import qualified Assistant.Threads.Watcher as Watcher import Utility.LogFile import Utility.Batch import Utility.NotificationBroadcaster +import Utility.DiskFree import Config import Utility.HumanTime +import Utility.DataUnits import Git.Repair import Git.Index +import Logs.Unused +import Logs.Location +import Logs.Transfer +import Annex.Content +import Config.Files +import Types.Key +import qualified Annex +#ifdef WITH_WEBAPP +import Assistant.WebApp.Types +#endif import Data.Time.Clock.POSIX +import qualified Data.Map as M +import qualified Data.Text as T {- This thread runs once at startup, and most other threads wait for it - to finish. (However, the webapp thread does not, to prevent the UI @@ -78,8 +97,8 @@ sanityCheckerHourlyThread = namedThread "SanityCheckerHourly" $ forever $ do hourlyCheck {- This thread wakes up daily to make sure the tree is in good shape. -} -sanityCheckerDailyThread :: NamedThread -sanityCheckerDailyThread = namedThread "SanityCheckerDaily" $ forever $ do +sanityCheckerDailyThread :: UrlRenderer -> NamedThread +sanityCheckerDailyThread urlrenderer = namedThread "SanityCheckerDaily" $ forever $ do waitForNextCheck debug ["starting sanity check"] @@ -90,7 +109,8 @@ sanityCheckerDailyThread = namedThread "SanityCheckerDaily" $ forever $ do modifyDaemonStatus_ $ \s -> s { sanityCheckRunning = True } now <- liftIO getPOSIXTime -- before check started - r <- either showerr return =<< (tryIO . batch) <~> dailyCheck + r <- either showerr return + =<< (tryIO . batch) <~> dailyCheck urlrenderer modifyDaemonStatus_ $ \s -> s { sanityCheckRunning = False @@ -119,9 +139,10 @@ waitForNextCheck = do {- It's important to stay out of the Annex monad as much as possible while - running potentially expensive parts of this check, since remaining in it - will block the watcher. -} -dailyCheck :: Assistant Bool -dailyCheck = do +dailyCheck :: UrlRenderer -> Assistant Bool +dailyCheck urlrenderer = do g <- liftAnnex gitRepo + batchmaker <- liftIO getBatchCommandMaker -- Find old unstaged symlinks, and add them to git. (unstaged, cleanup) <- liftIO $ Git.LsFiles.notInRepo False ["."] g @@ -140,12 +161,28 @@ dailyCheck = do - to have a lot of small objects and they should not be a - significant size. -} when (Git.Config.getMaybe "gc.auto" g == Just "0") $ - liftIO $ void $ Git.Command.runBool + liftIO $ void $ Git.Command.runBatch batchmaker [ Param "-c", Param "gc.auto=670000" , Param "gc" , Param "--auto" ] g + {- Check if the unused files found last time have been dealt with. -} + checkOldUnused urlrenderer + + {- Run git-annex unused once per day. This is run as a separate + - process to stay out of the annex monad and so it can run as a + - batch job. -} + program <- liftIO readProgramFile + let (program', params') = batchmaker (program, [Param "unused"]) + void $ liftIO $ boolSystem program' params' + {- Invalidate unused keys cache, and queue transfers of all unused + - keys. -} + unused <- liftAnnex unusedKeys' + void $ liftAnnex $ setUnusedKeys unused + forM_ unused $ \k -> + queueTransfers "unused" Later k Nothing Upload + return True where toonew timestamp now = now < (realToFrac (timestamp + slop) :: POSIXTime) @@ -159,7 +196,8 @@ dailyCheck = do insanity $ "found unstaged symlink: " ++ file hourlyCheck :: Assistant () -hourlyCheck = checkLogSize 0 +hourlyCheck = do + checkLogSize 0 {- Rotate logs until log file size is < 1 mb. -} checkLogSize :: Int -> Assistant () @@ -184,3 +222,59 @@ oneHour = 60 * 60 oneDay :: Int oneDay = 24 * oneHour +{- If annex.expireunused is set, find any keys that have lingered unused + - for the specified duration, and remove them. + - + - Otherwise, check to see if unused keys are piling up, and let the user + - know. This uses heuristics: 1000 unused keys, or more unused keys + - than the remaining free disk space, or more than 1/10th the total + - disk space being unused keys all suggest a problem. -} +checkOldUnused :: UrlRenderer -> Assistant () +checkOldUnused urlrenderer = go =<< annexExpireUnused <$> liftAnnex Annex.getGitConfig + where + go (Just expireunused) = do + m <- liftAnnex $ readUnusedLog "" + now <- liftIO getPOSIXTime + let duration = durationToPOSIXTime expireunused + let oldkeys = M.keys $ M.filter (tooold now duration) m + forM_ oldkeys $ \k -> do + debug ["removing old unused key", key2file k] + liftAnnex $ do + removeAnnex k + logStatus k InfoMissing + go Nothing = maybe noop prompt + =<< toobig =<< liftAnnex (readUnusedLog "") + + tooold now duration (_, mt) = + maybe False (\t -> now - t >= duration) mt + + toobig m = do + let num = M.size m + let diskused = foldl' sumkeysize 0 (M.keys m) + df <- forpath getDiskFree + disksize <- forpath getDiskSize + return $ if moreused df diskused || tenthused disksize diskused + then Just $ roughSize storageUnits False diskused ++ + " is used by old files" + else if num > 1000 + then Just $ show num ++ " old files exist" + else Nothing + + moreused Nothing _ = False + moreused (Just df) used = df <= used + + tenthused Nothing _ = False + tenthused (Just disksize) used = used >= disksize `div` 10 + + sumkeysize s k = s + fromMaybe 0 (keySize k) + + forpath a = liftAnnex $ inRepo $ liftIO . a . Git.repoPath + + prompt msg = +#ifdef WITH_WEBAPP + do + button <- mkAlertButton True (T.pack "Fix This") urlrenderer ConfigUnusedR + void $ addAlert $ unusedFilesAlert [button] msg +#else + debug [msg] +#endif diff --git a/Assistant/Threads/WebApp.hs b/Assistant/Threads/WebApp.hs index a7f9cc5b2..d2c2afd47 100644 --- a/Assistant/Threads/WebApp.hs +++ b/Assistant/Threads/WebApp.hs @@ -27,6 +27,7 @@ import Assistant.WebApp.Configurators.IA import Assistant.WebApp.Configurators.WebDAV import Assistant.WebApp.Configurators.XMPP import Assistant.WebApp.Configurators.Preferences +import Assistant.WebApp.Configurators.Unused import Assistant.WebApp.Configurators.Edit import Assistant.WebApp.Configurators.Delete import Assistant.WebApp.Configurators.Fsck diff --git a/Assistant/TransferQueue.hs b/Assistant/TransferQueue.hs index 98fb2f06c..6d8e72852 100644 --- a/Assistant/TransferQueue.hs +++ b/Assistant/TransferQueue.hs @@ -1,6 +1,6 @@ {- git-annex assistant pending transfer queue - - - Copyright 2012 Joey Hess <joey@kitenet.net> + - Copyright 2012-2014 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} diff --git a/Assistant/Types/Alert.hs b/Assistant/Types/Alert.hs index e6fbe86d3..19fe55e6e 100644 --- a/Assistant/Types/Alert.hs +++ b/Assistant/Types/Alert.hs @@ -32,6 +32,7 @@ data AlertName | SyncAlert | NotFsckedAlert | UpgradeAlert + | UnusedFilesAlert deriving (Eq) {- The first alert is the new alert, the second is an old alert. diff --git a/Git/Command.hs b/Git/Command.hs index 4c338ba25..90abc7e4f 100644 --- a/Git/Command.hs +++ b/Git/Command.hs @@ -18,6 +18,7 @@ import qualified Utility.CoProcess as CoProcess #ifdef mingw32_HOST_OS import Git.FilePath #endif +import Utility.Batch {- Constructs a git command line operating on the specified repo. -} gitCommandLine :: [CommandParam] -> Repo -> [CommandParam] @@ -41,9 +42,13 @@ gitCommandLine _ repo = assertLocal repo $ error "internal" {- Runs git in the specified repo. -} runBool :: [CommandParam] -> Repo -> IO Bool runBool params repo = assertLocal repo $ - boolSystemEnv "git" - (gitCommandLine params repo) - (gitEnv repo) + boolSystemEnv "git" (gitCommandLine params repo) (gitEnv repo) + +{- Runs git in batch mode. -} +runBatch :: BatchCommandMaker -> [CommandParam] -> Repo -> IO Bool +runBatch batchmaker params repo = assertLocal repo $ do + let (cmd, params') = batchmaker ("git", gitCommandLine params repo) + boolSystemEnv cmd params' (gitEnv repo) {- Runs git in the specified repo, throwing an error if it fails. -} run :: [CommandParam] -> Repo -> IO () diff --git a/Logs/Unused.hs b/Logs/Unused.hs index 74f46b85e..cdfcd2165 100644 --- a/Logs/Unused.hs +++ b/Logs/Unused.hs @@ -21,6 +21,7 @@ module Logs.Unused ( readUnusedLog, readUnusedMap, unusedKeys, + setUnusedKeys, unusedKeys' ) where diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index e19fdc42f..f8e3be98e 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -1,6 +1,6 @@ {- git-annex configuration - - - Copyright 2012 Joey Hess <joey@kitenet.net> + - Copyright 2012-2014 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} @@ -20,6 +20,7 @@ import Config.Cost import Types.Distribution import Types.Availability import Types.NumCopies +import Utility.HumanTime {- Main git-annex settings. Each setting corresponds to a git-config key - such as annex.foo -} @@ -46,6 +47,7 @@ data GitConfig = GitConfig , annexLargeFiles :: Maybe String , annexFsckNudge :: Bool , annexAutoUpgrade :: AutoUpgrade + , annexExpireUnused :: Maybe Duration , coreSymlinks :: Bool , gcryptId :: Maybe String } @@ -75,6 +77,7 @@ extractGitConfig r = GitConfig , annexLargeFiles = getmaybe (annex "largefiles") , annexFsckNudge = getbool (annex "fscknudge") True , annexAutoUpgrade = toAutoUpgrade $ getmaybe (annex "autoupgrade") + , annexExpireUnused = parseDuration =<< getmaybe (annex "expireunused") , coreSymlinks = getbool "core.symlinks" True , gcryptId = getmaybe "core.gcrypt-id" } diff --git a/Types/StandardGroups.hs b/Types/StandardGroups.hs index f89b4e424..434156a0e 100644 --- a/Types/StandardGroups.hs +++ b/Types/StandardGroups.hs @@ -75,7 +75,7 @@ associatedDirectory _ _ = Nothing {- See doc/preferred_content.mdwn for explanations of these expressions. -} preferredContent :: StandardGroup -> PreferredContentExpression preferredContent ClientGroup = lastResort $ - "(exclude=*/archive/* and exclude=archive/*) or (" ++ notArchived ++ ")" + "((exclude=*/archive/* and exclude=archive/*) or (" ++ notArchived ++ ")) and not unused" preferredContent TransferGroup = lastResort $ "not (inallgroup=client and copies=client:2) and (" ++ preferredContent ClientGroup ++ ")" preferredContent BackupGroup = "include=*" diff --git a/Utility/DiskFree.hs b/Utility/DiskFree.hs index f04f636ec..2f296e2cb 100644 --- a/Utility/DiskFree.hs +++ b/Utility/DiskFree.hs @@ -1,13 +1,16 @@ {- disk free space checking - - - Copyright 2012 Joey Hess <joey@kitenet.net> + - Copyright 2012, 2014 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} {-# LANGUAGE ForeignFunctionInterface, CPP #-} -module Utility.DiskFree ( getDiskFree ) where +module Utility.DiskFree ( + getDiskFree, + getDiskSize +) where #ifdef WITH_CLIBS @@ -20,9 +23,12 @@ import Foreign.C.Error foreign import ccall safe "libdiskfree.h diskfree" c_diskfree :: CString -> IO CULLong -getDiskFree :: FilePath -> IO (Maybe Integer) -getDiskFree path = withFilePath path $ \c_path -> do - free <- c_diskfree c_path +foreign import ccall safe "libdiskfree.h disksize" c_disksize + :: CString -> IO CULLong + +getVal :: (CString -> IO CULLong) -> FilePath -> IO (Maybe Integer) +getVal getter path = withFilePath path $ \c_path -> do + free <- getter c_path ifM (safeErrno <$> getErrno) ( return $ Just $ toInteger free , return Nothing @@ -30,6 +36,12 @@ getDiskFree path = withFilePath path $ \c_path -> do where safeErrno (Errno v) = v == 0 +getDiskFree :: FilePath -> IO (Maybe Integer) +getDiskFree = getVal c_diskfree + +getDiskSize :: FilePath -> IO (Maybe Integer) +getDiskSize = getVal c_disksize + #else #ifdef mingw32_HOST_OS @@ -41,6 +53,9 @@ getDiskFree :: FilePath -> IO (Maybe Integer) getDiskFree path = catchMaybeIO $ do (sectors, bytes, nfree, _ntotal) <- getDiskFreeSpace (Just path) return $ toInteger sectors * toInteger bytes * toInteger nfree + +getDiskSize :: FilePath -> IO (Maybe Integer) +getDiskSize _ = return Nothing #else #warning Building without disk free space checking support @@ -48,5 +63,8 @@ getDiskFree path = catchMaybeIO $ do getDiskFree :: FilePath -> IO (Maybe Integer) getDiskFree _ = return Nothing +getDiskSize :: FilePath -> IO (Maybe Integer) +getDiskSize _ = return Nothing + #endif #endif diff --git a/Utility/libdiskfree.c b/Utility/libdiskfree.c index d2843ed20..8c9ab6145 100644 --- a/Utility/libdiskfree.c +++ b/Utility/libdiskfree.c @@ -1,6 +1,6 @@ /* disk free space checking, C mini-library * - * Copyright 2012 Joey Hess <joey@kitenet.net> + * Copyright 2012, 2014 Joey Hess <joey@kitenet.net> * * Licensed under the GNU GPL version 3 or higher. */ @@ -43,16 +43,12 @@ #include <errno.h> #include <stdio.h> -/* Checks the amount of disk that is available to regular (non-root) users. - * (If there's an error, or this is not supported, - * returns 0 and sets errno to nonzero.) - */ -unsigned long long int diskfree(const char *path) { +unsigned long long int get(const char *path, int req) { #ifdef UNKNOWN errno = 1; return 0; #else - unsigned long long int available, blocksize; + unsigned long long int v, blocksize; struct STATSTRUCT buf; if (STATCALL(path, &buf) != 0) @@ -60,12 +56,35 @@ unsigned long long int diskfree(const char *path) { else errno = 0; - available = buf.f_bavail; + switch (req) { + case 0: + v = buf.f_blocks; + break; + case 1: + v = buf.f_bavail; + break; + default: + v = 0; + } + blocksize = buf.f_bsize; - return available * blocksize; + return v * blocksize; #endif } +/* Checks the amount of disk that is available to regular (non-root) users. + * (If there's an error, or this is not supported, + * returns 0 and sets errno to nonzero.) + */ +unsigned long long int diskfree(const char *path) { + return get(path, 1); +} + +/* Gets the total size of the disk. */ +unsigned long long int disksize(const char *path) { + return get(path, 0); +} + /* main () { printf("%lli\n", diskfree(".")); diff --git a/debian/changelog b/debian/changelog index 7c2a0d3b3..420ae603b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -18,7 +18,17 @@ git-annex (5.20140118) UNRELEASED; urgency=medium preferred content expressions. * Client, transfer, incremental backup, and archive repositories now want to get content that does not yet have enough copies. + * Client, transfer, and source repositories now do not want to retain + unused file contents. + * assistant: Checks daily for unused file contents, and when possible + moves them to a repository (such as a backup repository) that + wants to retain them. + * assistant: annex.expireunused can be configured to cause unused + file contents to be deleted after some period of time. + * webapp: Nudge user to see if they want to expire old unused file + contents when a lot of them seem to be piling up in the repository. * repair: Check git version at run time. + * assistant: Run the periodic git gc in batch mode. -- Joey Hess <joeyh@debian.org> Sat, 18 Jan 2014 11:54:17 -0400 diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 279fa24dd..3e239f7d6 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1228,6 +1228,19 @@ Here are all the supported configuration settings. to close it. On Mac OSX, when not using direct mode this defaults to 1 second, to work around a bad interaction with software there. +* `annex.expireunused` + + Controls what the assistant does about unused file contents + that are stored in the repository. + + The default is `false`, which causes + all old and unused file contents to be retained, unless the assistant + is able to move them to some other repository (such as a backup repository). + + Can be set to a time specification, like "7d" or "1m", and then + file contents that have been known to be unused for a week or a + month will be deleted. + * `annex.fscknudge` When set to false, prevents the webapp from reminding you when using diff --git a/doc/preferred_content.mdwn b/doc/preferred_content.mdwn index 039df3878..92250c034 100644 --- a/doc/preferred_content.mdwn +++ b/doc/preferred_content.mdwn @@ -111,9 +111,9 @@ any repository that can will back it up.) ### client All content is preferred, unless it's for a file in a "archive" directory, -which has reached an archive repository. +which has reached an archive repository, or is unused. -`((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) or roughlylackingcopies=1` +`(((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) or roughlylackingcopies=1) and not unused` ### transfer diff --git a/doc/todo/Limit_file_revision_history.mdwn b/doc/todo/Limit_file_revision_history.mdwn index c00b555b1..901d6f052 100644 --- a/doc/todo/Limit_file_revision_history.mdwn +++ b/doc/todo/Limit_file_revision_history.mdwn @@ -51,12 +51,13 @@ Finally, how to specify a feature request for git-annex? > `and (not unused)`. Transfer repositories too, because typically > only client repos connect to them, and so otherwise unused files > would build up there. Backup repos would want unused files. I -> think that archive repos would too. +> think that archive repos would too. **done** > * Make the assistant check for unused files periodically. Exactly > how often may need to be tuned, but once per day seems reasonable > for most repos. Note that the assistant could also notice on the > fly when files are removed and mark their keys as unused if that was > the last associated file. (Only currently possible in direct mode.) +> **done** > * After scanning for unused files, it makes sense for the > assistant to queue transfers of unused files to any remotes that > do want them (eg, backup remotes). If the files can successfully be @@ -75,7 +76,9 @@ Finally, how to specify a feature request for git-annex? > is not set, and there is some significant quantity of unused files > (eg, more than 1000, or more than 1 gb, or more than the amount of > remaining free disk space), -> it can pop up a webapp alert asking to configure it. +> it can pop up a webapp alert asking to configure it. **done** +> * Webapp interface to configure annex.expireunused. Reasonable values +> are no expiring, or any number of days. > > This does not cover every use case that was requested. > But I don't see a cheap way to ensure it keeps eg the past 10 versions of |