diff options
-rw-r--r-- | Assistant.hs | 2 | ||||
-rw-r--r-- | Assistant/Alert.hs | 19 | ||||
-rw-r--r-- | Assistant/Threads/SanityChecker.hs | 105 | ||||
-rw-r--r-- | Assistant/Threads/WebApp.hs | 1 | ||||
-rw-r--r-- | Assistant/TransferQueue.hs | 2 | ||||
-rw-r--r-- | Assistant/Types/Alert.hs | 1 | ||||
-rw-r--r-- | Logs/Unused.hs | 1 | ||||
-rw-r--r-- | Types/GitConfig.hs | 5 | ||||
-rw-r--r-- | Types/StandardGroups.hs | 2 | ||||
-rw-r--r-- | debian/changelog | 9 | ||||
-rw-r--r-- | doc/git-annex.mdwn | 13 | ||||
-rw-r--r-- | doc/preferred_content.mdwn | 4 | ||||
-rw-r--r-- | doc/todo/Limit_file_revision_history.mdwn | 7 |
13 files changed, 157 insertions, 14 deletions
diff --git a/Assistant.hs b/Assistant.hs index d4786f99a..800a3ef78 100644 --- a/Assistant.hs +++ b/Assistant.hs @@ -145,7 +145,7 @@ startDaemon assistant foreground startdelay cannotrun listenhost startbrowser = , assist $ transferPollerThread , assist $ transfererThread , assist $ daemonStatusThread - , assist $ sanityCheckerDailyThread + , assist $ sanityCheckerDailyThread urlrenderer , assist $ sanityCheckerHourlyThread , assist $ problemFixerThread urlrenderer #ifdef WITH_CLIBS diff --git a/Assistant/Alert.hs b/Assistant/Alert.hs index 055e66de5..b5ee706b8 100644 --- a/Assistant/Alert.hs +++ b/Assistant/Alert.hs @@ -260,6 +260,25 @@ upgradeFailedAlert :: String -> Alert upgradeFailedAlert msg = (errorAlert msg []) { alertHeader = Just $ fromString "Upgrade failed." } +unusedFilesAlert :: [AlertButton] -> String -> Alert +unusedFilesAlert buttons message = Alert + { alertHeader = Just $ fromString $ unwords + [ "Old and deleted files are piling up --" + , message + ] + , alertIcon = Just InfoIcon + , alertPriority = High + , alertButtons = buttons + , alertClosable = True + , alertClass = Message + , alertMessageRender = renderData + , alertCounter = 0 + , alertBlockDisplay = True + , alertName = Just UnusedFilesAlert + , alertCombiner = Just $ fullCombiner $ \new _old -> new + , alertData = [] + } + brokenRepositoryAlert :: [AlertButton] -> Alert brokenRepositoryAlert = errorAlert "Serious problems have been detected with your repository. This needs your immediate attention!" diff --git a/Assistant/Threads/SanityChecker.hs b/Assistant/Threads/SanityChecker.hs index a1be8dffc..07d46ff84 100644 --- a/Assistant/Threads/SanityChecker.hs +++ b/Assistant/Threads/SanityChecker.hs @@ -5,6 +5,8 @@ - Licensed under the GNU GPL version 3 or higher. -} +{-# LANGUAGE CPP #-} + module Assistant.Threads.SanityChecker ( sanityCheckerStartupThread, sanityCheckerDailyThread, @@ -16,7 +18,10 @@ import Assistant.DaemonStatus import Assistant.Alert import Assistant.Repair import Assistant.Ssh +import Assistant.TransferQueue +import Assistant.Types.UrlRenderer import qualified Annex.Branch +import qualified Git import qualified Git.LsFiles import qualified Git.Command import qualified Git.Config @@ -25,12 +30,26 @@ import qualified Assistant.Threads.Watcher as Watcher import Utility.LogFile import Utility.Batch import Utility.NotificationBroadcaster +import Utility.DiskFree import Config import Utility.HumanTime +import Utility.DataUnits import Git.Repair import Git.Index +import Logs.Unused +import Logs.Location +import Logs.Transfer +import Annex.Content +import Config.Files +import Types.Key +import qualified Annex +#ifdef WITH_WEBAPP +import Assistant.WebApp.Types +#endif import Data.Time.Clock.POSIX +import qualified Data.Map as M +import qualified Data.Text as T {- This thread runs once at startup, and most other threads wait for it - to finish. (However, the webapp thread does not, to prevent the UI @@ -78,8 +97,8 @@ sanityCheckerHourlyThread = namedThread "SanityCheckerHourly" $ forever $ do hourlyCheck {- This thread wakes up daily to make sure the tree is in good shape. -} -sanityCheckerDailyThread :: NamedThread -sanityCheckerDailyThread = namedThread "SanityCheckerDaily" $ forever $ do +sanityCheckerDailyThread :: UrlRenderer -> NamedThread +sanityCheckerDailyThread urlrenderer = namedThread "SanityCheckerDaily" $ forever $ do waitForNextCheck debug ["starting sanity check"] @@ -90,7 +109,8 @@ sanityCheckerDailyThread = namedThread "SanityCheckerDaily" $ forever $ do modifyDaemonStatus_ $ \s -> s { sanityCheckRunning = True } now <- liftIO getPOSIXTime -- before check started - r <- either showerr return =<< (tryIO . batch) <~> dailyCheck + r <- either showerr return + =<< (tryIO . batch) <~> dailyCheck urlrenderer modifyDaemonStatus_ $ \s -> s { sanityCheckRunning = False @@ -119,8 +139,8 @@ waitForNextCheck = do {- It's important to stay out of the Annex monad as much as possible while - running potentially expensive parts of this check, since remaining in it - will block the watcher. -} -dailyCheck :: Assistant Bool -dailyCheck = do +dailyCheck :: UrlRenderer -> Assistant Bool +dailyCheck urlrenderer = do g <- liftAnnex gitRepo batchmaker <- liftIO getBatchCommandMaker @@ -147,6 +167,22 @@ dailyCheck = do , Param "--auto" ] g + {- Check if the unused files found last time have been dealt with. -} + checkOldUnused urlrenderer + + {- Run git-annex unused once per day. This is run as a separate + - process to stay out of the annex monad and so it can run as a + - batch job. -} + program <- liftIO readProgramFile + let (program', params') = batchmaker (program, [Param "unused"]) + void $ liftIO $ boolSystem program' params' + {- Invalidate unused keys cache, and queue transfers of all unused + - keys. -} + unused <- liftAnnex unusedKeys' + void $ liftAnnex $ setUnusedKeys unused + forM_ unused $ \k -> + queueTransfers "unused" Later k Nothing Upload + return True where toonew timestamp now = now < (realToFrac (timestamp + slop) :: POSIXTime) @@ -160,7 +196,8 @@ dailyCheck = do insanity $ "found unstaged symlink: " ++ file hourlyCheck :: Assistant () -hourlyCheck = checkLogSize 0 +hourlyCheck = do + checkLogSize 0 {- Rotate logs until log file size is < 1 mb. -} checkLogSize :: Int -> Assistant () @@ -185,3 +222,59 @@ oneHour = 60 * 60 oneDay :: Int oneDay = 24 * oneHour +{- If annex.expireunused is set, find any keys that have lingered unused + - for the specified duration, and remove them. + - + - Otherwise, check to see if unused keys are piling up, and let the user + - know. This uses heuristics: 1000 unused keys, or more unused keys + - than the remaining free disk space, or more than 1/10th the total + - disk space being unused keys all suggest a problem. -} +checkOldUnused :: UrlRenderer -> Assistant () +checkOldUnused urlrenderer = go =<< annexExpireUnused <$> liftAnnex Annex.getGitConfig + where + go (Just expireunused) = do + m <- liftAnnex $ readUnusedLog "" + now <- liftIO getPOSIXTime + let duration = durationToPOSIXTime expireunused + let oldkeys = M.keys $ M.filter (tooold now duration) m + forM_ oldkeys $ \k -> do + debug ["removing old unused key", key2file k] + liftAnnex $ do + removeAnnex k + logStatus k InfoMissing + go Nothing = maybe noop prompt + =<< toobig =<< liftAnnex (readUnusedLog "") + + tooold now duration (_, mt) = + maybe False (\t -> now - t >= duration) mt + + toobig m = do + let num = M.size m + let diskused = foldl' sumkeysize 0 (M.keys m) + df <- forpath getDiskFree + disksize <- forpath getDiskSize + return $ if moreused df diskused || tenthused disksize diskused + then Just $ roughSize storageUnits False diskused ++ + " is used by old files" + else if num > 1000 + then Just $ show num ++ " old files exist" + else Nothing + + moreused Nothing _ = False + moreused (Just df) used = df <= used + + tenthused Nothing _ = False + tenthused (Just disksize) used = used >= disksize `div` 10 + + sumkeysize s k = s + fromMaybe 0 (keySize k) + + forpath a = liftAnnex $ inRepo $ liftIO . a . Git.repoPath + + prompt msg = +#ifdef WITH_WEBAPP + do + button <- mkAlertButton True (T.pack "Fix This") urlrenderer ConfigUnusedR + void $ addAlert $ unusedFilesAlert [button] msg +#else + debug [msg] +#endif diff --git a/Assistant/Threads/WebApp.hs b/Assistant/Threads/WebApp.hs index a7f9cc5b2..d2c2afd47 100644 --- a/Assistant/Threads/WebApp.hs +++ b/Assistant/Threads/WebApp.hs @@ -27,6 +27,7 @@ import Assistant.WebApp.Configurators.IA import Assistant.WebApp.Configurators.WebDAV import Assistant.WebApp.Configurators.XMPP import Assistant.WebApp.Configurators.Preferences +import Assistant.WebApp.Configurators.Unused import Assistant.WebApp.Configurators.Edit import Assistant.WebApp.Configurators.Delete import Assistant.WebApp.Configurators.Fsck diff --git a/Assistant/TransferQueue.hs b/Assistant/TransferQueue.hs index 98fb2f06c..6d8e72852 100644 --- a/Assistant/TransferQueue.hs +++ b/Assistant/TransferQueue.hs @@ -1,6 +1,6 @@ {- git-annex assistant pending transfer queue - - - Copyright 2012 Joey Hess <joey@kitenet.net> + - Copyright 2012-2014 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} diff --git a/Assistant/Types/Alert.hs b/Assistant/Types/Alert.hs index e6fbe86d3..19fe55e6e 100644 --- a/Assistant/Types/Alert.hs +++ b/Assistant/Types/Alert.hs @@ -32,6 +32,7 @@ data AlertName | SyncAlert | NotFsckedAlert | UpgradeAlert + | UnusedFilesAlert deriving (Eq) {- The first alert is the new alert, the second is an old alert. diff --git a/Logs/Unused.hs b/Logs/Unused.hs index 74f46b85e..cdfcd2165 100644 --- a/Logs/Unused.hs +++ b/Logs/Unused.hs @@ -21,6 +21,7 @@ module Logs.Unused ( readUnusedLog, readUnusedMap, unusedKeys, + setUnusedKeys, unusedKeys' ) where diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index e19fdc42f..f8e3be98e 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -1,6 +1,6 @@ {- git-annex configuration - - - Copyright 2012 Joey Hess <joey@kitenet.net> + - Copyright 2012-2014 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} @@ -20,6 +20,7 @@ import Config.Cost import Types.Distribution import Types.Availability import Types.NumCopies +import Utility.HumanTime {- Main git-annex settings. Each setting corresponds to a git-config key - such as annex.foo -} @@ -46,6 +47,7 @@ data GitConfig = GitConfig , annexLargeFiles :: Maybe String , annexFsckNudge :: Bool , annexAutoUpgrade :: AutoUpgrade + , annexExpireUnused :: Maybe Duration , coreSymlinks :: Bool , gcryptId :: Maybe String } @@ -75,6 +77,7 @@ extractGitConfig r = GitConfig , annexLargeFiles = getmaybe (annex "largefiles") , annexFsckNudge = getbool (annex "fscknudge") True , annexAutoUpgrade = toAutoUpgrade $ getmaybe (annex "autoupgrade") + , annexExpireUnused = parseDuration =<< getmaybe (annex "expireunused") , coreSymlinks = getbool "core.symlinks" True , gcryptId = getmaybe "core.gcrypt-id" } diff --git a/Types/StandardGroups.hs b/Types/StandardGroups.hs index f89b4e424..434156a0e 100644 --- a/Types/StandardGroups.hs +++ b/Types/StandardGroups.hs @@ -75,7 +75,7 @@ associatedDirectory _ _ = Nothing {- See doc/preferred_content.mdwn for explanations of these expressions. -} preferredContent :: StandardGroup -> PreferredContentExpression preferredContent ClientGroup = lastResort $ - "(exclude=*/archive/* and exclude=archive/*) or (" ++ notArchived ++ ")" + "((exclude=*/archive/* and exclude=archive/*) or (" ++ notArchived ++ ")) and not unused" preferredContent TransferGroup = lastResort $ "not (inallgroup=client and copies=client:2) and (" ++ preferredContent ClientGroup ++ ")" preferredContent BackupGroup = "include=*" diff --git a/debian/changelog b/debian/changelog index 900d84984..420ae603b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -18,6 +18,15 @@ git-annex (5.20140118) UNRELEASED; urgency=medium preferred content expressions. * Client, transfer, incremental backup, and archive repositories now want to get content that does not yet have enough copies. + * Client, transfer, and source repositories now do not want to retain + unused file contents. + * assistant: Checks daily for unused file contents, and when possible + moves them to a repository (such as a backup repository) that + wants to retain them. + * assistant: annex.expireunused can be configured to cause unused + file contents to be deleted after some period of time. + * webapp: Nudge user to see if they want to expire old unused file + contents when a lot of them seem to be piling up in the repository. * repair: Check git version at run time. * assistant: Run the periodic git gc in batch mode. diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 279fa24dd..3e239f7d6 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1228,6 +1228,19 @@ Here are all the supported configuration settings. to close it. On Mac OSX, when not using direct mode this defaults to 1 second, to work around a bad interaction with software there. +* `annex.expireunused` + + Controls what the assistant does about unused file contents + that are stored in the repository. + + The default is `false`, which causes + all old and unused file contents to be retained, unless the assistant + is able to move them to some other repository (such as a backup repository). + + Can be set to a time specification, like "7d" or "1m", and then + file contents that have been known to be unused for a week or a + month will be deleted. + * `annex.fscknudge` When set to false, prevents the webapp from reminding you when using diff --git a/doc/preferred_content.mdwn b/doc/preferred_content.mdwn index 039df3878..92250c034 100644 --- a/doc/preferred_content.mdwn +++ b/doc/preferred_content.mdwn @@ -111,9 +111,9 @@ any repository that can will back it up.) ### client All content is preferred, unless it's for a file in a "archive" directory, -which has reached an archive repository. +which has reached an archive repository, or is unused. -`((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) or roughlylackingcopies=1` +`(((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) or roughlylackingcopies=1) and not unused` ### transfer diff --git a/doc/todo/Limit_file_revision_history.mdwn b/doc/todo/Limit_file_revision_history.mdwn index c00b555b1..901d6f052 100644 --- a/doc/todo/Limit_file_revision_history.mdwn +++ b/doc/todo/Limit_file_revision_history.mdwn @@ -51,12 +51,13 @@ Finally, how to specify a feature request for git-annex? > `and (not unused)`. Transfer repositories too, because typically > only client repos connect to them, and so otherwise unused files > would build up there. Backup repos would want unused files. I -> think that archive repos would too. +> think that archive repos would too. **done** > * Make the assistant check for unused files periodically. Exactly > how often may need to be tuned, but once per day seems reasonable > for most repos. Note that the assistant could also notice on the > fly when files are removed and mark their keys as unused if that was > the last associated file. (Only currently possible in direct mode.) +> **done** > * After scanning for unused files, it makes sense for the > assistant to queue transfers of unused files to any remotes that > do want them (eg, backup remotes). If the files can successfully be @@ -75,7 +76,9 @@ Finally, how to specify a feature request for git-annex? > is not set, and there is some significant quantity of unused files > (eg, more than 1000, or more than 1 gb, or more than the amount of > remaining free disk space), -> it can pop up a webapp alert asking to configure it. +> it can pop up a webapp alert asking to configure it. **done** +> * Webapp interface to configure annex.expireunused. Reasonable values +> are no expiring, or any number of days. > > This does not cover every use case that was requested. > But I don't see a cheap way to ensure it keeps eg the past 10 versions of |