summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Assistant.hs2
-rw-r--r--Assistant/Alert.hs19
-rw-r--r--Assistant/Threads/SanityChecker.hs105
-rw-r--r--Assistant/Threads/WebApp.hs1
-rw-r--r--Assistant/TransferQueue.hs2
-rw-r--r--Assistant/Types/Alert.hs1
-rw-r--r--Logs/Unused.hs1
-rw-r--r--Types/GitConfig.hs5
-rw-r--r--Types/StandardGroups.hs2
-rw-r--r--debian/changelog9
-rw-r--r--doc/git-annex.mdwn13
-rw-r--r--doc/preferred_content.mdwn4
-rw-r--r--doc/todo/Limit_file_revision_history.mdwn7
13 files changed, 157 insertions, 14 deletions
diff --git a/Assistant.hs b/Assistant.hs
index d4786f99a..800a3ef78 100644
--- a/Assistant.hs
+++ b/Assistant.hs
@@ -145,7 +145,7 @@ startDaemon assistant foreground startdelay cannotrun listenhost startbrowser =
, assist $ transferPollerThread
, assist $ transfererThread
, assist $ daemonStatusThread
- , assist $ sanityCheckerDailyThread
+ , assist $ sanityCheckerDailyThread urlrenderer
, assist $ sanityCheckerHourlyThread
, assist $ problemFixerThread urlrenderer
#ifdef WITH_CLIBS
diff --git a/Assistant/Alert.hs b/Assistant/Alert.hs
index 055e66de5..b5ee706b8 100644
--- a/Assistant/Alert.hs
+++ b/Assistant/Alert.hs
@@ -260,6 +260,25 @@ upgradeFailedAlert :: String -> Alert
upgradeFailedAlert msg = (errorAlert msg [])
{ alertHeader = Just $ fromString "Upgrade failed." }
+unusedFilesAlert :: [AlertButton] -> String -> Alert
+unusedFilesAlert buttons message = Alert
+ { alertHeader = Just $ fromString $ unwords
+ [ "Old and deleted files are piling up --"
+ , message
+ ]
+ , alertIcon = Just InfoIcon
+ , alertPriority = High
+ , alertButtons = buttons
+ , alertClosable = True
+ , alertClass = Message
+ , alertMessageRender = renderData
+ , alertCounter = 0
+ , alertBlockDisplay = True
+ , alertName = Just UnusedFilesAlert
+ , alertCombiner = Just $ fullCombiner $ \new _old -> new
+ , alertData = []
+ }
+
brokenRepositoryAlert :: [AlertButton] -> Alert
brokenRepositoryAlert = errorAlert "Serious problems have been detected with your repository. This needs your immediate attention!"
diff --git a/Assistant/Threads/SanityChecker.hs b/Assistant/Threads/SanityChecker.hs
index a1be8dffc..07d46ff84 100644
--- a/Assistant/Threads/SanityChecker.hs
+++ b/Assistant/Threads/SanityChecker.hs
@@ -5,6 +5,8 @@
- Licensed under the GNU GPL version 3 or higher.
-}
+{-# LANGUAGE CPP #-}
+
module Assistant.Threads.SanityChecker (
sanityCheckerStartupThread,
sanityCheckerDailyThread,
@@ -16,7 +18,10 @@ import Assistant.DaemonStatus
import Assistant.Alert
import Assistant.Repair
import Assistant.Ssh
+import Assistant.TransferQueue
+import Assistant.Types.UrlRenderer
import qualified Annex.Branch
+import qualified Git
import qualified Git.LsFiles
import qualified Git.Command
import qualified Git.Config
@@ -25,12 +30,26 @@ import qualified Assistant.Threads.Watcher as Watcher
import Utility.LogFile
import Utility.Batch
import Utility.NotificationBroadcaster
+import Utility.DiskFree
import Config
import Utility.HumanTime
+import Utility.DataUnits
import Git.Repair
import Git.Index
+import Logs.Unused
+import Logs.Location
+import Logs.Transfer
+import Annex.Content
+import Config.Files
+import Types.Key
+import qualified Annex
+#ifdef WITH_WEBAPP
+import Assistant.WebApp.Types
+#endif
import Data.Time.Clock.POSIX
+import qualified Data.Map as M
+import qualified Data.Text as T
{- This thread runs once at startup, and most other threads wait for it
- to finish. (However, the webapp thread does not, to prevent the UI
@@ -78,8 +97,8 @@ sanityCheckerHourlyThread = namedThread "SanityCheckerHourly" $ forever $ do
hourlyCheck
{- This thread wakes up daily to make sure the tree is in good shape. -}
-sanityCheckerDailyThread :: NamedThread
-sanityCheckerDailyThread = namedThread "SanityCheckerDaily" $ forever $ do
+sanityCheckerDailyThread :: UrlRenderer -> NamedThread
+sanityCheckerDailyThread urlrenderer = namedThread "SanityCheckerDaily" $ forever $ do
waitForNextCheck
debug ["starting sanity check"]
@@ -90,7 +109,8 @@ sanityCheckerDailyThread = namedThread "SanityCheckerDaily" $ forever $ do
modifyDaemonStatus_ $ \s -> s { sanityCheckRunning = True }
now <- liftIO getPOSIXTime -- before check started
- r <- either showerr return =<< (tryIO . batch) <~> dailyCheck
+ r <- either showerr return
+ =<< (tryIO . batch) <~> dailyCheck urlrenderer
modifyDaemonStatus_ $ \s -> s
{ sanityCheckRunning = False
@@ -119,8 +139,8 @@ waitForNextCheck = do
{- It's important to stay out of the Annex monad as much as possible while
- running potentially expensive parts of this check, since remaining in it
- will block the watcher. -}
-dailyCheck :: Assistant Bool
-dailyCheck = do
+dailyCheck :: UrlRenderer -> Assistant Bool
+dailyCheck urlrenderer = do
g <- liftAnnex gitRepo
batchmaker <- liftIO getBatchCommandMaker
@@ -147,6 +167,22 @@ dailyCheck = do
, Param "--auto"
] g
+ {- Check if the unused files found last time have been dealt with. -}
+ checkOldUnused urlrenderer
+
+ {- Run git-annex unused once per day. This is run as a separate
+ - process to stay out of the annex monad and so it can run as a
+ - batch job. -}
+ program <- liftIO readProgramFile
+ let (program', params') = batchmaker (program, [Param "unused"])
+ void $ liftIO $ boolSystem program' params'
+ {- Invalidate unused keys cache, and queue transfers of all unused
+ - keys. -}
+ unused <- liftAnnex unusedKeys'
+ void $ liftAnnex $ setUnusedKeys unused
+ forM_ unused $ \k ->
+ queueTransfers "unused" Later k Nothing Upload
+
return True
where
toonew timestamp now = now < (realToFrac (timestamp + slop) :: POSIXTime)
@@ -160,7 +196,8 @@ dailyCheck = do
insanity $ "found unstaged symlink: " ++ file
hourlyCheck :: Assistant ()
-hourlyCheck = checkLogSize 0
+hourlyCheck = do
+ checkLogSize 0
{- Rotate logs until log file size is < 1 mb. -}
checkLogSize :: Int -> Assistant ()
@@ -185,3 +222,59 @@ oneHour = 60 * 60
oneDay :: Int
oneDay = 24 * oneHour
+{- If annex.expireunused is set, find any keys that have lingered unused
+ - for the specified duration, and remove them.
+ -
+ - Otherwise, check to see if unused keys are piling up, and let the user
+ - know. This uses heuristics: 1000 unused keys, or more unused keys
+ - than the remaining free disk space, or more than 1/10th the total
+ - disk space being unused keys all suggest a problem. -}
+checkOldUnused :: UrlRenderer -> Assistant ()
+checkOldUnused urlrenderer = go =<< annexExpireUnused <$> liftAnnex Annex.getGitConfig
+ where
+ go (Just expireunused) = do
+ m <- liftAnnex $ readUnusedLog ""
+ now <- liftIO getPOSIXTime
+ let duration = durationToPOSIXTime expireunused
+ let oldkeys = M.keys $ M.filter (tooold now duration) m
+ forM_ oldkeys $ \k -> do
+ debug ["removing old unused key", key2file k]
+ liftAnnex $ do
+ removeAnnex k
+ logStatus k InfoMissing
+ go Nothing = maybe noop prompt
+ =<< toobig =<< liftAnnex (readUnusedLog "")
+
+ tooold now duration (_, mt) =
+ maybe False (\t -> now - t >= duration) mt
+
+ toobig m = do
+ let num = M.size m
+ let diskused = foldl' sumkeysize 0 (M.keys m)
+ df <- forpath getDiskFree
+ disksize <- forpath getDiskSize
+ return $ if moreused df diskused || tenthused disksize diskused
+ then Just $ roughSize storageUnits False diskused ++
+ " is used by old files"
+ else if num > 1000
+ then Just $ show num ++ " old files exist"
+ else Nothing
+
+ moreused Nothing _ = False
+ moreused (Just df) used = df <= used
+
+ tenthused Nothing _ = False
+ tenthused (Just disksize) used = used >= disksize `div` 10
+
+ sumkeysize s k = s + fromMaybe 0 (keySize k)
+
+ forpath a = liftAnnex $ inRepo $ liftIO . a . Git.repoPath
+
+ prompt msg =
+#ifdef WITH_WEBAPP
+ do
+ button <- mkAlertButton True (T.pack "Fix This") urlrenderer ConfigUnusedR
+ void $ addAlert $ unusedFilesAlert [button] msg
+#else
+ debug [msg]
+#endif
diff --git a/Assistant/Threads/WebApp.hs b/Assistant/Threads/WebApp.hs
index a7f9cc5b2..d2c2afd47 100644
--- a/Assistant/Threads/WebApp.hs
+++ b/Assistant/Threads/WebApp.hs
@@ -27,6 +27,7 @@ import Assistant.WebApp.Configurators.IA
import Assistant.WebApp.Configurators.WebDAV
import Assistant.WebApp.Configurators.XMPP
import Assistant.WebApp.Configurators.Preferences
+import Assistant.WebApp.Configurators.Unused
import Assistant.WebApp.Configurators.Edit
import Assistant.WebApp.Configurators.Delete
import Assistant.WebApp.Configurators.Fsck
diff --git a/Assistant/TransferQueue.hs b/Assistant/TransferQueue.hs
index 98fb2f06c..6d8e72852 100644
--- a/Assistant/TransferQueue.hs
+++ b/Assistant/TransferQueue.hs
@@ -1,6 +1,6 @@
{- git-annex assistant pending transfer queue
-
- - Copyright 2012 Joey Hess <joey@kitenet.net>
+ - Copyright 2012-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
diff --git a/Assistant/Types/Alert.hs b/Assistant/Types/Alert.hs
index e6fbe86d3..19fe55e6e 100644
--- a/Assistant/Types/Alert.hs
+++ b/Assistant/Types/Alert.hs
@@ -32,6 +32,7 @@ data AlertName
| SyncAlert
| NotFsckedAlert
| UpgradeAlert
+ | UnusedFilesAlert
deriving (Eq)
{- The first alert is the new alert, the second is an old alert.
diff --git a/Logs/Unused.hs b/Logs/Unused.hs
index 74f46b85e..cdfcd2165 100644
--- a/Logs/Unused.hs
+++ b/Logs/Unused.hs
@@ -21,6 +21,7 @@ module Logs.Unused (
readUnusedLog,
readUnusedMap,
unusedKeys,
+ setUnusedKeys,
unusedKeys'
) where
diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs
index e19fdc42f..f8e3be98e 100644
--- a/Types/GitConfig.hs
+++ b/Types/GitConfig.hs
@@ -1,6 +1,6 @@
{- git-annex configuration
-
- - Copyright 2012 Joey Hess <joey@kitenet.net>
+ - Copyright 2012-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -20,6 +20,7 @@ import Config.Cost
import Types.Distribution
import Types.Availability
import Types.NumCopies
+import Utility.HumanTime
{- Main git-annex settings. Each setting corresponds to a git-config key
- such as annex.foo -}
@@ -46,6 +47,7 @@ data GitConfig = GitConfig
, annexLargeFiles :: Maybe String
, annexFsckNudge :: Bool
, annexAutoUpgrade :: AutoUpgrade
+ , annexExpireUnused :: Maybe Duration
, coreSymlinks :: Bool
, gcryptId :: Maybe String
}
@@ -75,6 +77,7 @@ extractGitConfig r = GitConfig
, annexLargeFiles = getmaybe (annex "largefiles")
, annexFsckNudge = getbool (annex "fscknudge") True
, annexAutoUpgrade = toAutoUpgrade $ getmaybe (annex "autoupgrade")
+ , annexExpireUnused = parseDuration =<< getmaybe (annex "expireunused")
, coreSymlinks = getbool "core.symlinks" True
, gcryptId = getmaybe "core.gcrypt-id"
}
diff --git a/Types/StandardGroups.hs b/Types/StandardGroups.hs
index f89b4e424..434156a0e 100644
--- a/Types/StandardGroups.hs
+++ b/Types/StandardGroups.hs
@@ -75,7 +75,7 @@ associatedDirectory _ _ = Nothing
{- See doc/preferred_content.mdwn for explanations of these expressions. -}
preferredContent :: StandardGroup -> PreferredContentExpression
preferredContent ClientGroup = lastResort $
- "(exclude=*/archive/* and exclude=archive/*) or (" ++ notArchived ++ ")"
+ "((exclude=*/archive/* and exclude=archive/*) or (" ++ notArchived ++ ")) and not unused"
preferredContent TransferGroup = lastResort $
"not (inallgroup=client and copies=client:2) and (" ++ preferredContent ClientGroup ++ ")"
preferredContent BackupGroup = "include=*"
diff --git a/debian/changelog b/debian/changelog
index 900d84984..420ae603b 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -18,6 +18,15 @@ git-annex (5.20140118) UNRELEASED; urgency=medium
preferred content expressions.
* Client, transfer, incremental backup, and archive repositories
now want to get content that does not yet have enough copies.
+ * Client, transfer, and source repositories now do not want to retain
+ unused file contents.
+ * assistant: Checks daily for unused file contents, and when possible
+ moves them to a repository (such as a backup repository) that
+ wants to retain them.
+ * assistant: annex.expireunused can be configured to cause unused
+ file contents to be deleted after some period of time.
+ * webapp: Nudge user to see if they want to expire old unused file
+ contents when a lot of them seem to be piling up in the repository.
* repair: Check git version at run time.
* assistant: Run the periodic git gc in batch mode.
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 279fa24dd..3e239f7d6 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -1228,6 +1228,19 @@ Here are all the supported configuration settings.
to close it. On Mac OSX, when not using direct mode this defaults to
1 second, to work around a bad interaction with software there.
+* `annex.expireunused`
+
+ Controls what the assistant does about unused file contents
+ that are stored in the repository.
+
+ The default is `false`, which causes
+ all old and unused file contents to be retained, unless the assistant
+ is able to move them to some other repository (such as a backup repository).
+
+ Can be set to a time specification, like "7d" or "1m", and then
+ file contents that have been known to be unused for a week or a
+ month will be deleted.
+
* `annex.fscknudge`
When set to false, prevents the webapp from reminding you when using
diff --git a/doc/preferred_content.mdwn b/doc/preferred_content.mdwn
index 039df3878..92250c034 100644
--- a/doc/preferred_content.mdwn
+++ b/doc/preferred_content.mdwn
@@ -111,9 +111,9 @@ any repository that can will back it up.)
### client
All content is preferred, unless it's for a file in a "archive" directory,
-which has reached an archive repository.
+which has reached an archive repository, or is unused.
-`((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) or roughlylackingcopies=1`
+`(((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) or roughlylackingcopies=1) and not unused`
### transfer
diff --git a/doc/todo/Limit_file_revision_history.mdwn b/doc/todo/Limit_file_revision_history.mdwn
index c00b555b1..901d6f052 100644
--- a/doc/todo/Limit_file_revision_history.mdwn
+++ b/doc/todo/Limit_file_revision_history.mdwn
@@ -51,12 +51,13 @@ Finally, how to specify a feature request for git-annex?
> `and (not unused)`. Transfer repositories too, because typically
> only client repos connect to them, and so otherwise unused files
> would build up there. Backup repos would want unused files. I
-> think that archive repos would too.
+> think that archive repos would too. **done**
> * Make the assistant check for unused files periodically. Exactly
> how often may need to be tuned, but once per day seems reasonable
> for most repos. Note that the assistant could also notice on the
> fly when files are removed and mark their keys as unused if that was
> the last associated file. (Only currently possible in direct mode.)
+> **done**
> * After scanning for unused files, it makes sense for the
> assistant to queue transfers of unused files to any remotes that
> do want them (eg, backup remotes). If the files can successfully be
@@ -75,7 +76,9 @@ Finally, how to specify a feature request for git-annex?
> is not set, and there is some significant quantity of unused files
> (eg, more than 1000, or more than 1 gb, or more than the amount of
> remaining free disk space),
-> it can pop up a webapp alert asking to configure it.
+> it can pop up a webapp alert asking to configure it. **done**
+> * Webapp interface to configure annex.expireunused. Reasonable values
+> are no expiring, or any number of days.
>
> This does not cover every use case that was requested.
> But I don't see a cheap way to ensure it keeps eg the past 10 versions of