summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2014-01-22 15:33:02 -0400
committerGravatar Joey Hess <joey@kitenet.net>2014-01-22 15:33:02 -0400
commitcc366b8241cfc3e41252ecd2624332c15da03377 (patch)
tree78d254a03b1298ff3ae9c8c77dad04b049ce1871
parentf78a0a2b11591aca0885213e0b74cddff4d6ae57 (diff)
add timestamps to unused log files
This will be used in expiring old unused objects. The timestamp is when it was first noticed it was unused. Backwards compatability: It supports reading old format unused log files. The old version of git-annex will ignore lines in log files written by the new version, so the worst interop problem would be git annex dropunused not knowing some numbers that git-annex unused reported.
-rw-r--r--Command/Unused.hs8
-rw-r--r--Logs/Unused.hs63
-rw-r--r--Test.hs2
3 files changed, 58 insertions, 15 deletions
diff --git a/Command/Unused.hs b/Command/Unused.hs
index 19dc82007..59c5ec1aa 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -92,7 +92,7 @@ check file msg a c = do
l <- a
let unusedlist = number c l
unless (null l) $ showLongNote $ msg unusedlist
- writeUnusedLog file unusedlist
+ updateUnusedLog file $ M.fromList unusedlist
return $ c + length l
number :: Int -> [a] -> [(Int, a)]
@@ -328,9 +328,9 @@ data UnusedMaps = UnusedMaps
withUnusedMaps :: (UnusedMaps -> Int -> CommandStart) -> CommandSeek
withUnusedMaps a params = do
- unused <- readUnusedLog ""
- unusedbad <- readUnusedLog "bad"
- unusedtmp <- readUnusedLog "tmp"
+ unused <- readUnusedMap ""
+ unusedbad <- readUnusedMap "bad"
+ unusedtmp <- readUnusedMap "tmp"
let m = unused `M.union` unusedbad `M.union` unusedtmp
let unusedmaps = UnusedMaps unused unusedbad unusedtmp
seekActions $ return $ map (a unusedmaps) $
diff --git a/Logs/Unused.hs b/Logs/Unused.hs
index 4de5bc17a..daeeb93ab 100644
--- a/Logs/Unused.hs
+++ b/Logs/Unused.hs
@@ -1,32 +1,71 @@
{- git-annex unused log file
-
- - Copyright 2010,2012 Joey Hess <joey@kitenet.net>
+ - This file is stored locally in .git/annex/, not in the git-annex branch.
+ -
+ - The format: "int key timestamp"
+ -
+ - The int is a short, stable identifier that the user can use to
+ - refer to this key. (Equivilant to a filename.)
+ -
+ - The timestamp indicates when the key was first determined to be unused.
+ - Older versions of the log omit the timestamp.
+ -
+ - Copyright 2010-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.Unused (
UnusedMap,
- writeUnusedLog,
+ updateUnusedLog,
readUnusedLog,
+ readUnusedMap,
unusedKeys,
) where
import qualified Data.Map as M
+import Data.Time.Clock.POSIX
+import Data.Time
+import System.Locale
import Common.Annex
import Types.Key
import Utility.Tmp
+-- everything that is stored in the unused log
+type UnusedLog = M.Map Key (Int, Maybe POSIXTime)
+
+-- used to look up unused keys specified by the user
type UnusedMap = M.Map Int Key
-writeUnusedLog :: FilePath -> [(Int, Key)] -> Annex ()
+log2map :: UnusedLog -> UnusedMap
+log2map = M.fromList . map (\(k, (i, _t)) -> (i, k)) . M.toList
+
+map2log :: POSIXTime -> UnusedMap -> UnusedLog
+map2log t = M.fromList . map (\(i, k) -> (k, (i, Just t))) . M.toList
+
+{- Only keeps keys that are in the new log, but uses any timestamps
+ - those keys had in the old log. -}
+preserveTimestamps :: UnusedLog -> UnusedLog -> UnusedLog
+preserveTimestamps oldl newl = M.intersection (M.unionWith oldts oldl newl) newl
+ where
+ oldts _old@(_, ts) _new@(int, _) = (int, ts)
+
+updateUnusedLog :: FilePath -> UnusedMap -> Annex ()
+updateUnusedLog prefix m = do
+ oldl <- readUnusedLog prefix
+ newl <- preserveTimestamps oldl . flip map2log m <$> liftIO getPOSIXTime
+ writeUnusedLog prefix newl
+
+writeUnusedLog :: FilePath -> UnusedLog -> Annex ()
writeUnusedLog prefix l = do
logfile <- fromRepo $ gitAnnexUnusedLog prefix
- liftIO $ viaTmp writeFile logfile $
- unlines $ map (\(n, k) -> show n ++ " " ++ key2file k) l
+ liftIO $ viaTmp writeFile logfile $ unlines $ map format $ M.toList l
+ where
+ format (k, (i, Just t)) = show i ++ " " ++ key2file k ++ " " ++ show t
+ format (k, (i, Nothing)) = show i ++ " " ++ key2file k
-readUnusedLog :: FilePath -> Annex UnusedMap
+readUnusedLog :: FilePath -> Annex UnusedLog
readUnusedLog prefix = do
f <- fromRepo $ gitAnnexUnusedLog prefix
ifM (liftIO $ doesFileExist f)
@@ -35,11 +74,15 @@ readUnusedLog prefix = do
, return M.empty
)
where
- parse line = case (readish tag, file2key rest) of
- (Just num, Just key) -> Just (num, key)
+ parse line = case (readish sint, file2key skey, utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" ts) of
+ (Just int, Just key, mtimestamp) -> Just (key, (int, mtimestamp))
_ -> Nothing
where
- (tag, rest) = separate (== ' ') line
+ (sint, rest) = separate (== ' ') line
+ (skey, ts) = separate (== ' ') rest
+
+readUnusedMap :: FilePath -> Annex UnusedMap
+readUnusedMap = log2map <$$> readUnusedLog
unusedKeys :: Annex [Key]
-unusedKeys = M.elems <$> readUnusedLog ""
+unusedKeys = M.keys <$> readUnusedLog ""
diff --git a/Test.hs b/Test.hs
index 962fc9d7f..d38efea04 100644
--- a/Test.hs
+++ b/Test.hs
@@ -683,7 +683,7 @@ test_unused env = intmpclonerepoInDirect env $ do
where
checkunused expectedkeys desc = do
git_annex env "unused" [] @? "unused failed"
- unusedmap <- annexeval $ Logs.Unused.readUnusedLog ""
+ unusedmap <- annexeval $ Logs.Unused.readUnusedMap ""
let unusedkeys = M.elems unusedmap
assertEqual ("unused keys differ " ++ desc)
(sort expectedkeys) (sort unusedkeys)