From cc366b8241cfc3e41252ecd2624332c15da03377 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 22 Jan 2014 15:33:02 -0400 Subject: add timestamps to unused log files This will be used in expiring old unused objects. The timestamp is when it was first noticed it was unused. Backwards compatability: It supports reading old format unused log files. The old version of git-annex will ignore lines in log files written by the new version, so the worst interop problem would be git annex dropunused not knowing some numbers that git-annex unused reported. --- Command/Unused.hs | 8 +++---- Logs/Unused.hs | 63 ++++++++++++++++++++++++++++++++++++++++++++++--------- Test.hs | 2 +- 3 files changed, 58 insertions(+), 15 deletions(-) diff --git a/Command/Unused.hs b/Command/Unused.hs index 19dc82007..59c5ec1aa 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -92,7 +92,7 @@ check file msg a c = do l <- a let unusedlist = number c l unless (null l) $ showLongNote $ msg unusedlist - writeUnusedLog file unusedlist + updateUnusedLog file $ M.fromList unusedlist return $ c + length l number :: Int -> [a] -> [(Int, a)] @@ -328,9 +328,9 @@ data UnusedMaps = UnusedMaps withUnusedMaps :: (UnusedMaps -> Int -> CommandStart) -> CommandSeek withUnusedMaps a params = do - unused <- readUnusedLog "" - unusedbad <- readUnusedLog "bad" - unusedtmp <- readUnusedLog "tmp" + unused <- readUnusedMap "" + unusedbad <- readUnusedMap "bad" + unusedtmp <- readUnusedMap "tmp" let m = unused `M.union` unusedbad `M.union` unusedtmp let unusedmaps = UnusedMaps unused unusedbad unusedtmp seekActions $ return $ map (a unusedmaps) $ diff --git a/Logs/Unused.hs b/Logs/Unused.hs index 4de5bc17a..daeeb93ab 100644 --- a/Logs/Unused.hs +++ b/Logs/Unused.hs @@ -1,32 +1,71 @@ {- git-annex unused log file - - - Copyright 2010,2012 Joey Hess + - This file is stored locally in .git/annex/, not in the git-annex branch. + - + - The format: "int key timestamp" + - + - The int is a short, stable identifier that the user can use to + - refer to this key. (Equivilant to a filename.) + - + - The timestamp indicates when the key was first determined to be unused. + - Older versions of the log omit the timestamp. + - + - Copyright 2010-2014 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} module Logs.Unused ( UnusedMap, - writeUnusedLog, + updateUnusedLog, readUnusedLog, + readUnusedMap, unusedKeys, ) where import qualified Data.Map as M +import Data.Time.Clock.POSIX +import Data.Time +import System.Locale import Common.Annex import Types.Key import Utility.Tmp +-- everything that is stored in the unused log +type UnusedLog = M.Map Key (Int, Maybe POSIXTime) + +-- used to look up unused keys specified by the user type UnusedMap = M.Map Int Key -writeUnusedLog :: FilePath -> [(Int, Key)] -> Annex () +log2map :: UnusedLog -> UnusedMap +log2map = M.fromList . map (\(k, (i, _t)) -> (i, k)) . M.toList + +map2log :: POSIXTime -> UnusedMap -> UnusedLog +map2log t = M.fromList . map (\(i, k) -> (k, (i, Just t))) . M.toList + +{- Only keeps keys that are in the new log, but uses any timestamps + - those keys had in the old log. -} +preserveTimestamps :: UnusedLog -> UnusedLog -> UnusedLog +preserveTimestamps oldl newl = M.intersection (M.unionWith oldts oldl newl) newl + where + oldts _old@(_, ts) _new@(int, _) = (int, ts) + +updateUnusedLog :: FilePath -> UnusedMap -> Annex () +updateUnusedLog prefix m = do + oldl <- readUnusedLog prefix + newl <- preserveTimestamps oldl . flip map2log m <$> liftIO getPOSIXTime + writeUnusedLog prefix newl + +writeUnusedLog :: FilePath -> UnusedLog -> Annex () writeUnusedLog prefix l = do logfile <- fromRepo $ gitAnnexUnusedLog prefix - liftIO $ viaTmp writeFile logfile $ - unlines $ map (\(n, k) -> show n ++ " " ++ key2file k) l + liftIO $ viaTmp writeFile logfile $ unlines $ map format $ M.toList l + where + format (k, (i, Just t)) = show i ++ " " ++ key2file k ++ " " ++ show t + format (k, (i, Nothing)) = show i ++ " " ++ key2file k -readUnusedLog :: FilePath -> Annex UnusedMap +readUnusedLog :: FilePath -> Annex UnusedLog readUnusedLog prefix = do f <- fromRepo $ gitAnnexUnusedLog prefix ifM (liftIO $ doesFileExist f) @@ -35,11 +74,15 @@ readUnusedLog prefix = do , return M.empty ) where - parse line = case (readish tag, file2key rest) of - (Just num, Just key) -> Just (num, key) + parse line = case (readish sint, file2key skey, utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" ts) of + (Just int, Just key, mtimestamp) -> Just (key, (int, mtimestamp)) _ -> Nothing where - (tag, rest) = separate (== ' ') line + (sint, rest) = separate (== ' ') line + (skey, ts) = separate (== ' ') rest + +readUnusedMap :: FilePath -> Annex UnusedMap +readUnusedMap = log2map <$$> readUnusedLog unusedKeys :: Annex [Key] -unusedKeys = M.elems <$> readUnusedLog "" +unusedKeys = M.keys <$> readUnusedLog "" diff --git a/Test.hs b/Test.hs index 962fc9d7f..d38efea04 100644 --- a/Test.hs +++ b/Test.hs @@ -683,7 +683,7 @@ test_unused env = intmpclonerepoInDirect env $ do where checkunused expectedkeys desc = do git_annex env "unused" [] @? "unused failed" - unusedmap <- annexeval $ Logs.Unused.readUnusedLog "" + unusedmap <- annexeval $ Logs.Unused.readUnusedMap "" let unusedkeys = M.elems unusedmap assertEqual ("unused keys differ " ++ desc) (sort expectedkeys) (sort unusedkeys) -- cgit v1.2.3