diff options
author | Joey Hess <joey@kitenet.net> | 2014-07-24 16:23:36 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2014-07-24 16:23:36 -0400 |
commit | 2b414feaf2d20452794d0cdd608c6dd91feb1ec1 (patch) | |
tree | 16f4caafc5158227eda6b36462b93a9c3f35173f | |
parent | 35b31b00e4efbf84bcfb814acc477bbb89b50107 (diff) |
implement chunk logs
Slightly tricky as they are not normal UUIDBased logs, but are instead maps
from (uuid, chunksize) to chunkcount.
This commit was sponsored by Frank Thomas.
-rw-r--r-- | Annex/Branch/Transitions.hs | 15 | ||||
-rw-r--r-- | Logs.hs | 52 | ||||
-rw-r--r-- | Logs/Chunk.hs | 44 | ||||
-rw-r--r-- | Logs/Chunk/Pure.hs | 32 | ||||
-rw-r--r-- | Logs/UUIDBased.hs | 2 | ||||
-rw-r--r-- | Remote/Helper/Chunked.hs | 3 | ||||
-rw-r--r-- | doc/design/assistant/chunks.mdwn | 15 | ||||
-rw-r--r-- | doc/internals.mdwn | 10 |
8 files changed, 134 insertions, 39 deletions
diff --git a/Annex/Branch/Transitions.hs b/Annex/Branch/Transitions.hs index 5c2c14548..4c39f198f 100644 --- a/Annex/Branch/Transitions.hs +++ b/Annex/Branch/Transitions.hs @@ -12,8 +12,9 @@ module Annex.Branch.Transitions ( import Logs import Logs.Transitions -import Logs.UUIDBased as UUIDBased -import Logs.Presence.Pure as Presence +import qualified Logs.UUIDBased as UUIDBased +import qualified Logs.Presence.Pure as Presence +import qualified Logs.Chunk.Pure as Chunk import Types.TrustLevel import Types.UUID @@ -37,9 +38,11 @@ dropDead f content trustmap = case getLogVariety f of -- because git remotes may still exist, and they need -- to still know it's dead. | f == trustLog -> PreserveFile - | otherwise -> ChangeFile $ UUIDBased.showLog id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLog Just content + | otherwise -> ChangeFile $ UUIDBased.showLog id $ dropDeadFromMapLog trustmap id $ UUIDBased.parseLog Just content Just NewUUIDBasedLog -> ChangeFile $ - UUIDBased.showLogNew id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLogNew Just content + UUIDBased.showLogNew id $ dropDeadFromMapLog trustmap id $ UUIDBased.parseLogNew Just content + Just (ChunkLog _) -> ChangeFile $ + Chunk.showLog $ dropDeadFromMapLog trustmap fst $ Chunk.parseLog content Just (PresenceLog _) -> let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content in if null newlog @@ -48,8 +51,8 @@ dropDead f content trustmap = case getLogVariety f of Just OtherLog -> PreserveFile Nothing -> PreserveFile -dropDeadFromUUIDBasedLog :: TrustMap -> UUIDBased.Log String -> UUIDBased.Log String -dropDeadFromUUIDBasedLog trustmap = M.filterWithKey $ notDead trustmap . const +dropDeadFromMapLog :: TrustMap -> (k -> UUID) -> M.Map k v -> M.Map k v +dropDeadFromMapLog trustmap getuuid = M.filterWithKey $ \k _v -> notDead trustmap getuuid k {- Presence logs can contain UUIDs or other values. 
Any line that matches - a dead uuid is dropped; any other values are passed through. -} @@ -14,6 +14,7 @@ import Types.Key data LogVariety = UUIDBasedLog | NewUUIDBasedLog + | ChunkLog Key | PresenceLog Key | OtherLog deriving (Show) @@ -24,6 +25,7 @@ getLogVariety :: FilePath -> Maybe LogVariety getLogVariety f | f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog | isRemoteStateLog f = Just NewUUIDBasedLog + | isChunkLog f = ChunkLog <$> chunkLogFileKey f | isMetaDataLog f || f `elem` otherLogs = Just OtherLog | otherwise = PresenceLog <$> firstJust (presenceLogs f) @@ -133,6 +135,25 @@ remoteStateLogExt = ".log.rmt" isRemoteStateLog :: FilePath -> Bool isRemoteStateLog path = remoteStateLogExt `isSuffixOf` path +{- The filename of the chunk log for a given key. -} +chunkLogFile :: Key -> FilePath +chunkLogFile key = hashDirLower key </> keyFile key ++ chunkLogExt + +chunkLogFileKey :: FilePath -> Maybe Key +chunkLogFileKey path + | ext == chunkLogExt = fileKey base + | otherwise = Nothing + where + file = takeFileName path + (base, ext) = splitAt (length file - extlen) file + extlen = length chunkLogExt + +chunkLogExt :: String +chunkLogExt = ".log.cnk" + +isChunkLog :: FilePath -> Bool +isChunkLog path = chunkLogExt `isSuffixOf` path + {- The filename of the metadata log for a given key. 
-} metaDataLogFile :: Key -> FilePath metaDataLogFile key = hashDirLower key </> keyFile key ++ metaDataLogExt @@ -146,20 +167,23 @@ isMetaDataLog path = metaDataLogExt `isSuffixOf` path prop_logs_sane :: Key -> Bool prop_logs_sane dummykey = and [ isNothing (getLogVariety "unknown") - , expect isUUIDBasedLog (getLogVariety uuidLog) - , expect isPresenceLog (getLogVariety $ locationLogFile dummykey) - , expect isPresenceLog (getLogVariety $ urlLogFile dummykey) - , expect isNewUUIDBasedLog (getLogVariety $ remoteStateLogFile dummykey) - , expect isOtherLog (getLogVariety $ metaDataLogFile dummykey) - , expect isOtherLog (getLogVariety $ numcopiesLog) + , expect gotUUIDBasedLog (getLogVariety uuidLog) + , expect gotPresenceLog (getLogVariety $ locationLogFile dummykey) + , expect gotPresenceLog (getLogVariety $ urlLogFile dummykey) + , expect gotNewUUIDBasedLog (getLogVariety $ remoteStateLogFile dummykey) + , expect gotChunkLog (getLogVariety $ chunkLogFile dummykey) + , expect gotOtherLog (getLogVariety $ metaDataLogFile dummykey) + , expect gotOtherLog (getLogVariety $ numcopiesLog) ] where expect = maybe False - isUUIDBasedLog UUIDBasedLog = True - isUUIDBasedLog _ = False - isNewUUIDBasedLog NewUUIDBasedLog = True - isNewUUIDBasedLog _ = False - isPresenceLog (PresenceLog k) = k == dummykey - isPresenceLog _ = False - isOtherLog OtherLog = True - isOtherLog _ = False + gotUUIDBasedLog UUIDBasedLog = True + gotUUIDBasedLog _ = False + gotNewUUIDBasedLog NewUUIDBasedLog = True + gotNewUUIDBasedLog _ = False + gotChunkLog (ChunkLog k) = k == dummykey + gotChunkLog _ = False + gotPresenceLog (PresenceLog k) = k == dummykey + gotPresenceLog _ = False + gotOtherLog OtherLog = True + gotOtherLog _ = False diff --git a/Logs/Chunk.hs b/Logs/Chunk.hs new file mode 100644 index 000000000..76da50947 --- /dev/null +++ b/Logs/Chunk.hs @@ -0,0 +1,44 @@ +{- Chunk logs. 
+ - + - An object can be stored in chunked form on a remote; these logs keep + - track of the chunk size used, and the number of chunks. + - + - It's possible for a single object to be stored multiple times on the + - same remote using different chunk sizes. So, while this is a MapLog, it + - is not a normal UUIDBased log. Instead, it's a map from UUID and chunk + - size to number of chunks. + - + - Format: "timestamp uuid:chunksize chunkcount" + - + - Copyright 2014 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.Chunk where + +import Common.Annex +import Logs +import Logs.MapLog +import qualified Annex.Branch +import Logs.Chunk.Pure + +import qualified Data.Map as M +import Data.Time.Clock.POSIX + +chunksStored :: UUID -> Key -> ChunkSize -> ChunkCount -> Annex () +chunksStored u k chunksize chunkcount = do + ts <- liftIO getPOSIXTime + Annex.Branch.change (chunkLogFile k) $ + showLog . changeMapLog ts (u, chunksize) chunkcount . parseLog + +chunksRemoved :: UUID -> Key -> ChunkSize -> Annex () +chunksRemoved u k chunksize = chunksStored u k chunksize 0 + +getCurrentChunks :: UUID -> Key -> Annex [(ChunkSize, ChunkCount)] +getCurrentChunks u k = select . parseLog <$> Annex.Branch.get (chunkLogFile k) + where + select = filter (\(_sz, ct) -> ct > 0) + . map (\((_ku, sz), l) -> (sz, value l)) + . M.toList + . M.filterWithKey (\(ku, _sz) _ -> ku == u) diff --git a/Logs/Chunk/Pure.hs b/Logs/Chunk/Pure.hs new file mode 100644 index 000000000..09e871c38 --- /dev/null +++ b/Logs/Chunk/Pure.hs @@ -0,0 +1,32 @@ +{- Chunk logs, pure operations. + - + - Copyright 2014 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Logs.Chunk.Pure where + +import Common.Annex +import Logs.MapLog +import Data.Int + +type ChunkSize = Int64 + +type ChunkCount = Integer + +type ChunkLog = MapLog (UUID, ChunkSize) ChunkCount + +parseLog :: String -> ChunkLog +parseLog = parseMapLog fieldparser valueparser + where + fieldparser s = + let (u,sz) = separate (== ':') s + in (,) <$> pure (toUUID u) <*> readish sz + valueparser = readish + +showLog :: ChunkLog -> String +showLog = showMapLog fieldshower valueshower + where + fieldshower (u, sz) = fromUUID u ++ ':' : show sz + valueshower = show diff --git a/Logs/UUIDBased.hs b/Logs/UUIDBased.hs index b403b6253..fe1c9e012 100644 --- a/Logs/UUIDBased.hs +++ b/Logs/UUIDBased.hs @@ -1,6 +1,6 @@ {- git-annex uuid-based logs - - - This is used to store information about a UUID in a way that can + - This is used to store information about UUIDs in a way that can - be union merged. - - A line of the log will look like: "UUID[ INFO[ timestamp=foo]]" diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index a71c39fbc..edd9dd8c9 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -9,12 +9,11 @@ module Remote.Helper.Chunked where import Utility.DataUnits import Types.Remote +import Logs.Chunk.Pure (ChunkSize) import qualified Data.Map as M import Data.Int -type ChunkSize = Int64 - data ChunkConfig = NoChunks | UnpaddedChunks ChunkSize diff --git a/doc/design/assistant/chunks.mdwn b/doc/design/assistant/chunks.mdwn index c20bb9aab..d7517243c 100644 --- a/doc/design/assistant/chunks.mdwn +++ b/doc/design/assistant/chunks.mdwn @@ -160,17 +160,12 @@ the git-annex branch. The location log does not record locations of individual chunk keys (too space-inefficient). Instead, look at git-annex:aaa/bbb/SHA256-s12345--xxxxxxx.log.cnk to get -the chunk count and size for a key. File format would be: +the chunk count and size for a key. 
- ts uuid chunksize chunkcount - -Where a chunkcount of 0 means that the object is not longer present in the -remote using the specified chunk size. - -Note that a given remote uuid might have multiple lines, if a key was -stored on it twice using different chunk sizes. Also note that even when -this file exists for a key, the object may be stored non-chunked on the -remote too. +Note that a given remote uuid might have multiple chunk sizes logged, if a +key was stored on it twice using different chunk sizes. Also note that even +when this file exists for a key, the object may be stored non-chunked on +the remote too. `hasKey` would check if any one (chunksize, chunkcount) is satisfied by the files on the remote. It would also check if the non-chunked key is diff --git a/doc/internals.mdwn b/doc/internals.mdwn index 5cb8ec5aa..71d1b0380 100644 --- a/doc/internals.mdwn +++ b/doc/internals.mdwn @@ -224,16 +224,14 @@ are indicated by prefixing them with "!" These log files are used when objects are stored in chunked form on remotes. They record the size(s) of the chunks, and the number of chunks. -For example, this logs that a remote has an object stored using 9 chunks -of 1 mb size: +For example, this logs that a remote has an object stored using both +9 chunks of 1 mb size, and 1 chunk of 10 mb size. - 1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 10240 9 + 1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55:10240 9 + 1287290776.765153s e605dca6-446a-11e0-8b2a-002170d25c55:102400 1 (When those chunks are removed from the remote, the 9 is changed to 0.) -For future expansion, additional fields may be present following the -number of chunks. - ## `schedule.log` Used to record scheduled events, such as periodic fscks. |