aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2014-07-24 16:23:36 -0400
committerGravatar Joey Hess <joey@kitenet.net>2014-07-24 16:23:36 -0400
commit2b414feaf2d20452794d0cdd608c6dd91feb1ec1 (patch)
tree16f4caafc5158227eda6b36462b93a9c3f35173f
parent35b31b00e4efbf84bcfb814acc477bbb89b50107 (diff)
implement chunk logs
Slightly tricky as they are not normal UUIDBased logs, but are instead maps from (uuid, chunksize) to chunkcount. This commit was sponsored by Frank Thomas.
-rw-r--r--Annex/Branch/Transitions.hs15
-rw-r--r--Logs.hs52
-rw-r--r--Logs/Chunk.hs44
-rw-r--r--Logs/Chunk/Pure.hs32
-rw-r--r--Logs/UUIDBased.hs2
-rw-r--r--Remote/Helper/Chunked.hs3
-rw-r--r--doc/design/assistant/chunks.mdwn15
-rw-r--r--doc/internals.mdwn10
8 files changed, 134 insertions, 39 deletions
diff --git a/Annex/Branch/Transitions.hs b/Annex/Branch/Transitions.hs
index 5c2c14548..4c39f198f 100644
--- a/Annex/Branch/Transitions.hs
+++ b/Annex/Branch/Transitions.hs
@@ -12,8 +12,9 @@ module Annex.Branch.Transitions (
import Logs
import Logs.Transitions
-import Logs.UUIDBased as UUIDBased
-import Logs.Presence.Pure as Presence
+import qualified Logs.UUIDBased as UUIDBased
+import qualified Logs.Presence.Pure as Presence
+import qualified Logs.Chunk.Pure as Chunk
import Types.TrustLevel
import Types.UUID
@@ -37,9 +38,11 @@ dropDead f content trustmap = case getLogVariety f of
-- because git remotes may still exist, and they need
-- to still know it's dead.
| f == trustLog -> PreserveFile
- | otherwise -> ChangeFile $ UUIDBased.showLog id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLog Just content
+ | otherwise -> ChangeFile $ UUIDBased.showLog id $ dropDeadFromMapLog trustmap id $ UUIDBased.parseLog Just content
Just NewUUIDBasedLog -> ChangeFile $
- UUIDBased.showLogNew id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLogNew Just content
+ UUIDBased.showLogNew id $ dropDeadFromMapLog trustmap id $ UUIDBased.parseLogNew Just content
+ Just (ChunkLog _) -> ChangeFile $
+ Chunk.showLog $ dropDeadFromMapLog trustmap fst $ Chunk.parseLog content
Just (PresenceLog _) ->
let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content
in if null newlog
@@ -48,8 +51,8 @@ dropDead f content trustmap = case getLogVariety f of
Just OtherLog -> PreserveFile
Nothing -> PreserveFile
-dropDeadFromUUIDBasedLog :: TrustMap -> UUIDBased.Log String -> UUIDBased.Log String
-dropDeadFromUUIDBasedLog trustmap = M.filterWithKey $ notDead trustmap . const
+dropDeadFromMapLog :: TrustMap -> (k -> UUID) -> M.Map k v -> M.Map k v
+dropDeadFromMapLog trustmap getuuid = M.filterWithKey $ \k _v -> notDead trustmap getuuid k
{- Presence logs can contain UUIDs or other values. Any line that matches
- a dead uuid is dropped; any other values are passed through. -}
diff --git a/Logs.hs b/Logs.hs
index c9d58157a..ff7b7dcf0 100644
--- a/Logs.hs
+++ b/Logs.hs
@@ -14,6 +14,7 @@ import Types.Key
data LogVariety
= UUIDBasedLog
| NewUUIDBasedLog
+ | ChunkLog Key
| PresenceLog Key
| OtherLog
deriving (Show)
@@ -24,6 +25,7 @@ getLogVariety :: FilePath -> Maybe LogVariety
getLogVariety f
| f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog
| isRemoteStateLog f = Just NewUUIDBasedLog
+ | isChunkLog f = ChunkLog <$> chunkLogFileKey f
| isMetaDataLog f || f `elem` otherLogs = Just OtherLog
| otherwise = PresenceLog <$> firstJust (presenceLogs f)
@@ -133,6 +135,25 @@ remoteStateLogExt = ".log.rmt"
isRemoteStateLog :: FilePath -> Bool
isRemoteStateLog path = remoteStateLogExt `isSuffixOf` path
+{- The filename of the chunk log for a given key. -}
+chunkLogFile :: Key -> FilePath
+chunkLogFile key = hashDirLower key </> keyFile key ++ chunkLogExt
+
+chunkLogFileKey :: FilePath -> Maybe Key
+chunkLogFileKey path
+ | ext == chunkLogExt = fileKey base
+ | otherwise = Nothing
+ where
+ file = takeFileName path
+ (base, ext) = splitAt (length file - extlen) file
+ extlen = length chunkLogExt
+
+chunkLogExt :: String
+chunkLogExt = ".log.cnk"
+
+isChunkLog :: FilePath -> Bool
+isChunkLog path = chunkLogExt `isSuffixOf` path
+
{- The filename of the metadata log for a given key. -}
metaDataLogFile :: Key -> FilePath
metaDataLogFile key = hashDirLower key </> keyFile key ++ metaDataLogExt
@@ -146,20 +167,23 @@ isMetaDataLog path = metaDataLogExt `isSuffixOf` path
prop_logs_sane :: Key -> Bool
prop_logs_sane dummykey = and
[ isNothing (getLogVariety "unknown")
- , expect isUUIDBasedLog (getLogVariety uuidLog)
- , expect isPresenceLog (getLogVariety $ locationLogFile dummykey)
- , expect isPresenceLog (getLogVariety $ urlLogFile dummykey)
- , expect isNewUUIDBasedLog (getLogVariety $ remoteStateLogFile dummykey)
- , expect isOtherLog (getLogVariety $ metaDataLogFile dummykey)
- , expect isOtherLog (getLogVariety $ numcopiesLog)
+ , expect gotUUIDBasedLog (getLogVariety uuidLog)
+ , expect gotPresenceLog (getLogVariety $ locationLogFile dummykey)
+ , expect gotPresenceLog (getLogVariety $ urlLogFile dummykey)
+ , expect gotNewUUIDBasedLog (getLogVariety $ remoteStateLogFile dummykey)
+ , expect gotChunkLog (getLogVariety $ chunkLogFile dummykey)
+ , expect gotOtherLog (getLogVariety $ metaDataLogFile dummykey)
+ , expect gotOtherLog (getLogVariety $ numcopiesLog)
]
where
expect = maybe False
- isUUIDBasedLog UUIDBasedLog = True
- isUUIDBasedLog _ = False
- isNewUUIDBasedLog NewUUIDBasedLog = True
- isNewUUIDBasedLog _ = False
- isPresenceLog (PresenceLog k) = k == dummykey
- isPresenceLog _ = False
- isOtherLog OtherLog = True
- isOtherLog _ = False
+ gotUUIDBasedLog UUIDBasedLog = True
+ gotUUIDBasedLog _ = False
+ gotNewUUIDBasedLog NewUUIDBasedLog = True
+ gotNewUUIDBasedLog _ = False
+ gotChunkLog (ChunkLog k) = k == dummykey
+ gotChunkLog _ = False
+ gotPresenceLog (PresenceLog k) = k == dummykey
+ gotPresenceLog _ = False
+ gotOtherLog OtherLog = True
+ gotOtherLog _ = False
diff --git a/Logs/Chunk.hs b/Logs/Chunk.hs
new file mode 100644
index 000000000..76da50947
--- /dev/null
+++ b/Logs/Chunk.hs
@@ -0,0 +1,44 @@
+{- Chunk logs.
+ -
+ - An object can be stored in chunked form on a remote; these logs keep
+ - track of the chunk size used, and the number of chunks.
+ -
+ - It's possible for a single object to be stored multiple times on the
+ - same remote using different chunk sizes. So, while this is a MapLog, it
+ - is not a normal UUIDBased log. Instead, it's a map from UUID and chunk
+ - size to number of chunks.
+ -
+ - Format: "timestamp uuid:chunksize chunkcount"
+ -
+ - Copyright 2014 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Logs.Chunk where
+
+import Common.Annex
+import Logs
+import Logs.MapLog
+import qualified Annex.Branch
+import Logs.Chunk.Pure
+
+import qualified Data.Map as M
+import Data.Time.Clock.POSIX
+
+chunksStored :: UUID -> Key -> ChunkSize -> ChunkCount -> Annex ()
+chunksStored u k chunksize chunkcount = do
+ ts <- liftIO getPOSIXTime
+ Annex.Branch.change (chunkLogFile k) $
+ showLog . changeMapLog ts (u, chunksize) chunkcount . parseLog
+
+chunksRemoved :: UUID -> Key -> ChunkSize -> Annex ()
+chunksRemoved u k chunksize = chunksStored u k chunksize 0
+
+getCurrentChunks :: UUID -> Key -> Annex [(ChunkSize, ChunkCount)]
+getCurrentChunks u k = select . parseLog <$> Annex.Branch.get (chunkLogFile k)
+ where
+ select = filter (\(_sz, ct) -> ct > 0)
+ . map (\((_ku, sz), l) -> (sz, value l))
+ . M.toList
+ . M.filterWithKey (\(ku, _sz) _ -> ku == u)
diff --git a/Logs/Chunk/Pure.hs b/Logs/Chunk/Pure.hs
new file mode 100644
index 000000000..09e871c38
--- /dev/null
+++ b/Logs/Chunk/Pure.hs
@@ -0,0 +1,32 @@
+{- Chunk logs, pure operations.
+ -
+ - Copyright 2014 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Logs.Chunk.Pure where
+
+import Common.Annex
+import Logs.MapLog
+import Data.Int
+
+type ChunkSize = Int64
+
+type ChunkCount = Integer
+
+type ChunkLog = MapLog (UUID, ChunkSize) ChunkCount
+
+parseLog :: String -> ChunkLog
+parseLog = parseMapLog fieldparser valueparser
+ where
+ fieldparser s =
+ let (u,sz) = separate (== ':') s
+ in (,) <$> pure (toUUID u) <*> readish sz
+ valueparser = readish
+
+showLog :: ChunkLog -> String
+showLog = showMapLog fieldshower valueshower
+ where
+ fieldshower (u, sz) = fromUUID u ++ ':' : show sz
+ valueshower = show
diff --git a/Logs/UUIDBased.hs b/Logs/UUIDBased.hs
index b403b6253..fe1c9e012 100644
--- a/Logs/UUIDBased.hs
+++ b/Logs/UUIDBased.hs
@@ -1,6 +1,6 @@
{- git-annex uuid-based logs
-
- - This is used to store information about a UUID in a way that can
+ - This is used to store information about UUIDs in a way that can
- be union merged.
-
- A line of the log will look like: "UUID[ INFO[ timestamp=foo]]"
diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs
index a71c39fbc..edd9dd8c9 100644
--- a/Remote/Helper/Chunked.hs
+++ b/Remote/Helper/Chunked.hs
@@ -9,12 +9,11 @@ module Remote.Helper.Chunked where
import Utility.DataUnits
import Types.Remote
+import Logs.Chunk.Pure (ChunkSize)
import qualified Data.Map as M
import Data.Int
-type ChunkSize = Int64
-
data ChunkConfig
= NoChunks
| UnpaddedChunks ChunkSize
diff --git a/doc/design/assistant/chunks.mdwn b/doc/design/assistant/chunks.mdwn
index c20bb9aab..d7517243c 100644
--- a/doc/design/assistant/chunks.mdwn
+++ b/doc/design/assistant/chunks.mdwn
@@ -160,17 +160,12 @@ the git-annex branch.
The location log does not record locations of individual chunk keys
(too space-inefficient).
Instead, look at git-annex:aaa/bbb/SHA256-s12345--xxxxxxx.log.cnk to get
-the chunk count and size for a key. File format would be:
+the chunk count and size for a key.
- ts uuid chunksize chunkcount
-
-Where a chunkcount of 0 means that the object is not longer present in the
-remote using the specified chunk size.
-
-Note that a given remote uuid might have multiple lines, if a key was
-stored on it twice using different chunk sizes. Also note that even when
-this file exists for a key, the object may be stored non-chunked on the
-remote too.
+Note that a given remote uuid might have multiple chunk sizes logged, if a
+key was stored on it twice using different chunk sizes. Also note that even
+when this file exists for a key, the object may be stored non-chunked on
+the remote too.
`hasKey` would check if any one (chunksize, chunkcount) is satisfied by
the files on the remote. It would also check if the non-chunked key is
diff --git a/doc/internals.mdwn b/doc/internals.mdwn
index 5cb8ec5aa..71d1b0380 100644
--- a/doc/internals.mdwn
+++ b/doc/internals.mdwn
@@ -224,16 +224,14 @@ are indicated by prefixing them with "!"
These log files are used when objects are stored in chunked form on
remotes. They record the size(s) of the chunks, and the number of chunks.
-For example, this logs that a remote has an object stored using 9 chunks
-of 1 mb size:
+For example, this logs that a remote has an object stored using both
+9 chunks of 1 mb size, and 1 chunk of 10 mb size.
- 1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 10240 9
+ 1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55:10240 9
+ 1287290776.765153s e605dca6-446a-11e0-8b2a-002170d25c55:102400 1
(When those chunks are removed from the remote, the 9 is changed to 0.)
-For future expansion, additional fields may be present following the
-number of chunks.
-
## `schedule.log`
Used to record scheduled events, such as periodic fscks.