summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2014-01-03 16:35:57 -0400
committerGravatar Joey Hess <joey@kitenet.net>2014-01-03 16:35:57 -0400
commit6e2eaff251cff32119f9def02afafa709e5d3da8 (patch)
treeef791fa88d0e8ea7dd3545cefe649034a6823683
parent0013d5b66019a96c809d28c96a0d3555694b1de2 (diff)
add remote state logs
This allows a remote to store a piece of arbitrary state associated with a key. This is needed to support Tahoe, where the file-cap is calculated from the data stored in it, and used to retrieve a key later. Glacier also would be much improved by using this. GETSTATE and SETSTATE are added to the external special remote protocol. Note that the state is left as-is even when a key is removed from a remote. It's up to the remote to decide when it wants to clear the state. The remote state log, $KEY.log.rmt, is a UUID-based log. However, rather than using the old UUID-based log format, I created a new variant of that format. The new varient is more space efficient (since it lacks the "timestamp=" hack, and easier to parse (and the parser doesn't mess with whitespace in the value), and avoids compatability cruft in the old one. This seemed worth cleaning up for these new files, since there could be a lot of them, while before UUID-based logs were only used for a few log files at the top of the git-annex branch. The transition code has also been updated to handle these new UUID-based logs. This commit was sponsored by Daniel Hofer.
-rw-r--r--Annex/Branch/Transitions.hs2
-rw-r--r--Logs.hs25
-rw-r--r--Logs/RemoteState.hs33
-rw-r--r--Logs/UUIDBased.hs25
-rw-r--r--Remote/External.hs7
-rw-r--r--Remote/External/Types.hs4
-rw-r--r--debian/changelog3
-rw-r--r--doc/design/external_special_remote_protocol.mdwn11
-rw-r--r--doc/internals.mdwn20
9 files changed, 119 insertions, 11 deletions
diff --git a/Annex/Branch/Transitions.hs b/Annex/Branch/Transitions.hs
index 90002de62..84cd1bbd9 100644
--- a/Annex/Branch/Transitions.hs
+++ b/Annex/Branch/Transitions.hs
@@ -34,6 +34,8 @@ dropDead :: FilePath -> String -> TrustMap -> FileTransition
dropDead f content trustmap = case getLogVariety f of
Just UUIDBasedLog -> ChangeFile $
UUIDBased.showLog id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLog Just content
+ Just NewUUIDBasedLog -> ChangeFile $
+ UUIDBased.showLogNew id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLogNew Just content
Just (PresenceLog _) ->
let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content
in if null newlog
diff --git a/Logs.hs b/Logs.hs
index 4386b7fd7..2952d6920 100644
--- a/Logs.hs
+++ b/Logs.hs
@@ -10,19 +10,21 @@ module Logs where
import Common.Annex
import Types.Key
-data LogVariety = UUIDBasedLog | PresenceLog Key
+{- There are several varieties of log file formats. -}
+data LogVariety = UUIDBasedLog | NewUUIDBasedLog | PresenceLog Key
deriving (Show)
{- Converts a path from the git-annex branch into one of the varieties
- of logs used by git-annex, if it's a known path. -}
getLogVariety :: FilePath -> Maybe LogVariety
getLogVariety f
- | f `elem` uuidBasedLogs = Just UUIDBasedLog
+ | f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog
+ | isRemoteStateLog f = Just NewUUIDBasedLog
| otherwise = PresenceLog <$> firstJust (presenceLogs f)
-{- All the uuid-based logs stored in the git-annex branch. -}
-uuidBasedLogs :: [FilePath]
-uuidBasedLogs =
+{- All the uuid-based logs stored in the top of the git-annex branch. -}
+topLevelUUIDBasedLogs :: [FilePath]
+topLevelUUIDBasedLogs =
[ uuidLog
, remoteLog
, trustLog
@@ -99,16 +101,29 @@ urlLogFileKey path
isUrlLog :: FilePath -> Bool
isUrlLog file = urlLogExt `isSuffixOf` file
+{- The filename of the remote state log for a given key. -}
+remoteStateLogFile :: Key -> FilePath
+remoteStateLogFile key = hashDirLower key </> keyFile key ++ remoteStateLogExt
+
+remoteStateLogExt :: String
+remoteStateLogExt = ".log.rmt"
+
+isRemoteStateLog :: FilePath -> Bool
+isRemoteStateLog path = remoteStateLogExt `isSuffixOf` path
+
prop_logs_sane :: Key -> Bool
prop_logs_sane dummykey = all id
[ isNothing (getLogVariety "unknown")
, expect isUUIDBasedLog (getLogVariety uuidLog)
, expect isPresenceLog (getLogVariety $ locationLogFile dummykey)
, expect isPresenceLog (getLogVariety $ urlLogFile dummykey)
+ , expect isNewUUIDBasedLog (getLogVariety $ remoteStateLogFile dummykey)
]
where
expect = maybe False
isUUIDBasedLog UUIDBasedLog = True
isUUIDBasedLog _ = False
+ isNewUUIDBasedLog NewUUIDBasedLog = True
+ isNewUUIDBasedLog _ = False
isPresenceLog (PresenceLog k) = k == dummykey
isPresenceLog _ = False
diff --git a/Logs/RemoteState.hs b/Logs/RemoteState.hs
new file mode 100644
index 000000000..95e51832e
--- /dev/null
+++ b/Logs/RemoteState.hs
@@ -0,0 +1,33 @@
+{- Remote state logs.
+ -
+ - Copyright 2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Logs.RemoteState (
+ getRemoteState,
+ setRemoteState,
+) where
+
+import Common.Annex
+import Logs
+import Logs.UUIDBased
+import qualified Annex.Branch
+
+import qualified Data.Map as M
+import Data.Time.Clock.POSIX
+
+type RemoteState = String
+
+setRemoteState :: UUID -> Key -> RemoteState -> Annex ()
+setRemoteState u k s = do
+ ts <- liftIO getPOSIXTime
+ Annex.Branch.change (remoteStateLogFile k) $
+ showLogNew id . changeLog ts u s . parseLogNew Just
+
+getRemoteState :: UUID -> Key -> Annex (Maybe RemoteState)
+getRemoteState u k = extract . parseLogNew Just
+ <$> Annex.Branch.get (remoteStateLogFile k)
+ where
+ extract m = value <$> M.lookup u m
diff --git a/Logs/UUIDBased.hs b/Logs/UUIDBased.hs
index 10b3bf55d..430c92d55 100644
--- a/Logs/UUIDBased.hs
+++ b/Logs/UUIDBased.hs
@@ -6,8 +6,10 @@
- A line of the log will look like: "UUID[ INFO[ timestamp=foo]]"
- The timestamp is last for backwards compatability reasons,
- and may not be present on old log lines.
+ -
+ - New uuid based logs instead use the form: "timestamp UUID INFO"
-
- - Copyright 2011 Joey Hess <joey@kitenet.net>
+ - Copyright 2011-2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -17,8 +19,10 @@ module Logs.UUIDBased (
LogEntry(..),
TimeStamp(..),
parseLog,
+ parseLogNew,
parseLogWithUUID,
showLog,
+ showLogNew,
changeLog,
addLog,
simpleMap,
@@ -56,6 +60,14 @@ showLog shower = unlines . map showpair . M.toList
showpair (k, LogEntry Unknown v) =
unwords [fromUUID k, shower v]
+showLogNew :: (a -> String) -> Log a -> String
+showLogNew shower = unlines . map showpair . M.toList
+ where
+ showpair (k, LogEntry (Date p) v) =
+ unwords [show p, fromUUID k, shower v]
+ showpair (k, LogEntry Unknown v) =
+ unwords ["0", fromUUID k, shower v]
+
parseLog :: (String -> Maybe a) -> String -> Log a
parseLog = parseLogWithUUID . const
@@ -86,6 +98,17 @@ parseLogWithUUID parser = M.fromListWith best . mapMaybe parse . lines
Nothing -> Unknown
Just d -> Date $ utcTimeToPOSIXSeconds d
+parseLogNew :: (String -> Maybe a) -> String -> Log a
+parseLogNew parser = M.fromListWith best . mapMaybe parse . lines
+ where
+ parse line = do
+ let (ts, rest) = splitword line
+ (u, v) = splitword rest
+ date <- Date . utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" ts
+ val <- parser v
+ Just (toUUID u, LogEntry date val)
+ splitword = separate (== ' ')
+
changeLog :: POSIXTime -> UUID -> a -> Log a -> Log a
changeLog t u v = M.insert u $ LogEntry (Date t) v
diff --git a/Remote/External.hs b/Remote/External.hs
index f682d242d..a0c3ef2d6 100644
--- a/Remote/External.hs
+++ b/Remote/External.hs
@@ -19,6 +19,7 @@ import Crypto
import Utility.Metered
import Logs.Transfer
import Logs.PreferredContent.Raw
+import Logs.RemoteState
import Config.Cost
import Annex.Content
import Annex.UUID
@@ -235,6 +236,12 @@ handleRequest' lck external req mp responsehandler
expr <- fromMaybe "" . M.lookup (externalUUID external)
<$> preferredContentMapRaw
send $ VALUE expr
+ handleRemoteRequest (SETSTATE key state) =
+ setRemoteState (externalUUID external) key state
+ handleRemoteRequest (GETSTATE key) = do
+ state <- fromMaybe ""
+ <$> getRemoteState (externalUUID external) key
+ send $ VALUE state
handleRemoteRequest (VERSION _) =
sendMessage lck external $ ERROR "too late to send VERSION"
diff --git a/Remote/External/Types.hs b/Remote/External/Types.hs
index e925f0e91..88c2126d7 100644
--- a/Remote/External/Types.hs
+++ b/Remote/External/Types.hs
@@ -170,6 +170,8 @@ data RemoteRequest
| GETUUID
| SETWANTED PreferredContentExpression
| GETWANTED
+ | SETSTATE Key String
+ | GETSTATE Key
deriving (Show)
instance Receivable RemoteRequest where
@@ -183,6 +185,8 @@ instance Receivable RemoteRequest where
parseCommand "GETUUID" = parse0 GETUUID
parseCommand "SETWANTED" = parse1 SETWANTED
parseCommand "GETWANTED" = parse0 GETWANTED
+ parseCommand "SETSTATE" = parse2 SETSTATE
+ parseCommand "GETSTATE" = parse1 GETSTATE
parseCommand _ = parseFail
-- Responses to RemoteRequest.
diff --git a/debian/changelog b/debian/changelog
index 6f9acd234..3727d4c5d 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,7 +1,8 @@
git-annex (5.20131231) UNRELEASED; urgency=medium
* mirror: Support --all (and --unused).
- * external special remote protocol: Added GETUUID, GETWANTED, SETWANTED.
+ * external special remote protocol: Added GETUUID, GETWANTED, SETWANTED,
+ SETSTATE, GETSTATE.
* Windows: Fix bug in direct mode merge code that could cause files
in subdirectories to go missing.
* Windows: Avoid eating stdin when running ssh to add a authorized key,
diff --git a/doc/design/external_special_remote_protocol.mdwn b/doc/design/external_special_remote_protocol.mdwn
index 138d9dd18..cac5489d2 100644
--- a/doc/design/external_special_remote_protocol.mdwn
+++ b/doc/design/external_special_remote_protocol.mdwn
@@ -222,6 +222,17 @@ in control.
Gets the current preferred content setting of the repository.
(git-annex replies with VALUE followed by the preferred content
expression.)
+* `SETSTATE Key Value`
+ Can be used to store some form of state for a Key. The state stored
+ can be anything this remote needs to store, in any format.
+ It is stored in the git-annex branch. Note that this means that if
+ multiple repositories are using the same special remote, and store
+ different state, whichever one stored the state last will win. Also,
+ it's best to avoid storing much state, since this will bloat the
+ git-annex branch. Most remotes will not need to store any state.
+* `GETSTATE Key`
+ Gets any state that has been stored for the key.
+ (git-annex replies with VALUE followed by the state.)
## general messages
diff --git a/doc/internals.mdwn b/doc/internals.mdwn
index 4cc6d3c93..d95ab3f5e 100644
--- a/doc/internals.mdwn
+++ b/doc/internals.mdwn
@@ -39,6 +39,10 @@ are added to git.
This branch operates on objects exclusively. No file names will ever
be stored in this branch.
+The files stored in this branch are all designed to be auto-merged
+using git's [[union merge driver|git-union-merge]]. So each line
+has a timestamp, to allow the most recent information to be identified.
+
### `uuid.log`
Records the UUIDs of known repositories, and associates them with a
@@ -110,7 +114,7 @@ somewhere else.
## `aaa/bbb/*.log`
These log files record [[location_tracking]] information
-for file contents. Again these are placed in two levels of subdirectories
+for file contents. These are placed in two levels of subdirectories
for hashing. See [[hashing]] for details.
The name of the key is the filename, and the content
@@ -122,15 +126,23 @@ Example:
1287290776.765152s 1 e605dca6-446a-11e0-8b2a-002170d25c55
1287290767.478634s 0 26339d22-446b-11e0-9101-002170d25c55
-These files are designed to be auto-merged using git's [[union merge driver|git-union-merge]].
-The timestamps allow the most recent information to be identified.
-
## `aaa/bbb/*.log.web`
These log files record urls used by the
[[web_special_remote|special_remotes/web]]. Their format is similar
to the location tracking files, but with urls rather than UUIDs.
+## `aaa/bbb/*.log.rmt`
+
+These log files are used by remotes that need to record their own state
+about keys. Each remote can store one line of data about a key, in
+its own format.
+
+Example:
+
+ 1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 blah blah
+ 1287290767.478634s 26339d22-446b-11e0-9101-002170d25c55 foo=bar
+
## `schedule.log`
Used to record scheduled events, such as periodic fscks.