summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Backend/WORM.hs2
-rw-r--r--Crypto.hs4
-rw-r--r--Types/Key.hs23
-rw-r--r--Upgrade/V1.hs2
-rw-r--r--doc/design/assistant/chunks.mdwn22
-rw-r--r--doc/internals/key_format.mdwn9
6 files changed, 40 insertions, 22 deletions
diff --git a/Backend/WORM.hs b/Backend/WORM.hs
index cc7123850..fdeea6f89 100644
--- a/Backend/WORM.hs
+++ b/Backend/WORM.hs
@@ -36,7 +36,7 @@ keyValue :: KeySource -> Annex (Maybe Key)
keyValue source = do
stat <- liftIO $ getFileStatus $ contentLocation source
n <- genKeyName $ keyFilename source
- return $ Just Key
+ return $ Just $ stubKey
{ keyName = n
, keyBackendName = name backend
, keySize = Just $ fromIntegral $ fileSize stat
diff --git a/Crypto.hs b/Crypto.hs
index f3a9e3957..0bfa81db2 100644
--- a/Crypto.hs
+++ b/Crypto.hs
@@ -142,11 +142,9 @@ decryptCipher (EncryptedCipher t variant _) =
- reversible, nor does it need to be the same type of encryption used
- on content. It does need to be repeatable. -}
encryptKey :: Mac -> Cipher -> Key -> Key
-encryptKey mac c k = Key
+encryptKey mac c k = stubKey
{ keyName = macWithCipher mac c (key2file k)
, keyBackendName = "GPG" ++ showMac mac
- , keySize = Nothing -- size and mtime omitted
- , keyMtime = Nothing -- to avoid leaking data
}
type Feeder = Handle -> IO ()
diff --git a/Types/Key.hs b/Types/Key.hs
index 26af6220f..90f66f23e 100644
--- a/Types/Key.hs
+++ b/Types/Key.hs
@@ -2,7 +2,7 @@
-
- Most things should not need this, using Types instead
-
- - Copyright 2011 Joey Hess <joey@kitenet.net>
+ - Copyright 2011-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -30,6 +30,8 @@ data Key = Key
, keyBackendName :: String
, keySize :: Maybe Integer
, keyMtime :: Maybe EpochTime
+ , keyChunkSize :: Maybe Integer
+ , keyChunkNum :: Maybe Integer
} deriving (Eq, Ord, Read, Show)
{- A filename may be associated with a Key. -}
@@ -41,6 +43,8 @@ stubKey = Key
, keyBackendName = ""
, keySize = Nothing
, keyMtime = Nothing
+ , keyChunkSize = Nothing
+ , keyChunkNum = Nothing
}
fieldSep :: Char
@@ -50,13 +54,13 @@ fieldSep = '-'
- The name field is always shown last, separated by doubled fieldSeps,
- and is the only field allowed to contain the fieldSep. -}
key2file :: Key -> FilePath
-key2file Key { keyBackendName = b, keySize = s, keyMtime = m, keyName = n } =
- b +++ ('s' ?: s) +++ ('m' ?: m) +++ (fieldSep : n)
+key2file Key { keyBackendName = b, keySize = s, keyMtime = m, keyChunkSize = cs, keyChunkNum = cn, keyName = n } =
+ b +++ ('s' ?: s) +++ ('m' ?: m) +++ ('S' ?: cs) +++ ('C' ?: cn) +++ (fieldSep : n)
where
"" +++ y = y
x +++ "" = x
x +++ y = x ++ fieldSep:y
- c ?: (Just v) = c : show v
+ f ?: (Just v) = f : show v
_ ?: _ = ""
file2key :: FilePath -> Maybe Key
@@ -84,6 +88,13 @@ file2key s
addfield 'm' k v = do
mtime <- readish v
return $ k { keyMtime = Just mtime }
+ addfield 'S' k v = do
+ chunksize <- readish v
+ return $ k { keyChunkSize = Just chunksize }
+ addfield 'C' k v = case readish v of
+ Just chunknum | chunknum > 0 ->
+ return $ k { keyChunkNum = Just chunknum }
+ _ -> return k
addfield _ _ _ = Nothing
instance Arbitrary Key where
@@ -92,6 +103,8 @@ instance Arbitrary Key where
<*> (listOf1 $ elements ['A'..'Z']) -- BACKEND
<*> ((abs <$>) <$> arbitrary) -- size cannot be negative
<*> arbitrary
+ <*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative
+ <*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative
prop_idempotent_key_encode :: Key -> Bool
prop_idempotent_key_encode k = Just k == (file2key . key2file) k
@@ -103,6 +116,6 @@ prop_idempotent_key_decode f
where
-- file2key will accept the fields in any order, so don't
-- try the test unless the fields are in the normal order
- normalfieldorder = fields `isPrefixOf` "sm"
+ normalfieldorder = fields `isPrefixOf` "smSC"
fields = map (f !!) $ filter (< length f) $ map succ $
elemIndices fieldSep f
diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs
index 8af4848a1..347b102ac 100644
--- a/Upgrade/V1.hs
+++ b/Upgrade/V1.hs
@@ -144,7 +144,7 @@ oldlog2key l
readKey1 :: String -> Key
readKey1 v
| mixup = fromJust $ file2key $ intercalate ":" $ Prelude.tail bits
- | otherwise = Key
+ | otherwise = stubKey
{ keyName = n
, keyBackendName = b
, keySize = s
diff --git a/doc/design/assistant/chunks.mdwn b/doc/design/assistant/chunks.mdwn
index 42a31bd25..c20bb9aab 100644
--- a/doc/design/assistant/chunks.mdwn
+++ b/doc/design/assistant/chunks.mdwn
@@ -104,7 +104,7 @@ Problem: Does not solve concurrent uploads with different chunk sizes.
When chunking is enabled, always put a chunk number in the Key,
along with the chunk size.
-So, SHA256-s10000-c1--xxxxxxx for the first chunk of 1 megabyte.
+So, SHA256-s1048576-c1--xxxxxxx for the first chunk of 1 megabyte.
Before any chunks are stored, write a chunkcount file, eg
SHA256-s12345-c0--xxxxxxx. Note that this key is the same as the original
@@ -148,20 +148,24 @@ could lead to data loss. (Same as in design 2.)
# design 4
-Use key SHA256-s10000-c1--xxxxxxx for the first chunk of 1 megabyte.
+Use key SHA256-s12345-S1048576-C1--xxxxxxx for the first chunk of 1 megabyte.
+
+Note that keeping the 's'ize field unchanged is necessary because it
+disambiguates eg, WORM keys. So a 'S'ize field is used to hold the chunk
+size.
Instead of storing the chunk count in the special remote, store it in
the git-annex branch.
-Look at git-annex:aaa/bbb/SHA256-s12345--xxxxxxx.log.cnk to get the
-chunk count and size. File format would be:
+The location log does not record locations of individual chunk keys
+(too space-inefficient).
+Instead, look at git-annex:aaa/bbb/SHA256-s12345--xxxxxxx.log.cnk to get
+the chunk count and size for a key. File format would be:
- ts uuid chunksize chunkcount 0|1
+ ts uuid chunksize chunkcount
-Where a trailing 0 means that chunk size is no longer present on the
-remote, and a trailing 1 means it is. For future expansion, any other
-value /= "0" is also accepted, meaning the chunk is present. For example,
-this could be used for [[deltas]], storing the checksums of the chunks.
+Where a chunkcount of 0 means that the object is no longer present in the
+remote using the specified chunk size.
Note that a given remote uuid might have multiple lines, if a key was
stored on it twice using different chunk sizes. Also note that even when
diff --git a/doc/internals/key_format.mdwn b/doc/internals/key_format.mdwn
index 17e20592c..52fb80395 100644
--- a/doc/internals/key_format.mdwn
+++ b/doc/internals/key_format.mdwn
@@ -1,6 +1,6 @@
A git-annex key has this format:
- BACKEND-sNNNN-mNNNN--NAME
+ BACKEND[-sNNNN][-mNNNN][-SNNNN-CNNNN]--NAME
For example:
@@ -10,12 +10,15 @@ For example:
are always upper-cased.
* The name field at the end has a format dependent on the backend. It is
always the last field, and is prefixed with "--". Unlike other fields,
- it may contain "-" in its content. It should not contain newline characters;
- otherwise nearly anything goes.
+ it may contain "-" in its content. It should not contain newline
+ characters or "/"; otherwise nearly anything goes.
* The "-s" field is optional, and is the size of the content in bytes.
* The "-m" field is optional, and is the mtime of the file when it was
added to git-annex, expressed as seconds from the epoch.
This is currently only used by the WORM backend.
+* The "-S" and "-C" fields are only used for keys that are chunks
+ of some other key. "-S" is the size of the chunk, and "-C" is the chunk
+ number (starting at 1).
* Other fields could be added in the future, if needed.
git-annex always puts the fields in the order shown above when serializing