summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Joey Hess <joey@kitenet.net> 2014-07-25 16:09:23 -0400
committer Joey Hess <joey@kitenet.net> 2014-07-25 16:09:23 -0400
commit 2b9e3d5ca89f88fbcae237270e95c592e3ce3bc1 (patch)
tree 16a9f352bd52510964766d771bb812c169fbaa38
parent acbd521c0e7fd7176775378845ff8f4f65bf20c0 (diff)
use same hash directories for chunked key as are used for its parent
This avoids a proliferation of hash directories when using new-style chunking, and should improve performance since chunks are accessed in sequence and so should have a common locality.

Of course, when a chunked key is encrypted, its hash directories have no relation to the parent key.

This commit was sponsored by Christian Kellermann.
-rw-r--r-- Locations.hs 5
-rw-r--r-- Types/Key.hs 12
-rw-r--r-- doc/internals/hashing.mdwn 5
3 files changed, 20 insertions, 2 deletions
diff --git a/Locations.hs b/Locations.hs
index d397a97be..0369c7a1c 100644
--- a/Locations.hs
+++ b/Locations.hs
@@ -421,6 +421,7 @@ keyPaths key = map (keyPath key) annexHashes
- which do not allow using a directory "XX" when "xx" already exists.
- To support that, most repositories use the lower case hash for new data. -}
type Hasher = Key -> FilePath
+
annexHashes :: [Hasher]
annexHashes = [hashDirLower, hashDirMixed]
@@ -428,12 +429,12 @@ hashDirMixed :: Hasher
hashDirMixed k = addTrailingPathSeparator $ take 2 dir </> drop 2 dir
where
dir = take 4 $ display_32bits_as_dir =<< [a,b,c,d]
- ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file k
+ ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file $ nonChunkKey k
hashDirLower :: Hasher
hashDirLower k = addTrailingPathSeparator $ take 3 dir </> drop 3 dir
where
- dir = take 6 $ md5s $ md5FilePath $ key2file k
+ dir = take 6 $ md5s $ md5FilePath $ key2file $ nonChunkKey k
{- modified version of display_32bits_as_hex from Data.Hash.MD5
- Copyright (C) 2001 Ian Lynagh
diff --git a/Types/Key.hs b/Types/Key.hs
index 90f66f23e..3015b1e86 100644
--- a/Types/Key.hs
+++ b/Types/Key.hs
@@ -13,6 +13,8 @@ module Types.Key (
stubKey,
key2file,
file2key,
+ isChunkKey,
+ nonChunkKey,
prop_idempotent_key_encode,
prop_idempotent_key_decode
@@ -47,6 +49,16 @@ stubKey = Key
, keyChunkNum = Nothing
}
+isChunkKey :: Key -> Bool
+isChunkKey k = isJust (keyChunkSize k) && isJust (keyChunkNum k)
+
+-- Gets the parent of a chunk key.
+nonChunkKey :: Key -> Key
+nonChunkKey k = k
+ { keyChunkSize = Nothing
+ , keyChunkNum = Nothing
+ }
+
fieldSep :: Char
fieldSep = '-'
diff --git a/doc/internals/hashing.mdwn b/doc/internals/hashing.mdwn
index cc4bc6456..bdc259b63 100644
--- a/doc/internals/hashing.mdwn
+++ b/doc/internals/hashing.mdwn
@@ -36,3 +36,8 @@ string, but where that would normally encode the bits using the 16 characters
0-9a-f, this instead uses the 32 characters "0123456789zqjxkmvwgpfZQJXKMVWGPF".
The first 2 letters of the resulting string are the first directory, and the
second 2 are the second directory.
+
+## chunk keys
+
+The same hash directory is used for a chunk key as would be used for the
+key that it's a chunk of.