diff options
author | Joey Hess <joeyh@joeyh.name> | 2015-01-28 16:51:40 -0400 |
---|---|---|
committer | Joey Hess <joeyh@joeyh.name> | 2015-01-28 16:52:08 -0400 |
commit | 6e765717650f0270cdc497d38245bcbc4180e60c (patch) | |
tree | c989f13da5507d8f46fdfa1b36f1ce0aa77aca2f | |
parent | 82161654830b0dc4187e9928555c9321ef61bb89 (diff) |
implement annex.tune.objecthashlower
Split out Annex.DirHashes which never really belonged in Locations.
-rw-r--r-- | Annex/DirHashes.hs | 76 | ||||
-rw-r--r-- | Locations.hs | 88 | ||||
-rw-r--r-- | Remote/Git.hs | 6 | ||||
-rw-r--r-- | Remote/Rsync.hs | 3 | ||||
-rw-r--r-- | Remote/Rsync/RsyncUrl.hs | 3 | ||||
-rw-r--r-- | debian/changelog | 1 |
6 files changed, 109 insertions, 68 deletions
diff --git a/Annex/DirHashes.hs b/Annex/DirHashes.hs new file mode 100644 index 000000000..36998821b --- /dev/null +++ b/Annex/DirHashes.hs @@ -0,0 +1,76 @@ +{- git-annex file locations + - + - Copyright 2010-2015 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.DirHashes ( + Hasher, + HashLevels(..), + objectHashLevels, + dirHashes, + hashDirMixed, + hashDirLower, +) where + +import Data.Bits +import Data.Word +import Data.Hash.MD5 +import Data.Default + +import Common +import Types.Key +import Types.GitConfig +import Types.Difference + +type Hasher = Key -> FilePath + +-- Number of hash levels to use. 2 is the default. +newtype HashLevels = HashLevels Int + +instance Default HashLevels where + def = HashLevels 2 + +objectHashLevels :: GitConfig -> HashLevels +objectHashLevels config + | hasDifference (== OneLevelObjectHash) (annexDifferences config) = + HashLevels 1 + | otherwise = def + +{- Two different directory hashes may be used. The mixed case hash + - came first, and is fine, except for the problem of case-strict + - filesystems such as Linux VFAT (mounted with shortname=mixed), + - which do not allow using a directory "XX" when "xx" already exists. + - To support that, most repositories use the lower case hash for new data. -} +dirHashes :: [HashLevels -> Hasher] +dirHashes = [hashDirLower, hashDirMixed] + +hashDirs :: HashLevels -> Int -> String -> FilePath +hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s +hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s + +hashDirMixed :: HashLevels -> Hasher +hashDirMixed n k = hashDirs n 2 $ take 4 $ display_32bits_as_dir =<< [a,b,c,d] + where + ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file $ nonChunkKey k + +hashDirLower :: HashLevels -> Hasher +hashDirLower n k = hashDirs n 3 $ take 6 $ md5s $ md5FilePath $ key2file $ nonChunkKey k + +{- modified version of display_32bits_as_hex from Data.Hash.MD5 + - Copyright (C) 2001 Ian Lynagh + - License: Either BSD or GPL + -} +display_32bits_as_dir :: Word32 -> String +display_32bits_as_dir w = trim $ swap_pairs cs + where + -- Need 32 characters to use. To avoid inaverdently making + -- a real word, use letters that appear less frequently. + chars = ['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF" + cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7] + getc n = chars !! fromIntegral n + swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs + swap_pairs _ = [] + -- Last 2 will always be 00, so omit. + trim = take 6 diff --git a/Locations.hs b/Locations.hs index 1a9551b3b..a9847f460 100644 --- a/Locations.hs +++ b/Locations.hs @@ -1,6 +1,6 @@ {- git-annex file locations - - - Copyright 2010-2013 Joey Hess <id@joeyh.name> + - Copyright 2010-2015 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -20,7 +20,6 @@ module Locations ( gitAnnexInodeSentinal, gitAnnexInodeSentinalCache, annexLocations, - annexLocation, gitAnnexDir, gitAnnexObjectDir, gitAnnexTmpMiscDir, @@ -59,7 +58,6 @@ module Locations ( gitAnnexRemotesDir, gitAnnexAssistantDefaultDir, isLinkToAnnex, - annexHashes, HashLevels(..), hashDirMixed, hashDirLower, @@ -68,18 +66,16 @@ module Locations ( prop_idempotent_fileKey ) where -import Data.Bits -import Data.Word -import Data.Hash.MD5 import Data.Char import Data.Default import Common -import Types +import Types.GitConfig import Types.Key import Types.UUID import Types.Difference import qualified Git +import Annex.DirHashes {- Conventions: - @@ -105,11 +101,15 @@ objectDir :: FilePath objectDir = addTrailingPathSeparator $ annexDir </> "objects" {- Annexed file's possible locations relative to the .git directory. - - There are two different possibilities, using different hashes. -} -annexLocations :: Key -> [FilePath] -annexLocations key = map (annexLocation key) (annexHashes def) -annexLocation :: Key -> Hasher -> FilePath -annexLocation key hasher = objectDir </> keyPath key hasher + - There are two different possibilities, using different hashes. + - + - Also, some repositories have a Difference in hash directory depth. + -} +annexLocations :: GitConfig -> Key -> [FilePath] +annexLocations config key = map (annexLocation config key) dirHashes + +annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> FilePath +annexLocation config key hasher = objectDir </> keyPath key (hasher $ objectHashLevels config) {- Annexed object's location in a repository. - @@ -138,11 +138,11 @@ gitAnnexLocation' key r config crippled | Git.repoIsLocalBare r || crippled || hasDifference (== ObjectHashLower) (annexDifferences config) = - check $ map inrepo $ annexLocations key + check $ map inrepo $ annexLocations config key {- Non-bare repositories only use hashDirMixed, so - don't need to do any work to check if the file is - present. -} - | otherwise = return $ inrepo $ annexLocation key (hashDirMixed def) + | otherwise = return $ inrepo $ annexLocation config key hashDirMixed where inrepo d = Git.localGitDir r </> d check locs@(l:_) = fromMaybe l <$> firstM doesFileExist locs @@ -407,9 +407,9 @@ prop_idempotent_fileKey s where k = stubKey { keyName = s, keyBackendName = "test" } -{- A location to store a key on the filesystem. A directory hash is used, - - to protect against filesystems that dislike having many items in a - - single directory. +{- A location to store a key on a special remote that uses a filesystem. + - A directory hash is used, to protect against filesystems that dislike + - having many items in a single directory. - - The file is put in a directory with the same name, this allows - write-protecting the directory to avoid accidental deletion of the file. @@ -419,51 +419,11 @@ keyPath key hasher = hasher key </> f </> f where f = keyFile key -{- All possibile locations to store a key using different directory hashes. -} -keyPaths :: Key -> [FilePath] -keyPaths key = map (keyPath key) (annexHashes def) - -{- Two different directory hashes may be used. The mixed case hash - - came first, and is fine, except for the problem of case-strict - - filesystems such as Linux VFAT (mounted with shortname=mixed), - - which do not allow using a directory "XX" when "xx" already exists. - - To support that, most repositories use the lower case hash for new data. -} -annexHashes :: HashLevels -> [Hasher] -annexHashes n = [hashDirLower n, hashDirMixed n] - -type Hasher = Key -> FilePath - --- Number of hash levels to use. 2 is the default. -newtype HashLevels = HashLevels Int - -instance Default HashLevels where - def = HashLevels 2 - -hashDirs :: HashLevels -> Int -> String -> FilePath -hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s -hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s - -hashDirMixed :: HashLevels -> Hasher -hashDirMixed n k = hashDirs n 2 $ take 4 $ display_32bits_as_dir =<< [a,b,c,d] - where - ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file $ nonChunkKey k - -hashDirLower :: HashLevels -> Hasher -hashDirLower n k = hashDirs n 3 $ take 6 $ md5s $ md5FilePath $ key2file $ nonChunkKey k - -{- modified version of display_32bits_as_hex from Data.Hash.MD5 - - Copyright (C) 2001 Ian Lynagh - - License: Either BSD or GPL +{- All possibile locations to store a key in a special remote + - using different directory hashes. + - + - This is compatible with the annexLocations, for interoperability between + - special remotes and git-annex repos. -} -display_32bits_as_dir :: Word32 -> String -display_32bits_as_dir w = trim $ swap_pairs cs - where - -- Need 32 characters to use. To avoid inaverdently making - -- a real word, use letters that appear less frequently. - chars = ['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF" - cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7] - getc n = chars !! fromIntegral n - swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs - swap_pairs _ = [] - -- Last 2 will always be 00, so omit. - trim = take 6 +keyPaths :: Key -> [FilePath] +keyPaths key = map (keyPath key . def) dirHashes diff --git a/Remote/Git.hs b/Remote/Git.hs index b31a1d850..328e39111 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -327,13 +327,15 @@ keyUrls r key = map tourl locs' -- If the remote is known to not be bare, try the hash locations -- used for non-bare repos first, as an optimisation. locs - | remoteAnnexBare (gitconfig r) == Just False = reverse (annexLocations key) - | otherwise = annexLocations key + | remoteAnnexBare remoteconfig == Just False = reverse (annexLocations cfg key) + | otherwise = annexLocations cfg key #ifndef mingw32_HOST_OS locs' = locs #else locs' = map (replace "\\" "/") locs #endif + remoteconfig = gitconfig r + cfg = fromJust $ remoteGitConfig remoteconfig dropKey :: Remote -> Key -> Annex Bool dropKey r key diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 04bbb19a7..da258551b 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -37,6 +37,7 @@ import Annex.Perms import Logs.Transfer import Types.Creds import Types.Key (isChunkKey) +import Annex.DirHashes import qualified Data.Map as M @@ -212,7 +213,7 @@ remove o k = do - content could be. Note that the parent directories have - to also be explicitly included, due to how rsync - traverses directories. -} - includes = concatMap use (annexHashes def) + includes = concatMap (use .def) dirHashes use h = let dir = h k in [ parentDir dir , dir diff --git a/Remote/Rsync/RsyncUrl.hs b/Remote/Rsync/RsyncUrl.hs index 5493e4e90..0cb1733c6 100644 --- a/Remote/Rsync/RsyncUrl.hs +++ b/Remote/Rsync/RsyncUrl.hs @@ -19,6 +19,7 @@ import System.FilePath.Posix #ifdef mingw32_HOST_OS import Data.String.Utils #endif +import Annex.DirHashes type RsyncUrl = String @@ -36,7 +37,7 @@ rsyncEscape o u | otherwise = u rsyncUrls :: RsyncOpts -> Key -> [RsyncUrl] -rsyncUrls o k = map use (annexHashes def) +rsyncUrls o k = map (use . def) dirHashes where use h = rsyncUrl o </> hash h </> rsyncEscape o (f </> f) f = keyFile k diff --git a/debian/changelog b/debian/changelog index 3caa7ddad..8aa351fea 100644 --- a/debian/changelog +++ b/debian/changelog @@ -26,6 +26,7 @@ git-annex (5.20150114) UNRELEASED; urgency=medium http://git-annex.branchable.com/tuning/ * merge: Refuse to merge changes from a git-annex branch of a repo that has been tuned in incompatable ways. + * Support annex.tune.objecthash1 and annex.tune.objecthashlower. -- Joey Hess <id@joeyh.name> Tue, 13 Jan 2015 17:03:39 -0400 |