diff options
-rw-r--r-- | Backend.hs | 4 | ||||
-rw-r--r-- | Backend/Hash.hs (renamed from Backend/SHA.hs) | 146 | ||||
-rw-r--r-- | Utility/Hash.hs | 4 | ||||
-rw-r--r-- | debian/changelog | 1 | ||||
-rw-r--r-- | doc/backends.mdwn | 2 |
5 files changed, 89 insertions, 68 deletions
diff --git a/Backend.hs b/Backend.hs index 2ee14acc6..38314687a 100644 --- a/Backend.hs +++ b/Backend.hs @@ -27,12 +27,12 @@ import qualified Types.Backend as B import Config -- When adding a new backend, import it here and add it to the list. -import qualified Backend.SHA +import qualified Backend.Hash import qualified Backend.WORM import qualified Backend.URL list :: [Backend] -list = Backend.SHA.backends ++ Backend.WORM.backends ++ Backend.URL.backends +list = Backend.Hash.backends ++ Backend.WORM.backends ++ Backend.URL.backends {- List of backends in the order to try them when storing a new key. -} orderedList :: Annex [Backend] diff --git a/Backend/SHA.hs b/Backend/Hash.hs index 42cde3f57..309c0fe9f 100644 --- a/Backend/SHA.hs +++ b/Backend/Hash.hs @@ -1,11 +1,11 @@ -{- git-annex SHA backends +{- git-annex hashing backends - - Copyright 2011-2013 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} -module Backend.SHA (backends) where +module Backend.Hash (backends) where import Common.Annex import qualified Annex @@ -19,87 +19,64 @@ import qualified Build.SysConfig as SysConfig import qualified Data.ByteString.Lazy as L import Data.Char -type SHASize = Int +data Hash = SHAHash HashSize | SkeinHash HashSize +type HashSize = Int {- Order is slightly significant; want SHA256 first, and more general - sizes earlier. -} -sizes :: [Int] -sizes = [256, 1, 512, 224, 384] +hashes :: [Hash] +hashes = concat + [ map SHAHash [256, 1, 512, 224, 384] + , map SkeinHash [256, 512] + ] -{- The SHA256E backend is the default. -} +{- The SHA256E backend is the default, so genBackendE comes first. -} backends :: [Backend] -backends = catMaybes $ map genBackendE sizes ++ map genBackend sizes +backends = catMaybes $ map genBackendE hashes ++ map genBackend hashes -genBackend :: SHASize -> Maybe Backend -genBackend size = Just Backend - { name = shaName size - , getKey = keyValue size - , fsckKey = Just $ checkKeyChecksum size +genBackend :: Hash -> Maybe Backend +genBackend hash = Just Backend + { name = hashName hash + , getKey = keyValue hash + , fsckKey = Just $ checkKeyChecksum hash , canUpgradeKey = Just needsUpgrade } -genBackendE :: SHASize -> Maybe Backend -genBackendE size = do - b <- genBackend size +genBackendE :: Hash -> Maybe Backend +genBackendE hash = do + b <- genBackend hash return $ b - { name = shaNameE size - , getKey = keyValueE size + { name = hashNameE hash + , getKey = keyValueE hash } -shaName :: SHASize -> String -shaName size = "SHA" ++ show size +hashName :: Hash -> String +hashName (SHAHash size) = "SHA" ++ show size +hashName (SkeinHash size) = "SKEIN" ++ show size -shaNameE :: SHASize -> String -shaNameE size = shaName size ++ "E" +hashNameE :: Hash -> String +hashNameE hash = hashName hash ++ "E" -shaN :: SHASize -> FilePath -> Integer -> Annex String -shaN shasize file filesize = do - showAction "checksum" - liftIO $ case shaCommand shasize filesize of - Left sha -> sha <$> L.readFile file - Right command -> - either error return - =<< externalSHA command shasize file - -shaCommand :: SHASize -> Integer -> Either (L.ByteString -> String) String -shaCommand shasize filesize - | shasize == 1 = use SysConfig.sha1 sha1 - | shasize == 256 = use SysConfig.sha256 sha256 - | shasize == 224 = use SysConfig.sha224 sha224 - | shasize == 384 = use SysConfig.sha384 sha384 - | shasize == 512 = use SysConfig.sha512 sha512 - | otherwise = error $ "bad sha size " ++ show shasize - where - use Nothing hasher = Left $ show . hasher - use (Just c) hasher - {- Use builtin, but slightly slower hashing for - - smallish files. Cryptohash benchmarks 90 to 101% - - faster than external hashers, depending on the hash - - and system. So there is no point forking an external - - process unless the file is large. -} - | filesize < 1048576 = use Nothing hasher - | otherwise = Right c - -{- A key is a checksum of its contents. -} -keyValue :: SHASize -> KeySource -> Annex (Maybe Key) -keyValue shasize source = do +{- A key is a hash of its contents. -} +keyValue :: Hash -> KeySource -> Annex (Maybe Key) +keyValue hash source = do let file = contentLocation source stat <- liftIO $ getFileStatus file let filesize = fromIntegral $ fileSize stat - s <- shaN shasize file filesize + s <- hashFile hash file filesize return $ Just $ stubKey { keyName = s - , keyBackendName = shaName shasize + , keyBackendName = hashName hash , keySize = Just filesize } {- Extension preserving keys. -} -keyValueE :: SHASize -> KeySource -> Annex (Maybe Key) -keyValueE size source = keyValue size source >>= maybe (return Nothing) addE +keyValueE :: Hash -> KeySource -> Annex (Maybe Key) +keyValueE hash source = keyValue hash source >>= maybe (return Nothing) addE where addE k = return $ Just $ k { keyName = keyName k ++ selectExtension (keyFilename source) - , keyBackendName = shaNameE size + , keyBackendName = hashNameE hash } selectExtension :: FilePath -> String @@ -113,27 +90,27 @@ selectExtension f shortenough e = length e <= 4 -- long enough for "jpeg" {- A key's checksum is checked during fsck. -} -checkKeyChecksum :: SHASize -> Key -> FilePath -> Annex Bool -checkKeyChecksum size key file = do +checkKeyChecksum :: Hash -> Key -> FilePath -> Annex Bool +checkKeyChecksum hash key file = do fast <- Annex.getState Annex.fast mstat <- liftIO $ catchMaybeIO $ getFileStatus file case (mstat, fast) of (Just stat, False) -> do let filesize = fromIntegral $ fileSize stat - check <$> shaN size file filesize + check <$> hashFile hash file filesize _ -> return True where - sha = keySha key + expected = keyHash key check s - | s == sha = True + | s == expected = True {- A bug caused checksums to be prefixed with \ in some - cases; still accept these as legal now that the bug has been - fixed. -} - | '\\' : s == sha = True + | '\\' : s == expected = True | otherwise = False -keySha :: Key -> String -keySha key = dropExtensions (keyName key) +keyHash :: Key -> String +keyHash key = dropExtensions (keyName key) validExtension :: Char -> Bool validExtension c @@ -144,5 +121,42 @@ validExtension c {- Upgrade keys that have the \ prefix on their sha due to a bug, or - that contain non-alphanumeric characters in their extension. -} needsUpgrade :: Key -> Bool -needsUpgrade key = "\\" `isPrefixOf` keySha key || +needsUpgrade key = "\\" `isPrefixOf` keyHash key || any (not . validExtension) (takeExtensions $ keyName key) + +hashFile :: Hash -> FilePath -> Integer -> Annex String +hashFile hash file filesize = do + showAction "checksum" + liftIO $ go hash + where + go (SHAHash hashsize) = case shaCommand hashsize filesize of + Left sha -> sha <$> L.readFile file + Right command -> + either error return + =<< externalSHA command hashsize file + go (SkeinHash hashsize) = skeinHasher hashsize <$> L.readFile file + +skeinHasher :: HashSize -> (L.ByteString -> String) +skeinHasher hashsize + | hashsize == 256 = show . skein256 + | hashsize == 512 = show . skein512 + | otherwise = error $ "bad skein size " ++ show hashsize + +shaCommand :: HashSize -> Integer -> Either (L.ByteString -> String) String +shaCommand hashsize filesize + | hashsize == 1 = use SysConfig.sha1 sha1 + | hashsize == 256 = use SysConfig.sha256 sha256 + | hashsize == 224 = use SysConfig.sha224 sha224 + | hashsize == 384 = use SysConfig.sha384 sha384 + | hashsize == 512 = use SysConfig.sha512 sha512 + | otherwise = error $ "bad sha size " ++ show hashsize + where + use Nothing hasher = Left $ show . hasher + use (Just c) hasher + {- Use builtin, but slightly slower hashing for + - smallish files. Cryptohash benchmarks 90 to 101% + - faster than external hashers, depending on the hash + - and system. So there is no point forking an external + - process unless the file is large. -} + | filesize < 1048576 = use Nothing hasher + | otherwise = Right c diff --git a/Utility/Hash.hs b/Utility/Hash.hs index 31a36462c..e90f50623 100644 --- a/Utility/Hash.hs +++ b/Utility/Hash.hs @@ -26,4 +26,8 @@ sha512 = hashlazy --sha3 :: L.ByteString -> Digest SHA3 --sha3 = hashlazy +skein256 :: L.ByteString -> Digest Skein256_256 +skein256 = hashlazy +skein512 :: L.ByteString -> Digest Skein512_512 +skein512 = hashlazy diff --git a/debian/changelog b/debian/changelog index 70d2b8d0a..0fddf8f97 100644 --- a/debian/changelog +++ b/debian/changelog @@ -12,6 +12,7 @@ git-annex (4.20130921) UNRELEASED; urgency=low * Use cryptohash rather than SHA for hashing when no external hash program is available. This is a significant speedup for SHA256 on OSX, for example. + * Added SKEIN256 and SKEIN512 backends. * Android build redone from scratch, many dependencies updated, and entire build can now be done using provided scripts. * assistant: Clear the list of failed transfers when doing a full transfer diff --git a/doc/backends.mdwn b/doc/backends.mdwn index 9abe6eac0..79bacd68e 100644 --- a/doc/backends.mdwn +++ b/doc/backends.mdwn @@ -21,6 +21,8 @@ can use different ones for different files. but are not concerned about security. * `SHA384`, `SHA384E`, `SHA224`, `SHA224E` -- Hashes for people who like unusual sizes. +* `SKEIN512`, `SKEIN256` -- [Skein hash](http://en.wikipedia.org/wiki/Skein_hash), + a well-regarded SHA3 hash competition finalist. The `annex.backends` git-config setting can be used to list the backends git-annex should use. The first one listed will be used by default when |