From 2d8a1ba354653fb01f9f33ec126ca53d64200acf Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 1 Oct 2013 20:34:06 -0400 Subject: Added SKEIN256 and SKEIN512 backends SHA3 is still waiting for final standardization. Although this is looking less likely given https://www.cdt.org/blogs/joseph-lorenzo-hall/2409-nist-sha-3 In the meantime, cryptohash implements skein, and it's used by some of the haskell ecosystem (for yesod sessions, IIRC), so this implementation is likely to continue working. Also, I've talked with the cryptohash author and he's a reasonable guy. It makes sense to have an alternate high security hash, in case some horrible attack is found against SHA2 tomorrow, or in case SHA3 comes out and worst fears are realized. I'd also like to support using skein for HMAC. But no hurry there and a new version of cryptohash has much nicer HMAC code, so I will probably wait until I can use that version. --- Backend/Hash.hs | 162 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Backend/SHA.hs | 148 --------------------------------------------------- 2 files changed, 162 insertions(+), 148 deletions(-) create mode 100644 Backend/Hash.hs delete mode 100644 Backend/SHA.hs (limited to 'Backend') diff --git a/Backend/Hash.hs b/Backend/Hash.hs new file mode 100644 index 000000000..309c0fe9f --- /dev/null +++ b/Backend/Hash.hs @@ -0,0 +1,162 @@ +{- git-annex hashing backends + - + - Copyright 2011-2013 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Backend.Hash (backends) where + +import Common.Annex +import qualified Annex +import Types.Backend +import Types.Key +import Types.KeySource +import Utility.Hash +import Utility.ExternalSHA + +import qualified Build.SysConfig as SysConfig +import qualified Data.ByteString.Lazy as L +import Data.Char + +data Hash = SHAHash HashSize | SkeinHash HashSize +type HashSize = Int + +{- Order is slightly significant; want SHA256 first, and more general + - sizes earlier. 
-} +hashes :: [Hash] +hashes = concat + [ map SHAHash [256, 1, 512, 224, 384] + , map SkeinHash [256, 512] + ] + +{- The SHA256E backend is the default, so genBackendE comes first. -} +backends :: [Backend] +backends = catMaybes $ map genBackendE hashes ++ map genBackend hashes + +genBackend :: Hash -> Maybe Backend +genBackend hash = Just Backend + { name = hashName hash + , getKey = keyValue hash + , fsckKey = Just $ checkKeyChecksum hash + , canUpgradeKey = Just needsUpgrade + } + +genBackendE :: Hash -> Maybe Backend +genBackendE hash = do + b <- genBackend hash + return $ b + { name = hashNameE hash + , getKey = keyValueE hash + } + +hashName :: Hash -> String +hashName (SHAHash size) = "SHA" ++ show size +hashName (SkeinHash size) = "SKEIN" ++ show size + +hashNameE :: Hash -> String +hashNameE hash = hashName hash ++ "E" + +{- A key is a hash of its contents. -} +keyValue :: Hash -> KeySource -> Annex (Maybe Key) +keyValue hash source = do + let file = contentLocation source + stat <- liftIO $ getFileStatus file + let filesize = fromIntegral $ fileSize stat + s <- hashFile hash file filesize + return $ Just $ stubKey + { keyName = s + , keyBackendName = hashName hash + , keySize = Just filesize + } + +{- Extension preserving keys. -} +keyValueE :: Hash -> KeySource -> Annex (Maybe Key) +keyValueE hash source = keyValue hash source >>= maybe (return Nothing) addE + where + addE k = return $ Just $ k + { keyName = keyName k ++ selectExtension (keyFilename source) + , keyBackendName = hashNameE hash + } + +selectExtension :: FilePath -> String +selectExtension f + | null es = "" + | otherwise = intercalate "." ("":es) + where + es = filter (not . null) $ reverse $ + take 2 $ takeWhile shortenough $ + reverse $ split "." $ filter validExtension $ takeExtensions f + shortenough e = length e <= 4 -- long enough for "jpeg" + +{- A key's checksum is checked during fsck. 
-} +checkKeyChecksum :: Hash -> Key -> FilePath -> Annex Bool +checkKeyChecksum hash key file = do + fast <- Annex.getState Annex.fast + mstat <- liftIO $ catchMaybeIO $ getFileStatus file + case (mstat, fast) of + (Just stat, False) -> do + let filesize = fromIntegral $ fileSize stat + check <$> hashFile hash file filesize + _ -> return True + where + expected = keyHash key + check s + | s == expected = True + {- A bug caused checksums to be prefixed with \ in some + - cases; still accept these as legal now that the bug has been + - fixed. -} + | '\\' : s == expected = True + | otherwise = False + +keyHash :: Key -> String +keyHash key = dropExtensions (keyName key) + +validExtension :: Char -> Bool +validExtension c + | isAlphaNum c = True + | c == '.' = True + | otherwise = False + +{- Upgrade keys that have the \ prefix on their sha due to a bug, or + - that contain non-alphanumeric characters in their extension. -} +needsUpgrade :: Key -> Bool +needsUpgrade key = "\\" `isPrefixOf` keyHash key || + any (not . validExtension) (takeExtensions $ keyName key) + +hashFile :: Hash -> FilePath -> Integer -> Annex String +hashFile hash file filesize = do + showAction "checksum" + liftIO $ go hash + where + go (SHAHash hashsize) = case shaCommand hashsize filesize of + Left sha -> sha <$> L.readFile file + Right command -> + either error return + =<< externalSHA command hashsize file + go (SkeinHash hashsize) = skeinHasher hashsize <$> L.readFile file + +skeinHasher :: HashSize -> (L.ByteString -> String) +skeinHasher hashsize + | hashsize == 256 = show . skein256 + | hashsize == 512 = show . 
skein512 + | otherwise = error $ "bad skein size " ++ show hashsize + +shaCommand :: HashSize -> Integer -> Either (L.ByteString -> String) String +shaCommand hashsize filesize + | hashsize == 1 = use SysConfig.sha1 sha1 + | hashsize == 256 = use SysConfig.sha256 sha256 + | hashsize == 224 = use SysConfig.sha224 sha224 + | hashsize == 384 = use SysConfig.sha384 sha384 + | hashsize == 512 = use SysConfig.sha512 sha512 + | otherwise = error $ "bad sha size " ++ show hashsize + where + use Nothing hasher = Left $ show . hasher + use (Just c) hasher + {- Use builtin, but slightly slower hashing for + - smallish files. Cryptohash benchmarks 90 to 101% + - faster than external hashers, depending on the hash + - and system. So there is no point forking an external + - process unless the file is large. -} + | filesize < 1048576 = use Nothing hasher + | otherwise = Right c diff --git a/Backend/SHA.hs b/Backend/SHA.hs deleted file mode 100644 index 42cde3f57..000000000 --- a/Backend/SHA.hs +++ /dev/null @@ -1,148 +0,0 @@ -{- git-annex SHA backends - - - - Copyright 2011-2013 Joey Hess - - - - Licensed under the GNU GPL version 3 or higher. - -} - -module Backend.SHA (backends) where - -import Common.Annex -import qualified Annex -import Types.Backend -import Types.Key -import Types.KeySource -import Utility.Hash -import Utility.ExternalSHA - -import qualified Build.SysConfig as SysConfig -import qualified Data.ByteString.Lazy as L -import Data.Char - -type SHASize = Int - -{- Order is slightly significant; want SHA256 first, and more general - - sizes earlier. -} -sizes :: [Int] -sizes = [256, 1, 512, 224, 384] - -{- The SHA256E backend is the default. 
-} -backends :: [Backend] -backends = catMaybes $ map genBackendE sizes ++ map genBackend sizes - -genBackend :: SHASize -> Maybe Backend -genBackend size = Just Backend - { name = shaName size - , getKey = keyValue size - , fsckKey = Just $ checkKeyChecksum size - , canUpgradeKey = Just needsUpgrade - } - -genBackendE :: SHASize -> Maybe Backend -genBackendE size = do - b <- genBackend size - return $ b - { name = shaNameE size - , getKey = keyValueE size - } - -shaName :: SHASize -> String -shaName size = "SHA" ++ show size - -shaNameE :: SHASize -> String -shaNameE size = shaName size ++ "E" - -shaN :: SHASize -> FilePath -> Integer -> Annex String -shaN shasize file filesize = do - showAction "checksum" - liftIO $ case shaCommand shasize filesize of - Left sha -> sha <$> L.readFile file - Right command -> - either error return - =<< externalSHA command shasize file - -shaCommand :: SHASize -> Integer -> Either (L.ByteString -> String) String -shaCommand shasize filesize - | shasize == 1 = use SysConfig.sha1 sha1 - | shasize == 256 = use SysConfig.sha256 sha256 - | shasize == 224 = use SysConfig.sha224 sha224 - | shasize == 384 = use SysConfig.sha384 sha384 - | shasize == 512 = use SysConfig.sha512 sha512 - | otherwise = error $ "bad sha size " ++ show shasize - where - use Nothing hasher = Left $ show . hasher - use (Just c) hasher - {- Use builtin, but slightly slower hashing for - - smallish files. Cryptohash benchmarks 90 to 101% - - faster than external hashers, depending on the hash - - and system. So there is no point forking an external - - process unless the file is large. -} - | filesize < 1048576 = use Nothing hasher - | otherwise = Right c - -{- A key is a checksum of its contents. 
-} -keyValue :: SHASize -> KeySource -> Annex (Maybe Key) -keyValue shasize source = do - let file = contentLocation source - stat <- liftIO $ getFileStatus file - let filesize = fromIntegral $ fileSize stat - s <- shaN shasize file filesize - return $ Just $ stubKey - { keyName = s - , keyBackendName = shaName shasize - , keySize = Just filesize - } - -{- Extension preserving keys. -} -keyValueE :: SHASize -> KeySource -> Annex (Maybe Key) -keyValueE size source = keyValue size source >>= maybe (return Nothing) addE - where - addE k = return $ Just $ k - { keyName = keyName k ++ selectExtension (keyFilename source) - , keyBackendName = shaNameE size - } - -selectExtension :: FilePath -> String -selectExtension f - | null es = "" - | otherwise = intercalate "." ("":es) - where - es = filter (not . null) $ reverse $ - take 2 $ takeWhile shortenough $ - reverse $ split "." $ filter validExtension $ takeExtensions f - shortenough e = length e <= 4 -- long enough for "jpeg" - -{- A key's checksum is checked during fsck. -} -checkKeyChecksum :: SHASize -> Key -> FilePath -> Annex Bool -checkKeyChecksum size key file = do - fast <- Annex.getState Annex.fast - mstat <- liftIO $ catchMaybeIO $ getFileStatus file - case (mstat, fast) of - (Just stat, False) -> do - let filesize = fromIntegral $ fileSize stat - check <$> shaN size file filesize - _ -> return True - where - sha = keySha key - check s - | s == sha = True - {- A bug caused checksums to be prefixed with \ in some - - cases; still accept these as legal now that the bug has been - - fixed. -} - | '\\' : s == sha = True - | otherwise = False - -keySha :: Key -> String -keySha key = dropExtensions (keyName key) - -validExtension :: Char -> Bool -validExtension c - | isAlphaNum c = True - | c == '.' = True - | otherwise = False - -{- Upgrade keys that have the \ prefix on their sha due to a bug, or - - that contain non-alphanumeric characters in their extension. 
-} -needsUpgrade :: Key -> Bool -needsUpgrade key = "\\" `isPrefixOf` keySha key || - any (not . validExtension) (takeExtensions $ keyName key) -- cgit v1.2.3