diff options
Diffstat (limited to 'Backend/Hash.hs')
-rw-r--r-- | Backend/Hash.hs | 162 |
1 files changed, 162 insertions, 0 deletions
diff --git a/Backend/Hash.hs b/Backend/Hash.hs new file mode 100644 index 000000000..309c0fe9f --- /dev/null +++ b/Backend/Hash.hs @@ -0,0 +1,162 @@ +{- git-annex hashing backends + - + - Copyright 2011-2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Backend.Hash (backends) where + +import Common.Annex +import qualified Annex +import Types.Backend +import Types.Key +import Types.KeySource +import Utility.Hash +import Utility.ExternalSHA + +import qualified Build.SysConfig as SysConfig +import qualified Data.ByteString.Lazy as L +import Data.Char + +data Hash = SHAHash HashSize | SkeinHash HashSize +type HashSize = Int + +{- Order is slightly significant; want SHA256 first, and more general + - sizes earlier. -} +hashes :: [Hash] +hashes = concat + [ map SHAHash [256, 1, 512, 224, 384] + , map SkeinHash [256, 512] + ] + +{- The SHA256E backend is the default, so genBackendE comes first. -} +backends :: [Backend] +backends = catMaybes $ map genBackendE hashes ++ map genBackend hashes + +genBackend :: Hash -> Maybe Backend +genBackend hash = Just Backend + { name = hashName hash + , getKey = keyValue hash + , fsckKey = Just $ checkKeyChecksum hash + , canUpgradeKey = Just needsUpgrade + } + +genBackendE :: Hash -> Maybe Backend +genBackendE hash = do + b <- genBackend hash + return $ b + { name = hashNameE hash + , getKey = keyValueE hash + } + +hashName :: Hash -> String +hashName (SHAHash size) = "SHA" ++ show size +hashName (SkeinHash size) = "SKEIN" ++ show size + +hashNameE :: Hash -> String +hashNameE hash = hashName hash ++ "E" + +{- A key is a hash of its contents. -} +keyValue :: Hash -> KeySource -> Annex (Maybe Key) +keyValue hash source = do + let file = contentLocation source + stat <- liftIO $ getFileStatus file + let filesize = fromIntegral $ fileSize stat + s <- hashFile hash file filesize + return $ Just $ stubKey + { keyName = s + , keyBackendName = hashName hash + , keySize = Just filesize + } + +{- Extension preserving keys. -} +keyValueE :: Hash -> KeySource -> Annex (Maybe Key) +keyValueE hash source = keyValue hash source >>= maybe (return Nothing) addE + where + addE k = return $ Just $ k + { keyName = keyName k ++ selectExtension (keyFilename source) + , keyBackendName = hashNameE hash + } + +selectExtension :: FilePath -> String +selectExtension f + | null es = "" + | otherwise = intercalate "." ("":es) + where + es = filter (not . null) $ reverse $ + take 2 $ takeWhile shortenough $ + reverse $ split "." $ filter validExtension $ takeExtensions f + shortenough e = length e <= 4 -- long enough for "jpeg" + +{- A key's checksum is checked during fsck. -} +checkKeyChecksum :: Hash -> Key -> FilePath -> Annex Bool +checkKeyChecksum hash key file = do + fast <- Annex.getState Annex.fast + mstat <- liftIO $ catchMaybeIO $ getFileStatus file + case (mstat, fast) of + (Just stat, False) -> do + let filesize = fromIntegral $ fileSize stat + check <$> hashFile hash file filesize + _ -> return True + where + expected = keyHash key + check s + | s == expected = True + {- A bug caused checksums to be prefixed with \ in some + - cases; still accept these as legal now that the bug has been + - fixed. -} + | '\\' : s == expected = True + | otherwise = False + +keyHash :: Key -> String +keyHash key = dropExtensions (keyName key) + +validExtension :: Char -> Bool +validExtension c + | isAlphaNum c = True + | c == '.' = True + | otherwise = False + +{- Upgrade keys that have the \ prefix on their sha due to a bug, or + - that contain non-alphanumeric characters in their extension. -} +needsUpgrade :: Key -> Bool +needsUpgrade key = "\\" `isPrefixOf` keyHash key || + any (not . validExtension) (takeExtensions $ keyName key) + +hashFile :: Hash -> FilePath -> Integer -> Annex String +hashFile hash file filesize = do + showAction "checksum" + liftIO $ go hash + where + go (SHAHash hashsize) = case shaCommand hashsize filesize of + Left sha -> sha <$> L.readFile file + Right command -> + either error return + =<< externalSHA command hashsize file + go (SkeinHash hashsize) = skeinHasher hashsize <$> L.readFile file + +skeinHasher :: HashSize -> (L.ByteString -> String) +skeinHasher hashsize + | hashsize == 256 = show . skein256 + | hashsize == 512 = show . skein512 + | otherwise = error $ "bad skein size " ++ show hashsize + +shaCommand :: HashSize -> Integer -> Either (L.ByteString -> String) String +shaCommand hashsize filesize + | hashsize == 1 = use SysConfig.sha1 sha1 + | hashsize == 256 = use SysConfig.sha256 sha256 + | hashsize == 224 = use SysConfig.sha224 sha224 + | hashsize == 384 = use SysConfig.sha384 sha384 + | hashsize == 512 = use SysConfig.sha512 sha512 + | otherwise = error $ "bad sha size " ++ show hashsize + where + use Nothing hasher = Left $ show . hasher + use (Just c) hasher + {- Use builtin, but slightly slower hashing for + - smallish files. Cryptohash benchmarks 90 to 101% + - faster than external hashers, depending on the hash + - and system. So there is no point forking an external + - process unless the file is large. -} + | filesize < 1048576 = use Nothing hasher + | otherwise = Right c |