From 1da0165678cb990160edcf376c4e6f08cccf8bf4 Mon Sep 17 00:00:00 2001 From: Benjamin Barenblat Date: Wed, 24 Jan 2018 19:49:51 -0500 Subject: Data.Digest.Sha2: Improve memory management Rework the SHA-2 implementation to use the low-level sha.h interface rather than the higher-level evp.h. This allows us to preallocate all the data structures, eliminating BoringSSL cleanup functions. As a result, we can implement hashing under `unsafeLocalState` (a.k.a. `unsafeDupablePerformIO`) instead of `unsafePerformIO`, which should improve performance in multithreaded programs. --- src/Data/Digest/Internal.hs | 76 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 src/Data/Digest/Internal.hs (limited to 'src/Data/Digest/Internal.hs') diff --git a/src/Data/Digest/Internal.hs b/src/Data/Digest/Internal.hs new file mode 100644 index 0000000..8e723ee --- /dev/null +++ b/src/Data/Digest/Internal.hs @@ -0,0 +1,76 @@ +{-# LANGUAGE ExistentialQuantification #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE Rank2Types #-} + +module Data.Digest.Internal where + +import Control.Exception (assert) +import Data.Bits (Bits((.&.)), shiftR) +import Data.ByteString (ByteString) +import qualified Data.ByteString as ByteString +import qualified Data.ByteString.Unsafe as ByteString +import Data.Char (intToDigit) +import Data.Word (Word8) +import Foreign (Ptr, Storable, alloca, allocaArray, throwIf_) +import Foreign.C.Types +import Foreign.Marshal.Unsafe (unsafeLocalState) +import Unsafe.Coerce (unsafeCoerce) + +-- | A hash algorithm which follows the standard initialize-update-finalize +-- pattern. +data Algo = forall ctx. Storable ctx => Algo + { mdLen :: CSize -- ^ The length of the digest. + , mdInit :: Ptr ctx -> IO CInt -- ^ Initializes the context. Must return 1. + -- | Adds the buffer to the context. Must not modify the buffer. Must return + -- 1. + , mdUpdate :: forall a. Ptr ctx -> Ptr a -> CSize -> IO CInt + -- | Adds final padding to the context and writes the digest to the buffer. + , mdFinal :: Ptr CUChar -> Ptr ctx -> IO CInt + } + +-- The type signatures in 'Algo' are suggestive of the functions exposed by the +-- BoringSSL API. Those functions fall into two broad categories--those which +-- always return 1 and those which return 1 only on success. + +alwaysSucceeds :: IO CInt -> IO () +alwaysSucceeds f = do + r <- f + assert (r == 1) (return ()) + +requireSuccess :: IO CInt -> IO () +requireSuccess f = throwIf_ (/= 1) (const "BoringSSL failure") f + +-- | The result of a hash operation. +newtype Digest = + Digest ByteString + deriving (Eq, Ord) + +instance Show Digest where + show (Digest d) = ByteString.foldr showHexPadded [] d + where + showHexPadded b xs = + hexit (b `shiftR` 4 .&. 0x0f) : hexit (b .&. 0x0f) : xs + hexit = intToDigit . fromIntegral :: Word8 -> Char + +-- | Hashes according to the given 'Algo'. +hash :: Algo -> ByteString -> Digest +hash (Algo {mdLen, mdInit, mdUpdate, mdFinal}) bytes = + let mdLen' = fromIntegral mdLen :: Int + in unsafeLocalState $ + alloca $ \ctx -> do + alwaysSucceeds $ mdInit ctx + -- 'mdUpdate' treats its @buf@ argument as @const@, so the sharing + -- inherent in 'ByteString.unsafeUseAsCStringLen' is fine. + ByteString.unsafeUseAsCStringLen bytes $ \(buf, len) -> + alwaysSucceeds $ mdUpdate ctx buf (fromIntegral len) + d <- + allocaArray mdLen' $ \mdOut -> do + requireSuccess $ mdFinal mdOut ctx + -- 'mdOut' is a 'Ptr CUChar'. However, to make life more interesting, + -- 'CString' is a 'Ptr CChar', and 'CChar' is signed. This is + -- especially unfortunate given that all we really want to do is + -- convert to a 'ByteString', which is unsigned. To work around it, + -- we're going to cheat and let Haskell reinterpret-cast 'mdOut' to + -- 'Ptr CChar' before it does its 'ByteString' ingestion. + ByteString.packCStringLen (unsafeCoerce mdOut, mdLen') + return (Digest d) -- cgit v1.2.3