From 1da0165678cb990160edcf376c4e6f08cccf8bf4 Mon Sep 17 00:00:00 2001 From: Benjamin Barenblat Date: Wed, 24 Jan 2018 19:49:51 -0500 Subject: Data.Digest.Sha2: Improve memory management Rework the SHA-2 implementation to use the low-level sha.h interface rather than the higher-level evp.h. This allows us to preallocate all the data structures, eliminating BoringSSL cleanup functions. As a result, we can implement hashing under `unsafeLocalState` (a.k.a. `unsafeDupablePerformIO`) instead of `unsafePerformIO`, which should improve performance in multithreaded programs. --- src/Data/Digest.hs | 2 +- src/Data/Digest/Evp.hsc | 135 -------------------------------------------- src/Data/Digest/Internal.hs | 76 +++++++++++++++++++++++++ src/Data/Digest/Sha2.hs | 30 ---------- src/Data/Digest/Sha2.hsc | 109 +++++++++++++++++++++++++++++++++++ 5 files changed, 186 insertions(+), 166 deletions(-) delete mode 100644 src/Data/Digest/Evp.hsc create mode 100644 src/Data/Digest/Internal.hs delete mode 100644 src/Data/Digest/Sha2.hs create mode 100644 src/Data/Digest/Sha2.hsc (limited to 'src') diff --git a/src/Data/Digest.hs b/src/Data/Digest.hs index 717cd0d..6eebd27 100644 --- a/src/Data/Digest.hs +++ b/src/Data/Digest.hs @@ -2,4 +2,4 @@ module Data.Digest ( Digest ) where -import Data.Digest.Evp +import Data.Digest.Internal (Digest) \ No newline at end of file diff --git a/src/Data/Digest/Evp.hsc b/src/Data/Digest/Evp.hsc deleted file mode 100644 index 3bba247..0000000 --- a/src/Data/Digest/Evp.hsc +++ /dev/null @@ -1,135 +0,0 @@ -{-# LANGUAGE CApiFFI #-} -{-# OPTIONS_GHC -Wno-missing-methods #-} - -module Data.Digest.Evp - ( Algo - , Digest(Digest) - , hash - ) where - -import Control.Exception (bracket_) -import Control.Monad (void) -import Data.Bits (Bits((.&.)), shiftR) -import Data.ByteString (ByteString) -import qualified Data.ByteString as ByteString -import qualified Data.ByteString.Unsafe as ByteString -import Data.Char (intToDigit) -import Data.Word (Word8) -import Foreign - (Ptr, Storable(alignment, peek, sizeOf), alloca, allocaArray, - nullPtr, throwIf_) -import Foreign.C.Types -import System.IO.Unsafe (unsafePerformIO) -import Unsafe.Coerce (unsafeCoerce) - -#include - --- First, we build basic bindings to the BoringSSL EVP interface. - --- | The BoringSSL @ENGINE@ type. -data Engine - --- | The BoringSSL @EVP_MD@ type, representing a hash algorithm. -data EvpMd - --- | A convenience alias for @Ptr EvpMd@. -type Algo = Ptr EvpMd - --- | The BoringSSL @EVP_MD_CTX@ type, representing the state of a pending --- hashing operation. -data EvpMdCtx - -instance Storable EvpMdCtx where - sizeOf _ = #size EVP_MD_CTX - alignment _ = #alignment EVP_MD_CTX - --- Imported functions from BoringSSL. See --- https://commondatastorage.googleapis.com/chromium-boringssl-docs/digest.h.html --- for documentation. - -foreign import ccall "openssl/digest.h EVP_MD_CTX_init" - evpMdCtxInit :: Ptr EvpMdCtx -> IO () - -foreign import ccall "openssl/digest.h EVP_MD_CTX_cleanup" - evpMdCtxCleanup' :: Ptr EvpMdCtx -> IO CInt - -foreign import ccall "openssl/digest.h EVP_DigestInit_ex" - evpDigestInitEx' :: Ptr EvpMdCtx -> Ptr EvpMd -> Ptr Engine -> IO CInt - -foreign import capi "openssl/digest.h value EVP_MAX_MD_SIZE" - evpMaxMdSize :: CSize - -foreign import ccall "openssl/digest.h EVP_DigestUpdate" - evpDigestUpdate' :: Ptr EvpMdCtx -> Ptr a -> CSize -> IO CInt - -foreign import ccall "openssl/digest.h EVP_DigestFinal_ex" - evpDigestFinalEx' :: Ptr EvpMdCtx -> Ptr CUChar -> Ptr CUInt -> IO CInt - --- Some of these functions return 'CInt' even though they can never fail. Wrap --- them to prevent warnings. - -evpMdCtxCleanup :: Ptr EvpMdCtx -> IO () -evpMdCtxCleanup = void . evpMdCtxCleanup' - -evpDigestUpdate :: Ptr EvpMdCtx -> Ptr a -> CSize -> IO () -evpDigestUpdate ctx md bytes = void $ evpDigestUpdate' ctx md bytes - -evpDigestFinalEx :: Ptr EvpMdCtx -> Ptr CUChar -> Ptr CUInt -> IO () -evpDigestFinalEx ctx mdOut outSize = void $ evpDigestFinalEx' ctx mdOut outSize - --- Convert functions that can in fact fail to throw exceptions instead. - -evpDigestInitEx :: Ptr EvpMdCtx -> Ptr EvpMd -> Ptr Engine -> IO () -evpDigestInitEx ctx md engine = - throwIf_ (/= 1) (const "BoringSSL failure") $ evpDigestInitEx' ctx md engine - --- Now we can build a memory-safe abstraction layer. - --- | Memory-safe wrapper for 'EvpMdCtx'. -withMdCtx :: (Ptr EvpMdCtx -> IO a) -> IO a -withMdCtx f = - alloca $ \ctx -> bracket_ (evpMdCtxInit ctx) (evpMdCtxCleanup ctx) (f ctx) - --- Finally, we're ready to actually implement the hashing interface. - --- | The result of a hash operation. -newtype Digest = - Digest ByteString - deriving (Eq, Ord) - -instance Show Digest where - show (Digest d) = ByteString.foldr showHexPadded [] d - where - showHexPadded b xs = - hexit (b `shiftR` 4 .&. 0x0f) : hexit (b .&. 0x0f) : xs - hexit = intToDigit . fromIntegral :: Word8 -> Char - -hash :: Algo -> ByteString -> Digest -hash md bytes = - -- We'd like to use 'unsafeLocalState' (i.e., 'unsafeDupablePerformIO') here, - -- but 'unsafeDupablePerformIO' runs computation in a context where it can be - -- arbitrarily terminated--i.e., where the cleanup in 'withMdCtx' is not - -- guaranteed to run. See - -- https://hackage.haskell.org/package/base/docs/System-IO-Unsafe.html#v:unsafeDupablePerformIO. - unsafePerformIO $ - withMdCtx $ \ctx -> do - evpDigestInitEx ctx md noEngine - -- evpDigestUpdate treats its @buf@ argument as @const@, so the sharing - -- inherent in 'ByteString.unsafeUseAsCStringLen' is fine. - ByteString.unsafeUseAsCStringLen bytes $ \(buf, len) -> - evpDigestUpdate ctx buf (fromIntegral len) - d <- - allocaArray (fromIntegral evpMaxMdSize) $ \mdOut -> - alloca $ \pOutSize -> do - evpDigestFinalEx ctx mdOut pOutSize - outSize <- fromIntegral <$> peek pOutSize - -- 'mdOut' is a 'Ptr CUChar'. However, to make life more interesting, - -- 'CString' is a 'Ptr CChar', and 'CChar' is signed. This is - -- especially unfortunate given that all we really want to do is - -- convert to a 'ByteString', which is unsigned. To work around it, - -- we're going to cheat and let Haskell reinterpret-cast 'mdOut' to - -- 'Ptr CChar' before it does its 'ByteString' ingestion. - ByteString.packCStringLen (unsafeCoerce mdOut, outSize) - return (Digest d) - where - noEngine = nullPtr :: Ptr Engine diff --git a/src/Data/Digest/Internal.hs b/src/Data/Digest/Internal.hs new file mode 100644 index 0000000..8e723ee --- /dev/null +++ b/src/Data/Digest/Internal.hs @@ -0,0 +1,76 @@ +{-# LANGUAGE ExistentialQuantification #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE Rank2Types #-} + +module Data.Digest.Internal where + +import Control.Exception (assert) +import Data.Bits (Bits((.&.)), shiftR) +import Data.ByteString (ByteString) +import qualified Data.ByteString as ByteString +import qualified Data.ByteString.Unsafe as ByteString +import Data.Char (intToDigit) +import Data.Word (Word8) +import Foreign (Ptr, Storable, alloca, allocaArray, throwIf_) +import Foreign.C.Types +import Foreign.Marshal.Unsafe (unsafeLocalState) +import Unsafe.Coerce (unsafeCoerce) + +-- | A hash algorithm which follows the standard initialize-update-finalize +-- pattern. +data Algo = forall ctx. Storable ctx => Algo + { mdLen :: CSize -- ^ The length of the digest. + , mdInit :: Ptr ctx -> IO CInt -- ^ Initializes the context. Must return 1. + -- | Adds the buffer to the context. Must not modify the buffer. Must return + -- 1. + , mdUpdate :: forall a. Ptr ctx -> Ptr a -> CSize -> IO CInt + -- | Adds final padding to the context and writes the digest to the buffer. + , mdFinal :: Ptr CUChar -> Ptr ctx -> IO CInt + } + +-- The type signatures in 'Algo' are suggestive of the functions exposed by the +-- BoringSSL API. Those functions fall into two broad categories--those which +-- always return 1 and those which return 1 only on success. + +alwaysSucceeds :: IO CInt -> IO () +alwaysSucceeds f = do + r <- f + assert (r == 1) (return ()) + +requireSuccess :: IO CInt -> IO () +requireSuccess f = throwIf_ (/= 1) (const "BoringSSL failure") f + +-- | The result of a hash operation. +newtype Digest = + Digest ByteString + deriving (Eq, Ord) + +instance Show Digest where + show (Digest d) = ByteString.foldr showHexPadded [] d + where + showHexPadded b xs = + hexit (b `shiftR` 4 .&. 0x0f) : hexit (b .&. 0x0f) : xs + hexit = intToDigit . fromIntegral :: Word8 -> Char + +-- | Hashes according to the given 'Algo'. +hash :: Algo -> ByteString -> Digest +hash (Algo {mdLen, mdInit, mdUpdate, mdFinal}) bytes = + let mdLen' = fromIntegral mdLen :: Int + in unsafeLocalState $ + alloca $ \ctx -> do + alwaysSucceeds $ mdInit ctx + -- 'mdUpdate' treats its @buf@ argument as @const@, so the sharing + -- inherent in 'ByteString.unsafeUseAsCStringLen' is fine. + ByteString.unsafeUseAsCStringLen bytes $ \(buf, len) -> + alwaysSucceeds $ mdUpdate ctx buf (fromIntegral len) + d <- + allocaArray mdLen' $ \mdOut -> do + requireSuccess $ mdFinal mdOut ctx + -- 'mdOut' is a 'Ptr CUChar'. However, to make life more interesting, + -- 'CString' is a 'Ptr CChar', and 'CChar' is signed. This is + -- especially unfortunate given that all we really want to do is + -- convert to a 'ByteString', which is unsigned. To work around it, + -- we're going to cheat and let Haskell reinterpret-cast 'mdOut' to + -- 'Ptr CChar' before it does its 'ByteString' ingestion. + ByteString.packCStringLen (unsafeCoerce mdOut, mdLen') + return (Digest d) diff --git a/src/Data/Digest/Sha2.hs b/src/Data/Digest/Sha2.hs deleted file mode 100644 index 0aa814e..0000000 --- a/src/Data/Digest/Sha2.hs +++ /dev/null @@ -1,30 +0,0 @@ -module Data.Digest.Sha2 - ( sha224 - , sha256 - , sha384 - , sha512 - ) where - -import Data.ByteString (ByteString) - -import qualified Data.Digest.Evp as Evp - -foreign import ccall "openssl/digest.h EVP_sha224" evpSha224 :: Evp.Algo - -foreign import ccall "openssl/digest.h EVP_sha256" evpSha256 :: Evp.Algo - -foreign import ccall "openssl/digest.h EVP_sha384" evpSha384 :: Evp.Algo - -foreign import ccall "openssl/digest.h EVP_sha512" evpSha512 :: Evp.Algo - -sha224 :: ByteString -> Evp.Digest -sha224 = Evp.hash evpSha224 - -sha256 :: ByteString -> Evp.Digest -sha256 = Evp.hash evpSha256 - -sha384 :: ByteString -> Evp.Digest -sha384 = Evp.hash evpSha384 - -sha512 :: ByteString -> Evp.Digest -sha512 = Evp.hash evpSha512 diff --git a/src/Data/Digest/Sha2.hsc b/src/Data/Digest/Sha2.hsc new file mode 100644 index 0000000..f587863 --- /dev/null +++ b/src/Data/Digest/Sha2.hsc @@ -0,0 +1,109 @@ +{-# LANGUAGE CApiFFI #-} +{-# OPTIONS_GHC -Wno-missing-methods #-} + +module Data.Digest.Sha2 + ( sha224 + , sha256 + , sha384 + , sha512 + ) where + +import Data.ByteString (ByteString) +import Foreign (Ptr, Storable(alignment, sizeOf)) +import Foreign.C.Types + +import Data.Digest.Internal + +#include + +-- SHA-224 + +foreign import capi "openssl/sha.h value SHA224_DIGEST_LENGTH" + sha224DigestLength :: CSize + +foreign import ccall "openssl/sha.h SHA224_Init" + sha224Init :: Ptr Sha256Ctx -> IO CInt + +foreign import ccall "openssl/sha.h SHA224_Update" + sha224Update :: Ptr Sha256Ctx -> Ptr a -> CSize -> IO CInt + +foreign import ccall "openssl/sha.h SHA224_Final" + sha224Final :: Ptr CUChar -> Ptr Sha256Ctx -> IO CInt + +sha224Algo :: Algo +sha224Algo = Algo sha224DigestLength sha224Init sha224Update sha224Final + +sha224 :: ByteString -> Digest +sha224 = hash sha224Algo + +-- SHA-256 + +data Sha256Ctx + +instance Storable Sha256Ctx where + sizeOf _ = #size SHA256_CTX + alignment _ = #alignment SHA256_CTX + +foreign import capi "openssl/sha.h value SHA256_DIGEST_LENGTH" + sha256DigestLength :: CSize + +foreign import ccall "openssl/sha.h SHA256_Init" + sha256Init :: Ptr Sha256Ctx -> IO CInt + +foreign import ccall "openssl/sha.h SHA256_Update" + sha256Update :: Ptr Sha256Ctx -> Ptr a -> CSize -> IO CInt + +foreign import ccall "openssl/sha.h SHA256_Final" + sha256Final :: Ptr CUChar -> Ptr Sha256Ctx -> IO CInt + +sha256Algo :: Algo +sha256Algo = Algo sha256DigestLength sha256Init sha256Update sha256Final + +sha256 :: ByteString -> Digest +sha256 = hash sha256Algo + +-- SHA-384 + +foreign import capi "openssl/sha.h value SHA384_DIGEST_LENGTH" + sha384DigestLength :: CSize + +foreign import ccall "openssl/sha.h SHA384_Init" + sha384Init :: Ptr Sha512Ctx -> IO CInt + +foreign import ccall "openssl/sha.h SHA384_Update" + sha384Update :: Ptr Sha512Ctx -> Ptr a -> CSize -> IO CInt + +foreign import ccall "openssl/sha.h SHA384_Final" + sha384Final :: Ptr CUChar -> Ptr Sha512Ctx -> IO CInt + +sha384Algo :: Algo +sha384Algo = Algo sha384DigestLength sha384Init sha384Update sha384Final + +sha384 :: ByteString -> Digest +sha384 = hash sha384Algo + +-- SHA-512 + +data Sha512Ctx + +instance Storable Sha512Ctx where + sizeOf _ = #size SHA512_CTX + alignment _ = #alignment SHA512_CTX + +foreign import capi "openssl/sha.h value SHA512_DIGEST_LENGTH" + sha512DigestLength :: CSize + +foreign import ccall "openssl/sha.h SHA512_Init" + sha512Init :: Ptr Sha512Ctx -> IO CInt + +foreign import ccall "openssl/sha.h SHA512_Update" + sha512Update :: Ptr Sha512Ctx -> Ptr a -> CSize -> IO CInt + +foreign import ccall "openssl/sha.h SHA512_Final" + sha512Final :: Ptr CUChar -> Ptr Sha512Ctx -> IO CInt + +sha512Algo :: Algo +sha512Algo = Algo sha512DigestLength sha512Init sha512Update sha512Final + +sha512 :: ByteString -> Digest +sha512 = hash sha512Algo -- cgit v1.2.3