diff options
author | Joey Hess <joey@kitenet.net> | 2012-07-04 13:04:01 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2012-07-04 13:04:01 -0400 |
commit | 40729e7fa21684bfed758479c97e3172ff8777fa (patch) | |
tree | 1f08747baa32d1c75096d50998b57a46e870dbfd | |
parent | d3be4790937390c29bb76ab8abaeb934330ed37e (diff) |
Use the SHA library for files smaller than 50 KB; for files that small, it is faster than forking the more optimised external program.
-rw-r--r-- | Backend/SHA.hs | 54 | ||||
-rw-r--r-- | debian/changelog | 2 |
2 files changed, 33 insertions, 23 deletions
diff --git a/Backend/SHA.hs b/Backend/SHA.hs index 6ecc78ff2..7abbf8035 100644 --- a/Backend/SHA.hs +++ b/Backend/SHA.hs @@ -42,28 +42,16 @@ genBackendE size = do , getKey = keyValueE size } -shaCommand :: SHASize -> Either (L.ByteString -> String) String -shaCommand sz - | sz == 1 = use SysConfig.sha1 sha1 - | sz == 256 = use SysConfig.sha256 sha256 - | sz == 224 = use SysConfig.sha224 sha224 - | sz == 384 = use SysConfig.sha384 sha384 - | sz == 512 = use SysConfig.sha512 sha512 - | otherwise = error $ "bad sha size " ++ show sz - where - use Nothing sha = Left $ showDigest . sha - use (Just c) _ = Right c - shaName :: SHASize -> String shaName size = "SHA" ++ show size shaNameE :: SHASize -> String shaNameE size = shaName size ++ "E" -shaN :: SHASize -> FilePath -> Annex String -shaN size file = do +shaN :: SHASize -> FilePath -> Integer -> Annex String +shaN shasize file filesize = do showAction "checksum" - case shaCommand size of + case shaCommand shasize filesize of Left sha -> liftIO $ sha <$> L.readFile file Right command -> liftIO $ runcommand command where @@ -74,16 +62,34 @@ shaN size file = do then error $ command ++ " parse error" else return sha +shaCommand :: SHASize -> Integer -> Either (L.ByteString -> String) String +shaCommand shasize filesize + | shasize == 1 = use SysConfig.sha1 sha1 + | shasize == 256 = use SysConfig.sha256 sha256 + | shasize == 224 = use SysConfig.sha224 sha224 + | shasize == 384 = use SysConfig.sha384 sha384 + | shasize == 512 = use SysConfig.sha512 sha512 + | otherwise = error $ "bad sha size " ++ show shasize + where + use Nothing sha = Left $ showDigest . sha + use (Just c) sha + -- use builtin, but slower sha for small files + -- benchmarking indicates it's faster up to + -- and slightly beyond 50 kb files + | filesize < 51200 = use Nothing sha + | otherwise = Right c + {- A key is a checksum of its contents. 
-} keyValue :: SHASize -> KeySource -> Annex (Maybe Key) -keyValue size source = do +keyValue shasize source = do let file = contentLocation source - s <- shaN size file stat <- liftIO $ getFileStatus file + let filesize = fromIntegral $ fileSize stat + s <- shaN shasize file filesize return $ Just $ stubKey { keyName = s - , keyBackendName = shaName size - , keySize = Just $ fromIntegral $ fileSize stat + , keyBackendName = shaName shasize + , keySize = Just filesize } {- Extension preserving keys. -} @@ -106,10 +112,12 @@ keyValueE size source = keyValue size source >>= maybe (return Nothing) addE checkKeyChecksum :: SHASize -> Key -> FilePath -> Annex Bool checkKeyChecksum size key file = do fast <- Annex.getState Annex.fast - present <- liftIO $ doesFileExist file - if not present || fast - then return True - else check <$> shaN size file + mstat <- liftIO $ catchMaybeIO $ getFileStatus file + case (mstat, fast) of + (Just stat, False) -> do + let filesize = fromIntegral $ fileSize stat + check <$> shaN size file filesize + _ -> return True where check s | s == dropExtension (keyName key) = True diff --git a/debian/changelog b/debian/changelog index ebd34c944..1c44f5952 100644 --- a/debian/changelog +++ b/debian/changelog @@ -7,6 +7,8 @@ git-annex (3.20120630) UNRELEASED; urgency=low * When shaNsum commands cannot be found, use the Haskell SHA library (already a dependency) to do the checksumming. This may be slower, but avoids portability problems. + * Use SHA library for files less than 50 kb in size, at which point it's + faster than forking the more optimised external program. -- Joey Hess <joeyh@debian.org> Sun, 01 Jul 2012 15:04:37 -0400 |