summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Joey Hess <joey@kitenet.net> 2012-07-04 13:04:01 -0400
committer: Joey Hess <joey@kitenet.net> 2012-07-04 13:04:01 -0400
commit: 40729e7fa21684bfed758479c97e3172ff8777fa (patch)
tree: 1f08747baa32d1c75096d50998b57a46e870dbfd
parent: d3be4790937390c29bb76ab8abaeb934330ed37e (diff)
Use SHA library for files less than 50 kb in size, at which point it's faster than forking the more optimised external program.
-rw-r--r-- Backend/SHA.hs 54
-rw-r--r-- debian/changelog 2
2 files changed, 33 insertions, 23 deletions
diff --git a/Backend/SHA.hs b/Backend/SHA.hs
index 6ecc78ff2..7abbf8035 100644
--- a/Backend/SHA.hs
+++ b/Backend/SHA.hs
@@ -42,28 +42,16 @@ genBackendE size = do
, getKey = keyValueE size
}
-shaCommand :: SHASize -> Either (L.ByteString -> String) String
-shaCommand sz
- | sz == 1 = use SysConfig.sha1 sha1
- | sz == 256 = use SysConfig.sha256 sha256
- | sz == 224 = use SysConfig.sha224 sha224
- | sz == 384 = use SysConfig.sha384 sha384
- | sz == 512 = use SysConfig.sha512 sha512
- | otherwise = error $ "bad sha size " ++ show sz
- where
- use Nothing sha = Left $ showDigest . sha
- use (Just c) _ = Right c
-
shaName :: SHASize -> String
shaName size = "SHA" ++ show size
shaNameE :: SHASize -> String
shaNameE size = shaName size ++ "E"
-shaN :: SHASize -> FilePath -> Annex String
-shaN size file = do
+shaN :: SHASize -> FilePath -> Integer -> Annex String
+shaN shasize file filesize = do
showAction "checksum"
- case shaCommand size of
+ case shaCommand shasize filesize of
Left sha -> liftIO $ sha <$> L.readFile file
Right command -> liftIO $ runcommand command
where
@@ -74,16 +62,34 @@ shaN size file = do
then error $ command ++ " parse error"
else return sha
+shaCommand :: SHASize -> Integer -> Either (L.ByteString -> String) String
+shaCommand shasize filesize
+ | shasize == 1 = use SysConfig.sha1 sha1
+ | shasize == 256 = use SysConfig.sha256 sha256
+ | shasize == 224 = use SysConfig.sha224 sha224
+ | shasize == 384 = use SysConfig.sha384 sha384
+ | shasize == 512 = use SysConfig.sha512 sha512
+ | otherwise = error $ "bad sha size " ++ show shasize
+ where
+ use Nothing sha = Left $ showDigest . sha
+ use (Just c) sha
+ -- use builtin, but slower sha for small files
+ -- benchmarking indicates it's faster up to
+ -- and slightly beyond 50 kb files
+ | filesize < 51200 = use Nothing sha
+ | otherwise = Right c
+
{- A key is a checksum of its contents. -}
keyValue :: SHASize -> KeySource -> Annex (Maybe Key)
-keyValue size source = do
+keyValue shasize source = do
let file = contentLocation source
- s <- shaN size file
stat <- liftIO $ getFileStatus file
+ let filesize = fromIntegral $ fileSize stat
+ s <- shaN shasize file filesize
return $ Just $ stubKey
{ keyName = s
- , keyBackendName = shaName size
- , keySize = Just $ fromIntegral $ fileSize stat
+ , keyBackendName = shaName shasize
+ , keySize = Just filesize
}
{- Extension preserving keys. -}
@@ -106,10 +112,12 @@ keyValueE size source = keyValue size source >>= maybe (return Nothing) addE
checkKeyChecksum :: SHASize -> Key -> FilePath -> Annex Bool
checkKeyChecksum size key file = do
fast <- Annex.getState Annex.fast
- present <- liftIO $ doesFileExist file
- if not present || fast
- then return True
- else check <$> shaN size file
+ mstat <- liftIO $ catchMaybeIO $ getFileStatus file
+ case (mstat, fast) of
+ (Just stat, False) -> do
+ let filesize = fromIntegral $ fileSize stat
+ check <$> shaN size file filesize
+ _ -> return True
where
check s
| s == dropExtension (keyName key) = True
diff --git a/debian/changelog b/debian/changelog
index ebd34c944..1c44f5952 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -7,6 +7,8 @@ git-annex (3.20120630) UNRELEASED; urgency=low
* When shaNsum commands cannot be found, use the Haskell SHA library
(already a dependency) to do the checksumming. This may be slower,
but avoids portability problems.
+ * Use SHA library for files less than 50 kb in size, at which point it's
+ faster than forking the more optimised external program.
-- Joey Hess <joeyh@debian.org> Sun, 01 Jul 2012 15:04:37 -0400