summaryrefslogtreecommitdiff
path: root/Backend/SHA.hs
blob: 258caafd14620d2c3f4a0dd6bda4b9c4226ad14b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
{- git-annex SHA backends
 -
 - Copyright 2011,2012 Joey Hess <joey@kitenet.net>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

module Backend.SHA (backends) where

import Common.Annex
import qualified Annex
import Types.Backend
import Types.Key
import Types.KeySource

import qualified Build.SysConfig as SysConfig
import Data.Digest.Pure.SHA
import qualified Data.ByteString.Lazy as L
import System.Process
import Data.Char

type SHASize = Int

{- Order is slightly significant; want SHA256 first, and more general
 - sizes earlier. -}
sizes :: [Int]
sizes = [256, 1, 512, 224, 384]

{- The SHA256E backend is the default. -}
backends :: [Backend]
backends = catMaybes $ map genBackendE sizes ++ map genBackend sizes

genBackend :: SHASize -> Maybe Backend
genBackend size = Just $ Backend
	{ name = shaName size
	, getKey = keyValue size
	, fsckKey = Just $ checkKeyChecksum size
	, canUpgradeKey = Just $ needsUpgrade
	}

genBackendE :: SHASize -> Maybe Backend
genBackendE size = do
	b <- genBackend size
	return $ b 
		{ name = shaNameE size
		, getKey = keyValueE size
		}

shaName :: SHASize -> String
shaName size = "SHA" ++ show size

shaNameE :: SHASize -> String
shaNameE size = shaName size ++ "E"

shaN :: SHASize -> FilePath -> Integer -> Annex String
shaN shasize file filesize = do
	showAction "checksum"
	case shaCommand shasize filesize of
		Left sha -> liftIO $ sha <$> L.readFile file
		Right command -> liftIO $ 
			sanitycheck command . parse command . lines <$>
				readsha command (toCommand [File file])
  where
	parse command [] = bad command
	parse command (l:_)
		| null sha = bad command
		-- sha is prefixed with \ when filename contains certian chars
		| "\\" `isPrefixOf` sha = drop 1 sha
		| otherwise = sha
	  where
		sha = fst $ separate (== ' ') l
	bad command = error $ command ++ " parse error"

	{- sha commands output the filename, so need to set fileEncoding -}
	readsha command args =
		withHandle StdoutHandle createProcessSuccess p $ \h -> do
			fileEncoding h
			output  <- hGetContentsStrict h
			hClose h
			return output
	  where
		p = (proc command args) { std_out = CreatePipe }

	{- Check that we've correctly parsing the output of the command,
	 - by making sure the sha we read is of the expected length. -}
	sanitycheck command sha
		| length sha /= expectedlen =
			error $ "Failed to parse the output of " ++ command
		| any (`notElem` "0123456789abcdef") sha' =
			error $ "Unexpected character in output of " ++ command ++ "\"" ++ sha ++ "\""
		| otherwise = sha'
	  where
	  	sha' = map toLower sha
		expectedlen = case shasize of
			1 -> 40
			256 -> 64
			512 -> 128
			224 -> 56
			384 -> 96
			_ -> 0

shaCommand :: SHASize -> Integer -> Either (L.ByteString -> String) String
shaCommand shasize filesize
	| shasize == 1 = use SysConfig.sha1 sha1
	| shasize == 256 = use SysConfig.sha256 sha256
	| shasize == 224 = use SysConfig.sha224 sha224
	| shasize == 384 = use SysConfig.sha384 sha384
	| shasize == 512 = use SysConfig.sha512 sha512
	| otherwise = error $ "bad sha size " ++ show shasize
  where
	use Nothing sha = Left $ showDigest . sha
	use (Just c) sha
		{- use builtin, but slower sha for small files
		 - benchmarking indicates it's faster up to
		 - and slightly beyond 50 kb files -}
		| filesize < 51200 = use Nothing sha
		| otherwise = Right c

{- A key is a checksum of its contents. -}
keyValue :: SHASize -> KeySource -> Annex (Maybe Key)
keyValue shasize source = do
	let file = contentLocation source
	stat <- liftIO $ getFileStatus file
	let filesize = fromIntegral $ fileSize stat
	s <- shaN shasize file filesize
	return $ Just $ stubKey
		{ keyName = s
		, keyBackendName = shaName shasize
		, keySize = Just filesize
		}

{- Extension preserving keys. -}
keyValueE :: SHASize -> KeySource -> Annex (Maybe Key)
keyValueE size source = keyValue size source >>= maybe (return Nothing) addE
  where
	addE k = return $ Just $ k
		{ keyName = keyName k ++ selectExtension (keyFilename source)
		, keyBackendName = shaNameE size
		}

selectExtension :: FilePath -> String
selectExtension f
	| null es = ""
	| otherwise = intercalate "." ("":es)
  where
	es = filter (not . null) $ reverse $
		take 2 $ takeWhile shortenough $
		reverse $ split "." $ filter validExtension $ takeExtensions f
	shortenough e = length e <= 4 -- long enough for "jpeg"

{- A key's checksum is checked during fsck. -}
checkKeyChecksum :: SHASize -> Key -> FilePath -> Annex Bool
checkKeyChecksum size key file = do
	fast <- Annex.getState Annex.fast
	mstat <- liftIO $ catchMaybeIO $ getFileStatus file
	case (mstat, fast) of
		(Just stat, False) -> do
			let filesize = fromIntegral $ fileSize stat
			check <$> shaN size file filesize
		_ -> return True
  where
	sha = keySha key
	check s
		| s == sha = True
		{- A bug caused checksums to be prefixed with \ in some
		 - cases; still accept these as legal now that the bug has been
		 - fixed. -}
		| '\\' : s == sha = True
		| otherwise = False

keySha :: Key -> String
keySha key = dropExtensions (keyName key)

validExtension :: Char -> Bool
validExtension c
	| isAlphaNum c = True
	| c == '.' = True
	| otherwise = False

{- Upgrade keys that have the \ prefix on their sha due to a bug, or
 - that contain non-alphanumeric characters in their extension. -}
needsUpgrade :: Key -> Bool
needsUpgrade key = "\\" `isPrefixOf` keySha key ||
	any (not . validExtension) (takeExtensions $ keyName key)