summaryrefslogtreecommitdiff
path: root/Command/Fsck.hs
blob: 0d70f697bfdc7321b394eea83db01ef103cdd34e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
{- git-annex command
 -
 - Copyright 2010-2011 Joey Hess <joey@kitenet.net>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

module Command.Fsck where

import Common.Annex
import Command
import qualified Annex
import qualified Remote
import qualified Types.Backend
import qualified Types.Key
import qualified Backend
import Annex.Content
import Annex.Content.Direct
import Annex.Perms
import Annex.Link
import Logs.Location
import Logs.Trust
import Annex.UUID
import Utility.DataUnits
import Utility.FileMode
import Config
import qualified Option
import Types.Key
import Utility.HumanTime

import System.Posix.Process (getProcessID)
import Data.Time.Clock.POSIX
import Data.Time
import System.Posix.Types (EpochTime)
import System.Locale

def :: [Command]
def = [withOptions options $ command "fsck" paramPaths seek
	SectionMaintenance "check for problems"]

fromOption :: Option
fromOption = Option.field ['f'] "from" paramRemote "check remote"

startIncrementalOption :: Option
startIncrementalOption = Option.flag ['S'] "incremental" "start an incremental fsck"

moreIncrementalOption :: Option
moreIncrementalOption = Option.flag ['m'] "more" "continue an incremental fsck"

incrementalScheduleOption :: Option
incrementalScheduleOption = Option.field [] "incremental-schedule" paramTime
	"schedule incremental fscking"

options :: [Option]
options = 
	[ fromOption
	, startIncrementalOption
	, moreIncrementalOption
	, incrementalScheduleOption
	]

seek :: [CommandSeek]
seek =
	[ withField fromOption Remote.byNameWithUUID $ \from ->
	  withIncremental $ \i -> withFilesInGit $ whenAnnexed $ start from i
	, withIncremental $ \i -> withBarePresentKeys $ startBare i
	]

withIncremental :: (Incremental -> CommandSeek) -> CommandSeek
withIncremental = withValue $ do
	i <- maybe (return False) (checkschedule . parseDuration)
		=<< Annex.getField (Option.name incrementalScheduleOption)
	starti <- Annex.getFlag (Option.name startIncrementalOption)
	morei <- Annex.getFlag (Option.name moreIncrementalOption)
	case (i, starti, morei) of
		(False, False, False) -> return NonIncremental
		(False, True, _) -> startIncremental
		(False ,False, True) -> ContIncremental <$> getStartTime
		(True, _, _) ->
			maybe startIncremental (return . ContIncremental . Just)
				=<< getStartTime
  where
	startIncremental = do
		recordStartTime
		return StartIncremental

	checkschedule Nothing = error "bad --incremental-schedule value"
	checkschedule (Just delta) = do
		Annex.addCleanup "" $ do
			v <- getStartTime
			case v of
				Nothing -> noop
				Just started -> do
					now <- liftIO getPOSIXTime
					when (now - realToFrac started >= delta) $
						resetStartTime
		return True

start :: Maybe Remote -> Incremental -> FilePath -> (Key, Backend) -> CommandStart
start from inc file (key, backend) = do
	numcopies <- numCopies file
	case from of
		Nothing -> go $ perform key file backend numcopies
		Just r -> go $ performRemote key file backend numcopies r
  where
	go = runFsck inc file key

perform :: Key -> FilePath -> Backend -> Maybe Int -> Annex Bool
perform key file backend numcopies = check
	-- order matters
	[ fixLink key file
	, verifyLocationLog key file
	, verifyDirectMapping key file
	, checkKeySize key
	, checkBackend backend key
	, checkKeyNumCopies key file numcopies
	]

{- To fsck a remote, the content is retrieved to a tmp file,
 - and checked locally. -}
performRemote :: Key -> FilePath -> Backend -> Maybe Int -> Remote -> Annex Bool
performRemote key file backend numcopies remote =
	dispatch =<< Remote.hasKey remote key
  where
	dispatch (Left err) = do
		showNote err
		return False
	dispatch (Right True) = withtmp $ \tmpfile ->
		ifM (getfile tmpfile)
			( go True (Just tmpfile)
			, go True Nothing
			)
	dispatch (Right False) = go False Nothing
	go present localcopy = check
		[ verifyLocationLogRemote key file remote present
		, checkKeySizeRemote key remote localcopy
		, checkBackendRemote backend key remote localcopy
		, checkKeyNumCopies key file numcopies
		]
	withtmp a = do
		pid <- liftIO getProcessID
		t <- fromRepo gitAnnexTmpDir
		createAnnexDirectory t
		let tmp = t </> "fsck" ++ show pid ++ "." ++ keyFile key
		let cleanup = liftIO $ catchIO (removeFile tmp) (const noop)
		cleanup
		cleanup `after` a tmp
	getfile tmp =
		ifM (Remote.retrieveKeyFileCheap remote key tmp)
			( return True
			, ifM (Annex.getState Annex.fast)
				( return False
				, Remote.retrieveKeyFile remote key Nothing tmp
				)
			)

{- To fsck a bare repository, fsck each key in the location log. -}
withBarePresentKeys :: (Key -> CommandStart) -> CommandSeek
withBarePresentKeys a params = isBareRepo >>= go
  where
	go False = return []
	go True = do
		unless (null params) $
			error "fsck should be run without parameters in a bare repository"
		map a <$> loggedKeys

startBare :: Incremental -> Key -> CommandStart
startBare inc key = case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of
	Nothing -> stop
	Just backend -> runFsck inc (key2file key) key $ performBare key backend

{- Note that numcopies cannot be checked in a bare repository, because
 - getting the numcopies value requires a working copy with .gitattributes
 - files. -}
performBare :: Key -> Backend -> Annex Bool
performBare key backend = check
	[ verifyLocationLog key (key2file key)
	, checkKeySize key
	, checkBackend backend key
	]

check :: [Annex Bool] -> Annex Bool
check cs = all id <$> sequence cs

{- Checks that the file's link points correctly to the content.
 -
 - In direct mode, there is only a link when the content is not present.
 -}
fixLink :: Key -> FilePath -> Annex Bool
fixLink key file = do
	want <- inRepo $ gitAnnexLink file key
	have <- getAnnexLinkTarget file
	maybe noop (go want) have
	return True
  where
	go want have = when (want /= have) $ do
		{- Version 3.20120227 had a bug that could cause content
		 - to be stored in the wrong hash directory. Clean up
		 - after the bug by moving the content.
		 -}
		whenM (liftIO $ doesFileExist file) $
			unlessM (inAnnex key) $ do
				showNote "fixing content location"
				dir <- liftIO $ parentDir <$> absPath file
				let content = absPathFrom dir have
				unlessM crippledFileSystem $
					liftIO $ allowWrite (parentDir content)
				moveAnnex key content

		showNote "fixing link"
		liftIO $ createDirectoryIfMissing True (parentDir file)
		liftIO $ removeFile file
		addAnnexLink want file

{- Checks that the location log reflects the current status of the key,
 - in this repository only. -}
verifyLocationLog :: Key -> String -> Annex Bool
verifyLocationLog key desc = do
	present <- inAnnex key
	direct <- isDirect
	u <- getUUID
	
	{- Since we're checking that a key's file is present, throw
	 - in a permission fixup here too. -}
	when (present && not direct) $ do
		file <- calcRepo $ gitAnnexLocation key
		freezeContent file
		freezeContentDir file

	{- In direct mode, modified files will show up as not present,
	 - but that is expected and not something to do anything about. -}
	if (direct && not present)
		then return True
		else verifyLocationLog' key desc present u (logChange key u)

verifyLocationLogRemote :: Key -> String -> Remote -> Bool -> Annex Bool
verifyLocationLogRemote key desc remote present =
	verifyLocationLog' key desc present (Remote.uuid remote)
		(Remote.logStatus remote key)

verifyLocationLog' :: Key -> String -> Bool -> UUID -> (LogStatus -> Annex ()) -> Annex Bool
verifyLocationLog' key desc present u bad = do
	uuids <- Remote.keyLocations key
	case (present, u `elem` uuids) of
		(True, False) -> do
				fix InfoPresent
				-- There is no data loss, so do not fail.
				return True
		(False, True) -> do
				fix InfoMissing
				warning $
					"** Based on the location log, " ++ desc
					++ "\n** was expected to be present, " ++
					"but its content is missing."
				return False
		_ -> return True
  where
	fix s = do
		showNote "fixing location log"
		bad s

{- Ensures the direct mode mapping file is consistent. Each file
 - it lists for the key should exist, and the specified file should be
 - included in it.
 -}
verifyDirectMapping :: Key -> FilePath -> Annex Bool
verifyDirectMapping key file = do
	whenM isDirect $ do
		fs <- addAssociatedFile key file
		forM_ fs $ \f -> 
			unlessM (liftIO $ doesFileExist f) $
				void $ removeAssociatedFile key f
	return True

{- The size of the data for a key is checked against the size encoded in
 - the key's metadata, if available.
 -
 - Not checked in direct mode, because files can be changed directly.
  -}
checkKeySize :: Key -> Annex Bool
checkKeySize key = ifM isDirect
	( return True
	, do
		file <- calcRepo $ gitAnnexLocation key
		ifM (liftIO $ doesFileExist file)
			( checkKeySizeOr badContent key file
			, return True
			)
	)

checkKeySizeRemote :: Key -> Remote -> Maybe FilePath -> Annex Bool
checkKeySizeRemote _ _ Nothing = return True
checkKeySizeRemote key remote (Just file) =
	checkKeySizeOr (badContentRemote remote) key file

checkKeySizeOr :: (Key -> Annex String) -> Key -> FilePath -> Annex Bool
checkKeySizeOr bad key file = case Types.Key.keySize key of
	Nothing -> return True
	Just size -> do
		size' <- fromIntegral . fileSize
			<$> liftIO (getFileStatus file)
		comparesizes size size'
  where
	comparesizes a b = do
		let same = a == b
		unless same $ badsize a b
		return same
	badsize a b = do
		msg <- bad key
		warning $ concat
			[ "Bad file size ("
			, compareSizes storageUnits True a b
			, "); "
			, msg
			]

{- Runs the backend specific check on a key's content.
 -
 - In direct mode this is not done if the file has clearly been modified,
 - because modification of direct mode files is allowed. It's still done
 - if the file does not appear modified, to catch disk corruption, etc.
 -}
checkBackend :: Backend -> Key -> Annex Bool
checkBackend backend key = do
	file <- calcRepo $ gitAnnexLocation key
	ifM isDirect
		( ifM (goodContent key file)
			( checkBackendOr' (badContentDirect file) backend key file
				(goodContent key file)
			, return True
			)
		, checkBackendOr badContent backend key file
		)

checkBackendRemote :: Backend -> Key -> Remote -> Maybe FilePath -> Annex Bool
checkBackendRemote backend key remote = maybe (return True) go
  where
	go file = checkBackendOr (badContentRemote remote) backend key file

checkBackendOr :: (Key -> Annex String) -> Backend -> Key -> FilePath -> Annex Bool
checkBackendOr bad backend key file =
	checkBackendOr' bad backend key file (return True)

checkBackendOr' :: (Key -> Annex String) -> Backend -> Key -> FilePath -> Annex Bool -> Annex Bool
checkBackendOr' bad backend key file postcheck =
	case Types.Backend.fsckKey backend of
		Nothing -> return True
		Just a -> do
			ok <- a key file
			ifM postcheck
				( do
					unless ok $ do
						msg <- bad key
						warning $ "Bad file content; " ++ msg
					return ok
				, return True
				)

checkKeyNumCopies :: Key -> FilePath -> Maybe Int -> Annex Bool
checkKeyNumCopies key file numcopies = do
	needed <- getNumCopies numcopies
	(untrustedlocations, safelocations) <- trustPartition UnTrusted =<< Remote.keyLocations key
	let present = length safelocations
	if present < needed
		then do
			ppuuids <- Remote.prettyPrintUUIDs "untrusted" untrustedlocations
			warning $ missingNote file present needed ppuuids
			return False
		else return True

missingNote :: String -> Int -> Int -> String -> String
missingNote file 0 _ [] = 
		"** No known copies exist of " ++ file
missingNote file 0 _ untrusted =
		"Only these untrusted locations may have copies of " ++ file ++
		"\n" ++ untrusted ++
		"Back it up to trusted locations with git-annex copy."
missingNote file present needed [] =
		"Only " ++ show present ++ " of " ++ show needed ++ 
		" trustworthy copies exist of " ++ file ++
		"\nBack it up with git-annex copy."
missingNote file present needed untrusted = 
		missingNote file present needed [] ++
		"\nThe following untrusted locations may also have copies: " ++
		"\n" ++ untrusted

{- Bad content is moved aside. -}
badContent :: Key -> Annex String
badContent key = do
	dest <- moveBad key
	return $ "moved to " ++ dest

{- Bad content is left where it is, but we touch the file, so it'll be
 - committed to a new key. -}
badContentDirect :: FilePath -> Key -> Annex String
badContentDirect file key = do
	void $ liftIO $ catchMaybeIO $ touchFile file
	logStatus key InfoMissing
	return $ "left in place for you to examine"

badContentRemote :: Remote -> Key -> Annex String
badContentRemote remote key = do
	ok <- Remote.removeKey remote key
	when ok $
		Remote.logStatus remote key InfoMissing
	return $ (if ok then "dropped from " else "failed to drop from ")
		++ Remote.name remote

data Incremental = StartIncremental | ContIncremental (Maybe EpochTime) | NonIncremental
	deriving (Eq)

runFsck :: Incremental -> FilePath -> Key -> Annex Bool -> CommandStart
runFsck inc file key a = ifM (needFsck inc key)
	( do
		showStart "fsck" file
		next $ do
			ok <- a
			when ok $
				recordFsckTime key
			next $ return ok
	, stop
	)

{- Check if a key needs to be fscked, with support for incremental fscks. -}
needFsck :: Incremental -> Key -> Annex Bool
needFsck (ContIncremental Nothing) _ = return True
needFsck (ContIncremental starttime) key = do
	fscktime <- getFsckTime key
	return $ fscktime < starttime
needFsck _ _ = return True

{- To record the time that a key was last fscked, without
 - modifying its mtime, we set the timestamp of its parent directory.
 - Each annexed file is the only thing in its directory, so this is fine.
 -
 - To record that the file was fscked, the directory's sticky bit is set.
 - (None of the normal unix behaviors of the sticky bit should matter, so
 - we can reuse this permission bit.)
 -
 - Note that this relies on the parent directory being deleted when a file
 - is dropped. That way, if it's later added back, the fsck record
 - won't still be present.
 -}
recordFsckTime :: Key -> Annex ()
recordFsckTime key = do
	parent <- parentDir <$> calcRepo (gitAnnexLocation key)
	liftIO $ void $ tryIO $ do
		touchFile parent
		setSticky parent

getFsckTime :: Key -> Annex (Maybe EpochTime)
getFsckTime key = do
	parent <- parentDir <$> calcRepo (gitAnnexLocation key)
	liftIO $ catchDefaultIO Nothing $ do
		s <- getFileStatus parent
		return $ if isSticky $ fileMode s
			then Just $ modificationTime s
			else Nothing

{- Records the start time of an interactive fsck.
 -
 - To guard against time stamp damange (for example, if an annex directory
 - is copied without -a), the fsckstate file contains a time that should
 - be identical to its modification time. -}
recordStartTime :: Annex ()
recordStartTime = do
	f <- fromRepo gitAnnexFsckState
	createAnnexDirectory $ parentDir f
	liftIO $ do
		nukeFile f
		h <- openFile f WriteMode
		t <- modificationTime <$> getFileStatus f
		hPutStr h $ showTime $ realToFrac t
		hClose h
  where
	showTime :: POSIXTime -> String
	showTime = show

resetStartTime :: Annex ()
resetStartTime = liftIO . nukeFile =<< fromRepo gitAnnexFsckState

{- Gets the incremental fsck start time. -}
getStartTime :: Annex (Maybe EpochTime)
getStartTime = do
	f <- fromRepo gitAnnexFsckState
	liftIO $ catchDefaultIO Nothing $ do
		timestamp <- modificationTime <$> getFileStatus f
		t <- readishTime <$> readFile f
		return $ if Just (realToFrac timestamp) == t
			then Just timestamp
			else Nothing
  where
	readishTime :: String -> Maybe POSIXTime
	readishTime s = utcTimeToPOSIXSeconds <$>
		parseTime defaultTimeLocale "%s%Qs" s