summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2010-11-13 14:59:27 -0400
committerGravatar Joey Hess <joey@kitenet.net>2010-11-13 14:59:27 -0400
commit5fa25a812a8a03af9f6a5fdb3d06eb4d89ee06f5 (patch)
tree467341e52d23660eee3dc05c9935c961801374e5
parentd4d65a3c923de1eece50463145e875326bfe57e9 (diff)
fsck improvements
* fsck: Check if annex.numcopies is satisfied. * fsck: Verify the sha1 of files when the SHA1 backend is used. * fsck: Verify the size of files when the WORM backend is used. * fsck: Allow specifying individual files to fsk if fscking everything is not desired. * fsck: Fix bug, introduced in 0.04, in detection of unused data.
-rw-r--r--Backend.hs7
-rw-r--r--Backend/File.hs43
-rw-r--r--Backend/SHA1.hs37
-rw-r--r--Backend/URL.hs8
-rw-r--r--Backend/WORM.hs34
-rw-r--r--CmdLine.hs6
-rw-r--r--Command.hs10
-rw-r--r--Command/Fsck.hs9
-rw-r--r--Command/FsckFile.hs33
-rw-r--r--Core.hs18
-rw-r--r--Locations.hs5
-rw-r--r--TypeInternals.hs4
-rw-r--r--debian/changelog11
-rw-r--r--doc/git-annex.mdwn8
-rw-r--r--doc/walkthrough.mdwn34
15 files changed, 236 insertions, 31 deletions
diff --git a/Backend.hs b/Backend.hs
index 43b450736..14af56bbf 100644
--- a/Backend.hs
+++ b/Backend.hs
@@ -23,6 +23,7 @@ module Backend (
retrieveKeyFile,
removeKey,
hasKey,
+ fsckKey,
lookupFile,
chooseBackends
) where
@@ -105,7 +106,7 @@ retrieveKeyFile backend key dest = (Internals.retrieveKeyFile backend) key dest
{- Removes a key from a backend. -}
removeKey :: Backend -> Key -> Annex Bool
-removeKey backend key = (Internals.removeKey backend) key
+removeKey backend key = (Internals.removeKey backend) key
{- Checks if a key is present in its backend. -}
hasKey :: Key -> Annex Bool
@@ -113,6 +114,10 @@ hasKey key = do
bs <- Annex.supportedBackends
(Internals.hasKey (lookupBackendName bs $ backendName key)) key
+{- Checks a key's backend for problems. -}
+fsckKey :: Backend -> Key -> Annex Bool
+fsckKey backend key = (Internals.fsckKey backend) key
+
{- Looks up the key and backend corresponding to an annexed file,
- by examining what the file symlinks to. -}
lookupFile :: FilePath -> Annex (Maybe (Key, Backend))
diff --git a/Backend/File.hs b/Backend/File.hs
index 9178b830a..9bda0d571 100644
--- a/Backend/File.hs
+++ b/Backend/File.hs
@@ -4,15 +4,15 @@
- it relies on the file contents in .git/annex/ in this repo,
- and other accessible repos.
-
- - This is an abstract backend; getKey has to be implemented to complete
- - it.
+ - This is an abstract backend; name, getKey and fsckKey have to be implemented
+ - to complete it.
-
- Copyright 2010 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
-module Backend.File (backend) where
+module Backend.File (backend, checkKey) where
import Control.Monad.State
import System.Directory
@@ -34,7 +34,8 @@ backend = Backend {
storeFileKey = dummyStore,
retrieveKeyFile = copyKeyFile,
removeKey = checkRemoveKey,
- hasKey = checkKeyFile
+ hasKey = checkKeyFile,
+ fsckKey = mustProvide
}
mustProvide :: a
@@ -97,14 +98,12 @@ checkRemoveKey key = do
if (force)
then return True
else do
- g <- Annex.gitRepo
remotes <- Remotes.keyPossibilities key
- let numcopies = read $ Git.configGet g config "1"
+ numcopies <- getNumCopies
if (numcopies > length remotes)
then notEnoughCopies numcopies (length remotes) []
else findcopies numcopies 0 remotes []
where
- config = "annex.numcopies"
findcopies need have [] bad =
if (have >= need)
then return True
@@ -147,3 +146,33 @@ showTriedRemotes [] = return ()
showTriedRemotes remotes =
showLongNote $ "I was unable to access these remotes: " ++
(Remotes.list remotes)
+
+getNumCopies :: Annex Int
+getNumCopies = do
+ g <- Annex.gitRepo
+ return $ read $ Git.configGet g config "1"
+ where
+ config = "annex.numcopies"
+
+{- This is used to check that numcopies is satisfied for the key on fsck.
+ - This trusts the location log, and so checks all keys, even those with
+ - data not present in the current annex.
+ -
+ - The passed action is first run to allow backends deriving this one
+ - to do their own checks.
+ -}
+checkKey :: (Key -> Annex Bool) -> Key -> Annex Bool
+checkKey a key = do
+ a_ok <- a key
+ copies_ok <- checkKeyNumCopies key
+ return $ a_ok && copies_ok
+
+checkKeyNumCopies :: Key -> Annex Bool
+checkKeyNumCopies key = do
+ remotes <- Remotes.keyPossibilities key
+ numcopies <- getNumCopies
+ if (length remotes < numcopies)
+ then do
+ showLongNote $ "only " ++ show (length remotes) ++ " of " ++ show numcopies ++ " copies"
+ return False
+ else return True
diff --git a/Backend/SHA1.hs b/Backend/SHA1.hs
index 5a232ec1d..8852e72e9 100644
--- a/Backend/SHA1.hs
+++ b/Backend/SHA1.hs
@@ -11,24 +11,51 @@ import Control.Monad.State
import Data.String.Utils
import System.Cmd.Utils
import System.IO
+import System.Directory
import qualified Backend.File
import TypeInternals
import Messages
+import qualified Annex
+import Locations
+import Core
backend :: Backend
backend = Backend.File.backend {
name = "SHA1",
- getKey = keyValue
+ getKey = keyValue,
+ fsckKey = Backend.File.checkKey checkKeySHA1
}
--- checksum the file to get its key
-keyValue :: FilePath -> Annex (Maybe Key)
-keyValue file = do
+sha1 :: FilePath -> Annex String
+sha1 file = do
showNote "checksum..."
liftIO $ pOpen ReadFromPipe "sha1sum" [file] $ \h -> do
line <- hGetLine h
let bits = split " " line
if (null bits)
then error "sha1sum parse error"
- else return $ Just $ Key ((name backend), bits !! 0)
+ else return $ bits !! 0
+
+-- A key is a sha1 of its contents.
+keyValue :: FilePath -> Annex (Maybe Key)
+keyValue file = do
+ s <- sha1 file
+ return $ Just $ Key ((name backend), s)
+
+-- A key's sha1 is checked during fsck.
+checkKeySHA1 :: Key -> Annex Bool
+checkKeySHA1 key = do
+ g <- Annex.gitRepo
+ let file = annexLocation g key
+ present <- liftIO $ doesFileExist file
+ if (not present)
+ then return True
+ else do
+ s <- sha1 file
+ if (s == keyName key)
+ then return True
+ else do
+ dest <- moveBad key
+ showNote $ "bad file content (moved to "++dest++")"
+ return False
diff --git a/Backend/URL.hs b/Backend/URL.hs
index 830d343c5..b38ea71c9 100644
--- a/Backend/URL.hs
+++ b/Backend/URL.hs
@@ -20,8 +20,13 @@ backend = Backend {
getKey = keyValue,
storeFileKey = dummyStore,
retrieveKeyFile = downloadUrl,
+ -- allow keys to be removed; presumably they can always be
+ -- downloaded again
removeKey = dummyOk,
- hasKey = dummyOk
+ -- similarly, keys are always assumed to be out there on the web
+ hasKey = dummyOk,
+ -- and nothing needed to fsck
+ fsckKey = dummyOk
}
-- cannot generate url from filename
@@ -32,7 +37,6 @@ keyValue _ = return Nothing
dummyStore :: FilePath -> Key -> Annex Bool
dummyStore _ _ = return False
--- allow keys to be removed; presumably they can always be downloaded again
dummyOk :: Key -> Annex Bool
dummyOk _ = return True
diff --git a/Backend/WORM.hs b/Backend/WORM.hs
index 848386ecd..21b3876b9 100644
--- a/Backend/WORM.hs
+++ b/Backend/WORM.hs
@@ -10,14 +10,22 @@ module Backend.WORM (backend) where
import Control.Monad.State
import System.FilePath
import System.Posix.Files
+import System.Posix.Types
+import System.Directory
+import Data.String.Utils
import qualified Backend.File
import TypeInternals
+import Locations
+import qualified Annex
+import Core
+import Messages
backend :: Backend
backend = Backend.File.backend {
name = "WORM",
- getKey = keyValue
+ getKey = keyValue,
+ fsckKey = Backend.File.checkKey checkKeySize
}
-- The key is formed from the file size, modification time, and the
@@ -36,3 +44,27 @@ keyValue file = do
(show $ fileSize stat)
base = takeFileName file
sep = ":"
+
+{- Extracts the file size from a key. -}
+keySize :: Key -> FileOffset
+keySize key = read $ section !! 2
+ where
+ section = split ":" (keyName key)
+
+{- The size of the data for a key is checked against the size encoded in
+ - the key. Note that the modification time is not checked. -}
+checkKeySize :: Key -> Annex Bool
+checkKeySize key = do
+ g <- Annex.gitRepo
+ let file = annexLocation g key
+ present <- liftIO $ doesFileExist file
+ if (not present)
+ then return True
+ else do
+ s <- liftIO $ getFileStatus file
+ if (fileSize s == keySize key)
+ then return True
+ else do
+ dest <- moveBad key
+ showNote $ "bad file size (moved to "++dest++")"
+ return False
diff --git a/CmdLine.hs b/CmdLine.hs
index efa541ebc..a683be5c5 100644
--- a/CmdLine.hs
+++ b/CmdLine.hs
@@ -59,14 +59,14 @@ subCmds =
"sets annexed content for a key using a temp file"
, SubCommand "fix" path Command.Fix.seek
"fix up symlinks to point to annexed content"
- , SubCommand "fsck" nothing Command.Fsck.seek
- "check annex for problems"
+ , SubCommand "fsck" maybepath Command.Fsck.seek
+ "check for problems"
]
where
path = "PATH ..."
+ maybepath = "[PATH ...]"
key = "KEY ..."
desc = "DESCRIPTION"
- nothing = ""
-- Each dashed command-line option results in generation of an action
-- in the Annex monad that performs the necessary setting.
diff --git a/Command.hs b/Command.hs
index 21d636463..4180155fa 100644
--- a/Command.hs
+++ b/Command.hs
@@ -146,6 +146,16 @@ withTempFile a params = return $ map a params
withNothing :: SubCmdSeekNothing
withNothing a _ = return [a]
+{- Default to acting on all files matching the seek action if
+ - none are specified. -}
+withAll :: SubCmdSeekStrings -> SubCmdSeekStrings
+withAll w a params = do
+ if null params
+ then do
+ g <- Annex.gitRepo
+ w a [Git.workTree g]
+ else w a params
+
{- filter out files from the state directory -}
notState :: FilePath -> Bool
notState f = stateLoc /= take (length stateLoc) f
diff --git a/Command/Fsck.hs b/Command/Fsck.hs
index e5f0debe0..b0b9f7bb6 100644
--- a/Command/Fsck.hs
+++ b/Command/Fsck.hs
@@ -13,9 +13,10 @@ import Command
import Types
import Core
import Messages
+import qualified Command.FsckFile
seek :: [SubCmdSeek]
-seek = [withNothing start]
+seek = [withNothing start, withAll withFilesInGit Command.FsckFile.start]
{- Checks the whole annex for problems. -}
start :: SubCmdStart
@@ -26,11 +27,9 @@ start = do
perform :: SubCmdPerform
perform = do
ok <- checkUnused
- if (ok)
+ if ok
then return $ Just $ return True
- else do
- showLongNote "Possible problems detected."
- return Nothing
+ else return Nothing
checkUnused :: Annex Bool
checkUnused = do
diff --git a/Command/FsckFile.hs b/Command/FsckFile.hs
new file mode 100644
index 000000000..2f9efa56e
--- /dev/null
+++ b/Command/FsckFile.hs
@@ -0,0 +1,33 @@
+{- git-annex command
+ -
+ - Copyright 2010 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Command.FsckFile where
+
+import Command
+import qualified Backend
+import Types
+import Messages
+
+seek :: [SubCmdSeek]
+seek = [withFilesInGit start]
+
+{- Checks a file's backend data for problems. -}
+start :: SubCmdStartString
+start file = isAnnexed file $ \(key, backend) -> do
+ inbackend <- Backend.hasKey key
+ if (not inbackend)
+ then return Nothing
+ else do
+ showStart "fsck" file
+ return $ Just $ perform key backend
+
+perform :: Key -> Backend -> SubCmdPerform
+perform key backend = do
+ success <- Backend.fsckKey backend key
+ if (success)
+ then return $ Just $ return True
+ else return Nothing
diff --git a/Core.hs b/Core.hs
index 8497a7f36..789b369cc 100644
--- a/Core.hs
+++ b/Core.hs
@@ -14,6 +14,7 @@ import System.Path
import Control.Monad (when, unless, filterM)
import System.Posix.Files
import Data.Maybe
+import System.FilePath
import Types
import Locations
@@ -201,6 +202,16 @@ fromAnnex key dest = do
renameFile file dest
removeDirectory dir
+{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and
+ - returns the directory it was moved to. -}
+moveBad :: Key -> Annex FilePath
+moveBad key = do
+ g <- Annex.gitRepo
+ let src = parentDir $ annexLocation g key
+ let dest = annexBadLocation g
+ liftIO $ renameDirectory src dest
+ return dest
+
{- List of keys whose content exists in .git/annex/objects/ -}
getKeysPresent :: Annex [Key]
getKeysPresent = do
@@ -209,11 +220,12 @@ getKeysPresent = do
getKeysPresent' :: FilePath -> Annex [Key]
getKeysPresent' dir = do
contents <- liftIO $ getDirectoryContents dir
- files <- liftIO $ filterM isreg contents
+ files <- liftIO $ filterM present contents
return $ map fileKey files
where
- isreg f = do
- s <- getFileStatus $ dir ++ "/" ++ f
+ present d = do
+ s <- getFileStatus $ dir ++ "/" ++ d ++ "/"
+ ++ (takeFileName d)
return $ isRegularFile s
{- List of keys referenced by symlinks in the git repo. -}
diff --git a/Locations.hs b/Locations.hs
index 58244cef0..c3bab285d 100644
--- a/Locations.hs
+++ b/Locations.hs
@@ -13,6 +13,7 @@ module Locations (
annexLocation,
annexLocationRelative,
annexTmpLocation,
+ annexBadLocation,
annexDir,
annexObjectDir,
@@ -59,6 +60,10 @@ annexObjectDir r = annexDir r ++ "/objects"
annexTmpLocation :: Git.Repo -> FilePath
annexTmpLocation r = annexDir r ++ "/tmp/"
+{- .git-annex/bad is used for bad files found during fsck -}
+annexBadLocation :: Git.Repo -> FilePath
+annexBadLocation r = annexDir r ++ "/bad/"
+
{- Converts a key into a filename fragment.
-
- Escape "/" in the key name, to keep a flat tree of files and avoid
diff --git a/TypeInternals.hs b/TypeInternals.hs
index 4b5cff9d9..3078224b1 100644
--- a/TypeInternals.hs
+++ b/TypeInternals.hs
@@ -75,7 +75,9 @@ data Backend = Backend {
-- removes a key
removeKey :: Key -> Annex Bool,
-- checks if a backend is storing the content of a key
- hasKey :: Key -> Annex Bool
+ hasKey :: Key -> Annex Bool,
+ -- called during fsck to check a key
+ fsckKey :: Key -> Annex Bool
}
instance Show Backend where
diff --git a/debian/changelog b/debian/changelog
index d1c3344ff..71d163d11 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,14 @@
+git-annex (0.06) UNRELEASED; urgency=low
+
+ * fsck: Check if annex.numcopies is satisfied.
+ * fsck: Verify the sha1 of files when the SHA1 backend is used.
+ * fsck: Verify the size of files when the WORM backend is used.
+ * fsck: Allow specifying individual files to fsk if fscking everything
+ is not desired.
+ * fsck: Fix bug, introduced in 0.04, in detection of unused data.
+
+ -- Joey Hess <joeyh@debian.org> Sat, 13 Nov 2010 14:08:58 -0400
+
git-annex (0.05) unstable; urgency=low
* Optimize both pre-commit and lock subcommands to not call git diff
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index d0bd3a754..61a5962f1 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -159,10 +159,12 @@ Many git-annex subcommands will stage changes for later `git commit` by you.
git annex setkey --key=1287765018:3 /tmp/file
-* fsck
+* fsck [path ...]
- This subcommand checks the whole annex for consistency, and warns
- about any problems found.
+ With no parameters, this subcommand checks the whole annex for consistency,
+ and warns about any problems found.
+
+ With parameters, only the specified files are checked.
# OPTIONS
diff --git a/doc/walkthrough.mdwn b/doc/walkthrough.mdwn
index d6c0214ff..7effb5317 100644
--- a/doc/walkthrough.mdwn
+++ b/doc/walkthrough.mdwn
@@ -276,3 +276,37 @@ significantly for really big files. To make SHA1 the detault, just
add something like this to `.gitattributes`:
* git-annex-backend=SHA1
+
+## fsck: verifying your data
+
+You can use the fsck subcommand to check for problems in your data.
+What can be checked depends on the [[backend|backends]] you've used to store
+the data. For example, when you use the SHA1 backend, fsck will verify that
+the checksums of your files are good. Fsck also checks that the annex.numcopies
+setting is satisfied for all files, and it warns about any dangling values
+in `.git/annex/objects/`.
+
+ # git annex fsck
+ fsck (checking for unused data...) (checking files...) ok
+
+Fsck checks the entire repository for problems by default. But you can
+also specify the files to check.
+This is particularly useful if you're using sha1 and don't want to spend
+a long time checksumming everything.
+
+ # git annex fsck my_cool_big_file
+ fsck my_cool_big_file (checksum..) ok
+
+## fsck: When things go wrong
+
+Fsck never deletes possibly bad data; instead it will be moved to
+`.git/annex/bad/` for you to review. Here is a sample of what fsck
+might say about a badly messed up annex:
+
+ # git annex fsck
+ fsck (checking for unused data...)
+ Some annexed data is no longer pointed to by any files in the repository.
+ If this data is no longer needed, it can be removed using git-annex dropkey:
+ WORM:1289672605:3:file
+ (checking files...)
+