summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- Command/Fsck.hs 85
-rw-r--r-- Database/Keys.hs 2
-rw-r--r-- Database/Keys/SQL.hs 18
-rw-r--r-- Database/Types.hs 43
-rw-r--r-- Utility/Format.hs 8
-rw-r--r-- debian/changelog 5
-rw-r--r-- doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_.mdwn 1
-rw-r--r-- doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_/comment_1_8d6bdb32884cb80e444c7739c743c9de._comment 31
-rw-r--r-- doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_/comment_2_1c547ab07cf57cfa9eb5398629e27d56._comment 7
-rw-r--r-- doc/bugs/annex_symlinks_too.mdwn 2
-rw-r--r-- doc/devblog/day_362__encoding_fun.mdwn 19
11 files changed, 183 insertions, 38 deletions
diff --git a/Command/Fsck.hs b/Command/Fsck.hs
index 08892e37f..3b31b7211 100644
--- a/Command/Fsck.hs
+++ b/Command/Fsck.hs
@@ -1,6 +1,6 @@
{- git-annex command
-
- - Copyright 2010-2015 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -15,7 +15,7 @@ import qualified Remote
import qualified Types.Backend
import qualified Backend
import Annex.Content
-import Annex.Content.Direct
+import qualified Annex.Content.Direct as Direct
import Annex.Direct
import Annex.Perms
import Annex.Link
@@ -25,6 +25,7 @@ import Logs.Activity
import Logs.TimeStamp
import Annex.NumCopies
import Annex.UUID
+import Annex.ReplaceFile
import Utility.DataUnits
import Config
import Utility.HumanTime
@@ -114,13 +115,13 @@ start from inc file key = do
perform :: Key -> FilePath -> Backend -> NumCopies -> Annex Bool
perform key file backend numcopies = do
- keystatus <- getKeyStatus key
+ keystatus <- getKeyFileStatus key file
check
-- order matters
[ fixLink key file
, verifyLocationLog key keystatus file
- , verifyDirectMapping key file
- , verifyDirectMode key file
+ , verifyAssociatedFiles key keystatus file
+ , verifyWorkTree key file
, checkKeySize key keystatus
, checkBackend backend key keystatus (Just file)
, checkKeyNumCopies key (Just file) numcopies
@@ -261,30 +262,55 @@ verifyLocationLog' key desc present u updatestatus = do
showNote "fixing location log"
updatestatus s
-{- Ensures the direct mode mapping file is consistent. Each file
- - it lists for the key should exist, and the specified file should be
- - included in it.
- -}
-verifyDirectMapping :: Key -> FilePath -> Annex Bool
-verifyDirectMapping key file = do
- whenM isDirect $ do
- fs <- addAssociatedFile key file
+{- Verifies the associated file records. -}
+verifyAssociatedFiles :: Key -> KeyStatus -> FilePath -> Annex Bool
+verifyAssociatedFiles key keystatus file = do
+ ifM isDirect (godirect, goindirect)
+ return True
+ where
+ godirect = do
+ fs <- Direct.addAssociatedFile key file
forM_ fs $ \f ->
unlessM (liftIO $ doesFileExist f) $
- void $ removeAssociatedFile key f
+ void $ Direct.removeAssociatedFile key f
+ goindirect = case keystatus of
+ KeyUnlocked -> do
+ f <- inRepo $ toTopFilePath file
+ afs <- Database.Keys.getAssociatedFiles key
+ unless (getTopFilePath f `elem` map getTopFilePath afs) $
+ Database.Keys.addAssociatedFile key f
+ _ -> return ()
+
+verifyWorkTree :: Key -> FilePath -> Annex Bool
+verifyWorkTree key file = do
+ ifM isDirect ( godirect, goindirect )
return True
-
-{- Ensures that files whose content is available are in direct mode. -}
-verifyDirectMode :: Key -> FilePath -> Annex Bool
-verifyDirectMode key file = do
- whenM (isDirect <&&> isJust <$> isAnnexLink file) $ do
+ where
+ {- Ensures that files whose content is available are in direct mode. -}
+ godirect = whenM (isJust <$> isAnnexLink file) $ do
v <- toDirectGen key file
case v of
Nothing -> noop
Just a -> do
showNote "fixing direct mode"
a
- return True
+ {- Make sure that a pointer file is replaced with its content,
+ - when the content is available. -}
+ goindirect = do
+ mk <- liftIO $ isPointerFile file
+ case mk of
+ Just k | k == key -> whenM (inAnnex key) $ do
+ showNote "fixing worktree content"
+ replaceFile file $ \tmp ->
+ ifM (annexThin <$> Annex.getGitConfig)
+ ( void $ linkFromAnnex key tmp
+ , do
+ obj <- calcRepo $ gitAnnexLocation key
+ void $ checkedCopyFile key obj tmp
+ thawContent tmp
+ )
+ Database.Keys.storeInodeCaches key [file]
+ _ -> return ()
{- The size of the data for a key is checked against the size encoded in
- the key's metadata, if available.
@@ -346,9 +372,9 @@ checkBackend backend key keystatus mfile = go =<< isDirect
, checkBackendOr badContent backend key content
)
go True = maybe nocheck checkdirect mfile
- checkdirect file = ifM (goodContent key file)
+ checkdirect file = ifM (Direct.goodContent key file)
( checkBackendOr' (badContentDirect file) backend key file
- (goodContent key file)
+ (Direct.goodContent key file)
, nocheck
)
nocheck = return True
@@ -587,8 +613,17 @@ getKeyStatus :: Key -> Annex KeyStatus
getKeyStatus key = ifM isDirect
( return KeyUnlocked
, catchDefaultIO KeyMissing $ do
- obj <- calcRepo $ gitAnnexLocation key
- unlocked <- ((> 1) . linkCount <$> liftIO (getFileStatus obj))
- <&&> (not . null <$> Database.Keys.getAssociatedFiles key)
+ unlocked <- not . null <$> Database.Keys.getAssociatedFiles key
return $ if unlocked then KeyUnlocked else KeyLocked
)
+
+getKeyFileStatus :: Key -> FilePath -> Annex KeyStatus
+getKeyFileStatus key file = do
+ s <- getKeyStatus key
+ case s of
+ KeyLocked -> catchDefaultIO KeyLocked $
+ ifM (isJust <$> isAnnexLink file)
+ ( return KeyLocked
+ , return KeyUnlocked
+ )
+ _ -> return s
diff --git a/Database/Keys.hs b/Database/Keys.hs
index f3d349dc0..246ccd191 100644
--- a/Database/Keys.hs
+++ b/Database/Keys.hs
@@ -174,7 +174,7 @@ scanAssociatedFiles = whenM (isJust <$> inRepo Git.Branch.current) $
add h i k = liftIO $ flip SQL.queueDb h $
void $ insertUnique $ SQL.Associated
(toIKey k)
- (getTopFilePath $ Git.LsTree.file i)
+ (toSFilePath $ getTopFilePath $ Git.LsTree.file i)
{- Stats the files, and stores their InodeCaches. -}
storeInodeCaches :: Key -> [FilePath] -> Annex ()
diff --git a/Database/Keys/SQL.hs b/Database/Keys/SQL.hs
index 456b48e46..88e6ba2dc 100644
--- a/Database/Keys/SQL.hs
+++ b/Database/Keys/SQL.hs
@@ -26,7 +26,7 @@ import Control.Monad
share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase|
Associated
key IKey
- file FilePath
+ file SFilePath
KeyFileIndex key file
FileKeyIndex file key
Content
@@ -63,8 +63,10 @@ addAssociatedFile ik f = queueDb $ do
-- If the same file was associated with a different key before,
-- remove that.
delete $ from $ \r -> do
- where_ (r ^. AssociatedFile ==. val (getTopFilePath f) &&. not_ (r ^. AssociatedKey ==. val ik))
- void $ insertUnique $ Associated ik (getTopFilePath f)
+ where_ (r ^. AssociatedFile ==. val af &&. not_ (r ^. AssociatedKey ==. val ik))
+ void $ insertUnique $ Associated ik af
+ where
+ af = toSFilePath (getTopFilePath f)
{- Note that the files returned were once associated with the key, but
- some of them may not be any longer. -}
@@ -73,21 +75,25 @@ getAssociatedFiles ik = readDb $ do
l <- select $ from $ \r -> do
where_ (r ^. AssociatedKey ==. val ik)
return (r ^. AssociatedFile)
- return $ map (asTopFilePath . unValue) l
+ return $ map (asTopFilePath . fromSFilePath . unValue) l
{- Gets any keys that are on record as having a particular associated file.
- (Should be one or none but the database doesn't enforce that.) -}
getAssociatedKey :: TopFilePath -> ReadHandle -> IO [IKey]
getAssociatedKey f = readDb $ do
l <- select $ from $ \r -> do
- where_ (r ^. AssociatedFile ==. val (getTopFilePath f))
+ where_ (r ^. AssociatedFile ==. val af)
return (r ^. AssociatedKey)
return $ map unValue l
+ where
+ af = toSFilePath (getTopFilePath f)
removeAssociatedFile :: IKey -> TopFilePath -> WriteHandle -> IO ()
removeAssociatedFile ik f = queueDb $
delete $ from $ \r -> do
- where_ (r ^. AssociatedKey ==. val ik &&. r ^. AssociatedFile ==. val (getTopFilePath f))
+ where_ (r ^. AssociatedKey ==. val ik &&. r ^. AssociatedFile ==. val af)
+ where
+ af = toSFilePath (getTopFilePath f)
addInodeCaches :: IKey -> [InodeCache] -> WriteHandle -> IO ()
addInodeCaches ik is = queueDb $
diff --git a/Database/Types.hs b/Database/Types.hs
index 6667bc343..bf5417dc8 100644
--- a/Database/Types.hs
+++ b/Database/Types.hs
@@ -1,6 +1,6 @@
{- types for SQL databases
-
- - Copyright 2015 Joey Hess <id@joeyh.name>
+ - Copyright 2015-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -11,7 +11,9 @@ module Database.Types where
import Database.Persist.TH
import Data.Maybe
+import Data.Char
+import Utility.PartialPrelude
import Types.Key
import Utility.InodeCache
@@ -53,6 +55,43 @@ toSInodeCache :: InodeCache -> SInodeCache
toSInodeCache = I . showInodeCache
fromSInodeCache :: SInodeCache -> InodeCache
-fromSInodeCache (I s) = fromMaybe (error $ "bad serialied InodeCache " ++ s) (readInodeCache s)
+fromSInodeCache (I s) = fromMaybe (error $ "bad serialized InodeCache " ++ s) (readInodeCache s)
derivePersistField "SInodeCache"
+
+-- A serialized FilePath.
+--
+-- Not all unicode characters round-trip through sqlite. In particular,
+-- surrogate code points do not. So, escape the FilePath. But, only when
+-- it contains such characters.
+newtype SFilePath = SFilePath String
+
+instance
+
+-- Note that Read instance does not work when used in any kind of complex
+-- data structure.
+instance Read SFilePath where
+ readsPrec _ s = [(SFilePath s, "")]
+
+instance Show SFilePath where
+ show (SFilePath s) = s
+
+toSFilePath :: FilePath -> SFilePath
+toSFilePath s@('"':_) = SFilePath (show s)
+toSFilePath s
+ | any needsescape s = SFilePath (show s)
+ | otherwise = SFilePath s
+ where
+ needsescape c = case generalCategory c of
+ Surrogate -> True
+ PrivateUse -> True
+ NotAssigned -> True
+ _ -> False
+
+-- Reverses toSFilePath. A stored value that starts with a double quote
+-- is an escaped (show-style) string and is parsed back with readish;
+-- any other value was stored verbatim and is returned unchanged.
+-- Calls error, including the offending value, when an escaped string
+-- fails to parse.
+fromSFilePath :: SFilePath -> FilePath
+fromSFilePath (SFilePath s@('"':_)) =
+ fromMaybe (error $ "bad serialized SFilePath " ++ s) (readish s)
+fromSFilePath (SFilePath s) = s
+
+derivePersistField "SFilePath"
+
diff --git a/Utility/Format.hs b/Utility/Format.hs
index 784496310..1ebf68d6c 100644
--- a/Utility/Format.hs
+++ b/Utility/Format.hs
@@ -103,7 +103,7 @@ empty _ = False
{- Decodes a C-style encoding, where \n is a newline, \NNN is an octal
- encoded character, and \xNN is a hex encoded character.
-}
-decode_c :: FormatString -> FormatString
+decode_c :: FormatString -> String
decode_c [] = []
decode_c s = unescape ("", s)
where
@@ -141,14 +141,14 @@ decode_c s = unescape ("", s)
handle n = ("", n)
{- Inverse of decode_c. -}
-encode_c :: FormatString -> FormatString
+encode_c :: String -> FormatString
encode_c = encode_c' (const False)
{- Encodes more strictly, including whitespace. -}
-encode_c_strict :: FormatString -> FormatString
+encode_c_strict :: String -> FormatString
encode_c_strict = encode_c' isSpace
-encode_c' :: (Char -> Bool) -> FormatString -> FormatString
+encode_c' :: (Char -> Bool) -> String -> FormatString
encode_c' p = concatMap echar
where
e c = '\\' : [c]
diff --git a/debian/changelog b/debian/changelog
index 49d98d6e1..b93d7ba91 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -6,6 +6,11 @@ git-annex (6.20160212) UNRELEASED; urgency=medium
* Work around problem with concurrent-output when in a non-unicode locale
by avoiding use of it in such a locale. Instead -J will behave as if
it was built without concurrent-output support in this situation.
+ * Fix storing of filenames of v6 unlocked files when the filename is not
+ representable in the current locale.
+ * fsck: Detect and fix missing associated file mappings in v6 repositories.
+ * fsck: Populate unlocked files in v6 repositories whose content is
+ present in annex/objects but didn't reach the work tree.
-- Joey Hess <id@joeyh.name> Fri, 12 Feb 2016 14:03:46 -0400
diff --git a/doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_.mdwn b/doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_.mdwn
index b59f7ea94..1225643f4 100644
--- a/doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_.mdwn
+++ b/doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_.mdwn
@@ -74,3 +74,4 @@ I tested this on two different systems, Arch Linux and Debian Jessie. It does no
Yes, I've been using it for more than a year to synchronize between different PCs. Great work :-)
+> [[fixed|done]] --[[Joey]]
diff --git a/doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_/comment_1_8d6bdb32884cb80e444c7739c743c9de._comment b/doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_/comment_1_8d6bdb32884cb80e444c7739c743c9de._comment
new file mode 100644
index 000000000..067182f18
--- /dev/null
+++ b/doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_/comment_1_8d6bdb32884cb80e444c7739c743c9de._comment
@@ -0,0 +1,31 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 1"""
+ date="2016-02-14T19:19:46Z"
+ content="""
+Reproduced using LANG=C.
+
+This is a problem with the filename stored in the keys db. In the first
+repo, it has:
+
+ VALUES(1,'SHA256E-s8--d1d0c59000f7c0d71485b051c9ca3f25f7afa84f0be5fea98fe1e12f3f898f44','test_öüä');
+
+However, in the clone:
+
+ VALUES(1,'SHA256E-s8--d1d0c59000f7c0d71485b051c9ca3f25f7afa84f0be5fea98fe1e12f3f898f44','test_������');
+
+So, it's lost the correct filename there. Since it doesn't
+find the file with the messed up name, it doesn't replace the file content.
+
+The problem is not with decoding git's C-style character encoding; that
+happens ok yielding `"test_\56515\56502\56515\56508\56515\56484"`.
+But, that does not seem to get stored in the database correctly.
+
+Seems that these unicode surrogates are not handled by the sqlite layer.
+The surrogates are being used because LANG=C does not support
+unicode. This could also happen when in a (working) utf-8 locale, when
+the filename is not utf-8 encoded.
+
+So, need to escape strings containing such surrogates before passing to
+SQL. In a backwards-compatible way. Done.
+"""]]
diff --git a/doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_/comment_2_1c547ab07cf57cfa9eb5398629e27d56._comment b/doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_/comment_2_1c547ab07cf57cfa9eb5398629e27d56._comment
new file mode 100644
index 000000000..ee1c70781
--- /dev/null
+++ b/doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_/comment_2_1c547ab07cf57cfa9eb5398629e27d56._comment
@@ -0,0 +1,7 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 2"""
+ date="2016-02-14T21:10:53Z"
+ content="""
+`git annex fsck` will now clean up repos affected by this problem.
+"""]]
diff --git a/doc/bugs/annex_symlinks_too.mdwn b/doc/bugs/annex_symlinks_too.mdwn
index baac67485..ade91650b 100644
--- a/doc/bugs/annex_symlinks_too.mdwn
+++ b/doc/bugs/annex_symlinks_too.mdwn
@@ -4,3 +4,5 @@ Because git annex doesn't annex symlinks, it is not possible to copy files from
Would it be possible to change this behavior in the future, at least with an option?
Thanks
+
+> Not going to happen, sorry. [[done]] --[[Joey]]
diff --git a/doc/devblog/day_362__encoding_fun.mdwn b/doc/devblog/day_362__encoding_fun.mdwn
new file mode 100644
index 000000000..392c5745a
--- /dev/null
+++ b/doc/devblog/day_362__encoding_fun.mdwn
@@ -0,0 +1,19 @@
+This was one of those days where I somehow end up dealing with tricky
+filename encoding problems all day.
+
+First, worked around inability for concurrent-output to display unicode
+characters when in a non-unicode locale. The normal trick that git-annex
+uses doesn't work in this case. Since it only affected -J, I decided to
+make git-annex detect the problem and make -J behave as if it was not built
+with the concurrent-output feature. So, it just doesn't display concurrent
+output, which is better than crashing with an encoding error.
+
+The other problem affects v6 repos only. Seems that not all Strings will
+round trip through a persistent sqlite database. In particular, unicode
+surrogate characters are replaced with garbage. This is really [a bug in
+persistent](https://github.com/yesodweb/persistent/issues/540).
+But, for git-annex's purposes, it was possible to work around it,
+by detecting such Strings and serializing them differently.
+
+Then I had to enhance `git annex fsck` to fix up repositories that were
+affected by that problem.