diff options
5 files changed, 85 insertions, 9 deletions
diff --git a/Database/Keys.hs b/Database/Keys.hs index f3d349dc0..246ccd191 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -174,7 +174,7 @@ scanAssociatedFiles = whenM (isJust <$> inRepo Git.Branch.current) $ add h i k = liftIO $ flip SQL.queueDb h $ void $ insertUnique $ SQL.Associated (toIKey k) - (getTopFilePath $ Git.LsTree.file i) + (toSFilePath $ getTopFilePath $ Git.LsTree.file i) {- Stats the files, and stores their InodeCaches. -} storeInodeCaches :: Key -> [FilePath] -> Annex () diff --git a/Database/Keys/SQL.hs b/Database/Keys/SQL.hs index 456b48e46..88e6ba2dc 100644 --- a/Database/Keys/SQL.hs +++ b/Database/Keys/SQL.hs @@ -26,7 +26,7 @@ import Control.Monad share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase| Associated key IKey - file FilePath + file SFilePath KeyFileIndex key file FileKeyIndex file key Content @@ -63,8 +63,10 @@ addAssociatedFile ik f = queueDb $ do -- If the same file was associated with a different key before, -- remove that. delete $ from $ \r -> do - where_ (r ^. AssociatedFile ==. val (getTopFilePath f) &&. not_ (r ^. AssociatedKey ==. val ik)) - void $ insertUnique $ Associated ik (getTopFilePath f) + where_ (r ^. AssociatedFile ==. val af &&. not_ (r ^. AssociatedKey ==. val ik)) + void $ insertUnique $ Associated ik af + where + af = toSFilePath (getTopFilePath f) {- Note that the files returned were once associated with the key, but - some of them may not be any longer. -} @@ -73,21 +75,25 @@ getAssociatedFiles ik = readDb $ do l <- select $ from $ \r -> do where_ (r ^. AssociatedKey ==. val ik) return (r ^. AssociatedFile) - return $ map (asTopFilePath . unValue) l + return $ map (asTopFilePath . fromSFilePath . unValue) l {- Gets any keys that are on record as having a particular associated file. - (Should be one or none but the database doesn't enforce that.) 
-} getAssociatedKey :: TopFilePath -> ReadHandle -> IO [IKey] getAssociatedKey f = readDb $ do l <- select $ from $ \r -> do - where_ (r ^. AssociatedFile ==. val (getTopFilePath f)) + where_ (r ^. AssociatedFile ==. val af) return (r ^. AssociatedKey) return $ map unValue l + where + af = toSFilePath (getTopFilePath f) removeAssociatedFile :: IKey -> TopFilePath -> WriteHandle -> IO () removeAssociatedFile ik f = queueDb $ delete $ from $ \r -> do - where_ (r ^. AssociatedKey ==. val ik &&. r ^. AssociatedFile ==. val (getTopFilePath f)) + where_ (r ^. AssociatedKey ==. val ik &&. r ^. AssociatedFile ==. val af) + where + af = toSFilePath (getTopFilePath f) addInodeCaches :: IKey -> [InodeCache] -> WriteHandle -> IO () addInodeCaches ik is = queueDb $ diff --git a/Database/Types.hs b/Database/Types.hs index 6667bc343..4521bb346 100644 --- a/Database/Types.hs +++ b/Database/Types.hs @@ -1,6 +1,6 @@ {- types for SQL databases - - - Copyright 2015 Joey Hess <id@joeyh.name> + - Copyright 2015-2016 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -11,7 +11,9 @@ module Database.Types where import Database.Persist.TH import Data.Maybe +import Data.Char +import Utility.PartialPrelude import Types.Key import Utility.InodeCache @@ -53,6 +55,41 @@ toSInodeCache :: InodeCache -> SInodeCache toSInodeCache = I . showInodeCache fromSInodeCache :: SInodeCache -> InodeCache -fromSInodeCache (I s) = fromMaybe (error $ "bad serialied InodeCache " ++ s) (readInodeCache s) +fromSInodeCache (I s) = fromMaybe (error $ "bad serialized InodeCache " ++ s) (readInodeCache s) derivePersistField "SInodeCache" + +-- A serialized FilePath. +-- +-- Not all unicode characters round-trip through sqlite. In particular, +-- surrogate code points do not. So, escape the FilePath. But, only when +-- it contains such characters. +newtype SFilePath = SFilePath String + +-- Note that Read instance does not work when used in any kind of complex +-- data structure.
+instance Read SFilePath where -- accepts the entire input verbatim, leaving no remainder
+    readsPrec _ s = [(SFilePath s, "")]
+
+instance Show SFilePath where -- emits the wrapped string unchanged (no quoting added)
+    show (SFilePath s) = s
+
+toSFilePath :: FilePath -> SFilePath -- wraps a path, show-escaping it only when required
+toSFilePath s@('"':_) = SFilePath (show s) -- a leading quote would be taken as escaped by fromSFilePath
+toSFilePath s
+    | any needsescape s = SFilePath (show s)
+    | otherwise = SFilePath s
+  where
+    needsescape c = case generalCategory c of -- categories that get show-escaped before storage
+        Surrogate -> True
+        PrivateUse -> True
+        NotAssigned -> True
+        _ -> False
+
+fromSFilePath :: SFilePath -> FilePath -- unwraps a path, undoing the show-escaping when present
+fromSFilePath (SFilePath s@('"':_)) = -- a leading quote marks a show-escaped value
+    fromMaybe (error $ "bad serialized SFilePath " ++ s) (readish s) -- ($) keeps s inside the error message
+fromSFilePath (SFilePath s) = s
+
+derivePersistField "SFilePath"
+
diff --git a/debian/changelog b/debian/changelog
index 49d98d6e1..d44af9ded 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -6,6 +6,8 @@ git-annex (6.20160212) UNRELEASED; urgency=medium
   * Work around problem with concurrent-output when in a non-unicode locale
     by avoiding use of it in such a locale. Instead -J will behave as if
     it was built without concurrent-output support in this situation.
+  * Fix storing of filenames of v6 unlocked files when the filename is not
+    representable in the current locale.
-- Joey Hess <id@joeyh.name> Fri, 12 Feb 2016 14:03:46 -0400 diff --git a/doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_/comment_1_8d6bdb32884cb80e444c7739c743c9de._comment b/doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_/comment_1_8d6bdb32884cb80e444c7739c743c9de._comment new file mode 100644 index 000000000..067182f18 --- /dev/null +++ b/doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_/comment_1_8d6bdb32884cb80e444c7739c743c9de._comment @@ -0,0 +1,31 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2016-02-14T19:19:46Z" + content=""" +Reproduced using LANG=C. + +This is a problem with the filename stored in the keys db. In the first +repo, it has: + + VALUES(1,'SHA256E-s8--d1d0c59000f7c0d71485b051c9ca3f25f7afa84f0be5fea98fe1e12f3f898f44','test_öüä'); + +However, in the clone: + + VALUES(1,'SHA256E-s8--d1d0c59000f7c0d71485b051c9ca3f25f7afa84f0be5fea98fe1e12f3f898f44','test_������'); + +So, it's lost the correct filename there. Since it doesn't +find the file with the messed up name, it doesn't replace the file content. + +The problem is not with decoding git's C-style character encoding; that +happens ok yielding `"test_\56515\56502\56515\56508\56515\56484"`. +But, that does not seem to get stored in the database correctly. + +Seems that these unicode surrogates are not handled by the sqlite layer. +The surrogates are being used because LANG=C does not support +unicode. This could also happen when in a (working) utf-8 locale, when +the filename is not utf-8 encoded. + +So, need to escape strings containing such surrogates before passing to +SQL. In a backwards-compatible way. Done. +"""]] |