summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Database/Keys.hs2
-rw-r--r--Database/Keys/SQL.hs18
-rw-r--r--Database/Types.hs41
-rw-r--r--debian/changelog2
-rw-r--r--doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_/comment_1_8d6bdb32884cb80e444c7739c743c9de._comment31
5 files changed, 85 insertions, 9 deletions
diff --git a/Database/Keys.hs b/Database/Keys.hs
index f3d349dc0..246ccd191 100644
--- a/Database/Keys.hs
+++ b/Database/Keys.hs
@@ -174,7 +174,7 @@ scanAssociatedFiles = whenM (isJust <$> inRepo Git.Branch.current) $
add h i k = liftIO $ flip SQL.queueDb h $
void $ insertUnique $ SQL.Associated
(toIKey k)
- (getTopFilePath $ Git.LsTree.file i)
+ (toSFilePath $ getTopFilePath $ Git.LsTree.file i)
{- Stats the files, and stores their InodeCaches. -}
storeInodeCaches :: Key -> [FilePath] -> Annex ()
diff --git a/Database/Keys/SQL.hs b/Database/Keys/SQL.hs
index 456b48e46..88e6ba2dc 100644
--- a/Database/Keys/SQL.hs
+++ b/Database/Keys/SQL.hs
@@ -26,7 +26,7 @@ import Control.Monad
share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase|
Associated
key IKey
- file FilePath
+ file SFilePath
KeyFileIndex key file
FileKeyIndex file key
Content
@@ -63,8 +63,10 @@ addAssociatedFile ik f = queueDb $ do
-- If the same file was associated with a different key before,
-- remove that.
delete $ from $ \r -> do
- where_ (r ^. AssociatedFile ==. val (getTopFilePath f) &&. not_ (r ^. AssociatedKey ==. val ik))
- void $ insertUnique $ Associated ik (getTopFilePath f)
+ where_ (r ^. AssociatedFile ==. val af &&. not_ (r ^. AssociatedKey ==. val ik))
+ void $ insertUnique $ Associated ik af
+ where
+ af = toSFilePath (getTopFilePath f)
{- Note that the files returned were once associated with the key, but
- some of them may not be any longer. -}
@@ -73,21 +75,25 @@ getAssociatedFiles ik = readDb $ do
l <- select $ from $ \r -> do
where_ (r ^. AssociatedKey ==. val ik)
return (r ^. AssociatedFile)
- return $ map (asTopFilePath . unValue) l
+ return $ map (asTopFilePath . fromSFilePath . unValue) l
{- Gets any keys that are on record as having a particular associated file.
- (Should be one or none but the database doesn't enforce that.) -}
getAssociatedKey :: TopFilePath -> ReadHandle -> IO [IKey]
getAssociatedKey f = readDb $ do
l <- select $ from $ \r -> do
- where_ (r ^. AssociatedFile ==. val (getTopFilePath f))
+ where_ (r ^. AssociatedFile ==. val af)
return (r ^. AssociatedKey)
return $ map unValue l
+ where
+ af = toSFilePath (getTopFilePath f)
removeAssociatedFile :: IKey -> TopFilePath -> WriteHandle -> IO ()
removeAssociatedFile ik f = queueDb $
delete $ from $ \r -> do
- where_ (r ^. AssociatedKey ==. val ik &&. r ^. AssociatedFile ==. val (getTopFilePath f))
+ where_ (r ^. AssociatedKey ==. val ik &&. r ^. AssociatedFile ==. val af)
+ where
+ af = toSFilePath (getTopFilePath f)
addInodeCaches :: IKey -> [InodeCache] -> WriteHandle -> IO ()
addInodeCaches ik is = queueDb $
diff --git a/Database/Types.hs b/Database/Types.hs
index 6667bc343..4521bb346 100644
--- a/Database/Types.hs
+++ b/Database/Types.hs
@@ -1,6 +1,6 @@
{- types for SQL databases
-
- - Copyright 2015 Joey Hess <id@joeyh.name>
+ - Copyright 2015-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -11,7 +11,9 @@ module Database.Types where
import Database.Persist.TH
import Data.Maybe
+import Data.Char
+import Utility.PartialPrelude
import Types.Key
import Utility.InodeCache
@@ -53,6 +55,41 @@ toSInodeCache :: InodeCache -> SInodeCache
toSInodeCache = I . showInodeCache
fromSInodeCache :: SInodeCache -> InodeCache
-fromSInodeCache (I s) = fromMaybe (error $ "bad serialied InodeCache " ++ s) (readInodeCache s)
+fromSInodeCache (I s) = fromMaybe (error $ "bad serialized InodeCache " ++ s) (readInodeCache s)
derivePersistField "SInodeCache"
+
+-- A serialized FilePath.
+--
+-- Not all unicode characters round-trip through sqlite. In particular,
+-- surrigate code points do not. So, escape the FilePath. But, only when
+-- it contains such characters.
+newtype SFilePath = SFilePath String
+
+-- Note that Read instance does not work when used in any kind of complex
+-- data structure.
+instance Read SFilePath where
+ readsPrec _ s = [(SFilePath s, "")]
+
+instance Show SFilePath where
+ show (SFilePath s) = s
+
+toSFilePath :: FilePath -> SFilePath
+toSFilePath s@('"':_) = SFilePath (show s)
+toSFilePath s
+ | any needsescape s = SFilePath (show s)
+ | otherwise = SFilePath s
+ where
+ needsescape c = case generalCategory c of
+ Surrogate -> True
+ PrivateUse -> True
+ NotAssigned -> True
+ _ -> False
+
+fromSFilePath :: SFilePath -> FilePath
+fromSFilePath (SFilePath s@('"':_)) =
+ fromMaybe (error "bad serialized SFilePath " ++ s) (readish s)
+fromSFilePath (SFilePath s) = s
+
+derivePersistField "SFilePath"
+
diff --git a/debian/changelog b/debian/changelog
index 49d98d6e1..d44af9ded 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -6,6 +6,8 @@ git-annex (6.20160212) UNRELEASED; urgency=medium
* Work around problem with concurrent-output when in a non-unicode locale
by avoiding use of it in such a locale. Instead -J will behave as if
it was built without concurrent-output support in this situation.
+ * Fix storing of filenames of v6 unlocked files when the filename is not
+ representable in the current locale.
-- Joey Hess <id@joeyh.name> Fri, 12 Feb 2016 14:03:46 -0400
diff --git a/doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_/comment_1_8d6bdb32884cb80e444c7739c743c9de._comment b/doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_/comment_1_8d6bdb32884cb80e444c7739c743c9de._comment
new file mode 100644
index 000000000..067182f18
--- /dev/null
+++ b/doc/bugs/__39__git_annex_get__39___fails_for_unlocked_files_with_special_characters___40__e.g._umlauts__41___when_using_precompiled_version_6.20160126-g2336107_/comment_1_8d6bdb32884cb80e444c7739c743c9de._comment
@@ -0,0 +1,31 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 1"""
+ date="2016-02-14T19:19:46Z"
+ content="""
+Reproduced using LANG=C.
+
+This is a problem with the filename stored in the keys db. In the first
+repo, it has:
+
+ VALUES(1,'SHA256E-s8--d1d0c59000f7c0d71485b051c9ca3f25f7afa84f0be5fea98fe1e12f3f898f44','test_öüä');
+
+However, in the clone:
+
+ VALUES(1,'SHA256E-s8--d1d0c59000f7c0d71485b051c9ca3f25f7afa84f0be5fea98fe1e12f3f898f44','test_������');
+
+So, it's lost the correct filename there. Since it doesn't
+find the file with the messed up name, it doesn't replace the file content.
+
+The problem is not with decoding git's C-style character encoding; that
+happens ok yielding `"test_\56515\56502\56515\56508\56515\56484"`.
+But, that does not seem to get stored in the database correctly.
+
+Seems that these unicode surrigates are not handled by the sqlite layer.
+The surrigates are being used because LANG=C does not support
+unicode. This could also happen when in a (working) utf-8 locale, when
+the filename is not utf-8 encoded.
+
+So, need to escape strings containing such surrigates before passing to
+SQL. In a backwards-compatible way. Done.
+"""]]