aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Backend/URL.hs17
-rw-r--r--Backend/Utilities.hs25
-rw-r--r--Backend/WORM.hs14
-rw-r--r--Locations.hs34
-rw-r--r--debian/changelog2
-rw-r--r--doc/bugs/direct_repository_on_FAT32_fails_to_addurl_containing___63__.mdwn2
6 files changed, 74 insertions, 20 deletions
diff --git a/Backend/URL.hs b/Backend/URL.hs
index ace578a24..a8161c98d 100644
--- a/Backend/URL.hs
+++ b/Backend/URL.hs
@@ -10,11 +10,10 @@ module Backend.URL (
fromUrl
) where
-import Data.Hash.MD5
-
import Common.Annex
import Types.Backend
import Types.Key
+import Backend.Utilities
backends :: [Backend]
backends = [backend]
@@ -27,18 +26,12 @@ backend = Backend
, canUpgradeKey = Nothing
}
-{- When it's not too long, use the full url as the key name.
- - If the url is too long, it's truncated at half the filename length
- - limit, and the md5 of the url is prepended to ensure a unique key. -}
+{- Every unique url has a corresponding key. -}
fromUrl :: String -> Maybe Integer -> Annex Key
fromUrl url size = do
- limit <- liftIO . fileNameLengthLimit =<< fromRepo gitAnnexDir
- let truncurl = truncateFilePath (limit `div` 2) url
- let key = if url == truncurl
- then url
- else truncurl ++ "-" ++ md5s (Str url)
+ n <- genKeyName url
return $ stubKey
- { keyName = key
+ { keyName = n
, keyBackendName = "URL"
, keySize = size
- }
+ }
diff --git a/Backend/Utilities.hs b/Backend/Utilities.hs
new file mode 100644
index 000000000..24dbfd6d9
--- /dev/null
+++ b/Backend/Utilities.hs
@@ -0,0 +1,25 @@
+{- git-annex backend utilities
+ -
+ - Copyright 2012 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Backend.Utilities where
+
+import Data.Hash.MD5
+
+import Common.Annex
+
+{- Generates a keyName from an input string. Takes care of sanitizing it.
+ - If it's not too long, the full string is used as the keyName.
+ - Otherwise, it's truncated at half the filename length limit, and its
+ - md5 is prepended to ensure a unique key. -}
+genKeyName :: String -> Annex String
+genKeyName s = do
+ limit <- liftIO . fileNameLengthLimit =<< fromRepo gitAnnexDir
+ let s' = preSanitizeKeyName s
+ let truncs = truncateFilePath (limit `div` 2) s'
+ return $ if s' == truncs
+ then s'
+ else truncs ++ "-" ++ md5s (Str s)
diff --git a/Backend/WORM.hs b/Backend/WORM.hs
index 3471eedc1..60db42f56 100644
--- a/Backend/WORM.hs
+++ b/Backend/WORM.hs
@@ -11,6 +11,7 @@ import Common.Annex
import Types.Backend
import Types.Key
import Types.KeySource
+import Backend.Utilities
backends :: [Backend]
backends = [backend]
@@ -33,9 +34,10 @@ backend = Backend
keyValue :: KeySource -> Annex (Maybe Key)
keyValue source = do
stat <- liftIO $ getFileStatus $ contentLocation source
- return $ Just Key {
- keyName = takeFileName $ keyFilename source,
- keyBackendName = name backend,
- keySize = Just $ fromIntegral $ fileSize stat,
- keyMtime = Just $ modificationTime stat
- }
+ n <- genKeyName $ keyFilename source
+ return $ Just Key
+ { keyName = n
+ , keyBackendName = name backend
+ , keySize = Just $ fromIntegral $ fileSize stat
+ , keyMtime = Just $ modificationTime stat
+ }
diff --git a/Locations.hs b/Locations.hs
index 36104458e..9b7bb575b 100644
--- a/Locations.hs
+++ b/Locations.hs
@@ -51,6 +51,7 @@ module Locations (
annexHashes,
hashDirMixed,
hashDirLower,
+ preSanitizeKeyName,
prop_idempotent_fileKey
) where
@@ -58,6 +59,7 @@ module Locations (
import Data.Bits
import Data.Word
import Data.Hash.MD5
+import Data.Char
import Common
import Types
@@ -284,6 +286,32 @@ gitAnnexAssistantDefaultDir = "annex"
isLinkToAnnex :: FilePath -> Bool
isLinkToAnnex s = (pathSeparator:objectDir) `isInfixOf` s
+{- Sanitizes a String that will be used as part of a Key's keyName,
+ - dealing with characters that cause problems on substandard filesystems.
+ -
+ - This is used when a new Key is initially being generated, eg by getKey.
+ - Unlike keyFile and fileKey, it does not need to be a reversable
+ - escaping. Also, it's ok to change this to add more problimatic
+ - characters later. Unlike changing keyFile, which could result in the
+ - filenames used for existing keys changing and contents getting lost.
+ -
+ - It is, however, important that the input and output of this function
+ - have a 1:1 mapping, to avoid two different inputs from mapping to the
+ - same key.
+ -}
+preSanitizeKeyName :: String -> String
+preSanitizeKeyName = concatMap escape
+ where
+ escape c
+ | isAsciiUpper c || isAsciiLower c || isDigit c = [c]
+ | c `elem` ".-_ " = [c] -- common, assumed safe
+ | c `elem` "/%:" = [c] -- handled by keyFile
+ -- , is safe and uncommon, so will be used to escape
+ -- other characters. By itself, it is escaped to
+ -- doubled form.
+ | c == ',' = ",,"
+ | otherwise = ',' : show(ord(c))
+
{- Converts a key into a filename fragment without any directory.
-
- Escape "/" in the key name, to keep a flat tree of files and avoid
@@ -293,8 +321,10 @@ isLinkToAnnex s = (pathSeparator:objectDir) `isInfixOf` s
- a slash
- "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping
- is one to one.
- - ":" is escaped to "&c", because despite it being 20XX people still care
- - about FAT.
+ - ":" is escaped to "&c", because it seemed like a good idea at the time.
+ -
+ - Changing what this function escapes and how is not a good idea, as it
+ - can cause existing objects to get lost.
-}
keyFile :: Key -> FilePath
keyFile key = replace "/" "%" $ replace ":" "&c" $
diff --git a/debian/changelog b/debian/changelog
index 7582e3e23..4bbdffb3f 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -7,6 +7,8 @@ git-annex (4.20131003) UNRELEASED; urgency=low
and remove it so it does not interfere with the automatic
commits of changed files.
* addurl: Better sanitization of generated filenames.
+ * Better sanitization of problem characters when generating URL and WORM
+ keys.
-- Joey Hess <joeyh@debian.org> Thu, 03 Oct 2013 15:41:24 -0400
diff --git a/doc/bugs/direct_repository_on_FAT32_fails_to_addurl_containing___63__.mdwn b/doc/bugs/direct_repository_on_FAT32_fails_to_addurl_containing___63__.mdwn
index 19215bf0e..1afb9ac14 100644
--- a/doc/bugs/direct_repository_on_FAT32_fails_to_addurl_containing___63__.mdwn
+++ b/doc/bugs/direct_repository_on_FAT32_fails_to_addurl_containing___63__.mdwn
@@ -40,3 +40,5 @@ git-annex: addurl: 1 failed
# End of transcript or log.
"""]]
+
+> [[fixed|done]] --[[Joey]]