diff options
-rw-r--r-- | Backend/URL.hs | 17 | ||||
-rw-r--r-- | Backend/Utilities.hs | 25 | ||||
-rw-r--r-- | Backend/WORM.hs | 14 | ||||
-rw-r--r-- | Locations.hs | 34 | ||||
-rw-r--r-- | debian/changelog | 2 | ||||
-rw-r--r-- | doc/bugs/direct_repository_on_FAT32_fails_to_addurl_containing___63__.mdwn | 2 |
6 files changed, 74 insertions, 20 deletions
diff --git a/Backend/URL.hs b/Backend/URL.hs index ace578a24..a8161c98d 100644 --- a/Backend/URL.hs +++ b/Backend/URL.hs @@ -10,11 +10,10 @@ module Backend.URL ( fromUrl ) where -import Data.Hash.MD5 - import Common.Annex import Types.Backend import Types.Key +import Backend.Utilities backends :: [Backend] backends = [backend] @@ -27,18 +26,12 @@ backend = Backend , canUpgradeKey = Nothing } -{- When it's not too long, use the full url as the key name. - - If the url is too long, it's truncated at half the filename length - - limit, and the md5 of the url is prepended to ensure a unique key. -} +{- Every unique url has a corresponding key. -} fromUrl :: String -> Maybe Integer -> Annex Key fromUrl url size = do - limit <- liftIO . fileNameLengthLimit =<< fromRepo gitAnnexDir - let truncurl = truncateFilePath (limit `div` 2) url - let key = if url == truncurl - then url - else truncurl ++ "-" ++ md5s (Str url) + n <- genKeyName url return $ stubKey - { keyName = key + { keyName = n , keyBackendName = "URL" , keySize = size - } + } diff --git a/Backend/Utilities.hs b/Backend/Utilities.hs new file mode 100644 index 000000000..24dbfd6d9 --- /dev/null +++ b/Backend/Utilities.hs @@ -0,0 +1,25 @@ +{- git-annex backend utilities + - + - Copyright 2012 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Backend.Utilities where + +import Data.Hash.MD5 + +import Common.Annex + +{- Generates a keyName from an input string. Takes care of sanitizing it. + - If it's not too long, the full string is used as the keyName. + - Otherwise, it's truncated at half the filename length limit, and its + - md5 is prepended to ensure a unique key. -} +genKeyName :: String -> Annex String +genKeyName s = do + limit <- liftIO . fileNameLengthLimit =<< fromRepo gitAnnexDir + let s' = preSanitizeKeyName s + let truncs = truncateFilePath (limit `div` 2) s' + return $ if s' == truncs + then s' + else truncs ++ "-" ++ md5s (Str s) diff --git a/Backend/WORM.hs b/Backend/WORM.hs index 3471eedc1..60db42f56 100644 --- a/Backend/WORM.hs +++ b/Backend/WORM.hs @@ -11,6 +11,7 @@ import Common.Annex import Types.Backend import Types.Key import Types.KeySource +import Backend.Utilities backends :: [Backend] backends = [backend] @@ -33,9 +34,10 @@ backend = Backend keyValue :: KeySource -> Annex (Maybe Key) keyValue source = do stat <- liftIO $ getFileStatus $ contentLocation source - return $ Just Key { - keyName = takeFileName $ keyFilename source, - keyBackendName = name backend, - keySize = Just $ fromIntegral $ fileSize stat, - keyMtime = Just $ modificationTime stat - } + n <- genKeyName $ keyFilename source + return $ Just Key + { keyName = n + , keyBackendName = name backend + , keySize = Just $ fromIntegral $ fileSize stat + , keyMtime = Just $ modificationTime stat + } diff --git a/Locations.hs b/Locations.hs index 36104458e..9b7bb575b 100644 --- a/Locations.hs +++ b/Locations.hs @@ -51,6 +51,7 @@ module Locations ( annexHashes, hashDirMixed, hashDirLower, + preSanitizeKeyName, prop_idempotent_fileKey ) where @@ -58,6 +59,7 @@ module Locations ( import Data.Bits import Data.Word import Data.Hash.MD5 +import Data.Char import Common import Types @@ -284,6 +286,32 @@ gitAnnexAssistantDefaultDir = "annex" isLinkToAnnex :: FilePath -> Bool isLinkToAnnex s = (pathSeparator:objectDir) `isInfixOf` s +{- Sanitizes a String that will be used as part of a Key's keyName, + - dealing with characters that cause problems on substandard filesystems. + - + - This is used when a new Key is initially being generated, eg by getKey. + - Unlike keyFile and fileKey, it does not need to be a reversable + - escaping. Also, it's ok to change this to add more problimatic + - characters later. Unlike changing keyFile, which could result in the + - filenames used for existing keys changing and contents getting lost. + - + - It is, however, important that the input and output of this function + - have a 1:1 mapping, to avoid two different inputs from mapping to the + - same key. + -} +preSanitizeKeyName :: String -> String +preSanitizeKeyName = concatMap escape + where + escape c + | isAsciiUpper c || isAsciiLower c || isDigit c = [c] + | c `elem` ".-_ " = [c] -- common, assumed safe + | c `elem` "/%:" = [c] -- handled by keyFile + -- , is safe and uncommon, so will be used to escape + -- other characters. By itself, it is escaped to + -- doubled form. + | c == ',' = ",," + | otherwise = ',' : show(ord(c)) + {- Converts a key into a filename fragment without any directory. - - Escape "/" in the key name, to keep a flat tree of files and avoid @@ -293,8 +321,10 @@ isLinkToAnnex s = (pathSeparator:objectDir) `isInfixOf` s - a slash - "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping - is one to one. - - ":" is escaped to "&c", because despite it being 20XX people still care - - about FAT. + - ":" is escaped to "&c", because it seemed like a good idea at the time. + - + - Changing what this function escapes and how is not a good idea, as it + - can cause existing objects to get lost. -} keyFile :: Key -> FilePath keyFile key = replace "/" "%" $ replace ":" "&c" $ diff --git a/debian/changelog b/debian/changelog index 7582e3e23..4bbdffb3f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -7,6 +7,8 @@ git-annex (4.20131003) UNRELEASED; urgency=low and remove it so it does not interfere with the automatic commits of changed files. * addurl: Better sanitization of generated filenames. + * Better sanitization of problem characters when generating URL and WORM + keys. -- Joey Hess <joeyh@debian.org> Thu, 03 Oct 2013 15:41:24 -0400 diff --git a/doc/bugs/direct_repository_on_FAT32_fails_to_addurl_containing___63__.mdwn b/doc/bugs/direct_repository_on_FAT32_fails_to_addurl_containing___63__.mdwn index 19215bf0e..1afb9ac14 100644 --- a/doc/bugs/direct_repository_on_FAT32_fails_to_addurl_containing___63__.mdwn +++ b/doc/bugs/direct_repository_on_FAT32_fails_to_addurl_containing___63__.mdwn @@ -40,3 +40,5 @@ git-annex: addurl: 1 failed # End of transcript or log. """]] + +> [[fixed|done]] --[[Joey]] |