From 7e9c52ef3975128798caafaafc5fb0ecd5dd2107 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 9 Nov 2015 12:19:10 -0400 Subject: quvi may output utf-8 encoded data when the configured locale doesn't support that; avoid crashing on such invalid encoding. --- Utility/Quvi.hs | 29 ++++++++++++++++------ debian/changelog | 2 ++ ...___40__possibly_UTF-8_chars_in_title__41__.mdwn | 2 ++ ...ent_1_2b71126bc2e4f3d1e863a2c0c0181efe._comment | 12 +++++++++ 4 files changed, 37 insertions(+), 8 deletions(-) create mode 100644 doc/bugs/git_annex_addurl_fails_on_some_Youtube_URLs___40__possibly_UTF-8_chars_in_title__41__/comment_1_2b71126bc2e4f3d1e863a2c0c0181efe._comment diff --git a/Utility/Quvi.hs b/Utility/Quvi.hs index 2aacfaea2..09f74968b 100644 --- a/Utility/Quvi.hs +++ b/Utility/Quvi.hs @@ -14,7 +14,8 @@ import Common import Utility.Url import Data.Aeson -import Data.ByteString.Lazy.UTF8 (fromString) +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as BL import qualified Data.Map as M import Network.URI (uriAuthority, uriRegName) import Data.Char @@ -77,8 +78,8 @@ type Query a = QuviVersion -> [CommandParam] -> URLString -> IO a forceQuery :: Query (Maybe Page) forceQuery v ps url = query' v ps url `catchNonAsync` onerr where - onerr _ = ifM (inPath "quvi") - ( error "quvi failed" + onerr e = ifM (inPath "quvi") + ( error ("quvi failed: " ++ show e) , error "quvi is not installed" ) @@ -89,9 +90,11 @@ query v ps url = flip catchNonAsync (const $ return Nothing) (query' v ps url) query' :: Query (Maybe Page) query' Quvi09 ps url = parseEnum - <$> readProcess "quvi" (toCommand $ [Param "dump", Param "-p", Param "enum"] ++ ps ++ [Param url]) -query' Quvi04 ps url = decode . fromString - <$> readProcess "quvi" (toCommand $ ps ++ [Param url]) + <$> readQuvi (toCommand $ [Param "dump", Param "-p", Param "enum"] ++ ps ++ [Param url]) +query' Quvi04 ps url = do + let p = proc "quvi" (toCommand $ ps ++ [Param url]) + decode . 
BL.fromStrict + <$> withHandle StdoutHandle createProcessSuccess p B.hGetContents query' NoQuvi _ _ = return Nothing queryLinks :: Query [URLString] @@ -131,8 +134,7 @@ listdomains :: QuviVersion -> IO [String] listdomains Quvi09 = concatMap (split ",") . concatMap (drop 1 . words) . filter ("domains: " `isPrefixOf`) . lines - <$> readProcess "quvi" - (toCommand [Param "info", Param "-p", Param "domains"]) + <$> readQuvi (toCommand [Param "info", Param "-p", Param "domains"]) listdomains _ = return [] type QuviParams = QuviVersion -> [CommandParam] @@ -150,3 +152,14 @@ httponly :: QuviParams -- No way to do it with 0.9? httponly Quvi04 = [Param "-c", Param "http"] httponly _ = [] -- No way to do it with 0.9? + +{- Both versions of quvi will output utf-8 encoded data even when + - the locale doesn't support it. -} +readQuvi :: [String] -> IO String +readQuvi ps = withHandle StdoutHandle createProcessSuccess p $ \h -> do + fileEncoding h + r <- hGetContentsStrict h + hClose h + return r + where + p = proc "quvi" ps diff --git a/debian/changelog b/debian/changelog index 43eddfa0d..d45a9d269 100644 --- a/debian/changelog +++ b/debian/changelog @@ -14,6 +14,8 @@ git-annex (5.20151102.2) UNRELEASED; urgency=medium out of the annex back to the file, because other files may point to that same content. Instead, copy the injected file content out to recover. + * quvi may output utf-8 encoded data when the configured locale doesn't + support that; avoid crashing on such invalid encoding. 
-- Joey Hess Wed, 04 Nov 2015 12:50:20 -0400 diff --git a/doc/bugs/git_annex_addurl_fails_on_some_Youtube_URLs___40__possibly_UTF-8_chars_in_title__41__.mdwn b/doc/bugs/git_annex_addurl_fails_on_some_Youtube_URLs___40__possibly_UTF-8_chars_in_title__41__.mdwn index 4d185250f..55f37663b 100644 --- a/doc/bugs/git_annex_addurl_fails_on_some_Youtube_URLs___40__possibly_UTF-8_chars_in_title__41__.mdwn +++ b/doc/bugs/git_annex_addurl_fails_on_some_Youtube_URLs___40__possibly_UTF-8_chars_in_title__41__.mdwn @@ -68,3 +68,5 @@ on Linux. I love it. It has motivated me enough to start organizing my files spread on different machines, disks etc. at least a little :-) +> [[fixed|done]]; quvi output is now parsed in a locale-independent manner. +> --[[Joey]] diff --git a/doc/bugs/git_annex_addurl_fails_on_some_Youtube_URLs___40__possibly_UTF-8_chars_in_title__41__/comment_1_2b71126bc2e4f3d1e863a2c0c0181efe._comment b/doc/bugs/git_annex_addurl_fails_on_some_Youtube_URLs___40__possibly_UTF-8_chars_in_title__41__/comment_1_2b71126bc2e4f3d1e863a2c0c0181efe._comment new file mode 100644 index 000000000..d4025e024 --- /dev/null +++ b/doc/bugs/git_annex_addurl_fails_on_some_Youtube_URLs___40__possibly_UTF-8_chars_in_title__41__/comment_1_2b71126bc2e4f3d1e863a2c0c0181efe._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2015-11-09T15:45:35Z" + content=""" +This works fine when LANG is set to a utf-8 capable locale. I reproduced it +with LANG=C. quvi outputs utf-8 in that configuration, and git-annex, +following the locale settings, did not know what to do with that. + +Easily fixed, but you'll have better luck in general if you get into a +utf-8 capable locale. +"""]] -- cgit v1.2.3