From 3b9d9a267b7c9247d36d9b622e1b836724ca5fb0 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 24 Dec 2016 14:46:31 -0400 Subject: Always use filesystem encoding for all file and handle reads and writes. This is a big scary change. I have convinced myself it should be safe. I hope! --- Annex/Branch.hs | 2 +- Annex/CatFile.hs | 1 + Annex/Content/Direct.hs | 6 ++---- Annex/DirHashes.hs | 1 + Annex/Journal.hs | 3 +-- Annex/Link.hs | 2 +- Annex/Ssh.hs | 1 + Annex/VariantFile.hs | 1 + Assistant/TransferrerPool.hs | 2 -- Backend/Utilities.hs | 1 + Build/DistributionUpdate.hs | 2 ++ Build/EvilSplicer.hs | 5 +++-- CHANGELOG | 2 ++ CmdLine/Batch.hs | 4 +--- Command/AddUrl.hs | 1 + Command/ImportFeed.hs | 2 +- Command/P2P.hs | 3 +-- Command/TransferKeys.hs | 5 +---- Command/Vicfg.hs | 6 +++--- Common.hs | 1 - Config.hs | 2 +- Database/Handle.hs | 2 +- Git/CatFile.hs | 1 + Git/Command.hs | 6 +----- Git/Config.hs | 5 ----- Git/HashObject.hs | 1 - Git/Queue.hs | 1 - Git/Repair.hs | 2 +- Git/UnionMerge.hs | 4 ++-- Git/UpdateIndex.hs | 1 - Logs/Transfer.hs | 5 ++--- Logs/Unused.hs | 4 ++-- Messages.hs | 1 - P2P/IO.hs | 1 - Remote/BitTorrent.hs | 1 + Remote/External.hs | 3 --- Test.hs | 4 +++- Utility/CoProcess.hs | 6 +++--- Utility/ExternalSHA.hs | 2 -- Utility/FileSystemEncoding.hs | 41 ++++++++++++++++++++++------------------- Utility/Lsof.hs | 5 ++--- Utility/MagicWormhole.hs | 4 +--- Utility/Misc.hs | 17 ----------------- Utility/Quvi.hs | 3 --- Utility/Shell.hs | 5 ++--- git-annex.hs | 2 ++ git-union-merge.hs | 2 ++ 47 files changed, 74 insertions(+), 108 deletions(-) diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 9663311d5..c90958ab0 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -61,6 +61,7 @@ import qualified Annex.Queue import Annex.Branch.Transitions import qualified Annex import Annex.Hook +import Utility.FileSystemEncoding {- Name of the branch that is used to store git-annex's information. -} name :: Git.Ref @@ -436,7 +437,6 @@ stageJournal jl = withIndex $ do g <- gitRepo let dir = gitAnnexJournalDir g (jlogf, jlogh) <- openjlog - liftIO $ fileEncoding jlogh h <- hashObjectHandle withJournalHandle $ \jh -> Git.UpdateIndex.streamUpdateIndex g diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs index b1d8fba28..25952dfec 100644 --- a/Annex/CatFile.hs +++ b/Annex/CatFile.hs @@ -33,6 +33,7 @@ import Git.FilePath import Git.Index import qualified Git.Ref import Annex.Link +import Utility.FileSystemEncoding catFile :: Git.Branch -> FilePath -> Annex L.ByteString catFile branch file = do diff --git a/Annex/Content/Direct.hs b/Annex/Content/Direct.hs index 2007360e3..734a0c1b9 100644 --- a/Annex/Content/Direct.hs +++ b/Annex/Content/Direct.hs @@ -52,8 +52,7 @@ associatedFiles key = do associatedFilesRelative :: Key -> Annex [FilePath] associatedFilesRelative key = do mapping <- calcRepo $ gitAnnexMapping key - liftIO $ catchDefaultIO [] $ withFile mapping ReadMode $ \h -> do - fileEncoding h + liftIO $ catchDefaultIO [] $ withFile mapping ReadMode $ \h -> -- Read strictly to ensure the file is closed -- before changeAssociatedFiles tries to write to it. -- (Especially needed on Windows.) @@ -68,8 +67,7 @@ changeAssociatedFiles key transform = do let files' = transform files when (files /= files') $ modifyContent mapping $ - liftIO $ viaTmp writeFileAnyEncoding mapping $ - unlines files' + liftIO $ viaTmp writeFile mapping $ unlines files' top <- fromRepo Git.repoPath return $ map (top ) files' diff --git a/Annex/DirHashes.hs b/Annex/DirHashes.hs index 004536ca7..ed20cfb8a 100644 --- a/Annex/DirHashes.hs +++ b/Annex/DirHashes.hs @@ -26,6 +26,7 @@ import Common import Types.Key import Types.GitConfig import Types.Difference +import Utility.FileSystemEncoding type Hasher = Key -> FilePath diff --git a/Annex/Journal.hs b/Annex/Journal.hs index e4faa4865..184bb0ab0 100644 --- a/Annex/Journal.hs +++ b/Annex/Journal.hs @@ -37,7 +37,6 @@ setJournalFile _jl file content = do let tmpfile = tmp takeFileName jfile liftIO $ do withFile tmpfile WriteMode $ \h -> do - fileEncoding h #ifdef mingw32_HOST_OS hSetNewlineMode h noNewlineTranslation #endif @@ -53,7 +52,7 @@ getJournalFile _jl = getJournalFileStale - changes. -} getJournalFileStale :: FilePath -> Annex (Maybe String) getJournalFileStale file = inRepo $ \g -> catchMaybeIO $ - readFileStrictAnyEncoding $ journalFile file g + readFileStrict $ journalFile file g {- List of files that have updated content in the journal. -} getJournalledFiles :: JournalLocked -> Annex [FilePath] diff --git a/Annex/Link.hs b/Annex/Link.hs index 90312a04a..fcc300bee 100644 --- a/Annex/Link.hs +++ b/Annex/Link.hs @@ -24,6 +24,7 @@ import Git.Types import Git.FilePath import Annex.HashObject import Utility.FileMode +import Utility.FileSystemEncoding import qualified Data.ByteString.Lazy as L @@ -63,7 +64,6 @@ getAnnexLinkTarget' file coresymlinks = if coresymlinks Nothing -> fallback probefilecontent f = withFile f ReadMode $ \h -> do - fileEncoding h -- The first 8k is more than enough to read; link -- files are small. s <- take 8192 <$> hGetContents h diff --git a/Annex/Ssh.hs b/Annex/Ssh.hs index 4f879436b..512f0375c 100644 --- a/Annex/Ssh.hs +++ b/Annex/Ssh.hs @@ -33,6 +33,7 @@ import qualified Git.Url import Config import Annex.Path import Utility.Env +import Utility.FileSystemEncoding import Types.CleanupActions import Git.Env #ifndef mingw32_HOST_OS diff --git a/Annex/VariantFile.hs b/Annex/VariantFile.hs index 9bf027b5c..17658a9c6 100644 --- a/Annex/VariantFile.hs +++ b/Annex/VariantFile.hs @@ -8,6 +8,7 @@ module Annex.VariantFile where import Annex.Common +import Utility.FileSystemEncoding import Data.Hash.MD5 diff --git a/Assistant/TransferrerPool.hs b/Assistant/TransferrerPool.hs index 7c0cb4415..892e156e8 100644 --- a/Assistant/TransferrerPool.hs +++ b/Assistant/TransferrerPool.hs @@ -74,8 +74,6 @@ mkTransferrer program batchmaker = do , std_in = CreatePipe , std_out = CreatePipe } - fileEncoding readh - fileEncoding writeh return $ Transferrer { transferrerRead = readh , transferrerWrite = writeh diff --git a/Backend/Utilities.hs b/Backend/Utilities.hs index 04221650b..d1fb94f2a 100644 --- a/Backend/Utilities.hs +++ b/Backend/Utilities.hs @@ -10,6 +10,7 @@ module Backend.Utilities where import Data.Hash.MD5 import Annex.Common +import Utility.FileSystemEncoding {- Generates a keyName from an input string. Takes care of sanitizing it. - If it's not too long, the full string is used as the keyName. diff --git a/Build/DistributionUpdate.hs b/Build/DistributionUpdate.hs index 814927e99..dd18a7883 100644 --- a/Build/DistributionUpdate.hs +++ b/Build/DistributionUpdate.hs @@ -14,6 +14,7 @@ import Build.Version (getChangelogVersion, Version) import Utility.UserInfo import Utility.Url import Utility.Tmp +import Utility.FileSystemEncoding import qualified Git.Construct import qualified Annex import Annex.Content @@ -50,6 +51,7 @@ autobuilds = main :: IO () main = do + useFileSystemEncoding version <- liftIO getChangelogVersion repodir <- getRepoDir changeWorkingDirectory repodir diff --git a/Build/EvilSplicer.hs b/Build/EvilSplicer.hs index ca690c250..32d9a1c9f 100644 --- a/Build/EvilSplicer.hs +++ b/Build/EvilSplicer.hs @@ -210,7 +210,6 @@ applySplices destdir imports splices@(first:_) = do when (oldcontent /= Just newcontent) $ do putStrLn $ "splicing " ++ f withFile dest WriteMode $ \h -> do - fileEncoding h hPutStr h newcontent hClose h where @@ -721,7 +720,9 @@ parsecAndReplace p s = case parse find "" s of find = many $ try (Right <$> p) <|> (Left <$> anyChar) main :: IO () -main = go =<< getArgs +main = do + useFileSystemEncoding + go =<< getArgs where go (destdir:log:header:[]) = run destdir log (Just header) go (destdir:log:[]) = run destdir log Nothing diff --git a/CHANGELOG b/CHANGELOG index 2d7ea22a7..7a0ca2eb2 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -24,6 +24,8 @@ git-annex (6.20161211) UNRELEASED; urgency=medium * enable-tor: No longer needs to be run as root. * enable-tor: When run as a regular user, test a connection back to the hidden service over tor. + * Always use filesystem encoding for all file and handle reads and + writes. * Fix build with directory-1.3. * Debian: Suggest tor and magic-wormhole. * Debian: Build webapp on armel. diff --git a/CmdLine/Batch.hs b/CmdLine/Batch.hs index 6ef21372f..82038314c 100644 --- a/CmdLine/Batch.hs +++ b/CmdLine/Batch.hs @@ -57,9 +57,7 @@ batchInput parser a = go =<< batchLines parseerr s = giveup $ "Batch input parse failure: " ++ s batchLines :: Annex [String] -batchLines = liftIO $ do - fileEncoding stdin - lines <$> getContents +batchLines = liftIO $ lines <$> getContents -- Runs a CommandStart in batch mode. -- diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index e49d2727c..8cc148440 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -27,6 +27,7 @@ import Types.UrlContents import Annex.FileMatcher import Logs.Location import Utility.Metered +import Utility.FileSystemEncoding import qualified Annex.Transfer as Transfer import Annex.Quvi import qualified Utility.Quvi as Quvi diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs index 832ec1246..ea936e84a 100644 --- a/Command/ImportFeed.hs +++ b/Command/ImportFeed.hs @@ -156,7 +156,7 @@ downloadFeed url liftIO $ withTmpFile "feed" $ \f h -> do hClose h ifM (Url.download url f uo) - ( parseFeedString <$> readFileStrictAnyEncoding f + ( parseFeedString <$> readFileStrict f , return Nothing ) diff --git a/Command/P2P.hs b/Command/P2P.hs index afa5f9dc6..4ba3e43d5 100644 --- a/Command/P2P.hs +++ b/Command/P2P.hs @@ -161,7 +161,6 @@ performPairing remotename addrs = do getcode ourcode = do putStr "Enter the other repository's pairing code: " hFlush stdout - fileEncoding stdin l <- getLine case Wormhole.toCode l of Just code @@ -236,7 +235,7 @@ wormholePairing remotename ouraddrs ui = do then return ReceiveFailed else do r <- liftIO $ tryIO $ - readFileStrictAnyEncoding recvf + readFileStrict recvf case r of Left _e -> return ReceiveFailed Right s -> maybe diff --git a/Command/TransferKeys.hs b/Command/TransferKeys.hs index 2ac784589..d875f496d 100644 --- a/Command/TransferKeys.hs +++ b/Command/TransferKeys.hs @@ -56,10 +56,7 @@ runRequests -> (TransferRequest -> Annex Bool) -> Annex () runRequests readh writeh a = do - liftIO $ do - hSetBuffering readh NoBuffering - fileEncoding readh - fileEncoding writeh + liftIO $ hSetBuffering readh NoBuffering go =<< readrequests where go (d:rn:k:f:rest) = do diff --git a/Command/Vicfg.hs b/Command/Vicfg.hs index 64daa598b..d9e8b8823 100644 --- a/Command/Vicfg.hs +++ b/Command/Vicfg.hs @@ -41,7 +41,7 @@ start = do createAnnexDirectory $ parentDir f cfg <- getCfg descs <- uuidDescriptions - liftIO $ writeFileAnyEncoding f $ genCfg cfg descs + liftIO $ writeFile f $ genCfg cfg descs vicfg cfg f stop @@ -51,11 +51,11 @@ vicfg curcfg f = do -- Allow EDITOR to be processed by the shell, so it can contain options. unlessM (liftIO $ boolSystem "sh" [Param "-c", Param $ unwords [vi, shellEscape f]]) $ giveup $ vi ++ " exited nonzero; aborting" - r <- parseCfg (defCfg curcfg) <$> liftIO (readFileStrictAnyEncoding f) + r <- parseCfg (defCfg curcfg) <$> liftIO (readFileStrict f) liftIO $ nukeFile f case r of Left s -> do - liftIO $ writeFileAnyEncoding f s + liftIO $ writeFile f s vicfg curcfg f Right newcfg -> setCfg curcfg newcfg diff --git a/Common.hs b/Common.hs index 5213863b9..2e28117b6 100644 --- a/Common.hs +++ b/Common.hs @@ -29,7 +29,6 @@ import Utility.Directory as X import Utility.Monad as X import Utility.Data as X import Utility.Applicative as X -import Utility.FileSystemEncoding as X import Utility.PosixFiles as X hiding (fileSize) import Utility.FileSize as X import Utility.Network as X diff --git a/Config.hs b/Config.hs index be60852da..84736cac3 100644 --- a/Config.hs +++ b/Config.hs @@ -112,7 +112,7 @@ configureSmudgeFilter = do createDirectoryIfMissing True (takeDirectory lf) writeFile lf (lfs ++ "\n" ++ stdattr) where - readattr = liftIO . catchDefaultIO "" . readFileStrictAnyEncoding + readattr = liftIO . catchDefaultIO "" . readFileStrict stdattr = unlines [ "* filter=annex" , ".* !filter" diff --git a/Database/Handle.hs b/Database/Handle.hs index 748feaa97..9071cd538 100644 --- a/Database/Handle.hs +++ b/Database/Handle.hs @@ -69,7 +69,7 @@ openDb db tablename = do worker <- async (workerThread (T.pack db) tablename jobs) -- work around https://github.com/yesodweb/persistent/issues/474 - liftIO setConsoleEncoding + liftIO useFileSystemEncoding return $ DbHandle worker jobs diff --git a/Git/CatFile.hs b/Git/CatFile.hs index 061349f05..4935cdffa 100644 --- a/Git/CatFile.hs +++ b/Git/CatFile.hs @@ -37,6 +37,7 @@ import Git.Command import Git.Types import Git.FilePath import qualified Utility.CoProcess as CoProcess +import Utility.FileSystemEncoding data CatFileHandle = CatFileHandle { catFileProcess :: CoProcess.CoProcessHandle diff --git a/Git/Command.hs b/Git/Command.hs index 206056368..adea7622e 100644 --- a/Git/Command.hs +++ b/Git/Command.hs @@ -53,7 +53,6 @@ runQuiet params repo = withQuietOutput createProcessSuccess $ pipeReadLazy :: [CommandParam] -> Repo -> IO (String, IO Bool) pipeReadLazy params repo = assertLocal repo $ do (_, Just h, _, pid) <- createProcess p { std_out = CreatePipe } - fileEncoding h c <- hGetContents h return (c, checkSuccessProcess pid) where @@ -66,7 +65,6 @@ pipeReadLazy params repo = assertLocal repo $ do pipeReadStrict :: [CommandParam] -> Repo -> IO String pipeReadStrict params repo = assertLocal repo $ withHandle StdoutHandle (createProcessChecked ignoreFailureProcess) p $ \h -> do - fileEncoding h output <- hGetContentsStrict h hClose h return output @@ -81,9 +79,7 @@ pipeWriteRead params writer repo = assertLocal repo $ writeReadProcessEnv "git" (toCommand $ gitCommandLine params repo) (gitEnv repo) writer (Just adjusthandle) where - adjusthandle h = do - fileEncoding h - hSetNewlineMode h noNewlineTranslation + adjusthandle h = hSetNewlineMode h noNewlineTranslation {- Runs a git command, feeding it input on a handle with an action. -} pipeWrite :: [CommandParam] -> Repo -> (Handle -> IO ()) -> IO () diff --git a/Git/Config.hs b/Git/Config.hs index 3d6239560..65bd9b7ba 100644 --- a/Git/Config.hs +++ b/Git/Config.hs @@ -79,10 +79,6 @@ global = do {- Reads git config from a handle and populates a repo with it. -} hRead :: Repo -> Handle -> IO Repo hRead repo h = do - -- We use the FileSystemEncoding when reading from git-config, - -- because it can contain arbitrary filepaths (and other strings) - -- in any encoding. - fileEncoding h val <- hGetContentsStrict h store val repo @@ -167,7 +163,6 @@ coreBare = "core.bare" fromPipe :: Repo -> String -> [CommandParam] -> IO (Either SomeException (Repo, String)) fromPipe r cmd params = try $ withHandle StdoutHandle createProcessSuccess p $ \h -> do - fileEncoding h val <- hGetContentsStrict h r' <- store val r return (r', val) diff --git a/Git/HashObject.hs b/Git/HashObject.hs index 4cd54ef54..399e36d46 100644 --- a/Git/HashObject.hs +++ b/Git/HashObject.hs @@ -41,7 +41,6 @@ hashFile h file = CoProcess.query h send receive - interface does not allow batch hashing without using temp files. -} hashBlob :: HashObjectHandle -> String -> IO Sha hashBlob h s = withTmpFile "hash" $ \tmp tmph -> do - fileEncoding tmph #ifdef mingw32_HOST_OS hSetNewlineMode tmph noNewlineTranslation #endif diff --git a/Git/Queue.hs b/Git/Queue.hs index 0b0025b0a..ee1f83ca9 100644 --- a/Git/Queue.hs +++ b/Git/Queue.hs @@ -159,7 +159,6 @@ runAction repo action@(CommandAction {}) = do #ifndef mingw32_HOST_OS let p = (proc "xargs" $ "-0":"git":toCommand gitparams) { env = gitEnv repo } withHandle StdinHandle createProcessSuccess p $ \h -> do - fileEncoding h hPutStr h $ intercalate "\0" $ toCommand $ getFiles action hClose h #else diff --git a/Git/Repair.hs b/Git/Repair.hs index fcfc03600..1baf51a64 100644 --- a/Git/Repair.hs +++ b/Git/Repair.hs @@ -614,4 +614,4 @@ successfulRepair = fst safeReadFile :: FilePath -> IO String safeReadFile f = do allowRead f - readFileStrictAnyEncoding f + readFileStrict f diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs index 9ae8295ae..c6157a9ed 100644 --- a/Git/UnionMerge.hs +++ b/Git/UnionMerge.hs @@ -22,6 +22,7 @@ import Git.UpdateIndex import Git.HashObject import Git.Types import Git.FilePath +import Utility.FileSystemEncoding {- Performs a union merge between two branches, staging it in the index. - Any previously staged changes in the index will be lost. @@ -94,8 +95,7 @@ mergeFile info file hashhandle h = case filter (/= nullSha) [Ref asha, Ref bsha] -- We don't know how the file is encoded, but need to -- split it into lines to union merge. Using the -- FileSystemEncoding for this is a hack, but ensures there - -- are no decoding errors. Note that this works because - -- hashObject sets fileEncoding on its write handle. + -- are no decoding errors. getcontents s = lines . encodeW8NUL . L.unpack <$> catObject h s {- Calculates a union merge between a list of refs, with contents. diff --git a/Git/UpdateIndex.hs b/Git/UpdateIndex.hs index 55c5b3bb2..7fdc9450f 100644 --- a/Git/UpdateIndex.hs +++ b/Git/UpdateIndex.hs @@ -55,7 +55,6 @@ startUpdateIndex :: Repo -> IO UpdateIndexHandle startUpdateIndex repo = do (Just h, _, _, p) <- createProcess (gitCreateProcess params repo) { std_in = CreatePipe } - fileEncoding h return $ UpdateIndexHandle p h where params = map Param ["update-index", "-z", "--index-info"] diff --git a/Logs/Transfer.hs b/Logs/Transfer.hs index 65a4e3796..28f7b0a26 100644 --- a/Logs/Transfer.hs +++ b/Logs/Transfer.hs @@ -220,8 +220,7 @@ parseTransferFile file bits = splitDirectories file writeTransferInfoFile :: TransferInfo -> FilePath -> IO () -writeTransferInfoFile info tfile = writeFileAnyEncoding tfile $ - writeTransferInfo info +writeTransferInfoFile info tfile = writeFile tfile $ writeTransferInfo info {- File format is a header line containing the startedTime and any - bytesComplete value. Followed by a newline and the associatedFile. @@ -243,7 +242,7 @@ writeTransferInfo info = unlines readTransferInfoFile :: Maybe PID -> FilePath -> IO (Maybe TransferInfo) readTransferInfoFile mpid tfile = catchDefaultIO Nothing $ - readTransferInfo mpid <$> readFileStrictAnyEncoding tfile + readTransferInfo mpid <$> readFileStrict tfile readTransferInfo :: Maybe PID -> String -> Maybe TransferInfo readTransferInfo mpid s = TransferInfo diff --git a/Logs/Unused.hs b/Logs/Unused.hs index 1035d1246..2361fedbc 100644 --- a/Logs/Unused.hs +++ b/Logs/Unused.hs @@ -66,7 +66,7 @@ updateUnusedLog prefix m = do writeUnusedLog :: FilePath -> UnusedLog -> Annex () writeUnusedLog prefix l = do logfile <- fromRepo $ gitAnnexUnusedLog prefix - liftIO $ viaTmp writeFileAnyEncoding logfile $ unlines $ map format $ M.toList l + liftIO $ viaTmp writeFile logfile $ unlines $ map format $ M.toList l where format (k, (i, Just t)) = show i ++ " " ++ key2file k ++ " " ++ show t format (k, (i, Nothing)) = show i ++ " " ++ key2file k @@ -76,7 +76,7 @@ readUnusedLog prefix = do f <- fromRepo $ gitAnnexUnusedLog prefix ifM (liftIO $ doesFileExist f) ( M.fromList . mapMaybe parse . lines - <$> liftIO (readFileStrictAnyEncoding f) + <$> liftIO (readFileStrict f) , return M.empty ) where diff --git a/Messages.hs b/Messages.hs index 0ab1f72bb..0036e5759 100644 --- a/Messages.hs +++ b/Messages.hs @@ -183,7 +183,6 @@ setupConsole = do <$> streamHandler stderr DEBUG <*> pure preciseLogFormatter updateGlobalLogger rootLoggerName (setLevel NOTICE . setHandlers [s]) - setConsoleEncoding {- Force output to be line buffered. This is normally the case when - it's connected to a terminal, but may not be when redirected to - a file or a pipe. -} diff --git a/P2P/IO.hs b/P2P/IO.hs index ee1724d7b..9ebb102f1 100644 --- a/P2P/IO.hs +++ b/P2P/IO.hs @@ -99,7 +99,6 @@ setupHandle s = do h <- socketToHandle s ReadWriteMode hSetBuffering h LineBuffering hSetBinaryMode h False - fileEncoding h return h -- Purposefully incomplete interpreter of Proto. diff --git a/Remote/BitTorrent.hs b/Remote/BitTorrent.hs index 899c57e3e..0ec78aa64 100644 --- a/Remote/BitTorrent.hs +++ b/Remote/BitTorrent.hs @@ -21,6 +21,7 @@ import Types.CleanupActions import Messages.Progress import Utility.Metered import Utility.Tmp +import Utility.FileSystemEncoding import Backend.URL import Annex.Perms import Annex.UUID diff --git a/Remote/External.hs b/Remote/External.hs index 0b0e1dc18..7091a657c 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -384,9 +384,6 @@ startExternal external = do p <- propgit g basep (Just hin, Just hout, Just herr, ph) <- createProcess p `catchIO` runerr - fileEncoding hin - fileEncoding hout - fileEncoding herr stderrelay <- async $ errrelayer herr checkearlytermination =<< getProcessExitCode ph cv <- newTVarIO $ externalDefaultConfig external diff --git a/Test.hs b/Test.hs index 3f6727721..0ab7bf130 100644 --- a/Test.hs +++ b/Test.hs @@ -95,6 +95,7 @@ import qualified Utility.HumanTime import qualified Utility.ThreadScheduler import qualified Utility.Base64 import qualified Utility.Tmp +import qualified Utility.FileSystemEncoding import qualified Command.Uninit import qualified CmdLine.GitAnnex as GitAnnex #ifndef mingw32_HOST_OS @@ -1675,7 +1676,8 @@ test_add_subdirs = intmpclonerepo $ do - calculated correctly for files in subdirs. -} unlessM (unlockedFiles <$> getTestMode) $ do git_annex "sync" [] @? "sync failed" - l <- annexeval $ decodeBS <$> Annex.CatFile.catObject (Git.Types.Ref "HEAD:dir/foo") + l <- annexeval $ Utility.FileSystemEncoding.decodeBS + <$> Annex.CatFile.catObject (Git.Types.Ref "HEAD:dir/foo") "../.git/annex/" `isPrefixOf` l @? ("symlink from subdir to .git/annex is wrong: " ++ l) createDirectory "dir2" diff --git a/Utility/CoProcess.hs b/Utility/CoProcess.hs index 94d5ac3bc..2bae40fba 100644 --- a/Utility/CoProcess.hs +++ b/Utility/CoProcess.hs @@ -47,10 +47,10 @@ start' s = do rawMode to return $ CoProcessState pid to from s where - rawMode h = do - fileEncoding h #ifdef mingw32_HOST_OS - hSetNewlineMode h noNewlineTranslation + rawMode h = hSetNewlineMode h noNewlineTranslation +#else + rawMode _ = return () #endif stop :: CoProcessHandle -> IO () diff --git a/Utility/ExternalSHA.hs b/Utility/ExternalSHA.hs index e581697ae..7b0882004 100644 --- a/Utility/ExternalSHA.hs +++ b/Utility/ExternalSHA.hs @@ -14,7 +14,6 @@ module Utility.ExternalSHA (externalSHA) where import Utility.SafeCommand import Utility.Process -import Utility.FileSystemEncoding import Utility.Misc import Utility.Exception @@ -30,7 +29,6 @@ externalSHA command shasize file = do Left _ -> Left (command ++ " failed") where readsha args = withHandle StdoutHandle createProcessSuccess p $ \h -> do - fileEncoding h output <- hGetContentsStrict h hClose h return output diff --git a/Utility/FileSystemEncoding.hs b/Utility/FileSystemEncoding.hs index eab98337a..be43ace95 100644 --- a/Utility/FileSystemEncoding.hs +++ b/Utility/FileSystemEncoding.hs @@ -1,6 +1,6 @@ {- GHC File system encoding handling. - - - Copyright 2012-2014 Joey Hess + - Copyright 2012-2016 Joey Hess - - License: BSD-2-clause -} @@ -9,7 +9,7 @@ {-# OPTIONS_GHC -fno-warn-tabs #-} module Utility.FileSystemEncoding ( - fileEncoding, + useFileSystemEncoding, withFilePath, md5FilePath, decodeBS, @@ -19,7 +19,6 @@ module Utility.FileSystemEncoding ( encodeW8NUL, decodeW8NUL, truncateFilePath, - setConsoleEncoding, ) where import qualified GHC.Foreign as GHC @@ -39,19 +38,30 @@ import qualified Data.ByteString.Lazy.UTF8 as L8 import Utility.Exception -{- Sets a Handle to use the filesystem encoding. This causes data - - written or read from it to be encoded/decoded the same - - as ghc 7.4 does to filenames etc. This special encoding - - allows "arbitrary undecodable bytes to be round-tripped through it". +{- Makes all subsequent Handles that are opened, as well as stdio Handles, + - use the filesystem encoding, instead of the encoding of the current + - locale. + - + - The filesystem encoding allows "arbitrary undecodable bytes to be + - round-tripped through it". This avoids encoded failures when data is not + - encoded matching the current locale. + - + - Note that code can still use hSetEncoding to change the encoding of a + - Handle. This only affects the default encoding. -} -fileEncoding :: Handle -> IO () +useFileSystemEncoding :: IO () +useFileSystemEncoding = do #ifndef mingw32_HOST_OS -fileEncoding h = hSetEncoding h =<< Encoding.getFileSystemEncoding + e <- Encoding.getFileSystemEncoding #else -{- The file system encoding does not work well on Windows, - - and Windows only has utf FilePaths anyway. -} -fileEncoding h = hSetEncoding h Encoding.utf8 + {- The file system encoding does not work well on Windows, + - and Windows only has utf FilePaths anyway. -} + let e = Encoding.utf8 #endif + hSetEncoding stdin e + hSetEncoding stdout e + hSetEncoding stderr e + Encoding.setLocaleEncoding e {- Marshal a Haskell FilePath into a NUL terminated C string using temporary - storage. The FilePath is encoded using the filesystem encoding, @@ -165,10 +175,3 @@ truncateFilePath n = reverse . go [] n . L8.fromString else go (c:coll) (cnt - x') (L8.drop 1 bs) _ -> coll #endif - -{- This avoids ghc's output layer crashing on invalid encoded characters in - - filenames when printing them out. -} -setConsoleEncoding :: IO () -setConsoleEncoding = do - fileEncoding stdout - fileEncoding stderr diff --git a/Utility/Lsof.hs b/Utility/Lsof.hs index 433b7c679..27d34b592 100644 --- a/Utility/Lsof.hs +++ b/Utility/Lsof.hs @@ -47,9 +47,8 @@ queryDir path = query ["+d", path] -} query :: [String] -> IO [(FilePath, LsofOpenMode, ProcessInfo)] query opts = - withHandle StdoutHandle (createProcessChecked checkSuccessProcess) p $ \h -> do - fileEncoding h - parse <$> hGetContentsStrict h + withHandle StdoutHandle (createProcessChecked checkSuccessProcess) p $ + parse <$$> hGetContentsStrict where p = proc "lsof" ("-F0can" : opts) diff --git a/Utility/MagicWormhole.hs b/Utility/MagicWormhole.hs index 9a99cba33..e217dcdca 100644 --- a/Utility/MagicWormhole.hs +++ b/Utility/MagicWormhole.hs @@ -27,7 +27,6 @@ import Utility.Process import Utility.SafeCommand import Utility.Monad import Utility.Misc -import Utility.FileSystemEncoding import Utility.Env import Utility.Path @@ -105,8 +104,7 @@ sendFile f (CodeObserver observer) ps = do -- Work around stupid stdout buffering behavior of python. -- See https://github.com/warner/magic-wormhole/issues/108 environ <- addEntry "PYTHONUNBUFFERED" "1" <$> getEnvironment - runWormHoleProcess p { env = Just environ} $ \_hin hout -> do - fileEncoding hout + runWormHoleProcess p { env = Just environ} $ \_hin hout -> findcode =<< words <$> hGetContents hout where p = wormHoleProcess (Param "send" : ps ++ [File f]) diff --git a/Utility/Misc.hs b/Utility/Misc.hs index ebb42576b..4498c0a03 100644 --- a/Utility/Misc.hs +++ b/Utility/Misc.hs @@ -10,9 +10,6 @@ module Utility.Misc where -import Utility.FileSystemEncoding -import Utility.Monad - import System.IO import Control.Monad import Foreign @@ -35,20 +32,6 @@ hGetContentsStrict = hGetContents >=> \s -> length s `seq` return s readFileStrict :: FilePath -> IO String readFileStrict = readFile >=> \s -> length s `seq` return s -{- Reads a file strictly, and using the FileSystemEncoding, so it will - - never crash on a badly encoded file. -} -readFileStrictAnyEncoding :: FilePath -> IO String -readFileStrictAnyEncoding f = withFile f ReadMode $ \h -> do - fileEncoding h - hClose h `after` hGetContentsStrict h - -{- Writes a file, using the FileSystemEncoding so it will never crash - - on a badly encoded content string. -} -writeFileAnyEncoding :: FilePath -> String -> IO () -writeFileAnyEncoding f content = withFile f WriteMode $ \h -> do - fileEncoding h - hPutStr h content - {- Like break, but the item matching the condition is not included - in the second result list. - diff --git a/Utility/Quvi.hs b/Utility/Quvi.hs index 417ab7041..d33d79bb8 100644 --- a/Utility/Quvi.hs +++ b/Utility/Quvi.hs @@ -153,11 +153,8 @@ httponly :: QuviParams httponly Quvi04 = [Param "-c", Param "http"] httponly _ = [] -- No way to do it with 0.9? -{- Both versions of quvi will output utf-8 encoded data even when - - the locale doesn't support it. -} readQuvi :: [String] -> IO String readQuvi ps = withHandle StdoutHandle createProcessSuccess p $ \h -> do - fileEncoding h r <- hGetContentsStrict h hClose h return r diff --git a/Utility/Shell.hs b/Utility/Shell.hs index 860ee11dd..7adb65128 100644 --- a/Utility/Shell.hs +++ b/Utility/Shell.hs @@ -48,9 +48,8 @@ findShellCommand f = do #ifndef mingw32_HOST_OS defcmd #else - l <- catchDefaultIO Nothing $ withFile f ReadMode $ \h -> do - fileEncoding h - headMaybe . lines <$> hGetContents h + l <- catchDefaultIO Nothing $ withFile f ReadMode $ + headMaybe . lines <$$> hGetContents h case l of Just ('#':'!':rest) -> case words rest of [] -> defcmd diff --git a/git-annex.hs b/git-annex.hs index d5fab7f47..e30d320b9 100644 --- a/git-annex.hs +++ b/git-annex.hs @@ -15,6 +15,7 @@ import qualified CmdLine.GitAnnex import qualified CmdLine.GitAnnexShell import qualified CmdLine.GitRemoteTorAnnex import qualified Test +import Utility.FileSystemEncoding #ifdef mingw32_HOST_OS import Utility.UserInfo @@ -23,6 +24,7 @@ import Utility.Env main :: IO () main = withSocketsDo $ do + useFileSystemEncoding ps <- getArgs #ifdef mingw32_HOST_OS winEnv diff --git a/git-union-merge.hs b/git-union-merge.hs index 3bf628c75..18c88b1a9 100644 --- a/git-union-merge.hs +++ b/git-union-merge.hs @@ -14,6 +14,7 @@ import qualified Git.CurrentRepo import qualified Git.Branch import qualified Git.Index import qualified Git +import Utility.FileSystemEncoding header :: String header = "Usage: git-union-merge ref ref newref" @@ -39,6 +40,7 @@ parseArgs = do main :: IO () main = do + useFileSystemEncoding [aref, bref, newref] <- map Git.Ref <$> parseArgs g <- Git.Config.read =<< Git.CurrentRepo.get _ <- Git.Index.override (tmpIndex g) g -- cgit v1.2.3