diff options
-rw-r--r-- | Annex/Content.hs | 4 | ||||
-rw-r--r-- | Annex/Transfer.hs | 3 | ||||
-rw-r--r-- | Assistant/Threads/SanityChecker.hs | 6 | ||||
-rw-r--r-- | Assistant/Threads/TransferPoller.hs | 3 | ||||
-rw-r--r-- | Backend/Hash.hs | 5 | ||||
-rw-r--r-- | Backend/WORM.hs | 6 | ||||
-rw-r--r-- | Command/Fsck.hs | 3 | ||||
-rw-r--r-- | Command/Info.hs | 3 | ||||
-rw-r--r-- | Command/RecvKey.hs | 3 | ||||
-rw-r--r-- | Common.hs | 3 | ||||
-rw-r--r-- | Limit.hs | 4 | ||||
-rw-r--r-- | Remote/Git.hs | 4 | ||||
-rw-r--r-- | Remote/Helper/Chunked.hs | 3 | ||||
-rw-r--r-- | Remote/Helper/Http.hs | 2 | ||||
-rw-r--r-- | Remote/S3.hs | 2 | ||||
-rw-r--r-- | Utility/FileSize.hs | 33 | ||||
-rw-r--r-- | Utility/InodeCache.hs | 3 | ||||
-rw-r--r-- | Utility/Url.hs | 7 | ||||
-rw-r--r-- | debian/changelog | 3 | ||||
-rw-r--r-- | doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file.mdwn | 2 | ||||
-rw-r--r-- | doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file/comment_1_cb9e9dada7baf4e48725a9483b3b448a._comment | 25 |
21 files changed, 93 insertions, 34 deletions
diff --git a/Annex/Content.hs b/Annex/Content.hs index 37090d3bb..60daaab90 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -234,8 +234,8 @@ prepGetViaTmpChecked key unabletoget getkey = do tmp <- fromRepo $ gitAnnexTmpObjectLocation key e <- liftIO $ doesFileExist tmp - alreadythere <- if e - then fromIntegral . fileSize <$> liftIO (getFileStatus tmp) + alreadythere <- liftIO $ if e + then getFileSize tmp else return 0 ifM (checkDiskSpace Nothing key alreadythere) ( do diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs index fb89869f8..1603974ff 100644 --- a/Annex/Transfer.hs +++ b/Annex/Transfer.hs @@ -132,8 +132,7 @@ runTransfer' ignorelock t file shouldretry a = do liftIO $ readMVar metervar | otherwise = do f <- fromRepo $ gitAnnexTmpObjectLocation (transferKey t) - liftIO $ catchDefaultIO 0 $ - fromIntegral . fileSize <$> getFileStatus f + liftIO $ catchDefaultIO 0 $ getFileSize f type RetryDecider = TransferInfo -> TransferInfo -> Bool diff --git a/Assistant/Threads/SanityChecker.hs b/Assistant/Threads/SanityChecker.hs index 26c6ebba5..df29df006 100644 --- a/Assistant/Threads/SanityChecker.hs +++ b/Assistant/Threads/SanityChecker.hs @@ -225,7 +225,7 @@ checkLogSize :: Int -> Assistant () checkLogSize n = do f <- liftAnnex $ fromRepo gitAnnexLogFile logs <- liftIO $ listLogs f - totalsize <- liftIO $ sum <$> mapM filesize logs + totalsize <- liftIO $ sum <$> mapM getFileSize logs when (totalsize > 2 * oneMegabyte) $ do notice ["Rotated logs due to size:", show totalsize] liftIO $ openLog f >>= handleToFd >>= redirLog @@ -237,9 +237,7 @@ checkLogSize n = do checkLogSize (n + 1) _ -> noop where - filesize f = fromIntegral . fileSize <$> liftIO (getFileStatus f) - - oneMegabyte :: Int + oneMegabyte :: Integer oneMegabyte = 1000000 #endif diff --git a/Assistant/Threads/TransferPoller.hs b/Assistant/Threads/TransferPoller.hs index 71bfe3676..a5b30b4f0 100644 --- a/Assistant/Threads/TransferPoller.hs +++ b/Assistant/Threads/TransferPoller.hs @@ -36,8 +36,7 @@ transferPollerThread = namedThread "TransferPoller" $ do - temp file being used for the transfer. -} | transferDirection t == Download = do let f = gitAnnexTmpObjectLocation (transferKey t) g - sz <- liftIO $ catchMaybeIO $ - fromIntegral . fileSize <$> getFileStatus f + sz <- liftIO $ catchMaybeIO $ getFileSize f newsize t info sz {- Uploads don't need to be polled for when the TransferWatcher - thread can track file modifications. -} diff --git a/Backend/Hash.hs b/Backend/Hash.hs index e50eca516..8ddccd229 100644 --- a/Backend/Hash.hs +++ b/Backend/Hash.hs @@ -68,8 +68,7 @@ hashNameE hash = hashName hash ++ "E" keyValue :: Hash -> KeySource -> Annex (Maybe Key) keyValue hash source = do let file = contentLocation source - stat <- liftIO $ getFileStatus file - let filesize = fromIntegral $ fileSize stat + filesize <- liftIO $ getFileSize file s <- hashFile hash file filesize return $ Just $ stubKey { keyName = s @@ -103,7 +102,7 @@ checkKeyChecksum hash key file = do mstat <- liftIO $ catchMaybeIO $ getFileStatus file case (mstat, fast) of (Just stat, False) -> do - let filesize = fromIntegral $ fileSize stat + filesize <- liftIO $ getFileSize' file stat showSideAction "checksum" check <$> hashFile hash file filesize _ -> return True diff --git a/Backend/WORM.hs b/Backend/WORM.hs index bd5e374e1..24dba5795 100644 --- a/Backend/WORM.hs +++ b/Backend/WORM.hs @@ -32,11 +32,13 @@ backend = Backend -} keyValue :: KeySource -> Annex (Maybe Key) keyValue source = do - stat <- liftIO $ getFileStatus $ contentLocation source + let f = contentLocation source + stat <- liftIO $ getFileStatus f + sz <- liftIO $ getFileSize' f stat relf <- getTopFilePath <$> inRepo (toTopFilePath $ keyFilename source) return $ Just $ stubKey { keyName = genKeyName relf , keyBackendName = name backend - , keySize = Just $ fromIntegral $ fileSize stat + , keySize = Just sz , keyMtime = Just $ modificationTime stat } diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 46c1620f1..5dad10127 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -303,8 +303,7 @@ checkKeySizeOr :: (Key -> Annex String) -> Key -> FilePath -> Annex Bool checkKeySizeOr bad key file = case Types.Key.keySize key of Nothing -> return True Just size -> do - size' <- fromIntegral . fileSize - <$> liftIO (getFileStatus file) + size' <- liftIO $ getFileSize file comparesizes size size' where comparesizes a b = do diff --git a/Command/Info.hs b/Command/Info.hs index 36d93a270..86b608928 100644 --- a/Command/Info.hs +++ b/Command/Info.hs @@ -490,8 +490,7 @@ staleSize label dirspec = go =<< lift (dirKeys dirspec) keysizes keys = do dir <- lift $ fromRepo dirspec liftIO $ forM keys $ \k -> catchDefaultIO 0 $ - fromIntegral . fileSize - <$> getFileStatus (dir </> keyFile k) + getFileSize (dir </> keyFile k) aside :: String -> String aside s = " (" ++ s ++ ")" diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs index 8a806875b..424ca923d 100644 --- a/Command/RecvKey.hs +++ b/Command/RecvKey.hs @@ -62,8 +62,7 @@ start key = fieldTransfer Download key $ \_p -> oksize <- case Types.Key.keySize key of Nothing -> return True Just size -> do - size' <- fromIntegral . fileSize - <$> liftIO (getFileStatus tmp) + size' <- liftIO $ getFileSize tmp return $ size == size' if oksize then case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of @@ -30,7 +30,8 @@ import Utility.Monad as X import Utility.Data as X import Utility.Applicative as X import Utility.FileSystemEncoding as X -import Utility.PosixFiles as X +import Utility.PosixFiles as X hiding (fileSize) +import Utility.FileSize as X import Utility.Network as X import Utility.PartialPrelude as X @@ -239,9 +239,7 @@ limitSize vs s = case readSize dataUnits s of checkkey sz key = return $ keySize key `vs` Just sz check _ sz (Just key) = checkkey sz key check fi sz Nothing = do - filesize <- liftIO $ catchMaybeIO $ - fromIntegral . fileSize - <$> getFileStatus (relFile fi) + filesize <- liftIO $ catchMaybeIO $ getFileSize (relFile fi) return $ filesize `vs` Just sz addMetaData :: String -> Annex () diff --git a/Remote/Git.hs b/Remote/Git.hs index f015e295e..583e9c728 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -563,9 +563,7 @@ rsyncOrCopyFile rsyncparams src dest p = (const $ copyFileExternal CopyTimeStamps src dest) watchfilesize oldsz = do threadDelay 500000 -- 0.5 seconds - v <- catchMaybeIO $ - toBytesProcessed . fileSize - <$> getFileStatus dest + v <- catchMaybeIO $ toBytesProcessed <$> getFileSize dest case v of Just sz | sz /= oldsz -> do diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 806fab542..8516268ce 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -244,8 +244,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink | otherwise = go =<< chunkKeys u chunkconfig basek where go ls = do - currsize <- liftIO $ catchMaybeIO $ - toInteger . fileSize <$> getFileStatus dest + currsize <- liftIO $ catchMaybeIO $ getFileSize dest let ls' = maybe ls (setupResume ls) currsize if any null ls' then return True -- dest is already complete diff --git a/Remote/Helper/Http.hs b/Remote/Helper/Http.hs index 6ce5bacb8..3d08066ba 100644 --- a/Remote/Helper/Http.hs +++ b/Remote/Helper/Http.hs @@ -32,7 +32,7 @@ httpStorer a = fileStorer $ \k f m -> a k =<< liftIO (httpBodyStorer f m) -- the meter as it's sent. httpBodyStorer :: FilePath -> MeterUpdate -> IO RequestBody httpBodyStorer src m = do - size <- fromIntegral . fileSize <$> getFileStatus src :: IO Integer + size <- getFileSize src let streamer sink = withMeteredFile src m $ \b -> byteStringPopper b sink return $ RequestBodyStream (fromInteger size) streamer diff --git a/Remote/S3.hs b/Remote/S3.hs index 1a6e41094..104fdddca 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -159,7 +159,7 @@ store :: Remote -> S3Handle -> Storer store r h = fileStorer $ \k f p -> do case partSize (hinfo h) of Just partsz | partsz > 0 -> do - fsz <- fromIntegral . fileSize <$> liftIO (getFileStatus f) + fsz <- liftIO $ getFileSize f if fsz > partsz then multipartupload fsz partsz k f p else singlepartupload k f p diff --git a/Utility/FileSize.hs b/Utility/FileSize.hs new file mode 100644 index 000000000..3113695d3 --- /dev/null +++ b/Utility/FileSize.hs @@ -0,0 +1,33 @@ +{- File size. + - + - License: BSD-2-clause + -} + +{-# LANGUAGE CPP #-} + +module Utility.FileSize where + +import System.PosixCompat.Files +import Control.Exception (bracket) +import System.IO + +{- Gets the size of a file. + - + - This is better than using fileSize, because on Windows that returns a + - FileOffset which maxes out at 2 gb. + - See https://github.com/jystic/unix-compat/issues/16 + -} +getFileSize :: FilePath -> IO Integer +#ifndef mingw32_HOST_OS +getFileSize f = fromIntegral . fileSize <$> getFileStatus f +#else +getFileSize f = bracket (openFile f ReadMode) hClose hFileSize +#endif + +{- Gets the size of the file, when its FileStatus is already known. -} +getFileSize' :: FilePath -> FileStatus -> IO Integer +#ifndef mingw32_HOST_OS +getFileSize' _ s = return $ fromIntegral $ fileSize s +#else +getFileSize' f _ = getFileSize f +#endif diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs index 8a182449b..0b0b040cb 100644 --- a/Utility/InodeCache.hs +++ b/Utility/InodeCache.hs @@ -40,6 +40,9 @@ module Utility.InodeCache ( import Common import System.PosixCompat.Types import Utility.QuickCheck +-- While fileSize overflows and wraps at 2gb on Windows, +-- it's ok for purposes of comparison. +import System.PosixCompat.Files (fileSize) #ifdef mingw32_HOST_OS import Data.Word (Word64) diff --git a/Utility/Url.hs b/Utility/Url.hs index cc15c82d0..a8828e048 100644 --- a/Utility/Url.hs +++ b/Utility/Url.hs @@ -102,9 +102,12 @@ exists url uo = case parseURIRelaxed url of -- so fall back to reading files and using curl. Nothing | uriScheme u == "file:" -> do - s <- catchMaybeIO $ getFileStatus (unEscapeString $ uriPath u) + let f = unEscapeString (uriPath u) + s <- catchMaybeIO $ getFileStatus f case s of - Just stat -> return (True, Just $ fromIntegral $ fileSize stat) + Just stat -> do + sz <- getFileSize' f stat + return (True, Just sz) Nothing -> dne | Build.SysConfig.curl -> do output <- catchDefaultIO "" $ diff --git a/debian/changelog b/debian/changelog index bdbe2663d..eb48707f2 100644 --- a/debian/changelog +++ b/debian/changelog @@ -13,6 +13,9 @@ git-annex (5.20150114) UNRELEASED; urgency=medium * Fix wording of message displayed when unable to get a file that is available in untrusted repositories. * Windows: Fix running of the pre-commit-annex hook. + * Avoid using fileSize which maxes out at just 2 gb on Windows. + Instead, use hFileSize, which doesn't have a bounded size. + Fixes support for files > 2 gb on Windows. -- Joey Hess <id@joeyh.name> Tue, 13 Jan 2015 17:03:39 -0400 diff --git a/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file.mdwn b/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file.mdwn index 773d452f6..21c7af19b 100644 --- a/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file.mdwn +++ b/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file.mdwn @@ -56,3 +56,5 @@ git-annex: sync: 1 failed # End of transcript or log. """]] + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file/comment_1_cb9e9dada7baf4e48725a9483b3b448a._comment b/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file/comment_1_cb9e9dada7baf4e48725a9483b3b448a._comment new file mode 100644 index 000000000..42acf9f64 --- /dev/null +++ b/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file/comment_1_cb9e9dada7baf4e48725a9483b3b448a._comment @@ -0,0 +1,25 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2015-01-20T19:41:44Z" + content=""" +In my own test, I made a 10 gb file, and the key git-annex came up with had +a size of -2147483648 which is clearly screwed up.. But that's what +getFileStatus reports the size as. This was in an 32 bit XP VM. + +Hmm, unix-compat's getFileStatus calls getFileSize, which yields a +FileOffset. The maxBound of that on linux is a nice large +9223372036854775807, but on Windows, it appears to be 2147483647. + +Compare with using hFileSize, which yields an Integer. So, +getFileSize and fileSize are unsafe on Windows due to FileOffset being so +small on Windows. + +I have now corrected all places in git-annex that used the unsafe fileSize. +It will behave correctly on Windows now. + +However, if you still have the repo with the big file, it's key still has +the wrong size. To fix, you can "git annex unannex" the file, and then "git +annex add" it back, after upgrading to the current daily build, or the next +release of git-annex. +"""]] |