summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2015-01-20 16:58:48 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2015-01-20 17:09:24 -0400
commitc7e67aa29183c7fe7bb14b909cf953a89c531b08 (patch)
treee63f341fb51375faf9a0928af00703bcef584306
parent68c8b9b7592a7c407bdd6a0fe8b5f49241118926 (diff)
add getFileSize, which can get the real size of a large file on Windows
Avoid using fileSize which maxes out at just 2 gb on Windows. Instead, use hFileSize, which doesn't have a bounded size. Fixes support for files > 2 gb on Windows. Note that the InodeCache code only needs to compare a file size, so it doesn't matter it the file size wraps. So it has been left as-is. This was necessary both to avoid invalidating existing inode caches, and because the code passed FileStatus around and would have become more expensive if it called getFileSize. This commit was sponsored by Christian Dietrich.
-rw-r--r--Annex/Content.hs4
-rw-r--r--Annex/Transfer.hs3
-rw-r--r--Assistant/Threads/SanityChecker.hs6
-rw-r--r--Assistant/Threads/TransferPoller.hs3
-rw-r--r--Backend/Hash.hs5
-rw-r--r--Backend/WORM.hs6
-rw-r--r--Command/Fsck.hs3
-rw-r--r--Command/Info.hs3
-rw-r--r--Command/RecvKey.hs3
-rw-r--r--Common.hs3
-rw-r--r--Limit.hs4
-rw-r--r--Remote/Git.hs4
-rw-r--r--Remote/Helper/Chunked.hs3
-rw-r--r--Remote/Helper/Http.hs2
-rw-r--r--Remote/S3.hs2
-rw-r--r--Utility/FileSize.hs33
-rw-r--r--Utility/InodeCache.hs3
-rw-r--r--Utility/Url.hs7
-rw-r--r--debian/changelog3
-rw-r--r--doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file.mdwn2
-rw-r--r--doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file/comment_1_cb9e9dada7baf4e48725a9483b3b448a._comment25
21 files changed, 93 insertions, 34 deletions
diff --git a/Annex/Content.hs b/Annex/Content.hs
index 37090d3bb..60daaab90 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -234,8 +234,8 @@ prepGetViaTmpChecked key unabletoget getkey = do
tmp <- fromRepo $ gitAnnexTmpObjectLocation key
e <- liftIO $ doesFileExist tmp
- alreadythere <- if e
- then fromIntegral . fileSize <$> liftIO (getFileStatus tmp)
+ alreadythere <- liftIO $ if e
+ then getFileSize tmp
else return 0
ifM (checkDiskSpace Nothing key alreadythere)
( do
diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs
index fb89869f8..1603974ff 100644
--- a/Annex/Transfer.hs
+++ b/Annex/Transfer.hs
@@ -132,8 +132,7 @@ runTransfer' ignorelock t file shouldretry a = do
liftIO $ readMVar metervar
| otherwise = do
f <- fromRepo $ gitAnnexTmpObjectLocation (transferKey t)
- liftIO $ catchDefaultIO 0 $
- fromIntegral . fileSize <$> getFileStatus f
+ liftIO $ catchDefaultIO 0 $ getFileSize f
type RetryDecider = TransferInfo -> TransferInfo -> Bool
diff --git a/Assistant/Threads/SanityChecker.hs b/Assistant/Threads/SanityChecker.hs
index 26c6ebba5..df29df006 100644
--- a/Assistant/Threads/SanityChecker.hs
+++ b/Assistant/Threads/SanityChecker.hs
@@ -225,7 +225,7 @@ checkLogSize :: Int -> Assistant ()
checkLogSize n = do
f <- liftAnnex $ fromRepo gitAnnexLogFile
logs <- liftIO $ listLogs f
- totalsize <- liftIO $ sum <$> mapM filesize logs
+ totalsize <- liftIO $ sum <$> mapM getFileSize logs
when (totalsize > 2 * oneMegabyte) $ do
notice ["Rotated logs due to size:", show totalsize]
liftIO $ openLog f >>= handleToFd >>= redirLog
@@ -237,9 +237,7 @@ checkLogSize n = do
checkLogSize (n + 1)
_ -> noop
where
- filesize f = fromIntegral . fileSize <$> liftIO (getFileStatus f)
-
- oneMegabyte :: Int
+ oneMegabyte :: Integer
oneMegabyte = 1000000
#endif
diff --git a/Assistant/Threads/TransferPoller.hs b/Assistant/Threads/TransferPoller.hs
index 71bfe3676..a5b30b4f0 100644
--- a/Assistant/Threads/TransferPoller.hs
+++ b/Assistant/Threads/TransferPoller.hs
@@ -36,8 +36,7 @@ transferPollerThread = namedThread "TransferPoller" $ do
- temp file being used for the transfer. -}
| transferDirection t == Download = do
let f = gitAnnexTmpObjectLocation (transferKey t) g
- sz <- liftIO $ catchMaybeIO $
- fromIntegral . fileSize <$> getFileStatus f
+ sz <- liftIO $ catchMaybeIO $ getFileSize f
newsize t info sz
{- Uploads don't need to be polled for when the TransferWatcher
- thread can track file modifications. -}
diff --git a/Backend/Hash.hs b/Backend/Hash.hs
index e50eca516..8ddccd229 100644
--- a/Backend/Hash.hs
+++ b/Backend/Hash.hs
@@ -68,8 +68,7 @@ hashNameE hash = hashName hash ++ "E"
keyValue :: Hash -> KeySource -> Annex (Maybe Key)
keyValue hash source = do
let file = contentLocation source
- stat <- liftIO $ getFileStatus file
- let filesize = fromIntegral $ fileSize stat
+ filesize <- liftIO $ getFileSize file
s <- hashFile hash file filesize
return $ Just $ stubKey
{ keyName = s
@@ -103,7 +102,7 @@ checkKeyChecksum hash key file = do
mstat <- liftIO $ catchMaybeIO $ getFileStatus file
case (mstat, fast) of
(Just stat, False) -> do
- let filesize = fromIntegral $ fileSize stat
+ filesize <- liftIO $ getFileSize' file stat
showSideAction "checksum"
check <$> hashFile hash file filesize
_ -> return True
diff --git a/Backend/WORM.hs b/Backend/WORM.hs
index bd5e374e1..24dba5795 100644
--- a/Backend/WORM.hs
+++ b/Backend/WORM.hs
@@ -32,11 +32,13 @@ backend = Backend
-}
keyValue :: KeySource -> Annex (Maybe Key)
keyValue source = do
- stat <- liftIO $ getFileStatus $ contentLocation source
+ let f = contentLocation source
+ stat <- liftIO $ getFileStatus f
+ sz <- liftIO $ getFileSize' f stat
relf <- getTopFilePath <$> inRepo (toTopFilePath $ keyFilename source)
return $ Just $ stubKey
{ keyName = genKeyName relf
, keyBackendName = name backend
- , keySize = Just $ fromIntegral $ fileSize stat
+ , keySize = Just sz
, keyMtime = Just $ modificationTime stat
}
diff --git a/Command/Fsck.hs b/Command/Fsck.hs
index 46c1620f1..5dad10127 100644
--- a/Command/Fsck.hs
+++ b/Command/Fsck.hs
@@ -303,8 +303,7 @@ checkKeySizeOr :: (Key -> Annex String) -> Key -> FilePath -> Annex Bool
checkKeySizeOr bad key file = case Types.Key.keySize key of
Nothing -> return True
Just size -> do
- size' <- fromIntegral . fileSize
- <$> liftIO (getFileStatus file)
+ size' <- liftIO $ getFileSize file
comparesizes size size'
where
comparesizes a b = do
diff --git a/Command/Info.hs b/Command/Info.hs
index 36d93a270..86b608928 100644
--- a/Command/Info.hs
+++ b/Command/Info.hs
@@ -490,8 +490,7 @@ staleSize label dirspec = go =<< lift (dirKeys dirspec)
keysizes keys = do
dir <- lift $ fromRepo dirspec
liftIO $ forM keys $ \k -> catchDefaultIO 0 $
- fromIntegral . fileSize
- <$> getFileStatus (dir </> keyFile k)
+ getFileSize (dir </> keyFile k)
aside :: String -> String
aside s = " (" ++ s ++ ")"
diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs
index 8a806875b..424ca923d 100644
--- a/Command/RecvKey.hs
+++ b/Command/RecvKey.hs
@@ -62,8 +62,7 @@ start key = fieldTransfer Download key $ \_p ->
oksize <- case Types.Key.keySize key of
Nothing -> return True
Just size -> do
- size' <- fromIntegral . fileSize
- <$> liftIO (getFileStatus tmp)
+ size' <- liftIO $ getFileSize tmp
return $ size == size'
if oksize
then case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of
diff --git a/Common.hs b/Common.hs
index 670fc44e5..9333a19d9 100644
--- a/Common.hs
+++ b/Common.hs
@@ -30,7 +30,8 @@ import Utility.Monad as X
import Utility.Data as X
import Utility.Applicative as X
import Utility.FileSystemEncoding as X
-import Utility.PosixFiles as X
+import Utility.PosixFiles as X hiding (fileSize)
+import Utility.FileSize as X
import Utility.Network as X
import Utility.PartialPrelude as X
diff --git a/Limit.hs b/Limit.hs
index 5d58e77f0..1053547ec 100644
--- a/Limit.hs
+++ b/Limit.hs
@@ -239,9 +239,7 @@ limitSize vs s = case readSize dataUnits s of
checkkey sz key = return $ keySize key `vs` Just sz
check _ sz (Just key) = checkkey sz key
check fi sz Nothing = do
- filesize <- liftIO $ catchMaybeIO $
- fromIntegral . fileSize
- <$> getFileStatus (relFile fi)
+ filesize <- liftIO $ catchMaybeIO $ getFileSize (relFile fi)
return $ filesize `vs` Just sz
addMetaData :: String -> Annex ()
diff --git a/Remote/Git.hs b/Remote/Git.hs
index f015e295e..583e9c728 100644
--- a/Remote/Git.hs
+++ b/Remote/Git.hs
@@ -563,9 +563,7 @@ rsyncOrCopyFile rsyncparams src dest p =
(const $ copyFileExternal CopyTimeStamps src dest)
watchfilesize oldsz = do
threadDelay 500000 -- 0.5 seconds
- v <- catchMaybeIO $
- toBytesProcessed . fileSize
- <$> getFileStatus dest
+ v <- catchMaybeIO $ toBytesProcessed <$> getFileSize dest
case v of
Just sz
| sz /= oldsz -> do
diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs
index 806fab542..8516268ce 100644
--- a/Remote/Helper/Chunked.hs
+++ b/Remote/Helper/Chunked.hs
@@ -244,8 +244,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink
| otherwise = go =<< chunkKeys u chunkconfig basek
where
go ls = do
- currsize <- liftIO $ catchMaybeIO $
- toInteger . fileSize <$> getFileStatus dest
+ currsize <- liftIO $ catchMaybeIO $ getFileSize dest
let ls' = maybe ls (setupResume ls) currsize
if any null ls'
then return True -- dest is already complete
diff --git a/Remote/Helper/Http.hs b/Remote/Helper/Http.hs
index 6ce5bacb8..3d08066ba 100644
--- a/Remote/Helper/Http.hs
+++ b/Remote/Helper/Http.hs
@@ -32,7 +32,7 @@ httpStorer a = fileStorer $ \k f m -> a k =<< liftIO (httpBodyStorer f m)
-- the meter as it's sent.
httpBodyStorer :: FilePath -> MeterUpdate -> IO RequestBody
httpBodyStorer src m = do
- size <- fromIntegral . fileSize <$> getFileStatus src :: IO Integer
+ size <- getFileSize src
let streamer sink = withMeteredFile src m $ \b -> byteStringPopper b sink
return $ RequestBodyStream (fromInteger size) streamer
diff --git a/Remote/S3.hs b/Remote/S3.hs
index 1a6e41094..104fdddca 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -159,7 +159,7 @@ store :: Remote -> S3Handle -> Storer
store r h = fileStorer $ \k f p -> do
case partSize (hinfo h) of
Just partsz | partsz > 0 -> do
- fsz <- fromIntegral . fileSize <$> liftIO (getFileStatus f)
+ fsz <- liftIO $ getFileSize f
if fsz > partsz
then multipartupload fsz partsz k f p
else singlepartupload k f p
diff --git a/Utility/FileSize.hs b/Utility/FileSize.hs
new file mode 100644
index 000000000..3113695d3
--- /dev/null
+++ b/Utility/FileSize.hs
@@ -0,0 +1,33 @@
+{- File size.
+ -
+ - License: BSD-2-clause
+ -}
+
+{-# LANGUAGE CPP #-}
+
+module Utility.FileSize where
+
+import System.PosixCompat.Files
+import Control.Exception (bracket)
+import System.IO
+
+{- Gets the size of a file.
+ -
+ - This is better than using fileSize, because on Windows that returns a
+ - FileOffset which maxes out at 2 gb.
+ - See https://github.com/jystic/unix-compat/issues/16
+ -}
+getFileSize :: FilePath -> IO Integer
+#ifndef mingw32_HOST_OS
+getFileSize f = fromIntegral . fileSize <$> getFileStatus f
+#else
+getFileSize f = bracket (openFile f ReadMode) hClose hFileSize
+#endif
+
+{- Gets the size of the file, when its FileStatus is already known. -}
+getFileSize' :: FilePath -> FileStatus -> IO Integer
+#ifndef mingw32_HOST_OS
+getFileSize' _ s = return $ fromIntegral $ fileSize s
+#else
+getFileSize' f _ = getFileSize f
+#endif
diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs
index 8a182449b..0b0b040cb 100644
--- a/Utility/InodeCache.hs
+++ b/Utility/InodeCache.hs
@@ -40,6 +40,9 @@ module Utility.InodeCache (
import Common
import System.PosixCompat.Types
import Utility.QuickCheck
+-- While fileSize overflows and wraps at 2gb on Windows,
+-- it's ok for purposes of comparison.
+import System.PosixCompat.Files (fileSize)
#ifdef mingw32_HOST_OS
import Data.Word (Word64)
diff --git a/Utility/Url.hs b/Utility/Url.hs
index cc15c82d0..a8828e048 100644
--- a/Utility/Url.hs
+++ b/Utility/Url.hs
@@ -102,9 +102,12 @@ exists url uo = case parseURIRelaxed url of
-- so fall back to reading files and using curl.
Nothing
| uriScheme u == "file:" -> do
- s <- catchMaybeIO $ getFileStatus (unEscapeString $ uriPath u)
+ let f = unEscapeString (uriPath u)
+ s <- catchMaybeIO $ getFileStatus f
case s of
- Just stat -> return (True, Just $ fromIntegral $ fileSize stat)
+ Just stat -> do
+ sz <- getFileSize' f stat
+ return (True, Just sz)
Nothing -> dne
| Build.SysConfig.curl -> do
output <- catchDefaultIO "" $
diff --git a/debian/changelog b/debian/changelog
index bdbe2663d..eb48707f2 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -13,6 +13,9 @@ git-annex (5.20150114) UNRELEASED; urgency=medium
* Fix wording of message displayed when unable to get a file that
is available in untrusted repositories.
* Windows: Fix running of the pre-commit-annex hook.
+ * Avoid using fileSize which maxes out at just 2 gb on Windows.
+ Instead, use hFileSize, which doesn't have a bounded size.
+ Fixes support for files > 2 gb on Windows.
-- Joey Hess <id@joeyh.name> Tue, 13 Jan 2015 17:03:39 -0400
diff --git a/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file.mdwn b/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file.mdwn
index 773d452f6..21c7af19b 100644
--- a/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file.mdwn
+++ b/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file.mdwn
@@ -56,3 +56,5 @@ git-annex: sync: 1 failed
# End of transcript or log.
"""]]
+
+> [[fixed|done]] --[[Joey]]
diff --git a/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file/comment_1_cb9e9dada7baf4e48725a9483b3b448a._comment b/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file/comment_1_cb9e9dada7baf4e48725a9483b3b448a._comment
new file mode 100644
index 000000000..42acf9f64
--- /dev/null
+++ b/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file/comment_1_cb9e9dada7baf4e48725a9483b3b448a._comment
@@ -0,0 +1,25 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 1"""
+ date="2015-01-20T19:41:44Z"
+ content="""
+In my own test, I made a 10 gb file, and the key git-annex came up with had
+a size of -2147483648 which is clearly screwed up.. But that's what
+getFileStatus reports the size as. This was in an 32 bit XP VM.
+
+Hmm, unix-compat's getFileStatus calls getFileSize, which yields a
+FileOffset. The maxBound of that on linux is a nice large
+9223372036854775807, but on Windows, it appears to be 2147483647.
+
+Compare with using hFileSize, which yields an Integer. So,
+getFileSize and fileSize are unsafe on Windows due to FileOffset being so
+small on Windows.
+
+I have now corrected all places in git-annex that used the unsafe fileSize.
+It will behave correctly on Windows now.
+
+However, if you still have the repo with the big file, it's key still has
+the wrong size. To fix, you can "git annex unannex" the file, and then "git
+annex add" it back, after upgrading to the current daily build, or the next
+release of git-annex.
+"""]]