aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Annex/Content.hs4
-rw-r--r--Annex/Transfer.hs3
-rw-r--r--Assistant/Threads/SanityChecker.hs6
-rw-r--r--Assistant/Threads/TransferPoller.hs3
-rw-r--r--Backend/Hash.hs5
-rw-r--r--Backend/WORM.hs6
-rw-r--r--Command/Fsck.hs3
-rw-r--r--Command/Info.hs3
-rw-r--r--Command/RecvKey.hs3
-rw-r--r--Common.hs3
-rw-r--r--Limit.hs4
-rw-r--r--Remote/Git.hs4
-rw-r--r--Remote/Helper/Chunked.hs3
-rw-r--r--Remote/Helper/Http.hs2
-rw-r--r--Remote/S3.hs2
-rw-r--r--Utility/FileSize.hs33
-rw-r--r--Utility/InodeCache.hs3
-rw-r--r--Utility/Url.hs7
-rw-r--r--debian/changelog3
-rw-r--r--doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file.mdwn2
-rw-r--r--doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file/comment_1_cb9e9dada7baf4e48725a9483b3b448a._comment25
21 files changed, 93 insertions, 34 deletions
diff --git a/Annex/Content.hs b/Annex/Content.hs
index 37090d3bb..60daaab90 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -234,8 +234,8 @@ prepGetViaTmpChecked key unabletoget getkey = do
tmp <- fromRepo $ gitAnnexTmpObjectLocation key
e <- liftIO $ doesFileExist tmp
- alreadythere <- if e
- then fromIntegral . fileSize <$> liftIO (getFileStatus tmp)
+ alreadythere <- liftIO $ if e
+ then getFileSize tmp
else return 0
ifM (checkDiskSpace Nothing key alreadythere)
( do
diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs
index fb89869f8..1603974ff 100644
--- a/Annex/Transfer.hs
+++ b/Annex/Transfer.hs
@@ -132,8 +132,7 @@ runTransfer' ignorelock t file shouldretry a = do
liftIO $ readMVar metervar
| otherwise = do
f <- fromRepo $ gitAnnexTmpObjectLocation (transferKey t)
- liftIO $ catchDefaultIO 0 $
- fromIntegral . fileSize <$> getFileStatus f
+ liftIO $ catchDefaultIO 0 $ getFileSize f
type RetryDecider = TransferInfo -> TransferInfo -> Bool
diff --git a/Assistant/Threads/SanityChecker.hs b/Assistant/Threads/SanityChecker.hs
index 26c6ebba5..df29df006 100644
--- a/Assistant/Threads/SanityChecker.hs
+++ b/Assistant/Threads/SanityChecker.hs
@@ -225,7 +225,7 @@ checkLogSize :: Int -> Assistant ()
checkLogSize n = do
f <- liftAnnex $ fromRepo gitAnnexLogFile
logs <- liftIO $ listLogs f
- totalsize <- liftIO $ sum <$> mapM filesize logs
+ totalsize <- liftIO $ sum <$> mapM getFileSize logs
when (totalsize > 2 * oneMegabyte) $ do
notice ["Rotated logs due to size:", show totalsize]
liftIO $ openLog f >>= handleToFd >>= redirLog
@@ -237,9 +237,7 @@ checkLogSize n = do
checkLogSize (n + 1)
_ -> noop
where
- filesize f = fromIntegral . fileSize <$> liftIO (getFileStatus f)
-
- oneMegabyte :: Int
+ oneMegabyte :: Integer
oneMegabyte = 1000000
#endif
diff --git a/Assistant/Threads/TransferPoller.hs b/Assistant/Threads/TransferPoller.hs
index 71bfe3676..a5b30b4f0 100644
--- a/Assistant/Threads/TransferPoller.hs
+++ b/Assistant/Threads/TransferPoller.hs
@@ -36,8 +36,7 @@ transferPollerThread = namedThread "TransferPoller" $ do
- temp file being used for the transfer. -}
| transferDirection t == Download = do
let f = gitAnnexTmpObjectLocation (transferKey t) g
- sz <- liftIO $ catchMaybeIO $
- fromIntegral . fileSize <$> getFileStatus f
+ sz <- liftIO $ catchMaybeIO $ getFileSize f
newsize t info sz
{- Uploads don't need to be polled for when the TransferWatcher
- thread can track file modifications. -}
diff --git a/Backend/Hash.hs b/Backend/Hash.hs
index e50eca516..8ddccd229 100644
--- a/Backend/Hash.hs
+++ b/Backend/Hash.hs
@@ -68,8 +68,7 @@ hashNameE hash = hashName hash ++ "E"
keyValue :: Hash -> KeySource -> Annex (Maybe Key)
keyValue hash source = do
let file = contentLocation source
- stat <- liftIO $ getFileStatus file
- let filesize = fromIntegral $ fileSize stat
+ filesize <- liftIO $ getFileSize file
s <- hashFile hash file filesize
return $ Just $ stubKey
{ keyName = s
@@ -103,7 +102,7 @@ checkKeyChecksum hash key file = do
mstat <- liftIO $ catchMaybeIO $ getFileStatus file
case (mstat, fast) of
(Just stat, False) -> do
- let filesize = fromIntegral $ fileSize stat
+ filesize <- liftIO $ getFileSize' file stat
showSideAction "checksum"
check <$> hashFile hash file filesize
_ -> return True
diff --git a/Backend/WORM.hs b/Backend/WORM.hs
index bd5e374e1..24dba5795 100644
--- a/Backend/WORM.hs
+++ b/Backend/WORM.hs
@@ -32,11 +32,13 @@ backend = Backend
-}
keyValue :: KeySource -> Annex (Maybe Key)
keyValue source = do
- stat <- liftIO $ getFileStatus $ contentLocation source
+ let f = contentLocation source
+ stat <- liftIO $ getFileStatus f
+ sz <- liftIO $ getFileSize' f stat
relf <- getTopFilePath <$> inRepo (toTopFilePath $ keyFilename source)
return $ Just $ stubKey
{ keyName = genKeyName relf
, keyBackendName = name backend
- , keySize = Just $ fromIntegral $ fileSize stat
+ , keySize = Just sz
, keyMtime = Just $ modificationTime stat
}
diff --git a/Command/Fsck.hs b/Command/Fsck.hs
index 46c1620f1..5dad10127 100644
--- a/Command/Fsck.hs
+++ b/Command/Fsck.hs
@@ -303,8 +303,7 @@ checkKeySizeOr :: (Key -> Annex String) -> Key -> FilePath -> Annex Bool
checkKeySizeOr bad key file = case Types.Key.keySize key of
Nothing -> return True
Just size -> do
- size' <- fromIntegral . fileSize
- <$> liftIO (getFileStatus file)
+ size' <- liftIO $ getFileSize file
comparesizes size size'
where
comparesizes a b = do
diff --git a/Command/Info.hs b/Command/Info.hs
index 36d93a270..86b608928 100644
--- a/Command/Info.hs
+++ b/Command/Info.hs
@@ -490,8 +490,7 @@ staleSize label dirspec = go =<< lift (dirKeys dirspec)
keysizes keys = do
dir <- lift $ fromRepo dirspec
liftIO $ forM keys $ \k -> catchDefaultIO 0 $
- fromIntegral . fileSize
- <$> getFileStatus (dir </> keyFile k)
+ getFileSize (dir </> keyFile k)
aside :: String -> String
aside s = " (" ++ s ++ ")"
diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs
index 8a806875b..424ca923d 100644
--- a/Command/RecvKey.hs
+++ b/Command/RecvKey.hs
@@ -62,8 +62,7 @@ start key = fieldTransfer Download key $ \_p ->
oksize <- case Types.Key.keySize key of
Nothing -> return True
Just size -> do
- size' <- fromIntegral . fileSize
- <$> liftIO (getFileStatus tmp)
+ size' <- liftIO $ getFileSize tmp
return $ size == size'
if oksize
then case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of
diff --git a/Common.hs b/Common.hs
index 670fc44e5..9333a19d9 100644
--- a/Common.hs
+++ b/Common.hs
@@ -30,7 +30,8 @@ import Utility.Monad as X
import Utility.Data as X
import Utility.Applicative as X
import Utility.FileSystemEncoding as X
-import Utility.PosixFiles as X
+import Utility.PosixFiles as X hiding (fileSize)
+import Utility.FileSize as X
import Utility.Network as X
import Utility.PartialPrelude as X
diff --git a/Limit.hs b/Limit.hs
index 5d58e77f0..1053547ec 100644
--- a/Limit.hs
+++ b/Limit.hs
@@ -239,9 +239,7 @@ limitSize vs s = case readSize dataUnits s of
checkkey sz key = return $ keySize key `vs` Just sz
check _ sz (Just key) = checkkey sz key
check fi sz Nothing = do
- filesize <- liftIO $ catchMaybeIO $
- fromIntegral . fileSize
- <$> getFileStatus (relFile fi)
+ filesize <- liftIO $ catchMaybeIO $ getFileSize (relFile fi)
return $ filesize `vs` Just sz
addMetaData :: String -> Annex ()
diff --git a/Remote/Git.hs b/Remote/Git.hs
index f015e295e..583e9c728 100644
--- a/Remote/Git.hs
+++ b/Remote/Git.hs
@@ -563,9 +563,7 @@ rsyncOrCopyFile rsyncparams src dest p =
(const $ copyFileExternal CopyTimeStamps src dest)
watchfilesize oldsz = do
threadDelay 500000 -- 0.5 seconds
- v <- catchMaybeIO $
- toBytesProcessed . fileSize
- <$> getFileStatus dest
+ v <- catchMaybeIO $ toBytesProcessed <$> getFileSize dest
case v of
Just sz
| sz /= oldsz -> do
diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs
index 806fab542..8516268ce 100644
--- a/Remote/Helper/Chunked.hs
+++ b/Remote/Helper/Chunked.hs
@@ -244,8 +244,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink
| otherwise = go =<< chunkKeys u chunkconfig basek
where
go ls = do
- currsize <- liftIO $ catchMaybeIO $
- toInteger . fileSize <$> getFileStatus dest
+ currsize <- liftIO $ catchMaybeIO $ getFileSize dest
let ls' = maybe ls (setupResume ls) currsize
if any null ls'
then return True -- dest is already complete
diff --git a/Remote/Helper/Http.hs b/Remote/Helper/Http.hs
index 6ce5bacb8..3d08066ba 100644
--- a/Remote/Helper/Http.hs
+++ b/Remote/Helper/Http.hs
@@ -32,7 +32,7 @@ httpStorer a = fileStorer $ \k f m -> a k =<< liftIO (httpBodyStorer f m)
-- the meter as it's sent.
httpBodyStorer :: FilePath -> MeterUpdate -> IO RequestBody
httpBodyStorer src m = do
- size <- fromIntegral . fileSize <$> getFileStatus src :: IO Integer
+ size <- getFileSize src
let streamer sink = withMeteredFile src m $ \b -> byteStringPopper b sink
return $ RequestBodyStream (fromInteger size) streamer
diff --git a/Remote/S3.hs b/Remote/S3.hs
index 1a6e41094..104fdddca 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -159,7 +159,7 @@ store :: Remote -> S3Handle -> Storer
store r h = fileStorer $ \k f p -> do
case partSize (hinfo h) of
Just partsz | partsz > 0 -> do
- fsz <- fromIntegral . fileSize <$> liftIO (getFileStatus f)
+ fsz <- liftIO $ getFileSize f
if fsz > partsz
then multipartupload fsz partsz k f p
else singlepartupload k f p
diff --git a/Utility/FileSize.hs b/Utility/FileSize.hs
new file mode 100644
index 000000000..3113695d3
--- /dev/null
+++ b/Utility/FileSize.hs
@@ -0,0 +1,33 @@
+{- File size.
+ -
+ - License: BSD-2-clause
+ -}
+
+{-# LANGUAGE CPP #-}
+
+module Utility.FileSize where
+
+import System.PosixCompat.Files
+import Control.Exception (bracket)
+import System.IO
+
+{- Gets the size of a file.
+ -
+ - This is better than using fileSize, because on Windows that returns a
+ - FileOffset which maxes out at 2 gb.
+ - See https://github.com/jystic/unix-compat/issues/16
+ -}
+getFileSize :: FilePath -> IO Integer
+#ifndef mingw32_HOST_OS
+getFileSize f = fromIntegral . fileSize <$> getFileStatus f
+#else
+getFileSize f = bracket (openFile f ReadMode) hClose hFileSize
+#endif
+
+{- Gets the size of the file, when its FileStatus is already known. -}
+getFileSize' :: FilePath -> FileStatus -> IO Integer
+#ifndef mingw32_HOST_OS
+getFileSize' _ s = return $ fromIntegral $ fileSize s
+#else
+getFileSize' f _ = getFileSize f
+#endif
diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs
index 8a182449b..0b0b040cb 100644
--- a/Utility/InodeCache.hs
+++ b/Utility/InodeCache.hs
@@ -40,6 +40,9 @@ module Utility.InodeCache (
import Common
import System.PosixCompat.Types
import Utility.QuickCheck
+-- While fileSize overflows and wraps at 2gb on Windows,
+-- it's ok for purposes of comparison.
+import System.PosixCompat.Files (fileSize)
#ifdef mingw32_HOST_OS
import Data.Word (Word64)
diff --git a/Utility/Url.hs b/Utility/Url.hs
index cc15c82d0..a8828e048 100644
--- a/Utility/Url.hs
+++ b/Utility/Url.hs
@@ -102,9 +102,12 @@ exists url uo = case parseURIRelaxed url of
-- so fall back to reading files and using curl.
Nothing
| uriScheme u == "file:" -> do
- s <- catchMaybeIO $ getFileStatus (unEscapeString $ uriPath u)
+ let f = unEscapeString (uriPath u)
+ s <- catchMaybeIO $ getFileStatus f
case s of
- Just stat -> return (True, Just $ fromIntegral $ fileSize stat)
+ Just stat -> do
+ sz <- getFileSize' f stat
+ return (True, Just sz)
Nothing -> dne
| Build.SysConfig.curl -> do
output <- catchDefaultIO "" $
diff --git a/debian/changelog b/debian/changelog
index bdbe2663d..eb48707f2 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -13,6 +13,9 @@ git-annex (5.20150114) UNRELEASED; urgency=medium
* Fix wording of message displayed when unable to get a file that
is available in untrusted repositories.
* Windows: Fix running of the pre-commit-annex hook.
+ * Avoid using fileSize which maxes out at just 2 gb on Windows.
+ Instead, use hFileSize, which doesn't have a bounded size.
+ Fixes support for files > 2 gb on Windows.
-- Joey Hess <id@joeyh.name> Tue, 13 Jan 2015 17:03:39 -0400
diff --git a/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file.mdwn b/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file.mdwn
index 773d452f6..21c7af19b 100644
--- a/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file.mdwn
+++ b/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file.mdwn
@@ -56,3 +56,5 @@ git-annex: sync: 1 failed
# End of transcript or log.
"""]]
+
+> [[fixed|done]] --[[Joey]]
diff --git a/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file/comment_1_cb9e9dada7baf4e48725a9483b3b448a._comment b/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file/comment_1_cb9e9dada7baf4e48725a9483b3b448a._comment
new file mode 100644
index 000000000..42acf9f64
--- /dev/null
+++ b/doc/bugs/Direct_mode_sync_fails_to_transfer_a_10GB_file/comment_1_cb9e9dada7baf4e48725a9483b3b448a._comment
@@ -0,0 +1,25 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 1"""
+ date="2015-01-20T19:41:44Z"
+ content="""
+In my own test, I made a 10 gb file, and the key git-annex came up with had
+a size of -2147483648 which is clearly screwed up.. But that's what
+getFileStatus reports the size as. This was in an 32 bit XP VM.
+
+Hmm, unix-compat's getFileStatus calls getFileSize, which yields a
+FileOffset. The maxBound of that on linux is a nice large
+9223372036854775807, but on Windows, it appears to be 2147483647.
+
+Compare with using hFileSize, which yields an Integer. So,
+getFileSize and fileSize are unsafe on Windows due to FileOffset being so
+small on Windows.
+
+I have now corrected all places in git-annex that used the unsafe fileSize.
+It will behave correctly on Windows now.
+
+However, if you still have the repo with the big file, it's key still has
+the wrong size. To fix, you can "git annex unannex" the file, and then "git
+annex add" it back, after upgrading to the current daily build, or the next
+release of git-annex.
+"""]]