summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2013-03-11 19:55:01 -0400
committerGravatar Joey Hess <joey@kitenet.net>2013-03-11 19:55:01 -0400
commit8cdbe8fe29ccd7f1d2bc0daeaacd3a88427d40c2 (patch)
tree9f53c20d153c2f1e1d7ae2b4699102ede5142b7b
parentdfc7b3351c4b10681f48dfa347a2308a2eb63489 (diff)
addurl: Add --relaxed option.
-rw-r--r--Command/AddUrl.hs30
-rw-r--r--debian/changelog1
-rw-r--r--doc/git-annex.mdwn3
-rw-r--r--doc/todo/wishlist:_option_to_disable_url_checking_with_addurl.mdwn2
4 files changed, 24 insertions, 12 deletions
diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs
index 41a947db8..cf8aae17c 100644
--- a/Command/AddUrl.hs
+++ b/Command/AddUrl.hs
@@ -25,7 +25,7 @@ import Config
import Annex.Content.Direct
def :: [Command]
-def = [notBareRepo $ withOptions [fileOption, pathdepthOption] $
+def = [notBareRepo $ withOptions [fileOption, pathdepthOption, relaxedOption] $
command "addurl" (paramRepeating paramUrl) seek "add urls to annex"]
fileOption :: Option
@@ -34,28 +34,32 @@ fileOption = Option.field [] "file" paramFile "specify what file the url is adde
pathdepthOption :: Option
pathdepthOption = Option.field [] "pathdepth" paramNumber "path components to use in filename"
+relaxedOption :: Option
+relaxedOption = Option.flag [] "relaxed" "skip size check"
+
seek :: [CommandSeek]
seek = [withField fileOption return $ \f ->
+ withFlag relaxedOption $ \relaxed ->
withField pathdepthOption (return . maybe Nothing readish) $ \d ->
- withStrings $ start f d]
+ withStrings $ start relaxed f d]
-start :: Maybe FilePath -> Maybe Int -> String -> CommandStart
-start optfile pathdepth s = go $ fromMaybe bad $ parseURI s
+start :: Bool -> Maybe FilePath -> Maybe Int -> String -> CommandStart
+start relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s
where
bad = fromMaybe (error $ "bad url " ++ s) $
parseURI $ escapeURIString isUnescapedInURI s
go url = do
let file = fromMaybe (url2file url pathdepth) optfile
showStart "addurl" file
- next $ perform s file
+ next $ perform relaxed s file
-perform :: String -> FilePath -> CommandPerform
-perform url file = ifAnnexed file addurl geturl
+perform :: Bool -> String -> FilePath -> CommandPerform
+perform relaxed url file = ifAnnexed file addurl geturl
where
geturl = do
liftIO $ createDirectoryIfMissing True (parentDir file)
- ifM (Annex.getState Annex.fast)
- ( nodownload url file , download url file )
+ ifM (Annex.getState Annex.fast <||> pure relaxed)
+ ( nodownload relaxed url file , download url file )
addurl (key, _backend) = do
headers <- getHttpHeaders
ifM (liftIO $ Url.check url headers $ keySize key)
@@ -90,10 +94,12 @@ download url file = do
setUrlPresent key url
next $ Command.Add.cleanup file key True
-nodownload :: String -> FilePath -> CommandPerform
-nodownload url file = do
+nodownload :: Bool -> String -> FilePath -> CommandPerform
+nodownload relaxed url file = do
headers <- getHttpHeaders
- (exists, size) <- liftIO $ Url.exists url headers
+ (exists, size) <- if relaxed
+ then pure (True, Nothing)
+ else liftIO $ Url.exists url headers
if exists
then do
let key = Backend.URL.fromUrl url size
diff --git a/debian/changelog b/debian/changelog
index 43475322d..dbacec2ca 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -54,6 +54,7 @@ git-annex (4.20130228) UNRELEASED; urgency=low
avoiding re-checksumming.
* assistant: Detects most renames, including directory renames, and
combines all their changes into a single commit.
+ * addurl: Add --relaxed option.
-- Joey Hess <joeyh@debian.org> Wed, 27 Feb 2013 23:20:40 -0400
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index c34fd4bfb..8013c3769 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -154,6 +154,9 @@ subdirectories).
To avoid immediately downloading the url, specify --fast.
+ To avoid storing the size of the url's content, and accept whatever
+ is there at a future point, specify --relaxed. (Implies --fast.)
+
Normally the filename is based on the full url, so will look like
"www.example.com_dir_subdir_bigfile". For a shorter filename, specify
--pathdepth=N. For example, --pathdepth=1 will use "dir/subdir/bigfile",
diff --git a/doc/todo/wishlist:_option_to_disable_url_checking_with_addurl.mdwn b/doc/todo/wishlist:_option_to_disable_url_checking_with_addurl.mdwn
index a046f70ab..d0b847933 100644
--- a/doc/todo/wishlist:_option_to_disable_url_checking_with_addurl.mdwn
+++ b/doc/todo/wishlist:_option_to_disable_url_checking_with_addurl.mdwn
@@ -5,3 +5,5 @@ c.f. [http://git-annex.branchable.com/tips/How_to_retroactively_annex_a_file_alr
The bottleneck I'm hitting here seems to be the fact that `git annex addurl` diligently checks each url to see that it is accessible, which adds up quickly if many files are to be processed.
It would be great if addurl had an option to disable checking the url, in order to speed up large batch jobs like this.
+
+> --relaxed added [[done]] --[[Joey]]