From 8cdbe8fe29ccd7f1d2bc0daeaacd3a88427d40c2 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 11 Mar 2013 19:55:01 -0400 Subject: addurl: Add --relaxed option. --- Command/AddUrl.hs | 30 +++++++++++++--------- debian/changelog | 1 + doc/git-annex.mdwn | 3 +++ ...option_to_disable_url_checking_with_addurl.mdwn | 2 ++ 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index 41a947db8..cf8aae17c 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -25,7 +25,7 @@ import Config import Annex.Content.Direct def :: [Command] -def = [notBareRepo $ withOptions [fileOption, pathdepthOption] $ +def = [notBareRepo $ withOptions [fileOption, pathdepthOption, relaxedOption] $ command "addurl" (paramRepeating paramUrl) seek "add urls to annex"] fileOption :: Option @@ -34,28 +34,32 @@ fileOption = Option.field [] "file" paramFile "specify what file the url is adde pathdepthOption :: Option pathdepthOption = Option.field [] "pathdepth" paramNumber "path components to use in filename" +relaxedOption :: Option +relaxedOption = Option.flag [] "relaxed" "skip size check" + seek :: [CommandSeek] seek = [withField fileOption return $ \f -> + withFlag relaxedOption $ \relaxed -> withField pathdepthOption (return . 
maybe Nothing readish) $ \d -> - withStrings $ start f d] + withStrings $ start relaxed f d] -start :: Maybe FilePath -> Maybe Int -> String -> CommandStart -start optfile pathdepth s = go $ fromMaybe bad $ parseURI s +start :: Bool -> Maybe FilePath -> Maybe Int -> String -> CommandStart +start relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s where bad = fromMaybe (error $ "bad url " ++ s) $ parseURI $ escapeURIString isUnescapedInURI s go url = do let file = fromMaybe (url2file url pathdepth) optfile showStart "addurl" file - next $ perform s file + next $ perform relaxed s file -perform :: String -> FilePath -> CommandPerform -perform url file = ifAnnexed file addurl geturl +perform :: Bool -> String -> FilePath -> CommandPerform +perform relaxed url file = ifAnnexed file addurl geturl where geturl = do liftIO $ createDirectoryIfMissing True (parentDir file) - ifM (Annex.getState Annex.fast) - ( nodownload url file , download url file ) + ifM (Annex.getState Annex.fast <||> pure relaxed) + ( nodownload relaxed url file , download url file ) addurl (key, _backend) = do headers <- getHttpHeaders ifM (liftIO $ Url.check url headers $ keySize key) @@ -90,10 +94,12 @@ download url file = do setUrlPresent key url next $ Command.Add.cleanup file key True -nodownload :: String -> FilePath -> CommandPerform -nodownload url file = do +nodownload :: Bool -> String -> FilePath -> CommandPerform +nodownload relaxed url file = do headers <- getHttpHeaders - (exists, size) <- liftIO $ Url.exists url headers + (exists, size) <- if relaxed + then pure (True, Nothing) + else liftIO $ Url.exists url headers if exists then do let key = Backend.URL.fromUrl url size diff --git a/debian/changelog b/debian/changelog index 43475322d..dbacec2ca 100644 --- a/debian/changelog +++ b/debian/changelog @@ -54,6 +54,7 @@ git-annex (4.20130228) UNRELEASED; urgency=low avoiding re-checksumming. 
* assistant: Detects most renames, including directory renames, and combines all their changes into a single commit. + * addurl: Add --relaxed option. -- Joey Hess Wed, 27 Feb 2013 23:20:40 -0400 diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index c34fd4bfb..8013c3769 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -154,6 +154,9 @@ subdirectories). To avoid immediately downloading the url, specify --fast. + To avoid storing the size of the url's content, and accept whatever + is there at a future point, specify --relaxed. (Implies --fast.) + Normally the filename is based on the full url, so will look like "www.example.com_dir_subdir_bigfile". For a shorter filename, specify --pathdepth=N. For example, --pathdepth=1 will use "dir/subdir/bigfile", diff --git a/doc/todo/wishlist:_option_to_disable_url_checking_with_addurl.mdwn b/doc/todo/wishlist:_option_to_disable_url_checking_with_addurl.mdwn index a046f70ab..d0b847933 100644 --- a/doc/todo/wishlist:_option_to_disable_url_checking_with_addurl.mdwn +++ b/doc/todo/wishlist:_option_to_disable_url_checking_with_addurl.mdwn @@ -5,3 +5,5 @@ c.f. [http://git-annex.branchable.com/tips/How_to_retroactively_annex_a_file_alr The bottleneck I'm hitting here seems to be the fact that `git annex addurl` diligently checks each url to see that it is accessible, which adds up quickly if many files are to be processed. It would be great if addurl had an option to disable checking the url, in order to speed up large batch jobs like this. + +> --relaxed added [[done]] --[[Joey]] -- cgit v1.2.3