summary refs log tree commit diff
diff options
context:
space:
mode:
author Joey Hess <joey@kitenet.net> 2011-09-07 19:04:51 -0400
committer Joey Hess <joey@kitenet.net> 2011-09-07 19:04:51 -0400
commit 03d6209e1ccee4a8df7d1b0336c1d5587a2b3ff6 (patch)
tree b65ce23e1f72242a13ce5b942cf8648ff2677c49
parent 7c768c09841d7346444d65721b132d144835fc99 (diff)
addurl: Always use whole url as destination filename, rather than only its file component.
First, this ensures that git annex addurl, when run repeatedly with the same url, doesn't create duplicate files, which it did before when it fell back to the longer filename. Secondly, the file part of an url is frequently not very descriptive on its own. The uri scheme, auth, and port are intentionally left out, as clutter.
-rw-r--r-- Command/AddUrl.hs 26
-rw-r--r-- debian/changelog 2
-rw-r--r-- doc/walkthrough/using_the_web.mdwn 12
3 files changed, 17 insertions, 23 deletions
diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs
index 55e51100c..9fc68ca03 100644
--- a/Command/AddUrl.hs
+++ b/Command/AddUrl.hs
@@ -7,9 +7,10 @@
module Command.AddUrl where
-import Control.Monad.State (liftIO, when)
+import Control.Monad.State
import Network.URI
import Data.String.Utils
+import Data.Maybe
import System.Directory
import Command
@@ -24,6 +25,7 @@ import Content
import PresenceLog
import Locations
import Utility.Path
+import Utility.Conditional
command :: [Command]
command = [repoCommand "addurl" paramPath seek "add urls to annex"]
@@ -75,20 +77,10 @@ nodownload url file = do
url2file :: URI -> IO FilePath
url2file url = do
- let parts = filter safe $ split "/" $ uriPath url
- if null parts
- then fallback
- else do
- let file = last parts
- e <- doesFileExist file
- if e then fallback else return file
+ whenM (doesFileExist file) $
+ error $ "already have this url in " ++ file
+ return file
where
- fallback = do
- let file = replace "/" "_" $ show url
- e <- doesFileExist file
- when e $ error "already have this url"
- return file
- safe "" = False
- safe "." = False
- safe ".." = False
- safe _ = True
+ file = escape $ uriRegName auth ++ uriPath url ++ uriQuery url
+ escape = replace "/?" $ repeat '_'
+ auth = fromMaybe (error $ "bad url " ++ show url) $ uriAuthority url
diff --git a/debian/changelog b/debian/changelog
index e79685a9a..9ff745566 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,6 +3,8 @@ git-annex (3.20110907) UNRELEASED; urgency=low
* whereis: Show untrusted locations separately and do not include in
location count.
* Fix build without S3.
+ * addurl: Always use whole url as destination filename, rather than
+ only its file component.
-- Joey Hess <joeyh@debian.org> Tue, 06 Sep 2011 16:59:15 -0400
diff --git a/doc/walkthrough/using_the_web.mdwn b/doc/walkthrough/using_the_web.mdwn
index 9d5525758..8009927a4 100644
--- a/doc/walkthrough/using_the_web.mdwn
+++ b/doc/walkthrough/using_the_web.mdwn
@@ -1,20 +1,20 @@
The web can be used as a [[special_remote|special_remotes]] too.
# git annex addurl http://example.com/video.mpeg
- addurl video.mpeg (downloading http://example.com/video.mpeg)
+ addurl example.com_video.mpeg (downloading http://example.com/video.mpeg)
########################################################## 100.0%
ok
Now the file is downloaded, and has been added to the annex like any other
-file. So it can be copied to other repositories, and so on.
+file. So it can be renamed, copied to other repositories, and so on.
Note that git-annex assumes that, if the web site does not 404, the file is
still present on the web, and this counts as one [[copy|copies]] of the
file. So it will let you remove your last copy, trusting it can be
downloaded again:
- # git annex drop video.mpeg
- drop video.mpeg (checking http://example.com/video.mpeg) ok
+ # git annex drop example.com_video.mpeg
+ drop example.com_video.mpeg (checking http://example.com/video.mpeg) ok
If you don't [[trust]] the web to this degree, just let git-annex know:
@@ -23,8 +23,8 @@ If you don't [[trust]] the web to this degree, just let git-annex know:
With the result that it will hang onto files:
- # git annex drop video.mpeg
- drop video.mpeg (unsafe)
+ # git annex drop example.com_video.mpeg
+ drop example.com_video.mpeg (unsafe)
Could only verify the existence of 0 out of 1 necessary copies
Also these untrusted repositories may contain the file:
00000000-0000-0000-0000-000000000001 -- web