-rw-r--r--  Annex/FileMatcher.hs                                        26
-rw-r--r--  Assistant/Changes.hs                                         2
-rw-r--r--  Assistant/Threads/Committer.hs                              93
-rw-r--r--  Assistant/Threads/Pusher.hs                                  1
-rw-r--r--  Assistant/Threads/WebApp.hs                                  1
-rw-r--r--  Assistant/TransferQueue.hs                                  29
-rw-r--r--  Assistant/Types/TransferQueue.hs                             9
-rw-r--r--  Limit.hs                                                    11
-rw-r--r--  Logs/PreferredContent.hs                                    29
-rw-r--r--  Remote/S3.hs                                                76
-rw-r--r--  Types/StandardGroups.hs                                     38
-rw-r--r--  Utility/TList.hs                                            11
-rw-r--r--  debian/changelog                                             9
-rw-r--r--  doc/assistant/iaitem.png                                    bin 0 -> 34868 bytes
-rw-r--r--  doc/design/assistant/blog/day_248__Internet_Archive.mdwn    28
-rw-r--r--  doc/preferred_content.mdwn                                  24
16 files changed, 288 insertions, 99 deletions
diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs
index 220fea286..cbf6f873b 100644
--- a/Annex/FileMatcher.hs
+++ b/Annex/FileMatcher.hs
@@ -14,9 +14,11 @@ import Limit
import Utility.Matcher
import Types.Group
import Logs.Group
+import Logs.Remote
import Annex.UUID
import qualified Annex
import Git.FilePath
+import Types.Remote (RemoteConfig)
import Data.Either
import qualified Data.Set as S
@@ -45,10 +47,22 @@ parsedToMatcher parsed = case partitionEithers parsed of
([], vs) -> Right $ generate vs
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
-parseToken :: MkLimit -> GroupMap -> String -> Either String (Token MatchFiles)
-parseToken checkpresent groupmap t
+exprParser :: GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token MatchFiles)]
+exprParser groupmap configmap mu expr =
+ map parse $ tokenizeMatcher expr
+ where
+ parse = parseToken
+ (limitPresent mu)
+ (limitInDir preferreddir)
+ groupmap
+ preferreddir = fromMaybe "public" $
+ M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
+
+parseToken :: MkLimit -> MkLimit -> GroupMap -> String -> Either String (Token MatchFiles)
+parseToken checkpresent checkpreferreddir groupmap t
| t `elem` tokens = Right $ token t
| t == "present" = use checkpresent
+ | t == "inpreferreddir" = use checkpreferreddir
| otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $
M.fromList
[ ("include", limitInclude)
@@ -78,9 +92,9 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
where
go Nothing = return matchAll
go (Just expr) = do
- m <- groupMap
+ gm <- groupMap
+ rc <- readRemoteLog
u <- getUUID
- either badexpr return $ parsedToMatcher $
- map (parseToken (limitPresent $ Just u) m)
- (tokenizeMatcher expr)
+ either badexpr return $
+ parsedToMatcher $ exprParser gm rc (Just u) expr
badexpr e = error $ "bad annex.largefiles configuration: " ++ e
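
The `preferreddir` resolution that `exprParser` performs can be read in isolation; a minimal sketch, with hypothetical type aliases standing in for git-annex's `UUID` and `RemoteConfig`:

```haskell
import qualified Data.Map as M
import Data.Maybe (fromMaybe)

type UUID = String                       -- hypothetical stand-in
type RemoteConfig = M.Map String String  -- hypothetical stand-in

-- Mirrors the preferreddir lookup in exprParser above: find the
-- remote's config by UUID, then its "preferreddir" field, defaulting
-- to "public".
preferredDir :: M.Map UUID RemoteConfig -> Maybe UUID -> FilePath
preferredDir configmap mu = fromMaybe "public" $
    M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
```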
diff --git a/Assistant/Changes.hs b/Assistant/Changes.hs
index 9daef511b..2ecd2036c 100644
--- a/Assistant/Changes.hs
+++ b/Assistant/Changes.hs
@@ -32,7 +32,7 @@ getChanges = (atomically . getTList) <<~ changePool
{- Gets all unhandled changes, without blocking. -}
getAnyChanges :: Assistant [Change]
-getAnyChanges = (atomically . readTList) <<~ changePool
+getAnyChanges = (atomically . takeTList) <<~ changePool
{- Puts unhandled changes back into the pool.
- Note: Original order is not preserved. -}
diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs
index c07109489..1064f371a 100644
--- a/Assistant/Threads/Committer.hs
+++ b/Assistant/Threads/Committer.hs
@@ -52,7 +52,7 @@ commitThread = namedThread "Committer" $ do
=<< annexDelayAdd <$> Annex.getGitConfig
waitChangeTime $ \(changes, time) -> do
readychanges <- handleAdds delayadd changes
- if shouldCommit time readychanges
+ if shouldCommit time (length readychanges) readychanges
then do
debug
[ "committing"
@@ -62,8 +62,12 @@ commitThread = namedThread "Committer" $ do
void $ alertWhile commitAlert $
liftAnnex commitStaged
recordCommit
+ let numchanges = length readychanges
mapM_ checkChangeContent readychanges
- else refill readychanges
+ return numchanges
+ else do
+ refill readychanges
+ return 0
refill :: [Change] -> Assistant ()
refill [] = noop
@@ -72,21 +76,33 @@ refill cs = do
refillChanges cs
{- Wait for one or more changes to arrive to be committed. -}
-waitChangeTime :: (([Change], UTCTime) -> Assistant ()) -> Assistant ()
-waitChangeTime a = runEvery (Seconds 1) <~> do
- -- We already waited one second as a simple rate limiter.
- -- Next, wait until at least one change is available for
- -- processing.
- changes <- getChanges
- -- See if now's a good time to commit.
- now <- liftIO getCurrentTime
- case (shouldCommit now changes, possiblyrename changes) of
- (True, False) -> a (changes, now)
- (True, True) -> do
- morechanges <- getrelatedchanges changes
- a (changes ++ morechanges, now)
- _ -> refill changes
+waitChangeTime :: (([Change], UTCTime) -> Assistant Int) -> Assistant ()
+waitChangeTime a = go [] 0
where
+ go unhandled lastcommitsize = do
+	-- Wait one second as a simple rate limiter.
+ liftIO $ threadDelaySeconds (Seconds 1)
+ -- Now, wait until at least one change is available for
+ -- processing.
+ cs <- getChanges
+ let changes = unhandled ++ cs
+ let len = length changes
+ -- See if now's a good time to commit.
+ now <- liftIO getCurrentTime
+ case (lastcommitsize >= maxCommitSize, shouldCommit now len changes, possiblyrename changes) of
+ (True, True, _)
+ | len > maxCommitSize ->
+ go [] =<< a (changes, now)
+ | otherwise -> aftermaxcommit changes
+ (_, True, False) ->
+ go [] =<< a (changes, now)
+ (_, True, True) -> do
+ morechanges <- getrelatedchanges changes
+ go [] =<< a (changes ++ morechanges, now)
+ _ -> do
+ refill changes
+ go [] lastcommitsize
+
{- Did we perhaps only get one of the AddChange and RmChange pair
- that make up a file rename? Or some of the pairs that make up
- a directory rename?
@@ -116,6 +132,41 @@ waitChangeTime a = runEvery (Seconds 1) <~> do
then return cs
else getbatchchanges (cs':cs)
+ {- The last commit was maximum size, so it's very likely there
+ - are more changes and we'd like to ensure we make another commit
+ - of maximum size if possible.
+ -
+ - But, it can take a while for the Watcher to wake back up
+ - after a commit. It can get blocked by another thread
+ - that is using the Annex state, such as a git-annex branch
+ - commit. Especially after such a large commit, this can
+ - take several seconds. When this happens, it defeats the
+ - normal commit batching, which sees some old changes the
+ - Watcher found while the commit was being prepared, and sees
+ - no recent ones, and wants to commit immediately.
+ -
+ - All that we need to do, then, is wait for the Watcher to
+ - wake up, and queue up one more change.
+ -
+ - However, it's also possible that we're at the end of changes for
+ - now. So to avoid waiting a really long time before committing
+ - those changes we have, poll for up to 30 seconds, and then
+ - commit them.
+ -
+ - Also, try to run something in Annex, to ensure we block
+ - longer if the Annex state is indeed blocked.
+ -}
+ aftermaxcommit oldchanges = loop (30 :: Int)
+ where
+ loop 0 = go oldchanges 0
+ loop n = do
+ liftAnnex noop -- ensure Annex state is free
+ liftIO $ threadDelaySeconds (Seconds 1)
+ changes <- getAnyChanges
+ if null changes
+ then loop (n - 1)
+ else go (oldchanges ++ changes) 0
+
isRmChange :: Change -> Bool
isRmChange (Change { changeInfo = i }) | i == RmChange = True
isRmChange _ = False
@@ -131,20 +182,22 @@ humanImperceptibleDelay :: IO ()
humanImperceptibleDelay = threadDelay $
truncate $ humanImperceptibleInterval * fromIntegral oneSecond
+maxCommitSize :: Int
+maxCommitSize = 5000
+
{- Decide if now is a good time to make a commit.
- Note that the list of changes has an undefined order.
-
- Current strategy: If there have been 10 changes within the past second,
- a batch activity is taking place, so wait for later.
-}
-shouldCommit :: UTCTime -> [Change] -> Bool
-shouldCommit now changes
+shouldCommit :: UTCTime -> Int -> [Change] -> Bool
+shouldCommit now len changes
| len == 0 = False
- | len > 5000 = True -- avoid bloating change pool too much
+ | len >= maxCommitSize = True
| length recentchanges < 10 = True
| otherwise = False -- batch activity
where
- len = length changes
thissecond c = timeDelta c <= 1
recentchanges = filter thissecond changes
timeDelta c = now `diffUTCTime` changeTime c
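
The batching decision above is self-contained enough to sketch standalone; the `Change` type here is a pared-down stand-in for the real one:

```haskell
import Data.Time.Clock (UTCTime, diffUTCTime)

newtype Change = Change { changeTime :: UTCTime }  -- pared-down stand-in

maxCommitSize :: Int
maxCommitSize = 5000

-- Commit when the pool has hit the cap, or when fewer than 10 changes
-- arrived within the past second (no batch activity underway).
shouldCommit :: UTCTime -> Int -> [Change] -> Bool
shouldCommit now len changes
    | len == 0 = False
    | len >= maxCommitSize = True
    | length recentchanges < 10 = True
    | otherwise = False
  where
    recentchanges = filter (\c -> now `diffUTCTime` changeTime c <= 1) changes
```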
diff --git a/Assistant/Threads/Pusher.hs b/Assistant/Threads/Pusher.hs
index 57595b8c1..060f26cf5 100644
--- a/Assistant/Threads/Pusher.hs
+++ b/Assistant/Threads/Pusher.hs
@@ -9,7 +9,6 @@ module Assistant.Threads.Pusher where
import Assistant.Common
import Assistant.Commits
-import Assistant.Types.Commits
import Assistant.Pushes
import Assistant.DaemonStatus
import Assistant.Sync
diff --git a/Assistant/Threads/WebApp.hs b/Assistant/Threads/WebApp.hs
index caef570c4..f1ea24742 100644
--- a/Assistant/Threads/WebApp.hs
+++ b/Assistant/Threads/WebApp.hs
@@ -22,6 +22,7 @@ import Assistant.WebApp.Configurators.Local
import Assistant.WebApp.Configurators.Ssh
import Assistant.WebApp.Configurators.Pairing
import Assistant.WebApp.Configurators.AWS
+import Assistant.WebApp.Configurators.IA
import Assistant.WebApp.Configurators.WebDAV
import Assistant.WebApp.Configurators.XMPP
import Assistant.WebApp.Configurators.Preferences
diff --git a/Assistant/TransferQueue.hs b/Assistant/TransferQueue.hs
index ac9ed3216..f94e73c2b 100644
--- a/Assistant/TransferQueue.hs
+++ b/Assistant/TransferQueue.hs
@@ -29,6 +29,7 @@ import Types.Remote
import qualified Remote
import qualified Types.Remote as Remote
import Annex.Wanted
+import Utility.TList
import Control.Concurrent.STM
import qualified Data.Map as M
@@ -38,7 +39,7 @@ type Reason = String
{- Reads the queue's content without blocking or changing it. -}
getTransferQueue :: Assistant [(Transfer, TransferInfo)]
-getTransferQueue = (atomically . readTVar . queuelist) <<~ transferQueue
+getTransferQueue = (atomically . readTList . queuelist) <<~ transferQueue
stubInfo :: AssociatedFile -> Remote -> TransferInfo
stubInfo f r = stubTransferInfo
@@ -94,8 +95,7 @@ queueTransfersMatching matching reason schedule k f direction
| direction == Download = do
q <- getAssistant transferQueue
void $ liftIO $ atomically $
- modifyTVar' (deferreddownloads q) $
- \l -> (k, f):l
+ consTList (deferreddownloads q) (k, f)
| otherwise = noop
{- Queues any deferred downloads that can now be accomplished, leaving
@@ -103,12 +103,11 @@ queueTransfersMatching matching reason schedule k f direction
queueDeferredDownloads :: Reason -> Schedule -> Assistant ()
queueDeferredDownloads reason schedule = do
q <- getAssistant transferQueue
- l <- liftIO $ atomically $ swapTVar (deferreddownloads q) []
+ l <- liftIO $ atomically $ readTList (deferreddownloads q)
rs <- syncDataRemotes <$> getDaemonStatus
left <- filterM (queue rs) l
unless (null left) $
- liftIO $ atomically $ modifyTVar' (deferreddownloads q) $
- \new -> new ++ left
+ liftIO $ atomically $ appendTList (deferreddownloads q) left
where
queue rs (k, f) = do
uuids <- liftAnnex $ Remote.keyLocations k
@@ -127,10 +126,9 @@ queueDeferredDownloads reason schedule = do
enqueue :: Reason -> Schedule -> Transfer -> TransferInfo -> Assistant ()
enqueue reason schedule t info
- | schedule == Next = go (new:)
- | otherwise = go (\l -> l++[new])
+ | schedule == Next = go consTList
+ | otherwise = go snocTList
where
- new = (t, info)
go modlist = whenM (add modlist) $ do
debug [ "queued", describeTransfer t info, ": " ++ reason ]
notifyTransfer
@@ -140,11 +138,11 @@ enqueue reason schedule t info
liftIO $ atomically $ ifM (checkRunningTransferSTM dstatus t)
( return False
, do
- l <- readTVar (queuelist q)
+ l <- readTList (queuelist q)
if (t `notElem` map fst l)
then do
void $ modifyTVar' (queuesize q) succ
- void $ modifyTVar' (queuelist q) modlist
+ void $ modlist (queuelist q) (t, info)
return True
else return False
)
@@ -185,9 +183,9 @@ getNextTransfer acceptable = do
if sz < 1
then retry -- blocks until queuesize changes
else do
- (r@(t,info):rest) <- readTVar (queuelist q)
- writeTVar (queuelist q) rest
+ (r@(t,info):rest) <- readTList (queuelist q)
void $ modifyTVar' (queuesize q) pred
+ setTList (queuelist q) rest
if acceptable info
then do
adjustTransfersSTM dstatus $
@@ -219,8 +217,7 @@ dequeueTransfers c = do
dequeueTransfersSTM :: TransferQueue -> (Transfer -> Bool) -> STM [(Transfer, TransferInfo)]
dequeueTransfersSTM q c = do
- (removed, ts) <- partition (c . fst)
- <$> readTVar (queuelist q)
+ (removed, ts) <- partition (c . fst) <$> readTList (queuelist q)
void $ writeTVar (queuesize q) (length ts)
- void $ writeTVar (queuelist q) ts
+ setTList (queuelist q) ts
return removed
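
The `Next`/`Later` scheduling now maps directly onto `TList` operations. A sketch of that correspondence, assuming the `Utility.TList` interface shown later in this commit:

```haskell
import Control.Concurrent.STM (STM)
import Utility.TList (TList, consTList, snocTList)

data Schedule = Next | Later
    deriving Eq

-- Next transfers jump the queue (prepend); Later ones append,
-- matching the modlist argument passed by enqueue above.
scheduleOp :: Schedule -> TList a -> a -> STM ()
scheduleOp Next  = consTList
scheduleOp Later = snocTList
```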
diff --git a/Assistant/Types/TransferQueue.hs b/Assistant/Types/TransferQueue.hs
index 6620ebdf6..e4e305d5a 100644
--- a/Assistant/Types/TransferQueue.hs
+++ b/Assistant/Types/TransferQueue.hs
@@ -12,11 +12,12 @@ import Logs.Transfer
import Types.Remote
import Control.Concurrent.STM
+import Utility.TList
data TransferQueue = TransferQueue
{ queuesize :: TVar Int
- , queuelist :: TVar [(Transfer, TransferInfo)]
- , deferreddownloads :: TVar [(Key, AssociatedFile)]
+ , queuelist :: TList (Transfer, TransferInfo)
+ , deferreddownloads :: TList (Key, AssociatedFile)
}
data Schedule = Next | Later
@@ -25,5 +26,5 @@ data Schedule = Next | Later
newTransferQueue :: IO TransferQueue
newTransferQueue = atomically $ TransferQueue
<$> newTVar 0
- <*> newTVar []
- <*> newTVar []
+ <*> newTList
+ <*> newTList
diff --git a/Limit.hs b/Limit.hs
index 9ce9d591e..679ebc199 100644
--- a/Limit.hs
+++ b/Limit.hs
@@ -1,6 +1,6 @@
{- user-specified limits on files to act on
-
- - Copyright 2011,2012 Joey Hess <joey@kitenet.net>
+ - Copyright 2011-2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -88,9 +88,9 @@ limitExclude glob = Right $ const $ return . not . matchglob glob
- once. Also, we use regex-TDFA because it's less buggy in its support
- of non-unicode characters. -}
matchglob :: String -> Annex.FileInfo -> Bool
-matchglob glob (Annex.FileInfo { Annex.matchFile = f }) =
+matchglob glob fi =
case cregex of
- Right r -> case execute r f of
+ Right r -> case execute r (Annex.matchFile fi) of
Right (Just _) -> True
_ -> False
Left _ -> error $ "failed to compile regex: " ++ regex
@@ -138,6 +138,11 @@ limitPresent u _ = Right $ const $ check $ \key -> do
handle _ Nothing = return False
handle a (Just (key, _)) = a key
+{- Limit to content that is in a directory, anywhere in the repository tree -}
+limitInDir :: FilePath -> MkLimit
+limitInDir dir = const $ Right $ const $ \fi -> return $
+ any (== dir) $ splitPath $ takeDirectory $ Annex.matchFile fi
+
{- Adds a limit to skip files not believed to have the specified number
- of copies. -}
addCopies :: String -> Annex ()
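
A worked example of the path test in `limitInDir`, using `System.FilePath`'s standard behaviour (`splitPath` keeps a trailing slash on every component except the last):

```haskell
import System.FilePath (splitPath, takeDirectory)

main :: IO ()
main = do
    print $ splitPath (takeDirectory "talks/public/video.ogg")
    -- ["talks/","public"]
    print $ any (== "public") (splitPath (takeDirectory "talks/public/video.ogg"))
    -- True
```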
diff --git a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs
index d980cd373..8005fc0d3 100644
--- a/Logs/PreferredContent.hs
+++ b/Logs/PreferredContent.hs
@@ -30,7 +30,9 @@ import qualified Utility.Matcher
import Annex.FileMatcher
import Annex.UUID
import Types.Group
+import Types.Remote (RemoteConfig)
import Logs.Group
+import Logs.Remote
import Types.StandardGroups
{- Filename of preferred-content.log. -}
@@ -65,8 +67,9 @@ preferredContentMap = maybe preferredContentMapLoad return
preferredContentMapLoad :: Annex Annex.PreferredContentMap
preferredContentMapLoad = do
groupmap <- groupMap
+ configmap <- readRemoteLog
m <- simpleMap
- . parseLogWithUUID ((Just .) . makeMatcher groupmap)
+ . parseLogWithUUID ((Just .) . makeMatcher groupmap configmap)
<$> Annex.Branch.get preferredContentLog
Annex.changeState $ \s -> s { Annex.preferredcontentmap = Just m }
return m
@@ -79,30 +82,30 @@ preferredContentMapRaw = simpleMap . parseLog Just
- because the configuration is shared among repositories and newer
- versions of git-annex may add new features. Instead, parse errors
- result in a Matcher that will always succeed. -}
-makeMatcher :: GroupMap -> UUID -> String -> FileMatcher
-makeMatcher groupmap u s
- | s == "standard" = standardMatcher groupmap u
+makeMatcher :: GroupMap -> M.Map UUID RemoteConfig -> UUID -> String -> FileMatcher
+makeMatcher groupmap configmap u expr
+ | expr == "standard" = standardMatcher groupmap configmap u
| null (lefts tokens) = Utility.Matcher.generate $ rights tokens
| otherwise = matchAll
where
- tokens = map (parseToken (limitPresent $ Just u) groupmap) (tokenizeMatcher s)
+ tokens = exprParser groupmap configmap (Just u) expr
{- Standard matchers are pre-defined for some groups. If none is defined,
- or a repository is in multiple groups with standard matchers, match all. -}
-standardMatcher :: GroupMap -> UUID -> FileMatcher
-standardMatcher m u = maybe matchAll (makeMatcher m u . preferredContent) $
- getStandardGroup =<< u `M.lookup` groupsByUUID m
+standardMatcher :: GroupMap -> M.Map UUID RemoteConfig -> UUID -> FileMatcher
+standardMatcher groupmap configmap u =
+ maybe matchAll (makeMatcher groupmap configmap u . preferredContent) $
+ getStandardGroup =<< u `M.lookup` groupsByUUID groupmap
{- Checks if an expression can be parsed, if not returns Just error -}
checkPreferredContentExpression :: String -> Maybe String
-checkPreferredContentExpression s
- | s == "standard" = Nothing
- | otherwise = case parsedToMatcher vs of
+checkPreferredContentExpression expr
+ | expr == "standard" = Nothing
+ | otherwise = case parsedToMatcher tokens of
Left e -> Just e
Right _ -> Nothing
where
- vs = map (parseToken (limitPresent Nothing) emptyGroupMap)
- (tokenizeMatcher s)
+ tokens = exprParser emptyGroupMap M.empty Nothing expr
{- Puts a UUID in a standard group, and sets its preferred content to use
- the standard expression for that group, unless something is already set. -}
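
The lenient parsing contract can be spot-checked through `checkPreferredContentExpression`; the results below are an assumption derived from the error strings built in `Annex.FileMatcher`:

```haskell
import Logs.PreferredContent (checkPreferredContentExpression)

-- Assumed results, following parsedToMatcher and parseToken above:
expectations :: Bool
expectations = and
    [ checkPreferredContentExpression "standard" == Nothing
    , checkPreferredContentExpression "inpreferreddir" == Nothing
    , checkPreferredContentExpression "bogus=1"
        == Just "Parse failure: near \"bogus=1\""
    ]
```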
diff --git a/Remote/S3.hs b/Remote/S3.hs
index 00b5b5dc6..2772833fe 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -1,11 +1,11 @@
-{- Amazon S3 remotes.
+{- S3 remotes
-
- - Copyright 2011 Joey Hess <joey@kitenet.net>
+ - Copyright 2011-2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
-module Remote.S3 (remote) where
+module Remote.S3 (remote, iaHost, isIA, isIAHost, iaItemUrl) where
import Network.AWS.AWSConnection
import Network.AWS.S3Object
@@ -15,6 +15,7 @@ import qualified Data.Text as T
import qualified Data.ByteString.Lazy.Char8 as L
import qualified Data.Map as M
import Data.Char
+import Network.Socket (HostName)
import Common.Annex
import Types.Remote
@@ -29,6 +30,9 @@ import Crypto
import Creds
import Utility.Metered
import Annex.Content
+import Logs.Web
+
+type Bucket = String
remote :: RemoteType
remote = RemoteType {
@@ -53,7 +57,7 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost
storeKey = store this,
retrieveKeyFile = retrieve this,
retrieveKeyFileCheap = retrieveCheap this,
- removeKey = remove this,
+ removeKey = remove this c,
hasKey = checkPresent this,
hasKeyCheap = False,
whereisKey = Nothing,
@@ -67,7 +71,7 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost
}
s3Setup :: UUID -> RemoteConfig -> Annex RemoteConfig
-s3Setup u c = handlehost $ M.lookup "host" c
+s3Setup u c = if isIA c then archiveorg else defaulthost
where
remotename = fromJust (M.lookup "name" c)
defbucket = remotename ++ "-" ++ fromUUID u
@@ -79,11 +83,6 @@ s3Setup u c = handlehost $ M.lookup "host" c
, ("bucket", defbucket)
]
- handlehost Nothing = defaulthost
- handlehost (Just h)
- | ".archive.org" `isSuffixOf` map toLower h = archiveorg
- | otherwise = defaulthost
-
use fullconfig = do
gitConfigSpecialRemote u fullconfig "s3" "true"
setRemoteCredPair fullconfig (AWS.creds u)
@@ -115,21 +114,25 @@ s3Setup u c = handlehost $ M.lookup "host" c
store :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
store r k _f p = s3Action r False $ \(conn, bucket) ->
- sendAnnex k (void $ remove r k) $ \src -> do
- res <- storeHelper (conn, bucket) r k p src
- s3Bool res
+ sendAnnex k (void $ remove' r k) $ \src -> do
+ ok <- s3Bool =<< storeHelper (conn, bucket) r k p src
+
+ -- Store public URL to item in Internet Archive.
+ when (ok && isIA (config r)) $
+ setUrlPresent k (iaKeyUrl r k)
+
+ return ok
storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
storeEncrypted r (cipher, enck) k p = s3Action r False $ \(conn, bucket) ->
-- To get file size of the encrypted content, have to use a temp file.
-- (An alternative would be chunking to a constant size.)
- withTmp enck $ \tmp -> sendAnnex k (void $ remove r enck) $ \src -> do
+ withTmp enck $ \tmp -> sendAnnex k (void $ remove' r enck) $ \src -> do
liftIO $ encrypt (getGpgOpts r) cipher (feedFile src) $
readBytes $ L.writeFile tmp
- res <- storeHelper (conn, bucket) r enck p tmp
- s3Bool res
+ s3Bool =<< storeHelper (conn, bucket) r enck p tmp
-storeHelper :: (AWSConnection, String) -> Remote -> Key -> MeterUpdate -> FilePath -> Annex (AWSResult ())
+storeHelper :: (AWSConnection, Bucket) -> Remote -> Key -> MeterUpdate -> FilePath -> Annex (AWSResult ())
storeHelper (conn, bucket) r k p file = do
size <- maybe getsize (return . fromIntegral) $ keySize k
meteredBytes (Just p) size $ \meterupdate ->
@@ -177,10 +180,19 @@ retrieveEncrypted r (cipher, enck) k d p = s3Action r False $ \(conn, bucket) ->
return True
Left e -> s3Warning e
-remove :: Remote -> Key -> Annex Bool
-remove r k = s3Action r False $ \(conn, bucket) -> do
- res <- liftIO $ deleteObject conn $ bucketKey r bucket k
- s3Bool res
+{- Internet Archive doesn't easily allow removing content.
+ - While it may remove the file, there are generally other files
+ - derived from it that it does not remove. -}
+remove :: Remote -> RemoteConfig -> Key -> Annex Bool
+remove r c k
+ | isIA c = do
+ warning "Cannot remove content from the Internet Archive"
+ return False
+ | otherwise = remove' r k
+
+remove' :: Remote -> Key -> Annex Bool
+remove' r k = s3Action r False $ \(conn, bucket) ->
+ s3Bool =<< liftIO (deleteObject conn $ bucketKey r bucket k)
checkPresent :: Remote -> Key -> Annex (Either String Bool)
checkPresent r k = s3Action r noconn $ \(conn, bucket) -> do
@@ -205,7 +217,7 @@ s3Bool :: AWSResult () -> Annex Bool
s3Bool (Right _) = return True
s3Bool (Left e) = s3Warning e
-s3Action :: Remote -> a -> ((AWSConnection, String) -> Annex a) -> Annex a
+s3Action :: Remote -> a -> ((AWSConnection, Bucket) -> Annex a) -> Annex a
s3Action r noconn action = do
let bucket = M.lookup "bucket" $ config r
conn <- s3Connection (config r) (uuid r)
@@ -222,7 +234,7 @@ bucketFile r = munge . key2file
fileprefix = M.findWithDefault "" "fileprefix" c
c = config r
-bucketKey :: Remote -> String -> Key -> S3Object
+bucketKey :: Remote -> Bucket -> Key -> S3Object
bucketKey r bucket k = S3Object bucket (bucketFile r k) "" [] L.empty
{- Internet Archive limits filenames to a subset of ascii,
@@ -270,3 +282,21 @@ s3Connection c u = go =<< getRemoteCredPairFor "S3" c (AWS.creds u)
case reads s of
[(p, _)] -> p
_ -> error $ "bad S3 port value: " ++ s
+
+{- Hostname to use for archive.org S3. -}
+iaHost :: HostName
+iaHost = "s3.us.archive.org"
+
+isIA :: RemoteConfig -> Bool
+isIA c = maybe False isIAHost (M.lookup "host" c)
+
+isIAHost :: HostName -> Bool
+isIAHost h = ".archive.org" `isSuffixOf` map toLower h
+
+iaItemUrl :: Bucket -> URLString
+iaItemUrl bucket = "http://archive.org/details/" ++ bucket
+
+iaKeyUrl :: Remote -> Key -> URLString
+iaKeyUrl r k = "http://archive.org/download/" ++ bucket ++ "/" ++ bucketFile r k
+ where
+ bucket = fromJust $ M.lookup "bucket" $ config r
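
A usage sketch of the newly exported helpers; the config map is an assumed example of the shape `initremote` creates:

```haskell
import qualified Data.Map as M
import Remote.S3 (isIA, isIAHost, iaItemUrl)

-- assumed example config for an archive.org remote
exampleConfig :: M.Map String String
exampleConfig = M.fromList
    [ ("host", "s3.us.archive.org")
    , ("bucket", "my-item")
    ]

main :: IO ()
main = do
    print $ isIA exampleConfig            -- True
    print $ isIAHost "S3.US.ARCHIVE.ORG"  -- True; the suffix test lowercases first
    print $ iaItemUrl "my-item"           -- "http://archive.org/details/my-item"
```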
diff --git a/Types/StandardGroups.hs b/Types/StandardGroups.hs
index 417d6bec1..e7764d387 100644
--- a/Types/StandardGroups.hs
+++ b/Types/StandardGroups.hs
@@ -7,6 +7,11 @@
module Types.StandardGroups where
+import Types.Remote (RemoteConfig)
+
+import qualified Data.Map as M
+import Data.Maybe
+
data StandardGroup
= ClientGroup
| TransferGroup
@@ -16,6 +21,7 @@ data StandardGroup
| FullArchiveGroup
| SourceGroup
| ManualGroup
+ | PublicGroup
| UnwantedGroup
deriving (Eq, Ord, Enum, Bounded, Show)
@@ -28,6 +34,7 @@ fromStandardGroup SmallArchiveGroup = "smallarchive"
fromStandardGroup FullArchiveGroup = "archive"
fromStandardGroup SourceGroup = "source"
fromStandardGroup ManualGroup = "manual"
+fromStandardGroup PublicGroup = "public"
fromStandardGroup UnwantedGroup = "unwanted"
toStandardGroup :: String -> Maybe StandardGroup
@@ -39,19 +46,29 @@ toStandardGroup "smallarchive" = Just SmallArchiveGroup
toStandardGroup "archive" = Just FullArchiveGroup
toStandardGroup "source" = Just SourceGroup
toStandardGroup "manual" = Just ManualGroup
+toStandardGroup "public" = Just PublicGroup
toStandardGroup "unwanted" = Just UnwantedGroup
toStandardGroup _ = Nothing
-descStandardGroup :: StandardGroup -> String
-descStandardGroup ClientGroup = "client: a repository on your computer"
-descStandardGroup TransferGroup = "transfer: distributes files to clients"
-descStandardGroup BackupGroup = "full backup: backs up all files"
-descStandardGroup IncrementalBackupGroup = "incremental backup: backs up files not backed up elsewhere"
-descStandardGroup SmallArchiveGroup = "small archive: archives files located in \"archive\" directories"
-descStandardGroup FullArchiveGroup = "full archive: archives all files not archived elsewhere"
-descStandardGroup SourceGroup = "file source: moves files on to other repositories"
-descStandardGroup ManualGroup = "manual mode: only stores files you manually choose"
-descStandardGroup UnwantedGroup = "unwanted: remove content from this repository"
+descStandardGroup :: Maybe RemoteConfig -> StandardGroup -> String
+descStandardGroup _ ClientGroup = "client: a repository on your computer"
+descStandardGroup _ TransferGroup = "transfer: distributes files to clients"
+descStandardGroup _ BackupGroup = "full backup: backs up all files"
+descStandardGroup _ IncrementalBackupGroup = "incremental backup: backs up files not backed up elsewhere"
+descStandardGroup _ SmallArchiveGroup = "small archive: archives files located in \"archive\" directories"
+descStandardGroup _ FullArchiveGroup = "full archive: archives all files not archived elsewhere"
+descStandardGroup _ SourceGroup = "file source: moves files on to other repositories"
+descStandardGroup _ ManualGroup = "manual mode: only stores files you manually choose"
+descStandardGroup _ UnwantedGroup = "unwanted: remove content from this repository"
+descStandardGroup c PublicGroup = "public: only stores files located in \"" ++ fromJust (specialDirectory c PublicGroup) ++ "\" directories"
+
+specialDirectory :: Maybe RemoteConfig -> StandardGroup -> Maybe FilePath
+specialDirectory _ SmallArchiveGroup = Just "archive"
+specialDirectory _ FullArchiveGroup = Just "archive"
+specialDirectory (Just c) PublicGroup = Just $
+ fromMaybe "public" $ M.lookup "preferreddir" c
+specialDirectory Nothing PublicGroup = Just "public"
+specialDirectory _ _ = Nothing
{- See doc/preferred_content.mdwn for explanations of these expressions. -}
preferredContent :: StandardGroup -> String
@@ -67,6 +84,7 @@ preferredContent SmallArchiveGroup = lastResort $
preferredContent FullArchiveGroup = lastResort notArchived
preferredContent SourceGroup = "not (copies=1)"
preferredContent ManualGroup = "present and (" ++ preferredContent ClientGroup ++ ")"
+preferredContent PublicGroup = "inpreferreddir"
preferredContent UnwantedGroup = "exclude=*"
notArchived :: String
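
A sketch of how the new group's description and expression resolve; the `preferreddir` value is an assumed example:

```haskell
import qualified Data.Map as M
import Types.StandardGroups

demo :: (String, String)
demo =
    ( descStandardGroup (Just (M.fromList [("preferreddir", "podcasts")])) PublicGroup
      -- "public: only stores files located in \"podcasts\" directories"
    , preferredContent PublicGroup
      -- "inpreferreddir"
    )
```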
diff --git a/Utility/TList.hs b/Utility/TList.hs
index 33a50b7dd..716f72017 100644
--- a/Utility/TList.hs
+++ b/Utility/TList.hs
@@ -25,10 +25,14 @@ newTList = newEmptyTMVar
getTList :: TList a -> STM [a]
getTList tlist = D.toList <$> takeTMVar tlist
-{- Gets anything currently in the TList, without blocking.
+{- Takes anything currently in the TList, without blocking.
- TList is left empty. -}
+takeTList :: TList a -> STM [a]
+takeTList tlist = maybe [] D.toList <$> tryTakeTMVar tlist
+
+{- Reads anything in the list, without modifying it, or blocking. -}
readTList :: TList a -> STM [a]
-readTList tlist = maybe [] D.toList <$> tryTakeTMVar tlist
+readTList tlist = maybe [] D.toList <$> tryReadTMVar tlist
{- Mutates a TList. -}
modifyTList :: TList a -> (D.DList a -> D.DList a) -> STM ()
@@ -50,3 +54,6 @@ snocTList tlist v = modifyTList tlist $ \dl -> D.snoc dl v
appendTList :: TList a -> [a] -> STM ()
appendTList tlist l = modifyTList tlist $ \dl -> D.append dl (D.fromList l)
+
+setTList :: TList a -> [a] -> STM ()
+setTList tlist l = modifyTList tlist $ const $ D.fromList l
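
The rename draws a take/peek distinction: `takeTList` empties the list, while `readTList` now only reads it. A short sketch, assuming `modifyTList` treats an empty TMVar as the empty list (as `Utility.TList` does):

```haskell
import Control.Concurrent.STM (atomically)
import Utility.TList

main :: IO ()
main = do
    tl <- atomically newTList
    atomically $ appendTList tl [1, 2, 3 :: Int]
    peeked <- atomically $ readTList tl  -- [1,2,3]; list left in place
    taken  <- atomically $ takeTList tl  -- [1,2,3]; list now empty
    again  <- atomically $ readTList tl  -- []
    print (peeked, taken, again)
```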
diff --git a/debian/changelog b/debian/changelog
index 1e6411e5b..4dde2760b 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -31,6 +31,15 @@ git-annex (4.20130418) UNRELEASED; urgency=low
* assistant: Sanitize XMPP presence information logged for debugging.
* initremote: If two existing remotes have the same name,
prefer the one with a higher trust level.
+ * Add public repository group.
+ (And inpreferreddir to preferred content expressions.)
+ * webapp: Can now set up Internet Archive repositories.
+ * S3: Dropping content from the Internet Archive doesn't work, but
+ their API indicates it does. Always refuse to drop from there.
+ * webapp: Display some additional information about a repository on
+ its edit page.
+ * Automatically register public urls for files uploaded to the
+ Internet Archive.
-- Joey Hess <joeyh@debian.org> Thu, 18 Apr 2013 16:22:48 -0400
diff --git a/doc/assistant/iaitem.png b/doc/assistant/iaitem.png
new file mode 100644
index 000000000..2fcc1b4c8
--- /dev/null
+++ b/doc/assistant/iaitem.png
Binary files differ
diff --git a/doc/design/assistant/blog/day_248__Internet_Archive.mdwn b/doc/design/assistant/blog/day_248__Internet_Archive.mdwn
new file mode 100644
index 000000000..f002939f1
--- /dev/null
+++ b/doc/design/assistant/blog/day_248__Internet_Archive.mdwn
@@ -0,0 +1,28 @@
+Very productive & long day today, spent adding a new feature to the
+webapp: Internet Archive support!
+
+[[!img /assistant/iaitem.png]]
+
+git-annex already supported using archive.org via its S3 special remotes,
+so this is just a nice UI around that.
+
+How does it decide which files to publish on archive.org? Well,
+the item has a unique name, which is based on the description
+field. Any files located in a directory with that name will be uploaded
+to that item. (This is done via a new preferred content expression I added.)
+
+So, you can have one repository with multiple IA items attached, and
+sort files between them however you like.
+I plan to eventually make a screencast demoing that.
+
+Another interesting use case, once the Android webapp is done, would be to add
+a repository on the DCIM directory, set the archive.org repository to
+prefer all content, and *bam*, you have a phone or tablet that
+auto-publishes and archives every picture it takes.
+
+Another nice little feature added today is that whenever a file is uploaded
+to the Internet Archive, its public url is automatically recorded, same
+as if you'd run `git annex addurl`. So any users who can clone your
+repository can download the files from archive.org, without needing any
+login or password info. This makes the Internet Archive a nice way to
+publish the large files associated with a public git repository.
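
The recorded URL has the shape built by `iaKeyUrl` in Remote/S3.hs above; a tiny sketch with assumed example values:

```haskell
-- Mirrors iaKeyUrl from Remote.S3 above; the bucket and (already
-- munged) filename are assumed example values:
iaDownloadUrl :: String -> FilePath -> String
iaDownloadUrl bucket file =
    "http://archive.org/download/" ++ bucket ++ "/" ++ file

-- iaDownloadUrl "my-item" "video.ogg"
--   == "http://archive.org/download/my-item/video.ogg"
```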
diff --git a/doc/preferred_content.mdwn b/doc/preferred_content.mdwn
index 1bcdfdf07..23081fc30 100644
--- a/doc/preferred_content.mdwn
+++ b/doc/preferred_content.mdwn
@@ -72,6 +72,18 @@ Note that `not present` is a very bad thing to put in a preferred content
expression. It'll make it prefer to get content that's not present, and
drop content that is present! Don't go there..
+### difference: "inpreferreddir"
+
+There's a special "inpreferreddir" keyword you can use in a
+preferred content expression of a special remote. This means that the
+content is preferred if it's in a directory (located anywhere in the tree)
+with a special name.
+
+The name of the directory can be configured using
+`git annex initremote $remote preferreddir=$dirname`
+
+(If no directory name is configured, it uses "public" by default.)
+
## standard expressions
git-annex comes with some standard preferred content expressions, that can
@@ -166,6 +178,18 @@ reached an archive repository.
`present and ($client)`
+### public
+
+This is used for publishing information to a repository that can be
+publicly accessed. Only files in a directory with a particular name
+will be published. (The directory can be located anywhere in the
+repository.)
+
+The name of the directory can be configured using
+`git annex initremote $remote preferreddir=$dirname`
+
+`inpreferreddir`
+
### unwanted
Use for repositories that you don't want to exist. This will result