diff options
-rw-r--r-- | Annex/FileMatcher.hs | 26 | ||||
-rw-r--r-- | Assistant/Changes.hs | 2 | ||||
-rw-r--r-- | Assistant/Threads/Committer.hs | 93 | ||||
-rw-r--r-- | Assistant/Threads/Pusher.hs | 1 | ||||
-rw-r--r-- | Assistant/Threads/WebApp.hs | 1 | ||||
-rw-r--r-- | Assistant/TransferQueue.hs | 29 | ||||
-rw-r--r-- | Assistant/Types/TransferQueue.hs | 9 | ||||
-rw-r--r-- | Limit.hs | 11 | ||||
-rw-r--r-- | Logs/PreferredContent.hs | 29 | ||||
-rw-r--r-- | Remote/S3.hs | 76 | ||||
-rw-r--r-- | Types/StandardGroups.hs | 38 | ||||
-rw-r--r-- | Utility/TList.hs | 11 | ||||
-rw-r--r-- | debian/changelog | 9 | ||||
-rw-r--r-- | doc/assistant/iaitem.png | bin | 0 -> 34868 bytes | |||
-rw-r--r-- | doc/design/assistant/blog/day_248__Internet_Archive.mdwn | 28 | ||||
-rw-r--r-- | doc/preferred_content.mdwn | 24 |
16 files changed, 288 insertions, 99 deletions
diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs index 220fea286..cbf6f873b 100644 --- a/Annex/FileMatcher.hs +++ b/Annex/FileMatcher.hs @@ -14,9 +14,11 @@ import Limit import Utility.Matcher import Types.Group import Logs.Group +import Logs.Remote import Annex.UUID import qualified Annex import Git.FilePath +import Types.Remote (RemoteConfig) import Data.Either import qualified Data.Set as S @@ -45,10 +47,22 @@ parsedToMatcher parsed = case partitionEithers parsed of ([], vs) -> Right $ generate vs (es, _) -> Left $ unwords $ map ("Parse failure: " ++) es -parseToken :: MkLimit -> GroupMap -> String -> Either String (Token MatchFiles) -parseToken checkpresent groupmap t +exprParser :: GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token MatchFiles)] +exprParser groupmap configmap mu expr = + map parse $ tokenizeMatcher expr + where + parse = parseToken + (limitPresent mu) + (limitInDir preferreddir) + groupmap + preferreddir = fromMaybe "public" $ + M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu + +parseToken :: MkLimit -> MkLimit -> GroupMap -> String -> Either String (Token MatchFiles) +parseToken checkpresent checkpreferreddir groupmap t | t `elem` tokens = Right $ token t | t == "present" = use checkpresent + | t == "inpreferreddir" = use checkpreferreddir | otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $ M.fromList [ ("include", limitInclude) @@ -78,9 +92,9 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig where go Nothing = return matchAll go (Just expr) = do - m <- groupMap + gm <- groupMap + rc <- readRemoteLog u <- getUUID - either badexpr return $ parsedToMatcher $ - map (parseToken (limitPresent $ Just u) m) - (tokenizeMatcher expr) + either badexpr return $ + parsedToMatcher $ exprParser gm rc (Just u) expr badexpr e = error $ "bad annex.largefiles configuration: " ++ e diff --git a/Assistant/Changes.hs b/Assistant/Changes.hs index 9daef511b..2ecd2036c 100644 --- a/Assistant/Changes.hs +++ b/Assistant/Changes.hs @@ -32,7 +32,7 @@ getChanges = (atomically . getTList) <<~ changePool {- Gets all unhandled changes, without blocking. -} getAnyChanges :: Assistant [Change] -getAnyChanges = (atomically . readTList) <<~ changePool +getAnyChanges = (atomically . takeTList) <<~ changePool {- Puts unhandled changes back into the pool. - Note: Original order is not preserved. -} diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index c07109489..1064f371a 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -52,7 +52,7 @@ commitThread = namedThread "Committer" $ do =<< annexDelayAdd <$> Annex.getGitConfig waitChangeTime $ \(changes, time) -> do readychanges <- handleAdds delayadd changes - if shouldCommit time readychanges + if shouldCommit time (length readychanges) readychanges then do debug [ "committing" @@ -62,8 +62,12 @@ commitThread = namedThread "Committer" $ do void $ alertWhile commitAlert $ liftAnnex commitStaged recordCommit + let numchanges = length readychanges mapM_ checkChangeContent readychanges - else refill readychanges + return numchanges + else do + refill readychanges + return 0 refill :: [Change] -> Assistant () refill [] = noop @@ -72,21 +76,33 @@ refill cs = do refillChanges cs {- Wait for one or more changes to arrive to be committed. -} -waitChangeTime :: (([Change], UTCTime) -> Assistant ()) -> Assistant () -waitChangeTime a = runEvery (Seconds 1) <~> do - -- We already waited one second as a simple rate limiter. - -- Next, wait until at least one change is available for - -- processing. - changes <- getChanges - -- See if now's a good time to commit. - now <- liftIO getCurrentTime - case (shouldCommit now changes, possiblyrename changes) of - (True, False) -> a (changes, now) - (True, True) -> do - morechanges <- getrelatedchanges changes - a (changes ++ morechanges, now) - _ -> refill changes +waitChangeTime :: (([Change], UTCTime) -> Assistant Int) -> Assistant () +waitChangeTime a = go [] 0 where + go unhandled lastcommitsize = do + -- Wait one one second as a simple rate limiter. + liftIO $ threadDelaySeconds (Seconds 1) + -- Now, wait until at least one change is available for + -- processing. + cs <- getChanges + let changes = unhandled ++ cs + let len = length changes + -- See if now's a good time to commit. + now <- liftIO getCurrentTime + case (lastcommitsize >= maxCommitSize, shouldCommit now len changes, possiblyrename changes) of + (True, True, _) + | len > maxCommitSize -> + go [] =<< a (changes, now) + | otherwise -> aftermaxcommit changes + (_, True, False) -> + go [] =<< a (changes, now) + (_, True, True) -> do + morechanges <- getrelatedchanges changes + go [] =<< a (changes ++ morechanges, now) + _ -> do + refill changes + go [] lastcommitsize + {- Did we perhaps only get one of the AddChange and RmChange pair - that make up a file rename? Or some of the pairs that make up - a directory rename? @@ -116,6 +132,41 @@ waitChangeTime a = runEvery (Seconds 1) <~> do then return cs else getbatchchanges (cs':cs) + {- The last commit was maximum size, so it's very likely there + - are more changes and we'd like to ensure we make another commit + - of maximum size if possible. + - + - But, it can take a while for the Watcher to wake back up + - after a commit. It can get blocked by another thread + - that is using the Annex state, such as a git-annex branch + - commit. Especially after such a large commit, this can + - take several seconds. When this happens, it defeats the + - normal commit batching, which sees some old changes the + - Watcher found while the commit was being prepared, and sees + - no recent ones, and wants to commit immediately. + - + - All that we need to do, then, is wait for the Watcher to + - wake up, and queue up one more change. + - + - However, it's also possible that we're at the end of changes for + - now. So to avoid waiting a really long time before committing + - those changes we have, poll for up to 30 seconds, and then + - commit them. + - + - Also, try to run something in Annex, to ensure we block + - longer if the Annex state is indeed blocked. + -} + aftermaxcommit oldchanges = loop (30 :: Int) + where + loop 0 = go oldchanges 0 + loop n = do + liftAnnex noop -- ensure Annex state is free + liftIO $ threadDelaySeconds (Seconds 1) + changes <- getAnyChanges + if null changes + then loop (n - 1) + else go (oldchanges ++ changes) 0 + isRmChange :: Change -> Bool isRmChange (Change { changeInfo = i }) | i == RmChange = True isRmChange _ = False @@ -131,20 +182,22 @@ humanImperceptibleDelay :: IO () humanImperceptibleDelay = threadDelay $ truncate $ humanImperceptibleInterval * fromIntegral oneSecond +maxCommitSize :: Int +maxCommitSize = 5000 + {- Decide if now is a good time to make a commit. - Note that the list of changes has an undefined order. - - Current strategy: If there have been 10 changes within the past second, - a batch activity is taking place, so wait for later. -} -shouldCommit :: UTCTime -> [Change] -> Bool -shouldCommit now changes +shouldCommit :: UTCTime -> Int -> [Change] -> Bool +shouldCommit now len changes | len == 0 = False - | len > 5000 = True -- avoid bloating change pool too much + | len >= maxCommitSize = True | length recentchanges < 10 = True | otherwise = False -- batch activity where - len = length changes thissecond c = timeDelta c <= 1 recentchanges = filter thissecond changes timeDelta c = now `diffUTCTime` changeTime c diff --git a/Assistant/Threads/Pusher.hs b/Assistant/Threads/Pusher.hs index 57595b8c1..060f26cf5 100644 --- a/Assistant/Threads/Pusher.hs +++ b/Assistant/Threads/Pusher.hs @@ -9,7 +9,6 @@ module Assistant.Threads.Pusher where import Assistant.Common import Assistant.Commits -import Assistant.Types.Commits import Assistant.Pushes import Assistant.DaemonStatus import Assistant.Sync diff --git a/Assistant/Threads/WebApp.hs b/Assistant/Threads/WebApp.hs index caef570c4..f1ea24742 100644 --- a/Assistant/Threads/WebApp.hs +++ b/Assistant/Threads/WebApp.hs @@ -22,6 +22,7 @@ import Assistant.WebApp.Configurators.Local import Assistant.WebApp.Configurators.Ssh import Assistant.WebApp.Configurators.Pairing import Assistant.WebApp.Configurators.AWS +import Assistant.WebApp.Configurators.IA import Assistant.WebApp.Configurators.WebDAV import Assistant.WebApp.Configurators.XMPP import Assistant.WebApp.Configurators.Preferences diff --git a/Assistant/TransferQueue.hs b/Assistant/TransferQueue.hs index ac9ed3216..f94e73c2b 100644 --- a/Assistant/TransferQueue.hs +++ b/Assistant/TransferQueue.hs @@ -29,6 +29,7 @@ import Types.Remote import qualified Remote import qualified Types.Remote as Remote import Annex.Wanted +import Utility.TList import Control.Concurrent.STM import qualified Data.Map as M @@ -38,7 +39,7 @@ type Reason = String {- Reads the queue's content without blocking or changing it. -} getTransferQueue :: Assistant [(Transfer, TransferInfo)] -getTransferQueue = (atomically . readTVar . queuelist) <<~ transferQueue +getTransferQueue = (atomically . readTList . queuelist) <<~ transferQueue stubInfo :: AssociatedFile -> Remote -> TransferInfo stubInfo f r = stubTransferInfo @@ -94,8 +95,7 @@ queueTransfersMatching matching reason schedule k f direction | direction == Download = do q <- getAssistant transferQueue void $ liftIO $ atomically $ - modifyTVar' (deferreddownloads q) $ - \l -> (k, f):l + consTList (deferreddownloads q) (k, f) | otherwise = noop {- Queues any deferred downloads that can now be accomplished, leaving @@ -103,12 +103,11 @@ queueTransfersMatching matching reason schedule k f direction queueDeferredDownloads :: Reason -> Schedule -> Assistant () queueDeferredDownloads reason schedule = do q <- getAssistant transferQueue - l <- liftIO $ atomically $ swapTVar (deferreddownloads q) [] + l <- liftIO $ atomically $ readTList (deferreddownloads q) rs <- syncDataRemotes <$> getDaemonStatus left <- filterM (queue rs) l unless (null left) $ - liftIO $ atomically $ modifyTVar' (deferreddownloads q) $ - \new -> new ++ left + liftIO $ atomically $ appendTList (deferreddownloads q) left where queue rs (k, f) = do uuids <- liftAnnex $ Remote.keyLocations k @@ -127,10 +126,9 @@ queueDeferredDownloads reason schedule = do enqueue :: Reason -> Schedule -> Transfer -> TransferInfo -> Assistant () enqueue reason schedule t info - | schedule == Next = go (new:) - | otherwise = go (\l -> l++[new]) + | schedule == Next = go consTList + | otherwise = go snocTList where - new = (t, info) go modlist = whenM (add modlist) $ do debug [ "queued", describeTransfer t info, ": " ++ reason ] notifyTransfer @@ -140,11 +138,11 @@ enqueue reason schedule t info liftIO $ atomically $ ifM (checkRunningTransferSTM dstatus t) ( return False , do - l <- readTVar (queuelist q) + l <- readTList (queuelist q) if (t `notElem` map fst l) then do void $ modifyTVar' (queuesize q) succ - void $ modifyTVar' (queuelist q) modlist + void $ modlist (queuelist q) (t, info) return True else return False ) @@ -185,9 +183,9 @@ getNextTransfer acceptable = do if sz < 1 then retry -- blocks until queuesize changes else do - (r@(t,info):rest) <- readTVar (queuelist q) - writeTVar (queuelist q) rest + (r@(t,info):rest) <- readTList (queuelist q) void $ modifyTVar' (queuesize q) pred + setTList (queuelist q) rest if acceptable info then do adjustTransfersSTM dstatus $ @@ -219,8 +217,7 @@ dequeueTransfers c = do dequeueTransfersSTM :: TransferQueue -> (Transfer -> Bool) -> STM [(Transfer, TransferInfo)] dequeueTransfersSTM q c = do - (removed, ts) <- partition (c . fst) - <$> readTVar (queuelist q) + (removed, ts) <- partition (c . fst) <$> readTList (queuelist q) void $ writeTVar (queuesize q) (length ts) - void $ writeTVar (queuelist q) ts + setTList (queuelist q) ts return removed diff --git a/Assistant/Types/TransferQueue.hs b/Assistant/Types/TransferQueue.hs index 6620ebdf6..e4e305d5a 100644 --- a/Assistant/Types/TransferQueue.hs +++ b/Assistant/Types/TransferQueue.hs @@ -12,11 +12,12 @@ import Logs.Transfer import Types.Remote import Control.Concurrent.STM +import Utility.TList data TransferQueue = TransferQueue { queuesize :: TVar Int - , queuelist :: TVar [(Transfer, TransferInfo)] - , deferreddownloads :: TVar [(Key, AssociatedFile)] + , queuelist :: TList (Transfer, TransferInfo) + , deferreddownloads :: TList (Key, AssociatedFile) } data Schedule = Next | Later @@ -25,5 +26,5 @@ data Schedule = Next | Later newTransferQueue :: IO TransferQueue newTransferQueue = atomically $ TransferQueue <$> newTVar 0 - <*> newTVar [] - <*> newTVar [] + <*> newTList + <*> newTList @@ -1,6 +1,6 @@ {- user-specified limits on files to act on - - - Copyright 2011,2012 Joey Hess <joey@kitenet.net> + - Copyright 2011-2013 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} @@ -88,9 +88,9 @@ limitExclude glob = Right $ const $ return . not . matchglob glob - once. Also, we use regex-TDFA because it's less buggy in its support - of non-unicode characters. -} matchglob :: String -> Annex.FileInfo -> Bool -matchglob glob (Annex.FileInfo { Annex.matchFile = f }) = +matchglob glob fi = case cregex of - Right r -> case execute r f of + Right r -> case execute r (Annex.matchFile fi) of Right (Just _) -> True _ -> False Left _ -> error $ "failed to compile regex: " ++ regex @@ -138,6 +138,11 @@ limitPresent u _ = Right $ const $ check $ \key -> do handle _ Nothing = return False handle a (Just (key, _)) = a key +{- Limit to content that is in a directory, anywhere in the repository tree -} +limitInDir :: FilePath -> MkLimit +limitInDir dir = const $ Right $ const $ \fi -> return $ + any (== dir) $ splitPath $ takeDirectory $ Annex.matchFile fi + {- Adds a limit to skip files not believed to have the specified number - of copies. -} addCopies :: String -> Annex () diff --git a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs index d980cd373..8005fc0d3 100644 --- a/Logs/PreferredContent.hs +++ b/Logs/PreferredContent.hs @@ -30,7 +30,9 @@ import qualified Utility.Matcher import Annex.FileMatcher import Annex.UUID import Types.Group +import Types.Remote (RemoteConfig) import Logs.Group +import Logs.Remote import Types.StandardGroups {- Filename of preferred-content.log. -} @@ -65,8 +67,9 @@ preferredContentMap = maybe preferredContentMapLoad return preferredContentMapLoad :: Annex Annex.PreferredContentMap preferredContentMapLoad = do groupmap <- groupMap + configmap <- readRemoteLog m <- simpleMap - . parseLogWithUUID ((Just .) . makeMatcher groupmap) + . parseLogWithUUID ((Just .) . makeMatcher groupmap configmap) <$> Annex.Branch.get preferredContentLog Annex.changeState $ \s -> s { Annex.preferredcontentmap = Just m } return m @@ -79,30 +82,30 @@ preferredContentMapRaw = simpleMap . parseLog Just - because the configuration is shared amoung repositories and newer - versions of git-annex may add new features. Instead, parse errors - result in a Matcher that will always succeed. -} -makeMatcher :: GroupMap -> UUID -> String -> FileMatcher -makeMatcher groupmap u s - | s == "standard" = standardMatcher groupmap u +makeMatcher :: GroupMap -> M.Map UUID RemoteConfig -> UUID -> String -> FileMatcher +makeMatcher groupmap configmap u expr + | expr == "standard" = standardMatcher groupmap configmap u | null (lefts tokens) = Utility.Matcher.generate $ rights tokens | otherwise = matchAll where - tokens = map (parseToken (limitPresent $ Just u) groupmap) (tokenizeMatcher s) + tokens = exprParser groupmap configmap (Just u) expr {- Standard matchers are pre-defined for some groups. If none is defined, - or a repository is in multiple groups with standard matchers, match all. -} -standardMatcher :: GroupMap -> UUID -> FileMatcher -standardMatcher m u = maybe matchAll (makeMatcher m u . preferredContent) $ - getStandardGroup =<< u `M.lookup` groupsByUUID m +standardMatcher :: GroupMap -> M.Map UUID RemoteConfig -> UUID -> FileMatcher +standardMatcher groupmap configmap u = + maybe matchAll (makeMatcher groupmap configmap u . preferredContent) $ + getStandardGroup =<< u `M.lookup` groupsByUUID groupmap {- Checks if an expression can be parsed, if not returns Just error -} checkPreferredContentExpression :: String -> Maybe String -checkPreferredContentExpression s - | s == "standard" = Nothing - | otherwise = case parsedToMatcher vs of +checkPreferredContentExpression expr + | expr == "standard" = Nothing + | otherwise = case parsedToMatcher tokens of Left e -> Just e Right _ -> Nothing where - vs = map (parseToken (limitPresent Nothing) emptyGroupMap) - (tokenizeMatcher s) + tokens = exprParser emptyGroupMap M.empty Nothing expr {- Puts a UUID in a standard group, and sets its preferred content to use - the standard expression for that group, unless something is already set. -} diff --git a/Remote/S3.hs b/Remote/S3.hs index 00b5b5dc6..2772833fe 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -1,11 +1,11 @@ -{- Amazon S3 remotes. +{- S3 remotes - - - Copyright 2011 Joey Hess <joey@kitenet.net> + - Copyright 2011-2013 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} -module Remote.S3 (remote) where +module Remote.S3 (remote, iaHost, isIA, isIAHost, iaItemUrl) where import Network.AWS.AWSConnection import Network.AWS.S3Object @@ -15,6 +15,7 @@ import qualified Data.Text as T import qualified Data.ByteString.Lazy.Char8 as L import qualified Data.Map as M import Data.Char +import Network.Socket (HostName) import Common.Annex import Types.Remote @@ -29,6 +30,9 @@ import Crypto import Creds import Utility.Metered import Annex.Content +import Logs.Web + +type Bucket = String remote :: RemoteType remote = RemoteType { @@ -53,7 +57,7 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost storeKey = store this, retrieveKeyFile = retrieve this, retrieveKeyFileCheap = retrieveCheap this, - removeKey = remove this, + removeKey = remove this c, hasKey = checkPresent this, hasKeyCheap = False, whereisKey = Nothing, @@ -67,7 +71,7 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost } s3Setup :: UUID -> RemoteConfig -> Annex RemoteConfig -s3Setup u c = handlehost $ M.lookup "host" c +s3Setup u c = if isIA c then archiveorg else defaulthost where remotename = fromJust (M.lookup "name" c) defbucket = remotename ++ "-" ++ fromUUID u @@ -79,11 +83,6 @@ s3Setup u c = handlehost $ M.lookup "host" c , ("bucket", defbucket) ] - handlehost Nothing = defaulthost - handlehost (Just h) - | ".archive.org" `isSuffixOf` map toLower h = archiveorg - | otherwise = defaulthost - use fullconfig = do gitConfigSpecialRemote u fullconfig "s3" "true" setRemoteCredPair fullconfig (AWS.creds u) @@ -115,21 +114,25 @@ s3Setup u c = handlehost $ M.lookup "host" c store :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool store r k _f p = s3Action r False $ \(conn, bucket) -> - sendAnnex k (void $ remove r k) $ \src -> do - res <- storeHelper (conn, bucket) r k p src - s3Bool res + sendAnnex k (void $ remove' r k) $ \src -> do + ok <- s3Bool =<< storeHelper (conn, bucket) r k p src + + -- Store public URL to item in Internet Archive. + when (ok && isIA (config r)) $ + setUrlPresent k (iaKeyUrl r k) + + return ok storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool storeEncrypted r (cipher, enck) k p = s3Action r False $ \(conn, bucket) -> -- To get file size of the encrypted content, have to use a temp file. -- (An alternative would be chunking to to a constant size.) - withTmp enck $ \tmp -> sendAnnex k (void $ remove r enck) $ \src -> do + withTmp enck $ \tmp -> sendAnnex k (void $ remove' r enck) $ \src -> do liftIO $ encrypt (getGpgOpts r) cipher (feedFile src) $ readBytes $ L.writeFile tmp - res <- storeHelper (conn, bucket) r enck p tmp - s3Bool res + s3Bool =<< storeHelper (conn, bucket) r enck p tmp -storeHelper :: (AWSConnection, String) -> Remote -> Key -> MeterUpdate -> FilePath -> Annex (AWSResult ()) +storeHelper :: (AWSConnection, Bucket) -> Remote -> Key -> MeterUpdate -> FilePath -> Annex (AWSResult ()) storeHelper (conn, bucket) r k p file = do size <- maybe getsize (return . fromIntegral) $ keySize k meteredBytes (Just p) size $ \meterupdate -> @@ -177,10 +180,19 @@ retrieveEncrypted r (cipher, enck) k d p = s3Action r False $ \(conn, bucket) -> return True Left e -> s3Warning e -remove :: Remote -> Key -> Annex Bool -remove r k = s3Action r False $ \(conn, bucket) -> do - res <- liftIO $ deleteObject conn $ bucketKey r bucket k - s3Bool res +{- Internet Archive doesn't easily allow removing content. + - While it may remove the file, there are generally other files + - derived from it that it does not remove. -} +remove :: Remote -> RemoteConfig -> Key -> Annex Bool +remove r c k + | isIA c = do + warning "Cannot remove content from the Internet Archive" + return False + | otherwise = remove' r k + +remove' :: Remote -> Key -> Annex Bool +remove' r k = s3Action r False $ \(conn, bucket) -> + s3Bool =<< liftIO (deleteObject conn $ bucketKey r bucket k) checkPresent :: Remote -> Key -> Annex (Either String Bool) checkPresent r k = s3Action r noconn $ \(conn, bucket) -> do @@ -205,7 +217,7 @@ s3Bool :: AWSResult () -> Annex Bool s3Bool (Right _) = return True s3Bool (Left e) = s3Warning e -s3Action :: Remote -> a -> ((AWSConnection, String) -> Annex a) -> Annex a +s3Action :: Remote -> a -> ((AWSConnection, Bucket) -> Annex a) -> Annex a s3Action r noconn action = do let bucket = M.lookup "bucket" $ config r conn <- s3Connection (config r) (uuid r) @@ -222,7 +234,7 @@ bucketFile r = munge . key2file fileprefix = M.findWithDefault "" "fileprefix" c c = config r -bucketKey :: Remote -> String -> Key -> S3Object +bucketKey :: Remote -> Bucket -> Key -> S3Object bucketKey r bucket k = S3Object bucket (bucketFile r k) "" [] L.empty {- Internet Archive limits filenames to a subset of ascii, @@ -270,3 +282,21 @@ s3Connection c u = go =<< getRemoteCredPairFor "S3" c (AWS.creds u) case reads s of [(p, _)] -> p _ -> error $ "bad S3 port value: " ++ s + +{- Hostname to use for archive.org S3. -} +iaHost :: HostName +iaHost = "s3.us.archive.org" + +isIA :: RemoteConfig -> Bool +isIA c = maybe False isIAHost (M.lookup "host" c) + +isIAHost :: HostName -> Bool +isIAHost h = ".archive.org" `isSuffixOf` map toLower h + +iaItemUrl :: Bucket -> URLString +iaItemUrl bucket = "http://archive.org/details/" ++ bucket + +iaKeyUrl :: Remote -> Key -> URLString +iaKeyUrl r k = "http://archive.org/download/" ++ bucket ++ "/" ++ bucketFile r k + where + bucket = fromJust $ M.lookup "bucket" $ config r diff --git a/Types/StandardGroups.hs b/Types/StandardGroups.hs index 417d6bec1..e7764d387 100644 --- a/Types/StandardGroups.hs +++ b/Types/StandardGroups.hs @@ -7,6 +7,11 @@ module Types.StandardGroups where +import Types.Remote (RemoteConfig) + +import qualified Data.Map as M +import Data.Maybe + data StandardGroup = ClientGroup | TransferGroup @@ -16,6 +21,7 @@ data StandardGroup | FullArchiveGroup | SourceGroup | ManualGroup + | PublicGroup | UnwantedGroup deriving (Eq, Ord, Enum, Bounded, Show) @@ -28,6 +34,7 @@ fromStandardGroup SmallArchiveGroup = "smallarchive" fromStandardGroup FullArchiveGroup = "archive" fromStandardGroup SourceGroup = "source" fromStandardGroup ManualGroup = "manual" +fromStandardGroup PublicGroup = "public" fromStandardGroup UnwantedGroup = "unwanted" toStandardGroup :: String -> Maybe StandardGroup @@ -39,19 +46,29 @@ toStandardGroup "smallarchive" = Just SmallArchiveGroup toStandardGroup "archive" = Just FullArchiveGroup toStandardGroup "source" = Just SourceGroup toStandardGroup "manual" = Just ManualGroup +toStandardGroup "public" = Just PublicGroup toStandardGroup "unwanted" = Just UnwantedGroup toStandardGroup _ = Nothing -descStandardGroup :: StandardGroup -> String -descStandardGroup ClientGroup = "client: a repository on your computer" -descStandardGroup TransferGroup = "transfer: distributes files to clients" -descStandardGroup BackupGroup = "full backup: backs up all files" -descStandardGroup IncrementalBackupGroup = "incremental backup: backs up files not backed up elsewhere" -descStandardGroup SmallArchiveGroup = "small archive: archives files located in \"archive\" directories" -descStandardGroup FullArchiveGroup = "full archive: archives all files not archived elsewhere" -descStandardGroup SourceGroup = "file source: moves files on to other repositories" -descStandardGroup ManualGroup = "manual mode: only stores files you manually choose" -descStandardGroup UnwantedGroup = "unwanted: remove content from this repository" +descStandardGroup :: Maybe RemoteConfig -> StandardGroup -> String +descStandardGroup _ ClientGroup = "client: a repository on your computer" +descStandardGroup _ TransferGroup = "transfer: distributes files to clients" +descStandardGroup _ BackupGroup = "full backup: backs up all files" +descStandardGroup _ IncrementalBackupGroup = "incremental backup: backs up files not backed up elsewhere" +descStandardGroup _ SmallArchiveGroup = "small archive: archives files located in \"archive\" directories" +descStandardGroup _ FullArchiveGroup = "full archive: archives all files not archived elsewhere" +descStandardGroup _ SourceGroup = "file source: moves files on to other repositories" +descStandardGroup _ ManualGroup = "manual mode: only stores files you manually choose" +descStandardGroup _ UnwantedGroup = "unwanted: remove content from this repository" +descStandardGroup c PublicGroup = "public: only stores files located in \"" ++ fromJust (specialDirectory c PublicGroup) ++ "\" directories" + +specialDirectory :: Maybe RemoteConfig -> StandardGroup -> Maybe FilePath +specialDirectory _ SmallArchiveGroup = Just "archive" +specialDirectory _ FullArchiveGroup = Just "archive" +specialDirectory (Just c) PublicGroup = Just $ + fromMaybe "public" $ M.lookup "preferreddir" c +specialDirectory Nothing PublicGroup = Just "public" +specialDirectory _ _ = Nothing {- See doc/preferred_content.mdwn for explanations of these expressions. -} preferredContent :: StandardGroup -> String @@ -67,6 +84,7 @@ preferredContent SmallArchiveGroup = lastResort $ preferredContent FullArchiveGroup = lastResort notArchived preferredContent SourceGroup = "not (copies=1)" preferredContent ManualGroup = "present and (" ++ preferredContent ClientGroup ++ ")" +preferredContent PublicGroup = "inpreferreddir" preferredContent UnwantedGroup = "exclude=*" notArchived :: String diff --git a/Utility/TList.hs b/Utility/TList.hs index 33a50b7dd..716f72017 100644 --- a/Utility/TList.hs +++ b/Utility/TList.hs @@ -25,10 +25,14 @@ newTList = newEmptyTMVar getTList :: TList a -> STM [a] getTList tlist = D.toList <$> takeTMVar tlist -{- Gets anything currently in the TList, without blocking. +{- Takes anything currently in the TList, without blocking. - TList is left empty. -} +takeTList :: TList a -> STM [a] +takeTList tlist = maybe [] D.toList <$> tryTakeTMVar tlist + +{- Reads anything in the list, without modifying it, or blocking. -} readTList :: TList a -> STM [a] -readTList tlist = maybe [] D.toList <$> tryTakeTMVar tlist +readTList tlist = maybe [] D.toList <$> tryReadTMVar tlist {- Mutates a TList. -} modifyTList :: TList a -> (D.DList a -> D.DList a) -> STM () @@ -50,3 +54,6 @@ snocTList tlist v = modifyTList tlist $ \dl -> D.snoc dl v appendTList :: TList a -> [a] -> STM () appendTList tlist l = modifyTList tlist $ \dl -> D.append dl (D.fromList l) + +setTList :: TList a -> [a] -> STM () +setTList tlist l = modifyTList tlist $ const $ D.fromList l diff --git a/debian/changelog b/debian/changelog index 1e6411e5b..4dde2760b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -31,6 +31,15 @@ git-annex (4.20130418) UNRELEASED; urgency=low * assistant: Sanitize XMPP presence information logged for debugging. * initremote: If two existing remotes have the same name, prefer the one with a higher trust level. + * Add public repository group. + (And inpreferreddir to preferred content expressions.) + * webapp: Can now set up Internet Archive repositories. + * S3: Dropping content from the Internet Archive doesn't work, but + their API indicates it does. Always refuse to drop from there. + * webapp: Display some additional information about a repository on + its edit page. + * Automatically register public urls for files uploaded to the + Internet Archive. -- Joey Hess <joeyh@debian.org> Thu, 18 Apr 2013 16:22:48 -0400 diff --git a/doc/assistant/iaitem.png b/doc/assistant/iaitem.png Binary files differnew file mode 100644 index 000000000..2fcc1b4c8 --- /dev/null +++ b/doc/assistant/iaitem.png diff --git a/doc/design/assistant/blog/day_248__Internet_Archive.mdwn b/doc/design/assistant/blog/day_248__Internet_Archive.mdwn new file mode 100644 index 000000000..f002939f1 --- /dev/null +++ b/doc/design/assistant/blog/day_248__Internet_Archive.mdwn @@ -0,0 +1,28 @@ +Very productive & long day today, spent adding a new feature to the +webapp: Internet Archive support! + +[[!img /assistant/iaitem.png]] + +git-annex already supported using archive.org via its S3 special remotes, +so this is just a nice UI around that. + +How does it decide which files to publish on archive.org? Well, +the item has a unique name, which is based on the description +field. Any files located in a directory with that name will be uploaded +to that item. (This is done via a new preferred content expression I added.) + +So, you can have one repository with multiple IA items attached, and +sort files between them however you like. +I plan to make a screencast eventually demoing that. + +Another interesting use case, once the Android webapp is done, would be add +a repository on the DCIM directory, set the archive.org repository to +prefer all content, and *bam*, you have a phone or tablet that +auto-publishes and archives every picture it takes. + +Another nice little feature added today is that whenever a file is uploaded +to the Internet Archive, its public url is automatically recorded, same +as if you'd ran `git annex addurl`. So any users who can clone your +repository can download the files from archive.org, without needing any +login or password info. This makes the Internet Archive a nice way to +publish the large files associated with a public git repository. diff --git a/doc/preferred_content.mdwn b/doc/preferred_content.mdwn index 1bcdfdf07..23081fc30 100644 --- a/doc/preferred_content.mdwn +++ b/doc/preferred_content.mdwn @@ -72,6 +72,18 @@ Note that `not present` is a very bad thing to put in a preferred content expression. It'll make it prefer to get content that's not present, and drop content that is present! Don't go there.. +### difference: "inpreferreddir" + +There's a special "inpreferreddir" keyword you can use in a +preferred content expression of a special remote. This means that the +content is preferred if it's in a directory (located anywhere in the tree) +with a special name. + +The name of the directory can be configured using +`git annex initremote $remote preferreddir=$dirname` + +(If no directory name is configured, it uses "public" by default.) + ## standard expressions git-annex comes with some standard preferred content expressions, that can @@ -166,6 +178,18 @@ reached an archive repository. `present and ($client)` +### public + +This is used for publishing information to a repository that can be +publically accessed. Only files in a directory with a particular name +will be published. (The directory can be located anywhere in the +repository.) + +The name of the directory can be configured using +`git annex initremote $remote preferreddir=$dirname` + +`inpreferreddir` + ### unwanted Use for repositories that you don't want to exist. This will result |