summaryrefslogtreecommitdiff
path: root/doc/design/assistant/syncing
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2014-05-02 15:09:46 -0300
committerGravatar Joey Hess <joey@kitenet.net>2014-05-02 15:09:46 -0300
commit10a3d627dcb23035bc68c9912e927288be493d8e (patch)
treeeffc2fc6fbc8be9053c77d4c420946623916c794 /doc/design/assistant/syncing
parent1dacbb30b10743eb7234a637858c50856a6fa4fb (diff)
add routing simulation
Diffstat (limited to 'doc/design/assistant/syncing')
-rw-r--r--doc/design/assistant/syncing/efficiency.mdwn21
-rw-r--r--doc/design/assistant/syncing/simroutes.hs250
2 files changed, 271 insertions, 0 deletions
diff --git a/doc/design/assistant/syncing/efficiency.mdwn b/doc/design/assistant/syncing/efficiency.mdwn
index 7da721a2c..192dd6ecb 100644
--- a/doc/design/assistant/syncing/efficiency.mdwn
+++ b/doc/design/assistant/syncing/efficiency.mdwn
@@ -71,3 +71,24 @@ repositories have a non-git out of band signaling mechanism. This could,
for example, be used by laptopB to tell laptopA that it's trying to send
a file directly to laptopC. laptopA could then defer the upload to the
cloud for a while.
+
+## syncing only requested content
+
+In some situations, nodes only want particular files, and not everything.
+(Or don't have the bandwidth to get everything.) A way to handle this,
+that should work in a fully ad-hoc, offline distributed network,
+suggested by Vincenzo Tozzi:
+
+* Nodes generate a request for a specific file they want, committed
+ to git somewhere.
+* This request has a TTL (of eg 3).
+* When syncing, copy the requests that a node has, and decrease their TTL
+ by 1. Requests with a TTL of 0 have timed out and are not copied.
+ (So, requests are stored in git, but on eg, per-node branches.)
+* Only copy content to nodes that have a request for it (either one
+ originating with them, or one they copied from another node).
+* Each request indicates the requesting node, so once no nodes have an
+ active request for a particular file, it's ok to drop it from the
+ transfer nodes (honoring numcopies etc of course).
+
+A simulation of a network using this method is in [[simroutes.hs]]
diff --git a/doc/design/assistant/syncing/simroutes.hs b/doc/design/assistant/syncing/simroutes.hs
new file mode 100644
index 000000000..322ecf4fe
--- /dev/null
+++ b/doc/design/assistant/syncing/simroutes.hs
@@ -0,0 +1,250 @@
+-- Simulation of non-flood syncing of content, across a network of nodes.
+
+module Main where
+
+import System.Random
+import Control.Monad.Random
+import Control.Monad
+import Control.Applicative
+import Data.Ratio
+import Data.Ord
+import Data.List
+import qualified Data.Set as S
+
+{-
+ - Tunable values
+ -}
+
+totalFiles :: Int
+totalFiles = 10
+
+-- How likely is a given file to be wanted by any particular node?
+probabilityFilesWanted :: Probability
+probabilityFilesWanted = 0.10
+
+-- How many different locations can each transfer node move between?
+-- (Min, Max)
+transferDestinationsRange :: (Int, Int)
+transferDestinationsRange = (2, 5)
+
+-- Controls how likely transfer nodes are to move around in a given step
+-- of the simulation.
+-- (They actually move slightly less because they may start to move and
+-- pick the same location they are at.)
+-- (Min, Max)
+transferMoveFrequencyRange :: (Probability, Probability)
+transferMoveFrequencyRange = (0.10, 1.00)
+
+-- counts both immobile and transfer nodes as hops, so double Vince's
+-- theoretical TTL of 3.
+maxTTL :: TTL
+maxTTL = TTL 6
+
+minTTL :: TTL
+minTTL = TTL 1
+
+numImmobileNodes :: Int
+numImmobileNodes = 10
+
+numTransferNodes :: Int
+numTransferNodes = 20
+
+numSteps :: Int
+numSteps = 100
+
+-- IO code
+main = putStrLn . summarize =<< evalRandIO (simulate numSteps =<< genNetwork)
+-- Only pure code below :)
+
+data Network = Network [ImmobileNode] [TransferNode]
+ deriving (Show, Eq)
+
+data ImmobileNode = ImmobileNode NodeRepo
+ deriving (Show, Eq)
+
+-- Index in the Network's list of ImmobileNodes.
+type ImmobileNodeIdx = Int
+
+data TransferNode = TransferNode
+ { currentlocation :: ImmobileNodeIdx
+ , possiblelocations :: [ImmobileNodeIdx]
+ , movefrequency :: Probability
+ , transferrepo :: NodeRepo
+ }
+ deriving (Show, Eq)
+
+data NodeRepo = NodeRepo
+ { wantFiles :: [Request]
+ , haveFiles :: S.Set File
+ }
+ deriving (Show, Eq)
+
+data File = File Int
+ deriving (Show, Eq, Ord)
+
+randomFile :: (RandomGen g) => Rand g File
+randomFile = File <$> getRandomR (0, totalFiles)
+
+data Request = Request File TTL
+ deriving (Show)
+
+-- compare ignoring TTL
+instance Eq Request where
+ (Request f1 _) == (Request f2 _) = f1 == f2
+
+requestedFile :: Request -> File
+requestedFile (Request f _) = f
+
+requestTTL :: Request -> TTL
+requestTTL (Request _ ttl) = ttl
+
+data TTL = TTL Int
+ deriving (Show, Eq, Ord)
+
+incTTL :: TTL -> TTL
+incTTL (TTL t) = TTL (t + 1)
+
+decTTL :: TTL -> TTL
+decTTL (TTL t) = TTL (t - 1)
+
+staleTTL :: TTL -> Bool
+staleTTL (TTL t) = t < 1
+
+-- Origin of a request starts one higher than max, since the TTL
+-- will decrement the first time the Request is transferred to another node.
+originTTL :: TTL
+originTTL = incTTL maxTTL
+
+randomRequest :: (RandomGen g) => Rand g Request
+randomRequest = Request
+ <$> randomFile
+ <*> pure originTTL
+
+type Probability = Float
+
+randomProbability :: (RandomGen g) => Rand g Probability
+randomProbability = getRandomR (0, 1)
+
+simulate :: (RandomGen g) => Int -> Network -> Rand g Network
+simulate 0 net = return net
+simulate c net = simulate (c - 1) =<< step net
+
+-- Each step of the simulation, check if each TransferNode wants to move,
+-- and if so:
+-- 1. It and its current location exchange their Requests.
+-- 2. And they exchange any requested files.
+-- 3. Move it to a new random location.
+--
+-- Note: This implementation does not exchange requests between two
+-- TransferNodes that both arrive at the same location at the same step,
+-- and then move away in the next step.
+step :: (RandomGen g) => Network -> Rand g Network
+step (Network immobiles transfers) = go immobiles [] transfers
+ where
+ go is c [] = return (Network is c)
+ go is c (t:ts) = do
+ r <- randomProbability
+ if movefrequency t <= r
+ then do
+ let (is1, (currentloc:is2)) = splitAt (currentlocation t) is
+ let (currentloc', t') = exchangeRequestsFiles currentloc t
+ t'' <- move t'
+ go (is1 ++ currentloc' : is2) (c ++ [t'']) ts
+ else go is (c ++ [t]) ts
+
+type Exchanger = ImmobileNode -> TransferNode -> (ImmobileNode, TransferNode)
+
+exchangeRequestsFiles :: Exchanger
+exchangeRequestsFiles (ImmobileNode ir) t@(TransferNode { transferrepo = tr }) =
+ ( ImmobileNode (go ir tr)
+ , t { transferrepo = go tr ir }
+ )
+ where
+ go r1 r2 = r1
+ { wantFiles = foldr addRequest (wantFiles r1) (wantFiles r2)
+ , haveFiles = S.foldr (addFile (wantFiles r1)) (haveFiles r1) (haveFiles r2)
+ }
+
+-- Adds a file to the set, when there's a request for it.
+addFile :: [Request] -> File -> S.Set File -> S.Set File
+addFile rs f fs
+ | any (\r -> f == requestedFile r) rs = S.insert f fs
+ | otherwise = fs
+
+-- Decrements TTL, and avoids adding request with a stale TTL, or a
+-- request for an already added file with the same or a lower TTL.
+addRequest :: Request -> [Request] -> [Request]
+addRequest (Request f ttl) rs
+ | staleTTL ttl' = rs
+ | any (\r -> requestTTL r >= ttl) similar = rs
+ | otherwise = r' : other
+ where
+ ttl' = decTTL ttl
+ r' = Request f ttl'
+ (other, similar) = partition (/= r') rs
+
+move :: (RandomGen g) => TransferNode -> Rand g TransferNode
+move t = do
+ newloc <- randomfrom (possiblelocations t)
+ return $ t { currentlocation = newloc }
+
+genNetwork :: (RandomGen g) => Rand g Network
+genNetwork = do
+ immobiles <- sequence (replicate numImmobileNodes mkImmobile)
+ transfers <- sequence (replicate numTransferNodes (mkTransfer immobiles))
+ return $ Network immobiles transfers
+
+mkImmobile :: (RandomGen g) => Rand g ImmobileNode
+mkImmobile = ImmobileNode <$> genrepo
+ where
+ genrepo = NodeRepo
+ -- The files this node wants.
+ -- Currently assumes each file is equally popular.
+ <$> sequence (replicate (truncate (fromIntegral totalFiles * probabilityFilesWanted)) randomRequest)
+ -- The files this node already has.
+ --
+ -- We'll assume equal production, so split the total
+ -- number of files amoung the immobile nodes.
+ -- (This will produce some duplication of files
+ -- (consider birthday paradox), and some missing files.)
+ --
+ -- TODO: Some immobile nodes are internet connected,
+ -- and these should all share their files automatically)
+ -- (Also when running the sim.)
+ <*> (S.fromList <$> sequence (replicate (totalFiles `div` numImmobileNodes) randomFile))
+
+mkTransfer :: (RandomGen g) => [ImmobileNode] -> Rand g TransferNode
+mkTransfer immobiles = do
+ -- Transfer nodes are given random routes. May be simplistic.
+ -- Also, some immobile nodes will not be serviced by any transfer nodes.
+ numpossiblelocs <- getRandomR transferDestinationsRange
+ possiblelocs <- sequence (replicate numpossiblelocs (randomfrom indexes))
+ currentloc <- randomfrom possiblelocs
+ movefreq <- getRandomR transferMoveFrequencyRange
+ -- transfer nodes start out with no files or requests in their repo
+ let repo = (NodeRepo [] S.empty)
+ return $ TransferNode currentloc possiblelocs movefreq repo
+ where
+ indexes = [0..length immobiles - 1]
+
+randomfrom :: (RandomGen g) => [a] -> Rand g a
+randomfrom l = do
+ i <- getRandomR (1, length l)
+ return $ l !! (i - 1)
+
+summarize :: Network -> String
+summarize (Network is _ts) = unlines $ map (\(d, s) -> d ++ ": " ++ s)
+ [ ("Total wanted files",
+ show (sum (overis (length . findoriginreqs . wantFiles . repo))))
+ , ("Wanted files that were not transferred to requesting node",
+ show (sum (overis (S.size . findunsatisfied . repo))))
+ --, ("List of files not transferred", show unsatisfied)
+ , ("Immobile nodes at end", show is)
+ ]
+ where
+ findoriginreqs = filter (\r -> requestTTL r == originTTL)
+ findunsatisfied r =
+ let wantedfs = S.fromList $ map requestedFile (findoriginreqs (wantFiles r))
+ in S.difference wantedfs (haveFiles r)
+ repo (ImmobileNode r) = r
+ overis f = map f is