Skip to content

Commit

Permalink
feat: add basic sp selection score modelling
Browse files Browse the repository at this point in the history
  • Loading branch information
rvagg committed Jun 7, 2023
1 parent fe9e760 commit 1bcd218
Show file tree
Hide file tree
Showing 5 changed files with 627 additions and 2 deletions.
182 changes: 182 additions & 0 deletions pkg/session/model/cmd/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
package main

import (
"fmt"
"math/rand"
"os"
"strconv"
"time"

"github.com/dustin/go-humanize"
"github.com/filecoin-project/lassie/pkg/session"
"github.com/filecoin-project/lassie/pkg/session/model"
"github.com/multiformats/go-multicodec"
)

var (
// GRAPHSYNC_FAST_RELIABLE_LOTS_OF_POPULAR_DATA is a graphsync provider profile
// that is a candidate for half of all retrievals, succeeds 60% of the time and
// has the highest bandwidth of the graphsync profiles declared here.
// NOTE(review): the identifier says RELIABLE while Name says "semi-reliable" —
// confirm which label is intended.
GRAPHSYNC_FAST_RELIABLE_LOTS_OF_POPULAR_DATA = model.Provider{
Name: "graphsync fast, semi-reliable, lots of popular data",
Probabilities: map[multicodec.Code]model.Probabilities{
multicodec.TransportGraphsyncFilecoinv1: {
Candidate: model.Chance(0.5),
Success: model.Chance(0.6),
ConnectTimeMs: model.ProbDist{StdDev: 6, Mean: 10},
TimeToFirstByteMs: model.ProbDist{StdDev: 6, Mean: 10},
BandwidthBps: model.ProbDist{StdDev: 1e6, Mean: 1e8}, // Mean of 100MB/s +/- 1MB/s (values are bytes per second)
LatencyMs: model.ProbDist{StdDev: 1, Mean: 20},
FastRetrieval: model.Chance(0.9),
Verified: model.Chance(0.9),
},
},
}

// GRAPHSYNC_MEDIUM_RELIABLE_SOME_POPULAR_DATA is a graphsync provider profile
// that is a candidate for 30% of retrievals, succeeds half of the time and has
// a mean bandwidth of 10MB/s.
GRAPHSYNC_MEDIUM_RELIABLE_SOME_POPULAR_DATA = model.Provider{
Name: "graphsync medium, semi-reliable, some popular data",
Probabilities: map[multicodec.Code]model.Probabilities{
multicodec.TransportGraphsyncFilecoinv1: {
Candidate: model.Chance(0.3),
Success: model.Chance(0.5),
ConnectTimeMs: model.ProbDist{StdDev: 6, Mean: 50},
TimeToFirstByteMs: model.ProbDist{StdDev: 10, Mean: 25},
BandwidthBps: model.ProbDist{StdDev: 1e6, Mean: 1e7}, // Mean of 10MB/s +/- 1MB/s
LatencyMs: model.ProbDist{StdDev: 10, Mean: 40},
FastRetrieval: model.Chance(0.9),
Verified: model.Chance(0.9),
},
},
}

// GRAPHSYNC_MEDIUM_RELIABLE_MINIMAL_POPULAR_DATA is a graphsync provider
// profile that is a candidate for only 10% of retrievals; it succeeds half of
// the time and has a mean bandwidth of 10MB/s.
GRAPHSYNC_MEDIUM_RELIABLE_MINIMAL_POPULAR_DATA = model.Provider{
Name: "graphsync medium, semi-reliable, minimal popular data",
Probabilities: map[multicodec.Code]model.Probabilities{
multicodec.TransportGraphsyncFilecoinv1: {
Candidate: model.Chance(0.1),
Success: model.Chance(0.5),
ConnectTimeMs: model.ProbDist{StdDev: 6, Mean: 50},
TimeToFirstByteMs: model.ProbDist{StdDev: 10, Mean: 25},
BandwidthBps: model.ProbDist{StdDev: 1e6, Mean: 1e7}, // Mean of 10MB/s +/- 1MB/s
LatencyMs: model.ProbDist{StdDev: 10, Mean: 40},
FastRetrieval: model.Chance(0.9),
Verified: model.Chance(0.9),
},
},
}

// GRAPHSYNC_MEDIUM_UNRELIABLE_SOME_POPULAR_DATA is a graphsync provider profile
// that is a candidate for 30% of retrievals, succeeds only 30% of the time and
// has a mean bandwidth of 1MB/s.
GRAPHSYNC_MEDIUM_UNRELIABLE_SOME_POPULAR_DATA = model.Provider{
Name: "graphsync medium, unreliable, some popular data",
Probabilities: map[multicodec.Code]model.Probabilities{
multicodec.TransportGraphsyncFilecoinv1: {
Candidate: model.Chance(0.3),
Success: model.Chance(0.3),
ConnectTimeMs: model.ProbDist{StdDev: 6, Mean: 50},
TimeToFirstByteMs: model.ProbDist{StdDev: 20, Mean: 50},
BandwidthBps: model.ProbDist{StdDev: 1e5, Mean: 1e6}, // Mean of 1MB/s +/- 100KB/s
LatencyMs: model.ProbDist{StdDev: 10, Mean: 40},
FastRetrieval: model.Chance(0.5),
Verified: model.Chance(0.5),
},
},
}

// GRAPHSYNC_MEDIUM_VERY_UNRELIABLE_SOME_POPULAR_DATA is a graphsync provider
// profile that is a candidate for 30% of retrievals but succeeds only 10% of
// the time, with the slowest connect time, time to first byte and latency of
// the graphsync profiles declared here.
GRAPHSYNC_MEDIUM_VERY_UNRELIABLE_SOME_POPULAR_DATA = model.Provider{
Name: "graphsync medium, very unreliable, some popular data",
Probabilities: map[multicodec.Code]model.Probabilities{
multicodec.TransportGraphsyncFilecoinv1: {
Candidate: model.Chance(0.3),
Success: model.Chance(0.1),
ConnectTimeMs: model.ProbDist{StdDev: 100, Mean: 200},
TimeToFirstByteMs: model.ProbDist{StdDev: 6, Mean: 100},
BandwidthBps: model.ProbDist{StdDev: 1e5, Mean: 1e6}, // Mean of 1MB/s +/- 100KB/s
LatencyMs: model.ProbDist{StdDev: 10, Mean: 100},
FastRetrieval: model.Chance(0.2),
Verified: model.Chance(0.2),
},
},
}

// HTTP_FAST_SEMIRELIABLE_LOTS_OF_POPULAR_DATA is an HTTP gateway provider
// profile that is a candidate for half of all retrievals and succeeds half of
// the time. Connect time is fixed at zero and the graphsync-only FastRetrieval
// and Verified chances are left at their zero values.
HTTP_FAST_SEMIRELIABLE_LOTS_OF_POPULAR_DATA = model.Provider{
Name: "http fast, semi-reliable, lots of popular data", // e-ipfs?
Probabilities: map[multicodec.Code]model.Probabilities{
multicodec.TransportIpfsGatewayHttp: {
Candidate: model.Chance(0.5),
Success: model.Chance(0.5),
ConnectTimeMs: model.ProbDist{StdDev: 0, Mean: 0},
TimeToFirstByteMs: model.ProbDist{StdDev: 6, Mean: 10},
BandwidthBps: model.ProbDist{StdDev: 1e6, Mean: 1e8}, // Mean of 100MB/s +/- 1MB/s (values are bytes per second)
LatencyMs: model.ProbDist{StdDev: 1, Mean: 20},
},
},
}

// HTTP_MEDIUM_FLAKY_SOME_POPULAR_DATA is an HTTP gateway provider profile that
// is a candidate for 70% of retrievals, succeeds 60% of the time and has a
// mean bandwidth of 10MB/s. Connect time is fixed at zero.
// NOTE(review): Name reads "semi-reliable, lots of popular data", which does
// not match the FLAKY_SOME identifier and looks copied from another profile —
// confirm the intended label.
HTTP_MEDIUM_FLAKY_SOME_POPULAR_DATA = model.Provider{
Name: "http medium, semi-reliable, lots of popular data", // e-ipfs?
Probabilities: map[multicodec.Code]model.Probabilities{
multicodec.TransportIpfsGatewayHttp: {
Candidate: model.Chance(0.7),
Success: model.Chance(0.6),
ConnectTimeMs: model.ProbDist{StdDev: 0, Mean: 0},
TimeToFirstByteMs: model.ProbDist{StdDev: 6, Mean: 10},
BandwidthBps: model.ProbDist{StdDev: 1e6, Mean: 1e7}, // Mean of 10MB/s +/- 1MB/s
LatencyMs: model.ProbDist{StdDev: 10, Mean: 40},
},
},
}
)

// main builds a fixed provider population, generates a set of simulated
// retrievals, replays them against a session created from the default session
// config and prints a summary of the outcome to stdout.
//
// An optional single command-line argument is parsed as a base-10 int64 and
// used as the random seed so that a run can be reproduced; without it the
// current time in nanoseconds is used. Any other invocation prints a usage
// message to stderr and exits with status 1.
func main() {
	// usage reports an incorrect invocation on stderr and terminates.
	usage := func() {
		fmt.Fprintln(os.Stderr, "Usage: go run main.go [seed]")
		os.Exit(1)
	}

	seed := time.Now().UnixNano()
	switch len(os.Args) {
	case 1:
		// No seed supplied; keep the time-based one.
	case 2:
		s, err := strconv.ParseInt(os.Args[1], 10, 64)
		if err != nil {
			usage()
		}
		seed = s
	default:
		usage()
	}

	simRand := rand.New(rand.NewSource(seed))

	// TODO: generate static population up-front with fixed characteristics
	pop := &model.Population{}
	pop.Add(GRAPHSYNC_FAST_RELIABLE_LOTS_OF_POPULAR_DATA, 4)
	pop.Add(GRAPHSYNC_MEDIUM_RELIABLE_SOME_POPULAR_DATA, 20)
	pop.Add(GRAPHSYNC_MEDIUM_UNRELIABLE_SOME_POPULAR_DATA, 20)
	pop.Add(GRAPHSYNC_MEDIUM_RELIABLE_MINIMAL_POPULAR_DATA, 50)
	pop.Add(HTTP_FAST_SEMIRELIABLE_LOTS_OF_POPULAR_DATA, 1)

	sim := model.Simulation{
		Population: pop,
		Retrievals: 50000,
		// The original literal had StdDev and Mean swapped relative to its own
		// comment, giving a deviation twice the mean.
		RetrievalSize:   model.ProbDist{StdDev: 1e7, Mean: 2e7}, // Mean of 20MB +/- 10MB
		HttpChance:      model.Chance(0.5),
		GraphsyncChance: model.Chance(0.5),
	}

	// The same seeded source drives generation, the session and the replay so
	// that a given seed reproduces the whole run.
	ret := sim.Run(simRand)
	cfg := session.DefaultConfig()
	cfg.Random = simRand
	ses := session.NewSession(cfg, true)
	res := ret.RunWith(simRand, ses)

	// Overall throughput in bytes per second. Computed in floating point and
	// guarded so that a total time below one second neither truncates to zero
	// nor triggers an integer divide-by-zero panic.
	var totalBandwidth uint64
	if secs := float64(res.TotalTimeMs) / 1000; secs > 0 {
		totalBandwidth = uint64(float64(res.Size) / secs)
	}

	fmt.Println("---------------------------------------------------------------")
	fmt.Println("Simulation of", len(ret), "retrievals, seed:", seed)
	fmt.Println()
	fmt.Printf("\t Size per retrieval: %s < %s < %s\n", humanize.IBytes(uint64(ret.MinSize())), humanize.IBytes(uint64(ret.AvgSize())), humanize.IBytes(uint64(ret.MaxSize())))
	fmt.Printf("\tCandidate per retrieval: %s < %s < %s\n", humanize.Comma(int64(ret.MinCandidateCount())), humanize.Comma(int64(ret.AvgCandidateCount())), humanize.Comma(int64(ret.MaxCandidateCount())))
	fmt.Println("---------------------------------------------------------------")
	fmt.Printf("\t Runs: %d\n", res.Runs)
	fmt.Printf("\t Successes: %d\n", res.Successes)
	fmt.Printf("\t Retrieval failures: %d\n", res.RetrievalFailures)
	fmt.Printf("\t Size: %s\n", humanize.IBytes(uint64(res.Size)))
	fmt.Printf("\t Total time: %v\n", time.Duration(res.TotalTimeMs)*time.Millisecond)
	fmt.Printf("\t Average TTFB: %s\n", time.Duration(res.AverageTimeToFirstByteMs)*time.Millisecond)
	fmt.Printf("\t Average bandwidth: %s/s\n", humanize.IBytes(uint64(res.AverageBandwidth)))
	fmt.Printf("\t Total bandwidth: %s/s\n", humanize.IBytes(totalBandwidth))
	fmt.Println("---------------------------------------------------------------")
}
24 changes: 24 additions & 0 deletions pkg/session/model/pop.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package model

import "github.com/multiformats/go-multicodec"

// Provider describes one kind of simulated provider: a human-readable name and
// the probabilistic behaviour it exhibits for each protocol it supports.
type Provider struct {
// Name is a descriptive label for this provider profile.
Name string
// Probabilities holds the behaviour of the provider keyed by protocol code.
Probabilities map[multicodec.Code]Probabilities
}

// Population is the collection of provider profiles, each paired with a count,
// built up through Add.
type Population struct {
// Providers lists the entries in the order they were added.
Providers []PC
}

// PC pairs a Provider profile with a count.
type PC struct {
// Provider is the profile this entry refers to.
Provider Provider
// Count is the number passed to Population.Add for this profile —
// presumably how many providers of this kind exist; confirm against the
// simulation code.
Count int
}

// Add appends an entry pairing provider with count to the population. Entries
// are kept in insertion order and are not merged, so adding the same provider
// twice yields two entries.
func (p *Population) Add(provider Provider, count int) {
	// append works on a nil slice, so no explicit initialisation is needed.
	p.Providers = append(p.Providers, PC{Provider: provider, Count: count})
}
45 changes: 45 additions & 0 deletions pkg/session/model/prob.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package model

import "math/rand"

// Probabilities defines the probabilistic behaviour of a provider for a
// particular protocol. Chance fields are probabilities in the range [0,1];
// ProbDist fields are distributions that are sampled per use.
type Probabilities struct {
// Candidate is the probability of being a candidate for any given
// retrieval [0,1].
Candidate Chance
// Success is the probability of a successful retrieval [0,1].
Success Chance
// ConnectTimeMs is the distribution for connect time in milliseconds.
ConnectTimeMs ProbDist
// TimeToFirstByteMs is the distribution for time to first byte in
// milliseconds.
TimeToFirstByteMs ProbDist
// BandwidthBps is the distribution for bandwidth in bytes per second; this
// has to account for block fetching speed on the remote, not just the pipe.
BandwidthBps ProbDist
// LatencyMs is the distribution for latency in milliseconds; this will be
// multiplied to simulate connection initialisation round-trips.
LatencyMs ProbDist
// FastRetrieval is the probability of having FastRetrieval for a graphsync
// retrieval [0,1].
FastRetrieval Chance
// Verified is the probability of having Verified for a graphsync retrieval
// [0,1].
Verified Chance
}

// ProbDist describes a normal (Gaussian) distribution by its standard
// deviation and mean.
type ProbDist struct {
	// StdDev is the standard deviation of the distribution.
	StdDev float64
	// Mean is the centre of the distribution.
	Mean float64
}

// Sample draws a single value from the distribution, using rng as the source
// of randomness. Exactly one normally distributed number is consumed from rng
// per call. The result is not clamped, so it can be negative when StdDev is
// large relative to Mean.
func (pd ProbDist) Sample(rng *rand.Rand) float64 {
	return rng.NormFloat64()*pd.StdDev + pd.Mean
}

// Chance is the probability of a Roll() being true, the higher the value in the
// range [0,1] the more likely it is to be true.
type Chance float64

// Roll reports whether the chance "hits" for one draw from rng. A single
// uniform value in [0,1) is consumed per call, so a Chance of 0 never hits and
// a Chance of 1 always does.
func (c Chance) Roll(rng *rand.Rand) bool {
	return rng.Float64() < float64(c)
}

// FIFTY_FIFTY is a Chance of 0.5.
const FIFTY_FIFTY = Chance(0.5)
Loading

0 comments on commit 1bcd218

Please sign in to comment.