From dd83ea184638aeb86b7af70dc3204fdee33e5a4d Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Fri, 4 Oct 2024 14:33:07 +0200 Subject: [PATCH] July2024/improve gsfa perf (#124) * Use custom temp dir for gsfa * Flush based on performance * Fix tmpDir * Max is 1000 * Remove /health and /metrics req logging; closes #127 * Move metrics to metrics package * Prometheus for index and car lookups; closes #126 * Cleanup metrics; closes #128 * gsfa: include pubkeys from address lookup tables * Miner info: use exponential retry * Fix tests --- cmd-x-index-gsfa.go | 83 +++++++++++++++++++++++++++++---- gsfa/gsfa-write.go | 49 +++++++++++-------- gsfa/pop-rank.go | 72 ++++++++++++++++++++++++++++ gsfa/pop-rank_test.go | 55 ++++++++++++++++++++++ http-range.go | 17 ++++++- request-response.go | 10 ++-- split-car-fetcher/miner-info.go | 36 +++++++++++++- 7 files changed, 285 insertions(+), 37 deletions(-) create mode 100644 gsfa/pop-rank.go create mode 100644 gsfa/pop-rank_test.go diff --git a/cmd-x-index-gsfa.go b/cmd-x-index-gsfa.go index eb1ef7b3..26f0556a 100644 --- a/cmd-x-index-gsfa.go +++ b/cmd-x-index-gsfa.go @@ -21,6 +21,8 @@ import ( "github.com/rpcpool/yellowstone-faithful/indexmeta" "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" "github.com/rpcpool/yellowstone-faithful/iplddecoders" + solanatxmetaparsers "github.com/rpcpool/yellowstone-faithful/solana-tx-meta-parsers" + "github.com/rpcpool/yellowstone-faithful/third_party/solana_proto/confirmed_block" "github.com/urfave/cli/v2" "k8s.io/klog/v2" ) @@ -70,7 +72,7 @@ func newCmd_Index_gsfa() *cli.Command { }, &cli.StringFlag{ Name: "tmp-dir", - Usage: "temporary directory to use for storing intermediate files", + Usage: "temporary directory to use for storing intermediate files; WILL BE DELETED", Value: os.TempDir(), }, }, @@ -137,6 +139,10 @@ func newCmd_Index_gsfa() *cli.Command { return fmt.Errorf("failed to add network to sig_exists index metadata: %w", err) } tmpDir := c.String("tmp-dir") + tmpDir = filepath.Join(tmpDir, fmt.Sprintf("yellowstone-faithful-gsfa-%d", time.Now().UnixNano())) + if err := os.MkdirAll(tmpDir, 0o755); err != nil { + return fmt.Errorf("failed to create tmp dir: %w", err) + } indexW, err := gsfa.NewGsfaWriter( gsfaIndexDir, meta, @@ -218,12 +224,17 @@ func newCmd_Index_gsfa() *cli.Command { for ii := range transactions { txWithInfo := transactions[ii] numProcessedTransactions.Add(1) + accountKeys := txWithInfo.Transaction.Message.AccountKeys + if txWithInfo.Metadata != nil { + accountKeys = append(accountKeys, byteSlicesToKeySlice(txWithInfo.Metadata.LoadedReadonlyAddresses)...) + accountKeys = append(accountKeys, byteSlicesToKeySlice(txWithInfo.Metadata.LoadedWritableAddresses)...) + } err = indexW.Push( txWithInfo.Offset, txWithInfo.Length, txWithInfo.Slot, txWithInfo.Blocktime, - txWithInfo.Transaction.Message.AccountKeys, + accountKeys, ) if err != nil { klog.Exitf("Error while pushing to gsfa index: %s", err) @@ -270,9 +281,14 @@ func objectsToTransactions( objects []accum.ObjectWithMetadata, ) ([]*TransactionWithSlot, error) { transactions := make([]*TransactionWithSlot, 0, len(objects)) + dataBlocks := make([]accum.ObjectWithMetadata, 0) for _, object := range objects { // check if the object is a transaction: kind := iplddecoders.Kind(object.ObjectData[1]) + if kind == iplddecoders.KindDataFrame { + dataBlocks = append(dataBlocks, object) + continue + } if kind != iplddecoders.KindTransaction { continue } @@ -280,17 +296,65 @@ func objectsToTransactions( if err != nil { return nil, fmt.Errorf("error while decoding transaction from nodex %s: %w", object.Cid, err) } + tws := &TransactionWithSlot{ + Offset: object.Offset, + Length: object.SectionLength, + Slot: uint64(decoded.Slot), + Blocktime: uint64(block.Meta.Blocktime), + } + if total, ok := decoded.Metadata.GetTotal(); !ok || total == 1 { + completeBuffer := decoded.Metadata.Bytes() + if ha, ok := decoded.Metadata.GetHash(); ok { + err := ipldbindcode.VerifyHash(completeBuffer, ha) + if err != nil { + return nil, fmt.Errorf("failed to verify metadata hash: %w", err) + } + } + if len(completeBuffer) > 0 { + uncompressedMeta, err := decompressZstd(completeBuffer) + if err != nil { + return nil, fmt.Errorf("failed to decompress metadata: %w", err) + } + status, err := solanatxmetaparsers.ParseTransactionStatusMeta(uncompressedMeta) + if err == nil { + tws.Metadata = status + } + } + } else { + metaBuffer, err := loadDataFromDataFrames(&decoded.Metadata, func(ctx context.Context, wantedCid cid.Cid) (*ipldbindcode.DataFrame, error) { + for _, dataBlock := range dataBlocks { + if dataBlock.Cid == wantedCid { + df, err := iplddecoders.DecodeDataFrame(dataBlock.ObjectData) + if err != nil { + return nil, err + } + return df, nil + } + } + return nil, fmt.Errorf("dataframe not found") + }) + if err != nil { + return nil, fmt.Errorf("failed to load metadata: %w", err) + } + // reset dataBlocks: + dataBlocks = dataBlocks[:0] + if len(metaBuffer) > 0 { + uncompressedMeta, err := decompressZstd(metaBuffer) + if err != nil { + return nil, fmt.Errorf("failed to decompress metadata: %w", err) + } + status, err := solanatxmetaparsers.ParseTransactionStatusMeta(uncompressedMeta) + if err == nil { + tws.Metadata = status + } + } + } tx, err := decoded.GetSolanaTransaction() if err != nil { return nil, fmt.Errorf("error while getting solana transaction from object %s: %w", object.Cid, err) } - transactions = append(transactions, &TransactionWithSlot{ - Offset: object.Offset, - Length: object.SectionLength, - Slot: uint64(decoded.Slot), - Blocktime: uint64(block.Meta.Blocktime), - Transaction: *tx, - }) + tws.Transaction = *tx + transactions = append(transactions, tws) } return transactions, nil } @@ -311,4 +375,5 @@ type TransactionWithSlot struct { Slot uint64 Blocktime uint64 Transaction solana.Transaction + Metadata *confirmed_block.TransactionStatusMeta } diff --git a/gsfa/gsfa-write.go b/gsfa/gsfa-write.go index 640fa64f..136e5563 100644 --- a/gsfa/gsfa-write.go +++ b/gsfa/gsfa-write.go @@ -23,6 +23,7 @@ import ( type GsfaWriter struct { mu sync.Mutex indexRootDir string + popRank *rollingRankOfTopPerformers // top pubkeys by flush count offsets *hashmap.Map[solana.PublicKey, [2]uint64] ll *linkedlog.LinkedLog man *manifest.Manifest @@ -61,6 +62,7 @@ func NewGsfaWriter( ctx, cancel := context.WithCancel(context.Background()) index := &GsfaWriter{ fullBufferWriterChan: make(chan linkedlog.KeyToOffsetAndSizeAndBlocktime, 50), // TODO: make this configurable + popRank: newRollingRankOfTopPerformers(10_000), offsets: hashmap.New[solana.PublicKey, [2]uint64](int(1_000_000)), accum: hashmap.New[solana.PublicKey, []*linkedlog.OffsetAndSizeAndBlocktime](int(1_000_000)), ctx: ctx, @@ -120,6 +122,9 @@ func (a *GsfaWriter) fullBufferWriter() { has := tmpBuf.Has(buffer.Key) if len(tmpBuf) == howManyBuffersToFlushConcurrently || has { for _, buf := range tmpBuf { + if len(buf.Values) == 0 { + continue + } // Write the buffer to the linked log. klog.V(5).Infof("Flushing %d transactions for key %s", len(buf.Values), buf.Key) if err := a.flushKVs(buf); err != nil { @@ -131,7 +136,7 @@ func (a *GsfaWriter) fullBufferWriter() { tmpBuf = append(tmpBuf, buffer) } case <-time.After(1 * time.Second): - klog.Infof("Read %d buffers from channel", numReadFromChan) + klog.V(5).Infof("Read %d buffers from channel", numReadFromChan) } } } @@ -153,39 +158,45 @@ func (a *GsfaWriter) Push( } publicKeys = publicKeys.Dedupe() publicKeys.Sort() - if slot%1000 == 0 { - if a.accum.Len() > 130_000 { - // flush all - klog.Infof("Flushing all %d keys", a.accum.Len()) + if slot%500 == 0 && a.accum.Len() > 100_000 { + // flush all + klog.V(4).Infof("Flushing all %d keys", a.accum.Len()) - var keys solana.PublicKeySlice = a.accum.Keys() - keys.Sort() + var keys solana.PublicKeySlice = a.accum.Keys() + keys.Sort() - for iii := range keys { - key := keys[iii] - values, _ := a.accum.Get(key) + a.popRank.purge() - if len(values) < 100 && len(values) > 0 { - if err := a.flushKVs(linkedlog.KeyToOffsetAndSizeAndBlocktime{ - Key: key, - Values: values, - }); err != nil { - return err - } - a.accum.Delete(key) + for iii := range keys { + key := keys[iii] + values, _ := a.accum.Get(key) + // The objective is to have as big of a batch for each key as possible (max is 1000). + // So we optimize for delaying the flush for the most popular keys (popular=has been flushed a lot of times). + // And we flush the less popular keys, periodically if they haven't seen much activity. + + // if this key has less than 100 values and is not in the top list of keys by flush count, then + // it's very likely that this key isn't going to get a lot of values soon + if len(values) < 100 && len(values) > 0 && !a.popRank.has(key) { + if err := a.flushKVs(linkedlog.KeyToOffsetAndSizeAndBlocktime{ + Key: key, + Values: values, + }); err != nil { + return err } + a.accum.Delete(key) } } } for _, publicKey := range publicKeys { current, ok := a.accum.Get(publicKey) if !ok { - current = make([]*linkedlog.OffsetAndSizeAndBlocktime, 0) + current = make([]*linkedlog.OffsetAndSizeAndBlocktime, 0, itemsPerBatch) current = append(current, oas) a.accum.Set(publicKey, current) } else { current = append(current, oas) if len(current) >= itemsPerBatch { + a.popRank.Incr(publicKey, 1) a.fullBufferWriterChan <- linkedlog.KeyToOffsetAndSizeAndBlocktime{ Key: publicKey, Values: clone(current), diff --git a/gsfa/pop-rank.go b/gsfa/pop-rank.go new file mode 100644 index 00000000..92362b80 --- /dev/null +++ b/gsfa/pop-rank.go @@ -0,0 +1,72 @@ +package gsfa + +import ( + "slices" + "sort" + + "github.com/gagliardetto/solana-go" + "github.com/tidwall/hashmap" +) + +type rollingRankOfTopPerformers struct { + rankListSize int + maxValue int + minValue int + set hashmap.Map[solana.PublicKey, int] +} + +func newRollingRankOfTopPerformers(rankListSize int) *rollingRankOfTopPerformers { + return &rollingRankOfTopPerformers{ + rankListSize: rankListSize, + } +} + +func (r *rollingRankOfTopPerformers) Incr(key solana.PublicKey, delta int) int { + value, ok := r.set.Get(key) + if !ok { + value = 0 + } + value = value + delta + r.set.Set(key, value) + if value > r.maxValue { + r.maxValue = value + } + if value < r.minValue { + r.minValue = value + } + return value +} + +func (r *rollingRankOfTopPerformers) Get(key solana.PublicKey) (int, bool) { + value, ok := r.set.Get(key) + return value, ok +} + +// purge will remove all keys by the lowest values until the rankListSize is reached. +// keys with equivalent values are kept. +func (r *rollingRankOfTopPerformers) purge() { + values := r.set.Values() + sort.Ints(values) + values = slices.Compact(values) + if len(values) <= r.rankListSize { + return + } + + // remove the lowest values + for _, value := range values[:len(values)-r.rankListSize] { + for _, key := range r.set.Keys() { + if v, _ := r.set.Get(key); v == value { + r.set.Delete(key) + } + } + } + + // update the min and max values + r.minValue = values[len(values)-r.rankListSize] + r.maxValue = values[len(values)-1] +} + +func (r *rollingRankOfTopPerformers) has(key solana.PublicKey) bool { + _, ok := r.set.Get(key) + return ok +} diff --git a/gsfa/pop-rank_test.go b/gsfa/pop-rank_test.go new file mode 100644 index 00000000..f90c49e4 --- /dev/null +++ b/gsfa/pop-rank_test.go @@ -0,0 +1,55 @@ +package gsfa + +import ( + "testing" + + "github.com/gagliardetto/solana-go" + "github.com/stretchr/testify/require" +) + +func TestPopRank(t *testing.T) { + // Test the rollingRankOfTopPerformers type: + { + // Create a new rollingRankOfTopPerformers: + r := newRollingRankOfTopPerformers(5) + if r == nil { + t.Fatal("expected non-nil rollingRankOfTopPerformers") + } + // Test the Incr method: + { + key := solana.SysVarRentPubkey + delta := 1 + value := r.Incr(key, delta) + require.Equal(t, 1, value) + } + // Test the purge method: + { + r.purge() + // the value should still be 1 + value, ok := r.Get(solana.SysVarRentPubkey) + require.True(t, ok) + require.Equal(t, 1, value) + } + { + // now add a few more values: + r.Incr(solana.SysVarClockPubkey, 6) + r.Incr(solana.SysVarEpochSchedulePubkey, 5) + r.Incr(solana.SysVarFeesPubkey, 4) + r.Incr(solana.SysVarInstructionsPubkey, 3) + r.Incr(solana.SysVarRewardsPubkey, 2) + + // there should be 6 values now + require.Equal(t, 6, r.set.Len()) + + // purge should remove the lowest values + r.purge() + + // there should be 5 values now (equivalent values are kept) + require.Equal(t, 5, r.set.Len()) + + // the lowest value should be 2 + require.Equal(t, 2, r.minValue) + require.Equal(t, 6, r.maxValue) + } + } +} diff --git a/http-range.go b/http-range.go index 4033455e..593bf793 100644 --- a/http-range.go +++ b/http-range.go @@ -69,13 +69,26 @@ func (r *readCloserWrapper) ReadAt(p []byte, off int64) (n int, err error) { prefix := icon + "[READ-UNKNOWN]" if isIndex { prefix = icon + azureBG("[READ-INDEX]") - } else if isCar { - + // get the index name, which is the part before the .index suffix, after the last . + indexName := strings.TrimSuffix(r.name, ".index") + // split the index name by . and get the last part + byDot := strings.Split(indexName, ".") + if len(byDot) > 0 { + indexName = byDot[len(byDot)-1] + } + // TODO: distinguish between remote and local index reads + metrics.IndexLookupHistogram.WithLabelValues(indexName).Observe(float64(took.Seconds())) + } + // if has suffix .car, then it's a car file + if isCar { if r.isSplitCar { prefix = icon + azureBG("[READ-SPLIT-CAR]") } else { prefix = icon + purpleBG("[READ-CAR]") } + carName := filepath.Base(r.name) + // TODO: distinguish between remote and local index reads + metrics.CarLookupHistogram.WithLabelValues(carName).Observe(float64(took.Seconds())) } klog.V(5).Infof(prefix+" %s:%d+%d (%s)\n", (r.name), off, len(p), took) diff --git a/request-response.go b/request-response.go index 97c5e823..b592658f 100644 --- a/request-response.go +++ b/request-response.go @@ -410,10 +410,10 @@ func compiledInstructionsToJsonParsed( case *confirmed_block.TransactionStatusMeta: return &txstatus.LoadedAddresses{ Writable: func() []solana.PublicKey { - return byteSlicesToKeySlices(vv.LoadedWritableAddresses) + return byteSlicesToKeySlice(vv.LoadedWritableAddresses) }(), Readonly: func() []solana.PublicKey { - return byteSlicesToKeySlices(vv.LoadedReadonlyAddresses) + return byteSlicesToKeySlice(vv.LoadedReadonlyAddresses) }(), } default: @@ -471,8 +471,8 @@ func encodeTransactionResponseBasedOnWantedEncoding( if ok { { tables := map[solana.PublicKey]solana.PublicKeySlice{} - writable := byteSlicesToKeySlices(unwrappedMeta.LoadedWritableAddresses) - readonly := byteSlicesToKeySlices(unwrappedMeta.LoadedReadonlyAddresses) + writable := byteSlicesToKeySlice(unwrappedMeta.LoadedWritableAddresses) + readonly := byteSlicesToKeySlice(unwrappedMeta.LoadedReadonlyAddresses) for _, addr := range tx.Message.AddressTableLookups { numTakeWritable := len(addr.WritableIndexes) numTakeReadonly := len(addr.ReadonlyIndexes) @@ -609,7 +609,7 @@ func byeSliceToUint16Slice(in []byte) []uint16 { return out } -func byteSlicesToKeySlices(keys [][]byte) []solana.PublicKey { +func byteSlicesToKeySlice(keys [][]byte) []solana.PublicKey { var out []solana.PublicKey for _, key := range keys { var k solana.PublicKey diff --git a/split-car-fetcher/miner-info.go b/split-car-fetcher/miner-info.go index b9606f9c..a051a9b8 100644 --- a/split-car-fetcher/miner-info.go +++ b/split-car-fetcher/miner-info.go @@ -32,7 +32,8 @@ func NewMinerInfo( ) *MinerInfoCache { minerInfoCache := ttlcache.New[string, *MinerInfo]( ttlcache.WithTTL[string, *MinerInfo](cacheTTL), - ttlcache.WithDisableTouchOnHit[string, *MinerInfo]()) + ttlcache.WithDisableTouchOnHit[string, *MinerInfo](), + ) return &MinerInfoCache{ lotusClient: lotusClient, @@ -47,7 +48,15 @@ func (d *MinerInfoCache) GetProviderInfo(ctx context.Context, provider address.A return file.Value(), nil } - minerInfo, err := (&MinerInfoFetcher{Client: d.lotusClient}).GetProviderInfo(ctx, provider.String()) + ctx, cancel := context.WithTimeout(ctx, d.requestTimeout) + defer cancel() + minerInfo, err := retryExponentialBackoff(ctx, + func() (*MinerInfo, error) { + return (&MinerInfoFetcher{Client: d.lotusClient}).GetProviderInfo(ctx, provider.String()) + }, + time.Second*2, + 5, + ) if err != nil { return nil, err } @@ -59,6 +68,29 @@ type MinerInfoFetcher struct { Client jsonrpc.RPCClient } +func retryExponentialBackoff[T any]( + ctx context.Context, + fn func() (T, error), + startingBackoff time.Duration, + maxRetries int, +) (T, error) { + var err error + var out T + for i := 0; i < maxRetries; i++ { + out, err = fn() + if err == nil { + return out, nil + } + select { + case <-ctx.Done(): + return out, fmt.Errorf("context done: %w; last error: %s", ctx.Err(), err) + case <-time.After(startingBackoff): + startingBackoff *= 2 + } + } + return out, err +} + func (m *MinerInfoFetcher) GetProviderInfo(ctx context.Context, provider string) (*MinerInfo, error) { minerInfo := new(MinerInfo) err := m.Client.CallFor(ctx, minerInfo, "Filecoin.StateMinerInfo", provider, nil)