Skip to content

Commit 0ad0966

Browse files
rjl493456442 and fjl authored
core/rawdb: introduce flush offset in freezer (#30392)
This is a follow-up PR to #29792 to get rid of the data file sync. **This is a non-backward-compatible change, which increments the database version from 8 to 9**. We introduce a flushOffset for each freezer table, which tracks the position of the most recently fsync’d item in the index file. When this offset moves forward, it indicates that all index entries below it, along with their corresponding data items, have been properly persisted to disk. The offset can also be moved backward when truncating from either the head or tail of the file. Previously, the data file required an explicit fsync after every mutation, which was highly inefficient. With the introduction of the flush offset, the synchronization strategy becomes more flexible, allowing the freezer to sync every 30 seconds instead. The data items above the flush offset are regarded as volatile, and callers must ensure they are recoverable after an unclean shutdown, or explicitly sync the freezer before proceeding with any further operations. --------- Co-authored-by: Felix Lange <fjl@twurst.com>
1 parent e26dd77 commit 0ad0966

File tree

8 files changed

+654
-250
lines changed

8 files changed

+654
-250
lines changed

core/blockchain.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,23 +113,29 @@ const (
113113
// * the `BlockNumber`, `TxHash`, `TxIndex`, `BlockHash` and `Index` fields of log are deleted
114114
// * the `Bloom` field of receipt is deleted
115115
// * the `BlockIndex` and `TxIndex` fields of txlookup are deleted
116+
//
116117
// - Version 5
117118
// The following incompatible database changes were added:
118119
// * the `TxHash`, `GasCost`, and `ContractAddress` fields are no longer stored for a receipt
119120
// * the `TxHash`, `GasCost`, and `ContractAddress` fields are computed by looking up the
120121
// receipts' corresponding block
122+
//
121123
// - Version 6
122124
// The following incompatible database changes were added:
123125
// * Transaction lookup information stores the corresponding block number instead of block hash
126+
//
124127
// - Version 7
125128
// The following incompatible database changes were added:
126129
// * Use freezer as the ancient database to maintain all ancient data
130+
//
127131
// - Version 8
128132
// The following incompatible database changes were added:
129133
// * New scheme for contract code in order to separate the codes and trie nodes
134+
//
130135
// - Version 9
131-
// Total difficulty has been removed from both the key-value store and the
132-
// ancient store, the td freezer table has been deprecated since that.
136+
// The following incompatible database changes were added:
137+
// * Total difficulty has been removed from both the key-value store and the ancient store.
138+
// * The metadata structure of freezer is changed by adding 'flushOffset'
133139
BlockChainVersion uint64 = 9
134140
)
135141

core/rawdb/accessors_chain_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -849,6 +849,7 @@ func TestHeadersRLPStorage(t *testing.T) {
849849
t.Fatalf("failed to create database with ancient backend")
850850
}
851851
defer db.Close()
852+
852853
// Create blocks
853854
var chain []*types.Block
854855
var pHash common.Hash
@@ -864,7 +865,7 @@ func TestHeadersRLPStorage(t *testing.T) {
864865
chain = append(chain, block)
865866
pHash = block.Hash()
866867
}
867-
var receipts []types.Receipts = make([]types.Receipts, 100)
868+
receipts := make([]types.Receipts, 100)
868869
// Write first half to ancients
869870
WriteAncientBlocks(db, chain[:50], receipts[:50])
870871
// Write second half to db

core/rawdb/ancient_scheme.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ const (
5858
stateHistoryStorageData = "storage.data"
5959
)
6060

61+
// stateFreezerNoSnappy configures whether compression is disabled for the state freezer.
6162
var stateFreezerNoSnappy = map[string]bool{
6263
stateHistoryMeta: true,
6364
stateHistoryAccountIndex: false,

core/rawdb/freezer_batch.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package rawdb
1919
import (
2020
"fmt"
2121
"math"
22+
"time"
2223

2324
"github.com/ethereum/go-ethereum/rlp"
2425
"github.com/golang/snappy"
@@ -188,9 +189,6 @@ func (batch *freezerTableBatch) commit() error {
188189
if err != nil {
189190
return err
190191
}
191-
if err := batch.t.head.Sync(); err != nil {
192-
return err
193-
}
194192
dataSize := int64(len(batch.dataBuffer))
195193
batch.dataBuffer = batch.dataBuffer[:0]
196194

@@ -208,6 +206,12 @@ func (batch *freezerTableBatch) commit() error {
208206
// Update metrics.
209207
batch.t.sizeGauge.Inc(dataSize + indexSize)
210208
batch.t.writeMeter.Mark(dataSize + indexSize)
209+
210+
// Periodically sync the table. TODO(rjl493456442): make the interval configurable?
211+
if time.Since(batch.t.lastSync) > 30*time.Second {
212+
batch.t.lastSync = time.Now()
213+
return batch.t.Sync()
214+
}
211215
return nil
212216
}
213217

core/rawdb/freezer_meta.go

Lines changed: 133 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -17,93 +17,173 @@
1717
package rawdb
1818

1919
import (
20+
"errors"
2021
"io"
22+
"math"
2123
"os"
2224

2325
"github.com/ethereum/go-ethereum/log"
2426
"github.com/ethereum/go-ethereum/rlp"
2527
)
2628

27-
const freezerVersion = 1 // The initial version tag of freezer table metadata
29+
const (
30+
freezerTableV1 = 1 // Initial version of metadata struct
31+
freezerTableV2 = 2 // Add field: 'flushOffset'
32+
freezerVersion = freezerTableV2 // The current used version
33+
)
2834

29-
// freezerTableMeta wraps all the metadata of the freezer table.
35+
// freezerTableMeta is a collection of additional properties that describe the
36+
// freezer table. These properties are designed with error resilience, allowing
37+
// them to be automatically corrected after an error occurs without significantly
38+
// impacting overall correctness.
3039
type freezerTableMeta struct {
31-
// Version is the versioning descriptor of the freezer table.
32-
Version uint16
40+
file *os.File // file handler of metadata
41+
version uint16 // version descriptor of the freezer table
3342

34-
// VirtualTail indicates how many items have been marked as deleted.
35-
// Its value is equal to the number of items removed from the table
36-
// plus the number of items hidden in the table, so it should never
37-
// be lower than the "actual tail".
38-
VirtualTail uint64
39-
}
43+
// virtualTail represents the number of items marked as deleted. It is
44+
// calculated as the sum of items removed from the table and the items
45+
// hidden within the table, and should never be less than the "actual
46+
// tail".
47+
//
48+
// If lost due to a crash or other reasons, it will be reset to the number
49+
// of items deleted from the table, causing the previously hidden items
50+
// to become visible, which is an acceptable consequence.
51+
virtualTail uint64
4052

41-
// newMetadata initializes the metadata object with the given virtual tail.
42-
func newMetadata(tail uint64) *freezerTableMeta {
43-
return &freezerTableMeta{
44-
Version: freezerVersion,
45-
VirtualTail: tail,
46-
}
53+
// flushOffset represents the offset in the index file up to which the index
54+
// items along with the corresponding data items in data files have been flushed
55+
// (fsync’d) to disk. Beyond this offset, data integrity is not guaranteed,
56+
// the extra index items along with the associated data items should be removed
57+
// during the startup.
58+
//
59+
// The principle is that all data items above the flush offset are considered
60+
// volatile and should be recoverable if they are discarded after an unclean
61+
// shutdown. If data integrity is required, manually force a sync of the
62+
// freezer before proceeding with further operations (e.g. do freezer.Sync()
63+
// first and then write data to key value store in some circumstances).
64+
//
65+
// The offset could be moved forward by applying sync operation, or be moved
66+
// backward in cases of head/tail truncation, etc.
67+
flushOffset int64
4768
}
4869

49-
// readMetadata reads the metadata of the freezer table from the
50-
// given metadata file.
51-
func readMetadata(file *os.File) (*freezerTableMeta, error) {
70+
// decodeV1 attempts to decode the metadata structure in v1 format. If it fails or
71+
// the result is incompatible, nil is returned.
72+
func decodeV1(file *os.File) *freezerTableMeta {
5273
_, err := file.Seek(0, io.SeekStart)
5374
if err != nil {
54-
return nil, err
75+
return nil
5576
}
56-
var meta freezerTableMeta
57-
if err := rlp.Decode(file, &meta); err != nil {
58-
return nil, err
77+
type obj struct {
78+
Version uint16
79+
Tail uint64
80+
}
81+
var o obj
82+
if err := rlp.Decode(file, &o); err != nil {
83+
return nil
84+
}
85+
if o.Version != freezerTableV1 {
86+
return nil
87+
}
88+
return &freezerTableMeta{
89+
file: file,
90+
version: o.Version,
91+
virtualTail: o.Tail,
5992
}
60-
return &meta, nil
6193
}
6294

63-
// writeMetadata writes the metadata of the freezer table into the
64-
// given metadata file.
65-
func writeMetadata(file *os.File, meta *freezerTableMeta) error {
95+
// decodeV2 attempts to decode the metadata structure in v2 format. If it fails or
96+
// the result is incompatible, nil is returned.
97+
func decodeV2(file *os.File) *freezerTableMeta {
6698
_, err := file.Seek(0, io.SeekStart)
6799
if err != nil {
68-
return err
100+
return nil
101+
}
102+
type obj struct {
103+
Version uint16
104+
Tail uint64
105+
Offset uint64
106+
}
107+
var o obj
108+
if err := rlp.Decode(file, &o); err != nil {
109+
return nil
110+
}
111+
if o.Version != freezerTableV2 {
112+
return nil
113+
}
114+
if o.Offset > math.MaxInt64 {
115+
log.Error("Invalid flushOffset in freezer metadata", "offset", o.Offset, "file", file.Name())
116+
return nil
117+
}
118+
return &freezerTableMeta{
119+
file: file,
120+
version: freezerTableV2,
121+
virtualTail: o.Tail,
122+
flushOffset: int64(o.Offset),
69123
}
70-
return rlp.Encode(file, meta)
71124
}
72125

73-
// loadMetadata loads the metadata from the given metadata file.
74-
// Initializes the metadata file with the given "actual tail" if
75-
// it's empty.
76-
func loadMetadata(file *os.File, tail uint64) (*freezerTableMeta, error) {
126+
// newMetadata initializes the metadata object, either by loading it from the file
127+
// or by constructing a new one from scratch.
128+
func newMetadata(file *os.File) (*freezerTableMeta, error) {
77129
stat, err := file.Stat()
78130
if err != nil {
79131
return nil, err
80132
}
81-
// Write the metadata with the given actual tail into metadata file
82-
// if it's non-existent. There are two possible scenarios here:
83-
// - the freezer table is empty
84-
// - the freezer table is legacy
85-
// In both cases, write the meta into the file with the actual tail
86-
// as the virtual tail.
87133
if stat.Size() == 0 {
88-
m := newMetadata(tail)
89-
if err := writeMetadata(file, m); err != nil {
134+
m := &freezerTableMeta{
135+
file: file,
136+
version: freezerTableV2,
137+
virtualTail: 0,
138+
flushOffset: 0,
139+
}
140+
if err := m.write(true); err != nil {
90141
return nil, err
91142
}
92143
return m, nil
93144
}
94-
m, err := readMetadata(file)
145+
if m := decodeV2(file); m != nil {
146+
return m, nil
147+
}
148+
if m := decodeV1(file); m != nil {
149+
return m, nil // legacy metadata
150+
}
151+
return nil, errors.New("failed to decode metadata")
152+
}
153+
154+
// setVirtualTail sets the virtual tail and flushes the metadata if sync is true.
155+
func (m *freezerTableMeta) setVirtualTail(tail uint64, sync bool) error {
156+
m.virtualTail = tail
157+
return m.write(sync)
158+
}
159+
160+
// setFlushOffset sets the flush offset and flushes the metadata if sync is true.
161+
func (m *freezerTableMeta) setFlushOffset(offset int64, sync bool) error {
162+
m.flushOffset = offset
163+
return m.write(sync)
164+
}
165+
166+
// write flushes the content of the metadata into the file and performs an fsync if required.
167+
func (m *freezerTableMeta) write(sync bool) error {
168+
type obj struct {
169+
Version uint16
170+
Tail uint64
171+
Offset uint64
172+
}
173+
var o obj
174+
o.Version = freezerVersion // forcibly use the current version
175+
o.Tail = m.virtualTail
176+
o.Offset = uint64(m.flushOffset)
177+
178+
_, err := m.file.Seek(0, io.SeekStart)
95179
if err != nil {
96-
return nil, err
180+
return err
97181
}
98-
// Update the virtual tail with the given actual tail if it's even
99-
// lower than it. Theoretically it shouldn't happen at all, print
100-
// a warning here.
101-
if m.VirtualTail < tail {
102-
log.Warn("Updated virtual tail", "have", m.VirtualTail, "now", tail)
103-
m.VirtualTail = tail
104-
if err := writeMetadata(file, m); err != nil {
105-
return nil, err
106-
}
182+
if err := rlp.Encode(m.file, &o); err != nil {
183+
return err
184+
}
185+
if !sync {
186+
return nil
107187
}
108-
return m, nil
188+
return m.file.Sync()
109189
}

0 commit comments

Comments
 (0)