@@ -45,6 +45,10 @@ const (
45
45
)
46
46
47
47
const (
48
+ // MaxBackupLTXFileN is the number of LTX files that can be compacted
49
+ // together at a time when sending data to the backup service.
50
+ MaxBackupLTXFileN = 256
51
+
48
52
MetricsMonitorInterval = 1 * time .Second
49
53
)
50
54
@@ -1120,9 +1124,9 @@ func (s *Store) streamBackupDB(ctx context.Context, name string, remotePos ltx.P
1120
1124
1121
1125
// Check local replication position.
1122
1126
// If we haven't written anything yet then try to send data.
1123
- pos := db .Pos ()
1124
- if pos .IsZero () {
1125
- return pos , nil
1127
+ localPos := db .Pos ()
1128
+ if localPos .IsZero () {
1129
+ return localPos , nil
1126
1130
}
1127
1131
1128
1132
// If the database doesn't exist remotely, perform a full snapshot.
@@ -1132,11 +1136,11 @@ func (s *Store) streamBackupDB(ctx context.Context, name string, remotePos ltx.P
1132
1136
1133
1137
// If the position from the backup server is ahead of the primary then we
1134
1138
// need to perform a recovery so that we snapshot from the backup server.
1135
- if remotePos .TXID > pos .TXID {
1139
+ if remotePos .TXID > localPos .TXID {
1136
1140
slog .Warn ("restoring from backup" ,
1137
1141
slog .String ("name" , name ),
1138
1142
slog .Group ("pos" ,
1139
- slog .String ("local" , pos .String ()),
1143
+ slog .String ("local" , localPos .String ()),
1140
1144
slog .String ("remote" , remotePos .String ()),
1141
1145
),
1142
1146
slog .String ("reason" , "remote-ahead" ),
@@ -1148,12 +1152,12 @@ func (s *Store) streamBackupDB(ctx context.Context, name string, remotePos ltx.P
1148
1152
// If the TXID matches the backup server, we need to ensure the checksum
1149
1153
// does as well. If it doesn't, we need to grab a snapshot from the backup
1150
1154
// server. If it does, then we can exit as we're already in sync.
1151
- if remotePos .TXID == pos .TXID {
1152
- if remotePos .PostApplyChecksum != pos .PostApplyChecksum {
1155
+ if remotePos .TXID == localPos .TXID {
1156
+ if remotePos .PostApplyChecksum != localPos .PostApplyChecksum {
1153
1157
slog .Warn ("restoring from backup" ,
1154
1158
slog .String ("name" , name ),
1155
1159
slog .Group ("pos" ,
1156
- slog .String ("local" , pos .String ()),
1160
+ slog .String ("local" , localPos .String ()),
1157
1161
slog .String ("remote" , remotePos .String ()),
1158
1162
),
1159
1163
slog .String ("reason" , "chksum-mismatch" ),
@@ -1162,10 +1166,10 @@ func (s *Store) streamBackupDB(ctx context.Context, name string, remotePos ltx.P
1162
1166
}
1163
1167
1164
1168
slog .Debug ("database in sync with backup, skipping" , slog .String ("name" , name ))
1165
- return pos , nil // already in sync
1169
+ return localPos , nil // already in sync
1166
1170
}
1167
1171
1168
- assert (remotePos .TXID < pos .TXID , "remote/local position must be ordered" )
1172
+ assert (remotePos .TXID < localPos .TXID , "remote/local position must be ordered" )
1169
1173
1170
1174
// OPTIMIZE: Check that remote postApplyChecksum equals next TXID's preApplyChecksum
1171
1175
@@ -1176,13 +1180,14 @@ func (s *Store) streamBackupDB(ctx context.Context, name string, remotePos ltx.P
1176
1180
_ = r .(io.Closer ).Close ()
1177
1181
}
1178
1182
}()
1179
- for txID := remotePos .TXID + 1 ; txID <= pos .TXID ; txID ++ {
1183
+
1184
+ for txID , n := remotePos .TXID + 1 , 0 ; txID <= localPos .TXID && n < MaxBackupLTXFileN ; txID , n = txID + 1 , n + 1 {
1180
1185
f , err := db .OpenLTXFile (txID )
1181
1186
if os .IsNotExist (err ) {
1182
1187
slog .Warn ("restoring from backup" ,
1183
1188
slog .String ("name" , name ),
1184
1189
slog .Group ("pos" ,
1185
- slog .String ("local" , pos .String ()),
1190
+ slog .String ("local" , localPos .String ()),
1186
1191
slog .String ("remote" , remotePos .String ()),
1187
1192
),
1188
1193
slog .String ("txid" , txID .String ()),
@@ -1197,10 +1202,17 @@ func (s *Store) streamBackupDB(ctx context.Context, name string, remotePos ltx.P
1197
1202
1198
1203
// Compact LTX files through a pipe so we can pass it to the backup client.
1199
1204
pr , pw := io .Pipe ()
1205
+ var pos ltx.Pos
1200
1206
go func () {
1201
1207
compactor := ltx .NewCompactor (pw , rdrs )
1202
1208
compactor .HeaderFlags = s .ltxHeaderFlags ()
1203
- _ = pw .CloseWithError (compactor .Compact (ctx ))
1209
+ if err := compactor .Compact (ctx ); err != nil {
1210
+ _ = pw .CloseWithError (err )
1211
+ return
1212
+ }
1213
+
1214
+ pos = ltx .NewPos (compactor .Header ().MaxTXID , compactor .Trailer ().PostApplyChecksum )
1215
+ _ = pw .Close ()
1204
1216
}()
1205
1217
1206
1218
var pmErr * ltx.PosMismatchError
@@ -1209,7 +1221,7 @@ func (s *Store) streamBackupDB(ctx context.Context, name string, remotePos ltx.P
1209
1221
slog .Warn ("restoring from backup" ,
1210
1222
slog .String ("name" , name ),
1211
1223
slog .Group ("pos" ,
1212
- slog .String ("local" , pos .String ()),
1224
+ slog .String ("local" , localPos .String ()),
1213
1225
slog .String ("remote" , pmErr .Pos .String ()),
1214
1226
),
1215
1227
slog .String ("reason" , "out-of-sync" ),
0 commit comments