Skip to content

Commit d7ce2f2

Browse files
authored
br: wait tiflash replicas ready && fix unstable test (#46301) (#46342)
close #46302
1 parent 7241e78 commit d7ce2f2

File tree

3 files changed

+38
-10
lines changed

3 files changed

+38
-10
lines changed

br/pkg/restore/client.go

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1529,11 +1529,28 @@ func (rc *Client) GoWaitTiFlashReady(ctx context.Context, inCh <-chan *CreatedTa
15291529
zap.Stringer("table", tbl.OldTable.Info.Name),
15301530
zap.Stringer("db", tbl.OldTable.DB.Name))
15311531
for {
1532-
progress, err := infosync.CalculateTiFlashProgress(tbl.Table.ID, tbl.Table.TiFlashReplica.Count, tiFlashStores)
1533-
if err != nil {
1534-
log.Warn("failed to get tiflash replica progress, wait for next retry", zap.Error(err))
1535-
time.Sleep(time.Second)
1536-
continue
1532+
var progress float64
1533+
if pi := tbl.Table.GetPartitionInfo(); pi != nil && len(pi.Definitions) > 0 {
1534+
for _, p := range pi.Definitions {
1535+
progressOfPartition, err := infosync.MustGetTiFlashProgress(p.ID, tbl.Table.TiFlashReplica.Count, &tiFlashStores)
1536+
if err != nil {
1537+
log.Warn("failed to get progress for tiflash partition replica, retry it",
1538+
zap.Int64("tableID", tbl.Table.ID), zap.Int64("partitionID", p.ID), zap.Error(err))
1539+
time.Sleep(time.Second)
1540+
continue
1541+
}
1542+
progress += progressOfPartition
1543+
}
1544+
progress = progress / float64(len(pi.Definitions))
1545+
} else {
1546+
var err error
1547+
progress, err = infosync.MustGetTiFlashProgress(tbl.Table.ID, tbl.Table.TiFlashReplica.Count, &tiFlashStores)
1548+
if err != nil {
1549+
log.Warn("failed to get progress for tiflash replica, retry it",
1550+
zap.Int64("tableID", tbl.Table.ID), zap.Error(err))
1551+
time.Sleep(time.Second)
1552+
continue
1553+
}
15371554
}
15381555
// check until progress is 1
15391556
if progress == 1 {

br/pkg/task/restore.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -191,11 +191,7 @@ type RestoreConfig struct {
191191
PitrBatchSize uint32 `json:"pitr-batch-size" toml:"pitr-batch-size"`
192192
PitrConcurrency uint32 `json:"-" toml:"-"`
193193

194-
UseCheckpoint bool `json:"use-checkpoint" toml:"use-checkpoint"`
195-
checkpointSnapshotRestoreTaskName string `json:"-" toml:"-"`
196-
checkpointLogRestoreTaskName string `json:"-" toml:"-"`
197-
checkpointTaskInfoClusterID uint64 `json:"-" toml:"-"`
198-
WaitTiflashReady bool `json:"wait-tiflash-ready" toml:"wait-tiflash-ready"`
194+
WaitTiflashReady bool `json:"wait-tiflash-ready" toml:"wait-tiflash-ready"`
199195

200196
// for ebs-based restore
201197
FullBackupType FullBackupType `json:"full-backup-type" toml:"full-backup-type"`

br/tests/br_tiflash/run.sh

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,29 @@ run_sql "CREATE DATABASE $DB"
2323

2424
run_sql "CREATE TABLE $DB.kv(k varchar(256) primary key, v int)"
2525

26+
run_sql "CREATE TABLE $DB.partition_kv(\
27+
k INT, \
28+
v INT, \
29+
PRIMARY KEY(k) CLUSTERED \
30+
) PARTITION BY RANGE(k) (\
31+
PARTITION p0 VALUES LESS THAN (200), \
32+
PARTITION p1 VALUES LESS THAN (400), \
33+
PARTITION p2 VALUES LESS THAN MAXVALUE)"
34+
2635
stmt="INSERT INTO $DB.kv(k, v) VALUES ('1-record', 1)"
36+
parition_stmt="INSERT INTO $DB.partition_kv(k, v) VALUES (1, 1)"
2737
for i in $(seq 2 $RECORD_COUNT); do
2838
stmt="$stmt,('$i-record', $i)"
39+
parition_stmt="$parition_stmt,($i, $i)"
2940
done
3041
run_sql "$stmt"
42+
run_sql "$parition_stmt"
3143

3244
if ! run_sql "ALTER TABLE $DB.kv SET TIFLASH REPLICA 1"; then
3345
# 10s should be enough for tiflash-proxy get started
3446
sleep 10
3547
run_sql "ALTER TABLE $DB.kv SET TIFLASH REPLICA 1"
48+
run_sql "ALTER TABLE $DB.partition_kv SET TIFLASH REPLICA 1"
3649
fi
3750

3851

@@ -54,6 +67,8 @@ run_sql "DROP DATABASE $DB"
5467
run_br restore full -s "local://$TEST_DIR/$DB" --pd $PD_ADDR --wait-tiflash-ready=true
5568

5669
# check TiFlash sync
70+
echo "wait 3 seconds for tiflash tick puller triggered"
71+
sleep 3
5772
if ! [ $(run_sql "select * from information_schema.tiflash_replica" | grep "PROGRESS" | sed "s/[^0-9]//g") -eq 1 ]; then
5873
echo "restore didn't wait tiflash synced after set --wait-tiflash-ready=true."
5974
exit 1

0 commit comments

Comments
 (0)