diff --git a/statistics/handle/bootstrap.go b/statistics/handle/bootstrap.go index 13309ac6419f2..09fa2a93df879 100644 --- a/statistics/handle/bootstrap.go +++ b/statistics/handle/bootstrap.go @@ -275,6 +275,12 @@ func (h *Handle) initStatsFMSketch(cache *statsCache) error { } func (h *Handle) initStatsBuckets4Chunk(cache *statsCache, iter *chunk.Iterator4Chunk) { + unspecifiedLengthTp := types.NewFieldType(mysql.TypeBlob) + var ( + hasErr bool + failedTableID int64 + failedHistID int64 + ) for row := iter.Begin(); row != iter.End(); row = iter.Next() { tableID, isIndex, histID := row.GetInt64(0), row.GetInt64(1), row.GetInt64(2) table, ok := cache.Get(tableID) @@ -304,22 +310,44 @@ func (h *Handle) initStatsBuckets4Chunk(cache *statsCache, iter *chunk.Iterator4 // TODO: do the correct time zone conversion for timestamp-type columns' upper/lower bounds. sc := &stmtctx.StatementContext{TimeZone: time.UTC, AllowInvalidDate: true, IgnoreZeroInDate: true} var err error - lower, err = d.ConvertTo(sc, &column.Info.FieldType) + if column.Info.FieldType.EvalType() == types.ETString && column.Info.FieldType.GetType() != mysql.TypeEnum && column.Info.FieldType.GetType() != mysql.TypeSet { + // For new collation data, when storing the bounds of the histogram, we store the collate key instead of the + // original value. + // But there's additional conversion logic for new collation data, and the collate key might be longer than + // the FieldType.flen. + // If we use the original FieldType here, there might be errors like "Invalid utf8mb4 character string" + // or "Data too long". + // So we change it to TypeBlob to bypass those logics here. + lower, err = d.ConvertTo(sc, unspecifiedLengthTp) + } else { + lower, err = d.ConvertTo(sc, &column.Info.FieldType) + } if err != nil { - logutil.BgLogger().Debug("decode bucket lower bound failed", zap.Error(err)) + hasErr = true + failedTableID = tableID + failedHistID = histID delete(table.Columns, histID) continue } d = types.NewBytesDatum(row.GetBytes(6)) - upper, err = d.ConvertTo(sc, &column.Info.FieldType) + if column.Info.FieldType.EvalType() == types.ETString && column.Info.FieldType.GetType() != mysql.TypeEnum && column.Info.FieldType.GetType() != mysql.TypeSet { + upper, err = d.ConvertTo(sc, unspecifiedLengthTp) + } else { + upper, err = d.ConvertTo(sc, &column.Info.FieldType) + } if err != nil { - logutil.BgLogger().Debug("decode bucket upper bound failed", zap.Error(err)) + hasErr = true + failedTableID = tableID + failedHistID = histID delete(table.Columns, histID) continue } } hist.AppendBucketWithNDV(&lower, &upper, row.GetInt64(3), row.GetInt64(4), row.GetInt64(7)) } + if hasErr { + logutil.BgLogger().Error("failed to convert datum for at least one histogram bucket", zap.Int64("table ID", failedTableID), zap.Int64("column ID", failedHistID)) + } } func (h *Handle) initStatsBuckets(cache *statsCache) error {