Skip to content

Commit 9b1f298

Browse files
Moshe Shemesh authored and Saeed Mahameed committed
net/mlx5: Add support for FW fatal reporter dump
Add support for a dump callback to the mlx5 FW fatal reporter. The FW fatal dump uses the cr-dump functionality to gather cr-space data for debugging. The cr-dump uses the vsc interface, which remains valid even when the FW command interface is not functional, as is the case in most FW fatal errors. Command example and output: $ devlink health dump show pci/0000:82:00.0 reporter fw_fatal crdump_data: 00 20 00 01 00 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ba 82 00 00 0c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 20 00 00 00 00 00 00 00 00 00 00 00 00 00 00 fa 00 a4 0e 00 00 00 00 00 00 80 c7 fe ff 50 0a 00 00 ... ... Signed-off-by: Moshe Shemesh <moshe@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
1 parent 96c82cd commit 9b1f298

File tree

1 file changed

+50
-0
lines changed
  • drivers/net/ethernet/mellanox/mlx5/core

1 file changed

+50
-0
lines changed

drivers/net/ethernet/mellanox/mlx5/core/health.c

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -565,9 +565,59 @@ mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
565565
return mlx5_health_try_recover(dev);
566566
}
567567

568+
#define MLX5_CR_DUMP_CHUNK_SIZE 256
569+
static int
570+
mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
571+
struct devlink_fmsg *fmsg, void *priv_ctx)
572+
{
573+
struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
574+
u32 crdump_size = dev->priv.health.crdump_size;
575+
u32 *cr_data;
576+
u32 data_size;
577+
u32 offset;
578+
int err;
579+
580+
if (!mlx5_core_is_pf(dev))
581+
return -EPERM;
582+
583+
cr_data = kvmalloc(crdump_size, GFP_KERNEL);
584+
if (!cr_data)
585+
return -ENOMEM;
586+
err = mlx5_crdump_collect(dev, cr_data);
587+
if (err)
588+
return err;
589+
590+
if (priv_ctx) {
591+
struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
592+
593+
err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
594+
if (err)
595+
goto free_data;
596+
}
597+
598+
err = devlink_fmsg_arr_pair_nest_start(fmsg, "crdump_data");
599+
if (err)
600+
goto free_data;
601+
for (offset = 0; offset < crdump_size; offset += data_size) {
602+
if (crdump_size - offset < MLX5_CR_DUMP_CHUNK_SIZE)
603+
data_size = crdump_size - offset;
604+
else
605+
data_size = MLX5_CR_DUMP_CHUNK_SIZE;
606+
err = devlink_fmsg_binary_put(fmsg, cr_data, data_size);
607+
if (err)
608+
goto free_data;
609+
}
610+
err = devlink_fmsg_arr_pair_nest_end(fmsg);
611+
612+
free_data:
613+
kfree(cr_data);
614+
return err;
615+
}
616+
568617
static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
569618
.name = "fw_fatal",
570619
.recover = mlx5_fw_fatal_reporter_recover,
620+
.dump = mlx5_fw_fatal_reporter_dump,
571621
};
572622

573623
#define MLX5_REPORTER_FW_GRACEFUL_PERIOD 1200000

0 commit comments

Comments
 (0)