Skip to content

DLPX-77532 The iscsi estat script needs to be fixed so it reports dat… #69

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 13, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 54 additions & 13 deletions bpf/estat/iscsi.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,24 @@
* SPDX-License-Identifier: GPL-2.0-or-later
*/

/*
* This tracer provides latency and throughput data for the iscsi target
* reads and writes. The latency is measured from entering
* iscsit_process_scsi_cmd() to exiting iscsit_response_queue(). The
* thread that enters iscsit_process_scsi_cmd() will put an entry on the
* request task queue. This entry will be removed from the queue and
* processed by another thread which calls iscsit_response_queue().
* The tracing is performed by three probe functions.
* 1. iscsi_target_start - This function saves a timestamp of the entry
* into iscsit_process_scsi_cmd() hashed by a pointer to the iscsi_cmd.
* 2. iscsi_target_response - This function serves the purpose of moving
* the timestamp saved by iscsi_target_start to a thread id based hash.
* Also the size and direction are stored in the hash since kretprobes
* do not have access to parameters.
* 3. iscsi_target_end - This function retrieves the hashed base data by
* thread id and performs the data aggregation.
*/

#include <uapi/linux/ptrace.h>
#include <linux/bpf_common.h>
#include <uapi/linux/bpf.h>
Expand All @@ -20,20 +38,20 @@
/*
 * Value type stored in the iscsi_base_data map: the per-command data that
 * is saved by one probe and consumed when the latency is aggregated.
 */
typedef struct {
/* start timestamp as returned by bpf_ktime_get_ns() */
u64 ts;
/* data length of the command (filled from cmd->se_cmd.data_length) */
u64 size;
/* cmd->data_direction; compared against DMA_FROM_DEVICE / DMA_TO_DEVICE */
u32 direction;
} iscsi_data_t;


/*
 * Start timestamp of each in-flight command, keyed by the value of the
 * iscsi_cmd pointer.
 */
BPF_HASH(iscsi_start_ts, u64, u64);
/*
 * Timestamp, size and direction of the command a thread is currently
 * responding to, keyed by thread id. Declared exactly once: a second
 * declaration with a u64 key would conflict with the u32 thread-id key
 * used by the lookups, updates and deletes on this map.
 */
BPF_HASH(iscsi_base_data, u32, iscsi_data_t);

// @@ kprobe|iscsit_process_scsi_cmd|iscsi_target_start
int
iscsi_target_start(struct pt_regs *ctx, struct iscsi_conn *conn,
struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr)
{
iscsi_data_t data = {};
data.ts = bpf_ktime_get_ns();
data.size = hdr->data_length;
iscsi_base_data.update((u64 *) &cmd, &data);
u64 ts = bpf_ktime_get_ns();
iscsi_start_ts.update((u64 *) &cmd, &ts);

return (0);
}
Expand All @@ -53,25 +71,48 @@ aggregate_data(iscsi_data_t *data, u64 ts, char *opstr)
return (0);
}

// @@ kprobe|iscsit_build_rsp_pdu|iscsi_target_end
// @@ kprobe|iscsit_build_datain_pdu|iscsi_target_end


// @@ kprobe|iscsit_response_queue|iscsi_target_response
int
iscsi_target_end(struct pt_regs *ctx, struct iscsi_cmd *cmd)
iscsi_target_response(struct pt_regs *ctx, struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state)
{
u32 tid = bpf_get_current_pid_tgid();
iscsi_data_t data = {};

u64 *tsp = iscsi_start_ts.lookup((u64 *) &cmd);
if (tsp == 0) {
return (0); // missed issue
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you tried recording that metric to see if we were hitting this case after the initial few seconds?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did the experiment @pzakha suggested and didn't see any misses after the startup period.

}

data.ts = *tsp;
data.size = cmd->se_cmd.data_length;
data.direction = cmd->data_direction;

iscsi_base_data.update(&tid, &data);
iscsi_start_ts.delete((u64 *) &cmd);

return (0);
}

/*
 * Return probe on iscsit_response_queue(): look up the data saved for the
 * current thread by the entry probe and aggregate it as a read or a write.
 *
 * Only ctx is available here, so the direction is taken from the saved
 * data rather than from the iscsi_cmd. Commands whose direction is
 * neither DMA_FROM_DEVICE nor DMA_TO_DEVICE are not aggregated. The
 * per-thread entry is removed once it has been looked up successfully.
 *
 * Always returns 0.
 */
// @@ kretprobe|iscsit_response_queue|iscsi_target_end
int
iscsi_target_end(struct pt_regs *ctx)
{
	u64 ts = bpf_ktime_get_ns();
	u32 tid = bpf_get_current_pid_tgid();
	iscsi_data_t *data = iscsi_base_data.lookup(&tid);

	if (data == 0) {
		return (0);	// missed issue
	}

	if (data->direction == DMA_FROM_DEVICE) {
		aggregate_data(data, ts, READ_STR);
	} else if (data->direction == DMA_TO_DEVICE) {
		aggregate_data(data, ts, WRITE_STR);
	}
	iscsi_base_data.delete(&tid);

	return (0);
}