Skip to content

Commit 2fb976a

Browse files
Xu KuohaiNobody
Xu Kuohai
authored and
Nobody
committed
bpf, arm64: adjust the offset of str/ldr(immediate) to positive number
The BPF STX/LDX instructions use an offset relative to the FP to address stack space. Since BPF_FP is located at the top of the frame, the offset is usually a negative number. However, the arm64 str/ldr immediate instructions require the offset to be a non-negative number. Therefore, this patch tries to convert the offsets. The method is to first find the negative offset furthest from the FP. That offset is then added to the FP to compute a bottom position, called FPB, and the offsets in the other STX/LDX instructions are adjusted to be relative to FPB. FPB is kept in the arm64 callee-saved register x27, which is not used yet. Before adjusting the offsets, the patch checks every instruction to ensure that the FP does not change at run-time. If the FP may change, no offset is adjusted. For example, for the following bpftrace command: bpftrace -e 'kprobe:do_sys_open { printf("opening: %s\n", str(arg1)); }' Without this patch, the jited code (fragment) is: 0: bti c 4: stp x29, x30, [sp, #-16]! 8: mov x29, sp c: stp x19, x20, [sp, #-16]! 10: stp x21, x22, [sp, #-16]! 14: stp x25, x26, [sp, #-16]! 
18: mov x25, sp 1c: mov x26, #0x0 // #0 20: bti j 24: sub sp, sp, #0x90 28: add x19, x0, #0x0 2c: mov x0, #0x0 // #0 30: mov x10, #0xffffffffffffff78 // #-136 34: str x0, [x25, x10] 38: mov x10, #0xffffffffffffff80 // #-128 3c: str x0, [x25, x10] 40: mov x10, #0xffffffffffffff88 // #-120 44: str x0, [x25, x10] 48: mov x10, #0xffffffffffffff90 // #-112 4c: str x0, [x25, x10] 50: mov x10, #0xffffffffffffff98 // #-104 54: str x0, [x25, x10] 58: mov x10, #0xffffffffffffffa0 // #-96 5c: str x0, [x25, x10] 60: mov x10, #0xffffffffffffffa8 // #-88 64: str x0, [x25, x10] 68: mov x10, #0xffffffffffffffb0 // #-80 6c: str x0, [x25, x10] 70: mov x10, #0xffffffffffffffb8 // #-72 74: str x0, [x25, x10] 78: mov x10, #0xffffffffffffffc0 // #-64 7c: str x0, [x25, x10] 80: mov x10, #0xffffffffffffffc8 // #-56 84: str x0, [x25, x10] 88: mov x10, #0xffffffffffffffd0 // #-48 8c: str x0, [x25, x10] 90: mov x10, #0xffffffffffffffd8 // #-40 94: str x0, [x25, x10] 98: mov x10, #0xffffffffffffffe0 // #-32 9c: str x0, [x25, x10] a0: mov x10, #0xffffffffffffffe8 // #-24 a4: str x0, [x25, x10] a8: mov x10, #0xfffffffffffffff0 // #-16 ac: str x0, [x25, x10] b0: mov x10, #0xfffffffffffffff8 // #-8 b4: str x0, [x25, x10] b8: mov x10, #0x8 // #8 bc: ldr x2, [x19, x10] [...] With this patch, jited code(fragment): 0: bti c 4: stp x29, x30, [sp, #-16]! 8: mov x29, sp c: stp x19, x20, [sp, #-16]! 10: stp x21, x22, [sp, #-16]! 14: stp x25, x26, [sp, #-16]! 18: stp x27, x28, [sp, #-16]! 
1c: mov x25, sp 20: sub x27, x25, #0x88 24: mov x26, #0x0 // #0 28: bti j 2c: sub sp, sp, #0x90 30: add x19, x0, #0x0 34: mov x0, #0x0 // #0 38: str x0, [x27] 3c: str x0, [x27, #8] 40: str x0, [x27, #16] 44: str x0, [x27, #24] 48: str x0, [x27, #32] 4c: str x0, [x27, #40] 50: str x0, [x27, #48] 54: str x0, [x27, #56] 58: str x0, [x27, #64] 5c: str x0, [x27, #72] 60: str x0, [x27, #80] 64: str x0, [x27, #88] 68: str x0, [x27, #96] 6c: str x0, [x27, #104] 70: str x0, [x27, #112] 74: str x0, [x27, #120] 78: str x0, [x27, #128] 7c: ldr x2, [x19, #8] [...] Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
1 parent 8f293dd commit 2fb976a

File tree

1 file changed

+79
-5
lines changed

1 file changed

+79
-5
lines changed

arch/arm64/net/bpf_jit_comp.c

+79-5
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
2727
#define TCALL_CNT (MAX_BPF_JIT_REG + 2)
2828
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
29+
#define FP_BOTTOM (MAX_BPF_JIT_REG + 4)
2930

3031
#define check_imm(bits, imm) do { \
3132
if ((((imm) > 0) && ((imm) >> (bits))) || \
@@ -63,6 +64,7 @@ static const int bpf2a64[] = {
6364
[TCALL_CNT] = A64_R(26),
6465
/* temporary register for blinding constants */
6566
[BPF_REG_AX] = A64_R(9),
67+
[FP_BOTTOM] = A64_R(27),
6668
};
6769

6870
struct jit_ctx {
@@ -73,6 +75,7 @@ struct jit_ctx {
7375
int exentry_idx;
7476
__le32 *image;
7577
u32 stack_size;
78+
int fpb_offset;
7679
};
7780

7881
static inline void emit(const u32 insn, struct jit_ctx *ctx)
@@ -234,9 +237,9 @@ static noinline bool is_lsi_offset(s16 offset, int scale)
234237

235238
/* Tail call offset to jump into */
236239
#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
237-
#define PROLOGUE_OFFSET 8
240+
#define PROLOGUE_OFFSET 10
238241
#else
239-
#define PROLOGUE_OFFSET 7
242+
#define PROLOGUE_OFFSET 9
240243
#endif
241244

242245
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
@@ -248,6 +251,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
248251
const u8 r9 = bpf2a64[BPF_REG_9];
249252
const u8 fp = bpf2a64[BPF_REG_FP];
250253
const u8 tcc = bpf2a64[TCALL_CNT];
254+
const u8 fpb = bpf2a64[FP_BOTTOM];
251255
const int idx0 = ctx->idx;
252256
int cur_offset;
253257

@@ -286,9 +290,11 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
286290
emit(A64_PUSH(r6, r7, A64_SP), ctx);
287291
emit(A64_PUSH(r8, r9, A64_SP), ctx);
288292
emit(A64_PUSH(fp, tcc, A64_SP), ctx);
293+
emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx);
289294

290295
/* Set up BPF prog stack base register */
291296
emit(A64_MOV(1, fp, A64_SP), ctx);
297+
emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx);
292298

293299
if (!ebpf_from_cbpf) {
294300
/* Initialize tail_call_cnt */
@@ -553,11 +559,13 @@ static void build_epilogue(struct jit_ctx *ctx)
553559
const u8 r8 = bpf2a64[BPF_REG_8];
554560
const u8 r9 = bpf2a64[BPF_REG_9];
555561
const u8 fp = bpf2a64[BPF_REG_FP];
562+
const u8 fpb = bpf2a64[FP_BOTTOM];
556563

557564
/* We're done with BPF stack */
558565
emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
559566

560567
/* Restore x27 and x28, then fp (x25) and x26 */
568+
emit(A64_POP(fpb, A64_R(28), A64_SP), ctx);
561569
emit(A64_POP(fp, A64_R(26), A64_SP), ctx);
562570

563571
/* Restore callee-saved register */
@@ -645,12 +653,14 @@ static int add_exception_handler(const struct bpf_insn *insn,
645653
static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
646654
bool extra_pass)
647655
{
656+
u8 dst = bpf2a64[insn->dst_reg];
657+
u8 src = bpf2a64[insn->src_reg];
658+
s16 off = insn->off;
659+
const u8 fp = bpf2a64[BPF_REG_FP];
648660
const u8 code = insn->code;
649-
const u8 dst = bpf2a64[insn->dst_reg];
650-
const u8 src = bpf2a64[insn->src_reg];
651661
const u8 tmp = bpf2a64[TMP_REG_1];
652662
const u8 tmp2 = bpf2a64[TMP_REG_2];
653-
const s16 off = insn->off;
663+
const u8 fpb = bpf2a64[FP_BOTTOM];
654664
const s32 imm = insn->imm;
655665
const int i = insn - ctx->prog->insnsi;
656666
const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
@@ -1012,6 +1022,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
10121022
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
10131023
case BPF_LDX | BPF_PROBE_MEM | BPF_H:
10141024
case BPF_LDX | BPF_PROBE_MEM | BPF_B:
1025+
if (ctx->fpb_offset > 0 && src == fp) {
1026+
src = fpb;
1027+
off += ctx->fpb_offset;
1028+
}
10151029
switch (BPF_SIZE(code)) {
10161030
case BPF_W:
10171031
if (is_lsi_offset(off, 2)) {
@@ -1070,6 +1084,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
10701084
case BPF_ST | BPF_MEM | BPF_H:
10711085
case BPF_ST | BPF_MEM | BPF_B:
10721086
case BPF_ST | BPF_MEM | BPF_DW:
1087+
if (ctx->fpb_offset > 0 && dst == fp) {
1088+
dst = fpb;
1089+
off += ctx->fpb_offset;
1090+
}
10731091
/* Load imm to a register then store it */
10741092
emit_a64_mov_i(1, tmp, imm, ctx);
10751093
switch (BPF_SIZE(code)) {
@@ -1113,6 +1131,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
11131131
case BPF_STX | BPF_MEM | BPF_H:
11141132
case BPF_STX | BPF_MEM | BPF_B:
11151133
case BPF_STX | BPF_MEM | BPF_DW:
1134+
if (ctx->fpb_offset > 0 && dst == fp) {
1135+
dst = fpb;
1136+
off += ctx->fpb_offset;
1137+
}
11161138
switch (BPF_SIZE(code)) {
11171139
case BPF_W:
11181140
if (is_lsi_offset(off, 2)) {
@@ -1167,6 +1189,56 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
11671189
return 0;
11681190
}
11691191

1192+
/*
1193+
* Return 0 if FP may change at runtime; otherwise find the minimum negative
1194+
* offset to FP and convert it to a positive number.
1195+
*/
1196+
static int find_fpb_offset(struct bpf_prog *prog)
1197+
{
1198+
int i;
1199+
int offset = 0;
1200+
1201+
for (i = 0; i < prog->len; i++) {
1202+
const struct bpf_insn *insn = &prog->insnsi[i];
1203+
1204+
switch (BPF_CLASS(insn->code)) {
1205+
case BPF_STX:
1206+
case BPF_ST:
1207+
if (BPF_MODE(insn->code) == BPF_ATOMIC) {
1208+
if ((insn->imm == BPF_XCHG ||
1209+
insn->imm == (BPF_ADD | BPF_FETCH) ||
1210+
insn->imm == (BPF_AND | BPF_FETCH) ||
1211+
insn->imm == (BPF_OR | BPF_FETCH) ||
1212+
insn->imm == (BPF_XOR | BPF_FETCH)) &&
1213+
insn->src_reg == BPF_REG_FP) {
1214+
return 0;
1215+
}
1216+
}
1217+
if (BPF_MODE(insn->code) == BPF_MEM &&
1218+
insn->dst_reg == BPF_REG_FP) {
1219+
if (insn->off < offset)
1220+
offset = insn->off;
1221+
}
1222+
break;
1223+
1224+
case BPF_JMP32:
1225+
case BPF_JMP:
1226+
break;
1227+
1228+
case BPF_ALU:
1229+
case BPF_ALU64:
1230+
case BPF_LDX:
1231+
case BPF_LD:
1232+
default:
1233+
if (insn->dst_reg == BPF_REG_FP)
1234+
return 0;
1235+
}
1236+
}
1237+
1238+
/* can safely be negated into a non-negative 'int', since insn->off is 's16' */
1239+
return -offset;
1240+
}
1241+
11701242
static int build_body(struct jit_ctx *ctx, bool extra_pass)
11711243
{
11721244
const struct bpf_prog *prog = ctx->prog;
@@ -1288,6 +1360,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
12881360
goto out_off;
12891361
}
12901362

1363+
ctx.fpb_offset = find_fpb_offset(prog);
1364+
12911365
/*
12921366
* 1. Initial fake pass to compute ctx->idx and ctx->offset.
12931367
*

0 commit comments

Comments
 (0)