x86 BPF JIT patch

The optimization is as follows:

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5159c7a229229..7c130001fbfe7 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1234,13 +1234,11 @@ bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
 }

 static void detect_reg_usage(struct bpf_insn *insn, int insn_cnt,
-                bool *regs_used, bool *tail_call_seen)
+                bool *regs_used)
 {
     int i;

     for (i = 1; i <= insn_cnt; i++, insn++) {
-       if (insn->code == (BPF_JMP | BPF_TAIL_CALL))
-           *tail_call_seen = true;
         if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6)
             regs_used[0] = true;
         if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7)
@@ -1324,7 +1322,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
     struct bpf_insn *insn = bpf_prog->insnsi;
     bool callee_regs_used[4] = {};
     int insn_cnt = bpf_prog->len;
-   bool tail_call_seen = false;
     bool seen_exit = false;
     u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
     u64 arena_vm_start, user_vm_start;
@@ -1336,11 +1333,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
     arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
     user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);

-   detect_reg_usage(insn, insn_cnt, callee_regs_used,
-            &tail_call_seen);
-
-   /* tail call's presence in current prog implies it is reachable */
-   tail_call_reachable |= tail_call_seen;
+   detect_reg_usage(insn, insn_cnt, callee_regs_used);

     emit_prologue(&prog, bpf_prog->aux->stack_depth,
               bpf_prog_was_classic(bpf_prog), tail_call_reachable,

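For context, here is a simplified sketch of how do_jit() obtains tail_call_reachable after the change. It is reconstructed from the diff above rather than copied verbatim from the kernel source: the flag comes straight from bpf_prog->aux, which the verifier has already populated, so detect_reg_usage() only tracks callee-saved register usage.

static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
          /* ... */)
{
    /* Already decided by the verifier; no local tail_call_seen detection. */
    bool tail_call_reachable = bpf_prog->aux->tail_call_reachable;
    struct bpf_insn *insn = bpf_prog->insnsi;
    bool callee_regs_used[4] = {};
    int insn_cnt = bpf_prog->len;
    // ...

    /* Only callee-saved register usage (R6-R9) is detected here now. */
    detect_reg_usage(insn, insn_cnt, callee_regs_used);

    emit_prologue(&prog, bpf_prog->aux->stack_depth,
              bpf_prog_was_classic(bpf_prog), tail_call_reachable,
              /* ... */);
    // ...
}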
In the x86 BPF JIT, detect_reg_usage() no longer needs to check for tail_call_seen, because the verifier can now provide accurate tail_call_reachable information on its own:

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 81a3d2ced78d5..d7045676246a7 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2982,8 +2982,10 @@ static int check_subprogs(struct bpf_verifier_env *env)

         if (code == (BPF_JMP | BPF_CALL) &&
             insn[i].src_reg == 0 &&
-           insn[i].imm == BPF_FUNC_tail_call)
+           insn[i].imm == BPF_FUNC_tail_call) {
             subprog[cur_subprog].has_tail_call = true;
+           subprog[cur_subprog].tail_call_reachable = true;
+       }
         if (BPF_CLASS(code) == BPF_LD &&
             (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
             subprog[cur_subprog].has_ld_abs = true;

Why does this change allow tail_call_seen to be dropped from detect_reg_usage()?

Because check_subprogs() now marks subprog_info[cur_subprog].tail_call_reachable as soon as it encounters a tail call, the JIT no longer has to rediscover that fact by scanning the instructions itself, which is exactly what the removed "tail call's presence in current prog implies it is reachable" logic did. Before JITing, the verifier hands the tail_call_reachable flag to the JIT like this:

// https://github.com/torvalds/linux/blob/c3f2d783a459980eafd24c5af94ccd56a615961f/kernel/bpf/verifier.c#L19365
static int jit_subprogs(struct bpf_verifier_env *env)
{
    // ...

    for (i = 0; i < env->subprog_cnt; i++) {
        // ...
        func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
        // ...
        func[i] = bpf_int_jit_compile(func[i]);
        if (!func[i]->jited) {
            err = -ENOTSUPP;
            goto out_free;
        }
        cond_resched();
    }

    // ...
}
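To make the whole chain concrete, below is a hypothetical libbpf-style program (the names jmp_table and xdp_entry are mine, not from the patch). Its bpf_tail_call() invocation compiles to a BPF_JMP | BPF_CALL instruction with imm == BPF_FUNC_tail_call, so check_subprogs() now marks the containing subprogram as tail_call_reachable during verification, and jit_subprogs() hands that flag to the x86 JIT via aux->tail_call_reachable, with no rescan of the instructions needed in do_jit().

// Hypothetical example using libbpf conventions; not part of the kernel patch.
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
    __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
    __uint(max_entries, 4);
    __uint(key_size, sizeof(__u32));
    __uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

SEC("xdp")
int xdp_entry(struct xdp_md *ctx)
{
    /* Emits BPF_JMP | BPF_CALL with imm == BPF_FUNC_tail_call. */
    bpf_tail_call(ctx, &jmp_table, 0);

    /* Reached only if the tail call fails (e.g. the slot is empty). */
    return XDP_PASS;
}

char LICENSE[] SEC("license") = "GPL";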