通过内核补丁 "bpf: Support bpf program calling kernel function" 学习 kfuncs 的实现。

不过,此 kfuncs 不是 bpftrace 的 kfunc/kretfunc(Kernel Functions Tracing)。bpftrace 的 kfunc 的底层是 fentry/fexit。

该内核文档不适合用来学习 kfuncs 的实现,它更多是在讲解 kfuncs 的规范,方便内核开发者实现自己的 kfuncs。

不过,从该文档里学习到,只有用 __bpf_kfunc 标注的内核函数才是 kfuncs,才能在 bpf 代码里直接调用。所以,当要了解有哪些 kfuncs 时,可以在内核源码里搜索 __bpf_kfunc。

demo 示例

没有 demo,直接看看内核源代码里的 selftests 吧。

例子:${KERNEL}/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include "../bpf_testmod/bpf_testmod_kfunc.h"

/* Kernel symbol resolved at load time via the __ksym annotation. */
extern const int bpf_prog_active __ksym;
/* Results written by f1(); both keep the initial value -1 until f1() stores into them. */
int active_res = -1;
int sk_state_res = -1;

/*
 * f1 - non-inlined subprogram called from kfunc_call_test1().
 *
 * Takes the socket attached to @skb, converts it with bpf_sk_fullsock(),
 * and then:
 *   - copies the value behind bpf_per_cpu_ptr(&bpf_prog_active, <cpu id>)
 *     into active_res when that pointer is non-NULL,
 *   - stores the skc_state of the socket returned by bpf_kfunc_call_test3()
 *     into sk_state_res,
 *   - returns the result of bpf_kfunc_call_test1() cast to __u32.
 *
 * Returns -1 when @skb has no socket or bpf_sk_fullsock() returns NULL.
 */
int __noinline f1(struct __sk_buff *skb)
{
    struct bpf_sock *sk = skb->sk;
    int *active;

    /* No socket associated with this skb. */
    if (!sk)
        return -1;

    sk = bpf_sk_fullsock(sk);
    if (!sk)
        return -1;

    /* Look up bpf_prog_active for the CPU id reported by the helper. */
    active = (int *)bpf_per_cpu_ptr(&bpf_prog_active,
                    bpf_get_smp_processor_id());
    if (active)
        active_res = *active;

    /* First kfunc call: read skc_state through the returned struct sock. */
    sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state;

    /* Second kfunc call: its result becomes this subprogram's return value. */
    return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
}

/*
 * Program entry point placed in the "tc" section; it only forwards @skb to
 * the f1() subprogram and returns f1()'s result.
 */
SEC("tc")
int kfunc_call_test1(struct __sk_buff *skb)
{
    return f1(skb);
}

/* License string emitted into the "license" section of the object file. */
char _license[] SEC("license") = "GPL";

编译阶段

不了解 clang,直接看下 kfunc_call_test_subprog.bpf.o 的反汇编:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# llvm-objdump -S kfunc_call_test_subprog.bpf.o

kfunc_call_test_subprog.bpf.o:  file format elf64-bpf

Disassembly of section .text:

0000000000000000 <f1>:
; {
       0:   b4 07 00 00 ff ff ff ff w7 = -1
;   struct bpf_sock *sk = skb->sk;
       1:   79 11 a8 00 00 00 00 00 r1 = *(u64 *)(r1 + 168)
;   if (!sk)
       2:   15 01 1a 00 00 00 00 00 if r1 == 0 goto +26 <LBB0_5>
;   sk = bpf_sk_fullsock(sk);
       3:   85 00 00 00 5f 00 00 00 call 95
       4:   bf 06 00 00 00 00 00 00 r6 = r0
;   if (!sk)
       5:   15 06 17 00 00 00 00 00 if r6 == 0 goto +23 <LBB0_5>
;                   bpf_get_smp_processor_id());
       6:   85 00 00 00 08 00 00 00 call 8
;   active = (int *)bpf_per_cpu_ptr(&bpf_prog_active,
       7:   18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
       9:   bc 02 00 00 00 00 00 00 w2 = w0
      10:   85 00 00 00 99 00 00 00 call 153
;   if (active)
      11:   15 00 04 00 00 00 00 00 if r0 == 0 goto +4 <LBB0_4>
;       active_res = *active;
      12:   61 01 00 00 00 00 00 00 r1 = *(u32 *)(r0 + 0)
      13:   18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
      15:   63 12 00 00 00 00 00 00 *(u32 *)(r2 + 0) = r1

0000000000000080 <LBB0_4>:
;   sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state;
      16:   bf 61 00 00 00 00 00 00 r1 = r6
      17:   85 10 00 00 ff ff ff ff call -1
      18:   71 01 12 00 00 00 00 00 r1 = *(u8 *)(r0 + 18)
      19:   18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
      21:   63 12 00 00 00 00 00 00 *(u32 *)(r2 + 0) = r1
;   return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
      22:   bf 61 00 00 00 00 00 00 r1 = r6
      23:   b4 02 00 00 01 00 00 00 w2 = 1
      24:   b7 03 00 00 02 00 00 00 r3 = 2
      25:   b4 04 00 00 03 00 00 00 w4 = 3
      26:   b7 05 00 00 04 00 00 00 r5 = 4
      27:   85 10 00 00 ff ff ff ff call -1
      28:   bf 07 00 00 00 00 00 00 r7 = r0

00000000000000e8 <LBB0_5>:
; }
      29:   bc 70 00 00 00 00 00 00 w0 = w7
      30:   95 00 00 00 00 00 00 00 exit

Disassembly of section tc:

0000000000000000 <kfunc_call_test1>:
;   return f1(skb);
       0:   85 10 00 00 ff ff ff ff call -1
       1:   95 00 00 00 00 00 00 00 exit

可以看到,两个 kfuncs 对应的汇编都是 85 10 00 00 ff ff ff ff call -1:imm 为 -1 只是占位,真正的调用目标要等到加载阶段重定位时才确定(tc 段里对子函数 f1 的调用在此时也是同样的编码)。

加载阶段

在加载阶段,就会将汇编指令跟具体的内核函数的 BTF ID 关联起来。

以下分析 go-ebpf 库里的加载处理。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
LoadCollectionSpecFromReader()                  // ${ebpf}/elf_reader.go
|-->LoadSpecAndExtInfosFromReader()             // ${ebpf}/btf/btf.go
|   |-->loadExtInfosFromELF()
|       |-->loadSpecFromELF()
|           |-->fixupDatasec() {
|                   _, ok := vsi.Type.(*Func)
|                   if !ok {
|                       // Only Funcs are supported in the .ksyms Datasec.
|                       return fmt.Errorf("data section %s: expected *btf.Func, not %T: %w", name, vsi.Type, ErrNotSupported)
|                   }
|               }
|-->loadKsymsSection() {                        // ${ebpf}/elf_reader.go
|       for _, v := range ds.Vars {
|           // we have already checked the .ksyms Datasec to only contain Func Vars.
|           ec.kfuncs[v.Type.TypeName()] = v.Type.(*btf.Func)
|       }
|   }
|-->loadProgramSections()
    |-->loadFunctions()
        |-->relocateInstruction() {
                name = rel.Name
                kf := ec.kfuncs[name]
                switch {
                // If a Call instruction is found and the datasec has a btf.Func with a Name
                // that matches the symbol name we mark the instruction as a call to a kfunc.
                case kf != nil && ins.OpCode.JumpOp() == asm.Call:
                    ins.Metadata.Set(kfuncMeta{}, kf)
                    ins.Src = asm.PseudoKfuncCall
                    ins.Constant = -1
                }

                *ins = ins.WithReference(name)
            }

newProgramWithOptions()                        // ${ebpf}/prog.go
|-->fixupKfuncs() {
        kfm, _ := ins.Metadata.Get(kfuncMeta{}).(*btf.Func)
        target := btf.Type((*btf.Func)(nil))
        spec, module, err := findTargetInKernel(kernelSpec, kfm.Name, &target)
        id, err := spec.TypeID(target)
        ins.Constant = int64(id)
    }

通过上面代码片段的分析,可以看到,最终将汇编指令的 ins.Constant (a.k.a. ins.imm in kernel) 设置为了 kfuncs 的 BTF ID。

校验阶段

接下来,看看在 verifier 里是怎么将 BTF ID 转换为真正的内核函数的。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
bpf_check()                                     // ${KERNEL}/kernel/bpf/verifier.c
|-->add_subprog_and_kfunc()
|   |-->add_kfunc_call() {
|           func = btf_type_by_id(desc_btf, func_id);
|           func_name = btf_name_by_offset(desc_btf, func->name_off);
|           addr = kallsyms_lookup_name(func_name);
|           desc->func_id = func_id;
|           desc->imm = BPF_CAST_CALL(addr) - __bpf_call_base;
|       }
|**>do_check()
|   |-->check_kfunc_call()
|-->do_misc_fixups()
|   |-->fixup_kfunc_call() {
|           insn->imm = desc->imm;
|       }
|-->fixup_call_args()
    |-->jit_subprogs()
        |-->bpf_int_jit_compile()               // ${KERNEL}/arch/x86/net/bpf_jit_comp.c
            |-->do_jit() {
                    case BPF_JMP | BPF_CALL:
                        /* 不再区分 kfuncs 和其它 function call */
                        func = (u8 *) __bpf_call_base + imm32;
                        emit_call(&prog, func, image + addrs[i - 1] + offs)
                }

通过上面代码片段的分析,可以看到,kfuncs 的 insn->imm 从 BTF ID 转为函数地址相对 __bpf_call_base 的偏移,最终在 JIT 阶段 kfuncs 转为 x86 的 call 汇编指令。

小结

通过上面的分析,可以看到:

  1. 编译阶段,kfuncs 的信息会被保存在 ELF 文件的 BTF 里的 .ksyms Datasec 里,对应的调用指令是占位的 call -1。
  2. 加载阶段,kfuncs 的 BTF ID 会被保存在 ins.Constant(即内核里的 insn->imm)里。
  3. 校验阶段,insn->imm 会被转换为函数地址相对 __bpf_call_base 的偏移,最终在 JIT 阶段 kfuncs 转为 x86 的 call 汇编指令。