通过 bpf: Support bpf program calling kernel function 学习 kfuncs
的实现。
不过,此 kfuncs
不是 bpftrace kfunc/kretfunc: Kernel Functions Tracing。bpftrace 的 kfunc
的底层是 fentry
/fexit
。
该内核文档不适合用来学习 kfuncs
的实现,它更多地是在讲解 kfuncs
的规范,方便内核开发者实现自己的 kfuncs
。
不过,从该文档里可以了解到,只有用 __bpf_kfunc
标记的内核函数才是 kfuncs
,才能在 bpf 代码里直接调用。所以,当要了解有哪些 kfuncs
时,可以在内核源码里搜索 __bpf_kfunc
。
demo 示例
没有 demo,直接看看内核源代码里的 selftests 吧。
例子:${KERNEL}/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
|
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include "../bpf_testmod/bpf_testmod_kfunc.h"
/* External symbol declared with __ksym; f1() reads it through bpf_per_cpu_ptr(). */
extern const int bpf_prog_active __ksym;
/* Result slots written by f1(); both start at -1. */
int active_res = -1;
int sk_state_res = -1;
/*
 * Non-inlined subprogram invoked by kfunc_call_test1().
 *
 * Stores the value behind the per-CPU pointer for bpf_prog_active in
 * active_res (only when that pointer is non-NULL), and stores the skc_state
 * of the sock returned by bpf_kfunc_call_test3() in sk_state_res.
 *
 * Returns -1 when skb->sk is NULL or when bpf_sk_fullsock() returns NULL;
 * otherwise returns the result of bpf_kfunc_call_test1(sk, 1, 2, 3, 4)
 * cast to __u32.
 */
int __noinline f1(struct __sk_buff *skb)
{
struct bpf_sock *sk = skb->sk;
int *active;
if (!sk)
return -1;
/* Continue with whatever bpf_sk_fullsock() hands back; NULL means bail out. */
sk = bpf_sk_fullsock(sk);
if (!sk)
return -1;
/* Per-CPU lookup of bpf_prog_active, keyed by bpf_get_smp_processor_id(). */
active = (int *)bpf_per_cpu_ptr(&bpf_prog_active,
bpf_get_smp_processor_id());
if (active)
active_res = *active;
/*
 * NOTE(review): bpf_kfunc_call_test3() and bpf_kfunc_call_test1() are
 * presumably declared in bpf_testmod_kfunc.h; the result of the former is
 * dereferenced without a NULL check.
 */
sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state;
return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
}
/* Program placed in the "tc" section; it only forwards skb to f1() and returns its result. */
SEC("tc")
int kfunc_call_test1(struct __sk_buff *skb)
{
return f1(skb);
}
char _license[] SEC("license") = "GPL";
|
编译阶段
不了解 clang,直接看下 kfunc_call_test_subprog.bpf.o
的反汇编:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
# llvm-objdump -S kfunc_call_test_subprog.bpf.o
kfunc_call_test_subprog.bpf.o: file format elf64-bpf
Disassembly of section .text:
0000000000000000 <f1>:
; {
0: b4 07 00 00 ff ff ff ff w7 = -1
; struct bpf_sock *sk = skb->sk;
1: 79 11 a8 00 00 00 00 00 r1 = *(u64 *)(r1 + 168)
; if (!sk)
2: 15 01 1a 00 00 00 00 00 if r1 == 0 goto +26 <LBB0_5>
; sk = bpf_sk_fullsock(sk);
3: 85 00 00 00 5f 00 00 00 call 95
4: bf 06 00 00 00 00 00 00 r6 = r0
; if (!sk)
5: 15 06 17 00 00 00 00 00 if r6 == 0 goto +23 <LBB0_5>
; bpf_get_smp_processor_id());
6: 85 00 00 00 08 00 00 00 call 8
; active = (int *)bpf_per_cpu_ptr(&bpf_prog_active,
7: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
9: bc 02 00 00 00 00 00 00 w2 = w0
10: 85 00 00 00 99 00 00 00 call 153
; if (active)
11: 15 00 04 00 00 00 00 00 if r0 == 0 goto +4 <LBB0_4>
; active_res = *active;
12: 61 01 00 00 00 00 00 00 r1 = *(u32 *)(r0 + 0)
13: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
15: 63 12 00 00 00 00 00 00 *(u32 *)(r2 + 0) = r1
0000000000000080 <LBB0_4>:
; sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state;
16: bf 61 00 00 00 00 00 00 r1 = r6
17: 85 10 00 00 ff ff ff ff call -1
18: 71 01 12 00 00 00 00 00 r1 = *(u8 *)(r0 + 18)
19: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
21: 63 12 00 00 00 00 00 00 *(u32 *)(r2 + 0) = r1
; return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
22: bf 61 00 00 00 00 00 00 r1 = r6
23: b4 02 00 00 01 00 00 00 w2 = 1
24: b7 03 00 00 02 00 00 00 r3 = 2
25: b4 04 00 00 03 00 00 00 w4 = 3
26: b7 05 00 00 04 00 00 00 r5 = 4
27: 85 10 00 00 ff ff ff ff call -1
28: bf 07 00 00 00 00 00 00 r7 = r0
00000000000000e8 <LBB0_5>:
; }
29: bc 70 00 00 00 00 00 00 w0 = w7
30: 95 00 00 00 00 00 00 00 exit
Disassembly of section tc:
0000000000000000 <kfunc_call_test1>:
; return f1(skb);
0: 85 10 00 00 ff ff ff ff call -1
1: 95 00 00 00 00 00 00 00 exit
|
可以看到,两个 kfuncs
对应的汇编都是 85 10 00 00 ff ff ff ff call -1
。其中的 -1 只是占位的立即数,真正的调用目标要到加载阶段才会填入。
加载阶段
在加载阶段,就会将汇编指令跟具体的内核函数的 BTF ID 关联起来。
以下分析 go-ebpf 库里的加载处理。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
|
LoadCollectionSpecFromReader() // ${ebpf}/elf_reader.go
|-->LoadSpecAndExtInfosFromReader() // ${ebpf}/btf/btf.go
| |-->loadExtInfosFromELF()
| |-->loadSpecFromELF()
| |-->fixupDatasec() {
| _, ok := vsi.Type.(*Func)
| if !ok {
| // Only Funcs are supported in the .ksyms Datasec.
| return fmt.Errorf("data section %s: expected *btf.Func, not %T: %w", name, vsi.Type, ErrNotSupported)
| }
| }
|-->loadKsymsSection() { // ${ebpf}/elf_reader.go
| for _, v := range ds.Vars {
| // we have already checked the .ksyms Datasec to only contain Func Vars.
| ec.kfuncs[v.Type.TypeName()] = v.Type.(*btf.Func)
| }
| }
|-->loadProgramSections()
|-->loadFunctions()
|-->relocateInstruction() {
name = rel.Name
kf := ec.kfuncs[name]
switch {
// If a Call instruction is found and the datasec has a btf.Func with a Name
// that matches the symbol name we mark the instruction as a call to a kfunc.
case kf != nil && ins.OpCode.JumpOp() == asm.Call:
ins.Metadata.Set(kfuncMeta{}, kf)
ins.Src = asm.PseudoKfuncCall
ins.Constant = -1
}
*ins = ins.WithReference(name)
}
newProgramWithOptions() // ${ebpf}/prog.go
|-->fixupKfuncs() {
kfm, _ := ins.Metadata.Get(kfuncMeta{}).(*btf.Func)
target := btf.Type((*btf.Func)(nil))
spec, module, err := findTargetInKernel(kernelSpec, kfm.Name, &target)
id, err := spec.TypeID(target)
ins.Constant = int64(id)
}
|
通过上面代码片段的分析,可以看到,最终将汇编指令的 ins.Constant
(a.k.a. ins.imm
in kernel) 设置为了 kfuncs
的 BTF ID。
校验阶段
接下来,看看在 verifier 里是怎么将 BTF ID 转换为真正的内核函数的。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
bpf_check() // ${KERNEL}/kernel/bpf/verifier.c
|-->add_subprog_and_kfunc()
| |-->add_kfunc_call() {
| func = btf_type_by_id(desc_btf, func_id);
| func_name = btf_name_by_offset(desc_btf, func->name_off);
| addr = kallsyms_lookup_name(func_name);
| desc->func_id = func_id;
| desc->imm = BPF_CAST_CALL(addr) - __bpf_call_base;
| }
|-->do_check()
| |-->check_kfunc_call()
|-->do_misc_fixups()
| |-->fixup_kfunc_call() {
| insn->imm = desc->imm;
| }
fixup_call_args()
|-->jit_subprogs()
|-->bpf_int_jit_compile() // ${KERNEL}/arch/x86/net/bpf_jit_comp.c
|-->do_jit() {
case BPF_JMP | BPF_CALL:
/* 不再区分 kfuncs 和其它 function call */
func = (u8 *) __bpf_call_base + imm32;
emit_call(&prog, func, image + addrs[i - 1] + offs)
}
|
通过上面代码片段的分析,可以看到,校验阶段先把 kfuncs
的 ins.imm
从 BTF ID 改写为函数地址相对 __bpf_call_base 的偏移,JIT 时再通过 __bpf_call_base + imm32 还原出函数地址,最终 kfuncs
转为 call
x86 汇编指令。
小结
通过上面的分析,可以看到:
- 编译阶段,
kfuncs
的信息会被保存在 ELF 文件里的 .ksyms
section 里。
- 加载阶段,
kfuncs
的 BTF ID 会被保存在 ins.Constant
里。
- 校验阶段,
ins.Constant
会被转换为函数地址相对 __bpf_call_base 的偏移,JIT 时再还原为函数地址,最终 kfuncs
转为 call
x86 汇编指令。