为了克服对 bpf verifier 的恐惧,打算从 verifier.c 最初的 commit 开始学习 verifier;这是因为最初版本的 verifier 并不复杂。相对而言,理解起来会容易一些。

在该 commit 里,有最初的 check_cfg() 函数实现。

DFS 实现的 CFG 检查算法

check_cfg() 检查 bpf prog 的控制流程图(CFG,Control Flow Graph)是不是一个有向无环图(DAG,Directed Acyclic Graph)。

check_cfg() 检查如下内容:

  • 循环
  • 不可达的指令
  • bpf prog 是否以 BPF_EXIT 指令结束
  • 所有分支是否皆在 bpf prog 范围内

非递归 DFS 伪代码:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
1 DFS-迭代(G,V):
2   标记 v  discovered
3   S 是一个 stack
4   S.push(v)
5   while S 非空
6       t = S.pop()
7       if t 是所寻:
8           return t
9       for 所有边 e in G.adjacentEdges(t) do
10          if  e 已被标记
11              continue 下一条边
12          w = G.adjacentVertex(t,e)
13          if 顶点 w 不是 discovered 也不是 explored
14              标记 e  tree-edge
15              标记 w  discovered
16              S.push(w)
17              continue at 5
18          else if 顶点 w  discovered
19              标记 e  back-edge
20          else
21              // 顶点 w 是 explored
22              标记 e  forward- or cross-edge
23      标记 t  explored
24      S.pop()

check_cfg() 源码实现

然而,check_cfg() 函数并不是直接实现了以上伪代码,而是根据 eBPF 的特点进行了简化:

  1. 使用一个栈保存需要遍历的边。
  2. 调用指令当作一条 FALLTHROUGH 边。
  3. 无条件跳转指令当作一条带有 offset 的 FALLTHROUGH 边。
  4. 有条件跳转指令当作一条 FALLTHROUGH 边和一条带有 offset 的 BRANCH 边。
  5. 其它指令当作一条 FALLTHROUGH 边。

通过顶点的状态判断 CFG 是否成环:

  1. 使用一个数组保存指令的状态,遍历指令即是遍历顶点。
  2. 外层遍历:每次从栈中取出一个顶点(相当于遍历一条边)。
  3. 遍历该顶点的边(有条件跳转指令才有两条边,其它指令只有一条边)。
  4. 伪代码里的边处理实现。
  5. 标记当前指令的状态为 EXPLORED。

该函数能如此实现的前提是:

  1. 编译器 clang 将 for 循环、goto 循环等转换为顺序执行的指令,不会生成往回跳转的指令(但不能排除人工恶意生成的指令)。

小结

多啃几遍初始版本的 check_cfg() 函数源代码,加深对 check_cfg() 的理解,以便理解较新版本的 check_cfg() 函数实现。

毕竟,核心算法并没有被重构。

附:check_cfg() 源代码

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
// https://github.com/torvalds/linux/commit/475fb78fbf48592ce541627c60a7b331060e31f5

/* non-recursive DFS pseudo code
 * 1  procedure DFS-iterative(G,v):
 * 2      label v as discovered
 * 3      let S be a stack
 * 4      S.push(v)
 * 5      while S is not empty
 * 6            t <- S.pop()
 * 7            if t is what we're looking for:
 * 8                return t
 * 9            for all edges e in G.adjacentEdges(t) do
 * 10               if edge e is already labelled
 * 11                   continue with the next edge
 * 12               w <- G.adjacentVertex(t,e)
 * 13               if vertex w is not discovered and not explored
 * 14                   label e as tree-edge
 * 15                   label w as discovered
 * 16                   S.push(w)
 * 17                   continue at 5
 * 18               else if vertex w is discovered
 * 19                   label e as back-edge
 * 20               else
 * 21                   // vertex w is explored
 * 22                   label e as forward- or cross-edge
 * 23           label t as explored
 * 24           S.pop()
 *
 * convention:
 * 0x10 - discovered
 * 0x11 - discovered and fall-through edge labelled
 * 0x12 - discovered and fall-through and branch edges labelled
 * 0x20 - explored
 */

enum {
    DISCOVERED = 0x10,
    EXPLORED = 0x20,
    FALLTHROUGH = 1,
    BRANCH = 2,
};

static int *insn_stack; /* stack of insns to process */
static int cur_stack;   /* current stack index */
static int *insn_state;

/* t, w, e - match pseudo-code above:
 * t - index of current instruction
 * w - next instruction
 * e - edge
 */
static int push_insn(int t, int w, int e, struct verifier_env *env)
{
    if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
        return 0;

    if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
        return 0;

    if (w < 0 || w >= env->prog->len) {
        verbose("jump out of range from insn %d to %d\n", t, w);
        return -EINVAL;
    }

    if (insn_state[w] == 0) {
        /* tree-edge */
        insn_state[t] = DISCOVERED | e;
        insn_state[w] = DISCOVERED;
        if (cur_stack >= env->prog->len)
            return -E2BIG;
        insn_stack[cur_stack++] = w;
        return 1;
    } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
        verbose("back-edge from insn %d to %d\n", t, w);
        return -EINVAL;
    } else if (insn_state[w] == EXPLORED) {
        /* forward- or cross-edge */
        insn_state[t] = DISCOVERED | e;
    } else {
        verbose("insn state internal bug\n");
        return -EFAULT;
    }
    return 0;
}

/* non-recursive depth-first-search to detect loops in BPF program
 * loop == back-edge in directed graph
 */
static int check_cfg(struct verifier_env *env)
{
    struct bpf_insn *insns = env->prog->insnsi;
    int insn_cnt = env->prog->len;
    int ret = 0;
    int i, t;

    insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
    if (!insn_state)
        return -ENOMEM;

    insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
    if (!insn_stack) {
        kfree(insn_state);
        return -ENOMEM;
    }

    insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
    insn_stack[0] = 0; /* 0 is the first instruction */
    cur_stack = 1;

peek_stack:
    if (cur_stack == 0)
        goto check_state;
    t = insn_stack[cur_stack - 1];

    if (BPF_CLASS(insns[t].code) == BPF_JMP) {
        u8 opcode = BPF_OP(insns[t].code);

        if (opcode == BPF_EXIT) {
            goto mark_explored;
        } else if (opcode == BPF_CALL) {
            ret = push_insn(t, t + 1, FALLTHROUGH, env);
            if (ret == 1)
                goto peek_stack;
            else if (ret < 0)
                goto err_free;
        } else if (opcode == BPF_JA) {
            if (BPF_SRC(insns[t].code) != BPF_K) {
                ret = -EINVAL;
                goto err_free;
            }
            /* unconditional jump with single edge */
            ret = push_insn(t, t + insns[t].off + 1,
                    FALLTHROUGH, env);
            if (ret == 1)
                goto peek_stack;
            else if (ret < 0)
                goto err_free;
        } else {
            /* conditional jump with two edges */
            ret = push_insn(t, t + 1, FALLTHROUGH, env);
            if (ret == 1)
                goto peek_stack;
            else if (ret < 0)
                goto err_free;

            ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
            if (ret == 1)
                goto peek_stack;
            else if (ret < 0)
                goto err_free;
        }
    } else {
        /* all other non-branch instructions with single
         * fall-through edge
         */
        ret = push_insn(t, t + 1, FALLTHROUGH, env);
        if (ret == 1)
            goto peek_stack;
        else if (ret < 0)
            goto err_free;
    }

mark_explored:
    insn_state[t] = EXPLORED;
    if (cur_stack-- <= 0) {
        verbose("pop stack internal bug\n");
        ret = -EFAULT;
        goto err_free;
    }
    goto peek_stack;

check_state:
    for (i = 0; i < insn_cnt; i++) {
        if (insn_state[i] != EXPLORED) {
            verbose("unreachable insn %d\n", i);
            ret = -EINVAL;
            goto err_free;
        }
    }
    ret = 0; /* cfg looks good */

err_free:
    kfree(insn_state);
    kfree(insn_stack);
    return ret;
}