/*
* \brief VirtIO queue implementation
* \author Piotr Tworek
* \date 2019-09-27
*/
/*
* Copyright (C) 2019 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU Affero General Public License version 3.
*/
#ifndef _INCLUDE__VIRTIO__QUEUE_H_
#define _INCLUDE__VIRTIO__QUEUE_H_
#include
#include
#include
#include
namespace Virtio
{
using namespace Genode;
template class Queue;
struct Queue_default_traits;
struct Queue_description;
}
struct Virtio::Queue_description
{
/**
* Physical address of the descriptor table.
*/
addr_t desc;
/**
* Physical address of the available descriptor ring.
*/
addr_t avail;
/**
* Physcical address of the used descriptor ring.
*/
addr_t used;
/**
* The size of the descriptor table (number of elements).
*/
uint16_t size;
};
struct Queue_default_traits
{
/**
* The queue is only supposed to be written to by the device.
*/
static const bool device_write_only = false;
/**
* Each queue event has additional data payload associated with it.
*/
static const bool has_data_payload = false;
};
/**
* This class implements VirtIO queue interface as defined in section 2.4 of VirtIO 1.0 specification.
*/
template
class Virtio::Queue
{
private:
/*
* Noncopyable
*/
Queue(Queue const &) = delete;
Queue &operator = (Queue const &) = delete;
static addr_t _dma_addr(Platform::Connection & p,
Dataspace_capability c) {
return p.dma_addr(static_cap_cast(c)); }
protected:
typedef HEADER_TYPE Header_type;
struct Descriptor
{
Descriptor(Descriptor const &) = delete;
Descriptor &operator = (Descriptor const &) = delete;
enum Flags : uint16_t
{
NEXT = 1,
WRITE = 2,
};
uint64_t addr;
uint32_t len;
uint16_t flags;
uint16_t next;
} __attribute__((packed));
struct Avail
{
enum Flags : uint16_t { NO_INTERRUPT = 1 };
uint16_t flags;
uint16_t idx;
uint16_t ring[];
/* uint16_t used_event; */
} __attribute__((packed));
struct Used
{
uint16_t flags;
uint16_t idx;
struct {
uint32_t id;
uint32_t len;
} ring[];
/* uint16_t avail_event; */
} __attribute__((packed));
/*
* This helper splits one RAM dataspace into multiple, equally sized chunks. The
* number of such chunk is expected to be equal to the number of entries in VirtIO
* descriptor ring. Each chunk will serve as a buffer for single VirtIO descriptor.
*/
class Buffer_pool
{
private:
Buffer_pool(Buffer_pool const &) = delete;
Buffer_pool &operator = (Buffer_pool const &) = delete;
Attached_dataspace _ds;
uint16_t const _buffer_count;
uint16_t const _buffer_size;
addr_t const _phys_base;
static size_t _ds_size(uint16_t buffer_count, uint16_t buffer_size) {
return buffer_count * align_natural(buffer_size); }
public:
struct Buffer
{
uint8_t *local_addr;
addr_t phys_addr;
uint16_t size;
};
Buffer_pool(Platform::Connection & plat,
Region_map & rm,
uint16_t const buffer_count,
uint16_t const buffer_size)
:
_ds(rm, plat.alloc_dma_buffer(buffer_count *
align_natural(buffer_size),
CACHED)),
_buffer_count(buffer_count),
_buffer_size(buffer_size),
_phys_base(_dma_addr(plat, _ds.cap())) {}
const Buffer get(uint16_t descriptor_idx) const
{
descriptor_idx %= _buffer_count;
return {
(uint8_t*)((addr_t)_ds.local_addr() +
descriptor_idx * align_natural(_buffer_size)),
_phys_base + descriptor_idx * align_natural(_buffer_size),
_buffer_size
};
}
uint16_t buffer_size() const { return _buffer_size; }
};
class Descriptor_ring
{
private:
Descriptor_ring(Descriptor_ring const &) = delete;
Descriptor_ring &operator = (Descriptor_ring const &) = delete;
Descriptor * const _desc_table;
uint16_t const _size;
uint16_t _head = 0;
uint16_t _tail = 0;
public:
Descriptor_ring(uint8_t const *table, uint16_t ring_size)
: _desc_table((Descriptor *)table), _size(ring_size) { }
uint16_t reserve() { return _head++ % _size; }
void free_all() { _tail = _head; }
uint16_t available_capacity() const {
return (uint16_t)((_tail > _head) ? _tail - _head
: _size - _head + _tail) - 1; }
Descriptor& get(uint16_t const idx) const {
return _desc_table[idx % _size]; }
};
uint16_t const _queue_size;
Attached_dataspace _ds;
Buffer_pool _buffers;
Avail volatile * const _avail;
Used volatile * const _used;
Descriptor_ring _descriptors;
uint16_t _last_used_idx = 0;
Queue_description const _description;
/* As defined in section 2.4 of VIRTIO 1.0 specification. */
static size_t _desc_size(uint16_t queue_size) {
return 16 * queue_size; }
static size_t _avail_size(uint16_t queue_size) {
return 6 + 2 * queue_size; }
static size_t _used_size(uint16_t queue_size) {
return 6 + 8 * queue_size; }
static uint16_t _check_buffer_size(uint16_t buffer_size)
{
/**
* Each buffer in the queue should be big enough to hold
* at least VirtIO header.
*/
if (buffer_size < sizeof(Header_type))
throw Invalid_buffer_size();
return buffer_size;
}
static size_t _ds_size(uint16_t queue_size)
{
size_t size = _desc_size(queue_size) + _avail_size(queue_size);
size = align_natural(size);
/* See section 2.4 of VirtIO 1.0 specification */
size += _used_size(queue_size);
return align_natural(size);
}
static Queue_description _init_description(uint16_t queue_size,
addr_t phys_addr)
{
uint8_t const *base_phys = (uint8_t *)phys_addr;
size_t const avail_offset = _desc_size(queue_size);
size_t const used_offset =
align_natural(avail_offset + _avail_size(queue_size));
return {
(addr_t)base_phys,
(addr_t)(base_phys + avail_offset),
(addr_t)(base_phys + used_offset),
queue_size,
};
}
static Avail *_init_avail(uint8_t *base_addr, uint16_t queue_size) {
return (Avail *)(base_addr + _desc_size(queue_size)); }
static Used *_init_used(uint8_t *base_addr, uint16_t queue_size) {
return (Used *)(base_addr + align_natural(_desc_size(queue_size) + _avail_size(queue_size))); }
void _fill_descriptor_table()
{
if (!TRAITS::device_write_only)
return;
/*
* When the queue is only writeable by the VirtIO device by we need to push
* all the descriptors to the available ring. The device will then use them
* whenever it wants to send us some data.
*/
while (_descriptors.available_capacity() > 0) {
auto const idx = _descriptors.reserve();
auto &desc = _descriptors.get(idx);
auto const buffer = _buffers.get(idx);
desc.addr = buffer.phys_addr;
desc.len = buffer.size;
desc.flags = Descriptor::Flags::WRITE;
desc.next = 0;
_avail->ring[idx] = idx;
}
_avail->flags = 0;
_avail->idx = _queue_size;
}
struct Write_result {
uint16_t first_descriptor_idx;
uint16_t last_descriptor_idx;
};
/*
* Write header and data (if data_size > 0) to a descirptor, or chain of descriptors.
* Returns the indexes of first and last descriptor in the chain. The caller must
* ensure there are enough descriptors to service the request.
*/
Write_result _write_data(Header_type const &header,
char const *data,
size_t data_size)
{
static_assert(!TRAITS::device_write_only);
static_assert(TRAITS::has_data_payload);
auto const first_desc_idx = _descriptors.reserve();
auto &desc = _descriptors.get(first_desc_idx);
auto buffer = _buffers.get(first_desc_idx);
desc.addr = buffer.phys_addr;
memcpy(buffer.local_addr, (void *)&header, sizeof(header));
desc.len = sizeof(header);
if (data_size > 0) {
/*
* Try to fit payload data into descriptor which holds the header.
*
* The size of the buffer is uint16_t so the result should also fit in the same quantity.
* The size of the header can never be larger than the size of the buffer, see
* _check_buffer_size function.
*/
auto const len = (uint16_t)min((size_t)buffer.size - sizeof(header), data_size);
memcpy(buffer.local_addr + sizeof(header), data, len);
desc.len += len;
}
size_t remaining_data_len = data_size + sizeof(header) - desc.len;
if (remaining_data_len == 0) {
/*
* There is no more data left to send, everything fit into the first descriptor.
*/
desc.flags = 0;
desc.next = 0;
return { first_desc_idx, first_desc_idx };
}
/*
* Some data did not fit into the first descriptor. Chain additional ones.
*/
auto chained_idx = _descriptors.reserve();
desc.flags = Descriptor::Flags::NEXT;
desc.next = chained_idx % _queue_size;
size_t data_offset = desc.len - sizeof(header);
do {
auto &chained_desc = _descriptors.get(chained_idx);
buffer = _buffers.get(chained_idx);
/*
* The size of the buffer is specified in uint16_t so the result should also
* fit in the same quantity.
*/
auto const write_len = (uint16_t)min((size_t)buffer.size, remaining_data_len);
memcpy(buffer.local_addr, data + data_offset, write_len);
chained_desc.addr = buffer.phys_addr;
chained_desc.len = write_len;
remaining_data_len -= write_len;
data_offset += write_len;
if (remaining_data_len > 0) {
/*
* There is still more data to send, chain additional descriptor.
*/
chained_idx = _descriptors.reserve();
chained_desc.flags = Descriptor::Flags::NEXT;
chained_desc.next = chained_idx;
} else {
/*
* This was the last descriptor in the chain.
*/
chained_desc.flags = 0;
chained_desc.next = 0;
}
} while (remaining_data_len > 0);
return { first_desc_idx, chained_idx};
}
public:
struct Invalid_buffer_size : Exception { };
Queue_description const description() const { return _description; }
bool has_used_buffers() const { return _last_used_idx != _used->idx; }
void ack_all_transfers()
{
static_assert(!TRAITS::device_write_only);
_last_used_idx = _used->idx;
if (!TRAITS::device_write_only)
_descriptors.free_all();
}
bool write_data(Header_type const &header,
char const *data,
size_t data_size)
{
static_assert(!TRAITS::device_write_only);
static_assert(TRAITS::has_data_payload);
int const req_desc_count = 1 + (sizeof(header) + data_size) / _buffers.buffer_size();
if (req_desc_count > _descriptors.available_capacity())
return false;
auto const write_result = _write_data(header, data, data_size);
/*
* Only the first descritor in the chain needs to be pushed to the available ring.
*/
_avail->ring[_avail->idx % _queue_size] = write_result.first_descriptor_idx;
_avail->idx += 1;
_avail->flags = Avail::Flags::NO_INTERRUPT;
return true;
}
template
void read_data(FN const &fn)
{
if (!has_used_buffers())
return;
auto const used_idx = _last_used_idx % _queue_size;
auto const buffer_idx = (uint16_t)(_used->ring[used_idx].id % _queue_size);
auto const len = _used->ring[used_idx].len;
auto const buffer = _buffers.get(buffer_idx);
char const *desc_data = (char *)buffer.local_addr;
Header_type const &header = *((Header_type *)(desc_data));
char const *data = desc_data + sizeof(Header_type);
size_t const data_size = len - sizeof(Header_type);
_last_used_idx += 1;
_avail->idx = _last_used_idx - 1;
fn(header, data, data_size);
}
Header_type read_data()
{
static_assert(!TRAITS::has_data_payload);
if (!has_used_buffers())
return Header_type{};
auto const used_idx = _last_used_idx % _queue_size;
auto const buffer_idx = (uint16_t)(_used->ring[used_idx].id % _queue_size);
auto const buffer = _buffers.get(buffer_idx);
char const *desc_data = (char *)buffer.local_addr;
_last_used_idx += 1;
_avail->idx = _last_used_idx - 1;
return *((Header_type *)(desc_data));
}
template
bool write_data_read_reply(Header_type const &header,
char const *data,
size_t data_size,
WAIT_REPLY_FN const &wait_for_reply,
REPLY_FN const &read_reply)
{
static_assert(!TRAITS::device_write_only);
static_assert(TRAITS::has_data_payload);
/*
* This restriction could be lifted by chaining multiple descriptors to receive
* the reply. Its probably better however to just ensure buffers are large enough
* when configuring the queue instead of adding more complexity to this function.
*/
if (sizeof(REPLY_TYPE) > _buffers.buffer_size())
return false;
/*
* The value of 2 is not a mistake. One additional desciptor is needed for
* receiving the response.
*/
auto const req_desc_count = 2 + (sizeof(header) + data_size) / _buffers.buffer_size();
if (req_desc_count > _descriptors.available_capacity())
return false;
auto const write_result = _write_data(header, data, data_size);
auto &last_write_desc = _descriptors.get(write_result.last_descriptor_idx);
/*
* Chain additional descriptor for receiving response.
*/
auto const reply_desc_idx = _descriptors.reserve();
auto &reply_desc = _descriptors.get(reply_desc_idx);
auto reply_buffer = _buffers.get(reply_desc_idx);
last_write_desc.flags = Descriptor::Flags::NEXT;
last_write_desc.next = reply_desc_idx;
reply_desc.addr = reply_buffer.phys_addr;
reply_desc.len = sizeof(REPLY_TYPE);
reply_desc.flags = Descriptor::Flags::WRITE;
reply_desc.next = 0;
/*
* Only the first descritor in the chain needs to be pushed to the available ring.
*/
_avail->ring[_avail->idx % _queue_size] = write_result.first_descriptor_idx;
_avail->idx += 1;
_avail->flags = Avail::Flags::NO_INTERRUPT;
wait_for_reply();
/*
* Make sure wait call did what it was supposed to do.
*/
if (!has_used_buffers())
return false;
/*
* We need to ACK the transfers regardless if the user provider read_reply function
* likes the reply or not. From our POV the transfer was succesful. Its irrelevant if
* the user likes the response, or not.
*/
ack_all_transfers();
return read_reply(*reinterpret_cast(reply_buffer.local_addr));
}
template
bool write_data_read_reply(Header_type const &header,
WAIT_REPLY_FN const &wait_for_reply,
REPLY_FN const &read_reply)
{
return write_data_read_reply(
header, nullptr, 0, wait_for_reply, read_reply);
}
void print(Output& output) const
{
print(output, "avail idx: ");
print(output, _avail->idx % _queue_size);
print(output, ", used idx = ");
print(output, _used->idx);
print(output, ", last seen used idx = ");
print(output, _last_used_idx);
print(output, ", capacity = ");
print(output, _descriptors.available_capacity());
print(output, ", size = ");
print(output, _queue_size);
}
Queue(Region_map & rm,
Platform::Connection & plat,
uint16_t queue_size,
uint16_t buffer_size)
: _queue_size(queue_size),
_ds(rm, plat.alloc_dma_buffer(_ds_size(queue_size), UNCACHED)),
_buffers(plat, rm, queue_size, _check_buffer_size(buffer_size)),
_avail(_init_avail(_ds.local_addr(), queue_size)),
_used(_init_used(_ds.local_addr(), queue_size)),
_descriptors(_ds.local_addr(), queue_size),
_description(_init_description(queue_size,
_dma_addr(plat, _ds.cap())))
{
_fill_descriptor_table();
}
};
#endif /* _INCLUDE__VIRTIO__QUEUE_H_ */