seoul: update disk backend

Fixes races in disk allocators, add more sanity checks and handles corner
case like alloc_packet failed in block packetstream.

Issue #2715
This commit is contained in:
Alexander Boettcher 2018-03-18 10:31:09 +01:00 committed by Christian Helmuth
parent 4b826d10ef
commit e51e3dcdbd
3 changed files with 321 additions and 164 deletions

View File

@ -7,7 +7,7 @@
/* /*
* Copyright (C) 2012 Intel Corporation * Copyright (C) 2012 Intel Corporation
* Copyright (C) 2013-2017 Genode Labs GmbH * Copyright (C) 2013-2018 Genode Labs GmbH
* *
* This file is distributed under the terms of the GNU General Public License * This file is distributed under the terms of the GNU General Public License
* version 2. * version 2.
@ -30,9 +30,6 @@
/* local includes */ /* local includes */
#include "disk.h" #include "disk.h"
/* Seoul includes */
#include <host/dma.h>
static Genode::Heap * disk_heap(Genode::Ram_session *ram = nullptr, static Genode::Heap * disk_heap(Genode::Ram_session *ram = nullptr,
Genode::Region_map *rm = nullptr) Genode::Region_map *rm = nullptr)
{ {
@ -85,24 +82,14 @@ void Seoul::Disk::handle_disk(unsigned disknr)
while (source->ack_avail()) while (source->ack_avail())
{ {
Block::Packet_descriptor packet = source->get_acked_packet(); Block::Packet_descriptor packet = source->get_acked_packet();
char * source_addr = source->packet_content(packet); char * const source_addr = source->packet_content(packet);
/* find the corresponding MessageDisk object */ /* find the corresponding MessageDisk object */
Avl_entry * obj = 0; Avl_entry * obj = lookup_and_remove(_lookup_msg, source_addr);
{
Genode::Lock::Guard lock_guard(_lookup_msg_lock);
obj = _lookup_msg.first();
if (obj)
obj = obj->find(reinterpret_cast<Genode::addr_t>(source_addr));
if (!obj) { if (!obj) {
Genode::warning("unknown MessageDisk object - drop ack of block session"); Genode::warning("unknown MessageDisk object - drop ack of block session ", (void *)source_addr);
continue; continue;
} }
/* remove helper object */
_lookup_msg.remove(obj);
}
/* got the MessageDisk object */ /* got the MessageDisk object */
MessageDisk * msg = obj->msg(); MessageDisk * msg = obj->msg();
/* delete helper object */ /* delete helper object */
@ -126,32 +113,43 @@ void Seoul::Disk::handle_disk(unsigned disknr)
unsigned long long sector = msg->sector; unsigned long long sector = msg->sector;
sector = (sector-packet.block_number()) * _diskcon[disknr].blk_size; sector = (sector-packet.block_number()) * _diskcon[disknr].blk_size;
for (unsigned i = 0; i < msg->dmacount; i++) { bool const ok = check_dma_descriptors(msg,
char * dma_addr = _backing_store_base + [&](char * const dma_addr, unsigned i)
msg->dma[i].byteoffset + msg->physoffset; {
size_t const bytecount = msg->dma[i].bytecount;
// check for bounds if (bytecount > packet.size() ||
if (dma_addr >= _backing_store_base + _backing_store_size sector > packet.size() ||
|| dma_addr < _backing_store_base) { sector + bytecount > packet.size() ||
Genode::error("dma bounds violation"); source_addr > source->packet_content(packet) + packet.size() - sector - bytecount ||
} else _backing_store_base + _backing_store_size - bytecount < dma_addr)
memcpy(dma_addr, source_addr + sector, return false;
msg->dma[i].bytecount);
sector += msg->dma[i].bytecount; memcpy(dma_addr, source_addr + sector, bytecount);
} sector += bytecount;
return true;
});
if (!ok)
Genode::warning("DMA bounds violation during read");
destroy(disk_heap(), msg->dma); destroy(disk_heap(), msg->dma);
msg->dma = 0; msg->dma = nullptr;
} }
MessageDiskCommit mdc (disknr, msg->usertag, MessageDisk::DISK_OK); MessageDiskCommit mdc (disknr, msg->usertag, MessageDisk::DISK_OK);
_motherboard()->bus_diskcommit.send(mdc); _motherboard()->bus_diskcommit.send(mdc);
} }
{
Genode::Lock::Guard lock_guard(_alloc_lock);
source->release_packet(packet); source->release_packet(packet);
}
destroy(&_tslab_msg, msg); destroy(&_tslab_msg, msg);
} }
/* restart disk operations suspended due to out of memory by alloc_packet */
check_restart();
} }
@ -163,42 +161,31 @@ bool Seoul::Disk::receive(MessageDisk &msg)
if (msg.disknr >= MAX_DISKS) if (msg.disknr >= MAX_DISKS)
Logging::panic("You configured more disks than supported.\n"); Logging::panic("You configured more disks than supported.\n");
struct disk_session &disk = _diskcon[msg.disknr];
if (!disk.blk_size) {
Genode::String<16> label("VirtualDisk ", msg.disknr);
/* /*
* If we receive a message for this disk the first time, create the * If we receive a message for this disk the first time, create the
* structure for it. * structure for it.
*/ */
Genode::String<16> label("VirtualDisk ", msg.disknr);
if (!_diskcon[msg.disknr].blk_size) {
try { try {
Genode::Allocator_avl * block_alloc = Genode::Allocator_avl * block_alloc =
new (disk_heap()) Genode::Allocator_avl(disk_heap()); new (disk_heap()) Genode::Allocator_avl(disk_heap());
_diskcon[msg.disknr].blk_con = disk.blk_con =
new (disk_heap()) Block::Connection(_env, block_alloc, new (disk_heap()) Block::Connection(_env, block_alloc,
4*512*1024, 4*512*1024,
label.string()); label.string());
_diskcon[msg.disknr].signal = disk.signal =
new (disk_heap()) Seoul::Disk_signal(_env.ep(), *this, new (disk_heap()) Seoul::Disk_signal(_env.ep(), *this,
msg.disknr); *disk.blk_con, msg.disknr);
_diskcon[msg.disknr].blk_con->tx_channel()->sigh_ack_avail(
_diskcon[msg.disknr].signal->sigh);
} catch (...) { } catch (...) {
/* there is none. */ /* there is none. */
return false; return false;
} }
_diskcon[msg.disknr].blk_con->info(&_diskcon[msg.disknr].blk_cnt, disk.blk_con->info(&disk.blk_cnt, &disk.blk_size, &disk.ops);
&_diskcon[msg.disknr].blk_size,
&_diskcon[msg.disknr].ops);
Logging::printf("Got info: %llu blocks (%lu B), ops (R: %x, W:%x)\n ",
_diskcon[msg.disknr].blk_cnt,
_diskcon[msg.disknr].blk_size,
_diskcon[msg.disknr].ops.supported(Block::Packet_descriptor::READ),
_diskcon[msg.disknr].ops.supported(Block::Packet_descriptor::WRITE)
);
} }
msg.error = MessageDisk::DISK_OK; msg.error = MessageDisk::DISK_OK;
@ -206,104 +193,232 @@ bool Seoul::Disk::receive(MessageDisk &msg)
switch (msg.type) { switch (msg.type) {
case MessageDisk::DISK_GET_PARAMS: case MessageDisk::DISK_GET_PARAMS:
{ {
msg.params->flags = DiskParameter::FLAG_HARDDISK; Genode::String<16> label("VirtualDisk ", msg.disknr);
msg.params->sectors = _diskcon[msg.disknr].blk_cnt;
msg.params->sectorsize = _diskcon[msg.disknr].blk_size;
msg.params->maxrequestcount = _diskcon[msg.disknr].blk_cnt;
memcpy(msg.params->name, label.string(), label.length());
}
return true;
case MessageDisk::DISK_READ:
case MessageDisk::DISK_WRITE:
{
bool write = (msg.type == MessageDisk::DISK_WRITE);
if (write && !_diskcon[msg.disknr].ops.supported(Block::Packet_descriptor::WRITE)) { msg.params->flags = DiskParameter::FLAG_HARDDISK;
msg.params->sectors = disk.blk_cnt;
msg.params->sectorsize = disk.blk_size;
msg.params->maxrequestcount = disk.blk_cnt;
memcpy(msg.params->name, label.string(), label.length());
return true;
}
case MessageDisk::DISK_WRITE:
/* don't write on read only medium */
if (!disk.ops.supported(Block::Packet_descriptor::WRITE)) {
MessageDiskCommit ro(msg.disknr, msg.usertag, MessageDiskCommit ro(msg.disknr, msg.usertag,
MessageDisk::DISK_STATUS_DEVICE); MessageDisk::DISK_STATUS_DEVICE);
_motherboard()->bus_diskcommit.send(ro); _motherboard()->bus_diskcommit.send(ro);
return true; return true;
} }
case MessageDisk::DISK_READ:
/* read and write handling */
return execute(msg.type == MessageDisk::DISK_WRITE, disk, msg);
default:
Logging::printf("Got MessageDisk type %x\n", msg.type);
return false;
}
}
unsigned long long sector = msg.sector; void Seoul::Disk::check_restart()
unsigned long total = DmaDescriptor::sum_length(msg.dmacount, msg.dma); {
unsigned long blocks = total/_diskcon[msg.disknr].blk_size; bool restarted = true;
unsigned const blk_size = _diskcon[msg.disknr].blk_size;
if (blocks * blk_size < total) blocks++; while (restarted) {
Avl_entry * obj = lookup_and_remove(_restart_msg);
if (!obj)
return;
MessageDisk * const msg = obj->msg();
struct disk_session const &disk = _diskcon[msg->disknr];
restarted = restart(disk, msg);
if (restarted) {
destroy(&_tslab_avl, obj);
} else {
Genode::Lock::Guard lock_guard(_alloc_lock);
_restart_msg.insert(obj);
}
}
}
bool Seoul::Disk::restart(struct disk_session const &disk,
MessageDisk * const msg)
{
Block::Session::Tx::Source * const source = disk.blk_con->tx();
unsigned long const total = DmaDescriptor::sum_length(msg->dmacount, msg->dma);
unsigned const blk_size = disk.blk_size;
unsigned long const blocks = total/blk_size + ((total%blk_size) ? 1 : 0);
bool const write = msg->type == MessageDisk::DISK_WRITE;
Block::Session::Tx::Source *source = _diskcon[msg.disknr].blk_con->tx();
Block::Packet_descriptor packet; Block::Packet_descriptor packet;
/* save original msg, required when we get acknowledgements */
MessageDisk * msg_cpy = 0;
try { try {
msg_cpy = new (&_tslab_msg) MessageDisk(msg); Genode::Lock::Guard lock_guard(_alloc_lock);
packet = Block::Packet_descriptor(
source->alloc_packet(blocks * blk_size),
(write) ? Block::Packet_descriptor::WRITE
: Block::Packet_descriptor::READ,
msg->sector, blocks);
char * source_addr = source->packet_content(packet);
_lookup_msg.insert(new (&_tslab_avl) Avl_entry(source_addr, msg));
} catch (...) { return false; }
/* read */
if (!write) {
source->submit_packet(packet);
return true;
}
/* write */
char * source_addr = source->packet_content(packet);
source_addr += (msg->sector - packet.block_number()) * blk_size;
for (unsigned i = 0; i < msg->dmacount; i++) {
char * const dma_addr = _backing_store_base +
msg->dma[i].byteoffset +
msg->physoffset;
memcpy(source_addr, dma_addr, msg->dma[i].bytecount);
source_addr += msg->dma[i].bytecount;
}
/* free up DMA descriptors of write */
destroy(disk_heap(), msg->dma);
msg->dma = nullptr;
source->submit_packet(packet);
return true;
}
bool Seoul::Disk::execute(bool const write, struct disk_session const &disk,
MessageDisk const &msg)
{
unsigned long long const sector = msg.sector;
unsigned long const total = DmaDescriptor::sum_length(msg.dmacount, msg.dma);
unsigned long const blk_size = disk.blk_size;
unsigned long const blocks = total/blk_size + ((total%blk_size) ? 1 : 0);
Block::Session::Tx::Source * const source = disk.blk_con->tx();
Block::Packet_descriptor packet;
/* msg copy required for acknowledgements */
MessageDisk * const msg_cpy = new (&_tslab_msg) MessageDisk(msg);
char * source_addr = nullptr;
try {
Genode::Lock::Guard lock_guard(_alloc_lock);
packet = Block::Packet_descriptor( packet = Block::Packet_descriptor(
source->alloc_packet(blocks * blk_size), source->alloc_packet(blocks * blk_size),
(write) ? Block::Packet_descriptor::WRITE (write) ? Block::Packet_descriptor::WRITE
: Block::Packet_descriptor::READ, : Block::Packet_descriptor::READ,
sector, blocks); sector, blocks);
source_addr = source->packet_content(packet);
_lookup_msg.insert(new (&_tslab_avl) Avl_entry(source_addr, msg_cpy));
} catch (Block::Session::Tx::Source::Packet_alloc_failed) {
/* msg_cpy object will be used/freed below by copy_dma_descriptors */
} catch (...) { } catch (...) {
if (msg_cpy) if (msg_cpy)
destroy(&_tslab_msg, msg_cpy); destroy(&_tslab_msg, msg_cpy);
Logging::printf("could not allocate disk block elements\n");
Logging::printf("could not allocate disk block elements - "
"write=%u blocks=%lu\n", write, blocks);
return false; return false;
} }
char * source_addr = source->packet_content(packet); /*
{ * Copy DMA descriptors ever for read requests and for deferred write
Genode::Lock::Guard lock_guard(_lookup_msg_lock); * requests. The descriptors can be changed by the guest at any time.
_lookup_msg.insert(new (&_tslab_avl) Avl_entry(source_addr, */
msg_cpy)); bool const copy_dma_descriptors = !source_addr || !write;
}
/* copy DMA descriptors for read requests - they may change */ /* invalid packet allocation or read request */
if (!write) { if (copy_dma_descriptors) {
msg_cpy->dma = new (disk_heap()) DmaDescriptor[msg_cpy->dmacount]; msg_cpy->dma = new (disk_heap()) DmaDescriptor[msg_cpy->dmacount];
for (unsigned i = 0; i < msg_cpy->dmacount; i++) for (unsigned i = 0; i < msg_cpy->dmacount; i++)
memcpy(msg_cpy->dma + i, msg.dma + i, sizeof(DmaDescriptor)); memcpy(msg_cpy->dma + i, msg.dma + i, sizeof(DmaDescriptor));
}
/* validate DMA descriptors */
bool const ok = check_dma_descriptors(msg_cpy,
[&](char * const dma_addr, unsigned i)
{
if (!write)
/* evaluated during receive */
return true;
/* for write operation */ size_t const bytecount = msg_cpy->dma[i].bytecount;
source_addr += (sector - packet.block_number()) * blk_size;
/* check bounds for read and write operations */ if (bytecount > packet.size() ||
for (unsigned i = 0; i < msg_cpy->dmacount; i++) { source_addr > source->packet_content(packet) + packet.size() - bytecount ||
char * dma_addr = _backing_store_base + msg_cpy->dma[i].byteoffset _backing_store_base + _backing_store_size - bytecount < dma_addr)
+ msg_cpy->physoffset;
/* check for bounds */
if (dma_addr >= _backing_store_base + _backing_store_size
|| dma_addr < _backing_store_base) {
/* drop allocated objects not needed in error case */
if (write)
destroy(disk_heap(), msg_cpy->dma);
destroy(&_tslab_msg, msg_cpy);
source->release_packet(packet);
return false; return false;
}
if (write) { return true;
memcpy(source_addr, dma_addr, msg.dma[i].bytecount); });
source_addr += msg.dma[i].bytecount;
}
}
if (ok) {
/* DMA descriptors look good - go ahead with disk request */
if (source_addr)
/* valid packet for read request */
source->submit_packet(packet); source->submit_packet(packet);
} else {
break; /* failed packet allocation, restart at later point in time */
default: Genode::Lock::Guard lock_guard(_alloc_lock);
Logging::printf("Got MessageDisk type %x\n", msg.type); _restart_msg.insert(new (&_tslab_avl) Avl_entry(source_addr,
return false; msg_cpy));
} }
return true; return true;
} }
/* DMA descriptors look bad - free resources */
Seoul::Disk::~Disk() destroy(disk_heap(), msg_cpy->dma);
{ destroy(&_tslab_msg, msg_cpy);
/* XXX: Close all connections */ if (source_addr) {
Genode::Lock::Guard lock_guard(_alloc_lock);
source->release_packet(packet);
}
return false;
}
/* valid packet allocation for write request */
source_addr += (sector - packet.block_number()) * blk_size;
bool const ok = check_dma_descriptors(msg_cpy,
[&](char * const dma_addr, unsigned i)
{
/* read bytecount from guest memory once and don't evaluate again */
size_t const bytecount = msg.dma[i].bytecount;
if (bytecount > packet.size() ||
source_addr > source->packet_content(packet) + packet.size() - bytecount ||
_backing_store_base + _backing_store_size - bytecount < dma_addr)
return false;
memcpy(source_addr, dma_addr, bytecount);
source_addr += bytecount;
return true;
});
if (ok) {
/* don't needed anymore + protect us to use it again */
msg_cpy->dma = nullptr;
source->submit_packet(packet);
} else {
destroy(&_tslab_msg, msg_cpy);
Genode::Lock::Guard lock_guard(_alloc_lock);
source->release_packet(packet);
}
return ok;
} }

View File

@ -31,6 +31,9 @@
/* local includes */ /* local includes */
#include "synced_motherboard.h" #include "synced_motherboard.h"
/* Seoul includes */
#include <host/dma.h>
namespace Seoul { namespace Seoul {
class Disk; class Disk;
class Disk_signal; class Disk_signal;
@ -49,10 +52,15 @@ class Seoul::Disk_signal
Genode::Signal_handler<Disk_signal> const sigh; Genode::Signal_handler<Disk_signal> const sigh;
Disk_signal(Genode::Entrypoint &ep, Disk &obj, unsigned disk_nr) Disk_signal(Genode::Entrypoint &ep, Disk &obj,
Block::Connection &block, unsigned disk_nr)
: :
_obj(obj), _id(disk_nr), sigh(ep, *this, &Disk_signal::_signal) _obj(obj), _id(disk_nr),
{ } sigh(ep, *this, &Disk_signal::_signal)
{
block.tx_channel()->sigh_ack_avail(sigh);
block.tx_channel()->sigh_ready_to_submit(sigh);
}
}; };
@ -68,8 +76,8 @@ class Seoul::Disk : public StaticReceiver<Seoul::Disk>
private: private:
Genode::addr_t _key; Genode::addr_t const _key;
MessageDisk * _msg; MessageDisk * const _msg;
/* /*
* Noncopyable * Noncopyable
@ -79,10 +87,10 @@ class Seoul::Disk : public StaticReceiver<Seoul::Disk>
public: public:
Avl_entry(void * key, MessageDisk * msg) Avl_entry(void * key, MessageDisk * const msg)
: _key(reinterpret_cast<Genode::addr_t>(key)), _msg(msg) { } : _key(reinterpret_cast<Genode::addr_t>(key)), _msg(msg) { }
bool higher(Avl_entry *e) { return e->_key > _key; } bool higher(Avl_entry *e) const { return e->_key > _key; }
Avl_entry *find(Genode::addr_t ptr) Avl_entry *find(Genode::addr_t ptr)
{ {
@ -96,7 +104,7 @@ class Seoul::Disk : public StaticReceiver<Seoul::Disk>
/* block session used by disk models of VMM */ /* block session used by disk models of VMM */
enum { MAX_DISKS = 4 }; enum { MAX_DISKS = 4 };
struct { struct disk_session {
Block::Connection *blk_con; Block::Connection *blk_con;
Block::Session::Operations ops; Block::Session::Operations ops;
Genode::size_t blk_size; Genode::size_t blk_size;
@ -121,7 +129,9 @@ class Seoul::Disk : public StaticReceiver<Seoul::Disk>
Avl_entry_slab_sync _tslab_avl; Avl_entry_slab_sync _tslab_avl;
Genode::Avl_tree<Avl_entry> _lookup_msg { }; Genode::Avl_tree<Avl_entry> _lookup_msg { };
Genode::Lock _lookup_msg_lock { }; Genode::Avl_tree<Avl_entry> _restart_msg { };
/* _alloc_lock protects both lists + alloc_packet/release_packet !!! */
Genode::Lock _alloc_lock { };
/* /*
* Noncopyable * Noncopyable
@ -129,6 +139,48 @@ class Seoul::Disk : public StaticReceiver<Seoul::Disk>
Disk(Disk const &); Disk(Disk const &);
Disk &operator = (Disk const &); Disk &operator = (Disk const &);
void check_restart();
bool restart(struct disk_session const &, MessageDisk * const);
bool execute(bool const write, struct disk_session const &,
MessageDisk const &);
template <typename FN>
bool check_dma_descriptors(MessageDisk const * const msg,
FN const &fn)
{
/* check bounds for read and write operations */
for (unsigned i = 0; i < msg->dmacount; i++) {
char * const dma_addr = _backing_store_base +
msg->dma[i].byteoffset +
msg->physoffset;
/* check for bounds */
if (dma_addr >= _backing_store_base + _backing_store_size ||
dma_addr < _backing_store_base)
return false;
if (!fn(dma_addr, i))
return false;
}
return true;
}
/* find the corresponding MessageDisk object */
Avl_entry * lookup_and_remove(Genode::Avl_tree<Avl_entry> &tree,
void * specific_obj = nullptr)
{
Genode::Lock::Guard lock_guard(_alloc_lock);
Avl_entry * obj = tree.first();
if (obj && specific_obj)
obj = obj->find(reinterpret_cast<Genode::addr_t>(specific_obj));
if (obj)
tree.remove(obj);
return obj;
}
public: public:
/** /**
@ -137,8 +189,6 @@ class Seoul::Disk : public StaticReceiver<Seoul::Disk>
Disk(Genode::Env &, Synced_motherboard &, char * backing_store_base, Disk(Genode::Env &, Synced_motherboard &, char * backing_store_base,
Genode::size_t backing_store_size); Genode::size_t backing_store_size);
~Disk();
void handle_disk(unsigned); void handle_disk(unsigned);
bool receive(MessageDisk &msg); bool receive(MessageDisk &msg);

View File

@ -1138,13 +1138,6 @@ class Machine : public StaticReceiver<Machine>
} }
} }
bool receive(MessageDisk &msg)
{
if (verbose_debug)
Logging::printf("MessageDisk\n");
return false;
}
bool receive(MessageTimer &msg) bool receive(MessageTimer &msg)
{ {
switch (msg.type) { switch (msg.type) {
@ -1265,7 +1258,6 @@ class Machine : public StaticReceiver<Machine>
/* register host operations, called back by the VMM */ /* register host operations, called back by the VMM */
_unsynchronized_motherboard.bus_hostop.add (this, receive_static<MessageHostOp>); _unsynchronized_motherboard.bus_hostop.add (this, receive_static<MessageHostOp>);
_unsynchronized_motherboard.bus_disk.add (this, receive_static<MessageDisk>);
_unsynchronized_motherboard.bus_timer.add (this, receive_static<MessageTimer>); _unsynchronized_motherboard.bus_timer.add (this, receive_static<MessageTimer>);
_unsynchronized_motherboard.bus_time.add (this, receive_static<MessageTime>); _unsynchronized_motherboard.bus_time.add (this, receive_static<MessageTime>);
_unsynchronized_motherboard.bus_network.add (this, receive_static<MessageNetwork>); _unsynchronized_motherboard.bus_network.add (this, receive_static<MessageNetwork>);