platform/pc: recover from invalidation errors

In the unlikely event of an invalidation-queue error, the
invalidator gets stuck in an infinite loop. We therefore need to check
for and recover from invalidation-queue errors while waiting for completion.

genodelabs/genode#5523
Commit 29c6c4cc0e (parent df97b39340)
Author: Johannes Schlatow, 2025-04-11 08:45:20 +02:00 (committed by Norman Feske)
5 changed files with 100 additions and 24 deletions
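
To illustrate the problem and the fix outside the driver, here is a minimal standalone sketch (not part of the commit). Queue_model is a hypothetical stand-in for the Queue_mmio head/tail registers. Once an invalidation-queue error stalls the hardware, the head pointer stops advancing, so a plain `while (!_empty());` spins forever. The reworked wait additionally polls the error condition, resets the tail to the head so the queue reads as empty again, and hands the fault over to the fault handler.

/*
 * Standalone sketch, not driver code: 'Queue_model' and the printf
 * reporting are assumptions for illustration only.
 */
#include <cstdio>

struct Queue_model
{
	unsigned head = 0;     /* advanced by "hardware" as descriptors complete */
	unsigned tail = 0;     /* advanced by the driver when queuing descriptors */
	bool     iqe  = false; /* invalidation-queue error, stalls the head */

	bool empty() const { return head == tail; }
};

/* old behaviour: while (!queue.empty()); never returns once 'iqe' is set */

/* new behaviour, mirroring Queued_invalidator::_wait_for_completion() */
void wait_for_completion(Queue_model &queue)
{
	while (!queue.empty()) {
		if (queue.iqe) {
			/* reset tail pointer to recover from the invalidation-queue error */
			queue.tail = queue.head;
			std::printf("invalidation-queue error recovered\n");
			return;
		}
	}
}

int main()
{
	Queue_model queue;
	queue.tail = 2;    /* two descriptors queued ...            */
	queue.iqe  = true; /* ... but the hardware reported an error */

	wait_for_completion(queue);   /* returns instead of spinning */
	return queue.empty() ? 0 : 1;
}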


@ -0,0 +1,30 @@
/*
* \brief Intel IOMMU fault handler
* \author Johannes Schlatow
* \date 2025-04-11
*/
/*
* Copyright (C) 2025 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU Affero General Public License version 3.
*/
#ifndef _SRC__DRIVERS__PLATFORM__INTEL__FAULT_HANDLER_H_
#define _SRC__DRIVERS__PLATFORM__INTEL__FAULT_HANDLER_H_
#include <util/interface.h>
namespace Intel {
class Fault_handler : Interface
{
public:
virtual bool iq_error() = 0;
virtual void handle_faults() = 0;
};
}
#endif /* _SRC__DRIVERS__PLATFORM__INTEL__FAULT_HANDLER_H_ */
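
The interface is intentionally minimal: further down in this commit, Io_mmu implements it and the Queued_invalidator stores a Fault_handler reference, so the invalidator can query and report errors without depending on the Io_mmu class directly. A hypothetical implementer (for illustration only, not part of the commit) could look like this:

/* illustration only: 'Stub_fault_handler' is a hypothetical class */
#include <intel/fault_handler.h>
#include <base/log.h>

struct Stub_fault_handler : Intel::Fault_handler
{
	bool error = false;   /* the driver derives this from Fault_status instead */

	bool iq_error()      override { return error; }
	void handle_faults() override { Genode::log("handling pending faults"); }
};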


@ -13,7 +13,6 @@
/* local includes */
#include <intel/invalidator.h>
#include <intel/io_mmu.h>
/**
* Clear IOTLB.
@ -120,6 +119,20 @@ void Intel::Register_invalidator::invalidate_all(Domain_id domain_id, Pci::rid_t
}
void Intel::Queued_invalidator::_wait_for_completion()
{
while (!_empty()) {
if (_fault_handler.iq_error()) {
/* reset tail pointer to recover from invalidation queue error */
_queue_mmio.write<Queue_mmio::Tail>(_queue_mmio.read<Queue_mmio::Head>());
_fault_handler.handle_faults();
return;
}
}
}
/* Clear interrupt entry cache */
void Intel::Queued_invalidator::invalidate_irq(unsigned idx, bool global)
{
@ -130,8 +143,7 @@ void Intel::Queued_invalidator::invalidate_irq(unsigned idx, bool global)
_next();
/* wait for completion */
while (!_empty());
_wait_for_completion();
}
@ -159,8 +171,7 @@ void Intel::Queued_invalidator::invalidate_iotlb(Domain_id domain_id)
_next();
/* wait for completion */
while (!_empty());
_wait_for_completion();
/*
* Note: At the moment we have no practical benefit from implementing
@ -203,8 +214,7 @@ void Intel::Queued_invalidator::invalidate_context(Domain_id domain_id, Pci::rid
_next();
/* wait for completion */
while (!_empty());
_wait_for_completion();
}


@ -21,11 +21,11 @@
/* local includes */
#include <intel/domain_allocator.h>
#include <intel/fault_handler.h>
namespace Intel {
using namespace Genode;
class Io_mmu; /* forward declaration */
class Invalidator;
class Register_invalidator;
class Queued_invalidator;
@ -213,6 +213,8 @@ class Intel::Queued_invalidator : public Invalidator
bool _empty() {
return _queue_mmio.read<Queue_mmio::Head>() == _queue_mmio.read<Queue_mmio::Tail>(); }
void _wait_for_completion();
Descriptor::access_t *_tail()
{
Descriptor::access_t *tail =
@ -234,6 +236,8 @@ class Intel::Queued_invalidator : public Invalidator
_queue_mmio.write<Queue_mmio::Tail>(tail_offset);
}
Fault_handler & _fault_handler;
public:
void invalidate_irq(unsigned, bool) override;
@ -242,9 +246,12 @@ class Intel::Queued_invalidator : public Invalidator
void invalidate_all(Domain_id domain = Domain_id { Domain_id::INVALID },
Pci::rid_t = 0) override;
Queued_invalidator(Genode::Env & env, addr_t queue_reg_base)
Queued_invalidator(Genode::Env & env,
Intel::Fault_handler & fh,
addr_t queue_reg_base)
: _queue_mmio({(char*)queue_reg_base, 56}),
_queue(env.ram(), env.rm(), 4096, Cache::CACHED)
_queue(env.ram(), env.rm(), 4096, Cache::CACHED),
_fault_handler(fh)
{
/* set tail register to zero */
_queue_mmio.write<Queue_mmio::Tail>(0);


@ -137,25 +137,33 @@ void Intel::Io_mmu::_handle_faults()
if (_fault_irq.constructed())
_fault_irq->ack_irq();
if (read<Fault_status::Pending>()) {
if (read<Fault_status::Overflow>())
error("Fault recording overflow");
handle_faults();
}
if (read<Fault_status::Iqe>())
error("Invalidation queue error");
if (read<Fault_status::Ice>())
error("Invalidation completion error");
void Intel::Io_mmu::handle_faults()
{
Fault_status::access_t status = read<Fault_status>();
if (read<Fault_status::Ite>())
error("Invalidation time-out error");
if (Fault_status::Overflow::get(status))
error("Fault recording overflow");
/* acknowledge all faults */
write<Fault_status>(0x7d);
if (Fault_status::Iqe::get(status))
error("Invalidation queue error: ", Hex(read<Invalidation_queue_error>()));
if (Fault_status::Ice::get(status))
error("Invalidation completion error");
if (Fault_status::Ite::get(status))
error("Invalidation time-out error");
/* acknowledge all faults */
write<Fault_status>(status);
if (Fault_status::Pending::get(status)) {
error("Faults records for ", name());
unsigned num_registers = read<Capability::Nfr>() + 1;
for (unsigned i = read<Fault_status::Fri>(); ; i = (i + 1) % num_registers) {
for (unsigned i = Fault_status::Fri::get(status); ; i = (i + 1) % num_registers) {
Fault_record_hi::access_t hi = read_fault_record<Fault_record_hi>(i);
if (!Fault_record_hi::Fault::get(hi))
@ -180,6 +188,15 @@ void Intel::Io_mmu::_handle_faults()
}
bool Intel::Io_mmu::iq_error()
{
Fault_status::access_t status = read<Fault_status>();
return Fault_status::Iqe::get(status) ||
Fault_status::Ice::get(status) ||
Fault_status::Ite::get(status);
}
void Intel::Io_mmu::generate(Xml_generator & xml)
{
xml.node("intel", [&] () {
@ -386,7 +403,7 @@ void Intel::Io_mmu::_init()
if (read<Extended_capability::Qi>()) {
/* enable queued invalidation if supported */
_queued_invalidator.construct(_env, base() + 0x80);
_queued_invalidator.construct(_env, *this, base() + 0x80);
_global_command<Global_command::Qie>(true);
} else {
/* use register-based invalidation interface as fallback */


@ -33,6 +33,7 @@
#include <intel/default_mappings.h>
#include <intel/invalidator.h>
#include <intel/irq_remap_table.h>
#include <intel/fault_handler.h>
#include <expanding_page_table_allocator.h>
namespace Intel {
@ -56,7 +57,8 @@ namespace Intel {
class Intel::Io_mmu : private Attached_mmio<0x800>,
public Driver::Io_mmu,
private Translation_table_registry
private Translation_table_registry,
public Fault_handler
{
public:
@ -353,6 +355,9 @@ class Intel::Io_mmu : private Attached_mmio<0x800>,
/* not using extended interrupt mode (x2APIC) */
};
struct Invalidation_queue_error : Register<0xb0,32>
{ };
struct Fault_status : Register<0x34, 32>
{
/* fault record index */
@ -563,6 +568,13 @@ class Intel::Io_mmu : private Attached_mmio<0x800>,
void flush_write_buffer();
/**
* Fault_handler interface
*/
bool iq_error() override;
void handle_faults() override;
/**
* Io_mmu suspend/resume interface
*/