genode/repos/os/src/drivers/nvme/main.cc

2040 lines
50 KiB
C++
Raw Normal View History

/*
* \brief NVMe Block session component
* \author Josef Soentgen
* \date 2018-03-05
*
* Spec used: NVM-Express-1_3a-20171024_ratified.pdf
*/
/*
* Copyright (C) 2018 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU Affero General Public License version 3.
*/
/* Genode includes */
#include <base/allocator_avl.h>
#include <base/attached_ram_dataspace.h>
#include <base/attached_rom_dataspace.h>
#include <base/component.h>
#include <base/heap.h>
#include <base/log.h>
#include <block/request_stream.h>
#include <dataspace/client.h>
#include <os/attached_mmio.h>
#include <os/reporter.h>
#include <os/session_policy.h>
#include <root/root.h>
#include <timer_session/connection.h>
#include <util/bit_array.h>
#include <util/interface.h>
#include <util/misc_math.h>
/* local includes */
#include <util.h>
#include <pci.h>
/*
 * File-local aliases that make the Genode integer and address types as
 * well as the request-stream response type usable without qualification.
 */
namespace {
using uint16_t = Genode::uint16_t;
using uint32_t = Genode::uint32_t;
using uint64_t = Genode::uint64_t;
using size_t = Genode::size_t;
using addr_t = Genode::addr_t;
using Response = Block::Request_stream::Response;
} /* anonymous namespace */
/**********
** NVMe **
**********/
/*
 * Forward declarations and compile-time limits shared by all parts of
 * the driver.
 */
namespace Nvme {
using namespace Genode;
struct Identify_data;
struct Identify_ns_data;
struct Doorbell;
struct Cqe;
struct Sqe;
struct Sqe_create_cq;
struct Sqe_create_sq;
struct Sqe_identify;
struct Sqe_io;
struct Queue;
struct Sq;
struct Cq;
struct Controller;
enum {
/* size of one completion-queue entry: 2^4 = 16 bytes */
CQE_LEN_LOG2 = 4u,
CQE_LEN = 1u << CQE_LEN_LOG2,
/* size of one submission-queue entry: 2^6 = 64 bytes */
SQE_LEN_LOG2 = 6u,
SQE_LEN = 1u << SQE_LEN_LOG2,
MAX_IO_QUEUES = 1,
/*
 * Limit max number of I/O slots. By now most controllers
 * should support >= 1024 but the current value is a trade-off
 * as all data structures are allocated statically. However,
 * the number of entries is rounded down to the number the
 * controller actually supports in case it is smaller.
 */
MAX_IO_ENTRIES = 512,
MAX_IO_ENTRIES_MASK = MAX_IO_ENTRIES - 1,
MAX_ADMIN_ENTRIES = 128,
MAX_ADMIN_ENTRIES_MASK = MAX_ADMIN_ENTRIES - 1,
/* memory page size used by the driver: 2^12 = 4 KiB */
MPS_LOG2 = 12u,
MPS = 1u << MPS_LOG2,
};
enum {
/*
 * Limit max I/O requests size; we can map up to 2 MiB with
 * one list page (4K/8 = 512 * 4K). However, the size is
 * rounded down to the size the controller actually supports
 * according to the MDTS register.
 */
MAX_IO_LEN = 2u << 20,
/* one page of PRP-list backing store per I/O slot */
PRP_DS_SIZE = MAX_IO_ENTRIES * MPS,
};
enum {
/*
 * Limit namespace handling to the first namespace. Most
 * if not all consumer NVMe devices only have one.
 */
IO_NSID = 1u,
MAX_NS = 1u,
/* one queue pair for the admin queues plus one per namespace */
NUM_QUEUES = 1 + MAX_NS,
};
/*
 * Command opcodes. Admin and NVM opcodes share one enum, so some
 * values are intentionally identical (e.g., DELETE_IO_SQ and FLUSH).
 */
enum Opcode {
/* Admin command set */
DELETE_IO_SQ = 0x00,
CREATE_IO_SQ = 0x01,
DELETE_IO_CQ = 0x04,
CREATE_IO_CQ = 0x05,
IDENTIFY = 0x06,
SET_FEATURES = 0x09,
GET_FEATURES = 0x0A,
/* NVM command set */
FLUSH = 0x00,
WRITE = 0x01,
READ = 0x02,
WRITE_ZEROS = 0x08,
};
struct Block_session_component;
struct Driver;
struct Main;
};
/*
 * Identify command data
 *
 * Overlay for the buffer filled by the 'Identify Controller' command.
 * The serial number, model number, and firmware revision are copied
 * out of the buffer on construction via 'Util::extract_string'.
 *
 * Offsets follow the Identify Controller data structure of the NVMe
 * 1.3 specification.
 */
struct Nvme::Identify_data : Genode::Mmio
{
	enum {
		SN_OFFSET = 0x04, SN_LEN = 20,
		MN_OFFSET = 0x18, MN_LEN = 40,

		/* the firmware revision occupies bytes 64..71 only */
		FR_OFFSET = 0x40, FR_LEN = 8,
	};

	using Sn = Genode::String<SN_LEN + 1>;
	using Mn = Genode::String<MN_LEN + 1>;
	using Fr = Genode::String<FR_LEN + 1>;

	Sn sn { }; /* serial number */
	Mn mn { }; /* model number */
	Fr fr { }; /* firmware revision */

	struct Vid   : Register<0x000, 16> { }; /* vendor id */
	struct Ssvid : Register<0x002, 16> { }; /* sub system vendor id */
	struct Mdts  : Register<0x04d,  8> { }; /* maximum data transfer size */

	/* optional admin command support (bytes 256..257) */
	struct Oacs : Register<0x100, 16>
	{
		struct Ssr  : Bitfield< 0, 1> { }; /* security send/receive */
		struct Nvmf : Bitfield< 1, 1> { }; /* NVM format */
		struct Fwcd : Bitfield< 2, 1> { }; /* firmware commit/download image */
		struct Nsm  : Bitfield< 3, 1> { }; /* namespace management */
		struct Vm   : Bitfield< 7, 1> { }; /* virtualization management */
	};

	struct Nn  : Register<0x204, 32> { }; /* number of namespaces */
	struct Vwc : Register<0x20d,  8> { }; /* volatile write cache (byte 525) */

	/**
	 * Constructor
	 *
	 * \param base  local address of the identify data buffer
	 */
	Identify_data(addr_t const base)
	: Genode::Mmio(base)
	{
		char const *p = (char const*)base;

		sn = Sn(Util::extract_string(p, SN_OFFSET, SN_LEN+1));
		mn = Mn(Util::extract_string(p, MN_OFFSET, MN_LEN+1));
		fr = Fr(Util::extract_string(p, FR_OFFSET, FR_LEN+1));
	}
};
/*
 * Identify name space command data
 *
 * Overlay for the buffer filled by the 'Identify Namespace' command.
 */
struct Nvme::Identify_ns_data : public Genode::Mmio
{
	struct Nsze   : Register<0x00, 64> { }; /* name space size */
	struct Ncap   : Register<0x08, 64> { }; /* name space capacity */
	struct Nuse   : Register<0x10, 64> { }; /* name space utilization */
	struct Nsfeat : Register<0x18,  8> { }; /* name space features */
	struct Nlbaf  : Register<0x19,  8> { }; /* number of LBA formats */

	/* formatted LBA size */
	struct Flbas : Register<0x1a, 8>
	{
		/*
		 * Index of the LBA format in use. The index covers bits 3:0 so
		 * that all MAX_LBAF (16) entries of 'Lbaf' are addressable.
		 */
		struct Formats : Bitfield< 0, 4> { };
	};

	struct Mc  : Register<0x1b, 8> { }; /* metadata capabilities */
	struct Dpc : Register<0x1c, 8> { }; /* end-to-end data protection capabilities */
	struct Dps : Register<0x1d, 8> { }; /* end-to-end data protection settings */

	enum { MAX_LBAF = 16, };

	/* LBA format support */
	struct Lbaf : Register_array<0x80, 32, MAX_LBAF, 32>
	{
		struct Ms    : Bitfield< 0, 16> { }; /* metadata size */
		struct Lbads : Bitfield<16,  8> { }; /* LBA data size (2^n) */
		struct Rp    : Bitfield<24,  2> { }; /* relative performance */
	};

	/**
	 * Constructor
	 *
	 * \param base  local address of the identify-namespace data buffer
	 */
	Identify_ns_data(addr_t const base)
	: Genode::Mmio(base)
	{ }
};
/*
 * Queue doorbell register
 *
 * Pair of a submission-queue tail doorbell at offset 0x00 and a
 * completion-queue head doorbell at offset 0x04.
 */
struct Nvme::Doorbell : public Genode::Mmio
{
struct Sqtdbl : Register<0x00, 32>
{
struct Sqt : Bitfield< 0, 16> { }; /* submission queue tail */
};
struct Cqhdbl : Register<0x04, 32>
{
struct Cqh : Bitfield< 0, 16> { }; /* completion queue head */
};
/* \param base  local address of the doorbell pair */
Doorbell(addr_t const base)
: Genode::Mmio(base) { }
};
/*
 * Completion queue entry
 *
 * Overlay for one CQE_LEN-sized entry of a completion queue.
 */
struct Nvme::Cqe : Genode::Mmio
{
	struct Dw0  : Register<0x00, 32> { }; /* command specific */
	struct Dw1  : Register<0x04, 32> { }; /* reserved */
	struct Sqhd : Register<0x08, 16> { }; /* submission queue head */
	struct Sqid : Register<0x0a, 16> { }; /* submission queue identifier */
	struct Cid  : Register<0x0c, 16> { }; /* command identifier */

	/* status field including the phase tag */
	struct Sf : Register<0x0e, 16>
	{
		struct P   : Bitfield< 0, 1> { }; /* phase tag */
		struct Sc  : Bitfield< 1, 8> { }; /* status code */
		struct Sct : Bitfield< 9, 3> { }; /* status code type */
		struct M   : Bitfield<14, 1> { }; /* more (get log) */
		struct Dnr : Bitfield<15, 1> { }; /* do not retry */
	};

	/**
	 * Constructor
	 *
	 * \param base  local address of the completion queue entry
	 */
	Cqe(addr_t const base) : Genode::Mmio(base) { }

	/**
	 * Return request id composed of submission queue id (upper 16 bit)
	 * and command id (lower 16 bit)
	 */
	static uint32_t request_id(Nvme::Cqe const &b)
	{
		/* widen before shifting, a promoted 'int' would overflow for ids >= 0x8000 */
		uint32_t const sqid = b.read<Sqid>();
		uint32_t const cid  = b.read<Cid>();

		return (sqid << 16) | cid;
	}

	/**
	 * Return command identifier of the completed command
	 */
	static uint16_t command_id(Nvme::Cqe const &b)
	{
		return b.read<Cid>();
	}

	/**
	 * Return true if the command completed successfully
	 *
	 * A status code of zero denotes success only in combination with
	 * the generic status code type (zero). For any other type, code
	 * zero is an error, so both fields are checked.
	 */
	static bool succeeded(Nvme::Cqe const &b)
	{
		return b.read<Sf::Sc>() == 0 && b.read<Sf::Sct>() == 0;
	}

	/**
	 * Print all fields of the entry to the log
	 */
	static void dump(Nvme::Cqe const &b)
	{
		using namespace Genode;
		log("sqhd:", b.read<Sqhd>(), " "
		    "sqid:", b.read<Sqid>(), " "
		    "cid:", b.read<Cid>(), " "
		    "p:", b.read<Sf::P>(), " "
		    "status: ", Hex(b.read<Sf>()), " "
		    "sc:", Hex(b.read<Sf::Sc>()), " "
		    "sct:", Hex(b.read<Sf::Sct>()), " "
		    "m:", b.read<Sf::M>(), " "
		    "dnr:", b.read<Sf::Dnr>());
	}
};
/*
 * Submission queue entry base
 *
 * Overlay for the command-independent part of one SQE_LEN-sized
 * submission queue entry. Command-specific dwords are added by the
 * derived structures.
 */
struct Nvme::Sqe : Genode::Mmio
{
	struct Cdw0 : Register<0x00, 32>
	{
		struct Opc  : Bitfield< 0,  8> { }; /* opcode */
		struct Fuse : Bitfield< 8,  2> { }; /* fused operation (bits 9:8) */
		struct Psdt : Bitfield<14,  2> { }; /* PRP or SGL for data transfer */
		struct Cid  : Bitfield<16, 16> { }; /* command identifier */
	};

	struct Nsid : Register<0x04, 32> { }; /* namespace identifier */
	struct Mptr : Register<0x10, 64> { }; /* metadata pointer */
	struct Prp1 : Register<0x18, 64> { }; /* PRP entry 1 */
	struct Prp2 : Register<0x20, 64> { }; /* PRP entry 2 */

	/* SGL not supported */

	/**
	 * Constructor
	 *
	 * \param base  local address of the entry, 0 denotes "no entry"
	 */
	Sqe(addr_t const base) : Genode::Mmio(base) { }

	/**
	 * Return true if the object refers to an actual queue slot
	 */
	bool valid() const { return base() != 0ul; }
};
/*
 * Identify command
 *
 * Extends the generic submission queue entry by command dword 10,
 * which selects the data structure to be returned.
 */
struct Nvme::Sqe_identify : Nvme::Sqe
{
struct Cdw10 : Register<0x28, 32>
{
struct Cns : Bitfield< 0, 8> { }; /* controller or namespace structure */
};
/* \param base  local address of the submission queue entry */
Sqe_identify(addr_t const base) : Sqe(base) { }
};
/*
 * Create completion queue command
 *
 * Extends the generic submission queue entry by command dwords 10
 * and 11.
 */
struct Nvme::Sqe_create_cq : Nvme::Sqe
{
struct Cdw10 : Register<0x28, 32>
{
struct Qid : Bitfield< 0, 16> { }; /* queue identifier */
struct Qsize : Bitfield<16, 16> { }; /* queue size, 0-based value */
};
struct Cdw11 : Register<0x2c, 32>
{
struct Pc : Bitfield< 0, 1> { }; /* physically contiguous */
struct En : Bitfield< 1, 1> { }; /* interrupts enabled */
struct Iv : Bitfield<16, 16> { }; /* interrupt vector */
};
/* \param base  local address of the submission queue entry */
Sqe_create_cq(addr_t const base) : Sqe(base) { }
};
/*
 * Create submission queue command
 *
 * Extends the generic submission queue entry by command dwords 10
 * and 11.
 */
struct Nvme::Sqe_create_sq : Nvme::Sqe
{
struct Cdw10 : Register<0x28, 32>
{
struct Qid : Bitfield< 0, 16> { }; /* queue identifier */
struct Qsize : Bitfield<16, 16> { }; /* queue size, 0-based value */
};
struct Cdw11 : Register<0x2c, 32>
{
struct Pc : Bitfield< 0, 1> { }; /* physically contiguous */
struct Qprio : Bitfield< 1, 2> { }; /* queue priority */
struct Cqid : Bitfield<16, 16> { }; /* completion queue identifier */
};
/* \param base  local address of the submission queue entry */
Sqe_create_sq(addr_t const base) : Sqe(base) { }
};
/*
 * I/O command
 *
 * Extends the generic submission queue entry by the starting LBA
 * (command dwords 10 and 11, combined into 'Slba') and command
 * dword 12.
 */
struct Nvme::Sqe_io : Nvme::Sqe
{
struct Slba_lower : Register<0x28, 32> { };
struct Slba_upper : Register<0x2c, 32> { };
/* 64-bit starting LBA composed of the two 32-bit halves above */
struct Slba : Genode::Bitset_2<Slba_lower, Slba_upper> { };
struct Cdw12 : Register<0x30, 32>
{
struct Deac : Bitfield<25, 1> { }; /* for WRITE_ZEROS needed by TRIM */
struct Nlb : Bitfield< 0, 16> { }; /* number of logical blocks */
};
/* \param base  local address of the submission queue entry */
Sqe_io(addr_t const base) : Sqe(base) { }
};
/*
 * Queue base structure
 *
 * Book-keeping shared by submission and completion queues: the backing
 * dataspace, its physical and local address, and the number of slots.
 */
struct Nvme::Queue
{
	Genode::Ram_dataspace_capability ds { };

	addr_t   pa          { 0 };
	addr_t   va          { 0 };
	uint32_t max_entries { 0 };

	/**
	 * Return true once a physical address was assigned to the queue
	 */
	bool valid() const
	{
		return pa != 0ul;
	}
};
/*
 * Submission queue
 */
struct Nvme::Sq : Nvme::Queue
{
	uint32_t tail { 0 };
	uint16_t id   { 0 };

	/**
	 * Hand out the slot at the current tail and advance the tail
	 *
	 * The slot is zeroed before it is handed out.
	 *
	 * \return  local address of the slot
	 */
	addr_t next()
	{
		addr_t const slot = va + (tail * SQE_LEN);

		Genode::memset((void*)slot, 0, SQE_LEN);

		/* move on to the following slot, wrapping at the end of the ring */
		uint32_t const following = tail + 1;
		tail = following % max_entries;

		return slot;
	}
};
/*
 * Completion queue
 */
struct Nvme::Cq : Nvme::Queue
{
	uint32_t head  { 0 };
	uint32_t phase { 1 };

	/**
	 * Return local address of the entry at the current head
	 */
	addr_t next()
	{
		return va + (head * CQE_LEN);
	}

	/**
	 * Advance the head by one entry
	 *
	 * When the head wraps around to the first entry, the expected
	 * phase is toggled.
	 */
	void advance_head()
	{
		head = head + 1;

		if (head < max_entries) { return; }

		head  = 0;
		phase = phase ^ 1;
	}
};
/*
* Controller
*/
struct Nvme::Controller : public Genode::Attached_mmio
{
/**********
** MMIO **
**********/
/*
 * Controller capabilities (p. 40 ff.)
 */
struct Cap : Register<0x0, 64>
{
	struct Mqes   : Bitfield< 0, 16> { }; /* maximum queue entries supported 0-based (bits 15:0) */
	struct Cqr    : Bitfield<16,  1> { }; /* contiguous queues required */
	struct Ams    : Bitfield<17,  2> { }; /* arbitration mechanism supported */
	struct To     : Bitfield<24,  8> { }; /* timeout (csts.rdy) */
	struct Dstrd  : Bitfield<32,  4> { }; /* doorbell stride */
	struct Nssrs  : Bitfield<36,  1> { }; /* NVM subsystem reset supported */
	struct Css    : Bitfield<37,  8> { }; /* command sets supported */
	struct Bps    : Bitfield<45,  1> { }; /* boot partition support */
	struct Mpsmin : Bitfield<48,  4> { }; /* memory page size minimum */
	struct Mpsmax : Bitfield<52,  4> { }; /* memory page size maximum */
};

/*
 * Version
 */
struct Vs : Register<0x8, 32>
{
	struct Ter : Bitfield< 0,  8> { }; /* tertiary */
	struct Mnr : Bitfield< 8,  8> { }; /* minor */
	struct Mjr : Bitfield<16, 16> { }; /* major */
};

/*
 * Interrupt mask set (for !MSI-X)
 */
struct Intms : Register<0x0c, 32>
{
	struct Ivms : Bitfield<0, 32> { }; /* interrupt vector mask set */
};

/*
 * Interrupt mask clear
 */
struct Intmc : Register<0x10, 32>
{
	struct Ivmc : Bitfield<0, 32> { }; /* interrupt vector mask clear */
};

/*
 * Controller configuration
 */
struct Cc : Register<0x14, 32>
{
	struct En     : Bitfield< 0, 1> { }; /* enable */
	struct Css    : Bitfield< 4, 3> { }; /* I/O command set selected */
	struct Mps    : Bitfield< 7, 4> { }; /* memory page size */
	struct Ams    : Bitfield<11, 3> { }; /* arbitration mechanism selected */
	struct Shn    : Bitfield<14, 2> { }; /* shutdown notification */
	struct Iosqes : Bitfield<16, 4> { }; /* I/O submission queue entry size */
	struct Iocqes : Bitfield<20, 4> { }; /* I/O completion queue entry size */
};

/*
 * Controller status
 */
struct Csts : Register<0x1c, 32>
{
	struct Rdy   : Bitfield< 0, 1> { }; /* ready */
	struct Cfs   : Bitfield< 1, 1> { }; /* controller fatal status */
	struct Shst  : Bitfield< 2, 2> { }; /* shutdown status (bits 3:2) */
	struct Nssro : Bitfield< 4, 1> { }; /* NVM subsystem reset occurred */
	struct Pp    : Bitfield< 5, 1> { }; /* processing paused */
};

/*
 * NVM subsystem reset
 */
struct Nssr : Register<0x20, 32>
{
	struct Nssrc : Bitfield< 0, 32> { }; /* NVM subsystem reset control */
};

/*
 * Admin queue attributes
 */
struct Aqa : Register<0x24, 32>
{
	struct Asqs : Bitfield< 0, 12> { }; /* admin submission queue size 0-based */
	struct Acqs : Bitfield<16, 12> { }; /* admin completion queue size 0-based */
};

/*
 * Admin submission queue base address
 */
struct Asq : Register<0x28, 64>
{
	struct Asqb : Bitfield<12, 52> { }; /* admin submission queue base */
};

/*
 * Admin completion queue base address
 */
struct Acq : Register<0x30, 64>
{
	struct Acqb : Bitfield<12, 52> { }; /* admin completion queue base */
};

/*
 * Controller memory buffer location
 */
struct Cmbloc : Register<0x38, 32>
{
	struct Bir  : Bitfield< 0,  3> { }; /* base indicator register (bits 2:0) */
	struct Ofst : Bitfield<12, 20> { }; /* offset (bits 31:12) */
};

/*
 * Controller memory buffer size
 */
struct Cmbsz : Register<0x3c, 32>
{
	struct Sqs   : Bitfield< 0,  1> { }; /* submission queue support */
	struct Cqs   : Bitfield< 1,  1> { }; /* completion queue support */
	struct Lists : Bitfield< 2,  1> { }; /* PRP SGL list support */
	struct Rds   : Bitfield< 3,  1> { }; /* read data support */
	struct Wds   : Bitfield< 4,  1> { }; /* write data support */
	struct Szu   : Bitfield< 8,  4> { }; /* size units */
	struct Sz    : Bitfield<12, 20> { }; /* size (bits 31:12) */
};

/*
 * Boot partition information
 */
struct Bpinfo : Register<0x40, 32>
{
	struct Bpsz  : Bitfield< 0, 15> { }; /* boot partition size (in 128KiB, bits 14:0) */
	struct Brs   : Bitfield<24,  2> { }; /* boot read status */
	struct Abpid : Bitfield<31,  1> { }; /* active boot partition id */
};

/*
 * Boot partition read select
 */
struct Bprsel : Register<0x44, 32>
{
	struct Bprsz : Bitfield< 0, 10> { }; /* boot partition read size (in 4KiB) */
	struct Bprof : Bitfield<10, 20> { }; /* boot partition read offset (in 4KiB, bits 29:10) */
	struct Bpid  : Bitfield<31,  1> { }; /* boot partition identifier */
};

/*
 * Boot partition memory buffer location
 */
struct Bpmbl : Register<0x48, 64>
{
	struct Bmbba : Bitfield<12, 52> { }; /* boot partition memory buffer base address */
};

/*
 * Admin submission doorbell
 */
struct Admin_sdb : Register<0x1000, 32>
{
	struct Sqt : Bitfield< 0, 16> { }; /* submission queue tail */
};

/*
 * Admin completion doorbell
 */
struct Admin_cdb : Register<0x1004, 32>
{
	struct Cqh : Bitfield< 0, 16> { }; /* completion queue head */
};

/*
 * I/O submission doorbell
 */
struct Io_sdb : Register<0x1008, 32>
{
	struct Sqt : Bitfield< 0, 16> { }; /* submission queue tail */
};

/*
 * I/O completion doorbell
 */
struct Io_cdb : Register<0x100C, 32>
{
	struct Cqh : Bitfield< 0, 16> { }; /* completion queue head */
};
/**********
** CODE **
**********/
/* pair of local (virtual) and physical address of one DMA buffer */
struct Mem_address
{
addr_t va { 0 };
addr_t pa { 0 };
};
/* thrown whenever bringing up the controller fails */
struct Initialization_failed : Genode::Exception { };
Genode::Env &_env;
Util::Dma_allocator &_dma_alloc;
Mmio::Delayer &_delayer;
/*
 * There is a completion and submission queue for
 * every namespace and one pair for the admin queues.
 */
Nvme::Cq _cq[NUM_QUEUES] { };
Nvme::Sq _sq[NUM_QUEUES] { };
/* the admin queue pair always lives at index 0 */
Nvme::Cq &_admin_cq = _cq[0];
Nvme::Sq &_admin_sq = _sq[0];
/* buffer and register overlay of the identify-controller data */
Mem_address _nvme_identify { };
Genode::Constructible<Identify_data> _identify_data { };
/* buffer holding the list of namespace ids and the number of valid ids */
Mem_address _nvme_nslist { };
uint32_t _nvme_nslist_count { 0 };
/* limits, adjusted to the controller's capabilities by '_identify' */
size_t _mdts_bytes { 0 };
size_t _max_io_entries { MAX_IO_ENTRIES };
size_t _max_io_entries_mask { _max_io_entries - 1 };
/* values for the CNS field of the identify command */
enum Cns {
IDENTIFY_NS = 0x00,
IDENTIFY = 0x01,
NSLIST = 0x02,
};
/* identify buffer size and the command ids used for admin commands */
enum {
IDENTIFY_LEN = 4096,
IDENTIFY_CID = 0x666,
NSLIST_CID,
QUERYNS_CID,
CREATE_IO_CQ_CID,
CREATE_IO_SQ_CID,
};
/* one identify-namespace buffer per supported namespace */
Mem_address _nvme_query_ns[MAX_NS] { };
/* controller information gathered by '_identify' */
struct Info
{
Genode::String<8> version { };
Identify_data::Sn sn { };
Identify_data::Mn mn { };
Identify_data::Fr fr { };
size_t mdts { };
};
Info _info { };
/* block metrics of one namespace, filled in by '_query_ns' */
struct Nsinfo
{
Block::sector_t count { 0 };
size_t size { 0 };
Block::sector_t max_request_count { 0 };
bool valid() const { return count && size; }
};
/* the array is one entry larger so that the namespace id can be used as index */
Nsinfo _nsinfo[MAX_NS+1] { };
/**
* Wait for ready bit to change
*
* \param val value of ready bit
*
* \throw Mmio::Polling_timeout
*/
void _wait_for_rdy(unsigned val)
{
enum { MAX = 50u, TO_UNIT = 500u, };
Attempts const a(MAX);
Microseconds const t(((uint64_t)read<Cap::To>() * TO_UNIT) * (1000 / MAX));
try {
wait_for(a, t, _delayer, Csts::Rdy::Equal(val));
} catch (Mmio::Polling_timeout) {
error("Csts::Rdy(", val, ") failed");
throw;
}
}
/**
* Reset controller
*
* \throw Initialization_failed
*/
void _reset()
{
/* disable intr and ctrlr */
write<Intms>(1);
write<Cc>(0);
try { _wait_for_rdy(0); }
catch (...) { throw Initialization_failed(); }
/*
* For now we limit the memory page size to 4K because besides Qemu
* there are not that many consumer NVMe device that support larger
* page sizes and we do not want to align the DMA buffers to larger
* sizes. Essentially, we limit the memory page size to the statically
* defined Nvme::MPS.
*/
Cap::access_t const mpsmax = read<Cap::Mpsmax>();
if (mpsmax > 0) { warning("ignore mpsmax:", mpsmax); }
/* the value written to the register amounts to 2^(12 + v) bytes */
Cap::access_t const v = Nvme::MPS_LOG2 - 12;
write<Cc::Mps>(v);
write<Cc::Iocqes>(CQE_LEN_LOG2);
write<Cc::Iosqes>(SQE_LEN_LOG2);
}
/**
 * Setup queue, i.e., fill out fields
 *
 * Allocates DMA memory for 'num' entries of 'len' bytes each, records
 * its physical address, and attaches it to the local address space.
 *
 * \param q    reference to queue
 * \param num  number of entries
 * \param len  size of one entry
 */
void _setup_queue(Queue &q, size_t const num, size_t const len)
{
	/* backing store covering all entries */
	q.ds = _dma_alloc.alloc(num * len);

	/* address used by the device */
	q.pa = Dataspace_client(q.ds).phys_addr();

	/* address used by the driver */
	q.va = (addr_t)_env.rm().attach(q.ds);

	q.max_entries = num;
}
/**
 * Check if given queue tuple is full
 *
 * The ring counts as full when advancing the submission-queue tail by
 * one slot would reach the completion-queue head. The wrap-around uses
 * the number of entries of the given queue, so the check is valid for
 * the admin queues as well as for the I/O queues, and it does not
 * require the number of entries to be a power of two.
 *
 * \param sq  reference to submission queue
 * \param cq  reference to completion queue
 *
 * \return  returns true if queue is full (or was not set up yet) and
 *          false otherwise
 */
bool _queue_full(Nvme::Sq const &sq, Nvme::Cq const &cq) const
{
	/* a queue without any slot cannot take an entry */
	if (sq.max_entries == 0) { return true; }

	return ((sq.tail + 1) % sq.max_entries) == cq.head;
}
/**
 * Setup admin queues
 *
 * Allocates the admin completion and submission queue and announces
 * their size (0-based) and physical base address to the controller.
 */
void _setup_admin()
{
	/* completion queue */
	_setup_queue(_admin_cq, MAX_ADMIN_ENTRIES, CQE_LEN);
	write<Aqa::Acqs>(MAX_ADMIN_ENTRIES_MASK);
	write<Acq>(_admin_cq.pa);

	/* submission queue */
	_setup_queue(_admin_sq, MAX_ADMIN_ENTRIES, SQE_LEN);
	write<Aqa::Asqs>(MAX_ADMIN_ENTRIES_MASK);
	write<Asq>(_admin_sq.pa);
}
/**
 * Get address of the next free entry in the admin submission queue
 *
 * The opcode, command identifier, and namespace identifier of the
 * entry are already filled in on return.
 *
 * \param opc   entry opcode
 * \param nsid  namespace identifier
 * \param cid   command identifier
 *
 * \return  returns address of the next free entry or 0 if there is
 *          no free entry
 */
addr_t _admin_command(Opcode opc, uint32_t nsid, uint32_t cid)
{
	bool const no_free_slot = _queue_full(_admin_sq, _admin_cq);
	if (no_free_slot) {
		return 0ul;
	}

	Sqe entry(_admin_sq.next());

	entry.write<Nvme::Sqe::Cdw0::Opc>(opc);
	entry.write<Nvme::Sqe::Cdw0::Cid>(cid);
	entry.write<Nvme::Sqe::Nsid>(nsid);

	return entry.base();
}
/**
 * Wait until admin command has finished
 *
 * Polls the entry at the head of the admin completion queue every
 * 100 ms. An entry is only accepted if its phase tag matches the
 * phase expected for the current pass through the queue and if it
 * carries the given command identifier. The accepted entry is
 * consumed and acknowledged via the admin completion doorbell.
 *
 * \param num  number of attempts
 * \param cid  command identifier
 *
 * \return  returns true if the command completed successfully within
 *          the given number of attempts; false is returned if no
 *          matching completion showed up or if the controller
 *          reported an error status for the command
 */
bool _wait_for_admin_cq(uint32_t num, uint16_t cid)
{
	for (uint32_t i = 0; i < num; i++) {

		_delayer.usleep(100 * 1000);

		Cqe b(_admin_cq.next());

		/* entry was not written by the controller in this pass */
		if (b.read<Nvme::Cqe::Sf::P>() != _admin_cq.phase) { continue; }

		if (b.read<Nvme::Cqe::Cid>() != cid) { continue; }

		/* evaluate the status before the slot is handed back */
		bool const success = Nvme::Cqe::succeeded(b);

		_admin_cq.advance_head();
		write<Admin_cdb::Cqh>(_admin_cq.head);

		return success;
	}

	return false;
}
/**
* Get list of namespaces
*/
void _query_nslist()
{
if (!_nvme_nslist.va) {
Ram_dataspace_capability ds = _dma_alloc.alloc(IDENTIFY_LEN);
_nvme_nslist.va = (addr_t)_env.rm().attach(ds);
_nvme_nslist.pa = Dataspace_client(ds).phys_addr();
}
uint32_t *nslist = (uint32_t*)_nvme_nslist.va;
bool const nsm = _identify_data->read<Identify_data::Oacs::Nsm>();
if (!nsm) {
nslist[0] = 1;
_nvme_nslist_count = 1;
return;
}
Sqe_identify b(_admin_command(Opcode::IDENTIFY, 0, NSLIST_CID));
b.write<Nvme::Sqe::Prp1>(_nvme_nslist.pa);
b.write<Nvme::Sqe_identify::Cdw10::Cns>(Cns::NSLIST);
write<Admin_sdb::Sqt>(_admin_sq.tail);
if (!_wait_for_admin_cq(10, NSLIST_CID)) {
error("identify name space list failed");
throw Initialization_failed();
}
for (size_t i = 0; i < 1024; i++) {
if (nslist[i] == 0) { break; }
++_nvme_nslist_count;
}
}
/**
* Get information of namespaces
*/
void _query_ns()
{
uint32_t const max = _nvme_nslist_count > (uint32_t)MAX_NS ?
(uint32_t)MAX_NS : _nvme_nslist_count;
if (!max) {
error("no name spaces found");
throw Initialization_failed();
}
if (max > 1) { warning("only the first name space is used"); }
uint32_t const *ns = (uint32_t const*)_nvme_nslist.va;
uint16_t const id = 0;
if (!_nvme_query_ns[id].va) {
Ram_dataspace_capability ds = _dma_alloc.alloc(IDENTIFY_LEN);
_nvme_query_ns[id].va = (addr_t)_env.rm().attach(ds);
_nvme_query_ns[id].pa = Dataspace_client(ds).phys_addr();
}
Sqe_identify b(_admin_command(Opcode::IDENTIFY, ns[id], QUERYNS_CID));
b.write<Nvme::Sqe::Prp1>(_nvme_query_ns[id].pa);
b.write<Nvme::Sqe_identify::Cdw10::Cns>(Cns::IDENTIFY_NS);
write<Admin_sdb::Sqt>(_admin_sq.tail);
if (!_wait_for_admin_cq(10, QUERYNS_CID)) {
error("identify name space failed");
throw Initialization_failed();
}
Identify_ns_data nsdata(_nvme_query_ns[id].va);
uint32_t const flbas = nsdata.read<Nvme::Identify_ns_data::Flbas>();
/* use array subscription, omit first entry */
uint16_t const ns_id = id + 1;
_nsinfo[ns_id].count = nsdata.read<Nvme::Identify_ns_data::Nsze>();
_nsinfo[ns_id].size = 1u << nsdata.read<Nvme::Identify_ns_data::Lbaf::Lbads>(flbas);
_nsinfo[ns_id].max_request_count = _mdts_bytes / _nsinfo[ns_id].size;
}
/**
* Query the controller information
*/
void _identify()
{
if (!_nvme_identify.va) {
Ram_dataspace_capability ds = _dma_alloc.alloc(IDENTIFY_LEN);
_nvme_identify.va = (addr_t)_env.rm().attach(ds);
_nvme_identify.pa = Dataspace_client(ds).phys_addr();
}
Sqe_identify b(_admin_command(Opcode::IDENTIFY, 0, IDENTIFY_CID));
b.write<Nvme::Sqe::Prp1>(_nvme_identify.pa);
b.write<Nvme::Sqe_identify::Cdw10::Cns>(Cns::IDENTIFY);
write<Admin_sdb::Sqt>(_admin_sq.tail);
if (!_wait_for_admin_cq(10, IDENTIFY_CID)) {
error("identify failed");
throw Initialization_failed();
}
_identify_data.construct(_nvme_identify.va);
/* store information */
_info.version = Genode::String<8>(read<Vs::Mjr>(), ".",
read<Vs::Mnr>(), ".",
read<Vs::Ter>());
_info.sn = _identify_data->sn;
_info.mn = _identify_data->mn;
_info.fr = _identify_data->fr;
/* limit maximum I/O request length */
uint8_t const mdts = _identify_data->read<Identify_data::Mdts>();
_mdts_bytes = !mdts ? (size_t)Nvme::MAX_IO_LEN
: Genode::min((size_t)(1u << mdts) * Nvme::MPS,
(size_t)Nvme::MAX_IO_LEN);
/* limit maximum queue length */
uint16_t const mqes = read<Cap::Mqes>() + 1;
_max_io_entries = Genode::min((uint16_t)Nvme::MAX_IO_ENTRIES,
mqes);
_max_io_entries_mask = _max_io_entries - 1;
}
/**
 * Setup I/O completion queue
 *
 * Allocates the queue memory on first use and creates the queue as
 * physically contiguous queue with interrupts enabled.
 *
 * \param id  identifier of the completion queue
 *
 * \throw Initialization_failed() in case the queue could not be created
 */
void _setup_io_cq(uint16_t id)
{
	Nvme::Cq &cq = _cq[id];
	if (!cq.valid()) { _setup_queue(cq, _max_io_entries, CQE_LEN); }

	Sqe_create_cq b(_admin_command(Opcode::CREATE_IO_CQ, 0, CREATE_IO_CQ_CID));

	/* '_admin_command' hands out 0 if the admin queue has no free slot */
	if (!b.valid()) {
		error("create I/O cq failed, admin queue full");
		throw Initialization_failed();
	}

	b.write<Nvme::Sqe::Prp1>(cq.pa);
	b.write<Nvme::Sqe_create_cq::Cdw10::Qid>(id);
	b.write<Nvme::Sqe_create_cq::Cdw10::Qsize>(_max_io_entries_mask);
	b.write<Nvme::Sqe_create_cq::Cdw11::Pc>(1);
	b.write<Nvme::Sqe_create_cq::Cdw11::En>(1);

	write<Admin_sdb::Sqt>(_admin_sq.tail);

	if (!_wait_for_admin_cq(10, CREATE_IO_CQ_CID)) {
		error("create I/O cq failed");
		throw Initialization_failed();
	}
}
/**
 * Setup I/O submission queue
 *
 * Allocates the queue memory on first use and creates the queue as
 * physically contiguous queue that is bound to the given completion
 * queue.
 *
 * \param id    identifier of the submission queue
 * \param cqid  identifier of the completion queue
 *
 * \throw Initialization_failed() in case the queue could not be created
 */
void _setup_io_sq(uint16_t id, uint16_t cqid)
{
	Nvme::Sq &sq = _sq[id];
	if (!sq.valid()) { _setup_queue(sq, _max_io_entries, SQE_LEN); }

	Sqe_create_sq b(_admin_command(Opcode::CREATE_IO_SQ, 0, CREATE_IO_SQ_CID));

	/* '_admin_command' hands out 0 if the admin queue has no free slot */
	if (!b.valid()) {
		error("create I/O sq failed, admin queue full");
		throw Initialization_failed();
	}

	b.write<Nvme::Sqe::Prp1>(sq.pa);
	b.write<Nvme::Sqe_create_sq::Cdw10::Qid>(id);
	b.write<Nvme::Sqe_create_sq::Cdw10::Qsize>(_max_io_entries_mask);
	b.write<Nvme::Sqe_create_sq::Cdw11::Pc>(1);
	b.write<Nvme::Sqe_create_sq::Cdw11::Qprio>(0b00); /* urgent for now */
	b.write<Nvme::Sqe_create_sq::Cdw11::Cqid>(cqid);

	write<Admin_sdb::Sqt>(_admin_sq.tail);

	if (!_wait_for_admin_cq(10, CREATE_IO_SQ_CID)) {
		error("create I/O sq failed");
		throw Initialization_failed();
	}
}
/**
 * Constructor
 *
 * The constructor merely stores its arguments and hands the MMIO
 * range to 'Attached_mmio'; no controller register is accessed here.
 *
 * \param env        Genode environment
 * \param dma_alloc  allocator used for all DMA buffers
 * \param base       base of the controller's MMIO range
 * \param size       size of the controller's MMIO range
 * \param delayer    delayer used while polling and waiting
 */
Controller(Genode::Env &env, Util::Dma_allocator &dma_alloc,
addr_t const base, size_t const size,
Mmio::Delayer &delayer)
:
Genode::Attached_mmio(env, base, size),
_env(env), _dma_alloc(dma_alloc), _delayer(delayer)
{ }
/**
* Initialize controller
*
* \throw Initialization_failed
*/
void init()
{
_reset();
_setup_admin();
write<Cc::En>(1);
try { _wait_for_rdy(1); }
catch (...) {
if (read<Csts::Cfs>()) {
error("fatal controller status");
}
throw Initialization_failed();
}
}
/**
 * Mask interrupts
 *
 * Writes 1 to the interrupt-mask-set register.
 */
void mask_intr()
{
	write<Intms>(1);
}
/**
 * Clean interrupts
 *
 * Writes 1 to the interrupt-mask-clear register.
 */
void clear_intr()
{
	write<Intmc>(1);
}
/**
 * Identify NVM system
 *
 * Queries the controller data first, then the list of namespaces,
 * and finally the properties of the namespace in use.
 */
void identify()
{
	/* controller data, needed by the following steps */
	_identify();

	/* list of namespace ids */
	_query_nslist();

	/* block metrics of the namespace */
	_query_ns();
}
/**
 * Setup I/O queue
 *
 * The completion queue is created first and the submission queue is
 * bound to it afterwards.
 *
 * \param cid  identifier of the completion queue
 * \param sid  identifier of the submission queue
 */
void setup_io(uint16_t cid, uint16_t sid)
{
	/* completion queue */
	_setup_io_cq(cid);

	/* submission queue, bound to completion queue 'cid' */
	_setup_io_sq(sid, cid);
}
/**
 * Get next free IO submission queue slot
 *
 * The command identifier and the namespace identifier of the slot
 * are already filled in on return. The namespace identifier is also
 * used to select the submission queue.
 *
 * \param nsid  namespace identifier
 * \param cid   command identifier
 *
 * \return  returns virtual address of the I/O command
 */
addr_t io_command(uint16_t nsid, uint16_t cid)
{
	Sqe entry(_sq[nsid].next());

	entry.write<Nvme::Sqe::Cdw0::Cid>(cid);
	entry.write<Nvme::Sqe::Nsid>(nsid);

	return entry.base();
}
/**
 * Check if I/O queue is full
 *
 * The namespace identifier selects the queue pair to look at.
 *
 * \param nsid  namespace identifier
 *
 * \return  true if full, otherwise false
 */
bool io_queue_full(uint16_t nsid) const
{
	return _queue_full(_sq[nsid], _cq[nsid]);
}
/**
 * Write current I/O submission queue tail
 *
 * Passes the tail of the namespace's submission queue to the I/O
 * submission doorbell.
 *
 * \param nsid  namespace identifier
 */
void commit_io(uint16_t nsid)
{
	uint32_t const tail = _sq[nsid].tail;

	write<Io_sdb::Sqt>(tail);
}
/**
 * Process a pending I/O completion
 *
 * At most one completion is processed per call. Nothing happens if
 * the queue is not set up or if the entry at the head still carries
 * the phase of the previous pass.
 *
 * \param nsid  namespace identifier
 * \param func  function that is called on each completion
 */
template <typename FUNC>
void handle_io_completion(uint16_t nsid, FUNC const &func)
{
	Nvme::Cq &cq = _cq[nsid];

	if (!cq.valid()) { return; }

	Cqe entry(cq.next());

	/* process until old phase */
	bool const pending = (entry.read<Nvme::Cqe::Sf::P>() == cq.phase);
	if (!pending) { return; }

	func(entry);

	/*
	 * Acknowledging the completions is done separately via
	 * 'ack_io_completions', so that we can handle them batch-wise.
	 */
	cq.advance_head();
}
/**
 * Acknowledge every pending I/O already handled
 *
 * Passes the head of the namespace's completion queue to the I/O
 * completion doorbell.
 *
 * \param nsid  namespace identifier
 */
void ack_io_completions(uint16_t nsid)
{
	uint32_t const head = _cq[nsid].head;

	write<Io_cdb::Cqh>(head);
}
/**
 * Get block metrics of namespace
 *
 * The namespace identifier is used as array index without any range
 * check.
 *
 * \param nsid  namespace identifier
 *
 * \return  returns information of the namespace (as copy)
 */
Nsinfo nsinfo(uint16_t nsid)
{
	Nsinfo const &entry = _nsinfo[nsid];

	return entry;
}
/**
 * Get controller information
 *
 * \return  returns reference to the stored controller information
 */
Info const &info() const
{
	return _info;
}
/**
 * Get supported maximum number of blocks per request for namespace
 *
 * The namespace identifier is used as array index without any range
 * check.
 *
 * \param nsid  namespace identifier
 *
 * \return  returns maximal count of blocks in one request
 */
Block::sector_t max_count(uint16_t nsid) const
{
	Nsinfo const &entry = _nsinfo[nsid];

	return entry.max_request_count;
}
/**
 * Get number of slots in the I/O queue
 *
 * \return  returns maximal number of I/O requests
 */
uint16_t max_io_entries() const
{
	return _max_io_entries;
}
/***********
** Debug **
***********/
/**
 * Print the controller capabilities and the version register
 */
void dump_cap()
{
	/* MQES is 0-based, print the actual number of entries */
	auto const mqes   = read<Cap::Mqes>() + 1;
	auto const cqr    = read<Cap::Cqr>();
	auto const ams    = read<Cap::Ams>();
	auto const to     = read<Cap::To>();
	auto const dstrd  = read<Cap::Dstrd>();
	auto const nssrs  = read<Cap::Nssrs>();
	auto const css    = read<Cap::Css>();
	auto const bps    = read<Cap::Bps>();
	auto const mpsmin = read<Cap::Mpsmin>();
	auto const mpsmax = read<Cap::Mpsmax>();

	log("CAP:", " ",
	    "Mqes:",   mqes,   " ",
	    "Cqr:",    cqr,    " ",
	    "Ams:",    ams,    " ",
	    "To:",     to,     " ",
	    "Dstrd:",  dstrd,  " ",
	    "Nssrs:",  nssrs,  " ",
	    "Css:",    css,    " ",
	    "Bps:",    bps,    " ",
	    "Mpsmin:", mpsmin, " ",
	    "Mpsmax:", mpsmax);

	auto const major    = read<Vs::Mjr>();
	auto const minor    = read<Vs::Mnr>();
	auto const tertiary = read<Vs::Ter>();

	log("VS: ", " ", major, ".", minor, ".", tertiary);
}
void dump_identify()
{
log("vid:", Hex(_identify_data->read<Identify_data::Vid>()));
log("ssvid:", Hex(_identify_data->read<Identify_data::Ssvid>()));
log("oacs:", Hex(_identify_data->read<Identify_data::Oacs>()));
log(" nsm:", Hex(_identify_data->read<Identify_data::Oacs::Nsm>()));
log("sn:'", _identify_data->sn.string(), "'");
log("mn:'", _identify_data->mn.string(), "'");
log("fr:'", _identify_data->fr.string(), "'");
log("nn:", _identify_data->read<Identify_data::Nn>());
log("vwc:", _identify_data->read<Identify_data::Vwc>());
log("mdts:", _identify_data->read<Identify_data::Mdts>());
}
void dump_nslist()
{
uint32_t const *p = (uint32_t const*)_nvme_nslist.va;
if (!p) { return; }
for (size_t i = 0; i < 1024; i++) {
if (p[i] == 0) { break; }
log("ns:#", p[i], " found");
}
}
};
/*
 * Block session that combines the RPC object with the request stream
 *
 * The object registers itself at the entrypoint on construction and
 * dissolves itself on destruction.
 */
struct Nvme::Block_session_component : Rpc_object<Block::Session>,
                                       Block::Request_stream
{
	Env                  &_env;
	Block::Session::Info  _info;

	/**
	 * Constructor
	 *
	 * \param env   Genode environment
	 * \param ds    dataspace passed to the request stream
	 * \param sigh  signal context passed to the request stream
	 * \param info  block-session information reported to the client
	 */
	Block_session_component(Env                       &env,
	                        Dataspace_capability       ds,
	                        Signal_context_capability  sigh,
	                        Block::Session::Info       info)
	:
		Request_stream(env.rm(), ds, env.ep(), sigh, info),
		_env(env),
		_info(info)
	{
		_env.ep().manage(*this);
	}

	~Block_session_component()
	{
		_env.ep().dissolve(*this);
	}

	/* return the block-session information handed in at construction */
	Info info() const override { return _info; }

	/* forward to the request stream */
	Capability<Tx> tx_cap() override
	{
		return Request_stream::tx_cap();
	}
};
/******************
** Block driver **
******************/
class Nvme::Driver : Genode::Noncopyable
{
public:
bool _verbose_checks { false };
bool _verbose_identify { false };
bool _verbose_io { false };
bool _verbose_mem { false };
bool _verbose_regs { false };
struct Io_error : Genode::Exception { };
struct Request_congestion : Genode::Exception { };
private:
Driver(const Driver&) = delete;
Driver& operator=(const Driver&) = delete;
Genode::Env &_env;
Genode::Allocator &_alloc;
Genode::Attached_rom_dataspace &_config_rom;
/**
 * Re-read the verbosity flags from the config ROM
 *
 * A flag keeps its current value if the corresponding attribute is
 * missing. An invalid ROM leaves all flags untouched.
 */
void _handle_config_update()
{
	_config_rom.update();

	if (!_config_rom.valid()) { return; }

	Genode::Xml_node config = _config_rom.xml();

	/* update one flag, using its current value as default */
	auto update_flag = [&] (char const *attr, bool &flag) {
		flag = config.attribute_value(attr, flag); };

	update_flag("verbose_checks",   _verbose_checks);
	update_flag("verbose_identify", _verbose_identify);
	update_flag("verbose_io",       _verbose_io);
	update_flag("verbose_mem",      _verbose_mem);
	update_flag("verbose_regs",     _verbose_regs);
}
Genode::Signal_handler<Driver> _config_sigh {
_env.ep(), *this, &Driver::_handle_config_update };
/**************
** Reporter **
**************/
Genode::Reporter _namespace_reporter { _env, "controller" };
/**
 * Generate the "controller" report
 *
 * The report carries serial and model number of the controller and
 * one "namespace" node for the namespace in use. Errors while
 * generating the report are deliberately ignored.
 */
void _report_namespaces()
{
	try {
		Genode::Reporter::Xml_generator xml(_namespace_reporter, [&]() {

			Nvme::Controller::Info const &ctrlr_info = _nvme_ctrlr->info();

			xml.attribute("serial", ctrlr_info.sn);
			xml.attribute("model",  ctrlr_info.mn);

			Nvme::Controller::Nsinfo metrics =
				_nvme_ctrlr->nsinfo(Nvme::IO_NSID);

			xml.node("namespace", [&]() {
				xml.attribute("id",          (uint16_t)Nvme::IO_NSID);
				xml.attribute("block_size",  metrics.size);
				xml.attribute("block_count", metrics.count);
			});
		});
	}
	catch (...) { /* reporting is best effort */ }
}
/*********
** DMA **
*********/
addr_t _dma_base { 0 };
Genode::Constructible<Nvme::Pci> _nvme_pci { };
/*
 * The PRP (Physical Region Pages) page is used to setup
 * large requests.
 *
 * The helper slices one DMA buffer into pages of Nvme::MPS bytes and
 * hands out the page that belongs to a given command identifier.
 */
struct Prp_list_helper
{
	/* physical and local address of one page */
	struct Page
	{
		addr_t pa;
		addr_t va;
	};

	Genode::Ram_dataspace_capability _ds;
	addr_t                           _phys_addr;
	addr_t                           _virt_addr;

	/**
	 * Constructor
	 *
	 * \param ds    dataspace backing the pages
	 * \param phys  physical address of the dataspace
	 * \param virt  local address of the dataspace
	 */
	Prp_list_helper(Genode::Ram_dataspace_capability ds,
	                addr_t phys, addr_t virt)
	:
		_ds(ds), _phys_addr(phys), _virt_addr(virt)
	{ }

	Genode::Ram_dataspace_capability dataspace()
	{
		return _ds;
	}

	/**
	 * Return the page that belongs to command identifier 'cid'
	 */
	Page page(uint16_t cid)
	{
		addr_t const offset = cid * Nvme::MPS;

		Page const result { offset + _phys_addr, offset + _virt_addr };
		return result;
	}
};
Genode::Constructible<Prp_list_helper> _prp_list_helper { };
/**************
** Requests **
**************/
/*
 * Book-keeping entry for one request: the block request itself plus
 * a numeric id.
 */
struct Request
{
Block::Request block_request { };
uint32_t id { 0 };
};
/*
 * Allocator for command identifiers, backed by a bitmap with one bit
 * per identifier.
 */
template <unsigned ENTRIES>
struct Command_id
{
	using Bitmap = Genode::Bit_array<ENTRIES>;

	Bitmap _bitmap { };

	/**
	 * Return the lowest identifier that is not in use, or ENTRIES if
	 * all identifiers are taken
	 */
	uint16_t _bitmap_find_free() const
	{
		size_t candidate = 0;

		while (candidate < ENTRIES && _bitmap.get(candidate, 1)) {
			++candidate;
		}

		return candidate;
	}

	/**
	 * Return true if the given identifier is currently allocated
	 */
	bool used(uint16_t const cid) const
	{
		return _bitmap.get(cid, 1);
	}

	/**
	 * Allocate the lowest free identifier
	 *
	 * The result of '_bitmap_find_free' is marked as used without a
	 * check for the "all taken" case.
	 */
	uint16_t alloc()
	{
		uint16_t const cid = _bitmap_find_free();

		_bitmap.set(cid, 1);

		return cid;
	}

	/**
	 * Release the given identifier
	 */
	void free(uint16_t id)
	{
		_bitmap.clear(id, 1);
	}
};
/* allocator for the command ids used by the '_submit*' methods */
Command_id<Nvme::MAX_IO_ENTRIES> _command_id_allocator { };
/* request bookkeeping, indexed by command id */
Request _requests[Nvme::MAX_IO_ENTRIES] { };
template <typename FUNC>
bool _for_any_request(FUNC const &func) const
{
for (uint16_t i = 0; i < _nvme_ctrlr->max_io_entries(); i++) {
if (_command_id_allocator.used(i) && func(_requests[i])) {
return true;
}
}
return false;
}
/* set by 'submit', cleared by 'execute' after 'commit_io' was called */
bool _submits_pending { false };
/*
 * set by '_get_completed_request' for a matching completion, cleared by
 * 'acknowledge_if_completed' after 'ack_io_completions' was called
 */
bool _completed_pending { false };
/*********************
** MMIO Controller **
*********************/
/**
 * Implementation of the 'Mmio::Delayer' interface that forwards 'usleep'
 * to its own timer session
 *
 * The instance '_delayer' is handed to the controller on construction.
 */
struct Timer_delayer : Genode::Mmio::Delayer,
                       Timer::Connection
{
	Timer_delayer(Genode::Env &genode_env)
	:
		Timer::Connection(genode_env)
	{ }

	/**
	 * Delayer interface
	 */
	void usleep(uint64_t usecs) override
	{
		Timer::Connection::usleep(usecs);
	}

} _delayer { _env };
/* NVMe controller, constructed and initialized in the Driver constructor */
Genode::Constructible<Nvme::Controller> _nvme_ctrlr { };
/***********
** Block **
***********/
/*
 * Block session properties, filled in by the constructor from the
 * namespace information; the 'writeable' flag is changed via 'writeable'
 */
Block::Session::Info _info { };
public:
/**
* Constructor
*
* Brings up the controller step by step: evaluate the config, construct
* the PCI front end and the controller, initialize and identify the
* controller, allocate the PRP backing store, set up the I/O queues,
* fill in the Block session info, optionally generate the report, and
* finally install the IRQ signal handler.
*
* \param env           Genode environment
* \param alloc         allocator stored in '_alloc'
* \param config_rom    config ROM, evaluated for the verbose flags and
*                      the optional <report> node
* \param request_sigh  signal context that is installed as IRQ handler
*
* \throw Nvme::Pci::Missing_controller              re-thrown if the PCI front
*                                                    end could not be constructed
* \throw Nvme::Controller::Initialization_failed    DMA backing store or
*                                                    namespace info unavailable
*
* Any exception raised while constructing the controller is logged and
* re-thrown unchanged.
*/
Driver(Genode::Env &env,
Genode::Allocator &alloc,
Genode::Attached_rom_dataspace &config_rom,
Genode::Signal_context_capability request_sigh)
: _env(env), _alloc(alloc), _config_rom(config_rom)
{
/* react to config changes and evaluate the initial config right away */
_config_rom.sigh(_config_sigh);
_handle_config_update();
/*
* Setup and identify NVMe PCI controller
*/
try {
_nvme_pci.construct(_env);
} catch (Nvme::Pci::Missing_controller) {
error("no NVMe PCIe controller found");
throw;
}
try {
_nvme_ctrlr.construct(_env, *_nvme_pci, _nvme_pci->base(),
_nvme_pci->size(), _delayer);
} catch (...) {
error("could not access NVMe controller MMIO");
throw;
}
if (_verbose_regs) { _nvme_ctrlr->dump_cap(); }
_nvme_ctrlr->init();
_nvme_ctrlr->identify();
if (_verbose_identify) {
_nvme_ctrlr->dump_identify();
_nvme_ctrlr->dump_nslist();
}
/*
* Setup I/O
*/
{
/* backing store for the PRP lists, managed by '_prp_list_helper' */
Genode::Ram_dataspace_capability ds = _nvme_pci->alloc(Nvme::PRP_DS_SIZE);
if (!ds.valid()) {
error("could not allocate DMA backing store");
throw Nvme::Controller::Initialization_failed();
}
addr_t const phys_addr = Genode::Dataspace_client(ds).phys_addr();
addr_t const virt_addr = (addr_t)_env.rm().attach(ds);
_prp_list_helper.construct(ds, phys_addr, virt_addr);
if (_verbose_mem) {
log("DMA", " virt: [", Hex(virt_addr), ",",
Hex(virt_addr + Nvme::PRP_DS_SIZE), "]",
" phys: [", Hex(phys_addr), ",",
Hex(phys_addr + Nvme::PRP_DS_SIZE), "]");
}
}
/* 'Nvme::IO_NSID' is passed for both arguments of 'setup_io' */
_nvme_ctrlr->setup_io(Nvme::IO_NSID, Nvme::IO_NSID);
/*
* Setup Block session
*/
/* set Block session properties */
Nvme::Controller::Nsinfo nsinfo = _nvme_ctrlr->nsinfo(Nvme::IO_NSID);
if (!nsinfo.valid()) {
error("could not query namespace information");
throw Nvme::Controller::Initialization_failed();
}
/* the session starts out read-only, see 'writeable' for changing that */
_info = { .block_size = nsinfo.size,
.block_count = nsinfo.count,
.align_log2 = Nvme::MPS_LOG2,
.writeable = false };
Nvme::Controller::Info const &info = _nvme_ctrlr->info();
log("NVMe:", info.version.string(), " "
"serial:'", info.sn.string(), "'", " "
"model:'", info.mn.string(), "'", " "
"frev:'", info.fr.string(), "'");
log("Block", " "
"size: ", _info.block_size, " "
"count: ", _info.block_count, " "
"I/O entries: ", _nvme_ctrlr->max_io_entries());
/* generate Report if requested */
try {
Genode::Xml_node report = _config_rom.xml().sub_node("report");
if (report.attribute_value("namespaces", false)) {
_namespace_reporter.enabled(true);
_report_namespaces();
}
} catch (...) { /* any exception raised here is ignored, the report is then skipped */ }
/* install the IRQ signal handler, then clear and acknowledge the interrupt */
_nvme_pci->sigh_irq(request_sigh);
_nvme_ctrlr->clear_intr();
_nvme_pci->ack_irq();
}
/*
 * Destructor
 *
 * NOTE(review): the body is empty. The PRP dataspace that the constructor
 * allocates via '_nvme_pci->alloc' and attaches via '_env.rm().attach' is
 * neither detached nor freed here.
 */
~Driver() { /* free resources */ }
/**
 * Return a copy of the Block session properties (block size, block
 * count, alignment, and writeable flag)
 */
Block::Session::Info info() const
{
	return _info;
}
/**
 * Allocate a DMA buffer of 'size' bytes via the PCI front end
 *
 * As a side effect, the physical address of the new dataspace becomes
 * '_dma_base', i.e., the base that '_submit' adds to request offsets.
 *
 * \return capability of the allocated dataspace
 */
Genode::Ram_dataspace_capability dma_alloc(size_t size)
{
	Genode::Ram_dataspace_capability ds_cap = _nvme_pci->alloc(size);

	Dataspace_client ds_client(ds_cap);
	_dma_base = ds_client.phys_addr();

	return ds_cap;
}
/**
 * Release a DMA buffer obtained via 'dma_alloc'
 *
 * '_dma_base' is reset to 0 before the dataspace is handed back to the
 * PCI front end.
 */
void dma_free(Genode::Ram_dataspace_capability cap)
{
	/* forget the translation base first */
	_dma_base = 0;

	_nvme_pci->free(cap);
}
/**
 * Set whether WRITE and TRIM requests are accepted
 *
 * The flag is evaluated by '_check_acceptance' and is part of the
 * information returned by 'info'.
 */
void writeable(bool writeable)
{
	_info.writeable = writeable;
}
/******************************
** Block request stream API **
******************************/
/**
 * Decide whether 'request' can be handed to the controller right now
 *
 * \param request  request to check, taken by value because its block
 *                 count is clamped locally for the overlap check
 *
 * \return RETRY     if the I/O queue is full or if the block range of
 *                   the request intersects the range of any in-flight
 *                   request,
 *         REJECTED  for invalid operations and for WRITE/TRIM requests
 *                   while the session is not writeable,
 *         ACCEPTED  otherwise (SYNC requests are accepted without the
 *                   overlap check)
 */
Response _check_acceptance(Block::Request request) const
{
	/*
	 * All memory is dimensioned in a way that it will allow for
	 * MAX_IO_ENTRIES requests, so it is safe to only check the
	 * I/O queue.
	 */
	if (_nvme_ctrlr->io_queue_full(Nvme::IO_NSID)) {
		return Response::RETRY;
	}

	switch (request.operation.type) {
	case Block::Operation::Type::INVALID:
		return Response::REJECTED;

	case Block::Operation::Type::SYNC:
		return Response::ACCEPTED;

	case Block::Operation::Type::TRIM:
		[[fallthrough]];

	case Block::Operation::Type::WRITE:
		if (!_info.writeable) {
			return Response::REJECTED;
		}
		[[fallthrough]];

	case Block::Operation::Type::READ:
		/* limit request to what we can handle, needed for overlap check */
		if (request.operation.count > _nvme_ctrlr->max_count(Nvme::IO_NSID)) {
			request.operation.count = _nvme_ctrlr->max_count(Nvme::IO_NSID);
		}
	}

	/* inclusive block range [lba, lba_end] covered by the new request */
	size_t          const count   = request.operation.count;
	Block::sector_t const lba     = request.operation.block_number;
	Block::sector_t const lba_end = lba + count - 1;

	// XXX trigger overlap only in case of mixed read and write requests?
	auto overlap_check = [&] (Request const &req) {

		/* inclusive block range [start, end] of the in-flight request */
		Block::sector_t const start = req.block_request.operation.block_number;
		Block::sector_t const end   = start + req.block_request.operation.count - 1;

		/*
		 * Two inclusive ranges intersect if and only if each of them
		 * starts no later than the other one ends. In contrast to a
		 * case-by-case classification, this test also covers a new
		 * request that starts inside the in-flight range and extends
		 * beyond its end.
		 */
		bool const overlap = (lba <= end) && (start <= lba_end);

		if (_verbose_checks && overlap) {

			/*
			 * The classification is used for the diagnostic message
			 * only. All three flags are 0 if the new request merely
			 * overlaps the tail of the in-flight range.
			 */
			bool const in_req    = (lba >= start && lba_end <= end);
			bool const over_req  = (lba <= start && lba_end <= end) &&
			                       (start >= lba && start <= lba_end);
			bool const cross_req = (lba <= start && lba_end >= end);

			warning("overlap: ", "[", lba, ",", lba_end, ") with "
			        "[", start, ",", end, ")",
			        " ", in_req, " ", over_req, " ", cross_req);
		}
		return overlap;
	};

	if (_for_any_request(overlap_check)) { return Response::RETRY; }

	return Response::ACCEPTED;
}
/*
 * Queue a READ or WRITE command for 'request'
 *
 * Every operation type other than WRITE is encoded as READ here; the
 * public 'submit' method calls this method for READ and WRITE only.
 * The block count is clamped to the controller's 'max_count'. The payload
 * is addressed physically as '_dma_base + request.offset':
 *
 *  - up to one MPS:   PRP1 only
 *  - up to two MPS:   PRP1 and PRP2 = PRP1 + MPS
 *  - more than that:  PRP1 and PRP2 pointing to a PRP list that is
 *                     written to the slice of '_prp_list_helper'
 *                     belonging to the command id
 *
 * NOTE(review): the result of '_command_id_allocator.alloc()' is used as
 * index into '_requests' without a range check - presumably the caller
 * is expected to have consulted 'acceptable' before; confirm.
 */
void _submit(Block::Request request)
{
bool const write =
request.operation.type == Block::Operation::Type::WRITE;
/* limit request to what we can handle */
if (request.operation.count > _nvme_ctrlr->max_count(Nvme::IO_NSID)) {
request.operation.count = _nvme_ctrlr->max_count(Nvme::IO_NSID);
}
size_t const count = request.operation.count;
Block::sector_t const lba = request.operation.block_number;
/* payload size in bytes */
size_t const len = request.operation.count * _info.block_size;
/* more than two MPS chunks cannot be expressed by PRP1/PRP2 alone */
bool const need_list = len > 2 * Nvme::MPS;
/* physical address of the payload within the DMA buffer */
addr_t const request_pa = _dma_base + request.offset;
if (_verbose_io) {
log("Submit: ", write ? "WRITE" : "READ",
" len: ", len, " mps: ", (unsigned)Nvme::MPS,
" need_list: ", need_list,
" block count: ", count,
" lba: ", lba,
" dma_base: ", Hex(_dma_base),
" offset: ", Hex(request.offset));
}
/* remember the (clamped) request under its command id */
uint16_t const cid = _command_id_allocator.alloc();
uint32_t const id = cid | (Nvme::IO_NSID<<16);
Request &r = _requests[cid];
r = Request { .block_request = request,
.id = id };
/* obtain the command entry for 'cid' from the controller and fill it in */
Nvme::Sqe_io b(_nvme_ctrlr->io_command(Nvme::IO_NSID, cid));
Nvme::Opcode const op = write ? Nvme::Opcode::WRITE : Nvme::Opcode::READ;
b.write<Nvme::Sqe::Cdw0::Opc>(op);
b.write<Nvme::Sqe::Prp1>(request_pa);
/* payload will fit into 2 mps chunks */
if (len > Nvme::MPS && !need_list) {
b.write<Nvme::Sqe::Prp2>(request_pa + Nvme::MPS);
} else if (need_list) {
/* get page to store list of mps chunks */
Prp_list_helper::Page page = _prp_list_helper->page(cid);
/* omit first page and write remaining pages to iob */
addr_t npa = request_pa + Nvme::MPS;
using Page_entry = uint64_t;
Page_entry *pe = (Page_entry*)page.va;
/* payload length rounded up to a multiple of MPS */
size_t const mps_len = Genode::align_addr(len, Nvme::MPS_LOG2);
/* number of list entries, i.e., all MPS chunks but the first */
size_t const num = (mps_len - Nvme::MPS) / Nvme::MPS;
if (_verbose_io) {
log(" page.va: ", Hex(page.va), " page.pa: ",
Hex(page.pa), " num: ", num);
}
for (size_t i = 0; i < num; i++) {
if (_verbose_io) {
log(" [", i, "]: ", Hex(npa));
}
pe[i] = npa;
npa += Nvme::MPS;
}
/* the command refers to the list by its physical address */
b.write<Nvme::Sqe::Prp2>(page.pa);
}
b.write<Nvme::Sqe_io::Slba>(lba);
b.write<Nvme::Sqe_io::Cdw12::Nlb>(count - 1); /* 0-base value */
}
/**
 * Queue a FLUSH command for the given SYNC request
 *
 * The request is recorded in '_requests' under a freshly allocated
 * command id before the command entry is obtained from the controller.
 */
void _submit_sync(Block::Request const request)
{
	uint16_t const cid        = _command_id_allocator.alloc();
	uint32_t const request_id = cid | (Nvme::IO_NSID<<16);

	/* remember the request under its command id */
	Request &slot = _requests[cid];
	slot.block_request = request;
	slot.id            = request_id;

	Nvme::Sqe_io sqe(_nvme_ctrlr->io_command(Nvme::IO_NSID, cid));
	sqe.write<Nvme::Sqe::Cdw0::Opc>(Nvme::Opcode::FLUSH);
}
/**
 * Queue a WRITE_ZEROS command for the given TRIM request
 *
 * The request is recorded in '_requests' under a freshly allocated
 * command id before the command entry is obtained from the controller.
 */
void _submit_trim(Block::Request const request)
{
	/* block range denoted by the request */
	Block::sector_t const first_lba  = request.operation.block_number;
	size_t          const num_blocks = request.operation.count;

	uint16_t const cid        = _command_id_allocator.alloc();
	uint32_t const request_id = cid | (Nvme::IO_NSID<<16);

	/* remember the request under its command id */
	Request &slot = _requests[cid];
	slot.block_request = request;
	slot.id            = request_id;

	Nvme::Sqe_io sqe(_nvme_ctrlr->io_command(Nvme::IO_NSID, cid));
	sqe.write<Nvme::Sqe::Cdw0::Opc>(Nvme::Opcode::WRITE_ZEROS);
	sqe.write<Nvme::Sqe_io::Slba>(first_lba);

	/*
	 * XXX For now let the device decide if it wants to deallocate
	 *     the blocks or not.
	 *
	 * b.write<Nvme::Sqe_io::Cdw12::Deac>(1);
	 */

	/* the number of blocks is encoded as 0-based value */
	sqe.write<Nvme::Sqe_io::Cdw12::Nlb>(num_blocks - 1);
}
/**
 * Look up the request that belongs to a completion-queue entry
 *
 * The functor passed to 'handle_io_completion' matches the completion
 * against the bookkeeping in '_requests'. On a match, the request
 * (including its success state) is copied to 'out', its command id is
 * stored in 'out_cid', and '_completed_pending' is set. If the entry
 * refers to a command id outside of '_requests' or if the stored request
 * id differs, an error is logged and both output arguments are left
 * untouched.
 *
 * \param out      completed request
 * \param out_cid  command id of the completed request
 */
void _get_completed_request(Block::Request &out, uint16_t &out_cid)
{
	_nvme_ctrlr->handle_io_completion(Nvme::IO_NSID, [&] (Nvme::Cqe const &b) {

		if (_verbose_io) { Nvme::Cqe::dump(b); }

		uint32_t const id  = Nvme::Cqe::request_id(b);
		uint16_t const cid = Nvme::Cqe::command_id(b);

		/*
		 * The command id is taken from the completion entry, so it must
		 * not be used as array index without a range check.
		 */
		if (cid >= Nvme::MAX_IO_ENTRIES) {
			error("invalid command id in CQ entry: ", cid);
			Nvme::Cqe::dump(b);
			return;
		}

		Request &r = _requests[cid];
		if (r.id != id) {
			error("no pending request found for CQ entry: id: ",
			      id, " != r.id: ", r.id);
			Nvme::Cqe::dump(b);
			return;
		}

		out_cid = cid;

		r.block_request.success = Nvme::Cqe::succeeded(b);
		out = r.block_request;

		_completed_pending = true;
	});
}
/**
 * Hand the command id of a completed request back to the allocator
 */
void _free_completed_request(uint16_t const cid)
{
	_command_id_allocator.free(cid);
}
/**********************
** driver interface **
**********************/
/**
 * Return whether 'request' can be submitted right now
 *
 * Public front end of '_check_acceptance'.
 */
Response acceptable(Block::Request const request) const
{
	Response const verdict = _check_acceptance(request);

	return verdict;
}
/**
 * Queue the command that corresponds to the operation of 'request'
 *
 * READ and WRITE are handled by '_submit', SYNC by '_submit_sync', and
 * TRIM by '_submit_trim'. Requests of any other type are dropped
 * silently. After queueing, '_submits_pending' is set so that 'execute'
 * commits the command.
 */
void submit(Block::Request const request)
{
	using Type = Block::Operation::Type;

	Type const type = request.operation.type;

	if (type == Type::READ || type == Type::WRITE) {
		_submit(request);
	}
	else if (type == Type::SYNC) {
		_submit_sync(request);
	}
	else if (type == Type::TRIM) {
		_submit_trim(request);
	}
	else {
		/* nothing was queued */
		return;
	}

	_submits_pending = true;
}
/**
 * Mask the interrupt at the controller
 */
void mask_irq()
{
	_nvme_ctrlr->mask_intr();
}
/**
 * Clear the interrupt at the controller and acknowledge it at the PCI
 * front end, in this order
 */
void ack_irq()
{
	_nvme_ctrlr->clear_intr();

	_nvme_pci->ack_irq();
}
/**
 * Commit the commands queued since the last call
 *
 * \return true if there were queued commands, false if there was
 *         nothing to do
 */
bool execute()
{
	bool const had_submits = _submits_pending;

	if (had_submits) {
		_nvme_ctrlr->commit_io(Nvme::IO_NSID);
		_submits_pending = false;
	}

	return had_submits;
}
template <typename FN>
void with_any_completed_job(FN const &fn)
{
uint16_t cid { 0 };
Block::Request request { };
_get_completed_request(request, cid);
if (request.operation.valid()) {
fn(request);
_free_completed_request(cid);
}
}
/**
 * Acknowledge handled completions at the controller
 *
 * Nothing happens unless '_get_completed_request' flagged a completion
 * since the last call.
 */
void acknowledge_if_completed()
{
	if (_completed_pending) {
		_nvme_ctrlr->ack_io_completions(Nvme::IO_NSID);
		_completed_pending = false;
	}
}
};
/**********
** Main **
**********/
/**
 * Root component of the Block service and dispatcher of the driver
 *
 * The object owns the driver and at most one Block session at a time.
 * It is announced at the parent as root of the Block service and handles
 * the signals of the session (new requests, acknowledgements) as well as
 * the device interrupt.
 */
struct Nvme::Main : Rpc_object<Typed_root<Block::Session>>
{
	Genode::Env &_env;

	Genode::Heap _heap { _env.ram(), _env.rm() };

	Genode::Attached_rom_dataspace _config_rom { _env, "config" };

	/* DMA buffer used as packet-stream buffer of the current session */
	Genode::Ram_dataspace_capability       _block_ds_cap  { };
	Constructible<Block_session_component> _block_session { };
	Constructible<Nvme::Driver>            _driver        { };

	Signal_handler<Main> _request_handler { _env.ep(), *this, &Main::_handle_requests };
	Signal_handler<Main> _irq_handler     { _env.ep(), *this, &Main::_handle_irq };

	/**
	 * Handle device interrupt
	 *
	 * The interrupt stays masked while the requests are processed and is
	 * acknowledged afterwards.
	 */
	void _handle_irq()
	{
		_driver->mask_irq();
		_handle_requests();
		_driver->ack_irq();
	}

	/**
	 * Import new requests, commit them, and acknowledge completed ones
	 *
	 * The steps are repeated until a whole iteration made no progress.
	 * Without a session or driver, the method returns immediately.
	 */
	void _handle_requests()
	{
		if (!_block_session.constructed() || !_driver.constructed())
			return;

		Block_session_component &block_session = *_block_session;

		for (;;) {

			bool progress = false;

			/* import new requests */
			block_session.with_requests([&] (Block::Request request) {

				Response response = _driver->acceptable(request);

				switch (response) {
				case Response::ACCEPTED:
					_driver->submit(request);
					[[fallthrough]];
				case Response::REJECTED:
					progress = true;
					[[fallthrough]];
				case Response::RETRY:
					break;
				}

				return response;
			});

			/* process I/O */
			progress |= _driver->execute();

			/* acknowledge finished jobs */
			block_session.try_acknowledge([&] (Block_session_component::Ack &ack) {

				_driver->with_any_completed_job([&] (Block::Request request) {

					ack.submit(request);
					progress = true;
				});
			});

			/* deferred acknowledge on the controller */
			_driver->acknowledge_if_completed();

			if (!progress) { break; }
		}

		block_session.wakeup_client_if_needed();
	}

	/**
	 * Root interface: create the Block session
	 *
	 * \throw Service_denied          another session is still open
	 * \throw Insufficient_ram_quota  donated RAM quota is smaller than
	 *                                the requested 'tx_buf_size'
	 */
	Capability<Session> session(Root::Session_args const &args,
	                            Affinity const &) override
	{
		log("new block session: ", args.string());

		/*
		 * The driver keeps one DMA base address and this object one
		 * session only. Accepting a further session would replace both
		 * underneath the first client, so the request is denied.
		 */
		if (_block_session.constructed()) {
			error("device is already in use");
			throw Service_denied();
		}

		Session_label  const label  { label_from_args(args.string()) };
		Session_policy const policy { label, _config_rom.xml() };

		/* used if the session arguments lack a 'tx_buf_size' */
		size_t const min_tx_buf_size = 128 * 1024;
		size_t const tx_buf_size =
			Arg_string::find_arg(args.string(), "tx_buf_size")
			                     .ulong_value(min_tx_buf_size);

		Ram_quota const ram_quota = ram_quota_from_args(args.string());

		if (tx_buf_size > ram_quota.value) {
			error("insufficient 'ram_quota' from '", label, "',"
			      " got ", ram_quota, ", need ", tx_buf_size);
			throw Insufficient_ram_quota();
		}

		/* sessions are read-only unless the policy states otherwise */
		bool const writeable = policy.attribute_value("writeable", false);
		_driver->writeable(writeable);

		_block_ds_cap = _driver->dma_alloc(tx_buf_size);
		_block_session.construct(_env, _block_ds_cap, _request_handler,
		                         _driver->info());
		return _block_session->cap();
	}

	/**
	 * Root interface: session upgrades are ignored
	 */
	void upgrade(Capability<Session>, Root::Upgrade_args const&) override { }

	/**
	 * Root interface: close the session and release its DMA buffer
	 */
	void close(Capability<Session>) override
	{
		_block_session.destruct();
		/*
		 * XXX a malicious client could submit all its requests
		 *     and close the session...
		 */
		_driver->dma_free(_block_ds_cap);
	}

	/**
	 * Constructor
	 *
	 * Constructs the driver with '_irq_handler' as its IRQ signal
	 * handler and announces the service at the parent.
	 */
	Main(Genode::Env &env) : _env(env)
	{
		_driver.construct(_env, _heap, _config_rom, _irq_handler);

		_env.parent().announce(_env.ep().manage(*this));
	}
};
/**
 * Component entry point, instantiates the one and only 'Nvme::Main'
 * object as function-local static
 */
void Component::construct(Genode::Env &env)
{
	static Nvme::Main main(env);
}