diff --git a/repos/os/recipes/src/nvme_drv/content.mk b/repos/os/recipes/src/nvme_drv/content.mk new file mode 100644 index 0000000000..05b9d866e4 --- /dev/null +++ b/repos/os/recipes/src/nvme_drv/content.mk @@ -0,0 +1,2 @@ +SRC_DIR = src/drivers/nvme +include $(GENODE_DIR)/repos/base/recipes/src/content.inc diff --git a/repos/os/recipes/src/nvme_drv/hash b/repos/os/recipes/src/nvme_drv/hash new file mode 100644 index 0000000000..de23291045 --- /dev/null +++ b/repos/os/recipes/src/nvme_drv/hash @@ -0,0 +1 @@ +2018-03-27 fcf9749c441d830aa4666f70e04cd1560c783b2f diff --git a/repos/os/recipes/src/nvme_drv/used_apis b/repos/os/recipes/src/nvme_drv/used_apis new file mode 100644 index 0000000000..c22f617174 --- /dev/null +++ b/repos/os/recipes/src/nvme_drv/used_apis @@ -0,0 +1,6 @@ +base +os +platform_session +block_session +report_session +timer_session diff --git a/repos/os/run/nvme.run b/repos/os/run/nvme.run new file mode 100644 index 0000000000..e393752330 --- /dev/null +++ b/repos/os/run/nvme.run @@ -0,0 +1,189 @@ +assert_spec x86 + +# perform write tests when requested +if {[info exists env(GENODE_TEST_WRITE)]} { +set test_write 1 +} else { +set test_write 0 +} + +set is_qemu [have_include power_on/qemu] +set is_old [expr [have_spec fiasco] || [have_spec okl4] || [have_spec pistachio]] +set is_32bit_x86_hw [expr !$is_qemu && [have_spec 32bit]] + +# +# Only run tests on supported platforms +# +if {[expr [have_spec linux] || $is_32bit_x86_hw || [expr $is_qemu && $is_old]]} { + puts "This run script is not supported on this platform." + exit 0 +} + +# +# Qemu and on certain platforms only use the small set of tests +# +set small_test [expr $is_qemu || [have_spec foc] || [have_spec sel4]] + +# +# Check used commands +# +set dd [check_installed dd] + +# +# Build +# +set build_components { + core init + drivers/nvme + drivers/timer + app/block_tester +} + +source ${genode_dir}/repos/base/run/platform_drv.inc +append_platform_drv_build_components + +build $build_components + + +# +# Create raw image +# +catch { exec $dd if=/dev/zero of=bin/nvme.raw bs=1M count=0 seek=32768 } + +create_boot_directory + +# +# Generate config +# +append config { + + + + + + + + + + + + + + + + + + + + + + + + } + +append_platform_drv_config + +append config { + + + + + + + + + + + + } + +append_if $small_test config { + + } + +append_if [expr !$small_test] config { + + + + + + + } + +append_if $test_write config { + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } +append config { + + + + + + + +} + +install_config $config + +# +# Boot modules +# + +set boot_modules { + core init timer nvme_drv + ld.lib.so block_tester +} + +append_platform_drv_boot_modules + +build_boot_image $boot_modules + +append qemu_args " -nographic -m 512 " +append qemu_args " -drive id=nvme0,file=bin/nvme.raw,format=raw,if=none " +append qemu_args " -device nvme,drive=nvme0,serial=fnord,id=nvme0n1 " + +run_genode_until {.*child "block_tester" exited with exit value 0.*\n} 300 + +exec rm -f bin/nvme.raw diff --git a/repos/os/src/drivers/nvme/README b/repos/os/src/drivers/nvme/README new file mode 100644 index 0000000000..da43782c99 --- /dev/null +++ b/repos/os/src/drivers/nvme/README @@ -0,0 +1,40 @@ +This directory contains the implementation of a NVMe driver component. + + +Brief +===== + +The driver supports PCIe NVMe devices matching at least revision 1.1 of +the NVMe specification. For now it only supports one name space and uses +one completion and one submission queue to handle all I/O requests; one +request is limited to 1MiB of data. It lacks any name space management +functionality. + + +Configuration +============= + +The following config illustrates how the driver is configured: + +! +! +! +! +! +! +! + + +Report +====== + +The driver supports reporting of active name spaces, which can be enabled +via the configuration 'report' sub-node: + +! + +The report structure is depicted by the following example: + +! +! +! diff --git a/repos/os/src/drivers/nvme/main.cc b/repos/os/src/drivers/nvme/main.cc new file mode 100644 index 0000000000..826d16d3cc --- /dev/null +++ b/repos/os/src/drivers/nvme/main.cc @@ -0,0 +1,1752 @@ +/* + * \brief NVMe Block session component + * \author Josef Soentgen + * \date 2018-03-05 + * + * Spec used: NVM-Express-1_3a-20171024_ratified.pdf + */ + +/* + * Copyright (C) 2018 Genode Labs GmbH + * + * This file is part of the Genode OS framework, which is distributed + * under the terms of the GNU Affero General Public License version 3. + */ + +/* Genode includes */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* local includes */ +#include +#include + + +namespace { + +using uint16_t = Genode::uint16_t; +using uint32_t = Genode::uint32_t; +using uint64_t = Genode::uint64_t; +using size_t = Genode::size_t; +using addr_t = Genode::addr_t; +using Packet_descriptor = Block::Packet_descriptor; + +} /* anonymous namespace */ + + +/********** + ** NVMe ** + **********/ + +namespace Nvme { + using namespace Genode; + + struct Identify_data; + struct Identify_ns_data; + struct Doorbell; + + struct Cqe; + + struct Sqe; + struct Sqe_create_cq; + struct Sqe_create_sq; + struct Sqe_identify; + struct Sqe_io; + + struct Queue; + struct Sq; + struct Cq; + + struct Controller; + + enum { + CQE_LEN = 16, + SQE_LEN = 64, + MAX_IO_QUEUES = 1, + MAX_IO_ENTRIES = 128, + MAX_IO_ENTRIES_MASK = MAX_IO_ENTRIES - 1, + MAX_IO_PENDING = MAX_IO_ENTRIES - 1, /* tail + 1 == head -> full */ + MAX_ADMIN_ENTRIES = 128, + MAX_ADMIN_ENTRIES_MASK = MAX_ADMIN_ENTRIES - 1, + }; + + enum { + /* + * Limit max I/O requests size; we can map up to 2MiB with one list + * page (4K/8 = 512 * 4K) but 1MiB is plenty + */ + MAX_IO_LEN = 1u << 20, + DMA_DS_SIZE = 4u << 20, + DMA_LIST_DS_SIZE = 256u << 10, + MPS = 4096u, + }; + + enum { + IO_NSID = 1u, + MAX_NS = 1u, + NUM_QUEUES = 1 + MAX_NS, + }; + + enum Opcode { + /* Admin command set */ + DELETE_IO_SQ = 0x00, + CREATE_IO_SQ = 0x01, + DELETE_IO_CQ = 0x04, + CREATE_IO_CQ = 0x05, + IDENTIFY = 0x06, + SET_FEATURES = 0x09, + GET_FEATURES = 0x0A, + /* NVM command set */ + FLUSH = 0x00, + WRITE = 0x01, + READ = 0x02, + }; +}; + + +/* + * Identify command data + */ +struct Nvme::Identify_data : Genode::Mmio +{ + enum { + SN_OFFSET = 0x04, SN_LEN = 20, + MN_OFFSET = 0x18, MN_LEN = 40, + FR_OFFSET = 0x40, FR_LEN = 12, + }; + + using Sn = Genode::String; + using Mn = Genode::String; + using Fr = Genode::String; + + Sn sn { }; /* serial number */ + Mn mn { }; /* model number */ + Fr fr { }; /* firmware revision */ + + struct Vid : Register<0x000, 16> { }; /* vendor id */ + struct Ssvid : Register<0x002, 16> { }; /* sub system vendor id */ + /* optional admin command support */ + struct Oacs : Register<0x100, 32> + { + struct Ssr : Bitfield< 0, 1> { }; /* security send/receive */ + struct Nvmf : Bitfield< 1, 1> { }; /* NVM format */ + struct Fwcd : Bitfield< 2, 1> { }; /* firmware commit/download image */ + struct Nsm : Bitfield< 3, 1> { }; /* namespace management */ + struct Vm : Bitfield< 7, 1> { }; /* virtualization management */ + }; + struct Nn : Register<0x204, 32> { }; /* number of namespaces */ + struct Vwc : Register<0x204, 8> { }; /* volatile write cache */ + + Identify_data(addr_t const base) + : Genode::Mmio(base) + { + char const *p = (char const*)base; + + sn = Sn(Util::extract_string(p, SN_OFFSET, SN_LEN+1)); + mn = Mn(Util::extract_string(p, MN_OFFSET, MN_LEN+1)); + fr = Fr(Util::extract_string(p, FR_OFFSET, FR_LEN+1)); + } +}; + + +/* + * Identify name space command data + */ +struct Nvme::Identify_ns_data : public Genode::Mmio +{ + struct Nsze : Register<0x00, 64> { }; /* name space size */ + struct Ncap : Register<0x08, 64> { }; /* name space capacity */ + struct Nuse : Register<0x10, 64> { }; /* name space utilization */ + struct Nsfeat : Register<0x18, 8> { }; /* name space features */ + struct Nlbaf : Register<0x19, 8> { }; /* number of LBA formats */ + /* formatted LBA size */ + struct Flbas : Register<0x1a, 8> + { + struct Formats : Bitfield< 0, 3> { }; + }; + struct Mc : Register<0x1b, 8> { }; /* metadata capabilities */ + struct Dpc : Register<0x1c, 8> { }; /* end-to-end data protection capabilities */ + struct Dps : Register<0x1d, 8> { }; /* end-to-end data protection settings */ + + enum { MAX_LBAF = 16, }; + /* LBA format support */ + struct Lbaf : Register_array<0x80, 32, MAX_LBAF, 32> + { + struct Ms : Bitfield< 0, 16> { }; /* metadata size */ + struct Lbads : Bitfield<16, 8> { }; /* LBA data size (2^n) */ + struct Rp : Bitfield<24, 2> { }; /* relative performance */ + }; + + Identify_ns_data(addr_t const base) + : Genode::Mmio(base) + { } +}; + + +/* + * Queue doorbell register + */ +struct Nvme::Doorbell : public Genode::Mmio +{ + struct Sqtdbl : Register<0x00, 32> + { + struct Sqt : Bitfield< 0, 16> { }; /* submission queue tail */ + }; + + struct Cqhdbl : Register<0x04, 32> + { + struct Cqh : Bitfield< 0, 16> { }; /* submission queue tail */ + }; + + Doorbell(addr_t const base) + : Genode::Mmio(base) { } +}; + + +/* + * Completion queue entry + */ +struct Nvme::Cqe : Genode::Mmio +{ + struct Dw0 : Register<0x00, 32> { }; /* command specific */ + struct Dw1 : Register<0x04, 32> { }; /* reserved */ + + struct Sqhd : Register<0x08, 16> { }; + struct Sqid : Register<0x0a, 16> { }; + struct Cid : Register<0x0c, 16> { }; + struct Sf : Register<0x0e, 16> + { + struct P : Bitfield< 0, 1> { }; + struct Sc : Bitfield< 1, 8> { }; /* status code */ + struct Sct : Bitfield< 9, 3> { }; /* status code type */ + struct M : Bitfield<14, 1> { }; /* more (get log) */ + struct Dnr : Bitfield<15, 1> { }; /* do not retry */ + }; + + Cqe(addr_t const base) : Genode::Mmio(base) { } + + static uint32_t request_id(Nvme::Cqe const &b) + { + return (b.read() << 16)|b.read(); + } + + static bool succeeded(Nvme::Cqe const &b) + { + return !b.read(); + } + + static void dump(Nvme::Cqe const &b) + { + using namespace Genode; + log("sqhd:", b.read(), " " + "sqid:", b.read(), " " + "cid:", b.read(), " " + "p:", b.read(), " " + "status: ", Hex(b.read()), " " + "sc:", Hex(b.read()), " " + "sct:", Hex(b.read()), " " + "m:", b.read(), " " + "dnr:", b.read()); + } +}; + + +/* + * Submission queue entry base + */ +struct Nvme::Sqe : Genode::Mmio +{ + struct Cdw0 : Register<0x00, 32> + { + struct Opc : Bitfield< 0, 8> { }; /* opcode */ + struct Fuse : Bitfield< 9, 2> { }; /* fused operation */ + struct Psdt : Bitfield<14, 2> { }; /* PRP or SGL for data transfer */ + struct Cid : Bitfield<16, 16> { }; /* command identifier */ + }; + struct Nsid : Register<0x04, 32> { }; + struct Mptr : Register<0x10, 64> { }; + struct Prp1 : Register<0x18, 64> { }; + struct Prp2 : Register<0x20, 64> { }; + + /* SGL not supported */ + + Sqe(addr_t const base) : Genode::Mmio(base) { } + + bool valid() const { return base() != 0ul; } +}; + + +/* + * Identify command + */ +struct Nvme::Sqe_identify : Nvme::Sqe +{ + struct Cdw10 : Register<0x28, 32> + { + struct Cns : Bitfield< 0, 8> { }; /* controller or namespace structure */ + }; + + Sqe_identify(addr_t const base) : Sqe(base) { } +}; + + +/* + * Create completion queue command + */ +struct Nvme::Sqe_create_cq : Nvme::Sqe +{ + struct Cdw10 : Register<0x28, 32> + { + struct Qid : Bitfield< 0, 16> { }; /* queue identifier */ + struct Qsize : Bitfield<16, 16> { }; /* queue size 0-based vale */ + }; + + struct Cdw11 : Register<0x2c, 32> + { + struct Pc : Bitfield< 0, 1> { }; /* physically contiguous */ + struct En : Bitfield< 1, 1> { }; /* interrupts enabled */ + struct Iv : Bitfield<16, 16> { }; /* interrupt vector */ + }; + + Sqe_create_cq(addr_t const base) : Sqe(base) { } +}; + + +/* + * Create submission queue command + */ +struct Nvme::Sqe_create_sq : Nvme::Sqe +{ + struct Cdw10 : Register<0x28, 32> + { + struct Qid : Bitfield< 0, 16> { }; /* queue identifier */ + struct Qsize : Bitfield<16, 16> { }; /* queue size 0-based vale */ + }; + + struct Cdw11 : Register<0x2c, 32> + { + struct Pc : Bitfield< 0, 1> { }; /* physically contiguous */ + struct Qprio : Bitfield< 1, 2> { }; /* queue priority */ + struct Cqid : Bitfield<16, 16> { }; /* completion queue identifier */ + }; + + Sqe_create_sq(addr_t const base) : Sqe(base) { } +}; + + +/* + * I/O command + */ +struct Nvme::Sqe_io : Nvme::Sqe +{ + struct Slba_lower : Register<0x28, 32> { }; + struct Slba_upper : Register<0x2c, 32> { }; + struct Slba : Genode::Bitset_2 { }; + + struct Cdw12 : Register<0x30, 32> + { + struct Nlb : Bitfield<0, 16> { }; + }; + + Sqe_io(addr_t const base) : Sqe(base) { } +}; + + +/* + * Queue base structure + */ +struct Nvme::Queue +{ + Genode::Ram_dataspace_capability ds { }; + addr_t pa { 0 }; + addr_t va { 0 }; + uint32_t max_entries { 0 }; + + bool valid() const { return pa != 0ul; } +}; + + +/* + * Submission queue + */ +struct Nvme::Sq : Nvme::Queue +{ + uint32_t tail { 0 }; + uint16_t id { 0 }; + + addr_t next() + { + addr_t a = va + (tail * SQE_LEN); + Genode::memset((void*)a, 0, SQE_LEN); + tail = (tail + 1) % max_entries; + return a; + } +}; + + +/* + * Completion queue + */ +struct Nvme::Cq : Nvme::Queue +{ + uint32_t head { 0 }; + uint32_t phase { 1 }; + + addr_t next() { return va + (head * CQE_LEN); } + + void advance_head() + { + if (++head >= max_entries) { + head = 0; + phase ^= 1; + } + } +}; + + +/* + * Controller + */ +struct Nvme::Controller : public Genode::Attached_mmio +{ + /********** + ** MMIO ** + **********/ + + /* + * Controller capabilities (p. 40 ff.) + */ + struct Cap : Register<0x0, 64> + { + struct Mqes : Bitfield< 0, 15> { }; /* maximum queue entries supported 0-based */ + struct Cqr : Bitfield<16, 1> { }; /* contiguous queues required */ + struct Ams : Bitfield<17, 2> { }; /* arbitration mechanism supported */ + struct To : Bitfield<24, 8> { }; /* timeout (csts.rdy) */ + struct Dstrd : Bitfield<32, 4> { }; /* doorbell stride */ + struct Nssrs : Bitfield<36, 1> { }; /* NVM subsystem reset supported */ + struct Css : Bitfield<37, 8> { }; /* command sets supported */ + struct Bps : Bitfield<45, 1> { }; /* boot partition support */ + struct Mpsmin : Bitfield<48, 4> { }; /* memory page size minimum */ + struct Mpsmax : Bitfield<52, 4> { }; /* memory page size maximum */ + }; + + /* + * Version + */ + struct Vs : Register<0x8, 32> + { + struct Ter : Bitfield< 0, 8> { }; /* tertiary */ + struct Mnr : Bitfield< 8, 8> { }; /* minor */ + struct Mjr : Bitfield<16, 16> { }; /* major */ + }; + + /* + * Interrupt mask set (for !MSI-X) + */ + struct Intms : Register<0x0c, 32> + { + struct Ivms : Bitfield<0, 32> { }; /* interrupt vector mask set */ + }; + + /* + * Interrupt mask clear + */ + struct Intmc : Register<0x10, 32> + { + struct Ivmc : Bitfield<0, 32> { }; /* interrupt vector mask clear */ + }; + + /* + * Controller configuration + */ + struct Cc : Register<0x14, 32> + { + struct En : Bitfield< 0, 1> { }; /* enable */ + struct Css : Bitfield< 4, 3> { }; /* I/O command set selected */ + struct Mps : Bitfield< 7, 4> { }; /* memory page size */ + struct Ams : Bitfield<11, 3> { }; /* arbitration mechanism selected */ + struct Shn : Bitfield<14, 2> { }; /* shutdown notification */ + struct Iosqes : Bitfield<16, 4> { }; /* I/O submission queue entry size */ + struct Iocqes : Bitfield<20, 4> { }; /* I/O completion queue entry size */ + }; + + /* + * Controller status + */ + struct Csts : Register<0x1c, 32> + { + struct Rdy : Bitfield< 0, 1> { }; /* ready */ + struct Cfs : Bitfield< 1, 1> { }; /* controller fatal status */ + struct Shst : Bitfield< 2, 1> { }; /* shutdown status */ + struct Nssro : Bitfield< 4, 1> { }; /* NVM subsystem reset occurred */ + struct Pp : Bitfield< 5, 1> { }; /* processing paused */ + }; + + /* + * NVM subsystem reset + */ + struct Nssr : Register<0x20, 32> + { + struct Nssrc : Bitfield< 0, 32> { }; /* NVM subsystem reset control */ + }; + + /* + * Admin queue attributes + */ + struct Aqa : Register<0x24, 32> + { + struct Asqs : Bitfield< 0, 12> { }; /* admin submission queue size 0-based */ + struct Acqs : Bitfield<16, 12> { }; /* admin completion queue size 0-based */ + }; + + /* + * Admin submission queue base address + */ + struct Asq : Register<0x28, 64> + { + struct Asqb : Bitfield<12, 52> { }; /* admin submission queue base */ + }; + + /* + * Admin completion queue base address + */ + struct Acq : Register<0x30, 64> + { + struct Acqb : Bitfield<12, 52> { }; /* admin completion queue base */ + }; + + /* + * Controller memory buffer location + */ + struct Cmbloc : Register<0x38, 32> + { + struct Bir : Bitfield< 0, 2> { }; /* base indicator register */ + struct Ofst : Bitfield<12, 24> { }; /* offset */ + }; + + /* + * Controller memory buffer size + */ + struct Cmbsz : Register<0x3c, 32> + { + struct Sqs : Bitfield< 0, 1> { }; /* submission queue support */ + struct Cqs : Bitfield< 1, 1> { }; /* completion queue support */ + struct Lists : Bitfield< 2, 1> { }; /* PRP SGL list support */ + struct Rds : Bitfield< 3, 1> { }; /* read data support */ + struct Wds : Bitfield< 4, 1> { }; /* write data support */ + struct Szu : Bitfield< 8, 4> { }; /* size units */ + struct Sz : Bitfield<12, 24> { }; /* size */ + }; + + /* + * Boot partition information + */ + struct Bpinfo : Register<0x40, 32> + { + struct Bpsz : Bitfield< 0, 14> { }; /* boot partition size (in 128KiB) */ + struct Brs : Bitfield<24, 2> { }; /* boot read status */ + struct Abpid : Bitfield<31, 1> { }; /* active boot partition id */ + }; + + /* + * Boot partition read select + */ + struct Bprsel : Register<0x44, 32> + { + struct Bprsz : Bitfield< 0, 10> { }; /* boot partition read size (in 4KiB) */ + struct Bprof : Bitfield<10, 30> { }; /* boot partition read offset (in 4KiB) */ + struct Bpid : Bitfield<31, 1> { }; /* boot partition identifier */ + }; + + /* + * Boot partition memory buffer location + */ + struct Bpmbl : Register<0x48, 64> + { + struct Bmbba : Bitfield<12, 52> { }; /* boot partition memory buffer base address */ + }; + + /* + * Admin submission doorbell + */ + struct Admin_sdb : Register<0x1000, 32> + { + struct Sqt : Bitfield< 0, 16> { }; /* submission queue tail */ + }; + + /* + * Admin completion doorbell + */ + struct Admin_cdb : Register<0x1004, 32> + { + struct Cqh : Bitfield< 0, 16> { }; /* completion queue tail */ + }; + + /* + * I/O submission doorbell + */ + struct Io_sdb : Register<0x1008, 32> + { + struct Sqt : Bitfield< 0, 16> { }; /* submission queue tail */ + }; + + /* + * I/O completion doorbell + */ + struct Io_cdb : Register<0x100C, 32> + { + struct Cqh : Bitfield< 0, 16> { }; /* completion queue tail */ + }; + + /********** + ** CODE ** + **********/ + + struct Mem_address + { + addr_t va { 0 }; + addr_t pa { 0 }; + }; + + struct Initialization_failed : Genode::Exception { }; + + Genode::Env &_env; + + Util::Dma_allocator &_dma_alloc; + Mmio::Delayer &_delayer; + + size_t _mps { 0 }; + + Nvme::Cq _cq[NUM_QUEUES] { }; + Nvme::Sq _sq[NUM_QUEUES] { }; + + Nvme::Cq &_admin_cq = _cq[0]; + Nvme::Sq &_admin_sq = _sq[0]; + + Mem_address _nvme_identify { }; + + Genode::Constructible _identify_data { }; + + Mem_address _nvme_nslist { }; + uint32_t _nvme_nslist_count { 0 }; + + enum Cns { + IDENTIFY_NS = 0x00, + IDENTIFY = 0x01, + NSLIST = 0x02, + }; + + enum { + IDENTIFY_LEN = 4096, + + IDENTIFY_CID = 0x666, + NSLIST_CID, + QUERYNS_CID, + CREATE_IO_CQ_CID, + CREATE_IO_SQ_CID, + }; + + Mem_address _nvme_query_ns[MAX_NS] { }; + + struct Info + { + Genode::String<8> version { }; + Identify_data::Sn sn { }; + Identify_data::Mn mn { }; + Identify_data::Fr fr { }; + } _info { }; + + struct Nsinfo + { + Block::sector_t count { 0 }; + size_t size { 0 }; + bool valid() const { return count && size; } + } _nsinfo[MAX_NS] { }; + + /** + * Wait for ready bit to change + * + * \param val value of ready bit + * + * \throw Mmio::Polling_timeout + */ + void _wait_for_rdy(unsigned val) + { + enum { MAX = 50u, TO_UNIT = 500u, }; + Attempts const a(MAX); + Microseconds const t((read() * TO_UNIT) * (1000 / MAX)); + try { + wait_for(a, t, _delayer, Csts::Rdy::Equal(val)); + } catch (Mmio::Polling_timeout) { + Genode::error("Csts::Rdy(", val, ") failed"); + throw; + } + } + + /** + * Reset controller + * + * \throw Initialization_failed + */ + void _reset() + { + /* disable intr and ctrlr */ + write(~0u); + write(0); + + try { _wait_for_rdy(0); } + catch (...) { throw Initialization_failed(); } + + /* + * For now we limit the memory page size to 4K because besides Qemu + * there are not that many consumer NVMe device that support larger + * page sizes and we do not want to align the DMA buffers to larger + * sizes. + */ + Cap::access_t const mpsmax = read(); + if (mpsmax > 0) { Genode::warning("ignore mpsmax:", mpsmax); } + + /* the value written to the register amounts to 2^(12 + v) bytes */ + Cap::access_t const v = Genode::log2((unsigned)Nvme::MPS) - 12; + _mps = 1u << (12 + v); + write(v); + + write(log2((unsigned)CQE_LEN)); + write(log2((unsigned)SQE_LEN)); + } + + /** + * Setup queue, i.e., fill out fields + * + * \param q reference to queue + * \param num number of entries + * \param len size of one entry + */ + void _setup_queue(Queue &q, size_t const num, size_t const len) + { + size_t const size = num * len; + q.ds = _dma_alloc.alloc(size); + q.pa = Dataspace_client(q.ds).phys_addr(); + q.va = (addr_t)_env.rm().attach(q.ds); + q.max_entries = num; + } + + /** + * Check if given queue tuple is full + * + * \param sq reference to submission queue + * \param cq reference to completion queue + * + * \return returns true if queue is full and false otherwise + */ + bool _queue_full(Nvme::Sq const &sq, Nvme::Cq const &cq) const + { + return ((sq.tail + 1) & (MAX_IO_ENTRIES_MASK)) == cq.head; + } + + /** + * Setup admin queues + */ + void _setup_admin() + { + _setup_queue(_admin_cq, MAX_ADMIN_ENTRIES, CQE_LEN); + write(MAX_ADMIN_ENTRIES_MASK); + write(_admin_cq.pa); + + _setup_queue(_admin_sq, MAX_ADMIN_ENTRIES, SQE_LEN); + write(MAX_ADMIN_ENTRIES_MASK); + write(_admin_sq.pa); + } + + /** + * Get address of the next free entry in the admin submission queue + * + * \param opc entry opcode + * \param nsid namespace identifier + * \param cid command identifier + * + * \return returns address of the next free entry or 0 if there is + * no free entry + */ + addr_t _admin_command(Opcode opc, uint32_t nsid, uint32_t cid) + { + if (_queue_full(_admin_sq, _admin_cq)) { return 0ul; } + + Sqe b(_admin_sq.next()); + b.write(opc); + b.write(cid); + b.write(nsid); + return b.base(); + } + + /** + * Wait until admin command has finished + * + * \param num number of attempts + * \param cid command identifier + * + * \return returns true if attempt to wait was successfull, otherwise + * false is returned + */ + bool _wait_for_admin_cq(uint32_t num, uint16_t cid) + { + bool success = false; + + for (uint32_t i = 0; i < num; i++) { + _delayer.usleep(100 * 1000); + + Cqe b(_admin_cq.next()); + + if (b.read() != cid) { + continue; + } + + _admin_cq.advance_head(); + + success = true; + + write(_admin_cq.head); + } + + return success; + } + + /** + * Get list of namespaces + */ + void _query_nslist() + { + if (!_nvme_nslist.va) { + Ram_dataspace_capability ds = _dma_alloc.alloc(IDENTIFY_LEN); + _nvme_nslist.va = (addr_t)_env.rm().attach(ds); + _nvme_nslist.pa = Dataspace_client(ds).phys_addr(); + } + + uint32_t *nslist = (uint32_t*)_nvme_nslist.va; + + bool const nsm = _identify_data->read(); + if (!nsm) { + nslist[0] = 1; + _nvme_nslist_count = 1; + return; + } + + Sqe_identify b(_admin_command(Opcode::IDENTIFY, 0, NSLIST_CID)); + + b.write(_nvme_nslist.pa); + b.write(Cns::NSLIST); + + write(_admin_sq.tail); + + if (!_wait_for_admin_cq(10, NSLIST_CID)) { + Genode::error("identify name space list failed"); + throw Initialization_failed(); + } + + for (size_t i = 0; i < 1024; i++) { + if (nslist[i] == 0) { break; } + ++_nvme_nslist_count; + } + } + + /** + * Get information of namespaces + */ + void _query_ns() + { + uint32_t const max = _nvme_nslist_count > (uint32_t)MAX_NS ? + (uint32_t)MAX_NS : _nvme_nslist_count; + + if (!max) { + error("no name spaces found"); + throw Initialization_failed(); + } + + if (max > 1) { warning("only the first name space is used"); } + + uint32_t const *ns = (uint32_t const*)_nvme_nslist.va; + uint32_t const id = 0; + + if (!_nvme_query_ns[id].va) { + Ram_dataspace_capability ds = _dma_alloc.alloc(IDENTIFY_LEN); + _nvme_query_ns[id].va = (addr_t)_env.rm().attach(ds); + _nvme_query_ns[id].pa = Dataspace_client(ds).phys_addr(); + } + + Sqe_identify b(_admin_command(Opcode::IDENTIFY, ns[id], QUERYNS_CID)); + b.write(_nvme_query_ns[id].pa); + b.write(Cns::IDENTIFY_NS); + + write(_admin_sq.tail); + + if (!_wait_for_admin_cq(10, QUERYNS_CID)) { + Genode::error("identify name space failed"); + throw Initialization_failed(); + } + + Identify_ns_data nsdata(_nvme_query_ns[id].va); + uint32_t const flbas = nsdata.read(); + + _nsinfo[id].count = nsdata.read(); + _nsinfo[id].size = 1u << nsdata.read(flbas); + } + + /** + * Query the controller information + */ + void _identify() + { + if (!_nvme_identify.va) { + Ram_dataspace_capability ds = _dma_alloc.alloc(IDENTIFY_LEN); + _nvme_identify.va = (addr_t)_env.rm().attach(ds); + _nvme_identify.pa = Dataspace_client(ds).phys_addr(); + } + + Sqe_identify b(_admin_command(Opcode::IDENTIFY, 0, IDENTIFY_CID)); + b.write(_nvme_identify.pa); + b.write(Cns::IDENTIFY); + + write(_admin_sq.tail); + + if (!_wait_for_admin_cq(10, IDENTIFY_CID)) { + Genode::error("identify failed"); + throw Initialization_failed(); + } + + _identify_data.construct(_nvme_identify.va); + + /* store information */ + _info.version = Genode::String<8>(read(), ".", + read(), ".", + read()); + _info.sn = _identify_data->sn; + _info.mn = _identify_data->mn; + _info.fr = _identify_data->fr; + } + + /** + * Setup I/O completion queue + * + * \param id identifier of the completion queue + * + * \throw Initialization_failed() in case the queue could not be created + */ + void _setup_io_cq(uint16_t id) + { + Nvme::Cq &cq = _cq[id]; + if (!cq.valid()) { _setup_queue(cq, MAX_IO_ENTRIES, CQE_LEN); } + + Sqe_create_cq b(_admin_command(Opcode::CREATE_IO_CQ, 0, CREATE_IO_CQ_CID)); + b.write(cq.pa); + b.write(id); + b.write(MAX_IO_ENTRIES_MASK); + b.write(1); + b.write(1); + + write(_admin_sq.tail); + + if (!_wait_for_admin_cq(10, CREATE_IO_CQ_CID)) { + Genode::error("create I/O cq failed"); + throw Initialization_failed(); + } + } + + /** + * Setup I/O submission queue + * + * \param id identifier of the submission queue + * \param cqid identifier of the completion queue + * + * \throw Initialization_failed() in case the queue could not be created + */ + void _setup_io_sq(uint16_t id, uint16_t cqid) + { + Nvme::Sq &sq = _sq[id]; + if (!sq.valid()) { _setup_queue(sq, MAX_IO_ENTRIES, SQE_LEN); } + + Sqe_create_sq b(_admin_command(Opcode::CREATE_IO_SQ, 0, CREATE_IO_SQ_CID)); + b.write(sq.pa); + b.write(id); + b.write(MAX_IO_ENTRIES_MASK); + b.write(1); + b.write(0b00); /* urgent for now */ + b.write(cqid); + + write(_admin_sq.tail); + + if (!_wait_for_admin_cq(10, CREATE_IO_SQ_CID)) { + Genode::error("create I/O sq failed"); + throw Initialization_failed(); + } + } + + /** + * Constructor + */ + Controller(Genode::Env &env, Util::Dma_allocator &dma_alloc, + addr_t const base, size_t const size, + Mmio::Delayer &delayer) + : + Genode::Attached_mmio(env, base, size), + _env(env), _dma_alloc(dma_alloc), _delayer(delayer) + { } + + /** + * Initialize controller + * + * \throw Initialization_failed + */ + void init() + { + _reset(); + _setup_admin(); + + write(1); + + try { _wait_for_rdy(1); } + catch (...) { + if (read()) { + Genode::error("fatal controller status"); + } + throw Initialization_failed(); + } + } + + /** + * Mask interrupts + */ + void mask_intr() { write(1); } + + /** + * Clean interrupts + */ + void clear_intr() { write(1); } + + /* + * Identify NVM system + */ + void identify() + { + _identify(); + _query_nslist(); + _query_ns(); + } + + /** + * Setup I/O queue + */ + void setup_io(uint16_t cid, uint16_t sid) + { + _setup_io_cq(cid); + _setup_io_sq(sid, cid); + } + + /** + * Get next free IO submission queue slot + */ + addr_t io_command(uint16_t id) + { + Nvme::Sq &sq = _sq[id]; + Nvme::Cq &cq = _cq[id]; + + if (_queue_full(sq, cq)) { return 0ul; } + + Sqe e(sq.next()); + e.write(sq.id++); + e.write(id); + return e.base(); + } + + /** + * Write current I/O submission queue tail + */ + void commit_io(uint16_t id) + { + Nvme::Sq &sq = _sq[id]; + write(sq.tail); + } + + /** + * Flush cache + */ + void flush_cache(uint16_t id) + { + (void)id; + } + + /** + * Process every pending I/O completion + * + * \param func function that is called on each completion + */ + template + void handle_io_completions(uint16_t id, FUNC const &func) + { + Nvme::Cq &cq = _cq[id]; + + if (!cq.valid()) { return; } + + for (;;) { + Cqe e(cq.next()); + + /* process until old phase */ + if (e.read() != cq.phase) { break; } + + func(e); + + cq.advance_head(); + + /* + * Instead of acknowledging the completions here, + * we could handle them batch-wise after the loop. + */ + write(cq.head); + } + } + + /** + * Get memory page size in bytes + */ + size_t mps() const { return _mps; } + + /** + * Get block metrics of namespace + * + * \param nsid namespace identifier + * + * \return returns information of the namespace + */ + Nsinfo nsinfo(uint32_t id) + { + id = id - 1; + if (id >= MAX_NS) { return Nsinfo(); } + return _nsinfo[id]; + } + + /** + * Get controller information + */ + Info const &info() const { return _info; } + + /*********** + ** Debug ** + ***********/ + + void dump_cap() + { + Genode::log("CAP:", " ", + "Mqes:", read()+1, " ", + "Cqr:", read(), " ", + "Ams:", read(), " ", + "To:", read(), " ", + "Dstrd:", read(), " ", + "Nssrs:", read(), " ", + "Css:", read(), " ", + "Bps:", read(), " ", + "Mpsmin:", read(), " ", + "Mpsmax:", read()); + + Genode::log("VS: ", " ", read(), ".", + read(), ".", read()); + } + + void dump_identify() + { + log("vid:", Hex(_identify_data->read())); + log("ssvid:", Hex(_identify_data->read())); + log("oacs:", Hex(_identify_data->read())); + log(" nsm:", Hex(_identify_data->read())); + log("sn:'", _identify_data->sn.string(), "'"); + log("mn:'", _identify_data->mn.string(), "'"); + log("fr:'", _identify_data->fr.string(), "'"); + log("nn:", _identify_data->read()); + log("vwc:", _identify_data->read()); + } + + void dump_nslist() + { + uint32_t const *p = (uint32_t const*)_nvme_nslist.va; + if (!p) { return; } + + for (size_t i = 0; i < 1024; i++) { + if (p[i] == 0) { break; } + Genode::log("ns:#", p[i], " found"); + } + } +}; + + +/****************** + ** Block driver ** + ******************/ + +class Driver : public Block::Driver +{ + public: + + bool _verbose_checks { false }; + bool _verbose_identify { false }; + bool _verbose_io { false }; + bool _verbose_mem { false }; + bool _verbose_regs { false }; + + private: + + Genode::Env &_env; + Genode::Allocator &_alloc; + + Genode::Signal_context_capability _announce_sigh; + + Genode::Attached_rom_dataspace _config_rom { _env, "config" }; + + void _handle_config_update() + { + _config_rom.update(); + + if (!_config_rom.valid()) { return; } + + Genode::Xml_node config = _config_rom.xml(); + _verbose_checks = config.attribute_value("verbose_checks", _verbose_checks); + _verbose_identify = config.attribute_value("verbose_identify", _verbose_identify); + _verbose_io = config.attribute_value("verbose_io", _verbose_io); + _verbose_mem = config.attribute_value("verbose_mem", _verbose_mem); + _verbose_regs = config.attribute_value("verbose_regs", _verbose_regs); + } + + Genode::Signal_handler _config_sigh { + _env.ep(), *this, &Driver::_handle_config_update }; + + /************** + ** Reporter ** + **************/ + + Genode::Reporter _namespace_reporter { _env, "controller" }; + + void _report_namespaces() + { + try { + Genode::Reporter::Xml_generator xml(_namespace_reporter, [&]() { + Nvme::Controller::Info const &info = _nvme_ctrlr->info(); + + xml.attribute("serial", info.sn); + xml.attribute("model", info.mn); + + for (int i = 1; i <= Nvme::MAX_NS; i++) { + Nvme::Controller::Nsinfo ns = _nvme_ctrlr->nsinfo(i); + + xml.node("namespace", [&]() { + xml.attribute("id", i); + xml.attribute("block_size", ns.size); + xml.attribute("block_count", ns.count); + }); + } + }); + } catch (...) { } + } + + /********* + ** DMA ** + *********/ + + Genode::Constructible _nvme_pci { }; + + struct Io_buffer + { + addr_t pa { 0 }; + addr_t va { 0 }; + size_t size { 0 }; + + bool valid() const { return size && pa && va; } + void invalidate() { Genode::memset(this, 0, sizeof(*this)); } + }; + + template + struct Io_buffer_mapper + { + using Bitmap = Util::Bitmap; + Bitmap _bitmap { }; + + Util::Slots _buffers { }; + + Genode::Ram_dataspace_capability _ds { }; + addr_t _phys_addr { 0 }; + addr_t _virt_addr { 0 }; + + Io_buffer_mapper(Genode::Ram_dataspace_capability ds, + addr_t phys, addr_t virt) + : _ds(ds), _phys_addr(phys), _virt_addr(virt) { } + + Io_buffer *alloc(size_t size) + { + Io_buffer *iob = _buffers.get(); + if (!iob) { return nullptr; } + + try { + size_t const bits = size / MPS; + addr_t const start = _bitmap.alloc(bits); + iob->pa = (start * MPS) + _phys_addr; + iob->va = (start * MPS) + _virt_addr; + iob->size = size; + } catch (...) { + iob->invalidate(); + return nullptr; + } + return iob; + } + + void free(Io_buffer *iob) + { + if (iob) { + size_t const size = iob->size; + addr_t const start = (iob->pa - _phys_addr) / MPS; + _bitmap.free(start, size / MPS); + iob->invalidate(); + } + } + }; + + Genode::Constructible> _io_mapper { }; + + Genode::Constructible> _io_list_mapper { }; + + void _setup_large_request(addr_t va, + Io_buffer const &iob, + size_t const num, + size_t const mps) + { + /* omit first page */ + addr_t pa = iob.pa + mps; + uint64_t *p = (uint64_t*)va; + + for (size_t i = 0; i < num; i++) { + p[i] = pa; + pa += mps; + } + } + + /************** + ** Requests ** + **************/ + + struct Request + { + uint32_t id { 0 }; + Packet_descriptor pd { }; + char *buffer { nullptr }; + + Io_buffer *iob { nullptr }; + Io_buffer *large_request { nullptr }; + + bool valid() const { return id != 0; } + + void invalidate() + { + id = 0; + buffer = nullptr; + pd = Packet_descriptor(); + + iob = nullptr; + large_request = nullptr; + } + }; + + Util::Slots _requests { }; + size_t _requests_pending { 0 }; + + /********************* + ** MMIO Controller ** + *********************/ + + struct Timer_delayer : Genode::Mmio::Delayer, + Timer::Connection + { + Timer_delayer(Genode::Env &env) + : Timer::Connection(env) { } + + void usleep(unsigned us) { Timer::Connection::usleep(us); } + } _delayer { _env }; + + Genode::Constructible _nvme_ctrlr { }; + + void _handle_completions() + { + _nvme_ctrlr->handle_io_completions(Nvme::IO_NSID, [&] (Nvme::Cqe const &b) { + + if (_verbose_io) { Nvme::Cqe::dump(b); } + + uint32_t const id = Nvme::Cqe::request_id(b); + + Request *r = _requests.lookup([&] (Request &r) { + if (r.id == id) { return true; } + return false; + }); + if (!r) { + Genode::error("no pending request found for CQ entry"); + Nvme::Cqe::dump(b); + return; + } + + bool const succeeded = Nvme::Cqe::succeeded(b); + + Packet_descriptor pd = r->pd; + pd.succeeded(succeeded); + + Io_buffer *iob = r->iob; + + if (succeeded && pd.operation() == Packet_descriptor::READ) { + size_t const len = pd.block_count() * _block_size; + Genode::memcpy(r->buffer, (void*)iob->va, len); + } + _io_mapper->free(iob); + + if (r->large_request) { + _io_list_mapper->free(r->large_request); + } + + r->invalidate(); + --_requests_pending; + ack_packet(pd, succeeded); + }); + } + + void _handle_intr() + { + _nvme_ctrlr->mask_intr(); + _handle_completions(); + _nvme_ctrlr->clear_intr(); + _nvme_pci->ack_irq(); + } + + Genode::Signal_handler _intr_sigh { + _env.ep(), *this, &Driver::_handle_intr }; + + /*********** + ** Block ** + ***********/ + + size_t _block_size { 0 }; + Block::sector_t _block_count { 0 }; + Block::Session::Operations _block_ops { }; + + public: + + /** + * Constructor + */ + Driver(Genode::Env &env, Genode::Allocator &alloc, + Genode::Signal_context_capability sigh) + : Block::Driver(env.ram()), _env(env), _alloc(alloc), _announce_sigh(sigh) + { + _config_rom.sigh(_config_sigh); + _handle_config_update(); + + /* + * Setup and identify NVMe PCI controller + */ + + try { + _nvme_pci.construct(_env); + } catch (Nvme::Pci::Missing_controller) { + Genode::error("no NVMe PCIe controller found"); + throw; + } + + try { + _nvme_ctrlr.construct(_env, *_nvme_pci, _nvme_pci->base(), + _nvme_pci->size(), _delayer); + } catch (...) { + Genode::error("could not access NVMe controller MMIO"); + throw; + } + + if (_verbose_regs) { _nvme_ctrlr->dump_cap(); } + + _nvme_ctrlr->init(); + _nvme_ctrlr->identify(); + + if (_verbose_identify) { + Genode::warning(_requests_pending); + _nvme_ctrlr->dump_identify(); + _nvme_ctrlr->dump_nslist(); + } + + /* + * Setup I/O + */ + + { + Genode::Ram_dataspace_capability ds = _nvme_pci->alloc(Nvme::DMA_DS_SIZE); + if (!ds.valid()) { + Genode::error("could not allocate DMA backing store"); + throw Nvme::Controller::Initialization_failed(); + } + addr_t const phys_addr = Genode::Dataspace_client(ds).phys_addr(); + addr_t const virt_addr = (addr_t)_env.rm().attach(ds); + _io_mapper.construct(ds, phys_addr, virt_addr); + + if (_verbose_mem) { + Genode::log("DMA", " virt: [", Genode::Hex(virt_addr), ",", + Genode::Hex(virt_addr + Nvme::DMA_DS_SIZE), "]", + " phys: [", Genode::Hex(phys_addr), ",", + Genode::Hex(phys_addr + Nvme::DMA_DS_SIZE), "]"); + } + } + + { + Genode::Ram_dataspace_capability ds = _nvme_pci->alloc(Nvme::DMA_LIST_DS_SIZE); + if (!ds.valid()) { + Genode::error("could not allocate DMA list-pages backing store"); + throw Nvme::Controller::Initialization_failed(); + } + addr_t const phys_addr = Genode::Dataspace_client(ds).phys_addr(); + addr_t const virt_addr = (addr_t)_env.rm().attach(ds); + _io_list_mapper.construct(ds, phys_addr, virt_addr); + + if (_verbose_mem) { + Genode::log("DMA list-pages", " virt: [", Genode::Hex(virt_addr), ",", + Genode::Hex(virt_addr + Nvme::DMA_DS_SIZE), "]", + " phys: [", Genode::Hex(phys_addr), ",", + Genode::Hex(phys_addr + Nvme::DMA_DS_SIZE), "]"); + } + } + + _nvme_ctrlr->setup_io(Nvme::IO_NSID, Nvme::IO_NSID); + + /* from now on use interrupts */ + _nvme_pci->sigh_irq(_intr_sigh); + _nvme_ctrlr->clear_intr(); + + /* + * Setup Block session + */ + + /* set Block session properties */ + Nvme::Controller::Nsinfo nsinfo = _nvme_ctrlr->nsinfo(Nvme::IO_NSID); + if (!nsinfo.valid()) { + Genode::error("could not query namespace information"); + throw Nvme::Controller::Initialization_failed(); + } + + _block_count = nsinfo.count; + _block_size = nsinfo.size; + + _block_ops.set_operation(Packet_descriptor::READ); + _block_ops.set_operation(Packet_descriptor::WRITE); + + Nvme::Controller::Info const &info = _nvme_ctrlr->info(); + + Genode::log("NVMe:", info.version.string(), " " + "serial:'", info.sn.string(), "'", " " + "model:'", info.mn.string(), "'", " " + "frev:'", info.fr.string(), "'"); + + Genode::log("Block", " " + "size:", _block_size, " " + "count:", _block_count); + + /* generate Report if requested */ + try { + Genode::Xml_node report = _config_rom.xml().sub_node("report"); + if (report.attribute_value("namespaces", false)) { + _namespace_reporter.enabled(true); + _report_namespaces(); + } + } catch (...) { } + + /* finally announce Block session */ + Genode::Signal_transmitter(_announce_sigh).submit(); + } + + ~Driver() { } + + /******************************* + ** Block::Driver interface ** + *******************************/ + + size_t block_size() override { return _block_size; } + Block::sector_t block_count() override { return _block_count; } + Block::Session::Operations ops() override { return _block_ops; } + + void _io(bool write, Block::sector_t lba, size_t count, + char *buffer, Packet_descriptor &pd) + { + using namespace Genode; + + size_t const len = count * _block_size; + + if (_verbose_io) { + Genode::error(write ? "write" : "read", " " + "lba:", lba, " " + "count:", count, " " + "buffer:", (void*)buffer, " " + "len:", len); + } + + if (len > Nvme::MAX_IO_LEN) { + error("request too large (max:", (size_t)Nvme::MAX_IO_LEN, " bytes)"); + throw Io_error(); + } + + if (_requests_pending == (Nvme::MAX_IO_PENDING)) { + throw Request_congestion(); + } + + Block::sector_t const lba_end = lba + count - 1; + auto overlap_check = [&] (Request &req) { + Block::sector_t const start = req.pd.block_number(); + Block::sector_t const end = start + req.pd.block_count() - 1; + + bool const in_req = (lba >= start && lba_end <= end); + bool const over_req = (lba <= start && lba_end <= end) && + (start >= lba && start <= lba_end); + bool const cross_req = (lba <= start && lba_end >= end); + bool const overlap = (in_req || over_req || cross_req); + + if (_verbose_checks && overlap) { + warning("overlap: ", "[", lba, ",", lba_end, ") with " + "[", start, ",", end, ")", + " ", in_req, " ", over_req, " ", cross_req); + } + return overlap; + }; + if (_requests.for_each(overlap_check)) { throw Request_congestion(); } + + Request *r = _requests.get(); + if (!r) { throw Request_congestion(); } + + size_t const mps = _nvme_ctrlr->mps(); + size_t const mps_len = Genode::align_addr(len, Genode::log2(mps)); + bool const need_list = len > 2 * mps; + + Io_buffer *iob = _io_mapper->alloc(mps_len); + if (!iob) { throw Request_congestion(); } + + if (need_list) { + r->large_request = _io_list_mapper->alloc(mps); + if (!r->large_request) { + _io_mapper->free(iob); + throw Request_congestion(); + } + } + + if (write) { Genode::memcpy((void*)iob->va, buffer, len); } + + Nvme::Sqe_io b(_nvme_ctrlr->io_command(Nvme::IO_NSID)); + if (!b.valid()) { + if (r->large_request) { + _io_list_mapper->free(r->large_request); + } + _io_mapper->free(iob); + throw Request_congestion(); + } + + addr_t const pa = iob->pa; + + Nvme::Opcode op = write ? Nvme::Opcode::WRITE : Nvme::Opcode::READ; + b.write(op); + b.write(pa); + + /* payload will fit into 2 mps chunks */ + if (len > mps && !r->large_request) { + b.write(pa + mps); + } else if (r->large_request) { + /* payload needs list of mps chunks */ + Io_buffer &lr = *r->large_request; + _setup_large_request(lr.va, + *iob, (mps_len - mps)/mps, mps); + b.write(lr.pa); + } + + b.write(lba); + b.write(count - 1); /* 0-base value */ + + r->iob = iob; + r->pd = pd; /* must be a copy */ + r->buffer = write ? nullptr : buffer; + r->id = b.read() | (Nvme::IO_NSID<<16); + + ++_requests_pending; + _nvme_ctrlr->commit_io(Nvme::IO_NSID); + } + + void read(Block::sector_t lba, size_t count, + char *buffer, Packet_descriptor &pd) override + { + if (!_block_ops.supported(Packet_descriptor::READ)) { + throw Io_error(); + } + _io(false, lba, count, buffer, pd); + } + + void write(Block::sector_t lba, size_t count, + char const *buffer, Packet_descriptor &pd) override + { + if (!_block_ops.supported(Packet_descriptor::WRITE)) { + throw Io_error(); + } + _io(true, lba, count, const_cast(buffer), pd); + } + + void sync() override { _nvme_ctrlr->flush_cache(Nvme::IO_NSID); } +}; + + +/********** + ** Main ** + **********/ + +struct Main +{ + Genode::Env &_env; + Genode::Heap _heap { _env.ram(), _env.rm() }; + + void _handle_announce() + { + _env.parent().announce(_env.ep().manage(_root)); + } + + Genode::Signal_handler
_announce_sigh { + _env.ep(), *this, &Main::_handle_announce }; + + struct Factory : Block::Driver_factory + { + Genode::Env &_env; + Genode::Allocator &_alloc; + Genode::Signal_context_capability _sigh; + + Genode::Constructible<::Driver> _driver { }; + + Factory(Genode::Env &env, Genode::Allocator &alloc, + Genode::Signal_context_capability sigh) + : _env(env), _alloc(alloc), _sigh(sigh) + { + _driver.construct(_env, _alloc, _sigh); + } + + ~Factory() { _driver.destruct(); } + + Block::Driver *create() { return &*_driver; } + void destroy(Block::Driver *) { } + }; + + Factory _factory { _env, _heap, _announce_sigh }; + Block::Root _root { _env.ep(), _heap, _env.rm(), _factory, true }; + + Main(Genode::Env &env) : _env(env) { } +}; + + +void Component::construct(Genode::Env &env) { static Main main(env); } diff --git a/repos/os/src/drivers/nvme/pci.h b/repos/os/src/drivers/nvme/pci.h new file mode 100644 index 0000000000..a734d01952 --- /dev/null +++ b/repos/os/src/drivers/nvme/pci.h @@ -0,0 +1,149 @@ +/* + * \brief NVMe PCIe backend + * \author Josef Soentgen + * \date 2018-03-05 + */ + +/* + * Copyright (C) 2018 Genode Labs GmbH + * + * This file is part of the Genode OS framework, which is distributed + * under the terms of the GNU Affero General Public License version 3. + */ + +#ifndef _NVME_PCI_H_ +#define _NVME_PCI_H_ + +/* Genode includes */ +#include +#include +#include + + +namespace Nvme { + + using namespace Genode; + + struct Pci; +} + + +struct Nvme::Pci : Platform::Connection, + Util::Dma_allocator +{ + struct Missing_controller : Genode::Exception { }; + + enum { + CLASS_MASS_STORAGE = 0x010000u, + CLASS_MASK = 0xffff00u, + SUBCLASS_NVME = 0x000800u, + NVME_DEVICE = CLASS_MASS_STORAGE | SUBCLASS_NVME, + NVME_PCI = 0x02, + NVME_BASE_ID = 0, + }; + + enum Pci_config { IRQ = 0x3c, CMD = 0x4, CMD_IO = 0x1, + CMD_MEMORY = 0x2, CMD_MASTER = 0x4 }; + + Platform::Device::Resource _res { }; + Platform::Device_capability _device_cap { }; + Genode::Constructible _device { }; + + Genode::Constructible _irq { }; + + /** + * Constructor + */ + Pci(Genode::Env &env) : Platform::Connection(env) + { + upgrade_ram(2*4096u); + upgrade_caps(8); + + _device_cap = with_upgrade([&] () { + return next_device(_device_cap, + NVME_DEVICE, CLASS_MASK); + }); + + if (!_device_cap.valid()) { throw Missing_controller(); } + + _device.construct(_device_cap); + + _res = _device->resource(NVME_BASE_ID); + + uint16_t cmd = _device->config_read(Pci_config::CMD, Platform::Device::ACCESS_16BIT); + cmd |= 0x2; /* respond to memory space accesses */ + cmd |= 0x4; /* enable bus master */ + + _device->config_write(Pci_config::CMD, cmd, Platform::Device::ACCESS_16BIT); + + _irq.construct(_device->irq(0)); + + Genode::log("NVMe PCIe controller found (", + Genode::Hex(_device->vendor_id()), ":", + Genode::Hex(_device->device_id()), ")"); + } + + /** + * Return base address of controller MMIO region + */ + addr_t base() const { return _res.base(); } + + /** + * Return size of controller MMIO region + */ + size_t size() const { return _res.size(); } + + /** + * Set interrupt signal handler + * + * \parm sigh signal capability + */ + void sigh_irq(Genode::Signal_context_capability sigh) + { + _irq->sigh(sigh); + _irq->ack_irq(); + } + + /** + * Acknowledge interrupt + */ + void ack_irq() { _irq->ack_irq(); } + + /***************************** + ** Dma_allocator interface ** + *****************************/ + + /** + * Allocator DMA buffer + * + * \param size size of the buffer + * + * \return Ram_dataspace_capability + */ + Genode::Ram_dataspace_capability alloc(size_t size) override + { + size_t donate = size; + return retry( + [&] () { + return retry( + [&] () { return Pci::Connection::alloc_dma_buffer(size); }, + [&] () { upgrade_caps(2); }); + }, + [&] () { + upgrade_ram(donate); + donate = donate * 2 > size ? 4096 : donate * 2; + }); + } + + /** + * Free DMA buffer + * + * \param cap RAM dataspace capability + */ + void free(Genode::Ram_dataspace_capability cap) override + { + Pci::Connection::free_dma_buffer(cap); + } +}; + +#endif /* _NVME_PCI_H_ */ diff --git a/repos/os/src/drivers/nvme/target.mk b/repos/os/src/drivers/nvme/target.mk new file mode 100644 index 0000000000..0e13210638 --- /dev/null +++ b/repos/os/src/drivers/nvme/target.mk @@ -0,0 +1,5 @@ +TARGET = nvme_drv +SRC_CC = main.cc +INC_DIR += $(PRG_DIR) +LIBS += base +REQUIRES = pci diff --git a/repos/os/src/drivers/nvme/util.h b/repos/os/src/drivers/nvme/util.h new file mode 100644 index 0000000000..2a34cf63ab --- /dev/null +++ b/repos/os/src/drivers/nvme/util.h @@ -0,0 +1,152 @@ +/* + * \brief Utilitize used by the NVMe driver + * \author Josef Soentgen + * \date 2018-03-05 + */ + +/* + * Copyright (C) 2018 Genode Labs GmbH + * + * This file is part of the Genode OS framework, which is distributed + * under the terms of the GNU Affero General Public License version 3. + */ + +#ifndef _NVME_UTIL_H_ +#define _NVME_UTIL_H_ + +/* Genode includes */ +#include + +namespace Util { + + using namespace Genode; + + /* + * DMA allocator helper + */ + struct Dma_allocator : Genode::Interface + { + virtual Genode::Ram_dataspace_capability alloc(size_t) = 0; + virtual void free(Genode::Ram_dataspace_capability) = 0; + }; + + /* + * Wrap Bit_array into a convinient Bitmap allocator + */ + template + struct Bitmap + { + struct Full : Genode::Exception { }; + + static constexpr addr_t INVALID { BITS - 1 }; + Genode::Bit_array _array { }; + size_t _used { 0 }; + + addr_t _find_free(size_t const bits) + { + for (size_t i = 0; i < BITS; i += bits) { + if (_array.get(i, bits)) { continue; } + return i; + } + throw Full(); + } + + /** + * Return index from where given number of bits was allocated + * + * \param bits number of bits to allocate + * + * \return index of start bit + */ + addr_t alloc(size_t const bits) + { + addr_t const start = _find_free(bits); + _array.set(start, bits); + _used += bits; + return start; + } + + /** + * Free given number of bits from start index + * + * \param start index of the start bit + * \param bits number of bits to free + */ + void free(addr_t const start, size_t const bits) + { + _used -= bits; + _array.clear(start, bits); + } + }; + + /* + * Wrap array into convinient interface + * + * The used datatype T must implement the following methods: + * + * bool valid() const returns true if the object is valid + * void invalidate() adjusts the object so that valid() returns false + */ + template + struct Slots + { + T _entries[CAP] { }; + + /** + * Lookup slot + */ + template + T *lookup(FUNC const &func) + { + for (size_t i = 0; i < CAP; i++) { + if (!_entries[i].valid()) { continue; } + if ( func(_entries[i])) { return &_entries[i]; } + } + return nullptr; + } + + /** + * Get free slot + */ + T *get() + { + for (size_t i = 0; i < CAP; i++) { + if (!_entries[i].valid()) { return &_entries[i]; } + } + return nullptr; + } + + /** + * Iterate over all slots until FUNC returns true + */ + template + bool for_each(FUNC const &func) + { + for (size_t i = 0; i < CAP; i++) { + if (!_entries[i].valid()) { continue; } + if ( func(_entries[i])) { return true; } + } + return false; + } + }; + + /** + * Extract string from memory + * + * This function is used to extract the information strings from the + * identify structure. + */ + char const *extract_string(char const *base, size_t offset, size_t len) + { + static char tmp[64] = { }; + if (len > sizeof(tmp)) { return nullptr; } + + Genode::strncpy(tmp, base + offset, len); + + len--; /* skip NUL */ + while (len > 0 && tmp[--len] == ' ') { tmp[len] = 0; } + return tmp; + } +} + +#endif /* _NVME_UTIL_H_ */ diff --git a/tool/autopilot.list b/tool/autopilot.list index 0e101fe6aa..b6b42a153a 100644 --- a/tool/autopilot.list +++ b/tool/autopilot.list @@ -112,3 +112,4 @@ utf8 demo ping ping_nic_router +nvme