From 04516a0d3910c55011be76281fc1ee5ae7ac16ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Josef=20S=C3=B6ntgen?=
Date: Mon, 9 Apr 2018 16:13:54 +0200
Subject: [PATCH] nvme_drv: add driver for NVMe storage devices

This driver component provides support for using consumer NVMe storage
devices on Genode, i.e., it omits name space management and will always
use the first name space.

For now it defaults to a reasonably low configuration:

- 1 I/O queue (completion/submission tuple)
- 128 entries in the I/O queue
- 4096 as the only I/O transaction memory page size

Fixes #2747.
---
 repos/os/recipes/src/nvme_drv/content.mk |    2 +
 repos/os/recipes/src/nvme_drv/hash       |    1 +
 repos/os/recipes/src/nvme_drv/used_apis  |    6 +
 repos/os/run/nvme.run                    |  189 +++
 repos/os/src/drivers/nvme/README         |   40 +
 repos/os/src/drivers/nvme/main.cc        | 1752 ++++++++++++++++++++++
 repos/os/src/drivers/nvme/pci.h          |  149 ++
 repos/os/src/drivers/nvme/target.mk      |    5 +
 repos/os/src/drivers/nvme/util.h         |  152 ++
 tool/autopilot.list                      |    1 +
 10 files changed, 2297 insertions(+)
 create mode 100644 repos/os/recipes/src/nvme_drv/content.mk
 create mode 100644 repos/os/recipes/src/nvme_drv/hash
 create mode 100644 repos/os/recipes/src/nvme_drv/used_apis
 create mode 100644 repos/os/run/nvme.run
 create mode 100644 repos/os/src/drivers/nvme/README
 create mode 100644 repos/os/src/drivers/nvme/main.cc
 create mode 100644 repos/os/src/drivers/nvme/pci.h
 create mode 100644 repos/os/src/drivers/nvme/target.mk
 create mode 100644 repos/os/src/drivers/nvme/util.h

diff --git a/repos/os/recipes/src/nvme_drv/content.mk b/repos/os/recipes/src/nvme_drv/content.mk
new file mode 100644
index 0000000000..05b9d866e4
--- /dev/null
+++ b/repos/os/recipes/src/nvme_drv/content.mk
@@ -0,0 +1,2 @@
+SRC_DIR = src/drivers/nvme
+include $(GENODE_DIR)/repos/base/recipes/src/content.inc
diff --git a/repos/os/recipes/src/nvme_drv/hash b/repos/os/recipes/src/nvme_drv/hash
new file mode 100644
index 0000000000..de23291045
--- /dev/null
+++ b/repos/os/recipes/src/nvme_drv/hash
@@ -0,0 +1 @@
+2018-03-27 fcf9749c441d830aa4666f70e04cd1560c783b2f
diff --git a/repos/os/recipes/src/nvme_drv/used_apis b/repos/os/recipes/src/nvme_drv/used_apis
new file mode 100644
index 0000000000..c22f617174
--- /dev/null
+++ b/repos/os/recipes/src/nvme_drv/used_apis
@@ -0,0 +1,6 @@
+base
+os
+platform_session
+block_session
+report_session
+timer_session
diff --git a/repos/os/run/nvme.run b/repos/os/run/nvme.run
new file mode 100644
index 0000000000..e393752330
--- /dev/null
+++ b/repos/os/run/nvme.run
@@ -0,0 +1,189 @@
+assert_spec x86
+
+# perform write tests when requested
+if {[info exists env(GENODE_TEST_WRITE)]} {
+set test_write 1
+} else {
+set test_write 0
+}
+
+set is_qemu         [have_include power_on/qemu]
+set is_old          [expr [have_spec fiasco] || [have_spec okl4] || [have_spec pistachio]]
+set is_32bit_x86_hw [expr !$is_qemu && [have_spec 32bit]]
+
+#
+# Only run tests on supported platforms
+#
+if {[expr [have_spec linux] || $is_32bit_x86_hw || [expr $is_qemu && $is_old]]} {
+	puts "This run script is not supported on this platform."
+ exit 0 +} + +# +# Qemu and on certain platforms only use the small set of tests +# +set small_test [expr $is_qemu || [have_spec foc] || [have_spec sel4]] + +# +# Check used commands +# +set dd [check_installed dd] + +# +# Build +# +set build_components { + core init + drivers/nvme + drivers/timer + app/block_tester +} + +source ${genode_dir}/repos/base/run/platform_drv.inc +append_platform_drv_build_components + +build $build_components + + +# +# Create raw image +# +catch { exec $dd if=/dev/zero of=bin/nvme.raw bs=1M count=0 seek=32768 } + +create_boot_directory + +# +# Generate config +# +append config { + + + + + + + + + + + + + + + + + + + + + + + + } + +append_platform_drv_config + +append config { + + + + + + + + + + + + } + +append_if $small_test config { + + } + +append_if [expr !$small_test] config { + + + + + + + } + +append_if $test_write config { + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } +append config { + + + + + + + +} + +install_config $config + +# +# Boot modules +# + +set boot_modules { + core init timer nvme_drv + ld.lib.so block_tester +} + +append_platform_drv_boot_modules + +build_boot_image $boot_modules + +append qemu_args " -nographic -m 512 " +append qemu_args " -drive id=nvme0,file=bin/nvme.raw,format=raw,if=none " +append qemu_args " -device nvme,drive=nvme0,serial=fnord,id=nvme0n1 " + +run_genode_until {.*child "block_tester" exited with exit value 0.*\n} 300 + +exec rm -f bin/nvme.raw diff --git a/repos/os/src/drivers/nvme/README b/repos/os/src/drivers/nvme/README new file mode 100644 index 0000000000..da43782c99 --- /dev/null +++ b/repos/os/src/drivers/nvme/README @@ -0,0 +1,40 @@ +This directory contains the implementation of a NVMe driver component. + + +Brief +===== + +The driver supports PCIe NVMe devices matching at least revision 1.1 of +the NVMe specification. For now it only supports one name space and uses +one completion and one submission queue to handle all I/O requests; one +request is limited to 1MiB of data. It lacks any name space management +functionality. + + +Configuration +============= + +The following config illustrates how the driver is configured: + +! +! +! +! +! +! +! + + +Report +====== + +The driver supports reporting of active name spaces, which can be enabled +via the configuration 'report' sub-node: + +! + +The report structure is depicted by the following example: + +! +! +! diff --git a/repos/os/src/drivers/nvme/main.cc b/repos/os/src/drivers/nvme/main.cc new file mode 100644 index 0000000000..826d16d3cc --- /dev/null +++ b/repos/os/src/drivers/nvme/main.cc @@ -0,0 +1,1752 @@ +/* + * \brief NVMe Block session component + * \author Josef Soentgen + * \date 2018-03-05 + * + * Spec used: NVM-Express-1_3a-20171024_ratified.pdf + */ + +/* + * Copyright (C) 2018 Genode Labs GmbH + * + * This file is part of the Genode OS framework, which is distributed + * under the terms of the GNU Affero General Public License version 3. 
+ */ + +/* Genode includes */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* local includes */ +#include +#include + + +namespace { + +using uint16_t = Genode::uint16_t; +using uint32_t = Genode::uint32_t; +using uint64_t = Genode::uint64_t; +using size_t = Genode::size_t; +using addr_t = Genode::addr_t; +using Packet_descriptor = Block::Packet_descriptor; + +} /* anonymous namespace */ + + +/********** + ** NVMe ** + **********/ + +namespace Nvme { + using namespace Genode; + + struct Identify_data; + struct Identify_ns_data; + struct Doorbell; + + struct Cqe; + + struct Sqe; + struct Sqe_create_cq; + struct Sqe_create_sq; + struct Sqe_identify; + struct Sqe_io; + + struct Queue; + struct Sq; + struct Cq; + + struct Controller; + + enum { + CQE_LEN = 16, + SQE_LEN = 64, + MAX_IO_QUEUES = 1, + MAX_IO_ENTRIES = 128, + MAX_IO_ENTRIES_MASK = MAX_IO_ENTRIES - 1, + MAX_IO_PENDING = MAX_IO_ENTRIES - 1, /* tail + 1 == head -> full */ + MAX_ADMIN_ENTRIES = 128, + MAX_ADMIN_ENTRIES_MASK = MAX_ADMIN_ENTRIES - 1, + }; + + enum { + /* + * Limit max I/O requests size; we can map up to 2MiB with one list + * page (4K/8 = 512 * 4K) but 1MiB is plenty + */ + MAX_IO_LEN = 1u << 20, + DMA_DS_SIZE = 4u << 20, + DMA_LIST_DS_SIZE = 256u << 10, + MPS = 4096u, + }; + + enum { + IO_NSID = 1u, + MAX_NS = 1u, + NUM_QUEUES = 1 + MAX_NS, + }; + + enum Opcode { + /* Admin command set */ + DELETE_IO_SQ = 0x00, + CREATE_IO_SQ = 0x01, + DELETE_IO_CQ = 0x04, + CREATE_IO_CQ = 0x05, + IDENTIFY = 0x06, + SET_FEATURES = 0x09, + GET_FEATURES = 0x0A, + /* NVM command set */ + FLUSH = 0x00, + WRITE = 0x01, + READ = 0x02, + }; +}; + + +/* + * Identify command data + */ +struct Nvme::Identify_data : Genode::Mmio +{ + enum { + SN_OFFSET = 0x04, SN_LEN = 20, + MN_OFFSET = 0x18, MN_LEN = 40, + FR_OFFSET = 0x40, FR_LEN = 12, + }; + + using Sn = Genode::String; + using Mn = Genode::String; + using Fr = Genode::String; + + Sn sn { }; /* serial number */ + Mn mn { }; /* model number */ + Fr fr { }; /* firmware revision */ + + struct Vid : Register<0x000, 16> { }; /* vendor id */ + struct Ssvid : Register<0x002, 16> { }; /* sub system vendor id */ + /* optional admin command support */ + struct Oacs : Register<0x100, 32> + { + struct Ssr : Bitfield< 0, 1> { }; /* security send/receive */ + struct Nvmf : Bitfield< 1, 1> { }; /* NVM format */ + struct Fwcd : Bitfield< 2, 1> { }; /* firmware commit/download image */ + struct Nsm : Bitfield< 3, 1> { }; /* namespace management */ + struct Vm : Bitfield< 7, 1> { }; /* virtualization management */ + }; + struct Nn : Register<0x204, 32> { }; /* number of namespaces */ + struct Vwc : Register<0x204, 8> { }; /* volatile write cache */ + + Identify_data(addr_t const base) + : Genode::Mmio(base) + { + char const *p = (char const*)base; + + sn = Sn(Util::extract_string(p, SN_OFFSET, SN_LEN+1)); + mn = Mn(Util::extract_string(p, MN_OFFSET, MN_LEN+1)); + fr = Fr(Util::extract_string(p, FR_OFFSET, FR_LEN+1)); + } +}; + + +/* + * Identify name space command data + */ +struct Nvme::Identify_ns_data : public Genode::Mmio +{ + struct Nsze : Register<0x00, 64> { }; /* name space size */ + struct Ncap : Register<0x08, 64> { }; /* name space capacity */ + struct Nuse : Register<0x10, 64> { }; /* name space utilization */ + struct Nsfeat : Register<0x18, 8> { }; /* name space features */ + struct Nlbaf : Register<0x19, 8> { }; /* number of LBA formats */ + /* formatted LBA size */ + struct Flbas : Register<0x1a, 8> + { + 
struct Formats : Bitfield< 0, 3> { }; + }; + struct Mc : Register<0x1b, 8> { }; /* metadata capabilities */ + struct Dpc : Register<0x1c, 8> { }; /* end-to-end data protection capabilities */ + struct Dps : Register<0x1d, 8> { }; /* end-to-end data protection settings */ + + enum { MAX_LBAF = 16, }; + /* LBA format support */ + struct Lbaf : Register_array<0x80, 32, MAX_LBAF, 32> + { + struct Ms : Bitfield< 0, 16> { }; /* metadata size */ + struct Lbads : Bitfield<16, 8> { }; /* LBA data size (2^n) */ + struct Rp : Bitfield<24, 2> { }; /* relative performance */ + }; + + Identify_ns_data(addr_t const base) + : Genode::Mmio(base) + { } +}; + + +/* + * Queue doorbell register + */ +struct Nvme::Doorbell : public Genode::Mmio +{ + struct Sqtdbl : Register<0x00, 32> + { + struct Sqt : Bitfield< 0, 16> { }; /* submission queue tail */ + }; + + struct Cqhdbl : Register<0x04, 32> + { + struct Cqh : Bitfield< 0, 16> { }; /* submission queue tail */ + }; + + Doorbell(addr_t const base) + : Genode::Mmio(base) { } +}; + + +/* + * Completion queue entry + */ +struct Nvme::Cqe : Genode::Mmio +{ + struct Dw0 : Register<0x00, 32> { }; /* command specific */ + struct Dw1 : Register<0x04, 32> { }; /* reserved */ + + struct Sqhd : Register<0x08, 16> { }; + struct Sqid : Register<0x0a, 16> { }; + struct Cid : Register<0x0c, 16> { }; + struct Sf : Register<0x0e, 16> + { + struct P : Bitfield< 0, 1> { }; + struct Sc : Bitfield< 1, 8> { }; /* status code */ + struct Sct : Bitfield< 9, 3> { }; /* status code type */ + struct M : Bitfield<14, 1> { }; /* more (get log) */ + struct Dnr : Bitfield<15, 1> { }; /* do not retry */ + }; + + Cqe(addr_t const base) : Genode::Mmio(base) { } + + static uint32_t request_id(Nvme::Cqe const &b) + { + return (b.read() << 16)|b.read(); + } + + static bool succeeded(Nvme::Cqe const &b) + { + return !b.read(); + } + + static void dump(Nvme::Cqe const &b) + { + using namespace Genode; + log("sqhd:", b.read(), " " + "sqid:", b.read(), " " + "cid:", b.read(), " " + "p:", b.read(), " " + "status: ", Hex(b.read()), " " + "sc:", Hex(b.read()), " " + "sct:", Hex(b.read()), " " + "m:", b.read(), " " + "dnr:", b.read()); + } +}; + + +/* + * Submission queue entry base + */ +struct Nvme::Sqe : Genode::Mmio +{ + struct Cdw0 : Register<0x00, 32> + { + struct Opc : Bitfield< 0, 8> { }; /* opcode */ + struct Fuse : Bitfield< 9, 2> { }; /* fused operation */ + struct Psdt : Bitfield<14, 2> { }; /* PRP or SGL for data transfer */ + struct Cid : Bitfield<16, 16> { }; /* command identifier */ + }; + struct Nsid : Register<0x04, 32> { }; + struct Mptr : Register<0x10, 64> { }; + struct Prp1 : Register<0x18, 64> { }; + struct Prp2 : Register<0x20, 64> { }; + + /* SGL not supported */ + + Sqe(addr_t const base) : Genode::Mmio(base) { } + + bool valid() const { return base() != 0ul; } +}; + + +/* + * Identify command + */ +struct Nvme::Sqe_identify : Nvme::Sqe +{ + struct Cdw10 : Register<0x28, 32> + { + struct Cns : Bitfield< 0, 8> { }; /* controller or namespace structure */ + }; + + Sqe_identify(addr_t const base) : Sqe(base) { } +}; + + +/* + * Create completion queue command + */ +struct Nvme::Sqe_create_cq : Nvme::Sqe +{ + struct Cdw10 : Register<0x28, 32> + { + struct Qid : Bitfield< 0, 16> { }; /* queue identifier */ + struct Qsize : Bitfield<16, 16> { }; /* queue size 0-based vale */ + }; + + struct Cdw11 : Register<0x2c, 32> + { + struct Pc : Bitfield< 0, 1> { }; /* physically contiguous */ + struct En : Bitfield< 1, 1> { }; /* interrupts enabled */ + struct Iv : Bitfield<16, 16> { }; /* 
interrupt vector */ + }; + + Sqe_create_cq(addr_t const base) : Sqe(base) { } +}; + + +/* + * Create submission queue command + */ +struct Nvme::Sqe_create_sq : Nvme::Sqe +{ + struct Cdw10 : Register<0x28, 32> + { + struct Qid : Bitfield< 0, 16> { }; /* queue identifier */ + struct Qsize : Bitfield<16, 16> { }; /* queue size 0-based vale */ + }; + + struct Cdw11 : Register<0x2c, 32> + { + struct Pc : Bitfield< 0, 1> { }; /* physically contiguous */ + struct Qprio : Bitfield< 1, 2> { }; /* queue priority */ + struct Cqid : Bitfield<16, 16> { }; /* completion queue identifier */ + }; + + Sqe_create_sq(addr_t const base) : Sqe(base) { } +}; + + +/* + * I/O command + */ +struct Nvme::Sqe_io : Nvme::Sqe +{ + struct Slba_lower : Register<0x28, 32> { }; + struct Slba_upper : Register<0x2c, 32> { }; + struct Slba : Genode::Bitset_2 { }; + + struct Cdw12 : Register<0x30, 32> + { + struct Nlb : Bitfield<0, 16> { }; + }; + + Sqe_io(addr_t const base) : Sqe(base) { } +}; + + +/* + * Queue base structure + */ +struct Nvme::Queue +{ + Genode::Ram_dataspace_capability ds { }; + addr_t pa { 0 }; + addr_t va { 0 }; + uint32_t max_entries { 0 }; + + bool valid() const { return pa != 0ul; } +}; + + +/* + * Submission queue + */ +struct Nvme::Sq : Nvme::Queue +{ + uint32_t tail { 0 }; + uint16_t id { 0 }; + + addr_t next() + { + addr_t a = va + (tail * SQE_LEN); + Genode::memset((void*)a, 0, SQE_LEN); + tail = (tail + 1) % max_entries; + return a; + } +}; + + +/* + * Completion queue + */ +struct Nvme::Cq : Nvme::Queue +{ + uint32_t head { 0 }; + uint32_t phase { 1 }; + + addr_t next() { return va + (head * CQE_LEN); } + + void advance_head() + { + if (++head >= max_entries) { + head = 0; + phase ^= 1; + } + } +}; + + +/* + * Controller + */ +struct Nvme::Controller : public Genode::Attached_mmio +{ + /********** + ** MMIO ** + **********/ + + /* + * Controller capabilities (p. 40 ff.) 
+ */ + struct Cap : Register<0x0, 64> + { + struct Mqes : Bitfield< 0, 15> { }; /* maximum queue entries supported 0-based */ + struct Cqr : Bitfield<16, 1> { }; /* contiguous queues required */ + struct Ams : Bitfield<17, 2> { }; /* arbitration mechanism supported */ + struct To : Bitfield<24, 8> { }; /* timeout (csts.rdy) */ + struct Dstrd : Bitfield<32, 4> { }; /* doorbell stride */ + struct Nssrs : Bitfield<36, 1> { }; /* NVM subsystem reset supported */ + struct Css : Bitfield<37, 8> { }; /* command sets supported */ + struct Bps : Bitfield<45, 1> { }; /* boot partition support */ + struct Mpsmin : Bitfield<48, 4> { }; /* memory page size minimum */ + struct Mpsmax : Bitfield<52, 4> { }; /* memory page size maximum */ + }; + + /* + * Version + */ + struct Vs : Register<0x8, 32> + { + struct Ter : Bitfield< 0, 8> { }; /* tertiary */ + struct Mnr : Bitfield< 8, 8> { }; /* minor */ + struct Mjr : Bitfield<16, 16> { }; /* major */ + }; + + /* + * Interrupt mask set (for !MSI-X) + */ + struct Intms : Register<0x0c, 32> + { + struct Ivms : Bitfield<0, 32> { }; /* interrupt vector mask set */ + }; + + /* + * Interrupt mask clear + */ + struct Intmc : Register<0x10, 32> + { + struct Ivmc : Bitfield<0, 32> { }; /* interrupt vector mask clear */ + }; + + /* + * Controller configuration + */ + struct Cc : Register<0x14, 32> + { + struct En : Bitfield< 0, 1> { }; /* enable */ + struct Css : Bitfield< 4, 3> { }; /* I/O command set selected */ + struct Mps : Bitfield< 7, 4> { }; /* memory page size */ + struct Ams : Bitfield<11, 3> { }; /* arbitration mechanism selected */ + struct Shn : Bitfield<14, 2> { }; /* shutdown notification */ + struct Iosqes : Bitfield<16, 4> { }; /* I/O submission queue entry size */ + struct Iocqes : Bitfield<20, 4> { }; /* I/O completion queue entry size */ + }; + + /* + * Controller status + */ + struct Csts : Register<0x1c, 32> + { + struct Rdy : Bitfield< 0, 1> { }; /* ready */ + struct Cfs : Bitfield< 1, 1> { }; /* controller fatal status */ + struct Shst : Bitfield< 2, 1> { }; /* shutdown status */ + struct Nssro : Bitfield< 4, 1> { }; /* NVM subsystem reset occurred */ + struct Pp : Bitfield< 5, 1> { }; /* processing paused */ + }; + + /* + * NVM subsystem reset + */ + struct Nssr : Register<0x20, 32> + { + struct Nssrc : Bitfield< 0, 32> { }; /* NVM subsystem reset control */ + }; + + /* + * Admin queue attributes + */ + struct Aqa : Register<0x24, 32> + { + struct Asqs : Bitfield< 0, 12> { }; /* admin submission queue size 0-based */ + struct Acqs : Bitfield<16, 12> { }; /* admin completion queue size 0-based */ + }; + + /* + * Admin submission queue base address + */ + struct Asq : Register<0x28, 64> + { + struct Asqb : Bitfield<12, 52> { }; /* admin submission queue base */ + }; + + /* + * Admin completion queue base address + */ + struct Acq : Register<0x30, 64> + { + struct Acqb : Bitfield<12, 52> { }; /* admin completion queue base */ + }; + + /* + * Controller memory buffer location + */ + struct Cmbloc : Register<0x38, 32> + { + struct Bir : Bitfield< 0, 2> { }; /* base indicator register */ + struct Ofst : Bitfield<12, 24> { }; /* offset */ + }; + + /* + * Controller memory buffer size + */ + struct Cmbsz : Register<0x3c, 32> + { + struct Sqs : Bitfield< 0, 1> { }; /* submission queue support */ + struct Cqs : Bitfield< 1, 1> { }; /* completion queue support */ + struct Lists : Bitfield< 2, 1> { }; /* PRP SGL list support */ + struct Rds : Bitfield< 3, 1> { }; /* read data support */ + struct Wds : Bitfield< 4, 1> { }; /* write data support */ + struct 
Szu : Bitfield< 8, 4> { }; /* size units */ + struct Sz : Bitfield<12, 24> { }; /* size */ + }; + + /* + * Boot partition information + */ + struct Bpinfo : Register<0x40, 32> + { + struct Bpsz : Bitfield< 0, 14> { }; /* boot partition size (in 128KiB) */ + struct Brs : Bitfield<24, 2> { }; /* boot read status */ + struct Abpid : Bitfield<31, 1> { }; /* active boot partition id */ + }; + + /* + * Boot partition read select + */ + struct Bprsel : Register<0x44, 32> + { + struct Bprsz : Bitfield< 0, 10> { }; /* boot partition read size (in 4KiB) */ + struct Bprof : Bitfield<10, 30> { }; /* boot partition read offset (in 4KiB) */ + struct Bpid : Bitfield<31, 1> { }; /* boot partition identifier */ + }; + + /* + * Boot partition memory buffer location + */ + struct Bpmbl : Register<0x48, 64> + { + struct Bmbba : Bitfield<12, 52> { }; /* boot partition memory buffer base address */ + }; + + /* + * Admin submission doorbell + */ + struct Admin_sdb : Register<0x1000, 32> + { + struct Sqt : Bitfield< 0, 16> { }; /* submission queue tail */ + }; + + /* + * Admin completion doorbell + */ + struct Admin_cdb : Register<0x1004, 32> + { + struct Cqh : Bitfield< 0, 16> { }; /* completion queue tail */ + }; + + /* + * I/O submission doorbell + */ + struct Io_sdb : Register<0x1008, 32> + { + struct Sqt : Bitfield< 0, 16> { }; /* submission queue tail */ + }; + + /* + * I/O completion doorbell + */ + struct Io_cdb : Register<0x100C, 32> + { + struct Cqh : Bitfield< 0, 16> { }; /* completion queue tail */ + }; + + /********** + ** CODE ** + **********/ + + struct Mem_address + { + addr_t va { 0 }; + addr_t pa { 0 }; + }; + + struct Initialization_failed : Genode::Exception { }; + + Genode::Env &_env; + + Util::Dma_allocator &_dma_alloc; + Mmio::Delayer &_delayer; + + size_t _mps { 0 }; + + Nvme::Cq _cq[NUM_QUEUES] { }; + Nvme::Sq _sq[NUM_QUEUES] { }; + + Nvme::Cq &_admin_cq = _cq[0]; + Nvme::Sq &_admin_sq = _sq[0]; + + Mem_address _nvme_identify { }; + + Genode::Constructible _identify_data { }; + + Mem_address _nvme_nslist { }; + uint32_t _nvme_nslist_count { 0 }; + + enum Cns { + IDENTIFY_NS = 0x00, + IDENTIFY = 0x01, + NSLIST = 0x02, + }; + + enum { + IDENTIFY_LEN = 4096, + + IDENTIFY_CID = 0x666, + NSLIST_CID, + QUERYNS_CID, + CREATE_IO_CQ_CID, + CREATE_IO_SQ_CID, + }; + + Mem_address _nvme_query_ns[MAX_NS] { }; + + struct Info + { + Genode::String<8> version { }; + Identify_data::Sn sn { }; + Identify_data::Mn mn { }; + Identify_data::Fr fr { }; + } _info { }; + + struct Nsinfo + { + Block::sector_t count { 0 }; + size_t size { 0 }; + bool valid() const { return count && size; } + } _nsinfo[MAX_NS] { }; + + /** + * Wait for ready bit to change + * + * \param val value of ready bit + * + * \throw Mmio::Polling_timeout + */ + void _wait_for_rdy(unsigned val) + { + enum { MAX = 50u, TO_UNIT = 500u, }; + Attempts const a(MAX); + Microseconds const t((read() * TO_UNIT) * (1000 / MAX)); + try { + wait_for(a, t, _delayer, Csts::Rdy::Equal(val)); + } catch (Mmio::Polling_timeout) { + Genode::error("Csts::Rdy(", val, ") failed"); + throw; + } + } + + /** + * Reset controller + * + * \throw Initialization_failed + */ + void _reset() + { + /* disable intr and ctrlr */ + write(~0u); + write(0); + + try { _wait_for_rdy(0); } + catch (...) { throw Initialization_failed(); } + + /* + * For now we limit the memory page size to 4K because besides Qemu + * there are not that many consumer NVMe device that support larger + * page sizes and we do not want to align the DMA buffers to larger + * sizes. 
+ */ + Cap::access_t const mpsmax = read(); + if (mpsmax > 0) { Genode::warning("ignore mpsmax:", mpsmax); } + + /* the value written to the register amounts to 2^(12 + v) bytes */ + Cap::access_t const v = Genode::log2((unsigned)Nvme::MPS) - 12; + _mps = 1u << (12 + v); + write(v); + + write(log2((unsigned)CQE_LEN)); + write(log2((unsigned)SQE_LEN)); + } + + /** + * Setup queue, i.e., fill out fields + * + * \param q reference to queue + * \param num number of entries + * \param len size of one entry + */ + void _setup_queue(Queue &q, size_t const num, size_t const len) + { + size_t const size = num * len; + q.ds = _dma_alloc.alloc(size); + q.pa = Dataspace_client(q.ds).phys_addr(); + q.va = (addr_t)_env.rm().attach(q.ds); + q.max_entries = num; + } + + /** + * Check if given queue tuple is full + * + * \param sq reference to submission queue + * \param cq reference to completion queue + * + * \return returns true if queue is full and false otherwise + */ + bool _queue_full(Nvme::Sq const &sq, Nvme::Cq const &cq) const + { + return ((sq.tail + 1) & (MAX_IO_ENTRIES_MASK)) == cq.head; + } + + /** + * Setup admin queues + */ + void _setup_admin() + { + _setup_queue(_admin_cq, MAX_ADMIN_ENTRIES, CQE_LEN); + write(MAX_ADMIN_ENTRIES_MASK); + write(_admin_cq.pa); + + _setup_queue(_admin_sq, MAX_ADMIN_ENTRIES, SQE_LEN); + write(MAX_ADMIN_ENTRIES_MASK); + write(_admin_sq.pa); + } + + /** + * Get address of the next free entry in the admin submission queue + * + * \param opc entry opcode + * \param nsid namespace identifier + * \param cid command identifier + * + * \return returns address of the next free entry or 0 if there is + * no free entry + */ + addr_t _admin_command(Opcode opc, uint32_t nsid, uint32_t cid) + { + if (_queue_full(_admin_sq, _admin_cq)) { return 0ul; } + + Sqe b(_admin_sq.next()); + b.write(opc); + b.write(cid); + b.write(nsid); + return b.base(); + } + + /** + * Wait until admin command has finished + * + * \param num number of attempts + * \param cid command identifier + * + * \return returns true if attempt to wait was successfull, otherwise + * false is returned + */ + bool _wait_for_admin_cq(uint32_t num, uint16_t cid) + { + bool success = false; + + for (uint32_t i = 0; i < num; i++) { + _delayer.usleep(100 * 1000); + + Cqe b(_admin_cq.next()); + + if (b.read() != cid) { + continue; + } + + _admin_cq.advance_head(); + + success = true; + + write(_admin_cq.head); + } + + return success; + } + + /** + * Get list of namespaces + */ + void _query_nslist() + { + if (!_nvme_nslist.va) { + Ram_dataspace_capability ds = _dma_alloc.alloc(IDENTIFY_LEN); + _nvme_nslist.va = (addr_t)_env.rm().attach(ds); + _nvme_nslist.pa = Dataspace_client(ds).phys_addr(); + } + + uint32_t *nslist = (uint32_t*)_nvme_nslist.va; + + bool const nsm = _identify_data->read(); + if (!nsm) { + nslist[0] = 1; + _nvme_nslist_count = 1; + return; + } + + Sqe_identify b(_admin_command(Opcode::IDENTIFY, 0, NSLIST_CID)); + + b.write(_nvme_nslist.pa); + b.write(Cns::NSLIST); + + write(_admin_sq.tail); + + if (!_wait_for_admin_cq(10, NSLIST_CID)) { + Genode::error("identify name space list failed"); + throw Initialization_failed(); + } + + for (size_t i = 0; i < 1024; i++) { + if (nslist[i] == 0) { break; } + ++_nvme_nslist_count; + } + } + + /** + * Get information of namespaces + */ + void _query_ns() + { + uint32_t const max = _nvme_nslist_count > (uint32_t)MAX_NS ? 
+ (uint32_t)MAX_NS : _nvme_nslist_count; + + if (!max) { + error("no name spaces found"); + throw Initialization_failed(); + } + + if (max > 1) { warning("only the first name space is used"); } + + uint32_t const *ns = (uint32_t const*)_nvme_nslist.va; + uint32_t const id = 0; + + if (!_nvme_query_ns[id].va) { + Ram_dataspace_capability ds = _dma_alloc.alloc(IDENTIFY_LEN); + _nvme_query_ns[id].va = (addr_t)_env.rm().attach(ds); + _nvme_query_ns[id].pa = Dataspace_client(ds).phys_addr(); + } + + Sqe_identify b(_admin_command(Opcode::IDENTIFY, ns[id], QUERYNS_CID)); + b.write(_nvme_query_ns[id].pa); + b.write(Cns::IDENTIFY_NS); + + write(_admin_sq.tail); + + if (!_wait_for_admin_cq(10, QUERYNS_CID)) { + Genode::error("identify name space failed"); + throw Initialization_failed(); + } + + Identify_ns_data nsdata(_nvme_query_ns[id].va); + uint32_t const flbas = nsdata.read(); + + _nsinfo[id].count = nsdata.read(); + _nsinfo[id].size = 1u << nsdata.read(flbas); + } + + /** + * Query the controller information + */ + void _identify() + { + if (!_nvme_identify.va) { + Ram_dataspace_capability ds = _dma_alloc.alloc(IDENTIFY_LEN); + _nvme_identify.va = (addr_t)_env.rm().attach(ds); + _nvme_identify.pa = Dataspace_client(ds).phys_addr(); + } + + Sqe_identify b(_admin_command(Opcode::IDENTIFY, 0, IDENTIFY_CID)); + b.write(_nvme_identify.pa); + b.write(Cns::IDENTIFY); + + write(_admin_sq.tail); + + if (!_wait_for_admin_cq(10, IDENTIFY_CID)) { + Genode::error("identify failed"); + throw Initialization_failed(); + } + + _identify_data.construct(_nvme_identify.va); + + /* store information */ + _info.version = Genode::String<8>(read(), ".", + read(), ".", + read()); + _info.sn = _identify_data->sn; + _info.mn = _identify_data->mn; + _info.fr = _identify_data->fr; + } + + /** + * Setup I/O completion queue + * + * \param id identifier of the completion queue + * + * \throw Initialization_failed() in case the queue could not be created + */ + void _setup_io_cq(uint16_t id) + { + Nvme::Cq &cq = _cq[id]; + if (!cq.valid()) { _setup_queue(cq, MAX_IO_ENTRIES, CQE_LEN); } + + Sqe_create_cq b(_admin_command(Opcode::CREATE_IO_CQ, 0, CREATE_IO_CQ_CID)); + b.write(cq.pa); + b.write(id); + b.write(MAX_IO_ENTRIES_MASK); + b.write(1); + b.write(1); + + write(_admin_sq.tail); + + if (!_wait_for_admin_cq(10, CREATE_IO_CQ_CID)) { + Genode::error("create I/O cq failed"); + throw Initialization_failed(); + } + } + + /** + * Setup I/O submission queue + * + * \param id identifier of the submission queue + * \param cqid identifier of the completion queue + * + * \throw Initialization_failed() in case the queue could not be created + */ + void _setup_io_sq(uint16_t id, uint16_t cqid) + { + Nvme::Sq &sq = _sq[id]; + if (!sq.valid()) { _setup_queue(sq, MAX_IO_ENTRIES, SQE_LEN); } + + Sqe_create_sq b(_admin_command(Opcode::CREATE_IO_SQ, 0, CREATE_IO_SQ_CID)); + b.write(sq.pa); + b.write(id); + b.write(MAX_IO_ENTRIES_MASK); + b.write(1); + b.write(0b00); /* urgent for now */ + b.write(cqid); + + write(_admin_sq.tail); + + if (!_wait_for_admin_cq(10, CREATE_IO_SQ_CID)) { + Genode::error("create I/O sq failed"); + throw Initialization_failed(); + } + } + + /** + * Constructor + */ + Controller(Genode::Env &env, Util::Dma_allocator &dma_alloc, + addr_t const base, size_t const size, + Mmio::Delayer &delayer) + : + Genode::Attached_mmio(env, base, size), + _env(env), _dma_alloc(dma_alloc), _delayer(delayer) + { } + + /** + * Initialize controller + * + * \throw Initialization_failed + */ + void init() + { + _reset(); + 
_setup_admin(); + + write(1); + + try { _wait_for_rdy(1); } + catch (...) { + if (read()) { + Genode::error("fatal controller status"); + } + throw Initialization_failed(); + } + } + + /** + * Mask interrupts + */ + void mask_intr() { write(1); } + + /** + * Clean interrupts + */ + void clear_intr() { write(1); } + + /* + * Identify NVM system + */ + void identify() + { + _identify(); + _query_nslist(); + _query_ns(); + } + + /** + * Setup I/O queue + */ + void setup_io(uint16_t cid, uint16_t sid) + { + _setup_io_cq(cid); + _setup_io_sq(sid, cid); + } + + /** + * Get next free IO submission queue slot + */ + addr_t io_command(uint16_t id) + { + Nvme::Sq &sq = _sq[id]; + Nvme::Cq &cq = _cq[id]; + + if (_queue_full(sq, cq)) { return 0ul; } + + Sqe e(sq.next()); + e.write(sq.id++); + e.write(id); + return e.base(); + } + + /** + * Write current I/O submission queue tail + */ + void commit_io(uint16_t id) + { + Nvme::Sq &sq = _sq[id]; + write(sq.tail); + } + + /** + * Flush cache + */ + void flush_cache(uint16_t id) + { + (void)id; + } + + /** + * Process every pending I/O completion + * + * \param func function that is called on each completion + */ + template + void handle_io_completions(uint16_t id, FUNC const &func) + { + Nvme::Cq &cq = _cq[id]; + + if (!cq.valid()) { return; } + + for (;;) { + Cqe e(cq.next()); + + /* process until old phase */ + if (e.read() != cq.phase) { break; } + + func(e); + + cq.advance_head(); + + /* + * Instead of acknowledging the completions here, + * we could handle them batch-wise after the loop. + */ + write(cq.head); + } + } + + /** + * Get memory page size in bytes + */ + size_t mps() const { return _mps; } + + /** + * Get block metrics of namespace + * + * \param nsid namespace identifier + * + * \return returns information of the namespace + */ + Nsinfo nsinfo(uint32_t id) + { + id = id - 1; + if (id >= MAX_NS) { return Nsinfo(); } + return _nsinfo[id]; + } + + /** + * Get controller information + */ + Info const &info() const { return _info; } + + /*********** + ** Debug ** + ***********/ + + void dump_cap() + { + Genode::log("CAP:", " ", + "Mqes:", read()+1, " ", + "Cqr:", read(), " ", + "Ams:", read(), " ", + "To:", read(), " ", + "Dstrd:", read(), " ", + "Nssrs:", read(), " ", + "Css:", read(), " ", + "Bps:", read(), " ", + "Mpsmin:", read(), " ", + "Mpsmax:", read()); + + Genode::log("VS: ", " ", read(), ".", + read(), ".", read()); + } + + void dump_identify() + { + log("vid:", Hex(_identify_data->read())); + log("ssvid:", Hex(_identify_data->read())); + log("oacs:", Hex(_identify_data->read())); + log(" nsm:", Hex(_identify_data->read())); + log("sn:'", _identify_data->sn.string(), "'"); + log("mn:'", _identify_data->mn.string(), "'"); + log("fr:'", _identify_data->fr.string(), "'"); + log("nn:", _identify_data->read()); + log("vwc:", _identify_data->read()); + } + + void dump_nslist() + { + uint32_t const *p = (uint32_t const*)_nvme_nslist.va; + if (!p) { return; } + + for (size_t i = 0; i < 1024; i++) { + if (p[i] == 0) { break; } + Genode::log("ns:#", p[i], " found"); + } + } +}; + + +/****************** + ** Block driver ** + ******************/ + +class Driver : public Block::Driver +{ + public: + + bool _verbose_checks { false }; + bool _verbose_identify { false }; + bool _verbose_io { false }; + bool _verbose_mem { false }; + bool _verbose_regs { false }; + + private: + + Genode::Env &_env; + Genode::Allocator &_alloc; + + Genode::Signal_context_capability _announce_sigh; + + Genode::Attached_rom_dataspace _config_rom { _env, "config" }; + 
+ void _handle_config_update() + { + _config_rom.update(); + + if (!_config_rom.valid()) { return; } + + Genode::Xml_node config = _config_rom.xml(); + _verbose_checks = config.attribute_value("verbose_checks", _verbose_checks); + _verbose_identify = config.attribute_value("verbose_identify", _verbose_identify); + _verbose_io = config.attribute_value("verbose_io", _verbose_io); + _verbose_mem = config.attribute_value("verbose_mem", _verbose_mem); + _verbose_regs = config.attribute_value("verbose_regs", _verbose_regs); + } + + Genode::Signal_handler _config_sigh { + _env.ep(), *this, &Driver::_handle_config_update }; + + /************** + ** Reporter ** + **************/ + + Genode::Reporter _namespace_reporter { _env, "controller" }; + + void _report_namespaces() + { + try { + Genode::Reporter::Xml_generator xml(_namespace_reporter, [&]() { + Nvme::Controller::Info const &info = _nvme_ctrlr->info(); + + xml.attribute("serial", info.sn); + xml.attribute("model", info.mn); + + for (int i = 1; i <= Nvme::MAX_NS; i++) { + Nvme::Controller::Nsinfo ns = _nvme_ctrlr->nsinfo(i); + + xml.node("namespace", [&]() { + xml.attribute("id", i); + xml.attribute("block_size", ns.size); + xml.attribute("block_count", ns.count); + }); + } + }); + } catch (...) { } + } + + /********* + ** DMA ** + *********/ + + Genode::Constructible _nvme_pci { }; + + struct Io_buffer + { + addr_t pa { 0 }; + addr_t va { 0 }; + size_t size { 0 }; + + bool valid() const { return size && pa && va; } + void invalidate() { Genode::memset(this, 0, sizeof(*this)); } + }; + + template + struct Io_buffer_mapper + { + using Bitmap = Util::Bitmap; + Bitmap _bitmap { }; + + Util::Slots _buffers { }; + + Genode::Ram_dataspace_capability _ds { }; + addr_t _phys_addr { 0 }; + addr_t _virt_addr { 0 }; + + Io_buffer_mapper(Genode::Ram_dataspace_capability ds, + addr_t phys, addr_t virt) + : _ds(ds), _phys_addr(phys), _virt_addr(virt) { } + + Io_buffer *alloc(size_t size) + { + Io_buffer *iob = _buffers.get(); + if (!iob) { return nullptr; } + + try { + size_t const bits = size / MPS; + addr_t const start = _bitmap.alloc(bits); + iob->pa = (start * MPS) + _phys_addr; + iob->va = (start * MPS) + _virt_addr; + iob->size = size; + } catch (...) 
{ + iob->invalidate(); + return nullptr; + } + return iob; + } + + void free(Io_buffer *iob) + { + if (iob) { + size_t const size = iob->size; + addr_t const start = (iob->pa - _phys_addr) / MPS; + _bitmap.free(start, size / MPS); + iob->invalidate(); + } + } + }; + + Genode::Constructible> _io_mapper { }; + + Genode::Constructible> _io_list_mapper { }; + + void _setup_large_request(addr_t va, + Io_buffer const &iob, + size_t const num, + size_t const mps) + { + /* omit first page */ + addr_t pa = iob.pa + mps; + uint64_t *p = (uint64_t*)va; + + for (size_t i = 0; i < num; i++) { + p[i] = pa; + pa += mps; + } + } + + /************** + ** Requests ** + **************/ + + struct Request + { + uint32_t id { 0 }; + Packet_descriptor pd { }; + char *buffer { nullptr }; + + Io_buffer *iob { nullptr }; + Io_buffer *large_request { nullptr }; + + bool valid() const { return id != 0; } + + void invalidate() + { + id = 0; + buffer = nullptr; + pd = Packet_descriptor(); + + iob = nullptr; + large_request = nullptr; + } + }; + + Util::Slots _requests { }; + size_t _requests_pending { 0 }; + + /********************* + ** MMIO Controller ** + *********************/ + + struct Timer_delayer : Genode::Mmio::Delayer, + Timer::Connection + { + Timer_delayer(Genode::Env &env) + : Timer::Connection(env) { } + + void usleep(unsigned us) { Timer::Connection::usleep(us); } + } _delayer { _env }; + + Genode::Constructible _nvme_ctrlr { }; + + void _handle_completions() + { + _nvme_ctrlr->handle_io_completions(Nvme::IO_NSID, [&] (Nvme::Cqe const &b) { + + if (_verbose_io) { Nvme::Cqe::dump(b); } + + uint32_t const id = Nvme::Cqe::request_id(b); + + Request *r = _requests.lookup([&] (Request &r) { + if (r.id == id) { return true; } + return false; + }); + if (!r) { + Genode::error("no pending request found for CQ entry"); + Nvme::Cqe::dump(b); + return; + } + + bool const succeeded = Nvme::Cqe::succeeded(b); + + Packet_descriptor pd = r->pd; + pd.succeeded(succeeded); + + Io_buffer *iob = r->iob; + + if (succeeded && pd.operation() == Packet_descriptor::READ) { + size_t const len = pd.block_count() * _block_size; + Genode::memcpy(r->buffer, (void*)iob->va, len); + } + _io_mapper->free(iob); + + if (r->large_request) { + _io_list_mapper->free(r->large_request); + } + + r->invalidate(); + --_requests_pending; + ack_packet(pd, succeeded); + }); + } + + void _handle_intr() + { + _nvme_ctrlr->mask_intr(); + _handle_completions(); + _nvme_ctrlr->clear_intr(); + _nvme_pci->ack_irq(); + } + + Genode::Signal_handler _intr_sigh { + _env.ep(), *this, &Driver::_handle_intr }; + + /*********** + ** Block ** + ***********/ + + size_t _block_size { 0 }; + Block::sector_t _block_count { 0 }; + Block::Session::Operations _block_ops { }; + + public: + + /** + * Constructor + */ + Driver(Genode::Env &env, Genode::Allocator &alloc, + Genode::Signal_context_capability sigh) + : Block::Driver(env.ram()), _env(env), _alloc(alloc), _announce_sigh(sigh) + { + _config_rom.sigh(_config_sigh); + _handle_config_update(); + + /* + * Setup and identify NVMe PCI controller + */ + + try { + _nvme_pci.construct(_env); + } catch (Nvme::Pci::Missing_controller) { + Genode::error("no NVMe PCIe controller found"); + throw; + } + + try { + _nvme_ctrlr.construct(_env, *_nvme_pci, _nvme_pci->base(), + _nvme_pci->size(), _delayer); + } catch (...) 
{ + Genode::error("could not access NVMe controller MMIO"); + throw; + } + + if (_verbose_regs) { _nvme_ctrlr->dump_cap(); } + + _nvme_ctrlr->init(); + _nvme_ctrlr->identify(); + + if (_verbose_identify) { + Genode::warning(_requests_pending); + _nvme_ctrlr->dump_identify(); + _nvme_ctrlr->dump_nslist(); + } + + /* + * Setup I/O + */ + + { + Genode::Ram_dataspace_capability ds = _nvme_pci->alloc(Nvme::DMA_DS_SIZE); + if (!ds.valid()) { + Genode::error("could not allocate DMA backing store"); + throw Nvme::Controller::Initialization_failed(); + } + addr_t const phys_addr = Genode::Dataspace_client(ds).phys_addr(); + addr_t const virt_addr = (addr_t)_env.rm().attach(ds); + _io_mapper.construct(ds, phys_addr, virt_addr); + + if (_verbose_mem) { + Genode::log("DMA", " virt: [", Genode::Hex(virt_addr), ",", + Genode::Hex(virt_addr + Nvme::DMA_DS_SIZE), "]", + " phys: [", Genode::Hex(phys_addr), ",", + Genode::Hex(phys_addr + Nvme::DMA_DS_SIZE), "]"); + } + } + + { + Genode::Ram_dataspace_capability ds = _nvme_pci->alloc(Nvme::DMA_LIST_DS_SIZE); + if (!ds.valid()) { + Genode::error("could not allocate DMA list-pages backing store"); + throw Nvme::Controller::Initialization_failed(); + } + addr_t const phys_addr = Genode::Dataspace_client(ds).phys_addr(); + addr_t const virt_addr = (addr_t)_env.rm().attach(ds); + _io_list_mapper.construct(ds, phys_addr, virt_addr); + + if (_verbose_mem) { + Genode::log("DMA list-pages", " virt: [", Genode::Hex(virt_addr), ",", + Genode::Hex(virt_addr + Nvme::DMA_DS_SIZE), "]", + " phys: [", Genode::Hex(phys_addr), ",", + Genode::Hex(phys_addr + Nvme::DMA_DS_SIZE), "]"); + } + } + + _nvme_ctrlr->setup_io(Nvme::IO_NSID, Nvme::IO_NSID); + + /* from now on use interrupts */ + _nvme_pci->sigh_irq(_intr_sigh); + _nvme_ctrlr->clear_intr(); + + /* + * Setup Block session + */ + + /* set Block session properties */ + Nvme::Controller::Nsinfo nsinfo = _nvme_ctrlr->nsinfo(Nvme::IO_NSID); + if (!nsinfo.valid()) { + Genode::error("could not query namespace information"); + throw Nvme::Controller::Initialization_failed(); + } + + _block_count = nsinfo.count; + _block_size = nsinfo.size; + + _block_ops.set_operation(Packet_descriptor::READ); + _block_ops.set_operation(Packet_descriptor::WRITE); + + Nvme::Controller::Info const &info = _nvme_ctrlr->info(); + + Genode::log("NVMe:", info.version.string(), " " + "serial:'", info.sn.string(), "'", " " + "model:'", info.mn.string(), "'", " " + "frev:'", info.fr.string(), "'"); + + Genode::log("Block", " " + "size:", _block_size, " " + "count:", _block_count); + + /* generate Report if requested */ + try { + Genode::Xml_node report = _config_rom.xml().sub_node("report"); + if (report.attribute_value("namespaces", false)) { + _namespace_reporter.enabled(true); + _report_namespaces(); + } + } catch (...) { } + + /* finally announce Block session */ + Genode::Signal_transmitter(_announce_sigh).submit(); + } + + ~Driver() { } + + /******************************* + ** Block::Driver interface ** + *******************************/ + + size_t block_size() override { return _block_size; } + Block::sector_t block_count() override { return _block_count; } + Block::Session::Operations ops() override { return _block_ops; } + + void _io(bool write, Block::sector_t lba, size_t count, + char *buffer, Packet_descriptor &pd) + { + using namespace Genode; + + size_t const len = count * _block_size; + + if (_verbose_io) { + Genode::error(write ? 
"write" : "read", " " + "lba:", lba, " " + "count:", count, " " + "buffer:", (void*)buffer, " " + "len:", len); + } + + if (len > Nvme::MAX_IO_LEN) { + error("request too large (max:", (size_t)Nvme::MAX_IO_LEN, " bytes)"); + throw Io_error(); + } + + if (_requests_pending == (Nvme::MAX_IO_PENDING)) { + throw Request_congestion(); + } + + Block::sector_t const lba_end = lba + count - 1; + auto overlap_check = [&] (Request &req) { + Block::sector_t const start = req.pd.block_number(); + Block::sector_t const end = start + req.pd.block_count() - 1; + + bool const in_req = (lba >= start && lba_end <= end); + bool const over_req = (lba <= start && lba_end <= end) && + (start >= lba && start <= lba_end); + bool const cross_req = (lba <= start && lba_end >= end); + bool const overlap = (in_req || over_req || cross_req); + + if (_verbose_checks && overlap) { + warning("overlap: ", "[", lba, ",", lba_end, ") with " + "[", start, ",", end, ")", + " ", in_req, " ", over_req, " ", cross_req); + } + return overlap; + }; + if (_requests.for_each(overlap_check)) { throw Request_congestion(); } + + Request *r = _requests.get(); + if (!r) { throw Request_congestion(); } + + size_t const mps = _nvme_ctrlr->mps(); + size_t const mps_len = Genode::align_addr(len, Genode::log2(mps)); + bool const need_list = len > 2 * mps; + + Io_buffer *iob = _io_mapper->alloc(mps_len); + if (!iob) { throw Request_congestion(); } + + if (need_list) { + r->large_request = _io_list_mapper->alloc(mps); + if (!r->large_request) { + _io_mapper->free(iob); + throw Request_congestion(); + } + } + + if (write) { Genode::memcpy((void*)iob->va, buffer, len); } + + Nvme::Sqe_io b(_nvme_ctrlr->io_command(Nvme::IO_NSID)); + if (!b.valid()) { + if (r->large_request) { + _io_list_mapper->free(r->large_request); + } + _io_mapper->free(iob); + throw Request_congestion(); + } + + addr_t const pa = iob->pa; + + Nvme::Opcode op = write ? Nvme::Opcode::WRITE : Nvme::Opcode::READ; + b.write(op); + b.write(pa); + + /* payload will fit into 2 mps chunks */ + if (len > mps && !r->large_request) { + b.write(pa + mps); + } else if (r->large_request) { + /* payload needs list of mps chunks */ + Io_buffer &lr = *r->large_request; + _setup_large_request(lr.va, + *iob, (mps_len - mps)/mps, mps); + b.write(lr.pa); + } + + b.write(lba); + b.write(count - 1); /* 0-base value */ + + r->iob = iob; + r->pd = pd; /* must be a copy */ + r->buffer = write ? nullptr : buffer; + r->id = b.read() | (Nvme::IO_NSID<<16); + + ++_requests_pending; + _nvme_ctrlr->commit_io(Nvme::IO_NSID); + } + + void read(Block::sector_t lba, size_t count, + char *buffer, Packet_descriptor &pd) override + { + if (!_block_ops.supported(Packet_descriptor::READ)) { + throw Io_error(); + } + _io(false, lba, count, buffer, pd); + } + + void write(Block::sector_t lba, size_t count, + char const *buffer, Packet_descriptor &pd) override + { + if (!_block_ops.supported(Packet_descriptor::WRITE)) { + throw Io_error(); + } + _io(true, lba, count, const_cast(buffer), pd); + } + + void sync() override { _nvme_ctrlr->flush_cache(Nvme::IO_NSID); } +}; + + +/********** + ** Main ** + **********/ + +struct Main +{ + Genode::Env &_env; + Genode::Heap _heap { _env.ram(), _env.rm() }; + + void _handle_announce() + { + _env.parent().announce(_env.ep().manage(_root)); + } + + Genode::Signal_handler
_announce_sigh { + _env.ep(), *this, &Main::_handle_announce }; + + struct Factory : Block::Driver_factory + { + Genode::Env &_env; + Genode::Allocator &_alloc; + Genode::Signal_context_capability _sigh; + + Genode::Constructible<::Driver> _driver { }; + + Factory(Genode::Env &env, Genode::Allocator &alloc, + Genode::Signal_context_capability sigh) + : _env(env), _alloc(alloc), _sigh(sigh) + { + _driver.construct(_env, _alloc, _sigh); + } + + ~Factory() { _driver.destruct(); } + + Block::Driver *create() { return &*_driver; } + void destroy(Block::Driver *) { } + }; + + Factory _factory { _env, _heap, _announce_sigh }; + Block::Root _root { _env.ep(), _heap, _env.rm(), _factory, true }; + + Main(Genode::Env &env) : _env(env) { } +}; + + +void Component::construct(Genode::Env &env) { static Main main(env); } diff --git a/repos/os/src/drivers/nvme/pci.h b/repos/os/src/drivers/nvme/pci.h new file mode 100644 index 0000000000..a734d01952 --- /dev/null +++ b/repos/os/src/drivers/nvme/pci.h @@ -0,0 +1,149 @@ +/* + * \brief NVMe PCIe backend + * \author Josef Soentgen + * \date 2018-03-05 + */ + +/* + * Copyright (C) 2018 Genode Labs GmbH + * + * This file is part of the Genode OS framework, which is distributed + * under the terms of the GNU Affero General Public License version 3. + */ + +#ifndef _NVME_PCI_H_ +#define _NVME_PCI_H_ + +/* Genode includes */ +#include +#include +#include + + +namespace Nvme { + + using namespace Genode; + + struct Pci; +} + + +struct Nvme::Pci : Platform::Connection, + Util::Dma_allocator +{ + struct Missing_controller : Genode::Exception { }; + + enum { + CLASS_MASS_STORAGE = 0x010000u, + CLASS_MASK = 0xffff00u, + SUBCLASS_NVME = 0x000800u, + NVME_DEVICE = CLASS_MASS_STORAGE | SUBCLASS_NVME, + NVME_PCI = 0x02, + NVME_BASE_ID = 0, + }; + + enum Pci_config { IRQ = 0x3c, CMD = 0x4, CMD_IO = 0x1, + CMD_MEMORY = 0x2, CMD_MASTER = 0x4 }; + + Platform::Device::Resource _res { }; + Platform::Device_capability _device_cap { }; + Genode::Constructible _device { }; + + Genode::Constructible _irq { }; + + /** + * Constructor + */ + Pci(Genode::Env &env) : Platform::Connection(env) + { + upgrade_ram(2*4096u); + upgrade_caps(8); + + _device_cap = with_upgrade([&] () { + return next_device(_device_cap, + NVME_DEVICE, CLASS_MASK); + }); + + if (!_device_cap.valid()) { throw Missing_controller(); } + + _device.construct(_device_cap); + + _res = _device->resource(NVME_BASE_ID); + + uint16_t cmd = _device->config_read(Pci_config::CMD, Platform::Device::ACCESS_16BIT); + cmd |= 0x2; /* respond to memory space accesses */ + cmd |= 0x4; /* enable bus master */ + + _device->config_write(Pci_config::CMD, cmd, Platform::Device::ACCESS_16BIT); + + _irq.construct(_device->irq(0)); + + Genode::log("NVMe PCIe controller found (", + Genode::Hex(_device->vendor_id()), ":", + Genode::Hex(_device->device_id()), ")"); + } + + /** + * Return base address of controller MMIO region + */ + addr_t base() const { return _res.base(); } + + /** + * Return size of controller MMIO region + */ + size_t size() const { return _res.size(); } + + /** + * Set interrupt signal handler + * + * \parm sigh signal capability + */ + void sigh_irq(Genode::Signal_context_capability sigh) + { + _irq->sigh(sigh); + _irq->ack_irq(); + } + + /** + * Acknowledge interrupt + */ + void ack_irq() { _irq->ack_irq(); } + + /***************************** + ** Dma_allocator interface ** + *****************************/ + + /** + * Allocator DMA buffer + * + * \param size size of the buffer + * + * \return Ram_dataspace_capability + 
*/ + Genode::Ram_dataspace_capability alloc(size_t size) override + { + size_t donate = size; + return retry( + [&] () { + return retry( + [&] () { return Pci::Connection::alloc_dma_buffer(size); }, + [&] () { upgrade_caps(2); }); + }, + [&] () { + upgrade_ram(donate); + donate = donate * 2 > size ? 4096 : donate * 2; + }); + } + + /** + * Free DMA buffer + * + * \param cap RAM dataspace capability + */ + void free(Genode::Ram_dataspace_capability cap) override + { + Pci::Connection::free_dma_buffer(cap); + } +}; + +#endif /* _NVME_PCI_H_ */ diff --git a/repos/os/src/drivers/nvme/target.mk b/repos/os/src/drivers/nvme/target.mk new file mode 100644 index 0000000000..0e13210638 --- /dev/null +++ b/repos/os/src/drivers/nvme/target.mk @@ -0,0 +1,5 @@ +TARGET = nvme_drv +SRC_CC = main.cc +INC_DIR += $(PRG_DIR) +LIBS += base +REQUIRES = pci diff --git a/repos/os/src/drivers/nvme/util.h b/repos/os/src/drivers/nvme/util.h new file mode 100644 index 0000000000..2a34cf63ab --- /dev/null +++ b/repos/os/src/drivers/nvme/util.h @@ -0,0 +1,152 @@ +/* + * \brief Utilitize used by the NVMe driver + * \author Josef Soentgen + * \date 2018-03-05 + */ + +/* + * Copyright (C) 2018 Genode Labs GmbH + * + * This file is part of the Genode OS framework, which is distributed + * under the terms of the GNU Affero General Public License version 3. + */ + +#ifndef _NVME_UTIL_H_ +#define _NVME_UTIL_H_ + +/* Genode includes */ +#include + +namespace Util { + + using namespace Genode; + + /* + * DMA allocator helper + */ + struct Dma_allocator : Genode::Interface + { + virtual Genode::Ram_dataspace_capability alloc(size_t) = 0; + virtual void free(Genode::Ram_dataspace_capability) = 0; + }; + + /* + * Wrap Bit_array into a convinient Bitmap allocator + */ + template + struct Bitmap + { + struct Full : Genode::Exception { }; + + static constexpr addr_t INVALID { BITS - 1 }; + Genode::Bit_array _array { }; + size_t _used { 0 }; + + addr_t _find_free(size_t const bits) + { + for (size_t i = 0; i < BITS; i += bits) { + if (_array.get(i, bits)) { continue; } + return i; + } + throw Full(); + } + + /** + * Return index from where given number of bits was allocated + * + * \param bits number of bits to allocate + * + * \return index of start bit + */ + addr_t alloc(size_t const bits) + { + addr_t const start = _find_free(bits); + _array.set(start, bits); + _used += bits; + return start; + } + + /** + * Free given number of bits from start index + * + * \param start index of the start bit + * \param bits number of bits to free + */ + void free(addr_t const start, size_t const bits) + { + _used -= bits; + _array.clear(start, bits); + } + }; + + /* + * Wrap array into convinient interface + * + * The used datatype T must implement the following methods: + * + * bool valid() const returns true if the object is valid + * void invalidate() adjusts the object so that valid() returns false + */ + template + struct Slots + { + T _entries[CAP] { }; + + /** + * Lookup slot + */ + template + T *lookup(FUNC const &func) + { + for (size_t i = 0; i < CAP; i++) { + if (!_entries[i].valid()) { continue; } + if ( func(_entries[i])) { return &_entries[i]; } + } + return nullptr; + } + + /** + * Get free slot + */ + T *get() + { + for (size_t i = 0; i < CAP; i++) { + if (!_entries[i].valid()) { return &_entries[i]; } + } + return nullptr; + } + + /** + * Iterate over all slots until FUNC returns true + */ + template + bool for_each(FUNC const &func) + { + for (size_t i = 0; i < CAP; i++) { + if (!_entries[i].valid()) { continue; } + if ( 
func(_entries[i])) { return true; }
+			}
+			return false;
+		}
+	};
+
+	/**
+	 * Extract string from memory
+	 *
+	 * This function is used to extract the information strings from the
+	 * identify structure.
+	 */
+	char const *extract_string(char const *base, size_t offset, size_t len)
+	{
+		static char tmp[64] = { };
+		if (len > sizeof(tmp)) { return nullptr; }
+
+		Genode::strncpy(tmp, base + offset, len);
+
+		len--; /* skip NUL */
+		while (len > 0 && tmp[--len] == ' ') { tmp[len] = 0; }
+		return tmp;
+	}
+}
+
+#endif /* _NVME_UTIL_H_ */
diff --git a/tool/autopilot.list b/tool/autopilot.list
index 0e101fe6aa..b6b42a153a 100644
--- a/tool/autopilot.list
+++ b/tool/autopilot.list
@@ -112,3 +112,4 @@ utf8
 demo
 ping
 ping_nic_router
+nvme
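
The run script honors the GENODE_TEST_WRITE environment variable checked at
its top to enable the write tests. A minimal usage sketch, assuming the usual
Genode run-tool conventions and an already prepared x86 build directory (the
exact invocation may differ per setup):

! # execute the NVMe test scenario with the write tests enabled
! GENODE_TEST_WRITE=1 make run/nvme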