From 0f6110ea97512df111a5d8b51e062fed4603565e Mon Sep 17 00:00:00 2001 From: Norman Feske Date: Wed, 25 Jul 2012 18:00:26 +0200 Subject: [PATCH] Let OMAP4 SD-card driver use DMA and interrupts With this patch, the driver code gets complemented with DMA support. The support for master DMA, in turn, cleared the way for using interrupts to wait for the completion of transfers, which largely relieves the CPU compared to the polling PIO mode. Consequently, the new version has a much lower CPU footprint. In the current version, both modes of operation PIO and DMA are functional. However, PIO mode is retained for benchmarking purposes only and will possibly be removed to keep the driver simple. It is disabled in the driver's 'main.cc'. --- os/src/drivers/sd_card/omap4/driver.h | 16 +- os/src/drivers/sd_card/omap4/main.cc | 3 +- os/src/drivers/sd_card/omap4/mmchs.h | 304 ++++++++++++++++++++++---- 3 files changed, 279 insertions(+), 44 deletions(-) diff --git a/os/src/drivers/sd_card/omap4/driver.h b/os/src/drivers/sd_card/omap4/driver.h index b8ff04c672..c03484798a 100644 --- a/os/src/drivers/sd_card/omap4/driver.h +++ b/os/src/drivers/sd_card/omap4/driver.h @@ -57,12 +57,16 @@ class Block::Omap4_driver : public Block::Driver /* hsmmc controller instance */ Omap4_hsmmc_controller _controller; + bool const _use_dma; + public: - Omap4_driver() + Omap4_driver(bool use_dma) : _mmchs1_mmio(MMCHS1_MMIO_BASE, MMCHS1_MMIO_SIZE), - _controller((addr_t)_mmchs1_mmio.local_addr(), _delayer) + _controller((addr_t)_mmchs1_mmio.local_addr(), + _delayer, use_dma), + _use_dma(use_dma) { Sd_card::Card_info const card_info = _controller.card_info(); @@ -102,17 +106,19 @@ class Block::Omap4_driver : public Block::Driver Genode::size_t block_count, Genode::addr_t phys) { - throw Io_error(); + if (!_controller.read_blocks_dma(block_number, block_count, phys)) + throw Io_error(); } void write_dma(Genode::size_t block_number, Genode::size_t block_count, Genode::addr_t phys) { - throw Io_error(); 
+ if (!_controller.write_blocks_dma(block_number, block_count, phys)) + throw Io_error(); } - bool dma_enabled() { return false; } + bool dma_enabled() { return _use_dma; } }; #endif /* _DRIVER_H_ */ diff --git a/os/src/drivers/sd_card/omap4/main.cc b/os/src/drivers/sd_card/omap4/main.cc index 6f8d581d29..7953139801 100644 --- a/os/src/drivers/sd_card/omap4/main.cc +++ b/os/src/drivers/sd_card/omap4/main.cc @@ -30,7 +30,8 @@ int main(int argc, char **argv) { Block::Driver *create() { - return new (env()->heap()) Block::Omap4_driver(); + bool use_dma = true; + return new (env()->heap()) Block::Omap4_driver(use_dma); } void destroy(Block::Driver *driver) diff --git a/os/src/drivers/sd_card/omap4/mmchs.h b/os/src/drivers/sd_card/omap4/mmchs.h index 3b079dffb8..fba980bc4d 100644 --- a/os/src/drivers/sd_card/omap4/mmchs.h +++ b/os/src/drivers/sd_card/omap4/mmchs.h @@ -9,6 +9,8 @@ /* Genode includes */ #include +#include +#include /* local includes */ #include @@ -54,6 +56,11 @@ struct Mmchs : Genode::Mmio struct Dw8 : Bitfield<5, 1> { }; + /** + * Master/slave selection (set if master DMA) + */ + struct Dma_mns : Bitfield<20, 1> { }; + }; /** @@ -98,6 +105,11 @@ struct Mmchs : Genode::Mmio * Auto-CMD12 enable */ struct Acen : Bitfield<2, 1> { }; + + /** + * DMA enable + */ + struct De : Bitfield<0, 1> { }; }; /** @@ -262,6 +274,11 @@ struct Mmchs : Genode::Mmio */ struct Cc_enable : Bitfield<0, 1> { }; + /** + * Transfer completed + */ + struct Tc_enable : Bitfield<1, 1> { }; + /** * Card interrupt */ @@ -273,7 +290,14 @@ struct Mmchs : Genode::Mmio struct Cto_enable : Bitfield<16, 1> { }; }; - struct Ise : Register<0x238, 32> { }; + struct Ise : Register<0x238, 32> + { + /* + * The naming follows the lines of the 'Ie' register + */ + struct Tc_sigen : Bitfield<1, 1> { }; + struct Cto_sigen : Bitfield<16, 1> { }; + }; /** * Capabilities @@ -284,6 +308,27 @@ struct Mmchs : Genode::Mmio struct Vs18 : Bitfield<26, 1> { }; }; + /** + * ADMA system address + * 
Base address of the ADMA descriptor table + */ + struct Admasal : Register<0x258, 32> { }; + + /** + * ADMA descriptor layout + */ + struct Adma_desc : Genode::Register<64> + { + struct Valid : Bitfield<0, 1> { }; + struct Ent : Bitfield<1, 1> { }; + struct Int : Bitfield<2, 1> { }; + struct Act1 : Bitfield<4, 1> { }; + struct Act2 : Bitfield<5, 1> { }; + struct Length : Bitfield<16, 16> { }; + struct Address : Bitfield<32, 32> { }; + }; + bool reset_cmd_line(Delayer &delayer) { write(1); @@ -437,6 +482,17 @@ struct Omap4_hsmmc_controller : private Mmchs, public Sd_card::Host_controller Delayer &_delayer; Sd_card::Card_info _card_info; + bool const _use_dma; + + /* + * DMA memory for holding the ADMA descriptor table + */ + enum { ADMA_DESC_MAX_ENTRIES = 1024 }; + Genode::Attached_ram_dataspace _adma_desc_ds; + Adma_desc::access_t * const _adma_desc; + Genode::addr_t const _adma_desc_phys; + + Genode::Irq_connection _irq; Sd_card::Card_info _init() { @@ -536,19 +592,174 @@ struct Omap4_hsmmc_controller : private Mmchs, public Sd_card::Host_controller throw Detection_failed(); } + /* enable master DMA */ + write(1); + + /* enable IRQs */ + write(1); + write(1); + write(1); + write(1); + return card_info; } + /** + * Marshal ADMA descriptors according to block request + * + * \return false if block request is too large + */ + bool _setup_adma_descriptor_table(size_t block_count, + Genode::addr_t out_buffer_phys) + { + using namespace Sd_card; + + /* reset ADMA offset to first descriptor */ + write(_adma_desc_phys); + + enum { BLOCK_SIZE = 512 /* bytes */ }; + + size_t const max_adma_request_size = 64*1024 - 4; /* bytes */ + + /* + * sanity check + * + * XXX An alternative to this sanity check would be to expose + * the maximum DMA transfer size to the driver and let the + * driver partition large requests into ones that are + * supported by the controller. 
+ */ + if (block_count*BLOCK_SIZE > max_adma_request_size*ADMA_DESC_MAX_ENTRIES) { + PERR("Block request too large"); + return false; + } + + /* + * Each ADMA descriptor can transfer up to 'max_adma_request_size' + * bytes. If the request is larger, we generate a list of ADMA + * descriptors. + */ + + size_t const total_bytes = block_count*BLOCK_SIZE; + + /* number of bytes for which descriptors have been created */ + addr_t consumed_bytes = 0; + + for (int index = 0; consumed_bytes < total_bytes; index++) { + + size_t const remaining_bytes = total_bytes - consumed_bytes; + + /* clamp current request to maximum ADMA request size */ + size_t const curr_bytes = Genode::min(max_adma_request_size, + remaining_bytes); + /* + * Assemble new ADMA descriptor + */ + Adma_desc::access_t desc = 0; + Adma_desc::Address::set(desc, out_buffer_phys + consumed_bytes); + Adma_desc::Length::set(desc, curr_bytes); + + /* set action to transfer */ + Adma_desc::Act1::set(desc, 0); + Adma_desc::Act2::set(desc, 1); + + Adma_desc::Valid::set(desc, 1); + + /* + * Let the last descriptor generate transfer-complete interrupt + */ + if (consumed_bytes + curr_bytes == total_bytes) + Adma_desc::Ent::set(desc, 1); + + /* install descriptor into ADMA descriptor table */ + _adma_desc[index] = desc; + + consumed_bytes += curr_bytes; + } + + return true; + } + + bool _wait_for_transfer_complete() + { + if (!wait_for(1, _delayer, 1000*1000, 0) + && !wait_for(1, _delayer)) { + PERR("Stat::Tc timed out"); + return false; + } + + /* clear transfer-completed bit */ + write(1); + return true; + } + + bool _wait_for_bre() + { + if (!wait_for(1, _delayer, 1000*1000, 0) + && !wait_for(1, _delayer)) { + PERR("Pstate::Bre timed out"); + return false; + } + return true; + } + + bool _wait_for_bwe() + { + if (!wait_for(1, _delayer, 1000*1000, 0) + && !wait_for(1, _delayer)) { + PERR("Pstate::Bwe timed out"); + return false; + } + return true; + } + + bool _wait_for_transfer_complete_irq() + { + /* + * XXX For now, 
the driver works fully synchronously. We merely use + * the interrupt mechanism to yield CPU time to concurrently + * running processes. + */ + for (;;) { + _irq.wait_for_irq(); + + /* check for transfer completion */ + if (read() == 1) { + + /* clear transfer-completed bit */ + write(1); + + if (read() != 0) + PWRN("unexpected state (Stat: 0x%x Blen: 0x%x Nblk: %d)", + read(), read(), read()); + + return true; + } + + PWRN("unexpected interrupt, Stat: 0x%08x", read()); + } + } + public: + enum { IRQ_NUMBER = 83 + 32 }; + /** * Constructor * * \param mmio_base local base address of MMIO registers */ - Omap4_hsmmc_controller(Genode::addr_t const mmio_base, Delayer &delayer) + Omap4_hsmmc_controller(Genode::addr_t const mmio_base, Delayer &delayer, + bool use_dma) : - Mmchs(mmio_base), _delayer(delayer), _card_info(_init()) + Mmchs(mmio_base), _delayer(delayer), _card_info(_init()), + _use_dma(use_dma), + _adma_desc_ds(Genode::env()->ram_session(), + ADMA_DESC_MAX_ENTRIES*sizeof(Adma_desc::access_t), + false), + _adma_desc(_adma_desc_ds.local_addr()), + _adma_desc_phys(Genode::Dataspace_client(_adma_desc_ds.cap()).phys_addr()), + _irq(IRQ_NUMBER) { } @@ -580,9 +791,13 @@ struct Omap4_hsmmc_controller : private Mmchs, public Sd_card::Host_controller Cmd::Msbs::set(cmd); if (command.index == Sd_card::Read_multiple_block::INDEX - || command.index == Sd_card::Write_multiple_block::INDEX) { Cmd::Acen::set(cmd); + if (_use_dma) + Cmd::De::set(cmd); + } + /* set data-direction bit depending on the command */ bool const read = command.transfer == Sd_card::TRANSFER_READ; Cmd::Ddir::set(cmd, read ? 
Cmd::Ddir::READ : Cmd::Ddir::WRITE); @@ -662,39 +877,6 @@ struct Omap4_hsmmc_controller : private Mmchs, public Sd_card::Host_controller return Sd_card::Send_relative_addr::Response::Rca::get(read()); } - bool _wait_for_transfer_complete() - { - if (!wait_for(1, _delayer, 1000*1000, 0) - && !wait_for(1, _delayer)) { - PERR("Stat::Tc timed out"); - return false; - } - - /* clear transfer-completed bit */ - write(1); - return true; - } - - bool _wait_for_bre() - { - if (!wait_for(1, _delayer, 1000*1000, 0) - && !wait_for(1, _delayer)) { - PERR("Pstate::Bre timed out"); - return false; - } - return true; - } - - bool _wait_for_bwe() - { - if (!wait_for(1, _delayer, 1000*1000, 0) - && !wait_for(1, _delayer)) { - PERR("Pstate::Bwe timed out"); - return false; - } - return true; - } - /** * Read data blocks from SD card * @@ -708,7 +890,7 @@ struct Omap4_hsmmc_controller : private Mmchs, public Sd_card::Host_controller write(block_count); if (!issue_command(Read_multiple_block(block_number))) { - PERR("Read_multiple_block failed"); + PERR("Read_multiple_block failed, Stat: 0x%08x", read()); return false; } @@ -754,6 +936,52 @@ struct Omap4_hsmmc_controller : private Mmchs, public Sd_card::Host_controller return _wait_for_transfer_complete(); } + + /** + * Read data blocks from SD card via master DMA + * + * \return true on success + */ + bool read_blocks_dma(size_t block_number, size_t block_count, + Genode::addr_t out_buffer_phys) + { + using namespace Sd_card; + + write(0x200); + write(block_count); + + _setup_adma_descriptor_table(block_count, out_buffer_phys); + + if (!issue_command(Read_multiple_block(block_number))) { + PERR("Read_multiple_block failed, Stat: 0x%08x", read()); + return false; + } + + return _wait_for_transfer_complete_irq(); + } + + /** + * Write data blocks to SD card via master DMA + * + * \return true on success + */ + bool write_blocks_dma(size_t block_number, size_t block_count, + Genode::addr_t buffer_phys) + { + using namespace Sd_card; + + 
write(0x200); + write(block_count); + + _setup_adma_descriptor_table(block_count, buffer_phys); + + if (!issue_command(Write_multiple_block(block_number))) { + PERR("Write_multiple_block failed"); + return false; + } + + return _wait_for_transfer_complete_irq(); + } }; #endif /* _MMCHS_H_ */