mirror of
synced 2025-03-23 04:25:21 +00:00
hw_x86_64: Implementation of IA-32e paging
IA-32e paging translates 48-bit linear addresses to 52-bit physical addresses. Translation structures are hierarchical and four levels deep. The current implementation supports regular 4 KB pages as well as 2 MB and 1 GB large page mappings. Memory typing is not yet implemented since the encoded type bits depend on the active page attribute table (PAT)*. For detailed information refer to Intel SDM Vol. 3A, section 4.5. * The default PAT after power up does not allow the encoding of the write-combining memory type, see Intel SDM Vol. 3A, section 11.12.4. * Add common IA-32e paging descriptor type: The type represents a table entry and encompasses all fields shared by paging structure entries of all four levels (PML4, PDPT, PD and PT). * Simplify PT entry type by using common descriptor: Differing fields are the physical address, the global flag and the memory type flags. * Simplify directory entry type by using common descriptor: Page directory entries (PDPT and PD) have an additional 'page size' field that specifies if the entry references a next level paging structure or represents a large page mapping. * Simplify PML4 entry type by using common descriptor: Top-level paging structure entries (PML4) do not have a 'pat' flag and the memory type is specified by the 'pwt' and 'pcd' fields only. * Implement access right merging for directory paging entries: The access rights for translations are determined by the U/S, R/W and XD flags. Paging structure entries that reference other tables must provide the superset of rights required for all entries of the referenced table. Thus merge access rights of new mappings into existing directory entries to grant additional rights if needed. * Add cr3 register definition: Control register 3 serves as the page-directory base register. * Add cr3 variable to x86_64 Cpu Context: The variable designates the address of the top-level paging structure.
* Return current cr3 value as translation table base * Set context cr3 value on translation table assignment * Implement switch to virtual mode in kernel: Activate translation table in init_virt_kernel function by updating the cr3 register. * Ignore accessed and dirty flags when comparing existing table entries: These flags can be set by the MMU and must be disregarded.
This commit is contained in:
@ -51,20 +51,56 @@ class Genode::Cpu
static constexpr addr_t exception_entry = 0x0; /* XXX */
static constexpr addr_t mtc_size = 1 << 13;
* Control register 3: Page-Directory base register
* See Intel SDM Vol. 3A, section 2.5.
struct Cr3 : Register<64>
struct Pwt : Bitfield<3,1> { }; /* Page-level write-through */
struct Pcd : Bitfield<4,1> { }; /* Page-level cache disable */
struct Pdb : Bitfield<12, 36> { }; /* Page-directory base address */
/**
 * Load 'v' into control register 3
 *
 * \param v  new register value, e.g., as returned by 'init'
 *
 * CR3 is the page-directory base register, so this write switches the
 * paging structures in use. The "memory" clobber keeps the compiler from
 * moving memory accesses across the register write, e.g., stores that
 * populate the translation table that is about to be activated.
 */
static void write(access_t const v) {
	asm volatile ("mov %0, %%cr3" :: "r" (v) : "memory"); }
static access_t read()
access_t v;
asm volatile ("mov %%cr3, %0" : "=r" (v) :: );
return v;
* Return initialized value
* \param table base of targeted translation table
static access_t init(addr_t const table) {
return Pdb::masked(table); }
* Extend basic CPU state by members relevant for 'base-hw' only
struct Context : Cpu_state
* Address of top-level paging structure.
addr_t cr3;
* Return base of assigned translation table
addr_t translation_table() const { return 0UL; }
addr_t translation_table() const { return cr3; }
* Assign translation-table base 'table'
void translation_table(addr_t const table) { }
void translation_table(addr_t const table) {
cr3 = Cr3::init(table); }
* Assign protection domain
@ -187,8 +223,8 @@ class Genode::Cpu
* \param process_id process ID of the kernel address-space
static void
init_virt_kernel(addr_t const table, unsigned const process_id)
{ }
init_virt_kernel(addr_t const table, unsigned const process_id) {
Cr3::write(Cr3::init(table)); }
inline static void finish_init_phys_kernel()
{ }
@ -15,58 +15,257 @@
#include <page_flags.h>
/* Genode includes */
#include <util/misc_math.h>
#include <util/register.h>
#include <base/printf.h>
#include <assert.h>
/* base-hw includes */
#include <page_flags.h>
#include <page_slab.h>
namespace Genode
* First level translation table
* IA-32e paging translates 48-bit linear addresses to 52-bit physical
* addresses. Translation structures are hierarchical and four levels
* deep.
* For detailed information refer to Intel SDM Vol. 3A, section 4.5.
class Translation_table;
enum {
SIZE_LOG2_4KB = 12,
SIZE_LOG2_2MB = 21,
SIZE_LOG2_1GB = 30,
SIZE_LOG2_512GB = 39,
SIZE_LOG2_256TB = 48,
class Level_4_translation_table;
class PML4_table;
* IA-32e page directory template.
* Page directories can refer to paging structures of the next higher level
* or directly map page frames by using large page mappings.
* \param PAGE_SIZE_LOG2 virtual address range size in log2
* of a single table entry
* \param SIZE_LOG2 virtual address range size in log2 of whole table
template <typename ENTRY, unsigned PAGE_SIZE_LOG2, unsigned SIZE_LOG2>
class Page_directory;
using Level_3_translation_table =
using Level_2_translation_table =
using Translation_table = PML4_table;
* IA-32e common descriptor.
* Table entry containing descriptor fields common to all four levels.
struct Common_descriptor : Register<64>
struct P : Bitfield<0, 1> { }; /* present */
struct Rw : Bitfield<1, 1> { }; /* read/write */
struct Us : Bitfield<2, 1> { }; /* user/supervisor */
struct Pwt : Bitfield<3, 1> { }; /* write-through */
struct Pcd : Bitfield<4, 1> { }; /* cache disable */
struct A : Bitfield<5, 1> { }; /* accessed */
struct D : Bitfield<6, 1> { }; /* dirty */
struct Xd : Bitfield<63, 1> { }; /* execute-disable */
static bool present(access_t const v) { return P::get(v); }
static access_t create(Page_flags const &flags)
return P::bits(1)
| Rw::bits(flags.writeable)
| Us::bits(!flags.privileged)
| Xd::bits(!flags.executable);
* Return descriptor value with cleared accessed and dirty flags. These
* flags can be set by the MMU.
static access_t clear_mmu_flags(access_t value)
{
	/*
	 * 'value' is a by-value copy, the caller's descriptor stays untouched.
	 * Only the accessed (A) and dirty (D) bits are reset so that a
	 * descriptor read back from a table can be compared against a freshly
	 * created one.
	 */
	A::set(value, 0);
	D::set(value, 0);
	return value;
}
* Merge access rights of descriptor with given flags.
static void merge_access_rights(access_t &desc,
Page_flags const &flags)
Rw::set(desc, Rw::get(desc) | flags.writeable);
Us::set(desc, Us::get(desc) | !flags.privileged);
Xd::set(desc, Xd::get(desc) & !flags.executable);
class Genode::Translation_table
class Genode::Level_4_translation_table
enum {
static constexpr size_t PAGE_SIZE_LOG2 = SIZE_LOG2_4KB;
static constexpr size_t SIZE_LOG2 = SIZE_LOG2_2MB;
static constexpr size_t MAX_ENTRIES = 1 << (SIZE_LOG2-PAGE_SIZE_LOG2);
static constexpr size_t PAGE_SIZE = 1 << PAGE_SIZE_LOG2;
static constexpr size_t PAGE_MASK = ~((1 << PAGE_SIZE_LOG2) - 1);
class Misaligned {};
class Invalid_range {};
class Double_insertion {};
struct Descriptor : Common_descriptor
using Common = Common_descriptor;
struct Pat : Bitfield<7, 1> { }; /* page attribute table */
struct G : Bitfield<8, 1> { }; /* global */
struct Pa : Bitfield<12, 36> { }; /* physical address */
struct Mt : Bitset_3<Pwt, Pcd, Pat> { }; /* memory type */
static access_t create(Page_flags const &flags, addr_t const pa)
/* XXX: Set memory type depending on active PAT */
return Common::create(flags)
| G::bits(flags.global)
| Pa::masked(pa);
void * operator new (size_t, void * p) { return p; }
typename Descriptor::access_t _entries[MAX_ENTRIES];
* Constructor
Translation_table() { }
inline bool _aligned(addr_t const a, size_t const alignm_log2) {
return a == ((a >> alignm_log2) << alignm_log2); }
* Maximum virtual offset that can be translated by this table
static addr_t max_virt_offset()
struct Insert_func
PDBG("not implemented");
return 0;
Page_flags const & flags;
Page_slab * slab;
Insert_func(Page_flags const & flags,
Page_slab * slab) : flags(flags), slab(slab) { }
void operator () (addr_t const vo,
addr_t const pa,
size_t const size,
Descriptor::access_t &desc)
if ((vo & ~PAGE_MASK) || (pa & ~PAGE_MASK) ||
size < PAGE_SIZE)
throw Invalid_range();
Descriptor::access_t table_entry =
Descriptor::create(flags, pa);
if (Descriptor::present(desc) &&
Descriptor::clear_mmu_flags(desc) != table_entry)
throw Double_insertion();
desc = table_entry;
struct Remove_func
Page_slab * slab;
Remove_func(Page_slab * slab) : slab(slab) { }
void operator () (addr_t const vo,
addr_t const pa,
size_t const size,
Descriptor::access_t &desc) {
desc = 0; }
template <typename FUNC>
void _range_op(addr_t vo, addr_t pa, size_t size, FUNC &&func)
for (size_t i = vo >> PAGE_SIZE_LOG2; size > 0;
i = vo >> PAGE_SIZE_LOG2) {
addr_t end = (vo + PAGE_SIZE) & PAGE_MASK;
size_t sz = min(size, end-vo);
func(vo, pa, sz, _entries[i]);
/* check whether we wrap */
if (end < vo) return;
size = size - sz;
vo += sz;
pa += sz;
static constexpr size_t MIN_PAGE_SIZE_LOG2 = SIZE_LOG2_4KB;
static constexpr size_t ALIGNM_LOG2 = SIZE_LOG2_4KB;
* IA-32e page table (Level 4)
* A page table consists of 512 entries that each maps a 4KB page
* frame.
* For further details refer to Intel SDM Vol. 3A, table 4-19.
if (!_aligned((addr_t)this, ALIGNM_LOG2))
throw Misaligned();
memset(&_entries, 0, sizeof(_entries));
* Returns true if the table does not contain any page mappings.
* \return false if an entry is present, true otherwise
bool empty()
for (unsigned i = 0; i < MAX_ENTRIES; i++)
if (Descriptor::present(_entries[i]))
return false;
return true;
* Insert translations into this table
* \param vo offset of virt. transl. region in virt. table region
* \param pa base of physical backing store
* \param size size of translated region
* \param f mapping flags
* \param s second level page slab allocator
* \param vo offset of the virtual region represented
* by the translation within the virtual
* region represented by this table
* \param pa base of the physical backing store
* \param size size of the translated region
* \param flags mapping flags
* \param slab second level page slab allocator
void insert_translation(addr_t vo, addr_t pa, size_t size,
Page_flags const & f, Page_slab * const s)
void insert_translation(addr_t vo,
addr_t pa,
size_t size,
Page_flags const & flags,
Page_slab * slab)
PDBG("not implemented");
this->_range_op(vo, pa, size, Insert_func(flags, slab));
@ -78,8 +277,440 @@ class Genode::Translation_table
void remove_translation(addr_t vo, size_t size, Page_slab * slab)
PDBG("not implemented");
this->_range_op(vo, 0, size, Remove_func(slab));
} __attribute__((aligned(1 << ALIGNM_LOG2)));
template <typename ENTRY, unsigned PAGE_SIZE_LOG2, unsigned SIZE_LOG2>
class Genode::Page_directory
static constexpr size_t MAX_ENTRIES = 1 << (SIZE_LOG2-PAGE_SIZE_LOG2);
static constexpr size_t PAGE_SIZE = 1 << PAGE_SIZE_LOG2;
static constexpr size_t PAGE_MASK = ~((1 << PAGE_SIZE_LOG2) - 1);
class Misaligned {};
class Invalid_range {};
class Double_insertion {};
struct Base_descriptor : Common_descriptor
using Common = Common_descriptor;
struct Ps : Common::template Bitfield<7, 1> { }; /* page size */
static bool maps_page(access_t const v) { return Ps::get(v); }
struct Page_descriptor : Base_descriptor
using Base = Base_descriptor;
* Global attribute
struct G : Base::template Bitfield<8, 1> { };
* Page attribute table
struct Pat : Base::template Bitfield<12, 1> { };
* Physical address
struct Pa : Base::template Bitfield<PAGE_SIZE_LOG2,
48 - PAGE_SIZE_LOG2> { };
* Memory type
struct Mt : Base::template Bitset_3<Base::Pwt,
Base::Pcd, Pat> { };
static typename Base::access_t create(Page_flags const &flags,
addr_t const pa)
/* XXX: Set memory type depending on active PAT */
return Base::create(flags)
| Base::Ps::bits(1)
| G::bits(flags.global)
| Pa::masked(pa);
struct Table_descriptor : Base_descriptor
using Base = Base_descriptor;
* Physical address
struct Pa : Base::template Bitfield<12, 36> { };
* Memory types
struct Mt : Base::template Bitset_2<Base::Pwt,
Base::Pcd> { };
static typename Base::access_t create(Page_flags const &flags,
addr_t const pa)
/* XXX: Set memory type depending on active PAT */
return Base::create(flags)
| Pa::masked(pa);
typename Base_descriptor::access_t _entries[MAX_ENTRIES];
inline bool _aligned(addr_t const a, size_t const alignm_log2) {
return a == ((a >> alignm_log2) << alignm_log2); }
struct Insert_func
Page_flags const & flags;
Page_slab * slab;
Insert_func(Page_flags const & flags,
Page_slab * slab) : flags(flags), slab(slab) { }
void operator () (addr_t const vo,
addr_t const pa,
size_t const size,
typename Base_descriptor::access_t &desc)
/* can we insert a large page mapping? */
if (!((vo & ~PAGE_MASK) || (pa & ~PAGE_MASK) ||
size < PAGE_SIZE)) {
typename Base_descriptor::access_t table_entry =
Page_descriptor::create(flags, pa);
if (Base_descriptor::present(desc) &&
Base_descriptor::clear_mmu_flags(desc) != table_entry)
throw Double_insertion();
desc = table_entry;
/* we need to use a next level table */
ENTRY *table;
if (!Base_descriptor::present(desc)) {
if (!slab)
throw Allocator::Out_of_memory();
/* create and link next level table */
table = new (slab) ENTRY();
ENTRY * phys_addr = (ENTRY*) slab->phys_addr(table);
desc = (typename Base_descriptor::access_t)
(addr_t)(phys_addr ? phys_addr
: table));
} else if (Base_descriptor::maps_page(desc)) {
throw Double_insertion();
} else {
Base_descriptor::merge_access_rights(desc, flags);
ENTRY * phys_addr = (ENTRY*)
table = (ENTRY*) slab->virt_addr(phys_addr);
table = table ? table : (ENTRY*)phys_addr;
/* insert translation */
table->insert_translation(vo - (vo & PAGE_MASK),
pa, size, flags, slab);
struct Remove_func
Page_slab * slab;
Remove_func(Page_slab * slab) : slab(slab) { }
void operator () (addr_t const vo,
addr_t const pa,
size_t const size,
typename Base_descriptor::access_t &desc)
if (Base_descriptor::present(desc)) {
if (Base_descriptor::maps_page(desc)) {
desc = 0;
} else {
/* use allocator to retrieve virt address of table */
ENTRY* phys_addr = (ENTRY*)
ENTRY* table = (ENTRY*) slab->virt_addr(phys_addr);
table = table ? table : (ENTRY*)phys_addr;
table->remove_translation(vo - (vo & PAGE_MASK),
size, slab);
if (table->empty()) {
destroy(slab, table);
desc = 0;
template <typename FUNC>
void _range_op(addr_t vo, addr_t pa, size_t size, FUNC &&func)
for (size_t i = vo >> PAGE_SIZE_LOG2; size > 0;
i = vo >> PAGE_SIZE_LOG2) {
addr_t end = (vo + PAGE_SIZE) & PAGE_MASK;
size_t sz = min(size, end-vo);
func(vo, pa, sz, _entries[i]);
/* check whether we wrap */
if (end < vo) return;
size = size - sz;
vo += sz;
pa += sz;
static constexpr size_t MIN_PAGE_SIZE_LOG2 = SIZE_LOG2_4KB;
static constexpr size_t ALIGNM_LOG2 = SIZE_LOG2_4KB;
if (!_aligned((addr_t)this, ALIGNM_LOG2))
throw Misaligned();
memset(&_entries, 0, sizeof(_entries));
* Returns true if the table does not contain any page mappings.
* \return false if an entry is present, true otherwise
bool empty()
for (unsigned i = 0; i < MAX_ENTRIES; i++)
if (Base_descriptor::present(_entries[i]))
return false;
return true;
* Insert translations into this table
* \param vo offset of the virtual region represented
* by the translation within the virtual
* region represented by this table
* \param pa base of the physical backing store
* \param size size of the translated region
* \param flags mapping flags
* \param slab second level page slab allocator
void insert_translation(addr_t vo,
addr_t pa,
size_t size,
Page_flags const & flags,
Page_slab * slab)
_range_op(vo, pa, size, Insert_func(flags, slab));
* Remove translations that overlap with a given virtual region
* \param vo region offset within the tables virtual region
* \param size region size
* \param slab second level page slab allocator
void remove_translation(addr_t vo, size_t size, Page_slab * slab)
_range_op(vo, 0, size, Remove_func(slab));
} __attribute__((aligned(1 << ALIGNM_LOG2)));
class Genode::PML4_table
/*
 * Geometry of the top-level table: it has 512 entries and covers the
 * 48-bit linear address space (256 TB), so a single entry spans 512 GB.
 * PAGE_SIZE_LOG2 is the per-entry shift used by '_range_op' to select the
 * entry index, SIZE_LOG2 is the size of the region covered by the whole
 * table.
 *
 * NOTE(review): 'Descriptor::Pa' is declared as Bitfield<12, SIZE_LOG2>,
 * i.e., its width follows SIZE_LOG2 - confirm the intended width.
 */
static constexpr size_t PAGE_SIZE_LOG2 = SIZE_LOG2_512GB;
static constexpr size_t SIZE_LOG2      = SIZE_LOG2_256TB;
static constexpr size_t MAX_ENTRIES    = 1UL << (SIZE_LOG2 - PAGE_SIZE_LOG2);
static constexpr size_t PAGE_SIZE      = 1UL << PAGE_SIZE_LOG2;
static constexpr size_t PAGE_MASK      = ~((1UL << PAGE_SIZE_LOG2) - 1);
class Misaligned {};
class Invalid_range {};
struct Descriptor : Common_descriptor
struct Pa : Bitfield<12, SIZE_LOG2> { }; /* physical address */
struct Mt : Bitset_2<Pwt, Pcd> { }; /* memory type */
static access_t create(Page_flags const &flags, addr_t const pa)
/* XXX: Set memory type depending on active PAT */
return Common_descriptor::create(flags)
| Pa::masked(pa);
typename Descriptor::access_t _entries[MAX_ENTRIES];
inline bool _aligned(addr_t const a, size_t const alignm_log2) {
return a == ((a >> alignm_log2) << alignm_log2); }
using ENTRY = Level_2_translation_table;
struct Insert_func
Page_flags const & flags;
Page_slab * slab;
Insert_func(Page_flags const & flags,
Page_slab * slab) : flags(flags), slab(slab) { }
void operator () (addr_t const vo,
addr_t const pa,
size_t const size,
Descriptor::access_t &desc)
/* we need to use a next level table */
ENTRY *table;
if (!Descriptor::present(desc)) {
if (!slab)
throw Allocator::Out_of_memory();
/* create and link next level table */
table = new (slab) ENTRY();
ENTRY * phys_addr = (ENTRY*) slab->phys_addr(table);
desc = Descriptor::create(flags,
(addr_t)(phys_addr ? phys_addr
: table));
} else {
Descriptor::merge_access_rights(desc, flags);
ENTRY * phys_addr = (ENTRY*)
table = (ENTRY*) slab->virt_addr(phys_addr);
table = table ? table : (ENTRY*)phys_addr;
/* insert translation */
table->insert_translation(vo - (vo & PAGE_MASK),
pa, size, flags, slab);
struct Remove_func
Page_slab * slab;
Remove_func(Page_slab * slab) : slab(slab) { }
void operator () (addr_t const vo,
addr_t const pa,
size_t const size,
Descriptor::access_t &desc)
if (Descriptor::present(desc)) {
/* use allocator to retrieve virt address of table */
ENTRY* phys_addr = (ENTRY*)
ENTRY* table = (ENTRY*) slab->virt_addr(phys_addr);
table = table ? table : (ENTRY*)phys_addr;
table->remove_translation(vo - (vo & PAGE_MASK), size,
if (table->empty()) {
destroy(slab, table);
desc = 0;
template <typename FUNC>
void _range_op(addr_t vo, addr_t pa, size_t size, FUNC &&func)
for (size_t i = vo >> PAGE_SIZE_LOG2; size > 0;
i = vo >> PAGE_SIZE_LOG2) {
addr_t end = (vo + PAGE_SIZE) & PAGE_MASK;
size_t sz = min(size, end-vo);
func(vo, pa, sz, _entries[i]);
/* check whether we wrap */
if (end < vo) return;
size = size - sz;
vo += sz;
pa += sz;
static constexpr size_t MIN_PAGE_SIZE_LOG2 = SIZE_LOG2_4KB;
static constexpr size_t ALIGNM_LOG2 = SIZE_LOG2_4KB;
if (!_aligned((addr_t)this, ALIGNM_LOG2))
throw Misaligned();
memset(&_entries, 0, sizeof(_entries));
* Returns true if the table does not contain any page mappings.
* \return false if an entry is present, true otherwise
bool empty()
for (unsigned i = 0; i < MAX_ENTRIES; i++)
if (Descriptor::present(_entries[i]))
return false;
return true;
* Insert translations into this table
* \param vo offset of the virtual region represented
* by the translation within the virtual
* region represented by this table
* \param pa base of the physical backing store
* \param size size of the translated region
* \param flags mapping flags
* \param slab second level page slab allocator
void insert_translation(addr_t vo,
addr_t pa,
size_t size,
Page_flags const & flags,
Page_slab * slab)
_range_op(vo, pa, size, Insert_func(flags, slab));
* Remove translations that overlap with a given virtual region
* \param vo region offset within the tables virtual region
* \param size region size
* \param slab second level page slab allocator
void remove_translation(addr_t vo, size_t size, Page_slab * slab)
_range_op(vo, 0, size, Remove_func(slab));
} __attribute__((aligned(1 << ALIGNM_LOG2)));
#endif /* _TRANSLATION_TABLE_H_ */
@ -35,7 +35,7 @@
/* space must be at least as large as 'Cpu_state' */
.space 20*8
.space 21*8
.global _mt_master_context_end
Reference in New Issue
Block a user