Add patches for Haswell NRI

Get ready to use NRI (native RAM init) with Haswell boards such as the T440p and W541.
Prior to this, Haswell boards booted very slowly because the proprietary MRC blob was improper for the required CBFS size.

reproduction steps:

In coreboot: git checkout 5d291de6011a56bfd767c4bcdfdc3aa6ee87a2dd && git format-patch 7b36319fd9..HEAD --start-number=10.
5d291de6011a56bfd767c4bcdfdc3aa6ee87a2dd is the last commit of the haswell-NRI patch train.
7b36319fd9 is the last commit before the haswell-NRI patch train.
Then move the resulting patch files into the patches/coreboot-24.12/ directory in Heads.

Signed-off-by: gaspar-ilom <gasparilom@riseup.net>
This commit is contained in:
gaspar-ilom 2025-03-09 21:17:45 +01:00
parent 6279500e5b
commit 3f4052c2ad
No known key found for this signature in database
15 changed files with 9297 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,545 @@
From fdb57ee43dc47908b16823aa6ed4d6c08c8d6a48 Mon Sep 17 00:00:00 2001
From: Angel Pons <th3fanbus@gmail.com>
Date: Sat, 7 May 2022 20:59:58 +0200
Subject: [PATCH 11/24] haswell NRI: Add timings/refresh programming
Program the registers with timing and refresh parameters.
Change-Id: Id2ea339d2c9ea8b56c71d6e88ec76949653ff5c2
Signed-off-by: Angel Pons <th3fanbus@gmail.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/64187
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Maximilian Brune <maximilian.brune@9elements.com>
Reviewed-by: Alicja Michalska <ahplka19@gmail.com>
---
.../haswell/native_raminit/lookup_timings.c | 102 ++++++++
.../haswell/native_raminit/raminit_native.h | 14 ++
.../haswell/native_raminit/reg_structs.h | 93 +++++++
.../haswell/native_raminit/timings_refresh.c | 233 +++++++++++++++++-
.../intel/haswell/registers/mchbar.h | 12 +
5 files changed, 452 insertions(+), 2 deletions(-)
diff --git a/src/northbridge/intel/haswell/native_raminit/lookup_timings.c b/src/northbridge/intel/haswell/native_raminit/lookup_timings.c
index 8b81c7c341..b8d6c1ef40 100644
--- a/src/northbridge/intel/haswell/native_raminit/lookup_timings.c
+++ b/src/northbridge/intel/haswell/native_raminit/lookup_timings.c
@@ -60,3 +60,105 @@ uint32_t get_tXP(const uint32_t mem_clock_mhz)
};
return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
}
+
+static uint32_t get_lpddr_tCKE(const uint32_t mem_clock_mhz)
+{
+ const struct timing_lookup lut[] = {
+ { 533, 4 },
+ { 666, 5 },
+ { fmax, 6 },
+ };
+ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
+}
+
+static uint32_t get_ddr_tCKE(const uint32_t mem_clock_mhz)
+{
+ const struct timing_lookup lut[] = {
+ { 533, 3 },
+ { 800, 4 },
+ { 933, 5 },
+ { 1200, 6 },
+ { fmax, 7 },
+ };
+ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
+}
+
+uint32_t get_tCKE(const uint32_t mem_clock_mhz, const bool lpddr)
+{
+ return lpddr ? get_lpddr_tCKE(mem_clock_mhz) : get_ddr_tCKE(mem_clock_mhz);
+}
+
+uint32_t get_tXPDLL(const uint32_t mem_clock_mhz)
+{
+ const struct timing_lookup lut[] = {
+ { 400, 10 },
+ { 533, 13 },
+ { 666, 16 },
+ { 800, 20 },
+ { 933, 23 },
+ { 1066, 26 },
+ { 1200, 29 },
+ { fmax, 32 },
+ };
+ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
+}
+
+uint32_t get_tAONPD(const uint32_t mem_clock_mhz)
+{
+ const struct timing_lookup lut[] = {
+ { 400, 4 },
+ { 533, 5 },
+ { 666, 6 },
+ { 800, 7 }, /* SNB had 8 */
+ { 933, 8 },
+ { 1066, 10 },
+ { 1200, 11 },
+ { fmax, 12 },
+ };
+ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
+}
+
+uint32_t get_tMOD(const uint32_t mem_clock_mhz)
+{
+ const struct timing_lookup lut[] = {
+ { 800, 12 },
+ { 933, 14 },
+ { 1066, 16 },
+ { 1200, 18 },
+ { fmax, 20 },
+ };
+ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
+}
+
+uint32_t get_tXS_offset(const uint32_t mem_clock_mhz)
+{
+ return DIV_ROUND_UP(mem_clock_mhz, 100);
+}
+
+static uint32_t get_lpddr_tZQOPER(const uint32_t mem_clock_mhz)
+{
+ return (mem_clock_mhz * 360) / 1000;
+}
+
+static uint32_t get_ddr_tZQOPER(const uint32_t mem_clock_mhz)
+{
+ const struct timing_lookup lut[] = {
+ { 800, 256 },
+ { 933, 299 },
+ { 1066, 342 },
+ { 1200, 384 },
+ { fmax, 427 },
+ };
+ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
+}
+
+/* tZQOPER defines the period required for ZQCL after SR exit */
+uint32_t get_tZQOPER(const uint32_t mem_clock_mhz, const bool lpddr)
+{
+ return lpddr ? get_lpddr_tZQOPER(mem_clock_mhz) : get_ddr_tZQOPER(mem_clock_mhz);
+}
+
+uint32_t get_tZQCS(const uint32_t mem_clock_mhz, const bool lpddr)
+{
+ return DIV_ROUND_UP(get_tZQOPER(mem_clock_mhz, lpddr), 4);
+}
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
index fffa6d5450..5915a2bab0 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
@@ -156,6 +156,12 @@ struct sysinfo {
uint8_t cke_cmd_pi_code[NUM_CHANNELS][NUM_GROUPS];
uint8_t cmd_north_pi_code[NUM_CHANNELS][NUM_GROUPS];
uint8_t cmd_south_pi_code[NUM_CHANNELS][NUM_GROUPS];
+
+ union tc_bank_reg tc_bank[NUM_CHANNELS];
+ union tc_bank_rank_a_reg tc_bankrank_a[NUM_CHANNELS];
+ union tc_bank_rank_b_reg tc_bankrank_b[NUM_CHANNELS];
+ union tc_bank_rank_c_reg tc_bankrank_c[NUM_CHANNELS];
+ union tc_bank_rank_d_reg tc_bankrank_d[NUM_CHANNELS];
};
static inline bool is_hsw_ult(void)
@@ -201,6 +207,14 @@ enum raminit_status configure_mc(struct sysinfo *ctrl);
void configure_timings(struct sysinfo *ctrl);
void configure_refresh(struct sysinfo *ctrl);
+uint32_t get_tCKE(uint32_t mem_clock_mhz, bool lpddr);
+uint32_t get_tXPDLL(uint32_t mem_clock_mhz);
+uint32_t get_tAONPD(uint32_t mem_clock_mhz);
+uint32_t get_tMOD(uint32_t mem_clock_mhz);
+uint32_t get_tXS_offset(uint32_t mem_clock_mhz);
+uint32_t get_tZQOPER(uint32_t mem_clock_mhz, bool lpddr);
+uint32_t get_tZQCS(uint32_t mem_clock_mhz, bool lpddr);
+
enum raminit_status wait_for_first_rcomp(void);
uint8_t get_rx_bias(const struct sysinfo *ctrl);
diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
index d11cda4b3d..70487e1640 100644
--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
@@ -335,6 +335,99 @@ union mcscheds_cbit_reg {
uint32_t raw;
};
+union tc_bank_reg {
+ struct __packed {
+ uint32_t tRCD : 5; // Bits 4:0
+ uint32_t tRP : 5; // Bits 9:5
+ uint32_t tRAS : 6; // Bits 15:10
+ uint32_t tRDPRE : 4; // Bits 19:16
+ uint32_t tWRPRE : 6; // Bits 25:20
+ uint32_t tRRD : 4; // Bits 29:26
+ uint32_t tRPab_ext : 2; // Bits 31:30
+ };
+ uint32_t raw;
+};
+
+union tc_bank_rank_a_reg {
+ struct __packed {
+ uint32_t tCKE : 4; // Bits 3:0
+ uint32_t tFAW : 8; // Bits 11:4
+ uint32_t tRDRD_sr : 3; // Bits 14:12
+ uint32_t tRDRD_dr : 4; // Bits 18:15
+ uint32_t tRDRD_dd : 4; // Bits 22:19
+ uint32_t tRDPDEN : 5; // Bits 27:23
+ uint32_t : 1; // Bits 28:28
+ uint32_t cmd_3st_dis : 1; // Bits 29:29
+ uint32_t cmd_stretch : 2; // Bits 31:30
+ };
+ uint32_t raw;
+};
+
+union tc_bank_rank_b_reg {
+ struct __packed {
+ uint32_t tWRRD_sr : 6; // Bits 5:0
+ uint32_t tWRRD_dr : 4; // Bits 9:6
+ uint32_t tWRRD_dd : 4; // Bits 13:10
+ uint32_t tWRWR_sr : 3; // Bits 16:14
+ uint32_t tWRWR_dr : 4; // Bits 20:17
+ uint32_t tWRWR_dd : 4; // Bits 24:21
+ uint32_t tWRPDEN : 6; // Bits 30:25
+ uint32_t dec_wrd : 1; // Bits 31:31
+ };
+ uint32_t raw;
+};
+
+union tc_bank_rank_c_reg {
+ struct __packed {
+ uint32_t tXPDLL : 6; // Bits 5:0
+ uint32_t tXP : 4; // Bits 9:6
+ uint32_t tAONPD : 4; // Bits 13:10
+ uint32_t tRDWR_sr : 5; // Bits 18:14
+ uint32_t tRDWR_dr : 5; // Bits 23:19
+ uint32_t tRDWR_dd : 5; // Bits 28:24
+ uint32_t : 3; // Bits 31:29
+ };
+ uint32_t raw;
+};
+
+/* NOTE: Non-ULT only implements the lower 21 bits (odt_write_delay is 2 bits) */
+union tc_bank_rank_d_reg {
+ struct __packed {
+ uint32_t tAA : 5; // Bits 4:0
+ uint32_t tCWL : 5; // Bits 9:5
+ uint32_t tCPDED : 2; // Bits 11:10
+ uint32_t tPRPDEN : 2; // Bits 13:12
+ uint32_t odt_read_delay : 3; // Bits 16:14
+ uint32_t odt_read_duration : 2; // Bits 18:17
+ uint32_t odt_write_duration : 3; // Bits 21:19
+ uint32_t odt_write_delay : 3; // Bits 24:22
+ uint32_t odt_always_rank_0 : 1; // Bits 25:25
+ uint32_t cmd_delay : 2; // Bits 27:26
+ uint32_t : 4; // Bits 31:28
+ };
+ uint32_t raw;
+};
+
+union tc_rftp_reg {
+ struct __packed {
+ uint32_t tREFI : 16; // Bits 15:0
+ uint32_t tRFC : 9; // Bits 24:16
+ uint32_t tREFIx9 : 7; // Bits 31:25
+ };
+ uint32_t raw;
+};
+
+union tc_srftp_reg {
+ struct __packed {
+ uint32_t tXSDLL : 12; // Bits 11:0
+ uint32_t tXS_offset : 4; // Bits 15:12
+ uint32_t tZQOPER : 10; // Bits 25:16
+ uint32_t : 2; // Bits 27:26
+ uint32_t tMOD : 4; // Bits 31:28
+ };
+ uint32_t raw;
+};
+
union mcmain_command_rate_limit_reg {
struct __packed {
uint32_t enable_cmd_limit : 1; // Bits 0:0
diff --git a/src/northbridge/intel/haswell/native_raminit/timings_refresh.c b/src/northbridge/intel/haswell/native_raminit/timings_refresh.c
index a9d960f31b..54fee0121d 100644
--- a/src/northbridge/intel/haswell/native_raminit/timings_refresh.c
+++ b/src/northbridge/intel/haswell/native_raminit/timings_refresh.c
@@ -1,13 +1,242 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <assert.h>
+#include <commonlib/bsd/clamp.h>
+#include <console/console.h>
+#include <delay.h>
+#include <device/pci_ops.h>
+#include <northbridge/intel/haswell/haswell.h>
+
#include "raminit_native.h"
+#define BL 8 /* Burst length */
+#define tCCD 4
+#define tRPRE 1
+#define tWPRE 1
+#define tDLLK 512
+
+static bool is_sodimm(const enum spd_dimm_type_ddr3 type)
+{
+ return type == SPD_DDR3_DIMM_TYPE_SO_DIMM || type == SPD_DDR3_DIMM_TYPE_72B_SO_UDIMM;
+}
+
+static uint8_t get_odt_stretch(const struct sysinfo *const ctrl)
+{
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ /* Only stretch with 2 DIMMs per channel */
+ if (ctrl->dpc[channel] != 2)
+ continue;
+
+ const struct raminit_dimm_info *dimms = ctrl->dimms[channel];
+
+ /* Only stretch when using SO-DIMMs */
+ if (!is_sodimm(dimms[0].data.dimm_type) || !is_sodimm(dimms[1].data.dimm_type))
+ continue;
+
+ /* Only stretch with mismatched card types */
+ if (dimms[0].data.reference_card == dimms[1].data.reference_card)
+ continue;
+
+ /* Stretch if one SO-DIMM is card F */
+ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
+ if (dimms[slot].data.reference_card == 5)
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static union tc_bank_reg make_tc_bank(struct sysinfo *const ctrl)
+{
+ return (union tc_bank_reg) {
+ .tRCD = ctrl->tRCD,
+ .tRP = ctrl->tRP,
+ .tRAS = ctrl->tRAS,
+ .tRDPRE = ctrl->tRTP,
+ .tWRPRE = 4 + ctrl->tCWL + ctrl->tWR,
+ .tRRD = ctrl->tRRD,
+ .tRPab_ext = 0, /** TODO: For LPDDR, this is ctrl->tRPab - ctrl->tRP **/
+ };
+}
+
+static union tc_bank_rank_a_reg make_tc_bankrank_a(struct sysinfo *ctrl, uint8_t odt_stretch)
+{
+ /* Use 3N mode for DDR during training, but always use 1N mode for LPDDR */
+ const uint32_t tCMD = ctrl->lpddr ? 0 : 3;
+ const uint32_t tRDRD_drdd = BL / 2 + 1 + tRPRE + odt_stretch + !!ctrl->lpddr;
+
+ return (union tc_bank_rank_a_reg) {
+ .tCKE = get_tCKE(ctrl->mem_clock_mhz, ctrl->lpddr),
+ .tFAW = ctrl->tFAW,
+ .tRDRD_sr = tCCD,
+ .tRDRD_dr = tRDRD_drdd,
+ .tRDRD_dd = tRDRD_drdd,
+ .tRDPDEN = ctrl->tAA + BL / 2 + 1,
+ .cmd_3st_dis = 1, /* Disable command tri-state before training */
+ .cmd_stretch = tCMD,
+ };
+}
+
+static union tc_bank_rank_b_reg make_tc_bankrank_b(struct sysinfo *const ctrl)
+{
+ const uint8_t tWRRD_drdd = ctrl->tCWL - ctrl->tAA + BL / 2 + 2 + tRPRE;
+ const uint8_t tWRWR_drdd = BL / 2 + 2 + tWPRE;
+
+ return (union tc_bank_rank_b_reg) {
+ .tWRRD_sr = tCCD + ctrl->tCWL + ctrl->tWTR + 2,
+ .tWRRD_dr = ctrl->lpddr ? 8 : tWRRD_drdd,
+ .tWRRD_dd = ctrl->lpddr ? 8 : tWRRD_drdd,
+ .tWRWR_sr = tCCD,
+ .tWRWR_dr = tWRWR_drdd,
+ .tWRWR_dd = tWRWR_drdd,
+ .tWRPDEN = ctrl->tWR + ctrl->tCWL + BL / 2,
+ .dec_wrd = ctrl->tCWL >= 6,
+ };
+}
+
+static uint32_t get_tRDWR_sr(const struct sysinfo *ctrl)
+{
+ if (ctrl->lpddr) {
+ const uint32_t tdqsck_max = DIV_ROUND_UP(5500, ctrl->qclkps * 2);
+ return ctrl->tAA - ctrl->tCWL + tCCD + tWPRE + tdqsck_max + 1;
+ } else {
+ const bool fast_clock = ctrl->mem_clock_mhz > 666;
+ return ctrl->tAA - ctrl->tCWL + tCCD + tWPRE + 2 + fast_clock;
+ }
+}
+
+static union tc_bank_rank_c_reg make_tc_bankrank_c(struct sysinfo *ctrl, uint8_t odt_stretch)
+{
+ const uint32_t tRDWR_sr = get_tRDWR_sr(ctrl);
+ const uint32_t tRDWR_drdd = tRDWR_sr + odt_stretch;
+
+ return (union tc_bank_rank_c_reg) {
+ .tXPDLL = get_tXPDLL(ctrl->mem_clock_mhz),
+ .tXP = MAX(ctrl->tXP, 7), /* Use a higher tXP for training */
+ .tAONPD = get_tAONPD(ctrl->mem_clock_mhz),
+ .tRDWR_sr = tRDWR_sr,
+ .tRDWR_dr = tRDWR_drdd,
+ .tRDWR_dd = tRDWR_drdd,
+ };
+}
+
+static union tc_bank_rank_d_reg make_tc_bankrank_d(struct sysinfo *ctrl, uint8_t odt_stretch)
+{
+ const uint32_t odt_rd_delay = ctrl->tAA - ctrl->tCWL;
+ if (!ctrl->lpddr) {
+ return (union tc_bank_rank_d_reg) {
+ .tAA = ctrl->tAA,
+ .tCWL = ctrl->tCWL,
+ .tCPDED = 1,
+ .tPRPDEN = 1,
+ .odt_read_delay = odt_rd_delay,
+ .odt_read_duration = odt_stretch,
+ };
+ }
+
+ /* tCWL has 1 extra clock because of tDQSS, subtract it here */
+ const uint32_t tCWL_lpddr = ctrl->tCWL - 1;
+ const uint32_t odt_wr_delay = tCWL_lpddr + DIV_ROUND_UP(3500, ctrl->qclkps * 2);
+ const uint32_t odt_wr_duration = DIV_ROUND_UP(3500 - 1750, ctrl->qclkps * 2) + 1;
+
+ return (union tc_bank_rank_d_reg) {
+ .tAA = ctrl->tAA,
+ .tCWL = tCWL_lpddr,
+ .tCPDED = 2, /* Required by JEDEC LPDDR3 spec */
+ .tPRPDEN = 1,
+ .odt_read_delay = odt_rd_delay,
+ .odt_read_duration = odt_stretch,
+ .odt_write_delay = odt_wr_delay,
+ .odt_write_duration = odt_wr_duration,
+ .odt_always_rank_0 = ctrl->lpddr_dram_odt
+ };
+}
+
+/* ZQCS period values, in (tREFI * 128) units */
+#define ZQCS_PERIOD_DDR3 128 /* tREFI * 128 = 7.8 us * 128 = 1ms */
+#define ZQCS_PERIOD_LPDDR3 256 /* tREFI * 128 = 3.9 us * 128 = 0.5ms */
+
+static uint32_t make_tc_zqcal(const struct sysinfo *const ctrl)
+{
+ const uint32_t zqcs_period = ctrl->lpddr ? ZQCS_PERIOD_LPDDR3 : ZQCS_PERIOD_DDR3;
+ const uint32_t tZQCS = get_tZQCS(ctrl->mem_clock_mhz, ctrl->lpddr);
+ return tZQCS << (is_hsw_ult() ? 10 : 8) | zqcs_period;
+}
+
+static union tc_rftp_reg make_tc_rftp(const struct sysinfo *const ctrl)
+{
+ /*
+ * The tREFIx9 field should be programmed to minimum of 8.9 * tREFI (to allow
+ * for possible delays from ZQ or isoc) and tRASmax (70us) divided by 1024.
+ */
+ return (union tc_rftp_reg) {
+ .tREFI = ctrl->tREFI,
+ .tRFC = ctrl->tRFC,
+ .tREFIx9 = ctrl->tREFI * 89 / 10240,
+ };
+}
+
+static union tc_srftp_reg make_tc_srftp(const struct sysinfo *const ctrl)
+{
+ return (union tc_srftp_reg) {
+ .tXSDLL = tDLLK,
+ .tXS_offset = get_tXS_offset(ctrl->mem_clock_mhz),
+ .tZQOPER = get_tZQOPER(ctrl->mem_clock_mhz, ctrl->lpddr),
+ .tMOD = get_tMOD(ctrl->mem_clock_mhz) - 8,
+ };
+}
+
void configure_timings(struct sysinfo *ctrl)
{
- /** TODO: Stub **/
+ if (ctrl->lpddr)
+ die("%s: Missing support for LPDDR\n", __func__);
+
+ const uint8_t odt_stretch = get_odt_stretch(ctrl);
+ const union tc_bank_reg tc_bank = make_tc_bank(ctrl);
+ const union tc_bank_rank_a_reg tc_bank_rank_a = make_tc_bankrank_a(ctrl, odt_stretch);
+ const union tc_bank_rank_b_reg tc_bank_rank_b = make_tc_bankrank_b(ctrl);
+ const union tc_bank_rank_c_reg tc_bank_rank_c = make_tc_bankrank_c(ctrl, odt_stretch);
+ const union tc_bank_rank_d_reg tc_bank_rank_d = make_tc_bankrank_d(ctrl, odt_stretch);
+
+ const uint8_t wr_delay = tc_bank_rank_b.dec_wrd + 1;
+ uint8_t sc_wr_add_delay = 0;
+ sc_wr_add_delay |= wr_delay << 0;
+ sc_wr_add_delay |= wr_delay << 2;
+ sc_wr_add_delay |= wr_delay << 4;
+ sc_wr_add_delay |= wr_delay << 6;
+
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ ctrl->tc_bank[channel] = tc_bank;
+ ctrl->tc_bankrank_a[channel] = tc_bank_rank_a;
+ ctrl->tc_bankrank_b[channel] = tc_bank_rank_b;
+ ctrl->tc_bankrank_c[channel] = tc_bank_rank_c;
+ ctrl->tc_bankrank_d[channel] = tc_bank_rank_d;
+
+ mchbar_write32(TC_BANK_ch(channel), ctrl->tc_bank[channel].raw);
+ mchbar_write32(TC_BANK_RANK_A_ch(channel), ctrl->tc_bankrank_a[channel].raw);
+ mchbar_write32(TC_BANK_RANK_B_ch(channel), ctrl->tc_bankrank_b[channel].raw);
+ mchbar_write32(TC_BANK_RANK_C_ch(channel), ctrl->tc_bankrank_c[channel].raw);
+ mchbar_write32(TC_BANK_RANK_D_ch(channel), ctrl->tc_bankrank_d[channel].raw);
+ mchbar_write8(SC_WR_ADD_DELAY_ch(channel), sc_wr_add_delay);
+ }
}
void configure_refresh(struct sysinfo *ctrl)
{
- /** TODO: Stub **/
+ const union tc_srftp_reg tc_srftp = make_tc_srftp(ctrl);
+ const union tc_rftp_reg tc_rftp = make_tc_rftp(ctrl);
+ const uint32_t tc_zqcal = make_tc_zqcal(ctrl);
+
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ mchbar_setbits32(TC_RFP_ch(channel), 0xff);
+ mchbar_write32(TC_RFTP_ch(channel), tc_rftp.raw);
+ mchbar_write32(TC_SRFTP_ch(channel), tc_srftp.raw);
+ mchbar_write32(TC_ZQCAL_ch(channel), tc_zqcal);
+ }
}
diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
index 4c3f399b5d..2acc5cbbc8 100644
--- a/src/northbridge/intel/haswell/registers/mchbar.h
+++ b/src/northbridge/intel/haswell/registers/mchbar.h
@@ -86,9 +86,21 @@
#define DDR_COMP_VSSHI_CONTROL 0x3a24
/* MCMAIN per-channel */
+#define TC_BANK_ch(ch) _MCMAIN_C(0x4000, ch)
+#define TC_BANK_RANK_A_ch(ch) _MCMAIN_C(0x4004, ch)
+#define TC_BANK_RANK_B_ch(ch) _MCMAIN_C(0x4008, ch)
+#define TC_BANK_RANK_C_ch(ch) _MCMAIN_C(0x400c, ch)
#define COMMAND_RATE_LIMIT_ch(ch) _MCMAIN_C(0x4010, ch)
+#define TC_BANK_RANK_D_ch(ch) _MCMAIN_C(0x4014, ch)
+#define SC_ROUNDT_LAT_ch(ch) _MCMAIN_C(0x4024, ch)
+#define SC_WR_ADD_DELAY_ch(ch) _MCMAIN_C(0x40d0, ch)
+
+#define TC_ZQCAL_ch(ch) _MCMAIN_C(0x4290, ch)
+#define TC_RFP_ch(ch) _MCMAIN_C(0x4294, ch)
+#define TC_RFTP_ch(ch) _MCMAIN_C(0x4298, ch)
#define MC_INIT_STATE_ch(ch) _MCMAIN_C(0x42a0, ch)
+#define TC_SRFTP_ch(ch) _MCMAIN_C(0x42a4, ch)
/* MCMAIN broadcast */
#define MCSCHEDS_CBIT 0x4c20
--
2.39.5

View File

@ -0,0 +1,267 @@
From e95c52ef0871bc8b57d7e8e560b2e139d9e7112e Mon Sep 17 00:00:00 2001
From: Angel Pons <th3fanbus@gmail.com>
Date: Sat, 7 May 2022 21:24:50 +0200
Subject: [PATCH 12/24] haswell NRI: Program memory map
This is very similar to Sandy/Ivy Bridge, except that there's several
registers to program in GDXCBAR. One of these GDXCBAR registers has a
lock bit that must be set in order for the memory controller to allow
normal access to DRAM. And it took me four months to realize this one
bit was the only reason why native raminit did not work.
Change-Id: I3af73a018a7ba948701a542e661e7fefd57591fe
Signed-off-by: Angel Pons <th3fanbus@gmail.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/64188
Reviewed-by: Maximilian Brune <maximilian.brune@9elements.com>
Reviewed-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
---
.../intel/haswell/native_raminit/Makefile.mk | 1 +
.../intel/haswell/native_raminit/memory_map.c | 183 ++++++++++++++++++
.../haswell/native_raminit/raminit_main.c | 1 +
.../haswell/native_raminit/raminit_native.h | 1 +
.../intel/haswell/registers/host_bridge.h | 2 +
5 files changed, 188 insertions(+)
create mode 100644 src/northbridge/intel/haswell/native_raminit/memory_map.c
diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.mk b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
index fc55277a65..37d527e972 100644
--- a/src/northbridge/intel/haswell/native_raminit/Makefile.mk
+++ b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
@@ -4,6 +4,7 @@ romstage-y += configure_mc.c
romstage-y += lookup_timings.c
romstage-y += init_mpll.c
romstage-y += io_comp_control.c
+romstage-y += memory_map.c
romstage-y += raminit_main.c
romstage-y += raminit_native.c
romstage-y += spd_bitmunching.c
diff --git a/src/northbridge/intel/haswell/native_raminit/memory_map.c b/src/northbridge/intel/haswell/native_raminit/memory_map.c
new file mode 100644
index 0000000000..e3aded2b37
--- /dev/null
+++ b/src/northbridge/intel/haswell/native_raminit/memory_map.c
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <device/pci_ops.h>
+#include <northbridge/intel/haswell/haswell.h>
+#include <southbridge/intel/lynxpoint/me.h>
+#include <types.h>
+
+#include "raminit_native.h"
+
+/* GDXCBAR */
+#define MPCOHTRK_GDXC_MOT_ADDRESS_LO 0x10
+#define MPCOHTRK_GDXC_MOT_ADDRESS_HI 0x14
+#define MPCOHTRK_GDXC_MOT_REGION 0x18
+
+#define MPCOHTRK_GDXC_OCLA_ADDRESS_LO 0x20
+#define MPCOHTRK_GDXC_OCLA_ADDRESS_HI 0x24
+#define MPCOHTRK_GDXC_OCLA_REGION 0x28
+
+/* This lock bit made me lose what little sanity I had left. - Angel Pons */
+#define MPCOHTRK_GDXC_OCLA_ADDRESS_HI_LOCK BIT(2)
+
+static inline uint32_t gdxcbar_read32(const uintptr_t offset)
+{
+ return read32p((mchbar_read32(GDXCBAR) & ~1) + offset);
+}
+
+static inline void gdxcbar_write32(const uintptr_t offset, const uint32_t value)
+{
+ write32p((mchbar_read32(GDXCBAR) & ~1) + offset, value);
+}
+
+static inline void gdxcbar_clrsetbits32(const uintptr_t offset, uint32_t clear, uint32_t set)
+{
+ const uintptr_t address = (mchbar_read32(GDXCBAR) & ~1) + offset;
+ clrsetbits32((void *)address, clear, set);
+}
+
+#define gdxcbar_setbits32(offset, set) gdxcbar_clrsetbits32(offset, 0, set)
+#define gdxcbar_clrbits32(offset, clear) gdxcbar_clrsetbits32(offset, clear, 0)
+
+/* All values stored in here (except the bool) are specified in MiB */
+struct memory_map_data {
+ uint32_t dpr_size;
+ uint32_t tseg_size;
+ uint32_t gtt_size;
+ uint32_t gms_size;
+ uint32_t me_stolen_size;
+ uint32_t mmio_size;
+ uint32_t touud;
+ uint32_t remaplimit;
+ uint32_t remapbase;
+ uint32_t tom;
+ uint32_t tom_minus_me;
+ uint32_t tolud;
+ uint32_t bdsm_base;
+ uint32_t gtt_base;
+ uint32_t tseg_base;
+ bool reclaim_possible;
+};
+
+static void compute_memory_map(struct memory_map_data *map)
+{
+ map->tom_minus_me = map->tom - map->me_stolen_size;
+
+ /*
+ * MMIO size will actually be slightly smaller than computed,
+ * but matches what MRC does and is more MTRR-friendly given
+ * that TSEG is treated as WB, but SMRR makes TSEG UC anyway.
+ */
+ const uint32_t mmio_size = MIN(map->tom_minus_me, 4096) / 2;
+ map->gtt_base = ALIGN_DOWN(mmio_size, map->tseg_size);
+ map->tseg_base = map->gtt_base - map->tseg_size;
+ map->bdsm_base = map->gtt_base + map->gtt_size;
+ map->tolud = map->bdsm_base + map->gms_size;
+ map->reclaim_possible = map->tom_minus_me > map->tolud;
+
+ if (map->reclaim_possible) {
+ map->remapbase = MAX(4096, map->tom_minus_me);
+ map->touud = MIN(4096, map->tom_minus_me) + map->remapbase - map->tolud;
+ map->remaplimit = map->touud - 1;
+ } else {
+ map->remapbase = 0;
+ map->remaplimit = 0;
+ map->touud = map->tom_minus_me;
+ }
+}
+
+static void display_memory_map(const struct memory_map_data *map)
+{
+ if (!CONFIG(DEBUG_RAM_SETUP))
+ return;
+
+ printk(BIOS_DEBUG, "============ MEMORY MAP ============\n");
+ printk(BIOS_DEBUG, "\n");
+ printk(BIOS_DEBUG, "dpr_size = %u MiB\n", map->dpr_size);
+ printk(BIOS_DEBUG, "tseg_size = %u MiB\n", map->tseg_size);
+ printk(BIOS_DEBUG, "gtt_size = %u MiB\n", map->gtt_size);
+ printk(BIOS_DEBUG, "gms_size = %u MiB\n", map->gms_size);
+ printk(BIOS_DEBUG, "me_stolen_size = %u MiB\n", map->me_stolen_size);
+ printk(BIOS_DEBUG, "\n");
+ printk(BIOS_DEBUG, "touud = %u MiB\n", map->touud);
+ printk(BIOS_DEBUG, "remaplimit = %u MiB\n", map->remaplimit);
+ printk(BIOS_DEBUG, "remapbase = %u MiB\n", map->remapbase);
+ printk(BIOS_DEBUG, "tom = %u MiB\n", map->tom);
+ printk(BIOS_DEBUG, "tom_minus_me = %u MiB\n", map->tom_minus_me);
+ printk(BIOS_DEBUG, "tolud = %u MiB\n", map->tolud);
+ printk(BIOS_DEBUG, "bdsm_base = %u MiB\n", map->bdsm_base);
+ printk(BIOS_DEBUG, "gtt_base = %u MiB\n", map->gtt_base);
+ printk(BIOS_DEBUG, "tseg_base = %u MiB\n", map->tseg_base);
+ printk(BIOS_DEBUG, "\n");
+ printk(BIOS_DEBUG, "reclaim_possible = %s\n", map->reclaim_possible ? "Yes" : "No");
+}
+
+static void map_write_reg64(const uint16_t reg, const uint64_t size)
+{
+ const uint64_t value = size << 20;
+ pci_write_config32(HOST_BRIDGE, reg + 4, value >> 32);
+ pci_write_config32(HOST_BRIDGE, reg + 0, value >> 0);
+}
+
+static void map_write_reg32(const uint16_t reg, const uint32_t size)
+{
+ const uint32_t value = size << 20;
+ pci_write_config32(HOST_BRIDGE, reg, value);
+}
+
+static void program_memory_map(const struct memory_map_data *map)
+{
+ map_write_reg64(TOUUD, map->touud);
+ map_write_reg64(TOM, map->tom);
+ if (map->reclaim_possible) {
+ map_write_reg64(REMAPBASE, map->remapbase);
+ map_write_reg64(REMAPLIMIT, map->remaplimit);
+ }
+ if (map->me_stolen_size) {
+ map_write_reg64(MESEG_LIMIT, 0x80000 - map->me_stolen_size);
+ map_write_reg64(MESEG_BASE, map->tom_minus_me);
+ pci_or_config32(HOST_BRIDGE, MESEG_LIMIT, ME_STLEN_EN);
+ }
+ map_write_reg32(TOLUD, map->tolud);
+ map_write_reg32(BDSM, map->bdsm_base);
+ map_write_reg32(BGSM, map->gtt_base);
+ map_write_reg32(TSEG, map->tseg_base);
+
+ const uint32_t dpr_reg = map->tseg_base << 20 | map->dpr_size << 4;
+ pci_write_config32(HOST_BRIDGE, DPR, dpr_reg);
+
+ const uint16_t gfx_stolen_size = GGC_IGD_MEM_IN_32MB_UNITS(map->gms_size / 32);
+ const uint16_t ggc = map->gtt_size << 8 | gfx_stolen_size;
+ pci_write_config16(HOST_BRIDGE, GGC, ggc);
+
+ /** TODO: Do not hardcode these? GDXC has weird alignment requirements, though. **/
+ gdxcbar_write32(MPCOHTRK_GDXC_MOT_ADDRESS_LO, 0);
+ gdxcbar_write32(MPCOHTRK_GDXC_MOT_ADDRESS_HI, 0);
+ gdxcbar_write32(MPCOHTRK_GDXC_MOT_REGION, 0);
+
+ gdxcbar_write32(MPCOHTRK_GDXC_OCLA_ADDRESS_LO, 0);
+ gdxcbar_write32(MPCOHTRK_GDXC_OCLA_ADDRESS_HI, 0);
+ gdxcbar_write32(MPCOHTRK_GDXC_OCLA_REGION, 0);
+
+ gdxcbar_setbits32(MPCOHTRK_GDXC_OCLA_ADDRESS_HI, MPCOHTRK_GDXC_OCLA_ADDRESS_HI_LOCK);
+}
+
+enum raminit_status configure_memory_map(struct sysinfo *ctrl)
+{
+ struct memory_map_data memory_map = {
+ .tom = ctrl->channel_size_mb[0] + ctrl->channel_size_mb[1],
+ .dpr_size = CONFIG_INTEL_TXT_DPR_SIZE,
+ .tseg_size = CONFIG_SMM_TSEG_SIZE >> 20,
+ .me_stolen_size = intel_early_me_uma_size(),
+ };
+ /** FIXME: MRC hardcodes iGPU parameters, but we should not **/
+ const bool igpu_on = pci_read_config32(HOST_BRIDGE, DEVEN) & DEVEN_D2EN;
+ if (CONFIG(ONBOARD_VGA_IS_PRIMARY) || igpu_on) {
+ memory_map.gtt_size = 2;
+ memory_map.gms_size = 64;
+ pci_or_config32(HOST_BRIDGE, DEVEN, DEVEN_D2EN);
+ }
+ compute_memory_map(&memory_map);
+ display_memory_map(&memory_map);
+ program_memory_map(&memory_map);
+ return 0;
+}
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
index a13ae5641e..a697343ef9 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
@@ -24,6 +24,7 @@ static const struct task_entry cold_boot[] = {
{ initialise_mpll, true, "INITMPLL", },
{ convert_timings, true, "CONVTIM", },
{ configure_mc, true, "CONFMC", },
+ { configure_memory_map, true, "MEMMAP", },
};
/* Return a generic stepping value to make stepping checks simpler */
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
index 5915a2bab0..8f937c4ccd 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
@@ -203,6 +203,7 @@ enum raminit_status collect_spd_info(struct sysinfo *ctrl);
enum raminit_status initialise_mpll(struct sysinfo *ctrl);
enum raminit_status convert_timings(struct sysinfo *ctrl);
enum raminit_status configure_mc(struct sysinfo *ctrl);
+enum raminit_status configure_memory_map(struct sysinfo *ctrl);
void configure_timings(struct sysinfo *ctrl);
void configure_refresh(struct sysinfo *ctrl);
diff --git a/src/northbridge/intel/haswell/registers/host_bridge.h b/src/northbridge/intel/haswell/registers/host_bridge.h
index 1ee0ab2890..0228cf6bb9 100644
--- a/src/northbridge/intel/haswell/registers/host_bridge.h
+++ b/src/northbridge/intel/haswell/registers/host_bridge.h
@@ -34,6 +34,8 @@
#define MESEG_BASE 0x70 /* Management Engine Base */
#define MESEG_LIMIT 0x78 /* Management Engine Limit */
+#define MELCK (1 << 10) /* ME Range Lock */
+#define ME_STLEN_EN (1 << 11) /* ME Stolen Memory Enable */
#define PAM0 0x80
#define PAM1 0x81
--
2.39.5

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,396 @@
From 9d0897a0bc98e8b400c88b2a099c2ffe8d21b8e6 Mon Sep 17 00:00:00 2001
From: Angel Pons <th3fanbus@gmail.com>
Date: Sat, 7 May 2022 23:12:18 +0200
Subject: [PATCH 14/24] haswell NRI: Add pre-training steps
Implement pre-training steps, which consist of enabling ECC I/O and
filling the WDB (Write Data Buffer, stores test patterns) through a
magic LDAT port.
Change-Id: Ie2e09e3b218c4569ed8de5c5e1b05d491032e0f1
Signed-off-by: Angel Pons <th3fanbus@gmail.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/64190
Reviewed-by: Alicja Michalska <ahplka19@gmail.com>
Reviewed-by: Maximilian Brune <maximilian.brune@9elements.com>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
---
.../intel/haswell/native_raminit/Makefile.mk | 1 +
.../haswell/native_raminit/raminit_main.c | 35 ++++
.../haswell/native_raminit/raminit_native.h | 24 +++
.../haswell/native_raminit/reg_structs.h | 45 +++++
.../intel/haswell/native_raminit/setup_wdb.c | 159 ++++++++++++++++++
.../intel/haswell/registers/mchbar.h | 9 +
6 files changed, 273 insertions(+)
create mode 100644 src/northbridge/intel/haswell/native_raminit/setup_wdb.c
diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.mk b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
index e9212df9e6..8d7d4e4db0 100644
--- a/src/northbridge/intel/haswell/native_raminit/Makefile.mk
+++ b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
@@ -10,5 +10,6 @@ romstage-y += memory_map.c
romstage-y += raminit_main.c
romstage-y += raminit_native.c
romstage-y += reut.c
+romstage-y += setup_wdb.c
romstage-y += spd_bitmunching.c
romstage-y += timings_refresh.c
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
index 137a45007d..cbc64315cd 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
@@ -3,6 +3,7 @@
#include <assert.h>
#include <console/console.h>
#include <cpu/intel/haswell/haswell.h>
+#include <delay.h>
#include <device/pci_ops.h>
#include <northbridge/intel/haswell/chip.h>
#include <northbridge/intel/haswell/haswell.h>
@@ -13,6 +14,39 @@
#include "raminit_native.h"
+static enum raminit_status pre_training(struct sysinfo *ctrl)
+{
+ /* Skip on S3 resume */
+ if (ctrl->bootmode == BOOTMODE_S3)
+ return RAMINIT_STATUS_SUCCESS;
+
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
+ if (!rank_in_ch(ctrl, slot + slot, channel))
+ continue;
+
+ printk(RAM_DEBUG, "C%uS%u:\n", channel, slot);
+ printk(RAM_DEBUG, "\tMR0: 0x%04x\n", ctrl->mr0[channel][slot]);
+ printk(RAM_DEBUG, "\tMR1: 0x%04x\n", ctrl->mr1[channel][slot]);
+ printk(RAM_DEBUG, "\tMR2: 0x%04x\n", ctrl->mr2[channel][slot]);
+ printk(RAM_DEBUG, "\tMR3: 0x%04x\n", ctrl->mr3[channel][slot]);
+ printk(RAM_DEBUG, "\n");
+ }
+ if (ctrl->is_ecc) {
+ union mad_dimm_reg mad_dimm = {
+ .raw = mchbar_read32(MAD_DIMM(channel)),
+ };
+ /* Enable ECC I/O */
+ mad_dimm.ecc_mode = 1;
+ mchbar_write32(MAD_DIMM(channel), mad_dimm.raw);
+ /* Wait 4 usec after enabling the ECC I/O, needed by HW */
+ udelay(4);
+ }
+ }
+ setup_wdb(ctrl);
+ return RAMINIT_STATUS_SUCCESS;
+}
+
struct task_entry {
enum raminit_status (*task)(struct sysinfo *);
bool is_enabled;
@@ -26,6 +60,7 @@ static const struct task_entry cold_boot[] = {
{ configure_mc, true, "CONFMC", },
{ configure_memory_map, true, "MEMMAP", },
{ do_jedec_init, true, "JEDECINIT", },
+ { pre_training, true, "PRETRAIN", },
};
/* Return a generic stepping value to make stepping checks simpler */
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
index 759d755d6d..4d9487d79c 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
@@ -36,6 +36,13 @@
#define RTTNOM_MASK (BIT(9) | BIT(6) | BIT(2))
+#define BASIC_VA_PAT_SPREAD_8 0x01010101
+
+#define WDB_CACHE_LINE_SIZE 8
+
+#define NUM_WDB_CL_MUX_SEEDS 3
+#define NUM_CADB_MUX_SEEDS 3
+
/* ZQ calibration types */
enum {
ZQ_INIT, /* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init with tZQinit */
@@ -317,6 +324,23 @@ void reut_issue_mrs_all(
enum raminit_status reut_issue_zq(struct sysinfo *ctrl, uint8_t chanmask, uint8_t zq_type);
+void write_wdb_fixed_pat(
+ const struct sysinfo *ctrl,
+ const uint8_t patterns[],
+ const uint8_t pat_mask[],
+ uint8_t spread,
+ uint16_t start);
+
+void write_wdb_va_pat(
+ const struct sysinfo *ctrl,
+ uint32_t agg_mask,
+ uint32_t vic_mask,
+ uint8_t vic_rot,
+ uint16_t start);
+
+void program_wdb_lfsr(const struct sysinfo *ctrl, bool cleanup);
+void setup_wdb(const struct sysinfo *ctrl);
+
uint8_t get_rx_bias(const struct sysinfo *ctrl);
uint8_t get_tCWL(uint32_t mem_clock_mhz);
diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
index 9929f617fe..7aa8d8c8b2 100644
--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
@@ -335,6 +335,18 @@ union mcscheds_cbit_reg {
uint32_t raw;
};
+union reut_pat_cl_mux_lmn_reg {
+ struct __packed {
+ uint32_t l_data_select : 1; // Bits 0:0
+ uint32_t en_sweep_freq : 1; // Bits 1:1
+ uint32_t : 6; // Bits 7:2
+ uint32_t l_counter : 8; // Bits 15:8
+ uint32_t m_counter : 8; // Bits 23:16
+ uint32_t n_counter : 8; // Bits 31:24
+ };
+ uint32_t raw;
+};
+
union reut_pat_cadb_prog_reg {
struct __packed {
uint32_t addr : 16; // Bits 15:0
@@ -439,6 +451,39 @@ union reut_misc_odt_ctrl_reg {
uint32_t raw;
};
+union ldat_pdat_reg {
+ struct __packed {
+ uint32_t fast_addr : 12; // Bits 11:0
+ uint32_t : 4; // Bits 15:12
+ uint32_t addr_en : 1; // Bits 16:16
+ uint32_t seq_en : 1; // Bits 17:17
+ uint32_t pol_0 : 1; // Bits 18:18
+ uint32_t pol_1 : 1; // Bits 19:19
+ uint32_t cmd_a : 4; // Bits 23:20
+ uint32_t cmd_b : 4; // Bits 27:24
+ uint32_t cmd_c : 4; // Bits 31:28
+ };
+ uint32_t raw;
+};
+
+union ldat_sdat_reg {
+ struct __packed {
+ uint32_t bank_sel : 4; // Bits 3:0
+ uint32_t : 1; // Bits 4:4
+ uint32_t array_sel : 5; // Bits 9:5
+ uint32_t cmp : 1; // Bits 10:10
+ uint32_t replicate : 1; // Bits 11:11
+ uint32_t dword : 4; // Bits 15:12
+ uint32_t mode : 2; // Bits 17:16
+ uint32_t mpmap : 6; // Bits 23:18
+ uint32_t mpb_offset : 4; // Bits 27:24
+ uint32_t stage_en : 1; // Bits 28:28
+ uint32_t shadow : 2; // Bits 30:29
+ uint32_t : 1; // Bits 31:31
+ };
+ uint32_t raw;
+};
+
union mcscheds_dft_misc_reg {
struct __packed {
uint32_t wdar : 1; // Bits 0:0
diff --git a/src/northbridge/intel/haswell/native_raminit/setup_wdb.c b/src/northbridge/intel/haswell/native_raminit/setup_wdb.c
new file mode 100644
index 0000000000..ec37c48415
--- /dev/null
+++ b/src/northbridge/intel/haswell/native_raminit/setup_wdb.c
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <northbridge/intel/haswell/haswell.h>
+#include <types.h>
+
+#include "raminit_native.h"
+
+static void ldat_write_cacheline(
+ const struct sysinfo *const ctrl,
+ const uint8_t chunk,
+ const uint16_t start,
+ const uint64_t data)
+{
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ /*
+ * Do not do a 64-bit write here. The register is not aligned
+ * to a 64-bit boundary, which could potentially cause issues.
+ */
+ mchbar_write32(QCLK_ch_LDAT_DATA_IN_x(channel, 0), data & UINT32_MAX);
+ mchbar_write32(QCLK_ch_LDAT_DATA_IN_x(channel, 1), data >> 32);
+ /*
+ * Set REPLICATE = 0 as you don't want to replicate the data.
+ * Set BANK_SEL to the chunk you want to write the 64 bits to.
+ * Set ARRAY_SEL = 0 (the MC WDB) and MODE = 1.
+ */
+ const union ldat_sdat_reg ldat_sdat = {
+ .bank_sel = chunk,
+ .mode = 1,
+ };
+ mchbar_write32(QCLK_ch_LDAT_SDAT(channel), ldat_sdat.raw);
+ /*
+ * Finally, write the PDAT register indicating which cacheline
+ * of the WDB you want to write to by setting FAST_ADDR field
+ * to one of the 64 cache lines. Also set CMD_B in the PDAT
+ * register to 4'b1000, indicating that this is a LDAT write.
+ */
+ const union ldat_pdat_reg ldat_pdat = {
+ .fast_addr = MIN(start, 0xfff),
+ .cmd_b = 8,
+ };
+ mchbar_write32(QCLK_ch_LDAT_PDAT(channel), ldat_pdat.raw);
+ }
+}
+
+static void clear_ldat_mode(const struct sysinfo *const ctrl)
+{
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++)
+ mchbar_write32(QCLK_ch_LDAT_SDAT(channel), 0);
+}
+
+void write_wdb_fixed_pat(
+ const struct sysinfo *const ctrl,
+ const uint8_t patterns[],
+ const uint8_t pat_mask[],
+ const uint8_t spread,
+ const uint16_t start)
+{
+ for (uint8_t chunk = 0; chunk < WDB_CACHE_LINE_SIZE; chunk++) {
+ uint64_t data = 0;
+ for (uint8_t b = 0; b < 64; b++) {
+ const uint8_t beff = b % spread;
+ const uint8_t burst = patterns[pat_mask[beff]];
+ if (burst & BIT(chunk))
+ data |= 1ULL << b;
+ }
+ ldat_write_cacheline(ctrl, chunk, start, data);
+ }
+ clear_ldat_mode(ctrl);
+}
+
+static inline uint32_t rol_u32(const uint32_t val)
+{
+ return (val << 1) | ((val >> 31) & 1);
+}
+
+void write_wdb_va_pat(
+ const struct sysinfo *const ctrl,
+ const uint32_t agg_mask,
+ const uint32_t vic_mask,
+ const uint8_t vic_rot,
+ const uint16_t start)
+{
+ static const uint8_t va_mask_to_compressed[4] = {0xaa, 0xc0, 0xcc, 0xf0};
+ uint32_t v_mask = vic_mask;
+ uint32_t a_mask = agg_mask;
+ for (uint8_t v = 0; v < vic_rot; v++) {
+ uint8_t compressed[32] = {0};
+ /* Iterate through all 32 bits and create a compressed version of cacheline */
+ for (uint8_t b = 0; b < ARRAY_SIZE(compressed); b++) {
+ const uint8_t vic = !!(v_mask & BIT(b));
+ const uint8_t agg = !!(a_mask & BIT(b));
+ const uint8_t index = !vic << 1 | agg << 0;
+ compressed[b] = va_mask_to_compressed[index];
+ }
+ for (uint8_t chunk = 0; chunk < WDB_CACHE_LINE_SIZE; chunk++) {
+ uint32_t data = 0;
+ for (uint8_t b = 0; b < ARRAY_SIZE(compressed); b++)
+ data |= !!(compressed[b] & BIT(chunk)) << b;
+
+ const uint64_t data64 = (uint64_t)data << 32 | data;
+ ldat_write_cacheline(ctrl, chunk, start + v, data64);
+ }
+ v_mask = rol_u32(v_mask);
+ a_mask = rol_u32(a_mask);
+ }
+ clear_ldat_mode(ctrl);
+}
+
+void program_wdb_lfsr(const struct sysinfo *ctrl, const bool cleanup)
+{
+ /* Cleanup LFSR seeds are sequential */
+ const uint32_t cleanup_seeds[NUM_WDB_CL_MUX_SEEDS] = { 0xaaaaaa, 0xcccccc, 0xf0f0f0 };
+ const uint32_t regular_seeds[NUM_WDB_CL_MUX_SEEDS] = { 0xa10ca1, 0xef0d08, 0xad0a1e };
+ const uint32_t *seeds = cleanup ? cleanup_seeds : regular_seeds;
+
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ for (uint8_t i = 0; i < NUM_WDB_CL_MUX_SEEDS; i++) {
+ mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_RD_x(channel, i), seeds[i]);
+ mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_WR_x(channel, i), seeds[i]);
+ }
+ }
+}
+
+void setup_wdb(const struct sysinfo *ctrl)
+{
+ const uint32_t amask[9] = {
+ 0x86186186, 0x18618618, 0x30c30c30,
+ 0xa28a28a2, 0x8a28a28a, 0x14514514,
+ 0x28a28a28, 0x92492492, 0x24924924,
+ };
+ const uint32_t vmask = 0x41041041;
+
+ /* Fill first 8 entries with simple 2-LFSR VA pattern */
+ write_wdb_va_pat(ctrl, 0, BASIC_VA_PAT_SPREAD_8, 8, 0);
+
+ /* Fill next 54 entries with 3-LFSR VA pattern */
+ for (uint8_t a = 0; a < ARRAY_SIZE(amask); a++)
+ write_wdb_va_pat(ctrl, amask[a], vmask, 6, 8 + a * 6);
+
+ program_wdb_lfsr(ctrl, false);
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ const union reut_pat_cl_mux_lmn_reg wdb_cl_mux_lmn = {
+ .en_sweep_freq = 1,
+ .l_counter = 1,
+ .m_counter = 1,
+ .n_counter = 10,
+ };
+ mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_LMN(channel), wdb_cl_mux_lmn.raw);
+ }
+}
diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
index 4fc78a7f43..f8408e51a0 100644
--- a/src/northbridge/intel/haswell/registers/mchbar.h
+++ b/src/northbridge/intel/haswell/registers/mchbar.h
@@ -94,6 +94,11 @@
#define TC_BANK_RANK_D_ch(ch) _MCMAIN_C(0x4014, ch)
#define SC_ROUNDT_LAT_ch(ch) _MCMAIN_C(0x4024, ch)
+#define REUT_ch_PAT_WDB_CL_MUX_WR_x(ch, x) _MCMAIN_C_X(0x4048, ch, x) /* x in 0 .. 2 */
+#define REUT_ch_PAT_WDB_CL_MUX_RD_x(ch, x) _MCMAIN_C_X(0x4054, ch, x) /* x in 0 .. 2 */
+
+#define REUT_ch_PAT_WDB_CL_MUX_LMN(ch) _MCMAIN_C(0x4078, ch)
+
#define SC_WR_ADD_DELAY_ch(ch) _MCMAIN_C(0x40d0, ch)
#define REUT_ch_MISC_CKE_CTRL(ch) _MCMAIN_C(0x4190, ch)
@@ -110,6 +115,10 @@
#define MC_INIT_STATE_ch(ch) _MCMAIN_C(0x42a0, ch)
#define TC_SRFTP_ch(ch) _MCMAIN_C(0x42a4, ch)
+#define QCLK_ch_LDAT_PDAT(ch) _MCMAIN_C(0x42d0, ch)
+#define QCLK_ch_LDAT_SDAT(ch) _MCMAIN_C(0x42d4, ch)
+#define QCLK_ch_LDAT_DATA_IN_x(ch, x) _MCMAIN_C_X(0x42dc, ch, x) /* x in 0 .. 1 */
+
#define REUT_GLOBAL_ERR 0x4804
#define REUT_ch_SEQ_CFG(ch) (0x48a8 + 8 * (ch))
--
2.39.5

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,226 @@
From b1ec1bee7e469eaad764419a781982c0a4e4a68f Mon Sep 17 00:00:00 2001
From: Angel Pons <th3fanbus@gmail.com>
Date: Sun, 8 May 2022 00:56:00 +0200
Subject: [PATCH 16/24] haswell NRI: Add range tracking library
Implement a small library used to keep track of passing ranges. This
will be used by 1D training algorithms when margining some parameter.
Change-Id: I8718e85165160afd7c0c8e730b5ce6c9c00f8a60
Signed-off-by: Angel Pons <th3fanbus@gmail.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/64192
Reviewed-by: Alicja Michalska <ahplka19@gmail.com>
Reviewed-by: Maximilian Brune <maximilian.brune@9elements.com>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
---
.../intel/haswell/native_raminit/Makefile.mk | 1 +
.../intel/haswell/native_raminit/ranges.c | 109 ++++++++++++++++++
.../intel/haswell/native_raminit/ranges.h | 68 +++++++++++
3 files changed, 178 insertions(+)
create mode 100644 src/northbridge/intel/haswell/native_raminit/ranges.c
create mode 100644 src/northbridge/intel/haswell/native_raminit/ranges.h
diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.mk b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
index 6e1b365602..2da950771d 100644
--- a/src/northbridge/intel/haswell/native_raminit/Makefile.mk
+++ b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
@@ -9,6 +9,7 @@ romstage-y += io_comp_control.c
romstage-y += memory_map.c
romstage-y += raminit_main.c
romstage-y += raminit_native.c
+romstage-y += ranges.c
romstage-y += reut.c
romstage-y += setup_wdb.c
romstage-y += spd_bitmunching.c
diff --git a/src/northbridge/intel/haswell/native_raminit/ranges.c b/src/northbridge/intel/haswell/native_raminit/ranges.c
new file mode 100644
index 0000000000..cdebc1fa66
--- /dev/null
+++ b/src/northbridge/intel/haswell/native_raminit/ranges.c
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <types.h>
+
+#include "ranges.h"
+
+void linear_record_pass(
+ struct linear_train_data *const data,
+ const bool pass,
+ const int32_t value,
+ const int32_t start,
+ const int32_t step)
+{
+ /* If this is the first time, initialize all values */
+ if (value == start) {
+ /*
+ * If value passed, create a zero-length region for the current value,
+ * which may be extended as long as the successive values are passing.
+ *
+ * Otherwise, create a zero-length range for the preceding value. This
+ * range cannot be extended by other passing values, which is desired.
+ */
+ data->current.start = start - (pass ? 0 : step);
+ data->current.end = data->current.start;
+ data->largest = data->current;
+ } else if (pass) {
+ /* If this pass is not contiguous, it belongs to a new region */
+ if (data->current.end != (value - step))
+ data->current.start = value;
+
+ /* Update end of current region */
+ data->current.end = value;
+
+ /* Update largest region */
+ if (range_width(data->current) > range_width(data->largest))
+ data->largest = data->current;
+ }
+}
+
+void phase_record_pass(
+ struct phase_train_data *const data,
+ const bool pass,
+ const int32_t value,
+ const int32_t start,
+ const int32_t step)
+{
+ /* If this is the first time, initialize all values */
+ if (value == start) {
+ /*
+ * If value passed, create a zero-length region for the current value,
+ * which may be extended as long as the successive values are passing.
+ *
+ * Otherwise, create a zero-length range for the preceding value. This
+ * range cannot be extended by other passing values, which is desired.
+ */
+ data->current.start = start - (pass ? 0 : step);
+ data->current.end = data->current.start;
+ data->largest = data->current;
+ data->initial = data->current;
+ return;
+ }
+ if (!pass)
+ return;
+
+ /* Update initial region */
+ if (data->initial.end == (value - step))
+ data->initial.end = value;
+
+ /* If this pass is not contiguous, it belongs to a new region */
+ if (data->current.end != (value - step))
+ data->current.start = value;
+
+ /* Update end of current region */
+ data->current.end = value;
+
+ /* Update largest region */
+ if (range_width(data->current) > range_width(data->largest))
+ data->largest = data->current;
+}
+
+void phase_append_initial_to_current(
+ struct phase_train_data *const data,
+ const int32_t start,
+ const int32_t step)
+{
+ /* If initial region is valid and does not overlap, append it */
+ if (data->initial.start == start && data->initial.end != data->current.end)
+ data->current.end += step + range_width(data->initial);
+
+ /* Update largest region */
+ if (range_width(data->current) > range_width(data->largest))
+ data->largest = data->current;
+}
+
+void phase_append_current_to_initial(
+ struct phase_train_data *const data,
+ const int32_t start,
+ const int32_t step)
+{
+ /* If initial region is valid and does not overlap, append it */
+ if (data->initial.start == start && data->initial.end != data->current.end) {
+ data->initial.start -= (step + range_width(data->current));
+ data->current = data->initial;
+ }
+
+ /* Update largest region */
+ if (range_width(data->current) > range_width(data->largest))
+ data->largest = data->current;
+}
diff --git a/src/northbridge/intel/haswell/native_raminit/ranges.h b/src/northbridge/intel/haswell/native_raminit/ranges.h
new file mode 100644
index 0000000000..235392df96
--- /dev/null
+++ b/src/northbridge/intel/haswell/native_raminit/ranges.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef HASWELL_RAMINIT_RANGES_H
+#define HASWELL_RAMINIT_RANGES_H
+
+#include <types.h>
+
+/*
+ * Many algorithms shmoo some parameter to determine the largest passing
+ * range. Provide a common implementation to avoid redundant boilerplate.
+ */
+struct passing_range {
+ int32_t start;
+ int32_t end;
+};
+
+/* Structure for linear parameters, such as roundtrip delays */
+struct linear_train_data {
+ struct passing_range current;
+ struct passing_range largest;
+};
+
+/*
+ * Phase ranges are "circular": the first and last indices are contiguous.
+ * To correctly determine the largest passing range, one has to combine
+ * the initial range and the current range when processing the last index.
+ */
+struct phase_train_data {
+ struct passing_range initial;
+ struct passing_range current;
+ struct passing_range largest;
+};
+
+static inline int32_t range_width(const struct passing_range range)
+{
+ return range.end - range.start;
+}
+
+static inline int32_t range_center(const struct passing_range range)
+{
+ return range.start + range_width(range) / 2;
+}
+
+void linear_record_pass(
+ struct linear_train_data *data,
+ bool pass,
+ int32_t value,
+ int32_t start,
+ int32_t step);
+
+void phase_record_pass(
+ struct phase_train_data *data,
+ bool pass,
+ int32_t value,
+ int32_t start,
+ int32_t step);
+
+void phase_append_initial_to_current(
+ struct phase_train_data *data,
+ int32_t start,
+ int32_t step);
+
+void phase_append_current_to_initial(
+ struct phase_train_data *data,
+ int32_t start,
+ int32_t step);
+
+#endif
--
2.39.5

View File

@ -0,0 +1,297 @@
From fc72b9f3d93781c5ccb958e870e49773be92b759 Mon Sep 17 00:00:00 2001
From: Angel Pons <th3fanbus@gmail.com>
Date: Sun, 8 May 2022 01:11:03 +0200
Subject: [PATCH 17/24] haswell NRI: Add library to change margins
Implement a library to change Rx/Tx margins. It will be expanded later.
Change-Id: I0b55aba428d8b4d4e16d2fbdec57235ce3ce8adf
Signed-off-by: Angel Pons <th3fanbus@gmail.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/64193
Reviewed-by: Maximilian Brune <maximilian.brune@9elements.com>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
---
.../intel/haswell/native_raminit/Makefile.mk | 1 +
.../haswell/native_raminit/change_margin.c | 154 ++++++++++++++++++
.../haswell/native_raminit/raminit_native.h | 50 ++++++
.../intel/haswell/registers/mchbar.h | 9 +
4 files changed, 214 insertions(+)
create mode 100644 src/northbridge/intel/haswell/native_raminit/change_margin.c
diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.mk b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
index 2da950771d..ebe9e9b762 100644
--- a/src/northbridge/intel/haswell/native_raminit/Makefile.mk
+++ b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
@@ -1,5 +1,6 @@
## SPDX-License-Identifier: GPL-2.0-or-later
+romstage-y += change_margin.c
romstage-y += configure_mc.c
romstage-y += ddr3.c
romstage-y += jedec_reset.c
diff --git a/src/northbridge/intel/haswell/native_raminit/change_margin.c b/src/northbridge/intel/haswell/native_raminit/change_margin.c
new file mode 100644
index 0000000000..055c666eee
--- /dev/null
+++ b/src/northbridge/intel/haswell/native_raminit/change_margin.c
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <commonlib/bsd/clamp.h>
+#include <console/console.h>
+#include <delay.h>
+#include <northbridge/intel/haswell/haswell.h>
+#include <timer.h>
+
+#include "raminit_native.h"
+
+void update_rxt(
+ struct sysinfo *ctrl,
+ const uint8_t channel,
+ const uint8_t rank,
+ const uint8_t byte,
+ const enum rxt_subfield subfield,
+ const int32_t value)
+{
+ union ddr_data_rx_train_rank_reg rxt = {
+ .rcven = ctrl->rcven[channel][rank][byte],
+ .dqs_p = ctrl->rxdqsp[channel][rank][byte],
+ .rx_eq = ctrl->rx_eq[channel][rank][byte],
+ .dqs_n = ctrl->rxdqsn[channel][rank][byte],
+ .vref = ctrl->rxvref[channel][rank][byte],
+ };
+ int32_t new_value;
+ switch (subfield) {
+ case RXT_RCVEN:
+ new_value = clamp_s32(0, value, 511);
+ rxt.rcven = new_value;
+ break;
+ case RXT_RXDQS_P:
+ new_value = clamp_s32(0, value, 63);
+ rxt.dqs_p = new_value;
+ break;
+ case RXT_RX_EQ:
+ new_value = clamp_s32(0, value, 31);
+ rxt.rx_eq = new_value;
+ break;
+ case RXT_RXDQS_N:
+ new_value = clamp_s32(0, value, 63);
+ rxt.dqs_n = new_value;
+ break;
+ case RXT_RX_VREF:
+ new_value = clamp_s32(-32, value, 31);
+ rxt.vref = new_value;
+ break;
+ case RXT_RXDQS_BOTH:
+ new_value = clamp_s32(0, value, 63);
+ rxt.dqs_p = new_value;
+ rxt.dqs_n = new_value;
+ break;
+ case RXT_RESTORE:
+ new_value = value;
+ break;
+ default:
+ die("%s: Unhandled subfield index %u\n", __func__, subfield);
+ }
+
+ if (new_value != value) {
+ printk(BIOS_ERR, "%s: Overflow for subfield %u: %d ---> %d\n",
+ __func__, subfield, value, new_value);
+ }
+ mchbar_write32(RX_TRAIN_ch_r_b(channel, rank, byte), rxt.raw);
+ download_regfile(ctrl, channel, false, rank, REG_FILE_USE_RANK, byte, true, false);
+}
+
+void update_txt(
+ struct sysinfo *ctrl,
+ const uint8_t channel,
+ const uint8_t rank,
+ const uint8_t byte,
+ const enum txt_subfield subfield,
+ const int32_t value)
+{
+ union ddr_data_tx_train_rank_reg txt = {
+ .dq_delay = ctrl->tx_dq[channel][rank][byte],
+ .dqs_delay = ctrl->txdqs[channel][rank][byte],
+ .tx_eq = ctrl->tx_eq[channel][rank][byte],
+ };
+ int32_t new_value;
+ switch (subfield) {
+ case TXT_TX_DQ:
+ new_value = clamp_s32(0, value, 511);
+ txt.dq_delay = new_value;
+ break;
+ case TXT_TXDQS:
+ new_value = clamp_s32(0, value, 511);
+ txt.dqs_delay = new_value;
+ break;
+ case TXT_TX_EQ:
+ new_value = clamp_s32(0, value, 63);
+ txt.tx_eq = new_value;
+ break;
+ case TXT_DQDQS_OFF:
+ new_value = value;
+ txt.dqs_delay += new_value;
+ txt.dq_delay += new_value;
+ break;
+ case TXT_RESTORE:
+ new_value = value;
+ break;
+ default:
+ die("%s: Unhandled subfield index %u\n", __func__, subfield);
+ }
+ if (new_value != value) {
+ printk(BIOS_ERR, "%s: Overflow for subfield %u: %d ---> %d\n",
+ __func__, subfield, value, new_value);
+ }
+ mchbar_write32(TX_TRAIN_ch_r_b(channel, rank, byte), txt.raw);
+ download_regfile(ctrl, channel, false, rank, REG_FILE_USE_RANK, byte, false, true);
+}
+
+void download_regfile(
+ struct sysinfo *ctrl,
+ const uint8_t channel,
+ const bool multicast,
+ const uint8_t rank,
+ const enum regfile_mode regfile,
+ const uint8_t byte,
+ const bool read_rf_rd,
+ const bool read_rf_wr)
+{
+ union reut_seq_base_addr_reg reut_seq_base_addr;
+ switch (regfile) {
+ case REG_FILE_USE_START:
+ reut_seq_base_addr.raw = mchbar_read64(REUT_ch_SEQ_ADDR_START(channel));
+ break;
+ case REG_FILE_USE_CURRENT:
+ reut_seq_base_addr.raw = mchbar_read64(REUT_ch_SEQ_ADDR_CURRENT(channel));
+ break;
+ case REG_FILE_USE_RANK:
+ reut_seq_base_addr.raw = 0;
+ if (rank >= NUM_SLOTRANKS)
+ die("%s: bad rank %u\n", __func__, rank);
+ break;
+ default:
+ die("%s: Invalid regfile param %u\n", __func__, regfile);
+ }
+ uint8_t phys_rank = rank;
+ if (reut_seq_base_addr.raw != 0) {
+ /* Map REUT logical rank to physical rank */
+ const uint32_t log_to_phys = mchbar_read32(REUT_ch_RANK_LOG_TO_PHYS(channel));
+ phys_rank = log_to_phys >> (reut_seq_base_addr.rank_addr * 4) & 0x3;
+ }
+ uint32_t reg = multicast ? DDR_DATA_ch_CONTROL_0(channel) : DQ_CONTROL_0(channel, byte);
+ union ddr_data_control_0_reg ddr_data_control_0 = {
+ .raw = mchbar_read32(reg),
+ };
+ ddr_data_control_0.read_rf_rd = read_rf_rd;
+ ddr_data_control_0.read_rf_wr = read_rf_wr;
+ ddr_data_control_0.read_rf_rank = phys_rank;
+ mchbar_write32(reg, ddr_data_control_0.raw);
+}
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
index f029e7f076..8707257b27 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
@@ -118,6 +118,30 @@ enum test_stop {
ALSOE = 3, /* Stop on all lanes error */
};
+enum rxt_subfield {
+ RXT_RCVEN = 0,
+ RXT_RXDQS_P = 1,
+ RXT_RX_EQ = 2,
+ RXT_RXDQS_N = 3,
+ RXT_RX_VREF = 4,
+ RXT_RXDQS_BOTH = 5,
+ RXT_RESTORE = 255,
+};
+
+enum txt_subfield {
+ TXT_TX_DQ = 0,
+ TXT_TXDQS = 1,
+ TXT_TX_EQ = 2,
+ TXT_DQDQS_OFF = 3,
+ TXT_RESTORE = 255,
+};
+
+enum regfile_mode {
+ REG_FILE_USE_RANK, /* Used when changing parameters for each rank */
+ REG_FILE_USE_START, /* Used when changing parameters before the test */
+ REG_FILE_USE_CURRENT, /* Used when changing parameters after the test */
+};
+
struct wdb_pat {
uint32_t start_ptr; /* Starting pointer in WDB */
uint32_t stop_ptr; /* Stopping pointer in WDB */
@@ -451,6 +475,32 @@ uint8_t select_reut_ranks(struct sysinfo *ctrl, uint8_t channel, uint8_t rankmas
void run_mpr_io_test(bool clear_errors);
uint8_t run_io_test(struct sysinfo *ctrl, uint8_t chanmask, uint8_t dq_pat, bool clear_errors);
+void update_rxt(
+ struct sysinfo *ctrl,
+ uint8_t channel,
+ uint8_t rank,
+ uint8_t byte,
+ enum rxt_subfield subfield,
+ int32_t value);
+
+void update_txt(
+ struct sysinfo *ctrl,
+ uint8_t channel,
+ uint8_t rank,
+ uint8_t byte,
+ enum txt_subfield subfield,
+ int32_t value);
+
+void download_regfile(
+ struct sysinfo *ctrl,
+ uint8_t channel,
+ bool multicast,
+ uint8_t rank,
+ enum regfile_mode regfile,
+ uint8_t byte,
+ bool read_rf_rd,
+ bool read_rf_wr);
+
uint8_t get_rx_bias(const struct sysinfo *ctrl);
uint8_t get_tCWL(uint32_t mem_clock_mhz);
diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
index 817a9f8bf8..a81559bb1e 100644
--- a/src/northbridge/intel/haswell/registers/mchbar.h
+++ b/src/northbridge/intel/haswell/registers/mchbar.h
@@ -15,7 +15,11 @@
/* Register definitions */
/* DDR DATA per-channel per-bytelane */
+#define RX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0000, ch, rank, byte)
+#define TX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0020, ch, rank, byte)
+
#define DQ_CONTROL_2(ch, byte) _DDRIO_C_R_B(0x0064, ch, 0, byte)
+#define DQ_CONTROL_0(ch, byte) _DDRIO_C_R_B(0x0074, ch, 0, byte)
/* DDR CKE per-channel */
#define DDR_CKE_ch_CMD_COMP_OFFSET(ch) _DDRIO_C_R_B(0x1204, ch, 0, 0)
@@ -38,6 +42,9 @@
#define DDR_SCRAMBLE_ch(ch) (0x2000 + 4 * (ch))
#define DDR_SCRAM_MISC_CONTROL 0x2008
+/* DDR DATA per-channel multicast */
+#define DDR_DATA_ch_CONTROL_0(ch) _DDRIO_C_R_B(0x3074, ch, 0, 0)
+
/* DDR CMDN/CMDS per-channel (writes go to both CMDN and CMDS fubs) */
#define DDR_CMD_ch_COMP_OFFSET(ch) _DDRIO_C_R_B(0x3204, ch, 0, 0)
#define DDR_CMD_ch_PI_CODING(ch) _DDRIO_C_R_B(0x3208, ch, 0, 0)
@@ -147,6 +154,8 @@
#define REUT_ch_SEQ_ADDR_WRAP(ch) (0x48e8 + 8 * (ch))
+#define REUT_ch_SEQ_ADDR_CURRENT(ch) (0x48f8 + 8 * (ch))
+
#define REUT_ch_SEQ_MISC_CTL(ch) (0x4908 + 4 * (ch))
#define REUT_ch_SEQ_ADDR_INC_CTL(ch) (0x4910 + 8 * (ch))
--
2.39.5

View File

@ -0,0 +1,711 @@
From d434ec3453b8aca6ac77641d94c164f48a93c99b Mon Sep 17 00:00:00 2001
From: Angel Pons <th3fanbus@gmail.com>
Date: Sun, 8 May 2022 00:05:41 +0200
Subject: [PATCH 18/24] haswell NRI: Add RcvEn training
Implement the RcvEn (Receive Enable) calibration procedure.
Change-Id: Ifbfa520f3e0486c56d0988ce67af2ddb9cf29888
Signed-off-by: Angel Pons <th3fanbus@gmail.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/64194
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Maximilian Brune <maximilian.brune@9elements.com>
---
.../intel/haswell/native_raminit/Makefile.mk | 1 +
.../haswell/native_raminit/raminit_main.c | 1 +
.../haswell/native_raminit/raminit_native.h | 14 +
.../haswell/native_raminit/reg_structs.h | 13 +
.../native_raminit/train_receive_enable.c | 561 ++++++++++++++++++
.../intel/haswell/registers/mchbar.h | 3 +
6 files changed, 593 insertions(+)
create mode 100644 src/northbridge/intel/haswell/native_raminit/train_receive_enable.c
diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.mk b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
index ebe9e9b762..e2fbfb4211 100644
--- a/src/northbridge/intel/haswell/native_raminit/Makefile.mk
+++ b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
@@ -16,3 +16,4 @@ romstage-y += setup_wdb.c
romstage-y += spd_bitmunching.c
romstage-y += testing_io.c
romstage-y += timings_refresh.c
+romstage-y += train_receive_enable.c
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
index cbc64315cd..97eb957294 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
@@ -61,6 +61,7 @@ static const struct task_entry cold_boot[] = {
{ configure_memory_map, true, "MEMMAP", },
{ do_jedec_init, true, "JEDECINIT", },
{ pre_training, true, "PRETRAIN", },
+ { train_receive_enable, true, "RCVET", },
};
/* Return a generic stepping value to make stepping checks simpler */
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
index 8707257b27..eaaaedad1e 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
@@ -43,6 +43,9 @@
#define NUM_WDB_CL_MUX_SEEDS 3
#define NUM_CADB_MUX_SEEDS 3
+/* Specified in PI ticks. 64 PI ticks == 1 qclk */
+#define tDQSCK_DRIFT 64
+
/* ZQ calibration types */
enum {
ZQ_INIT, /* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init with tZQinit */
@@ -189,6 +192,7 @@ enum raminit_status {
RAMINIT_STATUS_MPLL_INIT_FAILURE,
RAMINIT_STATUS_POLL_TIMEOUT,
RAMINIT_STATUS_REUT_ERROR,
+ RAMINIT_STATUS_RCVEN_FAILURE,
RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
};
@@ -271,6 +275,10 @@ struct sysinfo {
union ddr_data_vref_adjust_reg dimm_vref;
+ uint8_t io_latency[NUM_CHANNELS][NUM_SLOTRANKS];
+ uint8_t rt_latency[NUM_CHANNELS][NUM_SLOTRANKS];
+ uint32_t rt_io_comp[NUM_CHANNELS];
+
uint32_t data_offset_train[NUM_CHANNELS][NUM_LANES];
uint32_t data_offset_comp[NUM_CHANNELS][NUM_LANES];
@@ -345,6 +353,11 @@ static inline void clear_data_offset_train_all(struct sysinfo *ctrl)
memset(ctrl->data_offset_train, 0, sizeof(ctrl->data_offset_train));
}
+static inline uint32_t get_data_train_feedback(const uint8_t channel, const uint8_t byte)
+{
+ return mchbar_read32(DDR_DATA_TRAIN_FEEDBACK(channel, byte));
+}
+
/* Number of ticks to wait in units of 69.841279 ns (citation needed) */
static inline void tick_delay(const uint32_t delay)
{
@@ -400,6 +413,7 @@ enum raminit_status convert_timings(struct sysinfo *ctrl);
enum raminit_status configure_mc(struct sysinfo *ctrl);
enum raminit_status configure_memory_map(struct sysinfo *ctrl);
enum raminit_status do_jedec_init(struct sysinfo *ctrl);
+enum raminit_status train_receive_enable(struct sysinfo *ctrl);
void configure_timings(struct sysinfo *ctrl);
void configure_refresh(struct sysinfo *ctrl);
diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
index b943259b91..b099f4bb82 100644
--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
@@ -297,6 +297,19 @@ union ddr_scram_misc_control_reg {
uint32_t raw;
};
+union sc_io_latency_reg {
+ struct __packed {
+ uint32_t iolat_rank0 : 4; // Bits 3:0
+ uint32_t iolat_rank1 : 4; // Bits 7:4
+ uint32_t iolat_rank2 : 4; // Bits 11:8
+ uint32_t iolat_rank3 : 4; // Bits 15:12
+ uint32_t rt_iocomp : 6; // Bits 21:16
+ uint32_t : 9; // Bits 30:22
+ uint32_t dis_rt_clk_gate : 1; // Bits 31:31
+ };
+ uint32_t raw;
+};
+
union mcscheds_cbit_reg {
struct __packed {
uint32_t dis_opp_cas : 1; // Bits 0:0
diff --git a/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c b/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c
new file mode 100644
index 0000000000..576c6bc21e
--- /dev/null
+++ b/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c
@@ -0,0 +1,561 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <console/console.h>
+#include <northbridge/intel/haswell/haswell.h>
+#include <types.h>
+
+#include "raminit_native.h"
+#include "ranges.h"
+
+#define RCVEN_PLOT RAM_DEBUG
+
+static enum raminit_status change_rcven_timing(struct sysinfo *ctrl, const uint8_t channel)
+{
+ int16_t max_rcven = -4096;
+ int16_t min_rcven = 4096;
+ int16_t max_rcven_rank[NUM_SLOTRANKS];
+ int16_t min_rcven_rank[NUM_SLOTRANKS];
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ max_rcven_rank[rank] = max_rcven;
+ min_rcven_rank[rank] = min_rcven;
+ }
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ int16_t new_rcven = ctrl->rcven[channel][rank][byte];
+ new_rcven -= ctrl->io_latency[channel][rank] * 64;
+ if (max_rcven_rank[rank] < new_rcven)
+ max_rcven_rank[rank] = new_rcven;
+
+ if (min_rcven_rank[rank] > new_rcven)
+ min_rcven_rank[rank] = new_rcven;
+ }
+ if (max_rcven < max_rcven_rank[rank])
+ max_rcven = max_rcven_rank[rank];
+
+ if (min_rcven > min_rcven_rank[rank])
+ min_rcven = min_rcven_rank[rank];
+ }
+
+ /*
+ * Determine how far we are from the ideal center point for RcvEn timing.
+ * (PiIdeal - AveRcvEn) / 64 is the ideal number of cycles we should have
+ * for IO latency. command training will reduce this by 64, so plan for
+ * that now in the ideal value. Round to closest integer.
+ */
+ const int16_t rre_pi_ideal = 256 + 64;
+ const int16_t pi_reserve = 64;
+ const int16_t rcven_center = (max_rcven + min_rcven) / 2;
+ const int8_t iolat_target = DIV_ROUND_CLOSEST(rre_pi_ideal - rcven_center, 64);
+
+ int8_t io_g_offset = 0;
+ int8_t io_lat[NUM_SLOTRANKS] = { 0 };
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ io_lat[rank] = iolat_target;
+
+ /* Check for RcvEn underflow/overflow */
+ const int16_t rcven_lower = 64 * io_lat[rank] + min_rcven_rank[rank];
+ if (rcven_lower < pi_reserve)
+ io_lat[rank] += DIV_ROUND_UP(pi_reserve - rcven_lower, 64);
+
+ const int16_t rcven_upper = 64 * io_lat[rank] + max_rcven_rank[rank];
+ if (rcven_upper > 511 - pi_reserve)
+ io_lat[rank] -= DIV_ROUND_UP(rcven_upper - (511 - pi_reserve), 64);
+
+ /* Check for IO latency over/underflow */
+ if (io_lat[rank] - io_g_offset > 14)
+ io_g_offset = io_lat[rank] - 14;
+
+ if (io_lat[rank] - io_g_offset < 1)
+ io_g_offset = io_lat[rank] - 1;
+
+ const int8_t cycle_offset = io_lat[rank] - ctrl->io_latency[channel][rank];
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ ctrl->rcven[channel][rank][byte] += 64 * cycle_offset;
+ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
+ }
+ }
+
+ /* Calculate new IO comp latency */
+ union sc_io_latency_reg sc_io_lat = {
+ .raw = mchbar_read32(SC_IO_LATENCY_ch(channel)),
+ };
+
+ /* Check if we are underflowing or overflowing this field */
+ if (io_g_offset < 0 && sc_io_lat.rt_iocomp < -io_g_offset) {
+ printk(BIOS_ERR, "%s: IO COMP underflow\n", __func__);
+ printk(BIOS_ERR, "io_g_offset: %d\n", io_g_offset);
+ printk(BIOS_ERR, "rt_iocomp: %u\n", sc_io_lat.rt_iocomp);
+ return RAMINIT_STATUS_RCVEN_FAILURE;
+ }
+ if (io_g_offset > 0 && io_g_offset > 0x3f - sc_io_lat.rt_iocomp) {
+ printk(BIOS_ERR, "%s: IO COMP overflow\n", __func__);
+ printk(BIOS_ERR, "io_g_offset: %d\n", io_g_offset);
+ printk(BIOS_ERR, "rt_iocomp: %u\n", sc_io_lat.rt_iocomp);
+ return RAMINIT_STATUS_RCVEN_FAILURE;
+ }
+ sc_io_lat.rt_iocomp += io_g_offset;
+ ctrl->rt_io_comp[channel] = sc_io_lat.rt_iocomp;
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (ctrl->rankmap[channel] & BIT(rank))
+ ctrl->io_latency[channel][rank] = io_lat[rank] - io_g_offset;
+
+ const uint8_t shift = rank * 4;
+ sc_io_lat.raw &= ~(0xf << shift);
+ sc_io_lat.raw |= ctrl->io_latency[channel][rank] << shift;
+ }
+ mchbar_write32(SC_IO_LATENCY_ch(channel), sc_io_lat.raw);
+ return RAMINIT_STATUS_SUCCESS;
+}
+
+#define RL_START (256 + 24)
+#define RL_STOP (384 + 24)
+#define RL_STEP 8
+
+#define RE_NUM_SAMPLES 6
+
+static enum raminit_status verify_high_region(const int32_t center, const int32_t lwidth)
+{
+ if (center > RL_STOP) {
+ /* Check if center of high was found where it should be */
+ printk(BIOS_ERR, "RcvEn: Center of high (%d) higher than expected\n", center);
+ return RAMINIT_STATUS_RCVEN_FAILURE;
+ }
+ if (lwidth <= 32) {
+ /* Check if width is large enough */
+ printk(BIOS_ERR, "RcvEn: Width of high region (%d) too small\n", lwidth);
+ return RAMINIT_STATUS_RCVEN_FAILURE;
+ }
+ if (lwidth >= 96) {
+ /* Since we're calibrating a phase, a too large region is a problem */
+ printk(BIOS_ERR, "RcvEn: Width of high region (%d) too large\n", lwidth);
+ return RAMINIT_STATUS_RCVEN_FAILURE;
+ }
+ return RAMINIT_STATUS_SUCCESS;
+}
+
+static void program_io_latency(struct sysinfo *ctrl, const uint8_t channel, const uint8_t rank)
+{
+ const uint8_t shift = rank * 4;
+ const uint8_t iolat = ctrl->io_latency[channel][rank];
+ mchbar_clrsetbits32(SC_IO_LATENCY_ch(channel), 0xf << shift, iolat << shift);
+}
+
+static void program_rl_delays(struct sysinfo *ctrl, const uint8_t rank, const uint16_t rl_delay)
+{
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ update_rxt(ctrl, channel, rank, byte, RXT_RCVEN, rl_delay);
+ }
+}
+
+static bool sample_dqs(const uint8_t channel, const uint8_t byte)
+{
+ return (get_data_train_feedback(channel, byte) & 0x1ff) >= BIT(RE_NUM_SAMPLES - 1);
+}
+
+enum raminit_status train_receive_enable(struct sysinfo *ctrl)
+{
+ const struct reut_box reut_addr = {
+ .col = {
+ .start = 0,
+ .stop = 1023,
+ .inc_rate = 0,
+ .inc_val = 1,
+ },
+ };
+ const struct wdb_pat wdb_pattern = {
+ .start_ptr = 0,
+ .stop_ptr = 9,
+ .inc_rate = 32,
+ .dq_pattern = BASIC_VA,
+ };
+
+ const uint16_t bytemask = BIT(ctrl->lanes) - 1;
+ const uint8_t fine_step = 1;
+
+ const uint8_t rt_delta = is_hsw_ult() ? 4 : 2;
+ const uint8_t rt_io_comp = 21 + rt_delta;
+ const uint8_t rt_latency = 16 + rt_delta;
+ setup_io_test(
+ ctrl,
+ ctrl->chanmap,
+ PAT_RD,
+ 2,
+ RE_NUM_SAMPLES + 1,
+ &reut_addr,
+ 0,
+ &wdb_pattern,
+ 0,
+ 8);
+
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ union ddr_data_control_2_reg data_control_2 = {
+ .raw = ctrl->dq_control_2[channel][byte],
+ };
+ data_control_2.force_rx_on = 1;
+ mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw);
+ }
+ union ddr_data_control_0_reg data_control_0 = {
+ .raw = ctrl->dq_control_0[channel],
+ };
+ if (ctrl->lpddr) {
+ /**
+ * W/A for b4618574 - @todo: remove for HSW ULT C0
+ * Can't have force_odt_on together with leaker, disable LPDDR
+ * mode during this training step. lpddr_mode is restored
+ * at the end of this function from the host structure.
+ */
+ data_control_0.lpddr_mode = 0;
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
+ }
+ data_control_0.force_odt_on = 1;
+ data_control_0.rl_training_mode = 1;
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
+ mchbar_write32(SC_IO_LATENCY_ch(channel), (union sc_io_latency_reg) {
+ .rt_iocomp = rt_io_comp,
+ }.raw);
+ }
+ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (!does_rank_exist(ctrl, rank))
+ continue;
+
+ /*
+ * Set initial roundtrip latency values. Assume -4 QCLK for worst board
+ * layout. This is calculated as HW_ROUNDT_LAT_DEFAULT_VALUE plus:
+ *
+ * DDR3: Default + (2 * tAA) + 4 QCLK + PI_CLK + N-mode value * 2
+ * LPDDR3: Default + (2 * tAA) + 4 QCLK + PI_CLK + tDQSCK_max
+ *
+ * N-mode is 3 during training mode. Both channels use the same timings.
+ */
+ /** TODO: differs for LPDDR **/
+ const uint32_t tmp = MAX(ctrl->multiplier, 4) + 5 + 2 * ctrl->tAA;
+ const uint32_t initial_rt_latency = MIN(rt_latency + tmp, 0x3f);
+
+ uint8_t chanmask = 0;
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ chanmask |= select_reut_ranks(ctrl, channel, BIT(rank));
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ ctrl->io_latency[channel][rank] = 0;
+ mchbar_write8(SC_ROUNDT_LAT_ch(channel) + rank, initial_rt_latency);
+ ctrl->rt_latency[channel][rank] = initial_rt_latency;
+ }
+
+ printk(BIOS_DEBUG, "Rank %u\n", rank);
+ printk(BIOS_DEBUG, "Steps 1 and 2: Find middle of high region\n");
+ printk(RCVEN_PLOT, "Byte");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(RCVEN_PLOT, "\t");
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ printk(RCVEN_PLOT, "%u ", byte);
+ }
+ printk(RCVEN_PLOT, "\nRcvEn\n");
+ struct phase_train_data region_data[NUM_CHANNELS][NUM_LANES] = { 0 };
+ for (uint16_t rl_delay = RL_START; rl_delay < RL_STOP; rl_delay += RL_STEP) {
+ printk(RCVEN_PLOT, " % 3d", rl_delay);
+ program_rl_delays(ctrl, rank, rl_delay);
+ run_io_test(ctrl, chanmask, BASIC_VA, true);
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(RCVEN_PLOT, "\t");
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ const bool high = sample_dqs(channel, byte);
+ printk(RCVEN_PLOT, high ? ". " : "# ");
+ phase_record_pass(
+ &region_data[channel][byte],
+ high,
+ rl_delay,
+ RL_START,
+ RL_STEP);
+ }
+ }
+ printk(RCVEN_PLOT, "\n");
+ }
+ printk(RCVEN_PLOT, "\n");
+ printk(BIOS_DEBUG, "Update RcvEn timing to be in the center of high region\n");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(BIOS_DEBUG, "C%u.R%u: \tLeft\tRight\tWidth\tCenter\n",
+ channel, rank);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ struct phase_train_data *const curr_data =
+ &region_data[channel][byte];
+ phase_append_current_to_initial(curr_data, RL_START, RL_STEP);
+ const int32_t lwidth = range_width(curr_data->largest);
+ const int32_t center = range_center(curr_data->largest);
+ printk(BIOS_DEBUG, " B%u: \t%d\t%d\t%d\t%d\n",
+ byte,
+ curr_data->largest.start,
+ curr_data->largest.end,
+ lwidth,
+ center);
+
+ status = verify_high_region(center, lwidth);
+ if (status) {
+ printk(BIOS_ERR,
+ "RcvEn problems on channel %u, byte %u\n",
+ channel, byte);
+ goto clean_up;
+ }
+ ctrl->rcven[channel][rank][byte] = center;
+ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
+ }
+ printk(BIOS_DEBUG, "\n");
+ }
+
+ printk(BIOS_DEBUG, "Step 3: Quarter preamble - Walk backwards\n");
+ printk(RCVEN_PLOT, "Byte");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(RCVEN_PLOT, "\t");
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ printk(RCVEN_PLOT, "%u ", byte);
+ }
+ printk(RCVEN_PLOT, "\nIOLAT\n");
+ bool done = false;
+ while (!done) {
+ run_io_test(ctrl, chanmask, BASIC_VA, true);
+ done = true;
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(RCVEN_PLOT, " %2u\t", ctrl->io_latency[channel][rank]);
+ uint16_t highs = 0;
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ const bool high = sample_dqs(channel, byte);
+ printk(RCVEN_PLOT, high ? "H " : "L ");
+ if (high)
+ highs |= BIT(byte);
+ }
+ if (!highs)
+ continue;
+
+ done = false;
+
+ /* If all bytes sample high, adjust timing globally */
+ if (highs == bytemask && ctrl->io_latency[channel][rank] < 14) {
+ ctrl->io_latency[channel][rank] += 2;
+ ctrl->io_latency[channel][rank] %= 16;
+ program_io_latency(ctrl, channel, rank);
+ continue;
+ }
+
+ /* Otherwise, adjust individual bytes */
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ if (!(highs & BIT(byte)))
+ continue;
+
+ if (ctrl->rcven[channel][rank][byte] < 128) {
+ printk(BIOS_ERR,
+ "RcvEn underflow: walking backwards\n");
+ printk(BIOS_ERR,
+ "For channel %u, rank %u, byte %u\n",
+ channel, rank, byte);
+ status = RAMINIT_STATUS_RCVEN_FAILURE;
+ goto clean_up;
+ }
+ ctrl->rcven[channel][rank][byte] -= 128;
+ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
+ }
+ }
+ printk(RCVEN_PLOT, "\n");
+ }
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(BIOS_DEBUG, "\nC%u: Preamble\n", channel);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ printk(BIOS_DEBUG,
+ " B%u: %u\n", byte, ctrl->rcven[channel][rank][byte]);
+ }
+ }
+ printk(BIOS_DEBUG, "\n");
+
+ printk(BIOS_DEBUG, "Step 4: Add 1 qclk\n");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ ctrl->rcven[channel][rank][byte] += 64;
+ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
+ }
+ }
+ printk(BIOS_DEBUG, "\n");
+
+ printk(BIOS_DEBUG, "Step 5: Walk forward to find rising edge\n");
+ printk(RCVEN_PLOT, "Byte");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(RCVEN_PLOT, "\t");
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ printk(RCVEN_PLOT, "%u ", byte);
+ }
+ printk(RCVEN_PLOT, "\n inc\n");
+ uint16_t ch_result[NUM_CHANNELS] = { 0 };
+ uint8_t inc_preamble[NUM_CHANNELS][NUM_LANES] = { 0 };
+ for (uint8_t inc = 0; inc < 64; inc += fine_step) {
+ printk(RCVEN_PLOT, " %2u\t", inc);
+ run_io_test(ctrl, chanmask, BASIC_VA, true);
+ done = true;
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ if (ch_result[channel] & BIT(byte)) {
+ /* Skip bytes that are already done */
+ printk(RCVEN_PLOT, ". ");
+ continue;
+ }
+ const bool pass = sample_dqs(channel, byte);
+ printk(RCVEN_PLOT, pass ? ". " : "# ");
+ if (pass) {
+ ch_result[channel] |= BIT(byte);
+ continue;
+ }
+ ctrl->rcven[channel][rank][byte] += fine_step;
+ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
+ inc_preamble[channel][byte] = inc;
+ }
+ printk(RCVEN_PLOT, "\t");
+ if (ch_result[channel] != bytemask)
+ done = false;
+ }
+ printk(RCVEN_PLOT, "\n");
+ if (done)
+ break;
+ }
+ printk(BIOS_DEBUG, "\n");
+ if (!done) {
+ printk(BIOS_ERR, "Error: Preamble edge not found for all bytes\n");
+ printk(BIOS_ERR, "The final RcvEn results are as follows:\n");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(BIOS_ERR, "Channel %u Rank %u: preamble\n",
+ channel, rank);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ printk(BIOS_ERR, " Byte %u: %u%s\n", byte,
+ ctrl->rcven[channel][rank][byte],
+ (ch_result[channel] ^ bytemask) & BIT(byte)
+ ? ""
+ : " *** Check this byte! ***");
+ }
+ }
+ status = RAMINIT_STATUS_RCVEN_FAILURE;
+ goto clean_up;
+ }
+
+ printk(BIOS_DEBUG, "Step 6: center on preamble and clean up rank\n");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(BIOS_DEBUG, "C%u: Preamble increment\n", channel);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ /*
+ * For Traditional, pull in RcvEn by 64. For ULT, take the DQS
+ * drift into account to the specified guardband: tDQSCK_DRIFT.
+ */
+ ctrl->rcven[channel][rank][byte] -= tDQSCK_DRIFT;
+ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
+ printk(BIOS_DEBUG, " B%u: %u %u\n", byte,
+ ctrl->rcven[channel][rank][byte],
+ inc_preamble[channel][byte]);
+ }
+ printk(BIOS_DEBUG, "\n");
+ }
+ printk(BIOS_DEBUG, "\n");
+ }
+
+clean_up:
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ if (ctrl->lpddr) {
+ /**
+ * W/A for b4618574 - @todo: remove for HSW ULT C0
+ * Can't have force_odt_on together with leaker, disable LPDDR mode for
+ * this training step. This write will disable force_odt_on while still
+ * keeping LPDDR mode disabled. Second write will restore LPDDR mode.
+ */
+ union ddr_data_control_0_reg data_control_0 = {
+ .raw = ctrl->dq_control_0[channel],
+ };
+ data_control_0.lpddr_mode = 0;
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
+ }
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ mchbar_write32(DQ_CONTROL_2(channel, byte),
+ ctrl->dq_control_2[channel][byte]);
+ }
+ }
+ io_reset();
+ if (status)
+ return status;
+
+ printk(BIOS_DEBUG, "Step 7: Sync IO latency across all ranks\n");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ status = change_rcven_timing(ctrl, channel);
+ if (status)
+ return status;
+ }
+ printk(BIOS_DEBUG, "\nFinal Receive Enable and IO latency settings:\n");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ const union sc_io_latency_reg sc_io_latency = {
+ .raw = mchbar_read32(SC_IO_LATENCY_ch(channel)),
+ };
+ printk(BIOS_DEBUG, " C%u.R%u: IOLAT = %u rt_iocomp = %u\n", channel,
+ rank, ctrl->io_latency[channel][rank], sc_io_latency.rt_iocomp);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ printk(BIOS_DEBUG, " B%u: %u\n", byte,
+ ctrl->rcven[channel][rank][byte]);
+ }
+ printk(BIOS_DEBUG, "\n");
+ }
+ }
+ return status;
+}
diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
index a81559bb1e..9172d4f2b0 100644
--- a/src/northbridge/intel/haswell/registers/mchbar.h
+++ b/src/northbridge/intel/haswell/registers/mchbar.h
@@ -18,6 +18,8 @@
#define RX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0000, ch, rank, byte)
#define TX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0020, ch, rank, byte)
+#define DDR_DATA_TRAIN_FEEDBACK(ch, byte) _DDRIO_C_R_B(0x0054, ch, 0, byte)
+
#define DQ_CONTROL_2(ch, byte) _DDRIO_C_R_B(0x0064, ch, 0, byte)
#define DQ_CONTROL_0(ch, byte) _DDRIO_C_R_B(0x0074, ch, 0, byte)
@@ -100,6 +102,7 @@
#define COMMAND_RATE_LIMIT_ch(ch) _MCMAIN_C(0x4010, ch)
#define TC_BANK_RANK_D_ch(ch) _MCMAIN_C(0x4014, ch)
#define SC_ROUNDT_LAT_ch(ch) _MCMAIN_C(0x4024, ch)
+#define SC_IO_LATENCY_ch(ch) _MCMAIN_C(0x4028, ch)
#define REUT_ch_PAT_WDB_CL_MUX_CFG(ch) _MCMAIN_C(0x4040, ch)
--
2.39.5

View File

@ -0,0 +1,275 @@
From 47f31f24d4be9ebc56db4bcce28ac2939ebfcabb Mon Sep 17 00:00:00 2001
From: Angel Pons <th3fanbus@gmail.com>
Date: Sun, 8 May 2022 11:58:59 +0200
Subject: [PATCH 19/24] haswell NRI: Add function to change margins
Implement a function to change margin parameters. Haswell provides a
register to apply an offset to margin parameters during training, so
make use of it. There are other margin parameters that have not been
implemented yet, as they are not needed for now and special handling
is needed to provide offset training functionality.
Change-Id: I5392380e13de3c44e77b7bc9f3b819e2661d1e2d
Signed-off-by: Angel Pons <th3fanbus@gmail.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/64195
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Maximilian Brune <maximilian.brune@9elements.com>
---
.../haswell/native_raminit/change_margin.c | 136 ++++++++++++++++++
.../haswell/native_raminit/raminit_native.h | 39 +++++
.../haswell/native_raminit/reg_structs.h | 12 ++
.../intel/haswell/registers/mchbar.h | 1 +
4 files changed, 188 insertions(+)
diff --git a/src/northbridge/intel/haswell/native_raminit/change_margin.c b/src/northbridge/intel/haswell/native_raminit/change_margin.c
index 055c666eee..299c44a6b0 100644
--- a/src/northbridge/intel/haswell/native_raminit/change_margin.c
+++ b/src/northbridge/intel/haswell/native_raminit/change_margin.c
@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <assert.h>
#include <commonlib/bsd/clamp.h>
#include <console/console.h>
#include <delay.h>
@@ -152,3 +153,138 @@ void download_regfile(
ddr_data_control_0.read_rf_rank = phys_rank;
mchbar_write32(reg, ddr_data_control_0.raw);
}
+
+static void update_data_offset_train(
+ struct sysinfo *ctrl,
+ const uint8_t param,
+ const uint8_t en_multicast,
+ const uint8_t channel_in,
+ const uint8_t rank,
+ const uint8_t byte_in,
+ const bool update_ctrl,
+ const enum regfile_mode regfile,
+ const uint32_t value)
+{
+ bool is_rd = false;
+ bool is_wr = false;
+ switch (param) {
+ case RdT:
+ case RdV:
+ case RcvEna:
+ is_rd = true;
+ break;
+ case WrT:
+ case WrDqsT:
+ is_wr = true;
+ break;
+ default:
+ die("%s: Invalid margin parameter %u\n", __func__, param);
+ }
+ if (en_multicast) {
+ mchbar_write32(DDR_DATA_OFFSET_TRAIN, value);
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ download_regfile(ctrl, channel, true, rank, regfile, 0, is_rd, is_wr);
+ if (update_ctrl) {
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ ctrl->data_offset_train[channel][byte] = value;
+ }
+ }
+ } else {
+ mchbar_write32(DDR_DATA_OFFSET_TRAIN_ch_b(channel_in, byte_in), value);
+ download_regfile(ctrl, channel_in, false, rank, regfile, byte_in, is_rd, is_wr);
+ if (update_ctrl)
+ ctrl->data_offset_train[channel_in][byte_in] = value;
+ }
+}
+
+static uint32_t get_max_margin(const enum margin_parameter param)
+{
+ switch (param) {
+ case RcvEna:
+ case RdT:
+ case WrT:
+ case WrDqsT:
+ return MAX_POSSIBLE_TIME;
+ case RdV:
+ return MAX_POSSIBLE_VREF;
+ default:
+ die("%s: Invalid margin parameter %u\n", __func__, param);
+ }
+}
+
+void change_margin(
+ struct sysinfo *ctrl,
+ const enum margin_parameter param,
+ const int32_t value0,
+ const bool en_multicast,
+ const uint8_t channel,
+ const uint8_t rank,
+ const uint8_t byte,
+ const bool update_ctrl,
+ const enum regfile_mode regfile)
+{
+ /** FIXME: Remove this **/
+ if (rank == 0xff)
+ die("%s: rank is 0xff\n", __func__);
+
+ if (!en_multicast && !does_ch_exist(ctrl, channel))
+ die("%s: Tried to change margin of empty channel %u\n", __func__, channel);
+
+ const uint32_t max_value = get_max_margin(param);
+ const int32_t v0 = clamp_s32(-max_value, value0, max_value);
+
+ union ddr_data_offset_train_reg ddr_data_offset_train = {
+ .raw = en_multicast ? 0 : ctrl->data_offset_train[channel][byte],
+ };
+ bool update_offset_train = false;
+ switch (param) {
+ case RcvEna:
+ ddr_data_offset_train.rcven = v0;
+ update_offset_train = true;
+ break;
+ case RdT:
+ ddr_data_offset_train.rx_dqs = v0;
+ update_offset_train = true;
+ break;
+ case WrT:
+ ddr_data_offset_train.tx_dq = v0;
+ update_offset_train = true;
+ break;
+ case WrDqsT:
+ ddr_data_offset_train.tx_dqs = v0;
+ update_offset_train = true;
+ break;
+ case RdV:
+ ddr_data_offset_train.vref = v0;
+ update_offset_train = true;
+ break;
+ default:
+ die("%s: Invalid margin parameter %u\n", __func__, param);
+ }
+ if (update_offset_train) {
+ update_data_offset_train(
+ ctrl,
+ param,
+ en_multicast,
+ channel,
+ rank,
+ byte,
+ update_ctrl,
+ regfile,
+ ddr_data_offset_train.raw);
+ }
+}
+
+void change_1d_margin_multicast(
+ struct sysinfo *ctrl,
+ const enum margin_parameter param,
+ const int32_t value0,
+ const uint8_t rank,
+ const bool update_ctrl,
+ const enum regfile_mode regfile)
+{
+ change_margin(ctrl, param, value0, true, 0, rank, 0, update_ctrl, regfile);
+}
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
index eaaaedad1e..1c8473056b 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
@@ -36,6 +36,18 @@
#define RTTNOM_MASK (BIT(9) | BIT(6) | BIT(2))
+/* Margin parameter limits */
+#define MAX_POSSIBLE_TIME 31
+#define MAX_POSSIBLE_VREF 54
+
+#define MAX_POSSIBLE_BOTH MAX_POSSIBLE_VREF
+
+#define MIN_TIME (-MAX_POSSIBLE_TIME)
+#define MAX_TIME (MAX_POSSIBLE_TIME)
+
+#define MIN_VREF (-MAX_POSSIBLE_VREF)
+#define MAX_VREF (MAX_POSSIBLE_VREF)
+
#define BASIC_VA_PAT_SPREAD_8 0x01010101
#define WDB_CACHE_LINE_SIZE 8
@@ -46,6 +58,14 @@
/* Specified in PI ticks. 64 PI ticks == 1 qclk */
#define tDQSCK_DRIFT 64
+enum margin_parameter {
+ RcvEna,
+ RdT,
+ WrT,
+ WrDqsT,
+ RdV,
+};
+
/* ZQ calibration types */
enum {
ZQ_INIT, /* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init with tZQinit */
@@ -515,6 +535,25 @@ void download_regfile(
bool read_rf_rd,
bool read_rf_wr);
+void change_margin(
+ struct sysinfo *ctrl,
+ const enum margin_parameter param,
+ const int32_t value0,
+ const bool en_multicast,
+ const uint8_t channel,
+ const uint8_t rank,
+ const uint8_t byte,
+ const bool update_ctrl,
+ const enum regfile_mode regfile);
+
+void change_1d_margin_multicast(
+ struct sysinfo *ctrl,
+ const enum margin_parameter param,
+ const int32_t value0,
+ const uint8_t rank,
+ const bool update_ctrl,
+ const enum regfile_mode regfile);
+
uint8_t get_rx_bias(const struct sysinfo *ctrl);
uint8_t get_tCWL(uint32_t mem_clock_mhz);
diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
index b099f4bb82..a0e36ed082 100644
--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
@@ -25,6 +25,18 @@ union ddr_data_tx_train_rank_reg {
uint32_t raw;
};
+union ddr_data_offset_train_reg {
+ struct __packed {
+ int32_t rcven : 6; // Bits 5:0
+ int32_t rx_dqs : 6; // Bits 11:6
+ int32_t tx_dq : 6; // Bits 17:12
+ int32_t tx_dqs : 6; // Bits 23:18
+ int32_t vref : 7; // Bits 30:24
+ int32_t : 1; // Bits 31:31
+ };
+ uint32_t raw;
+};
+
union ddr_data_control_0_reg {
struct __packed {
uint32_t rx_training_mode : 1; // Bits 0:0
diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
index 9172d4f2b0..0acafbc826 100644
--- a/src/northbridge/intel/haswell/registers/mchbar.h
+++ b/src/northbridge/intel/haswell/registers/mchbar.h
@@ -21,6 +21,7 @@
#define DDR_DATA_TRAIN_FEEDBACK(ch, byte) _DDRIO_C_R_B(0x0054, ch, 0, byte)
#define DQ_CONTROL_2(ch, byte) _DDRIO_C_R_B(0x0064, ch, 0, byte)
+#define DDR_DATA_OFFSET_TRAIN_ch_b(ch, byte) _DDRIO_C_R_B(0x0070, ch, 0, byte)
#define DQ_CONTROL_0(ch, byte) _DDRIO_C_R_B(0x0074, ch, 0, byte)
/* DDR CKE per-channel */
--
2.39.5

View File

@ -0,0 +1,335 @@
From 7d8f12b5654f0dfdad033552d7a19f90f15789c1 Mon Sep 17 00:00:00 2001
From: Angel Pons <th3fanbus@gmail.com>
Date: Sun, 8 May 2022 11:35:49 +0200
Subject: [PATCH 20/24] haswell NRI: Add read MPR training
Implement read training using DDR3 MPR (Multi-Purpose Register).
Change-Id: Id17cb2c4c399ac9bcc937b595b58f863c152461b
Signed-off-by: Angel Pons <th3fanbus@gmail.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/64196
Reviewed-by: Maximilian Brune <maximilian.brune@9elements.com>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
---
.../intel/haswell/native_raminit/Makefile.mk | 1 +
.../haswell/native_raminit/raminit_main.c | 1 +
.../haswell/native_raminit/raminit_native.h | 4 +
.../haswell/native_raminit/train_read_mpr.c | 241 ++++++++++++++++++
.../intel/haswell/registers/mchbar.h | 2 +-
5 files changed, 248 insertions(+), 1 deletion(-)
create mode 100644 src/northbridge/intel/haswell/native_raminit/train_read_mpr.c
diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.mk b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
index e2fbfb4211..c442be0728 100644
--- a/src/northbridge/intel/haswell/native_raminit/Makefile.mk
+++ b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
@@ -16,4 +16,5 @@ romstage-y += setup_wdb.c
romstage-y += spd_bitmunching.c
romstage-y += testing_io.c
romstage-y += timings_refresh.c
+romstage-y += train_read_mpr.c
romstage-y += train_receive_enable.c
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
index 97eb957294..82f1b945ba 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
@@ -62,6 +62,7 @@ static const struct task_entry cold_boot[] = {
{ do_jedec_init, true, "JEDECINIT", },
{ pre_training, true, "PRETRAIN", },
{ train_receive_enable, true, "RCVET", },
+ { train_read_mpr, true, "RDMPRT", },
};
/* Return a generic stepping value to make stepping checks simpler */
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
index 1c8473056b..7a486479ea 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
@@ -28,6 +28,8 @@
/* Always use 12 legs for emphasis (not trained) */
#define TXEQFULLDRV (3 << 4)
+#define LOOPCOUNT_INFINITE 0xff
+
/* DDR3 mode register bits */
#define MR0_DLL_RESET BIT(8)
@@ -213,6 +215,7 @@ enum raminit_status {
RAMINIT_STATUS_POLL_TIMEOUT,
RAMINIT_STATUS_REUT_ERROR,
RAMINIT_STATUS_RCVEN_FAILURE,
+ RAMINIT_STATUS_RMPR_FAILURE,
RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
};
@@ -434,6 +437,7 @@ enum raminit_status configure_mc(struct sysinfo *ctrl);
enum raminit_status configure_memory_map(struct sysinfo *ctrl);
enum raminit_status do_jedec_init(struct sysinfo *ctrl);
enum raminit_status train_receive_enable(struct sysinfo *ctrl);
+enum raminit_status train_read_mpr(struct sysinfo *ctrl);
void configure_timings(struct sysinfo *ctrl);
void configure_refresh(struct sysinfo *ctrl);
diff --git a/src/northbridge/intel/haswell/native_raminit/train_read_mpr.c b/src/northbridge/intel/haswell/native_raminit/train_read_mpr.c
new file mode 100644
index 0000000000..ade1e36148
--- /dev/null
+++ b/src/northbridge/intel/haswell/native_raminit/train_read_mpr.c
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <commonlib/bsd/clamp.h>
+#include <console/console.h>
+#include <delay.h>
+#include <northbridge/intel/haswell/haswell.h>
+#include <types.h>
+
+#include "raminit_native.h"
+#include "ranges.h"
+
+#define RMPR_START (-32)
+#define RMPR_STOP (32)
+#define RMPR_STEP 1
+
+#define RMPR_MIN_WIDTH 12
+
+#define RMPR_PLOT RAM_DEBUG
+
+/*
+ * Clear rx_training_mode. For LPDDR, we first need to disable odt_samp_extend_en,
+ * then disable rx_training_mode, and finally re-enable odt_samp_extend_en.
+ */
+static void clear_rx_training_mode(struct sysinfo *ctrl, const uint8_t channel)
+{
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ mchbar_write32(DQ_CONTROL_2(channel, byte), ctrl->dq_control_2[channel][byte]);
+
+ if (ctrl->lpddr) {
+ union ddr_data_control_0_reg data_control_0 = {
+ .raw = mchbar_read32(DDR_DATA_ch_CONTROL_0(channel)),
+ };
+ data_control_0.odt_samp_extend_en = 0;
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
+ tick_delay(1);
+ data_control_0.rx_training_mode = 0;
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
+ tick_delay(1);
+ }
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]);
+}
+
+static void set_rxdqs_edges_to_midpoint(struct sysinfo *ctrl)
+{
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ update_rxt(ctrl, channel, rank, byte, RXT_RXDQS_BOTH, 32);
+ }
+ }
+}
+
+static void enter_mpr_train_ddr_mode(struct sysinfo *ctrl, const uint8_t rank)
+{
+ /* Program MR3 and mask RAS/WE to prevent scheduler from issuing non-read commands */
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ if (!ctrl->lpddr)
+ reut_issue_mrs(ctrl, channel, BIT(rank), 3, 1 << 2);
+
+ union reut_misc_odt_ctrl_reg reut_misc_odt_ctrl = {
+ .raw = mchbar_read32(REUT_ch_MISC_ODT_CTRL(channel)),
+ };
+ reut_misc_odt_ctrl.mpr_train_ddr_on = 1;
+ mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), reut_misc_odt_ctrl.raw);
+ }
+}
+
+static void leave_mpr_train_ddr_mode(struct sysinfo *ctrl, const uint8_t rank)
+{
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ /*
+ * The mpr_train_ddr_on bit will force a special command.
+ * Therefore, clear it before issuing the MRS command.
+ */
+ union reut_misc_odt_ctrl_reg reut_misc_odt_ctrl = {
+ .raw = mchbar_read32(REUT_ch_MISC_ODT_CTRL(channel)),
+ };
+ reut_misc_odt_ctrl.mpr_train_ddr_on = 0;
+ mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), reut_misc_odt_ctrl.raw);
+ if (!ctrl->lpddr)
+ reut_issue_mrs(ctrl, channel, BIT(rank), 3, 0 << 2);
+ }
+}
+
+enum raminit_status train_read_mpr(struct sysinfo *ctrl)
+{
+ set_rxdqs_edges_to_midpoint(ctrl);
+ clear_data_offset_train_all(ctrl);
+ setup_io_test_mpr(ctrl, ctrl->chanmap, LOOPCOUNT_INFINITE, NSOE);
+ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (!does_rank_exist(ctrl, rank))
+ continue;
+
+ printk(BIOS_DEBUG, "Rank %u\n", rank);
+ printk(RMPR_PLOT, "Channel");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(RMPR_PLOT, "\t%u\t\t", channel);
+ }
+ printk(RMPR_PLOT, "\nByte");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(RMPR_PLOT, "\t");
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ printk(RMPR_PLOT, "%u ", byte);
+ }
+ enter_mpr_train_ddr_mode(ctrl, rank);
+ struct linear_train_data region_data[NUM_CHANNELS][NUM_LANES] = { 0 };
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++)
+ select_reut_ranks(ctrl, channel, BIT(rank));
+
+ printk(RMPR_PLOT, "\nDqsDelay\n");
+ int8_t dqs_delay;
+ for (dqs_delay = RMPR_START; dqs_delay < RMPR_STOP; dqs_delay += RMPR_STEP) {
+ printk(RMPR_PLOT, "% 5d", dqs_delay);
+ const enum regfile_mode regfile = REG_FILE_USE_START;
+ /* Looks like MRC uses rank 0 here, but it feels wrong */
+ change_1d_margin_multicast(ctrl, RdT, dqs_delay, rank, false, regfile);
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ union ddr_data_control_2_reg data_control_2 = {
+ .raw = ctrl->dq_control_2[channel][byte],
+ };
+ data_control_2.force_bias_on = 1;
+ data_control_2.force_rx_on = 1;
+ data_control_2.leaker_comp = 0;
+ mchbar_write32(DQ_CONTROL_2(channel, byte),
+ data_control_2.raw);
+ }
+ union ddr_data_control_0_reg data_control_0 = {
+ .raw = ctrl->dq_control_0[channel],
+ };
+ data_control_0.rx_training_mode = 1;
+ data_control_0.force_odt_on = !ctrl->lpddr;
+ data_control_0.en_read_preamble = 0;
+ data_control_0.odt_samp_extend_en = ctrl->lpddr;
+ const uint32_t reg_offset = DDR_DATA_ch_CONTROL_0(channel);
+ mchbar_write32(reg_offset, data_control_0.raw);
+ }
+ run_mpr_io_test(false);
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(RMPR_PLOT, "\t");
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ uint32_t fb = get_data_train_feedback(channel, byte);
+ const bool pass = fb == 1;
+ printk(RMPR_PLOT, pass ? ". " : "# ");
+ linear_record_pass(
+ &region_data[channel][byte],
+ pass,
+ dqs_delay,
+ RMPR_START,
+ RMPR_STEP);
+ }
+ }
+ printk(RMPR_PLOT, "\n");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ clear_rx_training_mode(ctrl, channel);
+ }
+ io_reset();
+ }
+ printk(RMPR_PLOT, "\n");
+ leave_mpr_train_ddr_mode(ctrl, rank);
+ clear_data_offset_train_all(ctrl);
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(BIOS_DEBUG, "C%u.R%u: \tLeft\tRight\tWidth\tCenter\tRxDqsPN\n",
+ channel, rank);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ struct linear_train_data *data = &region_data[channel][byte];
+ const int32_t lwidth = range_width(data->largest);
+ if (lwidth <= RMPR_MIN_WIDTH) {
+ printk(BIOS_ERR,
+ "Bad eye (lwidth %d <= min %d) for byte %u\n",
+ lwidth, RMPR_MIN_WIDTH, byte);
+ status = RAMINIT_STATUS_RMPR_FAILURE;
+ }
+ /*
+ * The MPR center may not be ideal on certain platforms for
+ * unknown reasons. If so, adjust it with a magical number.
+ * For Haswell, the magical number is zero. Hell knows why.
+ */
+ const int32_t center = range_center(data->largest);
+ ctrl->rxdqsp[channel][rank][byte] = center - RMPR_START;
+ ctrl->rxdqsn[channel][rank][byte] = center - RMPR_START;
+ printk(BIOS_DEBUG, " B%u: \t%d\t%d\t%d\t%d\t%u\n", byte,
+ data->largest.start, data->largest.end, lwidth,
+ center, ctrl->rxdqsp[channel][rank][byte]);
+ }
+ printk(BIOS_DEBUG, "\n");
+ }
+ }
+
+ /*
+ * Now program the DQS center values on populated ranks. data is taken from
+ * the host struct. We need to do it after all ranks are trained, because we
+ * need to keep the same DQS value on all ranks during the training procedure.
+ */
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
+ }
+ }
+ change_1d_margin_multicast(ctrl, RdT, 0, 0, false, REG_FILE_USE_CURRENT);
+ io_reset();
+ return status;
+}
diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
index 0acafbc826..6a31d3a32c 100644
--- a/src/northbridge/intel/haswell/registers/mchbar.h
+++ b/src/northbridge/intel/haswell/registers/mchbar.h
@@ -122,7 +122,7 @@
#define REUT_ch_ERR_DATA_MASK(ch) _MCMAIN_C(0x40d8, ch)
#define REUT_ch_MISC_CKE_CTRL(ch) _MCMAIN_C(0x4190, ch)
-
+#define REUT_ch_MISC_ODT_CTRL(ch) _MCMAIN_C(0x4194, ch)
#define REUT_ch_MISC_PAT_CADB_CTRL(ch) _MCMAIN_C(0x4198, ch)
#define REUT_ch_PAT_CADB_MRS(ch) _MCMAIN_C(0x419c, ch)
#define REUT_ch_PAT_CADB_MUX_CTRL(ch) _MCMAIN_C(0x41a0, ch)
--
2.39.5

View File

@ -0,0 +1,692 @@
From 174380c9046975cef79a7faa2d54e007ca44d2be Mon Sep 17 00:00:00 2001
From: Angel Pons <th3fanbus@gmail.com>
Date: Sun, 8 May 2022 12:56:04 +0200
Subject: [PATCH 21/24] haswell NRI: Add write leveling
Implement JEDEC write leveling, which is done in two steps. The first
step uses the JEDEC procedure to do "fine" write leveling, i.e. align
the DQS phase to the clock signal. The second step performs a regular
read-write test to correct "coarse" cycle errors.
Change-Id: I27678523fe22c38173a688e2a4751c259a20f009
Signed-off-by: Angel Pons <th3fanbus@gmail.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/64197
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Maximilian Brune <maximilian.brune@9elements.com>
---
.../intel/haswell/native_raminit/Makefile.mk | 1 +
.../haswell/native_raminit/raminit_main.c | 1 +
.../haswell/native_raminit/raminit_native.h | 10 +
.../train_jedec_write_leveling.c | 581 ++++++++++++++++++
.../intel/haswell/registers/mchbar.h | 2 +
5 files changed, 595 insertions(+)
create mode 100644 src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c
diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.mk b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
index c442be0728..40c2f5e014 100644
--- a/src/northbridge/intel/haswell/native_raminit/Makefile.mk
+++ b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
@@ -16,5 +16,6 @@ romstage-y += setup_wdb.c
romstage-y += spd_bitmunching.c
romstage-y += testing_io.c
romstage-y += timings_refresh.c
+romstage-y += train_jedec_write_leveling.c
romstage-y += train_read_mpr.c
romstage-y += train_receive_enable.c
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
index 82f1b945ba..b947ecbd46 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
@@ -63,6 +63,7 @@ static const struct task_entry cold_boot[] = {
{ pre_training, true, "PRETRAIN", },
{ train_receive_enable, true, "RCVET", },
{ train_read_mpr, true, "RDMPRT", },
+ { train_jedec_write_leveling, true, "JWRL", },
};
/* Return a generic stepping value to make stepping checks simpler */
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
index 7a486479ea..d6b11b9d3c 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
@@ -60,6 +60,9 @@
/* Specified in PI ticks. 64 PI ticks == 1 qclk */
#define tDQSCK_DRIFT 64
+/* Maximum additional latency */
+#define MAX_ADD_DELAY 2
+
enum margin_parameter {
RcvEna,
RdT,
@@ -216,6 +219,7 @@ enum raminit_status {
RAMINIT_STATUS_REUT_ERROR,
RAMINIT_STATUS_RCVEN_FAILURE,
RAMINIT_STATUS_RMPR_FAILURE,
+ RAMINIT_STATUS_JWRL_FAILURE,
RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
};
@@ -381,6 +385,11 @@ static inline uint32_t get_data_train_feedback(const uint8_t channel, const uint
return mchbar_read32(DDR_DATA_TRAIN_FEEDBACK(channel, byte));
}
+static inline uint16_t get_byte_group_errors(const uint8_t channel)
+{
+ return mchbar_read32(4 + REUT_ch_ERR_MISC_STATUS(channel)) & 0x1ff;
+}
+
/* Number of ticks to wait in units of 69.841279 ns (citation needed) */
static inline void tick_delay(const uint32_t delay)
{
@@ -438,6 +447,7 @@ enum raminit_status configure_memory_map(struct sysinfo *ctrl);
enum raminit_status do_jedec_init(struct sysinfo *ctrl);
enum raminit_status train_receive_enable(struct sysinfo *ctrl);
enum raminit_status train_read_mpr(struct sysinfo *ctrl);
+enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl);
void configure_timings(struct sysinfo *ctrl);
void configure_refresh(struct sysinfo *ctrl);
diff --git a/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c b/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c
new file mode 100644
index 0000000000..ef6483e2bd
--- /dev/null
+++ b/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c
@@ -0,0 +1,581 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <assert.h>
+#include <console/console.h>
+#include <delay.h>
+#include <northbridge/intel/haswell/haswell.h>
+#include <types.h>
+
+#include "raminit_native.h"
+#include "ranges.h"
+
+#define JWLC_PLOT RAM_DEBUG
+#define JWRL_PLOT RAM_DEBUG
+
+static void reset_dram_dll(struct sysinfo *ctrl, const uint8_t channel, const uint8_t rank)
+{
+ const uint16_t mr0reg = ctrl->mr0[channel][rank / 2];
+ reut_issue_mrs(ctrl, channel, BIT(rank), 0, mr0reg | MR0_DLL_RESET);
+}
+
+static void program_wdb_pattern(struct sysinfo *ctrl, const bool invert)
+{
+ /* Pattern to keep DQ-DQS simple but detect any failures. Same as NHM/WSM. */
+ const uint8_t pat[4][2] = {
+ { 0x00, 0xff },
+ { 0xff, 0x00 },
+ { 0xc3, 0x3c },
+ { 0x3c, 0xc3 },
+ };
+ const uint8_t pmask[2][8] = {
+ { 0, 0, 1, 1, 1, 1, 0, 0 },
+ { 1, 1, 0, 0, 0, 0, 1, 1 },
+ };
+ for (uint8_t s = 0; s < ARRAY_SIZE(pat); s++)
+ write_wdb_fixed_pat(ctrl, pat[s], pmask[invert], ARRAY_SIZE(pmask[invert]), s);
+}
+
+static int16_t set_add_delay(uint32_t *add_delay, uint8_t rank, int8_t target_off)
+{
+ const uint8_t shift = rank * 2;
+ if (target_off > MAX_ADD_DELAY) {
+ *add_delay &= ~(3 << shift);
+ *add_delay |= MAX_ADD_DELAY << shift;
+ return 128 * (target_off - MAX_ADD_DELAY);
+ } else if (target_off < 0) {
+ *add_delay &= ~(3 << shift);
+ *add_delay |= 0 << shift;
+ return 128 * target_off;
+ } else {
+ *add_delay &= ~(3 << shift);
+ *add_delay |= target_off << shift;
+ return 0;
+ }
+}
+
+static enum raminit_status train_jedec_write_leveling_cleanup(struct sysinfo *ctrl)
+{
+ const struct reut_box reut_addr = {
+ .col = {
+ .start = 0,
+ .stop = 1023,
+ .inc_val = 1,
+ },
+ };
+ const struct wdb_pat wdb_pattern = {
+ .start_ptr = 0,
+ .stop_ptr = 3,
+ .inc_rate = 1,
+ .dq_pattern = BASIC_VA,
+ };
+ const int8_t offsets[] = { 0, 1, -1, 2, 3 };
+ const int8_t dq_offsets[] = { 0, -10, 10, -5, 5, -15, 15 };
+ const uint8_t dq_offset_max = ARRAY_SIZE(dq_offsets);
+
+ /* Set LFSR seeds to be sequential */
+ program_wdb_lfsr(ctrl, true);
+ setup_io_test(
+ ctrl,
+ ctrl->chanmap,
+ PAT_WR_RD,
+ 2,
+ 4,
+ &reut_addr,
+ NSOE,
+ &wdb_pattern,
+ 0,
+ 0);
+
+ const union reut_pat_wdb_cl_mux_cfg_reg reut_wdb_cl_mux_cfg = {
+ .mux_0_control = REUT_MUX_BTBUFFER,
+ .mux_1_control = REUT_MUX_BTBUFFER,
+ .mux_2_control = REUT_MUX_BTBUFFER,
+ .ecc_data_source_sel = 1,
+ };
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_CFG(channel), reut_wdb_cl_mux_cfg.raw);
+ }
+
+ int8_t byte_off[NUM_CHANNELS][NUM_LANES] = { 0 };
+ uint32_t add_delay[NUM_CHANNELS] = { 0 };
+ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
+ bool invert = false;
+ const uint16_t valid_byte_mask = BIT(ctrl->lanes) - 1;
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ uint8_t chanmask = 0;
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++)
+ chanmask |= select_reut_ranks(ctrl, channel, BIT(rank));
+
+ if (!chanmask)
+ continue;
+
+ printk(BIOS_DEBUG, "Rank %u\n", rank);
+ printk(JWLC_PLOT, "Channel");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(JWLC_PLOT, "\t\t%u\t", channel);
+ }
+ printk(JWLC_PLOT, "\nByte\t");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(JWLC_PLOT, "\t");
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ printk(JWLC_PLOT, "%u ", byte);
+ }
+ printk(JWLC_PLOT, "\nDelay DqOffset");
+ bool done = false;
+ int8_t byte_sum[NUM_CHANNELS] = { 0 };
+ uint16_t byte_pass[NUM_CHANNELS] = { 0 };
+ for (uint8_t off = 0; off < ARRAY_SIZE(offsets); off++) {
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ const int16_t global_byte_off =
+ set_add_delay(&add_delay[channel], rank, offsets[off]);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ update_txt(ctrl, channel, rank, byte, TXT_DQDQS_OFF,
+ global_byte_off);
+ }
+ mchbar_write32(SC_WR_ADD_DELAY_ch(channel),
+ add_delay[channel]);
+ }
+ /* Reset FIFOs and DRAM DLL (Micron workaround) */
+ if (!ctrl->lpddr) {
+ io_reset();
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ reset_dram_dll(ctrl, channel, rank);
+ }
+ udelay(1);
+ }
+ for (uint8_t dq_offset = 0; dq_offset < dq_offset_max; dq_offset++) {
+ printk(JWLC_PLOT, "\n% 3d\t% 3d",
+ offsets[off], dq_offsets[dq_offset]);
+ change_1d_margin_multicast(
+ ctrl,
+ WrT,
+ dq_offsets[dq_offset],
+ rank,
+ false,
+ REG_FILE_USE_RANK);
+
+ /*
+ * Re-program the WDB pattern. Change the pattern
+ * for the next test to avoid false pass issues.
+ */
+ program_wdb_pattern(ctrl, invert);
+ invert = !invert;
+ run_io_test(ctrl, chanmask, BASIC_VA, true);
+ done = true;
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(JWLC_PLOT, "\t");
+ uint16_t result = get_byte_group_errors(channel);
+ result &= valid_byte_mask;
+
+ /* Skip bytes that have failed or already passed */
+ const uint16_t skip_me = result | byte_pass[channel];
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ const bool pass = result & BIT(byte);
+ printk(JWLC_PLOT, pass ? "# " : ". ");
+ if (skip_me & BIT(byte))
+ continue;
+
+ byte_pass[channel] |= BIT(byte);
+ byte_off[channel][byte] = offsets[off];
+ byte_sum[channel] += offsets[off];
+ }
+ if (byte_pass[channel] != valid_byte_mask)
+ done = false;
+ }
+ if (done)
+ break;
+ }
+ if (done)
+ break;
+ }
+ printk(BIOS_DEBUG, "\n\n");
+ if (!done) {
+ printk(BIOS_ERR, "JWLC: Could not find a pass for all bytes\n");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(BIOS_ERR, "Channel %u, rank %u fail:", channel, rank);
+ const uint16_t passing_mask = byte_pass[channel];
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ if (BIT(byte) & passing_mask)
+ continue;
+
+ printk(BIOS_ERR, " %u", byte);
+ }
+ printk(BIOS_ERR, "\n");
+ }
+ status = RAMINIT_STATUS_JWRL_FAILURE;
+ break;
+ }
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ /* Refine target offset to make sure it works for all bytes */
+ int8_t target_off = DIV_ROUND_CLOSEST(byte_sum[channel], ctrl->lanes);
+ int16_t global_byte_off = 0;
+ uint8_t all_good_loops = 0;
+ bool all_good = 0;
+ while (!all_good) {
+ global_byte_off =
+ set_add_delay(&add_delay[channel], rank, target_off);
+ all_good = true;
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ int16_t local_offset;
+ local_offset = byte_off[channel][byte] - target_off;
+ local_offset = local_offset * 128 + global_byte_off;
+ const uint16_t tx_dq = ctrl->tx_dq[channel][rank][byte];
+ if (tx_dq + local_offset >= (512 - 64)) {
+ all_good = false;
+ all_good_loops++;
+ target_off++;
+ break;
+ }
+ const uint16_t txdqs = ctrl->tx_dq[channel][rank][byte];
+ if (txdqs + local_offset < 96) {
+ all_good = false;
+ all_good_loops++;
+ target_off--;
+ break;
+ }
+ }
+ /* Avoid an infinite loop */
+ if (all_good_loops > 3)
+ break;
+ }
+ if (!all_good) {
+ printk(BIOS_ERR, "JWLC: Target offset refining failed\n");
+ status = RAMINIT_STATUS_JWRL_FAILURE;
+ break;
+ }
+ printk(BIOS_DEBUG, "C%u.R%u: Offset\tFinalEdge\n", channel, rank);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ int16_t local_offset;
+ local_offset = byte_off[channel][byte] - target_off;
+ local_offset = local_offset * 128 + global_byte_off;
+ ctrl->tx_dq[channel][rank][byte] += local_offset;
+ ctrl->txdqs[channel][rank][byte] += local_offset;
+ update_txt(ctrl, channel, rank, byte, TXT_RESTORE, 0);
+ printk(BIOS_DEBUG, " B%u: %d\t%d\n", byte, local_offset,
+ ctrl->txdqs[channel][rank][byte]);
+ }
+ mchbar_write32(SC_WR_ADD_DELAY_ch(channel), add_delay[channel]);
+ if (!ctrl->lpddr) {
+ reset_dram_dll(ctrl, channel, rank);
+ udelay(1);
+ }
+ printk(BIOS_DEBUG, "\n");
+ }
+ printk(BIOS_DEBUG, "\n");
+ }
+
+ /* Restore WDB after test */
+ write_wdb_va_pat(ctrl, 0, BASIC_VA_PAT_SPREAD_8, 8, 0);
+ program_wdb_lfsr(ctrl, false);
+ mchbar_write32(DDR_DATA_OFFSET_TRAIN, 0);
+
+ /** TODO: Do full JEDEC init instead? **/
+ io_reset();
+ return status;
+}
+
+static enum raminit_status verify_wl_width(const int32_t lwidth)
+{
+ if (lwidth <= 32) {
+ /* Check if width is valid */
+ printk(BIOS_ERR, "WrLevel: Width region (%d) too small\n", lwidth);
+ return RAMINIT_STATUS_JWRL_FAILURE;
+ }
+ if (lwidth >= 96) {
+ /* Since we're calibrating a phase, a too large region is a problem */
+ printk(BIOS_ERR, "WrLevel: Width region (%d) too large\n", lwidth);
+ return RAMINIT_STATUS_JWRL_FAILURE;
+ }
+ return 0;
+}
+
+enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl)
+{
+ /*
+ * Enabling WL mode causes DQS to toggle for 1024 QCLK.
+ * Wait for this to stop. Round up to nearest microsecond.
+ */
+ const bool wl_long_delay = ctrl->lpddr;
+ const uint32_t dqs_toggle_time = wl_long_delay ? 2048 : 1024;
+ const uint32_t wait_time_us = DIV_ROUND_UP(ctrl->qclkps * dqs_toggle_time, 1000 * 1000);
+
+ const uint16_t wl_start = 192;
+ const uint16_t wl_stop = 192 + 128;
+ const uint16_t wl_step = 2;
+
+ /* Do not use cached MR values */
+ const bool save_restore_mrs = ctrl->restore_mrs;
+ ctrl->restore_mrs = 0;
+
+ /* Propagate delay values (without a write command) */
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ /* Propagate delay values from rank 0 to prevent assertion failures in RTL */
+ union ddr_data_control_0_reg data_control_0 = {
+ .raw = ctrl->dq_control_0[channel],
+ };
+ data_control_0.read_rf_rd = 0;
+ data_control_0.read_rf_wr = 1;
+ data_control_0.read_rf_rank = 0;
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ union ddr_data_control_2_reg data_control_2 = {
+ .raw = ctrl->dq_control_2[channel][byte],
+ };
+ data_control_2.force_bias_on = 1;
+ data_control_2.force_rx_on = 0;
+ data_control_2.wl_long_delay = wl_long_delay;
+ mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw);
+ }
+ }
+
+ if (ctrl->lpddr)
+ die("%s: Missing LPDDR support\n", __func__);
+
+ if (!ctrl->lpddr)
+ ddr3_program_mr1(ctrl, 0, 1);
+
+ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
+ struct phase_train_data region_data[NUM_CHANNELS][NUM_LANES] = { 0 };
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (!does_rank_exist(ctrl, rank))
+ continue;
+
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ /** TODO: Differs for LPDDR **/
+ uint16_t mr1reg = ctrl->mr1[channel][rank / 2];
+ mr1reg &= ~MR1_QOFF_ENABLE;
+ mr1reg |= MR1_WL_ENABLE;
+ if (is_hsw_ult()) {
+ mr1reg &= ~RTTNOM_MASK;
+ mr1reg |= encode_ddr3_rttnom(120);
+ } else if (ctrl->dpc[channel] == 2) {
+ mr1reg &= ~RTTNOM_MASK;
+ mr1reg |= encode_ddr3_rttnom(60);
+ }
+ reut_issue_mrs(ctrl, channel, BIT(rank), 1, mr1reg);
+
+ /* Assert ODT for myself */
+ uint8_t odt_matrix = BIT(rank);
+ if (ctrl->dpc[channel] == 2) {
+ /* Assert ODT for non-target DIMM */
+ const uint8_t other_dimm = ((rank + 2) / 2) & 1;
+ odt_matrix |= BIT(2 * other_dimm);
+ }
+
+ union reut_misc_odt_ctrl_reg reut_misc_odt_ctrl = {
+ .raw = 0,
+ };
+ if (ctrl->lpddr) {
+ /* Only one ODT pin for ULT */
+ reut_misc_odt_ctrl.odt_on = 1;
+ reut_misc_odt_ctrl.odt_override = 1;
+ } else if (!is_hsw_ult()) {
+ reut_misc_odt_ctrl.odt_on = odt_matrix;
+ reut_misc_odt_ctrl.odt_override = 0xf;
+ }
+ mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), reut_misc_odt_ctrl.raw);
+ }
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ /*
+ * Enable write leveling mode in DDR and propagate delay
+ * values (without a write command). Stay in WL mode.
+ */
+ union ddr_data_control_0_reg data_control_0 = {
+ .raw = ctrl->dq_control_0[channel],
+ };
+ data_control_0.wl_training_mode = 1;
+ data_control_0.tx_pi_on = 1;
+ data_control_0.read_rf_rd = 0;
+ data_control_0.read_rf_wr = 1;
+ data_control_0.read_rf_rank = rank;
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
+ }
+ printk(BIOS_DEBUG, "\nRank %u\n", rank);
+ printk(JWRL_PLOT, "Channel\t");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(JWRL_PLOT, "%u", channel);
+ if (channel > 0)
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ printk(JWRL_PLOT, "\t");
+ }
+ printk(JWRL_PLOT, "\nByte");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ printk(JWRL_PLOT, "\t%u", byte);
+ }
+ printk(JWRL_PLOT, "\nWlDelay");
+ for (uint16_t wl_delay = wl_start; wl_delay < wl_stop; wl_delay += wl_step) {
+ printk(JWRL_PLOT, "\n %3u:", wl_delay);
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ update_txt(ctrl, channel, rank, byte, TXT_TXDQS,
+ wl_delay);
+ }
+ }
+ /* Wait for the first burst to finish */
+ if (wl_delay == wl_start)
+ udelay(wait_time_us);
+
+ io_reset();
+ udelay(wait_time_us);
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ const uint32_t feedback =
+ get_data_train_feedback(channel, byte);
+ const bool pass = (feedback & 0x1ff) >= 16;
+ printk(JWRL_PLOT, "\t%c%u", pass ? '.' : '#', feedback);
+ phase_record_pass(
+ &region_data[channel][byte],
+ pass,
+ wl_delay,
+ wl_start,
+ wl_step);
+ }
+ }
+ }
+ printk(JWRL_PLOT, "\n");
+ printk(BIOS_DEBUG, "\n\tInitSt\tInitEn\tCurrSt\tCurrEn\tLargSt\tLargEn\n");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(BIOS_DEBUG, "C%u\n", channel);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ struct phase_train_data *data = &region_data[channel][byte];
+
+ phase_append_initial_to_current(data, wl_start, wl_step);
+ printk(BIOS_DEBUG, " B%u:\t%d\t%d\t%d\t%d\t%d\t%d\n",
+ byte,
+ data->initial.start,
+ data->initial.end,
+ data->current.start,
+ data->current.end,
+ data->largest.start,
+ data->largest.end);
+ }
+ }
+
+ /*
+ * Clean up after test. Very coarsely adjust for
+ * any cycle errors. Program values for TxDQS.
+ */
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ /* Clear ODT before MRS (JEDEC spec) */
+ mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), 0);
+
+ /** TODO: Differs for LPDDR **/
+ const uint16_t mr1reg = ctrl->mr1[channel][rank / 2] | MR1_QOFF_ENABLE;
+ reut_issue_mrs(ctrl, channel, BIT(rank), 1, mr1reg);
+
+ printk(BIOS_DEBUG, "\nC%u.R%u: LftEdge Width\n", channel, rank);
+ const bool rank_x16 = ctrl->dimms[channel][rank / 2].data.width == 16;
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ struct phase_train_data *data = &region_data[channel][byte];
+ const int32_t lwidth = range_width(data->largest);
+ int32_t tx_start = data->largest.start;
+ printk(BIOS_DEBUG, " B%u: %d\t%d\n", byte, tx_start, lwidth);
+ status = verify_wl_width(lwidth);
+ if (status) {
+ printk(BIOS_ERR,
+ "WrLevel problems on channel %u, byte %u\n",
+ channel, byte);
+ goto clean_up;
+ }
+
+ /* Align byte pairs if DIMM is x16 */
+ if (rank_x16 && (byte & 1)) {
+ const struct phase_train_data *const ref_data =
+ &region_data[channel][byte - 1];
+
+ if (tx_start > ref_data->largest.start + 64)
+ tx_start -= 128;
+
+ if (tx_start < ref_data->largest.start - 64)
+ tx_start += 128;
+ }
+
+ /* Fix for b4618067 - need to add 1 QCLK to DQS PI */
+ if (is_hsw_ult())
+ tx_start += 64;
+
+ assert(tx_start >= 0);
+ ctrl->txdqs[channel][rank][byte] = tx_start;
+ ctrl->tx_dq[channel][rank][byte] = tx_start + 32;
+ update_txt(ctrl, channel, rank, byte, TXT_RESTORE, 0);
+ }
+ }
+ printk(BIOS_DEBUG, "\n");
+ }
+
+clean_up:
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ mchbar_write32(DQ_CONTROL_2(channel, byte),
+ ctrl->dq_control_2[channel][byte]);
+ }
+ }
+ if (!ctrl->lpddr)
+ ddr3_program_mr1(ctrl, 0, 0);
+
+ ctrl->restore_mrs = save_restore_mrs;
+
+ if (status)
+ return status;
+
+ /** TODO: If this step fails and dec_wrd is set, clear it and try again **/
+ return train_jedec_write_leveling_cleanup(ctrl);
+}
diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
index 6a31d3a32c..7c0b5a49de 100644
--- a/src/northbridge/intel/haswell/registers/mchbar.h
+++ b/src/northbridge/intel/haswell/registers/mchbar.h
@@ -121,6 +121,8 @@
#define REUT_ch_ERR_DATA_MASK(ch) _MCMAIN_C(0x40d8, ch)
+#define REUT_ch_ERR_MISC_STATUS(ch) _MCMAIN_C(0x40e8, ch)
+
#define REUT_ch_MISC_CKE_CTRL(ch) _MCMAIN_C(0x4190, ch)
#define REUT_ch_MISC_ODT_CTRL(ch) _MCMAIN_C(0x4194, ch)
#define REUT_ch_MISC_PAT_CADB_CTRL(ch) _MCMAIN_C(0x4198, ch)
--
2.39.5

View File

@ -0,0 +1,573 @@
From a2f059c87e66111b9333e21b205726813a8604c9 Mon Sep 17 00:00:00 2001
From: Angel Pons <th3fanbus@gmail.com>
Date: Sun, 8 May 2022 14:29:05 +0200
Subject: [PATCH 22/24] haswell NRI: Add final raminit steps
Implement the remaining raminit steps. Although many training steps are
missing, this is enough to boot on the Asrock B85M Pro4.
Change-Id: I94f3b65f0218d4da4fda4d84592dfd91f77f8f21
Signed-off-by: Angel Pons <th3fanbus@gmail.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/64198
Reviewed-by: Maximilian Brune <maximilian.brune@9elements.com>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
---
src/northbridge/intel/haswell/Kconfig | 4 +-
.../intel/haswell/native_raminit/Makefile.mk | 1 +
.../haswell/native_raminit/activate_mc.c | 388 ++++++++++++++++++
.../haswell/native_raminit/raminit_main.c | 5 +-
.../haswell/native_raminit/raminit_native.c | 5 +-
.../haswell/native_raminit/raminit_native.h | 2 +
.../haswell/native_raminit/reg_structs.h | 12 +
.../intel/haswell/registers/mchbar.h | 7 +
8 files changed, 416 insertions(+), 8 deletions(-)
create mode 100644 src/northbridge/intel/haswell/native_raminit/activate_mc.c
diff --git a/src/northbridge/intel/haswell/Kconfig b/src/northbridge/intel/haswell/Kconfig
index 35403373e7..6191cb6ccf 100644
--- a/src/northbridge/intel/haswell/Kconfig
+++ b/src/northbridge/intel/haswell/Kconfig
@@ -11,12 +11,12 @@ config NORTHBRIDGE_INTEL_HASWELL
if NORTHBRIDGE_INTEL_HASWELL
config USE_NATIVE_RAMINIT
- bool "[NOT WORKING] Use native raminit"
+ bool "[NOT COMPLETE] Use native raminit"
default n
select HAVE_DEBUG_RAM_SETUP
help
Select if you want to use coreboot implementation of raminit rather than
- MRC.bin. Currently incomplete and does not boot.
+ MRC.bin. Currently incomplete and does not support S3 resume.
config HASWELL_VBOOT_IN_BOOTBLOCK
depends on VBOOT
diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.mk b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
index 40c2f5e014..d97da72890 100644
--- a/src/northbridge/intel/haswell/native_raminit/Makefile.mk
+++ b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
@@ -1,5 +1,6 @@
## SPDX-License-Identifier: GPL-2.0-or-later
+romstage-y += activate_mc.c
romstage-y += change_margin.c
romstage-y += configure_mc.c
romstage-y += ddr3.c
diff --git a/src/northbridge/intel/haswell/native_raminit/activate_mc.c b/src/northbridge/intel/haswell/native_raminit/activate_mc.c
new file mode 100644
index 0000000000..78a7ad27ef
--- /dev/null
+++ b/src/northbridge/intel/haswell/native_raminit/activate_mc.c
@@ -0,0 +1,388 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <console/console.h>
+#include <delay.h>
+#include <device/pci_ops.h>
+#include <northbridge/intel/haswell/haswell.h>
+#include <timer.h>
+#include <types.h>
+
+#include "raminit_native.h"
+
+static void update_internal_clocks_on(struct sysinfo *ctrl)
+{
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ bool clocks_on = false;
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ const union ddr_data_control_1_reg data_control_1 = {
+ .raw = ctrl->dq_control_1[channel][byte],
+ };
+ const int8_t o_on = data_control_1.odt_delay;
+ const int8_t s_on = data_control_1.sense_amp_delay;
+ const int8_t o_off = data_control_1.odt_duration;
+ const int8_t s_off = data_control_1.sense_amp_duration;
+ if (o_on + o_off >= 7 || s_on + s_off >= 7) {
+ clocks_on = true;
+ break;
+ }
+ }
+ union ddr_data_control_0_reg data_control_0 = {
+ .raw = ctrl->dq_control_0[channel],
+ };
+ data_control_0.internal_clocks_on = clocks_on;
+ ctrl->dq_control_0[channel] = data_control_0.raw;
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
+ }
+}
+
+/* Switch off unused segments of the SDLL to save power */
+static void update_sdll_length(struct sysinfo *ctrl)
+{
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ uint8_t max_pi = 0;
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ const uint8_t rx_dqs_p = ctrl->rxdqsp[channel][rank][byte];
+ const uint8_t rx_dqs_n = ctrl->rxdqsn[channel][rank][byte];
+ max_pi = MAX(max_pi, MAX(rx_dqs_p, rx_dqs_n));
+ }
+ /* Update SDLL length for power savings */
+ union ddr_data_control_1_reg data_control_1 = {
+ .raw = ctrl->dq_control_1[channel][byte],
+ };
+ /* Calculate which segments to turn off */
+ data_control_1.sdll_segment_disable = (7 - (max_pi >> 3)) & ~1;
+ ctrl->dq_control_1[channel][byte] = data_control_1.raw;
+ mchbar_write32(DQ_CONTROL_1(channel, byte), data_control_1.raw);
+ }
+ }
+}
+
+static void set_rx_clk_stg_num(struct sysinfo *ctrl, const uint8_t channel)
+{
+ const uint8_t rcven_drift = ctrl->lpddr ? DIV_ROUND_UP(tDQSCK_DRIFT, ctrl->qclkps) : 1;
+ uint8_t max_rcven = 0;
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ max_rcven = MAX(max_rcven, ctrl->rcven[channel][rank][byte] / 64);
+ }
+ const union ddr_data_control_1_reg ddr_data_control_1 = {
+ .raw = ctrl->dq_control_1[channel][0],
+ };
+ const bool lpddr_long_odt = ddr_data_control_1.lpddr_long_odt_en;
+ const uint8_t rcven_turnoff = max_rcven + 18 + 2 * rcven_drift + lpddr_long_odt;
+ const union ddr_data_control_0_reg ddr_data_control_0 = {
+ .raw = ctrl->dq_control_0[channel],
+ };
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ union ddr_data_control_2_reg ddr_data_control_2 = {
+ .raw = ctrl->dq_control_2[channel][byte],
+ };
+ if (ddr_data_control_0.odt_samp_extend_en) {
+ if (ddr_data_control_2.rx_clk_stg_num < rcven_turnoff)
+ ddr_data_control_2.rx_clk_stg_num = rcven_turnoff;
+ } else {
+ const int8_t o_on = ddr_data_control_1.odt_delay;
+ const int8_t o_off = ddr_data_control_1.odt_duration;
+ ddr_data_control_2.rx_clk_stg_num = MAX(17, o_on + o_off + 14);
+ }
+ ctrl->dq_control_2[channel][byte] = ddr_data_control_2.raw;
+ mchbar_write32(DQ_CONTROL_2(channel, byte), ddr_data_control_2.raw);
+ }
+}
+
+#define SELF_REFRESH_IDLE_COUNT 0x200
+
+static void enter_sr(void)
+{
+ mchbar_write32(PM_SREF_CONFIG, SELF_REFRESH_IDLE_COUNT | BIT(16));
+ udelay(1);
+}
+
+enum power_down_mode {
+ PDM_NO_PD = 0,
+ PDM_APD = 1,
+ PDM_PPD = 2,
+ PDM_PPD_DLL_OFF = 6,
+};
+
+static void power_down_config(struct sysinfo *ctrl)
+{
+ const enum power_down_mode pd_mode = ctrl->lpddr ? PDM_PPD : PDM_PPD_DLL_OFF;
+ mchbar_write32(PM_PDWN_CONFIG, pd_mode << 12 | 0x40);
+}
+
+static void train_power_modes_post(struct sysinfo *ctrl)
+{
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ /* Adjust tCPDED and tPRPDEN */
+ if (ctrl->mem_clock_mhz >= 933)
+ ctrl->tc_bankrank_d[channel].tCPDED = 2;
+
+ if (ctrl->mem_clock_mhz >= 1066)
+ ctrl->tc_bankrank_d[channel].tPRPDEN = 2;
+
+ mchbar_write32(TC_BANK_RANK_D_ch(channel), ctrl->tc_bankrank_d[channel].raw);
+ }
+ power_down_config(ctrl);
+ mchbar_write32(MCDECS_CBIT, BIT(30)); /* dis_msg_clk_gate */
+}
+
+static uint8_t compute_burst_end_odt_delay(const struct sysinfo *const ctrl)
+{
+ /* Must be disabled for LPDDR */
+ if (ctrl->lpddr)
+ return 0;
+
+ const uint8_t beod = MIN(7, DIV_ROUND_CLOSEST(14300 * 20 / 100, ctrl->qclkps));
+ if (beod < 3)
+ return 0;
+
+ if (beod < 4)
+ return 4;
+
+ return beod;
+}
+
+static void program_burst_end_odt_delay(struct sysinfo *ctrl)
+{
+ /* Program burst_end_odt_delay - it should be zero during training steps */
+ const uint8_t beod = compute_burst_end_odt_delay(ctrl);
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ union ddr_data_control_1_reg ddr_data_control_1 = {
+ .raw = ctrl->dq_control_1[channel][byte],
+ };
+ ddr_data_control_1.burst_end_odt_delay = beod;
+ ctrl->dq_control_1[channel][byte] = ddr_data_control_1.raw;
+ mchbar_write32(DQ_CONTROL_1(channel, byte), ddr_data_control_1.raw);
+ }
+ }
+}
+
+/*
+ * Return a random value to use for scrambler seeds. Try to use RDRAND
+ * first and fall back to hardcoded values if RDRAND does not succeed.
+ */
+static uint16_t get_random_number(const uint8_t channel)
+{
+ /* The RDRAND instruction is only available 100k cycles after reset */
+ for (size_t i = 0; i < 100000; i++) {
+ uint32_t status;
+ uint32_t random;
+ /** TODO: Clean up asm **/
+ __asm__ __volatile__(
+ "\n\t .byte 0x0F, 0xC7, 0xF0"
+ "\n\t movl %%eax, %0"
+ "\n\t pushf"
+ "\n\t pop %%eax"
+ "\n\t movl %%eax, %1"
+ : "=m"(random),
+ "=m"(status)
+ : /* No inputs */
+ : "eax", "cc");
+
+ /* Only consider non-zero random values as valid */
+ if (status & 1 && random)
+ return random;
+ }
+
+ /* https://xkcd.com/221 */
+ if (channel)
+ return 0x28f4;
+ else
+ return 0x893e;
+}
+
+/* Work around "error: 'typeof' applied to a bit-field" */
+static inline uint32_t max(const uint32_t a, const uint32_t b)
+{
+ return MAX(a, b);
+}
+
+enum raminit_status activate_mc(struct sysinfo *ctrl)
+{
+ const bool enable_scrambling = true;
+ const bool enable_cmd_tristate = true;
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ if (enable_scrambling && ctrl->stepping < STEPPING_C0) {
+ /* Make sure tRDRD_(sr, dr, dd) are at least 6 for scrambler W/A */
+ union tc_bank_rank_a_reg tc_bank_rank_a = {
+ .raw = mchbar_read32(TC_BANK_RANK_A_ch(channel)),
+ };
+ tc_bank_rank_a.tRDRD_sr = max(tc_bank_rank_a.tRDRD_sr, 6);
+ tc_bank_rank_a.tRDRD_dr = max(tc_bank_rank_a.tRDRD_dr, 6);
+ tc_bank_rank_a.tRDRD_dd = max(tc_bank_rank_a.tRDRD_dd, 6);
+ mchbar_write32(TC_BANK_RANK_A_ch(channel), tc_bank_rank_a.raw);
+ }
+ if (enable_scrambling) {
+ const union ddr_scramble_reg ddr_scramble = {
+ .scram_key = get_random_number(channel),
+ .scram_en = 1,
+ };
+ mchbar_write32(DDR_SCRAMBLE_ch(channel), ddr_scramble.raw);
+ }
+ if (ctrl->tCMD == 1) {
+ /* If we are in 1N mode, enable and set command rate limit to 3 */
+ union mcmain_command_rate_limit_reg cmd_rate_limit = {
+ .raw = mchbar_read32(COMMAND_RATE_LIMIT_ch(channel)),
+ };
+ cmd_rate_limit.enable_cmd_limit = 1;
+ cmd_rate_limit.cmd_rate_limit = 3;
+ mchbar_write32(COMMAND_RATE_LIMIT_ch(channel), cmd_rate_limit.raw);
+ }
+ if (enable_cmd_tristate) {
+ /* Enable command tri-state at the end of training */
+ union tc_bank_rank_a_reg tc_bank_rank_a = {
+ .raw = mchbar_read32(TC_BANK_RANK_A_ch(channel)),
+ };
+ tc_bank_rank_a.cmd_3st_dis = 0;
+ mchbar_write32(TC_BANK_RANK_A_ch(channel), tc_bank_rank_a.raw);
+ }
+ /* Set MC to normal mode and clean the ODT and CKE */
+ mchbar_write32(REUT_ch_SEQ_CFG(channel), REUT_MODE_NOP << 12);
+ /* Set again the rank occupancy */
+ mchbar_write8(MC_INIT_STATE_ch(channel), ctrl->rankmap[channel]);
+ if (ctrl->is_ecc) {
+ /* Enable ECC I/O and logic */
+ union mad_dimm_reg mad_dimm = {
+ .raw = mchbar_read32(MAD_DIMM(channel)),
+ };
+ mad_dimm.ecc_mode = 3;
+ mchbar_write32(MAD_DIMM(channel), mad_dimm.raw);
+ }
+ }
+
+ if (!is_hsw_ult())
+ update_internal_clocks_on(ctrl);
+
+ update_sdll_length(ctrl);
+
+ program_burst_end_odt_delay(ctrl);
+
+ if (is_hsw_ult()) {
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ set_rx_clk_stg_num(ctrl, channel);
+ }
+ /** TODO: Program DDRPL_CR_DDR_TX_DELAY if Memory Trace is enabled **/
+ }
+
+ /* Enable periodic COMP */
+ mchbar_write32(M_COMP, (union pcu_comp_reg) {
+ .comp_interval = COMP_INT,
+ }.raw);
+
+ /* Enable the power mode before PCU starts working */
+ train_power_modes_post(ctrl);
+
+ /* Set idle timer and self refresh enable bits */
+ enter_sr();
+
+ /** FIXME: Do not hardcode power weights and RAPL settings **/
+ mchbar_write32(0x5888, 0x00000d0d);
+ mchbar_write32(0x5884, 0x00000004); /* 58.2 pJ */
+
+ mchbar_write32(0x58e0, 0);
+ mchbar_write32(0x58e4, 0);
+
+ mchbar_write32(0x5890, 0xffff);
+ mchbar_write32(0x5894, 0xffff);
+ mchbar_write32(0x5898, 0xffff);
+ mchbar_write32(0x589c, 0xffff);
+ mchbar_write32(0x58d0, 0xffff);
+ mchbar_write32(0x58d4, 0xffff);
+ mchbar_write32(0x58d8, 0xffff);
+ mchbar_write32(0x58dc, 0xffff);
+
+ /* Overwrite thermal parameters */
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ mchbar_write32(_MCMAIN_C(0x42ec, channel), 0x0000000f);
+ mchbar_write32(_MCMAIN_C(0x42f0, channel), 0x00000009);
+ mchbar_write32(_MCMAIN_C(0x42f4, channel), 0x00000093);
+ mchbar_write32(_MCMAIN_C(0x42f8, channel), 0x00000087);
+ mchbar_write32(_MCMAIN_C(0x42fc, channel), 0x000000de);
+
+ /** TODO: Differs for LPDDR **/
+ mchbar_write32(PM_THRT_CKE_MIN_ch(channel), 0x30);
+ }
+ mchbar_write32(PCU_DDR_PTM_CTL, 0x40);
+ return RAMINIT_STATUS_SUCCESS;
+}
+
+static void mc_lockdown(void)
+{
+ /* Lock memory controller registers */
+ mchbar_write32(MC_LOCK, 0x8f);
+
+ /* MPCOHTRK_GDXC_OCLA_ADDRESS_HI_LOCK is set when programming the memory map */
+
+ /* Lock memory map registers */
+ pci_or_config16(HOST_BRIDGE, GGC, 1 << 0);
+ pci_or_config32(HOST_BRIDGE, DPR, 1 << 0);
+ pci_or_config32(HOST_BRIDGE, MESEG_LIMIT, 1 << 10);
+ pci_or_config32(HOST_BRIDGE, REMAPBASE, 1 << 0);
+ pci_or_config32(HOST_BRIDGE, REMAPLIMIT, 1 << 0);
+ pci_or_config32(HOST_BRIDGE, TOM, 1 << 0);
+ pci_or_config32(HOST_BRIDGE, TOUUD, 1 << 0);
+ pci_or_config32(HOST_BRIDGE, BDSM, 1 << 0);
+ pci_or_config32(HOST_BRIDGE, BGSM, 1 << 0);
+ pci_or_config32(HOST_BRIDGE, TOLUD, 1 << 0);
+}
+
+enum raminit_status raminit_done(struct sysinfo *ctrl)
+{
+ union mc_init_state_g_reg mc_init_state_g = {
+ .raw = mchbar_read32(MC_INIT_STATE_G),
+ };
+ mc_init_state_g.refresh_enable = 1;
+ mc_init_state_g.pu_mrc_done = 1;
+ mc_init_state_g.mrc_done = 1;
+ mchbar_write32(MC_INIT_STATE_G, mc_init_state_g.raw);
+
+ /* Lock the memory controller to enable normal operation */
+ mc_lockdown();
+
+ /* Poll for mc_init_done_ack to make sure memory initialization is complete */
+ printk(BIOS_DEBUG, "Waiting for mc_init_done acknowledgement... ");
+
+ struct stopwatch timer;
+ stopwatch_init_msecs_expire(&timer, 2000);
+ do {
+ mc_init_state_g.raw = mchbar_read32(MC_INIT_STATE_G);
+
+ /* DRAM will NOT work without the acknowledgement. There is no hope. */
+ if (stopwatch_expired(&timer))
+ die("\nTimed out waiting for mc_init_done acknowledgement\n");
+
+ } while (mc_init_state_g.mc_init_done_ack == 0);
+ printk(BIOS_DEBUG, "DONE!\n");
+
+ /* Provide some data for the graphics driver. Yes, it's hardcoded. */
+ mchbar_write32(SSKPD + 0, 0x05a2404f);
+ mchbar_write32(SSKPD + 4, 0x140000a0);
+ return RAMINIT_STATUS_SUCCESS;
+}
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
index b947ecbd46..2073b54782 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
@@ -64,6 +64,8 @@ static const struct task_entry cold_boot[] = {
{ train_receive_enable, true, "RCVET", },
{ train_read_mpr, true, "RDMPRT", },
{ train_jedec_write_leveling, true, "JWRL", },
+ { activate_mc, true, "ACTIVATE", },
+ { raminit_done, true, "RAMINITEND", },
};
/* Return a generic stepping value to make stepping checks simpler */
@@ -144,7 +146,4 @@ void raminit_main(const enum raminit_boot_mode bootmode)
if (status != RAMINIT_STATUS_SUCCESS)
die("Memory initialization was met with utmost failure and misery\n");
-
- /** TODO: Implement the required magic **/
- die("NATIVE RAMINIT: More Magic (tm) required.\n");
}
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
index 2fed93de5b..5f7ceec222 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
@@ -199,8 +199,6 @@ void perform_raminit(const int s3resume)
else
me_status = ME_INIT_STATUS_SUCCESS;
- /** TODO: Remove this once raminit is implemented **/
- me_status = ME_INIT_STATUS_ERROR;
intel_early_me_init_done(me_status);
}
@@ -214,7 +212,8 @@ void perform_raminit(const int s3resume)
}
/* Save training data on non-S3 resumes */
- if (!s3resume)
+ /** TODO: Enable this once training data is populated **/
+ if (0 && !s3resume)
save_mrc_data(&md);
/** TODO: setup_sdram_meminfo **/
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
index d6b11b9d3c..a0a913f926 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
@@ -448,6 +448,8 @@ enum raminit_status do_jedec_init(struct sysinfo *ctrl);
enum raminit_status train_receive_enable(struct sysinfo *ctrl);
enum raminit_status train_read_mpr(struct sysinfo *ctrl);
enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl);
+enum raminit_status activate_mc(struct sysinfo *ctrl);
+enum raminit_status raminit_done(struct sysinfo *ctrl);
void configure_timings(struct sysinfo *ctrl);
void configure_refresh(struct sysinfo *ctrl);
diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
index a0e36ed082..0d9aaa1f7c 100644
--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
@@ -294,6 +294,18 @@ union ddr_cke_ctl_controls_reg {
uint32_t raw;
};
+union ddr_scramble_reg {
+ struct __packed {
+ uint32_t scram_en : 1; // Bits 0:0
+ uint32_t scram_key : 16; // Bits 16:1
+ uint32_t clk_gate_ab : 2; // Bits 18:17
+ uint32_t clk_gate_c : 2; // Bits 20:19
+ uint32_t en_dbi_ab : 1; // Bits 21:21
+ uint32_t : 10; // Bits 31:17
+ };
+ uint32_t raw;
+};
+
union ddr_scram_misc_control_reg {
struct __packed {
uint32_t wl_wake_cycles : 2; // Bits 1:0
diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
index 7c0b5a49de..49a215aa71 100644
--- a/src/northbridge/intel/haswell/registers/mchbar.h
+++ b/src/northbridge/intel/haswell/registers/mchbar.h
@@ -20,6 +20,7 @@
#define DDR_DATA_TRAIN_FEEDBACK(ch, byte) _DDRIO_C_R_B(0x0054, ch, 0, byte)
+#define DQ_CONTROL_1(ch, byte) _DDRIO_C_R_B(0x0060, ch, 0, byte)
#define DQ_CONTROL_2(ch, byte) _DDRIO_C_R_B(0x0064, ch, 0, byte)
#define DDR_DATA_OFFSET_TRAIN_ch_b(ch, byte) _DDRIO_C_R_B(0x0070, ch, 0, byte)
#define DQ_CONTROL_0(ch, byte) _DDRIO_C_R_B(0x0074, ch, 0, byte)
@@ -147,6 +148,8 @@
#define QCLK_ch_LDAT_SDAT(ch) _MCMAIN_C(0x42d4, ch)
#define QCLK_ch_LDAT_DATA_IN_x(ch, x) _MCMAIN_C_X(0x42dc, ch, x) /* x in 0 .. 1 */
+#define PM_THRT_CKE_MIN_ch(ch) _MCMAIN_C(0x4328, ch)
+
#define REUT_GLOBAL_CTL 0x4800
#define REUT_GLOBAL_ERR 0x4804
@@ -175,6 +178,8 @@
#define MCSCHEDS_DFT_MISC 0x4c30
+#define PM_PDWN_CONFIG 0x4cb0
+
#define REUT_ERR_DATA_STATUS 0x4ce0
#define REUT_MISC_CKE_CTRL 0x4d90
@@ -186,8 +191,10 @@
#define MAD_CHNL 0x5000 /* Address Decoder Channel Configuration */
#define MAD_DIMM(ch) (0x5004 + (ch) * 4)
#define MAD_ZR 0x5014
+#define MCDECS_CBIT 0x501c
#define MC_INIT_STATE_G 0x5030
#define MRC_REVISION 0x5034 /* MRC Revision */
+#define PM_SREF_CONFIG 0x5060
#define RCOMP_TIMER 0x5084
--
2.39.5

View File

@ -0,0 +1,725 @@
From b6d3af1361681d91d157ba42cfe25232a267aa0b Mon Sep 17 00:00:00 2001
From: Angel Pons <th3fanbus@gmail.com>
Date: Sat, 13 Apr 2024 01:16:30 +0200
Subject: [PATCH 23/24] haswell NRI: Implement fast boot path
When the memory configuration hasn't changed, there is no need to do
full memory training. Instead, boot firmware can use saved training
data to reinitialise the memory controller and memory.
Unlike native RAM init for other platforms, Haswell does not save the
main structure (the "mighty ctrl" struct) to flash. Instead, separate
structures define the data to be saved, which can be smaller than the
main structure.
This makes S3 suspend and resume work: RAM contents MUST be preserved
for a S3 resume to succeed, but RAM training destroys RAM contents.
Change-Id: I06f6cd39ceecdca104fae89159f28e85cf7ff4e6
Signed-off-by: Angel Pons <th3fanbus@gmail.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/81897
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Maximilian Brune <maximilian.brune@9elements.com>
---
.../intel/haswell/native_raminit/Makefile.mk | 1 +
.../haswell/native_raminit/activate_mc.c | 17 +
.../intel/haswell/native_raminit/ddr3.c | 41 ++
.../haswell/native_raminit/raminit_main.c | 34 +-
.../haswell/native_raminit/raminit_native.c | 30 +-
.../haswell/native_raminit/raminit_native.h | 18 +
.../haswell/native_raminit/save_restore.c | 387 ++++++++++++++++++
7 files changed, 504 insertions(+), 24 deletions(-)
create mode 100644 src/northbridge/intel/haswell/native_raminit/save_restore.c
diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.mk b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
index d97da72890..8fdd17c542 100644
--- a/src/northbridge/intel/haswell/native_raminit/Makefile.mk
+++ b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
@@ -13,6 +13,7 @@ romstage-y += raminit_main.c
romstage-y += raminit_native.c
romstage-y += ranges.c
romstage-y += reut.c
+romstage-y += save_restore.c
romstage-y += setup_wdb.c
romstage-y += spd_bitmunching.c
romstage-y += testing_io.c
diff --git a/src/northbridge/intel/haswell/native_raminit/activate_mc.c b/src/northbridge/intel/haswell/native_raminit/activate_mc.c
index 78a7ad27ef..0b3eb917da 100644
--- a/src/northbridge/intel/haswell/native_raminit/activate_mc.c
+++ b/src/northbridge/intel/haswell/native_raminit/activate_mc.c
@@ -333,6 +333,23 @@ enum raminit_status activate_mc(struct sysinfo *ctrl)
return RAMINIT_STATUS_SUCCESS;
}
+enum raminit_status normal_state(struct sysinfo *ctrl)
+{
+ /* Enable periodic COMP */
+ mchbar_write32(M_COMP, (union pcu_comp_reg) {
+ .comp_interval = COMP_INT,
+ }.raw);
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ /* Set MC to normal mode and clean the ODT and CKE */
+ mchbar_write32(REUT_ch_SEQ_CFG(channel), REUT_MODE_NOP << 12);
+ }
+ power_down_config(ctrl);
+ return RAMINIT_STATUS_SUCCESS;
+}
+
static void mc_lockdown(void)
{
/* Lock memory controller registers */
diff --git a/src/northbridge/intel/haswell/native_raminit/ddr3.c b/src/northbridge/intel/haswell/native_raminit/ddr3.c
index 6ddb11488b..9b6368edb1 100644
--- a/src/northbridge/intel/haswell/native_raminit/ddr3.c
+++ b/src/northbridge/intel/haswell/native_raminit/ddr3.c
@@ -2,6 +2,7 @@
#include <assert.h>
#include <console/console.h>
+#include <delay.h>
#include <northbridge/intel/haswell/haswell.h>
#include <types.h>
@@ -215,3 +216,43 @@ enum raminit_status ddr3_jedec_init(struct sysinfo *ctrl)
ddr3_program_mr0(ctrl, 1);
return reut_issue_zq(ctrl, ctrl->chanmap, ZQ_INIT);
}
+
+enum raminit_status exit_selfrefresh(struct sysinfo *ctrl)
+{
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ /* Fields in ctrl aren't populated on a warm boot */
+ union ddr_data_control_0_reg data_control_0 = {
+ .raw = mchbar_read32(DQ_CONTROL_0(channel, 0)),
+ };
+ data_control_0.read_rf_rd = 1;
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ data_control_0.read_rf_rank = rank;
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
+ }
+ }
+
+ /* Time needed to stabilize the DCLK (~6 us) */
+ udelay(6);
+
+ /* Pull the DIMMs out of self refresh by asserting CKE high */
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ const union reut_misc_cke_ctrl_reg reut_misc_cke_ctrl = {
+ .cke_on = ctrl->rankmap[channel],
+ };
+ mchbar_write32(REUT_ch_MISC_CKE_CTRL(channel), reut_misc_cke_ctrl.raw);
+ }
+ mchbar_write32(REUT_MISC_ODT_CTRL, 0);
+
+ const enum raminit_status status = reut_issue_zq(ctrl, ctrl->chanmap, ZQ_LONG);
+ if (status) {
+ /* ZQCL errors don't seem to be a fatal problem here */
+ printk(BIOS_ERR, "ZQ Long failed during S3 resume or warm reset flow\n");
+ }
+ return RAMINIT_STATUS_SUCCESS;
+}
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
index 2073b54782..b6f3144529 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
@@ -65,6 +65,22 @@ static const struct task_entry cold_boot[] = {
{ train_read_mpr, true, "RDMPRT", },
{ train_jedec_write_leveling, true, "JWRL", },
{ activate_mc, true, "ACTIVATE", },
+ { save_training_values, true, "SAVE_TRAIN", },
+ { save_non_training, true, "SAVE_NONT", },
+ { raminit_done, true, "RAMINITEND", },
+};
+
+static const struct task_entry fast_boot[] = {
+ { collect_spd_info, true, "PROCSPD", },
+ { restore_non_training, true, "RST_NONT", },
+ { initialise_mpll, true, "INITMPLL", },
+ { configure_mc, true, "CONFMC", },
+ { configure_memory_map, true, "MEMMAP", },
+ { do_jedec_init, true, "JEDECINIT", },
+ { pre_training, true, "PRETRAIN", },
+ { restore_training_values, true, "RST_TRAIN", },
+ { exit_selfrefresh, true, "EXIT_SR", },
+ { normal_state, true, "NORMALMODE", },
{ raminit_done, true, "RAMINITEND", },
};
@@ -103,11 +119,11 @@ static void initialize_ctrl(struct sysinfo *ctrl)
ctrl->bootmode = bootmode;
}
-static enum raminit_status try_raminit(struct sysinfo *ctrl)
+static enum raminit_status try_raminit(
+ struct sysinfo *ctrl,
+ const struct task_entry *const schedule,
+ const size_t length)
{
- const struct task_entry *const schedule = cold_boot;
- const size_t length = ARRAY_SIZE(cold_boot);
-
enum raminit_status status = RAMINIT_STATUS_UNSPECIFIED_ERROR;
for (size_t i = 0; i < length; i++) {
@@ -141,8 +157,16 @@ void raminit_main(const enum raminit_boot_mode bootmode)
mighty_ctrl.bootmode = bootmode;
initialize_ctrl(&mighty_ctrl);
+ enum raminit_status status = RAMINIT_STATUS_UNSPECIFIED_ERROR;
+
+ if (bootmode != BOOTMODE_COLD) {
+ status = try_raminit(&mighty_ctrl, fast_boot, ARRAY_SIZE(fast_boot));
+ if (status == RAMINIT_STATUS_SUCCESS)
+ return;
+ }
+
/** TODO: Try more than once **/
- enum raminit_status status = try_raminit(&mighty_ctrl);
+ status = try_raminit(&mighty_ctrl, cold_boot, ARRAY_SIZE(cold_boot));
if (status != RAMINIT_STATUS_SUCCESS)
die("Memory initialization was met with utmost failure and misery\n");
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
index 5f7ceec222..3ad8ce29e7 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
@@ -54,23 +54,17 @@ static bool early_init_native(enum raminit_boot_mode bootmode)
return cpu_replaced;
}
-#define MRC_CACHE_VERSION 1
-
-struct mrc_data {
- const void *buffer;
- size_t buffer_len;
-};
-
-static void save_mrc_data(struct mrc_data *md)
+static void save_mrc_data(void)
{
- mrc_cache_stash_data(MRC_TRAINING_DATA, MRC_CACHE_VERSION, md->buffer, md->buffer_len);
+ mrc_cache_stash_data(MRC_TRAINING_DATA, reg_frame_rev(),
+ reg_frame_ptr(), reg_frame_size());
}
static struct mrc_data prepare_mrc_cache(void)
{
struct mrc_data md = {0};
md.buffer = mrc_cache_current_mmap_leak(MRC_TRAINING_DATA,
- MRC_CACHE_VERSION,
+ reg_frame_rev(),
&md.buffer_len);
return md;
}
@@ -94,14 +88,15 @@ static void raminit_reset(void)
}
static enum raminit_boot_mode do_actual_raminit(
- struct mrc_data *md,
const bool s3resume,
const bool cpu_replaced,
const enum raminit_boot_mode orig_bootmode)
{
+ struct mrc_data md = prepare_mrc_cache();
+
enum raminit_boot_mode bootmode = orig_bootmode;
- bool save_data_valid = md->buffer && md->buffer_len == USHRT_MAX; /** TODO: sizeof() **/
+ bool save_data_valid = md.buffer && md.buffer_len == reg_frame_size();
if (s3resume) {
if (bootmode == BOOTMODE_COLD) {
@@ -154,7 +149,7 @@ static enum raminit_boot_mode do_actual_raminit(
assert(save_data_valid != (bootmode == BOOTMODE_COLD));
if (save_data_valid) {
printk(BIOS_INFO, "Using cached memory parameters\n");
- die("RAMINIT: Fast boot is not yet implemented\n");
+ memcpy(reg_frame_ptr(), md.buffer, reg_frame_size());
}
printk(RAM_DEBUG, "Initial bootmode: %s\n", bm_names[orig_bootmode]);
printk(RAM_DEBUG, "Current bootmode: %s\n", bm_names[bootmode]);
@@ -181,10 +176,8 @@ void perform_raminit(const int s3resume)
wait_txt_clear();
wrmsr(0x2e6, (msr_t) {.lo = 0, .hi = 0});
- struct mrc_data md = prepare_mrc_cache();
-
const enum raminit_boot_mode bootmode =
- do_actual_raminit(&md, s3resume, cpu_replaced, orig_bootmode);
+ do_actual_raminit(s3resume, cpu_replaced, orig_bootmode);
/** TODO: report_memory_config **/
@@ -212,9 +205,8 @@ void perform_raminit(const int s3resume)
}
/* Save training data on non-S3 resumes */
- /** TODO: Enable this once training data is populated **/
- if (0 && !s3resume)
- save_mrc_data(&md);
+ if (!s3resume)
+ save_mrc_data();
/** TODO: setup_sdram_meminfo **/
}
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
index a0a913f926..2ac16eaad3 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
@@ -170,6 +170,8 @@ enum regfile_mode {
REG_FILE_USE_CURRENT, /* Used when changing parameters after the test */
};
+struct register_save_frame;
+
struct wdb_pat {
uint32_t start_ptr; /* Starting pointer in WDB */
uint32_t stop_ptr; /* Stopping pointer in WDB */
@@ -220,6 +222,7 @@ enum raminit_status {
RAMINIT_STATUS_RCVEN_FAILURE,
RAMINIT_STATUS_RMPR_FAILURE,
RAMINIT_STATUS_JWRL_FAILURE,
+ RAMINIT_STATUS_INVALID_CACHE,
RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
};
@@ -229,6 +232,11 @@ enum generic_stepping {
STEPPING_C0 = 3,
};
+struct mrc_data {
+ const void *buffer;
+ size_t buffer_len;
+};
+
struct raminit_dimm_info {
spd_ddr3_raw_data raw_spd;
struct dimm_attr_ddr3_st data;
@@ -448,12 +456,22 @@ enum raminit_status do_jedec_init(struct sysinfo *ctrl);
enum raminit_status train_receive_enable(struct sysinfo *ctrl);
enum raminit_status train_read_mpr(struct sysinfo *ctrl);
enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl);
+enum raminit_status save_training_values(struct sysinfo *ctrl);
+enum raminit_status restore_training_values(struct sysinfo *ctrl);
+enum raminit_status save_non_training(struct sysinfo *ctrl);
+enum raminit_status restore_non_training(struct sysinfo *ctrl);
+enum raminit_status exit_selfrefresh(struct sysinfo *ctrl);
+enum raminit_status normal_state(struct sysinfo *ctrl);
enum raminit_status activate_mc(struct sysinfo *ctrl);
enum raminit_status raminit_done(struct sysinfo *ctrl);
void configure_timings(struct sysinfo *ctrl);
void configure_refresh(struct sysinfo *ctrl);
+struct register_save_frame *reg_frame_ptr(void);
+size_t reg_frame_size(void);
+uint32_t reg_frame_rev(void);
+
uint32_t get_tCKE(uint32_t mem_clock_mhz, bool lpddr);
uint32_t get_tXPDLL(uint32_t mem_clock_mhz);
uint32_t get_tAONPD(uint32_t mem_clock_mhz);
diff --git a/src/northbridge/intel/haswell/native_raminit/save_restore.c b/src/northbridge/intel/haswell/native_raminit/save_restore.c
new file mode 100644
index 0000000000..f1f50e3ff8
--- /dev/null
+++ b/src/northbridge/intel/haswell/native_raminit/save_restore.c
@@ -0,0 +1,387 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <assert.h>
+#include <console/console.h>
+#include <northbridge/intel/haswell/haswell.h>
+#include <types.h>
+
+#include "raminit_native.h"
+
+uint32_t reg_frame_rev(void)
+{
+ /*
+ * Equivalent to MRC_CACHE_REVISION, but hidden via abstraction.
+ * The structures that get saved to flash are contained within
+ * this translation unit, so changes outside this file shouldn't
+ * require invalidating the cache.
+ */
+ return 1;
+}
+
+struct register_save {
+ uint16_t lower;
+ uint16_t upper;
+};
+
+/** TODO: Haswell DDRIO aliases writes: 0x80 .. 0xff => 0x00 .. 0x7f **/
+static const struct register_save ddrio_per_byte_list[] = {
+ {0x0000, 0x003c}, /* 16 registers */
+// {0x0048, 0x0084}, /* 16 registers */ /** TODO: BDW support **/
+ {0x0048, 0x004c}, /* 2 registers */
+ {0x005c, 0x0078}, /* 8 registers */
+};
+#define DDRIO_PER_BYTE_REGISTER_COUNT (16 + 2 + 8)
+
+static const struct register_save ddrio_per_ch_list[] = {
+ /* CKE */
+ {0x1204, 0x1208}, /* 2 registers */
+ {0x1214, 0x121c}, /* 3 registers */
+ /* CMD North */
+ {0x1404, 0x140c}, /* 3 registers */
+ /* CLK */
+ {0x1808, 0x1810}, /* 3 registers */
+ /* CMD South */
+ {0x1a04, 0x1a0c}, /* 3 registers */
+ /* CTL */
+ {0x1c14, 0x1c1c}, /* 3 registers */
+};
+#define DDRIO_PER_CH_REGISTER_COUNT (2 + 3 * 5)
+
+static const struct register_save ddrio_common_list[] = {
+ {0x2000, 0x2008}, /* 3 registers */
+ {0x3a14, 0x3a1c}, /* 3 registers */
+ {0x3a24, 0x3a24}, /* 1 registers */
+};
+
+#define DDRIO_COMMON_REGISTER_COUNT (3 + 3 + 1)
+
+static const struct register_save mcmain_per_ch_list[] = {
+ {0x4000, 0x4014}, /* 6 registers */
+ {0x4024, 0x4028}, /* 2 registers */
+ {0x40d0, 0x40d0}, /* 1 registers */
+ {0x4220, 0x4224}, /* 2 registers */
+ {0x4294, 0x4294}, /* 1 registers */
+ {0x429c, 0x42a0}, /* 2 registers */
+ {0x42ec, 0x42fc}, /* 5 registers */
+ {0x4328, 0x4328}, /* 1 registers */
+ {0x438c, 0x4390}, /* 2 registers */
+};
+#define MCMAIN_PER_CH_REGISTER_COUNT (6 + 2 + 1 + 2 + 1 + 2 + 5 + 1 + 2)
+
+static const struct register_save misc_common_list[] = {
+ {0x5884, 0x5888}, /* 2 registers */
+ {0x5890, 0x589c}, /* 4 registers */
+ {0x58a4, 0x58a4}, /* 1 registers */
+ {0x58d0, 0x58e4}, /* 6 registers */
+ {0x5880, 0x5880}, /* 1 registers */
+ {0x5000, 0x50dc}, /* 56 registers */
+ {0x59b8, 0x59b8} /* 1 registers */
+};
+#define MISC_COMMON_REGISTER_COUNT (2 + 4 + 1 + 6 + 1 + 56 + 1)
+
+struct save_params {
+ bool is_initialised;
+
+ /* Memory base frequency, either 100 or 133 MHz */
+ uint8_t base_freq;
+
+ /* Multiplier */
+ uint32_t multiplier;
+
+ /* Memory clock in MHz */
+ uint32_t mem_clock_mhz;
+
+ /* Memory clock in femtoseconds */
+ uint32_t mem_clock_fs;
+
+ /* Quadrature clock in picoseconds */
+ uint16_t qclkps;
+
+ /* Bitfield of supported CAS latencies */
+ uint16_t cas_supported;
+
+ /* CPUID value */
+ uint32_t cpu;
+
+ /* Cached CPU stepping value */
+ uint8_t stepping;
+
+ uint16_t vdd_mv;
+
+ union dimm_flags_ddr3_st flags;
+
+ /* Except for tCK, everything is stored in DCLKs */
+ uint32_t tCK;
+ uint32_t tAA;
+ uint32_t tWR;
+ uint32_t tRCD;
+ uint32_t tRRD;
+ uint32_t tRP;
+ uint32_t tRAS;
+ uint32_t tRC;
+ uint32_t tRFC;
+ uint32_t tWTR;
+ uint32_t tRTP;
+ uint32_t tFAW;
+ uint32_t tCWL;
+ uint32_t tCMD;
+
+ uint32_t tREFI;
+ uint32_t tXP;
+
+ uint8_t lpddr_cke_rank_map[NUM_CHANNELS];
+
+ struct raminit_dimm_info dimms[NUM_CHANNELS][NUM_SLOTS];
+
+ uint8_t chanmap;
+
+ uint32_t channel_size_mb[NUM_CHANNELS];
+
+ /* DIMMs per channel */
+ uint8_t dpc[NUM_CHANNELS];
+
+ uint8_t rankmap[NUM_CHANNELS];
+
+ /* Whether a rank is mirrored or not (only rank 1 of each DIMM can be) */
+ uint8_t rank_mirrored[NUM_CHANNELS];
+
+ /*
+ * FIXME: LPDDR support is incomplete. The largest chunks are missing,
+ * but some LPDDR-specific variations in algorithms have been handled.
+ * LPDDR-specific functions have stubs which will halt upon execution.
+ */
+ bool lpddr;
+
+ uint8_t lanes;
+
+ /* FIXME: ECC support missing */
+ bool is_ecc;
+};
+
+struct register_save_frame {
+ uint32_t ddrio_per_byte[NUM_CHANNELS][NUM_LANES][DDRIO_PER_BYTE_REGISTER_COUNT];
+ uint32_t ddrio_per_ch[NUM_CHANNELS][DDRIO_PER_CH_REGISTER_COUNT];
+ uint32_t ddrio_common[DDRIO_COMMON_REGISTER_COUNT];
+ uint32_t mcmain_per_ch[NUM_CHANNELS][MCMAIN_PER_CH_REGISTER_COUNT];
+ uint32_t misc_common[MISC_COMMON_REGISTER_COUNT];
+ struct save_params params;
+};
+
+struct register_save_frame *reg_frame_ptr(void)
+{
+ /* The chonky register save frame struct, used for fast boot and S3 resume */
+ static struct register_save_frame register_frame = { 0 };
+ return &register_frame;
+}
+
+size_t reg_frame_size(void)
+{
+ return sizeof(struct register_save_frame);
+}
+
+typedef void (*reg_func_t)(const uint16_t offset, uint32_t *const value);
+
+static void save_value(const uint16_t offset, uint32_t *const value)
+{
+ *value = mchbar_read32(offset);
+}
+
+static void restore_value(const uint16_t offset, uint32_t *const value)
+{
+ mchbar_write32(offset, *value);
+}
+
+static void save_restore(
+ uint32_t *reg_frame,
+ const uint16_t g_offset,
+ const struct register_save *reg_save_list,
+ const size_t reg_save_length,
+ reg_func_t handle_reg)
+{
+ for (size_t i = 0; i < reg_save_length; i++) {
+ const struct register_save *entry = &reg_save_list[i];
+ for (uint16_t offset = entry->lower; offset <= entry->upper; offset += 4) {
+ handle_reg(offset + g_offset, reg_frame++);
+ }
+ }
+}
+
+static void save_restore_all(struct register_save_frame *reg_frame, reg_func_t handle_reg)
+{
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ for (uint8_t byte = 0; byte < NUM_LANES; byte++) {
+ const uint16_t g_offset = _DDRIO_C_R_B(0, channel, 0, byte);
+ save_restore(
+ reg_frame->ddrio_per_byte[channel][byte],
+ g_offset,
+ ddrio_per_byte_list,
+ ARRAY_SIZE(ddrio_per_byte_list),
+ handle_reg);
+ }
+ }
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ const uint16_t g_offset = _DDRIO_C_R_B(0, channel, 0, 0);
+ save_restore(
+ reg_frame->ddrio_per_ch[channel],
+ g_offset,
+ ddrio_per_ch_list,
+ ARRAY_SIZE(ddrio_per_ch_list),
+ handle_reg);
+ }
+ save_restore(
+ reg_frame->ddrio_common,
+ 0,
+ ddrio_common_list,
+ ARRAY_SIZE(ddrio_common_list),
+ handle_reg);
+
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ const uint16_t g_offset = _MCMAIN_C(0, channel);
+ save_restore(
+ reg_frame->mcmain_per_ch[channel],
+ g_offset,
+ mcmain_per_ch_list,
+ ARRAY_SIZE(mcmain_per_ch_list),
+ handle_reg);
+ }
+ save_restore(
+ reg_frame->misc_common,
+ 0,
+ misc_common_list,
+ ARRAY_SIZE(misc_common_list),
+ handle_reg);
+}
+
+enum raminit_status save_training_values(struct sysinfo *ctrl)
+{
+ save_restore_all(reg_frame_ptr(), save_value);
+ return RAMINIT_STATUS_SUCCESS;
+}
+
+enum raminit_status restore_training_values(struct sysinfo *ctrl)
+{
+ save_restore_all(reg_frame_ptr(), restore_value);
+ return RAMINIT_STATUS_SUCCESS;
+}
+
+enum raminit_status save_non_training(struct sysinfo *ctrl)
+{
+ struct register_save_frame *reg_frame = reg_frame_ptr();
+ struct save_params *params = &reg_frame->params;
+
+ params->is_initialised = true;
+
+ params->base_freq = ctrl->base_freq;
+ params->multiplier = ctrl->multiplier;
+ params->mem_clock_mhz = ctrl->mem_clock_mhz;
+ params->mem_clock_fs = ctrl->mem_clock_fs;
+ params->qclkps = ctrl->qclkps;
+ params->cas_supported = ctrl->cas_supported;
+ params->cpu = ctrl->cpu;
+ params->stepping = ctrl->stepping;
+ params->vdd_mv = ctrl->vdd_mv;
+ params->flags = ctrl->flags;
+
+ params->tCK = ctrl->tCK;
+ params->tAA = ctrl->tAA;
+ params->tWR = ctrl->tWR;
+ params->tRCD = ctrl->tRCD;
+ params->tRRD = ctrl->tRRD;
+ params->tRP = ctrl->tRP;
+ params->tRAS = ctrl->tRAS;
+ params->tRC = ctrl->tRC;
+ params->tRFC = ctrl->tRFC;
+ params->tWTR = ctrl->tWTR;
+ params->tRTP = ctrl->tRTP;
+ params->tFAW = ctrl->tFAW;
+ params->tCWL = ctrl->tCWL;
+ params->tCMD = ctrl->tCMD;
+ params->tREFI = ctrl->tREFI;
+ params->tXP = ctrl->tXP;
+
+ params->chanmap = ctrl->chanmap;
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ params->lpddr_cke_rank_map[channel] = ctrl->lpddr_cke_rank_map[channel];
+ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++)
+ params->dimms[channel][slot] = ctrl->dimms[channel][slot];
+ }
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ params->dpc[channel] = ctrl->dpc[channel];
+ params->rankmap[channel] = ctrl->rankmap[channel];
+ params->rank_mirrored[channel] = ctrl->rank_mirrored[channel];
+ params->channel_size_mb[channel] = ctrl->channel_size_mb[channel];
+ }
+ params->lpddr = ctrl->lpddr;
+ params->lanes = ctrl->lanes;
+ params->is_ecc = ctrl->is_ecc;
+ return RAMINIT_STATUS_SUCCESS;
+}
+
+#define RAMINIT_COMPARE(_s1, _s2) \
+ ((sizeof(_s1) == sizeof(_s2)) && !memcmp(_s1, _s2, sizeof(_s1)))
+
+enum raminit_status restore_non_training(struct sysinfo *ctrl)
+{
+ struct register_save_frame *reg_frame = reg_frame_ptr();
+ struct save_params *params = &reg_frame->params;
+
+ if (!params->is_initialised) {
+ printk(BIOS_WARNING, "Cannot fast boot: saved data is invalid\n");
+ return RAMINIT_STATUS_INVALID_CACHE;
+ }
+
+ if (!RAMINIT_COMPARE(ctrl->dimms, params->dimms)) {
+ printk(BIOS_WARNING, "Cannot fast boot: DIMMs have changed\n");
+ return RAMINIT_STATUS_INVALID_CACHE;
+ }
+
+ if (ctrl->cpu != params->cpu) {
+ printk(BIOS_WARNING, "Cannot fast boot: CPU has changed\n");
+ return RAMINIT_STATUS_INVALID_CACHE;
+ }
+
+ ctrl->base_freq = params->base_freq;
+ ctrl->multiplier = params->multiplier;
+ ctrl->mem_clock_mhz = params->mem_clock_mhz;
+ ctrl->mem_clock_fs = params->mem_clock_fs;
+ ctrl->qclkps = params->qclkps;
+ ctrl->cas_supported = params->cas_supported;
+ ctrl->cpu = params->cpu;
+ ctrl->stepping = params->stepping;
+ ctrl->vdd_mv = params->vdd_mv;
+ ctrl->flags = params->flags;
+
+ ctrl->tCK = params->tCK;
+ ctrl->tAA = params->tAA;
+ ctrl->tWR = params->tWR;
+ ctrl->tRCD = params->tRCD;
+ ctrl->tRRD = params->tRRD;
+ ctrl->tRP = params->tRP;
+ ctrl->tRAS = params->tRAS;
+ ctrl->tRC = params->tRC;
+ ctrl->tRFC = params->tRFC;
+ ctrl->tWTR = params->tWTR;
+ ctrl->tRTP = params->tRTP;
+ ctrl->tFAW = params->tFAW;
+ ctrl->tCWL = params->tCWL;
+ ctrl->tCMD = params->tCMD;
+ ctrl->tREFI = params->tREFI;
+ ctrl->tXP = params->tXP;
+
+ ctrl->chanmap = params->chanmap;
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ ctrl->lpddr_cke_rank_map[channel] = params->lpddr_cke_rank_map[channel];
+ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++)
+ ctrl->dimms[channel][slot] = params->dimms[channel][slot];
+ }
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ ctrl->dpc[channel] = params->dpc[channel];
+ ctrl->rankmap[channel] = params->rankmap[channel];
+ ctrl->rank_mirrored[channel] = params->rank_mirrored[channel];
+ ctrl->channel_size_mb[channel] = params->channel_size_mb[channel];
+ }
+ ctrl->lpddr = params->lpddr;
+ ctrl->lanes = params->lanes;
+ ctrl->is_ecc = params->is_ecc;
+ return RAMINIT_STATUS_SUCCESS;
+}
--
2.39.5

View File

@ -0,0 +1,472 @@
From 5d291de6011a56bfd767c4bcdfdc3aa6ee87a2dd Mon Sep 17 00:00:00 2001
From: Angel Pons <th3fanbus@gmail.com>
Date: Wed, 17 Apr 2024 13:20:32 +0200
Subject: [PATCH 24/24] haswell NRI: Do sense amplifier offset training
Quoting Wikipedia:
A sense amplifier is a circuit that is used to amplify and detect
small signals in electronic systems. It is commonly used in memory
circuits, such as dynamic random access memory (DRAM), to read and
amplify the weak signals stored in memory cells.
In this case, we're calibrating the sense amplifiers in the memory
controller. This training procedure uses a magic "sense amp offset
cancel" mode of the DDRIO to observe the sampled logic levels, and
sweeps Vref to find the low-high transition for each bit lane. The
procedure consists of two stages: the first stage centers per-byte
Vref (to ensure per-bit Vref offsets are as small as possible) and
the second stage centers per-bit Vref.
Because this procedure uses the "sense amp offset cancel" mode, it
does not rely on DRAM being trained. It is assumed that the memory
controller simply makes sense amp output levels observable via the
`DDR_DATA_TRAIN_FEEDBACK` register and that the memory bus is idle
during this training step (so the lane voltage is Vdd / 2).
Note: This procedure will need to be adapted for Broadwell because
it has per-rank per-bit RxVref registers, whereas Haswell only has
a single per-bit RxVref register for all ranks.
Change-Id: Ia07db68763f90e9701c8a376e01279ada8dbbe07
Signed-off-by: Angel Pons <th3fanbus@gmail.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/81948
Reviewed-by: Maximilian Brune <maximilian.brune@9elements.com>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
---
.../intel/haswell/native_raminit/Makefile.mk | 1 +
.../haswell/native_raminit/raminit_main.c | 1 +
.../haswell/native_raminit/raminit_native.h | 12 +
.../native_raminit/train_sense_amp_offset.c | 334 ++++++++++++++++++
.../intel/haswell/registers/mchbar.h | 2 +
5 files changed, 350 insertions(+)
create mode 100644 src/northbridge/intel/haswell/native_raminit/train_sense_amp_offset.c
diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.mk b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
index 8fdd17c542..4bd668a2d6 100644
--- a/src/northbridge/intel/haswell/native_raminit/Makefile.mk
+++ b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
@@ -21,3 +21,4 @@ romstage-y += timings_refresh.c
romstage-y += train_jedec_write_leveling.c
romstage-y += train_read_mpr.c
romstage-y += train_receive_enable.c
+romstage-y += train_sense_amp_offset.c
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
index b6f3144529..21c953b332 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
@@ -61,6 +61,7 @@ static const struct task_entry cold_boot[] = {
{ configure_memory_map, true, "MEMMAP", },
{ do_jedec_init, true, "JEDECINIT", },
{ pre_training, true, "PRETRAIN", },
+ { train_sense_amp_offset, true, "SOT", },
{ train_receive_enable, true, "RCVET", },
{ train_read_mpr, true, "RDMPRT", },
{ train_jedec_write_leveling, true, "JWRL", },
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
index 2ac16eaad3..07eea98831 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
@@ -23,6 +23,8 @@
#define NUM_LANES 9
#define NUM_LANES_NO_ECC 8
+#define NUM_BITS 8
+
#define COMP_INT 10
/* Always use 12 legs for emphasis (not trained) */
@@ -219,6 +221,7 @@ enum raminit_status {
RAMINIT_STATUS_MPLL_INIT_FAILURE,
RAMINIT_STATUS_POLL_TIMEOUT,
RAMINIT_STATUS_REUT_ERROR,
+ RAMINIT_STATUS_SAMP_OFFSET_FAILURE,
RAMINIT_STATUS_RCVEN_FAILURE,
RAMINIT_STATUS_RMPR_FAILURE,
RAMINIT_STATUS_JWRL_FAILURE,
@@ -244,6 +247,12 @@ struct raminit_dimm_info {
bool valid;
};
+struct vref_margin {
+ uint8_t low;
+ uint8_t center;
+ uint8_t high;
+};
+
struct sysinfo {
enum raminit_boot_mode bootmode;
enum generic_stepping stepping;
@@ -331,6 +340,8 @@ struct sysinfo {
uint8_t rxdqsn[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
int8_t rxvref[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
+ struct vref_margin rxdqvrefpb[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES][NUM_BITS];
+
uint8_t clk_pi_code[NUM_CHANNELS][NUM_SLOTRANKS];
uint8_t ctl_pi_code[NUM_CHANNELS][NUM_SLOTRANKS];
uint8_t cke_pi_code[NUM_CHANNELS][NUM_SLOTRANKS];
@@ -453,6 +464,7 @@ enum raminit_status convert_timings(struct sysinfo *ctrl);
enum raminit_status configure_mc(struct sysinfo *ctrl);
enum raminit_status configure_memory_map(struct sysinfo *ctrl);
enum raminit_status do_jedec_init(struct sysinfo *ctrl);
+enum raminit_status train_sense_amp_offset(struct sysinfo *ctrl);
enum raminit_status train_receive_enable(struct sysinfo *ctrl);
enum raminit_status train_read_mpr(struct sysinfo *ctrl);
enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl);
diff --git a/src/northbridge/intel/haswell/native_raminit/train_sense_amp_offset.c b/src/northbridge/intel/haswell/native_raminit/train_sense_amp_offset.c
new file mode 100644
index 0000000000..6989dd6318
--- /dev/null
+++ b/src/northbridge/intel/haswell/native_raminit/train_sense_amp_offset.c
@@ -0,0 +1,334 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <assert.h>
+#include <commonlib/bsd/clamp.h>
+#include <console/console.h>
+#include <delay.h>
+#include <lib.h>
+#include <types.h>
+
+#include "raminit_native.h"
+
+#define VREF_OFFSET_PLOT RAM_DEBUG
+#define SAMP_OFFSET_PLOT RAM_DEBUG
+
+struct vref_train_data {
+ int8_t best_sum;
+ int8_t best_vref;
+ int8_t sum_bits;
+ uint8_t high_mask;
+ uint8_t low_mask;
+};
+
+static void propagate_delay_values(struct sysinfo *ctrl, uint8_t channel)
+{
+ /* Propagate delay values (without a read command) */
+ union ddr_data_control_0_reg data_control_0 = {
+ .raw = ctrl->dq_control_0[channel],
+ };
+ data_control_0.read_rf_rd = 1;
+ data_control_0.read_rf_wr = 0;
+ data_control_0.read_rf_rank = 0;
+ data_control_0.force_odt_on = 1;
+ data_control_0.samp_train_mode = 1;
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
+ udelay(1);
+ data_control_0.samp_train_mode = 0;
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
+}
+
+static enum raminit_status train_vref_offset(struct sysinfo *ctrl)
+{
+ const int8_t vref_start = -15;
+ const int8_t vref_stop = 15;
+ const struct vref_train_data initial_vref_values = {
+ .best_sum = -NUM_LANES,
+ .best_vref = 0,
+ .high_mask = 0,
+ .low_mask = 0xff,
+ };
+ struct vref_train_data vref_data[NUM_CHANNELS][NUM_LANES];
+
+ printk(VREF_OFFSET_PLOT, "Plot of sum_bits across Vref settings\nChannel");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ printk(VREF_OFFSET_PLOT, "\t%u\t\t", channel);
+ }
+
+ printk(VREF_OFFSET_PLOT, "\nByte");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ printk(VREF_OFFSET_PLOT, "\t");
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ printk(VREF_OFFSET_PLOT, "%u ", byte);
+ vref_data[channel][byte] = initial_vref_values;
+ union ddr_data_control_2_reg data_control_2 = {
+ .raw = ctrl->dq_control_2[channel][byte],
+ };
+ data_control_2.force_bias_on = 1;
+ data_control_2.force_rx_on = 1;
+ mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw);
+ }
+ }
+
+ /* Sweep through Vref settings and find point SampOffset of +/- 7 passes */
+ printk(VREF_OFFSET_PLOT, "\n1/2 Vref");
+ for (int8_t vref = vref_start; vref <= vref_stop; vref++) {
+ printk(VREF_OFFSET_PLOT, "\n% 3d", vref);
+
+ /*
+ * To perform this test, enable offset cancel mode and enable ODT.
+ * Check results and update variables. Ideal result is all zeroes.
+ * Clear offset cancel mode at end of test to write RX_OFFSET_VDQ.
+ */
+ change_1d_margin_multicast(ctrl, RdV, vref, 0, false, REG_FILE_USE_RANK);
+
+ /* Program settings for Vref and SampOffset = 7 (8 + 7) */
+ mchbar_write32(DDR_DATA_RX_OFFSET_VDQ, 0xffffffff);
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ propagate_delay_values(ctrl, channel);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ const uint8_t feedback = get_data_train_feedback(channel, byte);
+ struct vref_train_data *curr_data = &vref_data[channel][byte];
+ curr_data->low_mask &= feedback;
+ curr_data->sum_bits = -popcnt(feedback);
+ }
+ }
+
+ /* Program settings for Vref and SampOffset = -7 (8 - 7) */
+ mchbar_write32(DDR_DATA_RX_OFFSET_VDQ, 0x11111111);
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ propagate_delay_values(ctrl, channel);
+ printk(VREF_OFFSET_PLOT, "\t");
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ const uint8_t feedback = get_data_train_feedback(channel, byte);
+ struct vref_train_data *curr_data = &vref_data[channel][byte];
+ curr_data->high_mask |= feedback;
+ curr_data->sum_bits += popcnt(feedback);
+ printk(VREF_OFFSET_PLOT, "%d ", curr_data->sum_bits);
+ if (curr_data->sum_bits > curr_data->best_sum) {
+ curr_data->best_sum = curr_data->sum_bits;
+ curr_data->best_vref = vref;
+ ctrl->rxvref[channel][0][byte] = vref;
+ } else if (curr_data->sum_bits == curr_data->best_sum) {
+ curr_data->best_vref = vref;
+ }
+ }
+ }
+ }
+ printk(BIOS_DEBUG, "\n\nHi-Lo (XOR):");
+ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ printk(BIOS_DEBUG, "\n C%u:", channel);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ struct vref_train_data *const curr_data = &vref_data[channel][byte];
+ const uint8_t bit_xor = curr_data->high_mask ^ curr_data->low_mask;
+ printk(BIOS_DEBUG, "\t0x%02x", bit_xor);
+ if (bit_xor == 0xff)
+ continue;
+
+ /* Report an error if any bit did not change */
+ status = RAMINIT_STATUS_SAMP_OFFSET_FAILURE;
+ }
+ }
+ if (status)
+ printk(BIOS_ERR, "\nUnexpected bit error in Vref offset training\n");
+
+ printk(BIOS_DEBUG, "\n\nRdVref:");
+ change_1d_margin_multicast(ctrl, RdV, 0, 0, false, REG_FILE_USE_RANK);
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ printk(BIOS_DEBUG, "\n C%u:", channel);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ struct vref_train_data *const curr_data = &vref_data[channel][byte];
+ const int8_t vref_width =
+ curr_data->best_vref - ctrl->rxvref[channel][0][byte];
+
+ /*
+ * Step size for Rx Vref in DATA_OFFSET_TRAIN is about 3.9 mV
+ * whereas Rx Vref step size in RX_TRAIN_RANK is about 7.8 mV
+ */
+ int8_t vref = ctrl->rxvref[channel][0][byte] + vref_width / 2;
+ if (vref < 0)
+ vref--;
+ else
+ vref++;
+
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ ctrl->rxvref[channel][rank][byte] = vref / 2;
+ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
+ }
+ printk(BIOS_DEBUG, "\t% 4d", ctrl->rxvref[channel][0][byte]);
+ }
+ }
+ printk(BIOS_DEBUG, "\n\n");
+ return status;
+}
+
+/**
+ * LPDDR has an additional bit for DQS per each byte.
+ *
+ * TODO: The DQS value must be written into Data Control 2.
+ */
+#define NUM_OFFSET_TRAIN_BITS (NUM_BITS + 1)
+
+#define PLOT_CH_SPACE " "
+
+struct samp_train_data {
+ uint8_t first_zero;
+ uint8_t last_one;
+};
+
+static void train_samp_offset(struct sysinfo *ctrl)
+{
+ const uint8_t max_train_bits = ctrl->lpddr ? NUM_OFFSET_TRAIN_BITS : NUM_BITS;
+
+ struct samp_train_data samp_data[NUM_CHANNELS][NUM_LANES][NUM_OFFSET_TRAIN_BITS] = {0};
+
+ printk(BIOS_DEBUG, "Channel ");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ printk(BIOS_DEBUG, "%u ", channel); /* Same length as PLOT_CH_SPACE */
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ printk(BIOS_DEBUG, " %s ", ctrl->lpddr ? " " : "");
+ }
+ printk(BIOS_DEBUG, "\nByte ");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ printk(BIOS_DEBUG, "%u %s ", byte, ctrl->lpddr ? " " : "");
+
+ printk(BIOS_DEBUG, PLOT_CH_SPACE);
+ }
+ printk(SAMP_OFFSET_PLOT, "\nBits ");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ printk(SAMP_OFFSET_PLOT, "01234567%s ", ctrl->lpddr ? "S" : "");
+
+ printk(SAMP_OFFSET_PLOT, PLOT_CH_SPACE);
+ }
+ printk(SAMP_OFFSET_PLOT, "\n SAmp\n");
+ for (uint8_t samp_offset = 1; samp_offset <= 15; samp_offset++) {
+ printk(SAMP_OFFSET_PLOT, "% 5d\t", samp_offset);
+
+ uint32_t rx_offset_vdq = 0;
+ for (uint8_t bit = 0; bit < NUM_BITS; bit++) {
+ rx_offset_vdq += samp_offset << (4 * bit);
+ }
+ mchbar_write32(DDR_DATA_RX_OFFSET_VDQ, rx_offset_vdq);
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ /* Propagate delay values (without a read command) */
+ union ddr_data_control_0_reg data_control_0 = {
+ .raw = ctrl->dq_control_0[channel],
+ };
+ data_control_0.read_rf_rd = 1;
+ data_control_0.read_rf_wr = 0;
+ data_control_0.read_rf_rank = 0;
+ data_control_0.force_odt_on = 1;
+ data_control_0.samp_train_mode = 1;
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
+ udelay(1);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ const uint32_t feedback =
+ get_data_train_feedback(channel, byte);
+
+ for (uint8_t bit = 0; bit < max_train_bits; bit++) {
+ struct samp_train_data *const curr_data =
+ &samp_data[channel][byte][bit];
+ const bool result = feedback & BIT(bit);
+ if (result) {
+ curr_data->last_one = samp_offset;
+ } else if (curr_data->first_zero == 0) {
+ curr_data->first_zero = samp_offset;
+ }
+ printk(SAMP_OFFSET_PLOT, result ? "." : "#");
+ }
+ printk(SAMP_OFFSET_PLOT, " ");
+ }
+ printk(SAMP_OFFSET_PLOT, PLOT_CH_SPACE);
+ data_control_0.samp_train_mode = 0;
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
+ }
+ printk(SAMP_OFFSET_PLOT, "\n");
+ }
+ printk(BIOS_DEBUG, "\nBitSAmp ");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ uint32_t rx_offset_vdq = 0;
+ for (uint8_t bit = 0; bit < max_train_bits; bit++) {
+ struct samp_train_data *const curr_data =
+ &samp_data[channel][byte][bit];
+
+ uint8_t vref = curr_data->first_zero + curr_data->last_one;
+ vref = clamp_u8(0, vref / 2, 15);
+ /*
+ * Check for saturation conditions to make sure
+ * we are as close as possible to Vdd/2 (750 mV).
+ */
+ if (curr_data->first_zero == 0)
+ vref = 15;
+ if (curr_data->last_one == 0)
+ vref = 0;
+
+ ctrl->rxdqvrefpb[channel][0][byte][bit].center = vref;
+ rx_offset_vdq += vref & 0xf << (4 * bit);
+ printk(BIOS_DEBUG, "%x", vref);
+ }
+ mchbar_write32(RX_OFFSET_VDQ(channel, byte), rx_offset_vdq);
+ printk(BIOS_DEBUG, " ");
+ download_regfile(ctrl, channel, 1, 0, REG_FILE_USE_RANK, 0, 1, 0);
+ }
+ printk(BIOS_DEBUG, PLOT_CH_SPACE);
+ }
+ printk(BIOS_DEBUG, "\n");
+}
+
+enum raminit_status train_sense_amp_offset(struct sysinfo *ctrl)
+{
+ printk(BIOS_DEBUG, "Stage 1: Vref offset training\n");
+ const enum raminit_status status = train_vref_offset(ctrl);
+
+ printk(BIOS_DEBUG, "Stage 2: Samp offset training\n");
+ train_samp_offset(ctrl);
+
+ /* Clean up after test */
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ mchbar_write32(DQ_CONTROL_2(channel, byte),
+ ctrl->dq_control_2[channel][byte]);
+ }
+ io_reset();
+ return status;
+}
diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
index 49a215aa71..1a168a3fc8 100644
--- a/src/northbridge/intel/haswell/registers/mchbar.h
+++ b/src/northbridge/intel/haswell/registers/mchbar.h
@@ -18,6 +18,8 @@
#define RX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0000, ch, rank, byte)
#define TX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0020, ch, rank, byte)
+#define RX_OFFSET_VDQ(ch, byte) _DDRIO_C_R_B(0x004c, ch, 0, byte)
+
#define DDR_DATA_TRAIN_FEEDBACK(ch, byte) _DDRIO_C_R_B(0x0054, ch, 0, byte)
#define DQ_CONTROL_1(ch, byte) _DDRIO_C_R_B(0x0060, ch, 0, byte)
--
2.39.5