LTF cross-correlation window update: 16->32 complex samples

This commit is contained in:
mmehari 2022-01-04 22:16:18 +01:00
parent 36c738fe98
commit d2d5494f57
2 changed files with 241 additions and 117 deletions

View File

@ -4,24 +4,23 @@ module stage_mult
input enable,
input reset,
input signed [15:0] X0,
input signed [15:0] X1,
input signed [15:0] X2,
input signed [15:0] X3,
input signed [15:0] X4,
input signed [15:0] X5,
input signed [15:0] X6,
input signed [15:0] X7,
input signed [31:0] X0,
input signed [31:0] X1,
input signed [31:0] X2,
input signed [31:0] X3,
input signed [31:0] X4,
input signed [31:0] X5,
input signed [31:0] X6,
input signed [31:0] X7,
input signed [15:0] Y0,
input signed [15:0] Y1,
input signed [15:0] Y2,
input signed [15:0] Y3,
input signed [15:0] Y4,
input signed [15:0] Y5,
input signed [15:0] Y6,
input signed [15:0] Y7,
input signed [31:0] Y0,
input signed [31:0] Y1,
input signed [31:0] Y2,
input signed [31:0] Y3,
input signed [31:0] Y4,
input signed [31:0] Y5,
input signed [31:0] Y6,
input signed [31:0] Y7,
input input_strobe,
@ -29,6 +28,40 @@ module stage_mult
output output_strobe
);
wire signed [15:0] X0_q = X0[31:16];
wire signed [15:0] X0_i = X0[15:0];
wire signed [15:0] X1_q = X1[31:16];
wire signed [15:0] X1_i = X1[15:0];
wire signed [15:0] X2_q = X2[31:16];
wire signed [15:0] X2_i = X2[15:0];
wire signed [15:0] X3_q = X3[31:16];
wire signed [15:0] X3_i = X3[15:0];
wire signed [15:0] X4_q = X4[31:16];
wire signed [15:0] X4_i = X4[15:0];
wire signed [15:0] X5_q = X5[31:16];
wire signed [15:0] X5_i = X5[15:0];
wire signed [15:0] X6_q = X6[31:16];
wire signed [15:0] X6_i = X6[15:0];
wire signed [15:0] X7_q = X7[31:16];
wire signed [15:0] X7_i = X7[15:0];
wire signed [15:0] Y0_q = Y0[31:16];
wire signed [15:0] Y0_i = Y0[15:0];
wire signed [15:0] Y1_q = Y1[31:16];
wire signed [15:0] Y1_i = Y1[15:0];
wire signed [15:0] Y2_q = Y2[31:16];
wire signed [15:0] Y2_i = Y2[15:0];
wire signed [15:0] Y3_q = Y3[31:16];
wire signed [15:0] Y3_i = Y3[15:0];
wire signed [15:0] Y4_q = Y4[31:16];
wire signed [15:0] Y4_i = Y4[15:0];
wire signed [15:0] Y5_q = Y5[31:16];
wire signed [15:0] Y5_i = Y5[15:0];
wire signed [15:0] Y6_q = Y6[31:16];
wire signed [15:0] Y6_i = Y6[15:0];
wire signed [15:0] Y7_q = Y7[31:16];
wire signed [15:0] Y7_i = Y7[15:0];
wire signed [31:0] prod_0_i;
wire signed [31:0] prod_0_q;
wire signed [31:0] prod_1_i;
@ -37,52 +70,103 @@ wire signed [31:0] prod_2_i;
wire signed [31:0] prod_2_q;
wire signed [31:0] prod_3_i;
wire signed [31:0] prod_3_q;
wire signed [31:0] prod_4_i;
wire signed [31:0] prod_4_q;
wire signed [31:0] prod_5_i;
wire signed [31:0] prod_5_q;
wire signed [31:0] prod_6_i;
wire signed [31:0] prod_6_q;
wire signed [31:0] prod_7_i;
wire signed [31:0] prod_7_q;
complex_multiplier mult_inst (
.aclk(clock),
.s_axis_a_tvalid(input_strobe),
.s_axis_a_tdata({X1,X0}),
.s_axis_b_tvalid(input_strobe),
.s_axis_b_tdata({Y1,Y0}),
.m_axis_dout_tvalid(),
.m_axis_dout_tdata({prod_0_q,prod_0_i})
complex_multiplier mult_inst1 (
.aclk(clock),
.s_axis_a_tvalid(input_strobe),
.s_axis_a_tdata({X0_i,X0_q}),
.s_axis_b_tvalid(input_strobe),
.s_axis_b_tdata({Y0_i,Y0_q}),
.m_axis_dout_tvalid(),
.m_axis_dout_tdata({prod_0_q,prod_0_i})
);
complex_multiplier mult_inst2 (
.aclk(clock),
.s_axis_a_tvalid(input_strobe),
.s_axis_a_tdata({X3,X2}),
.s_axis_b_tvalid(input_strobe),
.s_axis_b_tdata({Y3,Y2}),
.m_axis_dout_tvalid(),
.m_axis_dout_tdata({prod_1_q,prod_1_i})
.aclk(clock),
.s_axis_a_tvalid(input_strobe),
.s_axis_a_tdata({X1_i,X1_q}),
.s_axis_b_tvalid(input_strobe),
.s_axis_b_tdata({Y1_i,Y1_q}),
.m_axis_dout_tvalid(),
.m_axis_dout_tdata({prod_1_q,prod_1_i})
);
complex_multiplier mult_inst3 (
.aclk(clock),
.s_axis_a_tvalid(input_strobe),
.s_axis_a_tdata({X5,X4}),
.s_axis_b_tvalid(input_strobe),
.s_axis_b_tdata({Y5,Y4}),
.m_axis_dout_tvalid(),
.m_axis_dout_tdata({prod_2_q,prod_2_i})
.aclk(clock),
.s_axis_a_tvalid(input_strobe),
.s_axis_a_tdata({X2_i,X2_q}),
.s_axis_b_tvalid(input_strobe),
.s_axis_b_tdata({Y2_i,Y2_q}),
.m_axis_dout_tvalid(),
.m_axis_dout_tdata({prod_2_q,prod_2_i})
);
complex_multiplier mult_inst4 (
.aclk(clock),
.s_axis_a_tvalid(input_strobe),
.s_axis_a_tdata({X7,X6}),
.s_axis_b_tvalid(input_strobe),
.s_axis_b_tdata({Y7,Y6}),
.m_axis_dout_tvalid(),
.m_axis_dout_tdata({prod_3_q,prod_3_i})
.aclk(clock),
.s_axis_a_tvalid(input_strobe),
.s_axis_a_tdata({X3_i,X3_q}),
.s_axis_b_tvalid(input_strobe),
.s_axis_b_tdata({Y3_i,Y3_q}),
.m_axis_dout_tvalid(),
.m_axis_dout_tdata({prod_3_q,prod_3_i})
);
complex_multiplier mult_inst5 (
.aclk(clock),
.s_axis_a_tvalid(input_strobe),
.s_axis_a_tdata({X4_i,X4_q}),
.s_axis_b_tvalid(input_strobe),
.s_axis_b_tdata({Y4_i,Y4_q}),
.m_axis_dout_tvalid(),
.m_axis_dout_tdata({prod_4_q,prod_4_i})
);
complex_multiplier mult_inst6 (
.aclk(clock),
.s_axis_a_tvalid(input_strobe),
.s_axis_a_tdata({X5_i,X5_q}),
.s_axis_b_tvalid(input_strobe),
.s_axis_b_tdata({Y5_i,Y5_q}),
.m_axis_dout_tvalid(),
.m_axis_dout_tdata({prod_5_q,prod_5_i})
);
complex_multiplier mult_inst7 (
.aclk(clock),
.s_axis_a_tvalid(input_strobe),
.s_axis_a_tdata({X6_i,X6_q}),
.s_axis_b_tvalid(input_strobe),
.s_axis_b_tdata({Y6_i,Y6_q}),
.m_axis_dout_tvalid(),
.m_axis_dout_tdata({prod_6_q,prod_6_i})
);
complex_multiplier mult_inst8 (
.aclk(clock),
.s_axis_a_tvalid(input_strobe),
.s_axis_a_tdata({X7_i,X7_q}),
.s_axis_b_tvalid(input_strobe),
.s_axis_b_tdata({Y7_i,Y7_q}),
.m_axis_dout_tvalid(),
.m_axis_dout_tdata({prod_7_q,prod_7_i})
);
reg signed [31:0] sum_i1;
reg signed [31:0] sum_i2;
reg signed [31:0] sum_i3;
reg signed [31:0] sum_i4;
reg signed [31:0] sum_q1;
reg signed [31:0] sum_q2;
reg signed [31:0] sum_q3;
reg signed [31:0] sum_q4;
delayT #(.DATA_WIDTH(1), .DELAY(5)) sum_delay_inst (
.clock(clock),
@ -97,16 +181,24 @@ always @(posedge clock) begin
sum <= 0;
sum_i1 <= 0;
sum_i2 <= 0;
sum_i3 <= 0;
sum_i4 <= 0;
sum_q1 <= 0;
sum_q2 <= 0;
sum_q3 <= 0;
sum_q4 <= 0;
end else if (enable) begin
sum_i1 <= prod_0_i + prod_1_i;
sum_i2 <= prod_2_i + prod_3_i;
sum_i3 <= prod_4_i + prod_5_i;
sum_i4 <= prod_6_i + prod_7_i;
sum_q1 <= prod_0_q + prod_1_q;
sum_q2 <= prod_2_q + prod_3_q;
sum_q3 <= prod_4_q + prod_5_q;
sum_q4 <= prod_6_q + prod_7_q;
sum[63:32] <= sum_i1 + sum_i2;
sum[31:0] <= sum_q1 + sum_q2;
sum[63:32] <= sum_i1 + sum_i2 + sum_i3 + sum_i4;
sum[31:0] <= sum_q1 + sum_q2 + sum_q3 + sum_q4;
end
end

View File

@ -123,40 +123,48 @@ complex_to_mag #(.DATA_WIDTH(32)) sum_mag_inst (
reg [31:0] metric_max1;
reg [(IN_BUF_LEN_SHIFT-1):0] addr1;
reg [31:0] cross_corr_buf[0:15];
reg [31:0] cross_corr_buf[0:31];
reg [31:0] stage_X0;
reg [31:0] stage_X1;
reg [31:0] stage_X2;
reg [31:0] stage_X3;
reg [31:0] stage_X4;
reg [31:0] stage_X5;
reg [31:0] stage_X6;
reg [31:0] stage_X7;
reg [31:0] stage_Y0;
reg [31:0] stage_Y1;
reg [31:0] stage_Y2;
reg [31:0] stage_Y3;
reg [31:0] stage_Y4;
reg [31:0] stage_Y5;
reg [31:0] stage_Y6;
reg [31:0] stage_Y7;
stage_mult stage_mult_inst (
.clock(clock),
.enable(enable),
.reset(reset),
.X0(stage_X0[31:16]),
.X1(stage_X0[15:0]),
.X2(stage_X1[31:16]),
.X3(stage_X1[15:0]),
.X4(stage_X2[31:16]),
.X5(stage_X2[15:0]),
.X6(stage_X3[31:16]),
.X7(stage_X3[15:0]),
.X0(stage_X0),
.X1(stage_X1),
.X2(stage_X2),
.X3(stage_X3),
.X4(stage_X4),
.X5(stage_X5),
.X6(stage_X6),
.X7(stage_X7),
.Y0(stage_Y0[31:16]),
.Y1(stage_Y0[15:0]),
.Y2(stage_Y1[31:16]),
.Y3(stage_Y1[15:0]),
.Y4(stage_Y2[31:16]),
.Y5(stage_Y2[15:0]),
.Y6(stage_Y3[31:16]),
.Y7(stage_Y3[15:0]),
.Y0(stage_Y0),
.Y1(stage_Y1),
.Y2(stage_Y2),
.Y3(stage_Y3),
.Y4(stage_Y4),
.Y5(stage_Y5),
.Y6(stage_Y6),
.Y7(stage_Y7),
.input_strobe(mult_strobe),
@ -287,7 +295,7 @@ integer i;
integer j;
always @(posedge clock) begin
if (reset) begin
for (j = 0; j < 16; j= j+1) begin
for (j = 0; j < 32; j= j+1) begin
cross_corr_buf[j] <= 0;
end
do_clear();
@ -319,15 +327,15 @@ always @(posedge clock) begin
addr1 <= in_raddr - 1;
end
if (num_sample >= 64) begin
if (num_sample >= 88) begin
long_preamble_detected <= 1;
num_sample <= 0;
mult_strobe <= 0;
sum_stb <= 0;
// offset it by the length of the cross correlation
// buffer
in_raddr <= addr1 - 16;
num_input_consumed <= addr1 - 16;
in_raddr <= addr1 - 32;
num_input_consumed <= addr1 - 32;
in_offset <= 0;
num_ofdm_symbol <= 0;
phase_correction <= 0;
@ -348,7 +356,7 @@ always @(posedge clock) begin
long_preamble_detected <= 0;
end
if (~fft_loading && num_input_avail > 64) begin
if (~fft_loading && num_input_avail > 88) begin
fft_start <= 1;
in_offset <= 0;
end
@ -424,8 +432,8 @@ integer do_mult_i;
task do_mult; begin
// cross correlation of the first 32 samples of LTS
if (sample_in_strobe) begin
cross_corr_buf[15] <= sample_in;
for (do_mult_i = 0; do_mult_i < 15; do_mult_i = do_mult_i+1) begin
cross_corr_buf[31] <= sample_in;
for (do_mult_i = 0; do_mult_i < 31; do_mult_i = do_mult_i+1) begin
cross_corr_buf[do_mult_i] <= cross_corr_buf[do_mult_i+1];
end
@ -438,66 +446,82 @@ task do_mult; begin
stage_X1 <= cross_corr_buf[2];
stage_X2 <= cross_corr_buf[3];
stage_X3 <= cross_corr_buf[4];
stage_X4 <= cross_corr_buf[5];
stage_X5 <= cross_corr_buf[6];
stage_X6 <= cross_corr_buf[7];
stage_X7 <= cross_corr_buf[8];
stage_Y0[31:16] <= 156;
stage_Y0[15:0] <= 0;
stage_Y1[31:16] <= -5;
stage_Y1[15:0] <= 120;
stage_Y2[31:16] <= 40;
stage_Y2[15:0] <= 111;
stage_Y3[31:16] <= 97;
stage_Y3[15:0] <= -83;
stage_Y0 <= { 16'd156, 16'd0};
stage_Y1 <= {-16'd5, 16'd120};
stage_Y2 <= { 16'd40, 16'd111};
stage_Y3 <= { 16'd97, -16'd83};
stage_Y4 <= { 16'd21, -16'd28};
stage_Y5 <= { 16'd60, 16'd88};
stage_Y6 <= {-16'd115, 16'd55};
stage_Y7 <= {-16'd38, 16'd106};
mult_strobe <= 1;
mult_stage <= 1;
end
if (mult_stage == 1) begin
stage_X0 <= cross_corr_buf[4];
stage_X1 <= cross_corr_buf[5];
stage_X2 <= cross_corr_buf[6];
stage_X3 <= cross_corr_buf[7];
stage_Y0[31:16] <= 21;
stage_Y0[15:0] <= -28;
stage_Y1[31:16] <= 60;
stage_Y1[15:0] <= 88;
stage_Y2[31:16] <= -115;
stage_Y2[15:0] <= 55;
stage_Y3[31:16] <= -38;
stage_Y3[15:0] <= 106;
mult_stage <= 2;
end else if (mult_stage == 2) begin
stage_X0 <= cross_corr_buf[8];
stage_X1 <= cross_corr_buf[9];
stage_X2 <= cross_corr_buf[10];
stage_X3 <= cross_corr_buf[11];
stage_X4 <= cross_corr_buf[12];
stage_X5 <= cross_corr_buf[13];
stage_X6 <= cross_corr_buf[14];
stage_X7 <= cross_corr_buf[15];
stage_Y0[31:16] <= 98;
stage_Y0[15:0] <= 26;
stage_Y1[31:16] <= 53;
stage_Y1[15:0] <= -4;
stage_Y2[31:16] <= 1;
stage_Y2[15:0] <= 115;
stage_Y3[31:16] <= -137;
stage_Y3[15:0] <= 47;
stage_Y0 <= { 16'd98, 16'd26};
stage_Y1 <= { 16'd53, -16'd4};
stage_Y2 <= { 16'd1, 16'd115};
stage_Y3 <= {-16'd137, 16'd47};
stage_Y4 <= { 16'd24, 16'd59};
stage_Y5 <= { 16'd59, 16'd15};
stage_Y6 <= {-16'd22, -16'd161};
stage_Y7 <= { 16'd119, 16'd4};
mult_stage <= 2;
end else if (mult_stage == 2) begin
stage_X0 <= cross_corr_buf[16];
stage_X1 <= cross_corr_buf[17];
stage_X2 <= cross_corr_buf[18];
stage_X3 <= cross_corr_buf[19];
stage_X4 <= cross_corr_buf[20];
stage_X5 <= cross_corr_buf[21];
stage_X6 <= cross_corr_buf[22];
stage_X7 <= cross_corr_buf[23];
stage_Y0 <= { 16'd62, 16'd62};
stage_Y1 <= { 16'd37, -16'd98};
stage_Y2 <= {-16'd57, -16'd39};
stage_Y3 <= {-16'd131, -16'd65};
stage_Y4 <= { 16'd82, -16'd92};
stage_Y5 <= { 16'd70, -16'd14};
stage_Y6 <= {-16'd60, -16'd81};
stage_Y7 <= {-16'd56, 16'd22};
mult_stage <= 3;
end else if (mult_stage == 3) begin
stage_X0 <= cross_corr_buf[12];
stage_X1 <= cross_corr_buf[13];
stage_X2 <= cross_corr_buf[14];
stage_X3 <= cross_corr_buf[15];
stage_X0 <= cross_corr_buf[24];
stage_X1 <= cross_corr_buf[25];
stage_X2 <= cross_corr_buf[26];
stage_X3 <= cross_corr_buf[27];
stage_X4 <= cross_corr_buf[28];
stage_X5 <= cross_corr_buf[29];
stage_X6 <= cross_corr_buf[30];
stage_X7 <= cross_corr_buf[31];
stage_Y0[31:16] <= 24;
stage_Y0[15:0] <= 59;
stage_Y1[31:16] <= 59;
stage_Y1[15:0] <= 15;
stage_Y2[31:16] <= -22;
stage_Y2[15:0] <= -161;
stage_Y3[31:16] <= 119;
stage_Y3[15:0] <= 4;
stage_Y0 <= {-16'd35, 16'd151};
stage_Y1 <= {-16'd122, 16'd17};
stage_Y2 <= {-16'd127, 16'd21};
stage_Y3 <= { 16'd75, 16'd74};
stage_Y4 <= {-16'd3, -16'd54};
stage_Y5 <= {-16'd92, -16'd115};
stage_Y6 <= { 16'd92, -16'd106};
stage_Y7 <= { 16'd12, -16'd98};
mult_stage <= 4;
end else if (mult_stage == 4) begin
@ -561,11 +585,19 @@ task do_clear; begin
stage_X1 <= 0;
stage_X2 <= 0;
stage_X3 <= 0;
stage_X4 <= 0;
stage_X5 <= 0;
stage_X6 <= 0;
stage_X7 <= 0;
stage_Y0 <= 0;
stage_Y1 <= 0;
stage_Y2 <= 0;
stage_Y3 <= 0;
stage_Y4 <= 0;
stage_Y5 <= 0;
stage_Y6 <= 0;
stage_Y7 <= 0;
end
endtask