// 8-tap signed FIR filter with run-time writable coefficients.
//
// Data path
//   * A sample is accepted on every rising clock edge where data_valid is 1.
//   * The tap sum for that sample is registered in pending_data on the
//     accepting edge and copied to data_out on the following edge;
//     data_out_valid follows the same two-stage path.
//   * On an edge without data_valid, pending_data is cleared to zero and
//     pending_valid is dropped.
//
// Coefficient path
//   * coeff_wr_en stores coeff_data into staged_coeff[coeff_addr] and sets
//     pending_dirty.
//   * frame_pos counts accepted samples modulo 8. When a sample is accepted
//     while frame_pos is 0 and pending_dirty is set, that sample is already
//     filtered with the staged set and the staged set is copied into
//     active_coeff on the same edge.
//   * pending_dirty is cleared by that copy unless a coefficient write
//     arrives on the very same edge, in which case it stays set so the new
//     word is picked up at the next frame start.
//
// Reset is synchronous and clears all state, including both coefficient sets.
module runtime_reconfigurable_fir (
    input wire clk,
    input wire rst,
    input wire signed [7:0] data_in,
    input wire data_valid,
    input wire coeff_wr_en,
    input wire [2:0] coeff_addr,
    input wire signed [15:0] coeff_data,
    output reg signed [26:0] data_out,
    output reg data_out_valid
);
    // Number of filter taps; history holds the TAPS-1 previous samples.
    localparam TAPS = 8;

    reg signed [15:0] active_coeff [0:TAPS-1];
    reg signed [15:0] staged_coeff [0:TAPS-1];
    reg signed [7:0] history [0:TAPS-2];
    reg signed [26:0] pending_data;
    reg pending_valid;
    reg pending_dirty;
    reg [2:0] frame_pos;

    // Staged coefficients are eligible at the first position of a frame,
    // provided something was written since the last hand-over.
    wire use_staged = (frame_pos == 3'd0) && pending_dirty;
    // The hand-over itself only happens when a sample is actually accepted.
    wire commit_now = use_staged && data_valid;

    integer k;

    // Sum of coefficient * sample over all taps.
    //   from_staged : 1 selects staged_coeff, 0 selects active_coeff
    //   newest      : sample paired with tap 0; taps 1..TAPS-1 use
    //                 history[0..TAPS-2]
    // Every operand is signed and the 27-bit destination sets the expression
    // width, so each product is evaluated sign-extended to 27 bits.
    function signed [26:0] tap_sum;
        input from_staged;
        input signed [7:0] newest;
        integer t;
        reg signed [26:0] acc;
        begin
            acc = (from_staged ? staged_coeff[0] : active_coeff[0]) * newest;
            for (t = 1; t < TAPS; t = t + 1) begin
                acc = acc +
                      (from_staged ? staged_coeff[t] : active_coeff[t]) * history[t - 1];
            end
            tap_sum = acc;
        end
    endfunction

    always @(posedge clk) begin
        if (rst) begin
            frame_pos <= 3'd0;
            pending_dirty <= 1'b0;
            pending_valid <= 1'b0;
            pending_data <= 27'sd0;
            data_out_valid <= 1'b0;
            data_out <= 27'sd0;

            for (k = 0; k < TAPS; k = k + 1) begin
                staged_coeff[k] <= 16'sd0;
                active_coeff[k] <= 16'sd0;
            end

            for (k = 0; k < TAPS - 1; k = k + 1) begin
                history[k] <= 8'sd0;
            end
        end else begin
            // Output stage: forward whatever the previous edge produced.
            data_out_valid <= pending_valid;
            data_out <= pending_data;

            if (data_valid) begin
                // Filter the incoming sample against the stored history.
                pending_data <= tap_sum(use_staged, data_in);
                pending_valid <= 1'b1;

                // Shift the delay line; non-blocking assignments read the
                // pre-edge values, so the loop direction is irrelevant.
                for (k = TAPS - 1; k > 1; k = k - 1) begin
                    history[k - 1] <= history[k - 2];
                end
                history[0] <= data_in;

                // 3-bit counter: 7 + 1 wraps back to 0 on its own.
                frame_pos <= frame_pos + 3'd1;
            end else begin
                pending_data <= 27'sd0;
                pending_valid <= 1'b0;
            end

            // Hand the staged set over to the active set at a frame start.
            if (commit_now) begin
                for (k = 0; k < TAPS; k = k + 1) begin
                    active_coeff[k] <= staged_coeff[k];
                end
                pending_dirty <= 1'b0;
            end

            // A write is placed after the hand-over on purpose: the later
            // non-blocking assignment wins, so pending_dirty stays set when
            // a write and a hand-over coincide.
            if (coeff_wr_en) begin
                staged_coeff[coeff_addr] <= coeff_data;
                pending_dirty <= 1'b1;
            end
        end
    end
endmodule
