// prefix_scan8
//
// Inclusive prefix sum (scan) across eight signed 12-bit lanes, followed by a
// four-register delay line.
//
//   in_data  : eight 12-bit two's-complement lanes, lane 0 in bits [11:0].
//   out_data : eight 15-bit lanes, lane k in bits [15*k +: 15], where lane k
//              carries the low 15 bits of (lane 0 + lane 1 + ... + lane k).
//              A sum of eight signed 12-bit values lies in [-16384, 16376],
//              so the 15-bit two's-complement slice holds it without loss.
//   in_valid : qualifies in_data; out_valid is in_valid delayed by four
//              rising clock edges.
//   rst      : synchronous, active high; clears every valid flag and data
//              register in the delay line.
//
// Whenever a stage's incoming valid flag is low, the data written to that
// stage is forced to zero, and out_data is likewise zero while out_valid is low.
module prefix_scan8 (
    input wire clk,
    input wire rst,
    input wire in_valid,
    input wire [95:0] in_data,
    output wire out_valid,
    output wire [119:0] out_data
);
    localparam LANES  = 8;               // number of input/output lanes
    localparam IN_W   = 12;              // bits per input lane
    localparam OUT_W  = 15;              // bits per output lane
    localparam DEPTH  = 4;               // registers between input and output
    localparam BUS_W  = LANES * OUT_W;   // packed output width (120)

    // Scratch variables for the combinational scan.
    reg signed [IN_W-1:0] term;          // current lane, interpreted as signed
    reg signed [15:0]     acc;           // running total
    reg [BUS_W-1:0]       scan_bus;      // packed prefix sums for this cycle

    // Delay line: valid flag and payload per stage.
    reg             vld_q [0:DEPTH-1];
    reg [BUS_W-1:0] dat_q [0:DEPTH-1];

    integer i;
    integer s;

    // Combinational inclusive scan: walk the lanes from 0 upward, keeping a
    // running total and dropping its low OUT_W bits into the matching slot.
    always @* begin
        scan_bus = {BUS_W{1'b0}};
        acc      = 16'sd0;

        for (i = 0; i < LANES; i = i + 1) begin
            term = in_data[(IN_W * i) +: IN_W];
            acc  = acc + term;
            scan_bus[(OUT_W * i) +: OUT_W] = acc[OUT_W-1:0];
        end
    end

    // Delay line with synchronous reset. Non-blocking assignments make the
    // update order irrelevant, so the shift is written tail-first.
    always @(posedge clk) begin
        if (rst) begin
            for (s = 0; s < DEPTH; s = s + 1) begin
                dat_q[s] <= {BUS_W{1'b0}};
                vld_q[s] <= 1'b0;
            end
        end else begin
            for (s = DEPTH - 1; s > 0; s = s - 1) begin
                // Payload is masked by the upstream valid flag so that an
                // invalid slot always carries zeros.
                dat_q[s] <= dat_q[s - 1] & {BUS_W{vld_q[s - 1]}};
                vld_q[s] <= vld_q[s - 1];
            end

            dat_q[0] <= scan_bus & {BUS_W{in_valid}};
            vld_q[0] <= in_valid;
        end
    end

    // Output is the last stage, masked once more by its own valid flag.
    assign out_valid = vld_q[DEPTH - 1];
    assign out_data  = dat_q[DEPTH - 1] & {BUS_W{vld_q[DEPTH - 1]}};
endmodule
