`timescale 1ns / 1ps

// =============================================================================
// Testbench for CNN 3x3 Convolution Layer
// 28x28 input, 4 output filters, stride=1, pad=1, bias + ReLU
// Golden values generated by: python3 generate_golden.py --array
// =============================================================================

module tb_cnn_conv3x3;
    // Clock, reset and start strobe driven by the testbench
    reg clk, rst;
    reg start;

    // Input pixel stream: the testbench drives pixel_in/pixel_valid and
    // observes pixel_ready
    reg  [7:0] pixel_in;
    reg        pixel_valid;
    wire       pixel_ready;

    // Output pixel stream: the testbench observes out_pixel/out_valid/done and
    // drives out_ready
    wire [31:0] out_pixel;  // 4 x 8-bit packed
    wire        out_valid;
    reg         out_ready;
    wire        done;

    // Scoreboard counters (start at zero) and a shared timeout counter
    integer errors = 0, pixels_sent = 0, pixels_received = 0;
    integer timeout_count;

    // Timeout limit (in clock cycles) and image geometry
    parameter MAX_TIMEOUT = 10000,
              IMG_W       = 28,
              IMG_H       = 28,
              IMG_SIZE    = IMG_W * IMG_H;  // 784
    
    // Kernels and biases (hardcoded in the DUT per spec, documented here for reference)
    // Filter 0: Horizontal Sobel [-1,-2,-1; 0,0,0; 1,2,1]  -> 72'h010201000000FFFEFF
    // Filter 1: Vertical Sobel   [-1,0,1; -2,0,2; -1,0,1]  -> 72'h0100FF0200FE0100FF
    // Filter 2: Gaussian blur    [1,2,1; 2,4,2; 1,2,1]     -> 72'h010201020402010201
    // Filter 3: Sharpen          [0,-1,0; -1,5,-1; 0,-1,0] -> 72'h00FF00FF05FF00FF00
    // Biases: [0, 0, 8, 10]                                -> 32'h0A080000
    
    // Input image buffer: gradient pattern (r*4 + c*2) % 256
    reg [7:0] test_image [0:783];
    
    // Golden expected outputs loaded from expected.hex (generated by generate_golden.py)
    // Supports run-directory differences via +expected_hex=<path> plusarg and fallback paths.
    /* Legacy inlined array retained for reference:
    reg [31:0] expected_output [0:783] = '{
        32'h041A0A0E, 32'h0A300C18, 32'h0E480C20, 32'h12600C28, 32'h16780C30, 32'h1A900C38, 32'h1EA80C40, 32'h22C00C48,
        32'h26D80C50, 32'h2AF00C58, 32'h2EFF0C60, 32'h32FF0C68, 32'h36FF0C70, 32'h3AFF0C78, 32'h3EFF0C80, 32'h42FF0C88,
        32'h46FF0C90, 32'h4AFF0C98, 32'h4EFF0CA0, 32'h52FF0CA8, 32'h56FF0CB0, 32'h5AFF0CB8, 32'h5EFF0CC0, 32'h62FF0CC8,
        32'h66FF0CD0, 32'h6AFF0CD8, 32'h6EFF0CE0, 32'hAAFF00AC, 32'h10401818, 32'h10681020, 32'h12881020, 32'h14A81020,
        32'h16C81020, 32'h18E81020, 32'h1AFF1020, 32'h1CFF1020, 32'h1EFF1020, 32'h20FF1020, 32'h22FF1020, 32'h24FF1020,
        32'h26FF1020, 32'h28FF1020, 32'h2AFF1020, 32'h2CFF1020, 32'h2EFF1020, 32'h30FF1020, 32'h32FF1020, 32'h34FF1020,
        32'h36FF1020, 32'h38FF1020, 32'h3AFF1020, 32'h3CFF1020, 32'h3EFF1020, 32'h40FF1020, 32'h42FF1020, 32'h80FF0018,
        32'h18702818, 32'h14A81020, 32'h16C81020, 32'h18E81020, 32'h1AFF1020, 32'h1CFF1020, 32'h1EFF1020, 32'h20FF1020,
        32'h22FF1020, 32'h24FF1020, 32'h26FF1020, 32'h28FF1020, 32'h2AFF1020, 32'h2CFF1020, 32'h2EFF1020, 32'h30FF1020,
        32'h32FF1020, 32'h34FF1020, 32'h36FF1020, 32'h38FF1020, 32'h3AFF1020, 32'h3CFF1020, 32'h3EFF1020, 32'h40FF1020,
        32'h42FF1020, 32'h44FF1020, 32'h46FF1020, 32'h88FF0018, 32'h20A03818, 32'h18E81020, 32'h1AFF1020, 32'h1CFF1020,
        32'h1EFF1020, 32'h20FF1020, 32'h22FF1020, 32'h24FF1020, 32'h26FF1020, 32'h28FF1020, 32'h2AFF1020, 32'h2CFF1020,
        32'h2EFF1020, 32'h30FF1020, 32'h32FF1020, 32'h34FF1020, 32'h36FF1020, 32'h38FF1020, 32'h3AFF1020, 32'h3CFF1020,
        32'h3EFF1020, 32'h40FF1020, 32'h42FF1020, 32'h44FF1020, 32'h46FF1020, 32'h48FF1020, 32'h4AFF1020, 32'h90FF0018,
        32'h28D04818, 32'h1CFF1020, 32'h1EFF1020, 32'h20FF1020, 32'h22FF1020, 32'h24FF1020, 32'h26FF1020, 32'h28FF1020,
        32'h2AFF1020, 32'h2CFF1020, 32'h2EFF1020, 32'h30FF1020, 32'h32FF1020, 32'h34FF1020, 32'h36FF1020, 32'h38FF1020,
        32'h3AFF1020, 32'h3CFF1020, 32'h3EFF1020, 32'h40FF1020, 32'h42FF1020, 32'h44FF1020, 32'h46FF1020, 32'h48FF1020,
        32'h4AFF1020, 32'h4CFF1020, 32'h4EFF1020, 32'h98FF0018, 32'h30FF5818, 32'h20FF1020, 32'h22FF1020, 32'h24FF1020,
        32'h26FF1020, 32'h28FF1020, 32'h2AFF1020, 32'h2CFF1020, 32'h2EFF1020, 32'h30FF1020, 32'h32FF1020, 32'h34FF1020,
        32'h36FF1020, 32'h38FF1020, 32'h3AFF1020, 32'h3CFF1020, 32'h3EFF1020, 32'h40FF1020, 32'h42FF1020, 32'h44FF1020,
        32'h46FF1020, 32'h48FF1020, 32'h4AFF1020, 32'h4CFF1020, 32'h4EFF1020, 32'h50FF1020, 32'h52FF1020, 32'hA0FF0018,
        32'h38FF6818, 32'h24FF1020, 32'h26FF1020, 32'h28FF1020, 32'h2AFF1020, 32'h2CFF1020, 32'h2EFF1020, 32'h30FF1020,
        32'h32FF1020, 32'h34FF1020, 32'h36FF1020, 32'h38FF1020, 32'h3AFF1020, 32'h3CFF1020, 32'h3EFF1020, 32'h40FF1020,
        32'h42FF1020, 32'h44FF1020, 32'h46FF1020, 32'h48FF1020, 32'h4AFF1020, 32'h4CFF1020, 32'h4EFF1020, 32'h50FF1020,
        32'h52FF1020, 32'h54FF1020, 32'h56FF1020, 32'hA8FF0018, 32'h40FF7818, 32'h28FF1020, 32'h2AFF1020, 32'h2CFF1020,
        32'h2EFF1020, 32'h30FF1020, 32'h32FF1020, 32'h34FF1020, 32'h36FF1020, 32'h38FF1020, 32'h3AFF1020, 32'h3CFF1020,
        32'h3EFF1020, 32'h40FF1020, 32'h42FF1020, 32'h44FF1020, 32'h46FF1020, 32'h48FF1020, 32'h4AFF1020, 32'h4CFF1020,
        32'h4EFF1020, 32'h50FF1020, 32'h52FF1020, 32'h54FF1020, 32'h56FF1020, 32'h58FF1020, 32'h5AFF1020, 32'hB0FF0018,
        32'h48FF8818, 32'h2CFF1020, 32'h2EFF1020, 32'h30FF1020, 32'h32FF1020, 32'h34FF1020, 32'h36FF1020, 32'h38FF1020,
        32'h3AFF1020, 32'h3CFF1020, 32'h3EFF1020, 32'h40FF1020, 32'h42FF1020, 32'h44FF1020, 32'h46FF1020, 32'h48FF1020,
        32'h4AFF1020, 32'h4CFF1020, 32'h4EFF1020, 32'h50FF1020, 32'h52FF1020, 32'h54FF1020, 32'h56FF1020, 32'h58FF1020,
        32'h5AFF1020, 32'h5CFF1020, 32'h5EFF1020, 32'hB8FF0018, 32'h50FF9818, 32'h30FF1020, 32'h32FF1020, 32'h34FF1020,
        32'h36FF1020, 32'h38FF1020, 32'h3AFF1020, 32'h3CFF1020, 32'h3EFF1020, 32'h40FF1020, 32'h42FF1020, 32'h44FF1020,
        32'h46FF1020, 32'h48FF1020, 32'h4AFF1020, 32'h4CFF1020, 32'h4EFF1020, 32'h50FF1020, 32'h52FF1020, 32'h54FF1020,
        32'h56FF1020, 32'h58FF1020, 32'h5AFF1020, 32'h5CFF1020, 32'h5EFF1020, 32'h60FF1020, 32'h62FF1020, 32'hC0FF0018,
        32'h58FFA818, 32'h34FF1020, 32'h36FF1020, 32'h38FF1020, 32'h3AFF1020, 32'h3CFF1020, 32'h3EFF1020, 32'h40FF1020,
        32'h42FF1020, 32'h44FF1020, 32'h46FF1020, 32'h48FF1020, 32'h4AFF1020, 32'h4CFF1020, 32'h4EFF1020, 32'h50FF1020,
        32'h52FF1020, 32'h54FF1020, 32'h56FF1020, 32'h58FF1020, 32'h5AFF1020, 32'h5CFF1020, 32'h5EFF1020, 32'h60FF1020,
        32'h62FF1020, 32'h64FF1020, 32'h66FF1020, 32'hC8FF0018, 32'h60FFB818, 32'h38FF1020, 32'h3AFF1020, 32'h3CFF1020,
        32'h3EFF1020, 32'h40FF1020, 32'h42FF1020, 32'h44FF1020, 32'h46FF1020, 32'h48FF1020, 32'h4AFF1020, 32'h4CFF1020,
        32'h4EFF1020, 32'h50FF1020, 32'h52FF1020, 32'h54FF1020, 32'h56FF1020, 32'h58FF1020, 32'h5AFF1020, 32'h5CFF1020,
        32'h5EFF1020, 32'h60FF1020, 32'h62FF1020, 32'h64FF1020, 32'h66FF1020, 32'h68FF1020, 32'h6AFF1020, 32'hD0FF0018,
        32'h68FFC818, 32'h3CFF1020, 32'h3EFF1020, 32'h40FF1020, 32'h42FF1020, 32'h44FF1020, 32'h46FF1020, 32'h48FF1020,
        32'h4AFF1020, 32'h4CFF1020, 32'h4EFF1020, 32'h50FF1020, 32'h52FF1020, 32'h54FF1020, 32'h56FF1020, 32'h58FF1020,
        32'h5AFF1020, 32'h5CFF1020, 32'h5EFF1020, 32'h60FF1020, 32'h62FF1020, 32'h64FF1020, 32'h66FF1020, 32'h68FF1020,
        32'h6AFF1020, 32'h6CFF1020, 32'h6EFF1020, 32'hD8FF0018, 32'h70FFD818, 32'h40FF1020, 32'h42FF1020, 32'h44FF1020,
        32'h46FF1020, 32'h48FF1020, 32'h4AFF1020, 32'h4CFF1020, 32'h4EFF1020, 32'h50FF1020, 32'h52FF1020, 32'h54FF1020,
        32'h56FF1020, 32'h58FF1020, 32'h5AFF1020, 32'h5CFF1020, 32'h5EFF1020, 32'h60FF1020, 32'h62FF1020, 32'h64FF1020,
        32'h66FF1020, 32'h68FF1020, 32'h6AFF1020, 32'h6CFF1020, 32'h6EFF1020, 32'h70FF1020, 32'h72FF1020, 32'hE0FF0018,
        32'h78FFE818, 32'h44FF1020, 32'h46FF1020, 32'h48FF1020, 32'h4AFF1020, 32'h4CFF1020, 32'h4EFF1020, 32'h50FF1020,
        32'h52FF1020, 32'h54FF1020, 32'h56FF1020, 32'h58FF1020, 32'h5AFF1020, 32'h5CFF1020, 32'h5EFF1020, 32'h60FF1020,
        32'h62FF1020, 32'h64FF1020, 32'h66FF1020, 32'h68FF1020, 32'h6AFF1020, 32'h6CFF1020, 32'h6EFF1020, 32'h70FF1020,
        32'h72FF1020, 32'h74FF1020, 32'h76FF1020, 32'hE8FF0018, 32'h80FFF818, 32'h48FF1020, 32'h4AFF1020, 32'h4CFF1020,
        32'h4EFF1020, 32'h50FF1020, 32'h52FF1020, 32'h54FF1020, 32'h56FF1020, 32'h58FF1020, 32'h5AFF1020, 32'h5CFF1020,
        32'h5EFF1020, 32'h60FF1020, 32'h62FF1020, 32'h64FF1020, 32'h66FF1020, 32'h68FF1020, 32'h6AFF1020, 32'h6CFF1020,
        32'h6EFF1020, 32'h70FF1020, 32'h72FF1020, 32'h74FF1020, 32'h76FF1020, 32'h78FF1020, 32'h7AFF1020, 32'hF0FF0018,
        32'h88FFFF18, 32'h4CFF1020, 32'h4EFF1020, 32'h50FF1020, 32'h52FF1020, 32'h54FF1020, 32'h56FF1020, 32'h58FF1020,
        32'h5AFF1020, 32'h5CFF1020, 32'h5EFF1020, 32'h60FF1020, 32'h62FF1020, 32'h64FF1020, 32'h66FF1020, 32'h68FF1020,
        32'h6AFF1020, 32'h6CFF1020, 32'h6EFF1020, 32'h70FF1020, 32'h72FF1020, 32'h74FF1020, 32'h76FF1020, 32'h78FF1020,
        32'h7AFF1020, 32'h7CFF1020, 32'h7EFF1020, 32'hF8FF0018, 32'h90FFFF18, 32'h50FF1020, 32'h52FF1020, 32'h54FF1020,
        32'h56FF1020, 32'h58FF1020, 32'h5AFF1020, 32'h5CFF1020, 32'h5EFF1020, 32'h60FF1020, 32'h62FF1020, 32'h64FF1020,
        32'h66FF1020, 32'h68FF1020, 32'h6AFF1020, 32'h6CFF1020, 32'h6EFF1020, 32'h70FF1020, 32'h72FF1020, 32'h74FF1020,
        32'h76FF1020, 32'h78FF1020, 32'h7AFF1020, 32'h7CFF1020, 32'h7EFF1020, 32'h80FF1020, 32'h82FF1020, 32'hFFFF0018,
        32'h98FFFF18, 32'h54FF1020, 32'h56FF1020, 32'h58FF1020, 32'h5AFF1020, 32'h5CFF1020, 32'h5EFF1020, 32'h60FF1020,
        32'h62FF1020, 32'h64FF1020, 32'h66FF1020, 32'h68FF1020, 32'h6AFF1020, 32'h6CFF1020, 32'h6EFF1020, 32'h70FF1020,
        32'h72FF1020, 32'h74FF1020, 32'h76FF1020, 32'h78FF1020, 32'h7AFF1020, 32'h7CFF1020, 32'h7EFF1020, 32'h80FF1020,
        32'h82FF1020, 32'h84FF1020, 32'h86FF1020, 32'hFFFF0018, 32'hA0FFFF18, 32'h58FF1020, 32'h5AFF1020, 32'h5CFF1020,
        32'h5EFF1020, 32'h60FF1020, 32'h62FF1020, 32'h64FF1020, 32'h66FF1020, 32'h68FF1020, 32'h6AFF1020, 32'h6CFF1020,
        32'h6EFF1020, 32'h70FF1020, 32'h72FF1020, 32'h74FF1020, 32'h76FF1020, 32'h78FF1020, 32'h7AFF1020, 32'h7CFF1020,
        32'h7EFF1020, 32'h80FF1020, 32'h82FF1020, 32'h84FF1020, 32'h86FF1020, 32'h88FF1020, 32'h8AFF1020, 32'hFFFF0018,
        32'hA8FFFF18, 32'h5CFF1020, 32'h5EFF1020, 32'h60FF1020, 32'h62FF1020, 32'h64FF1020, 32'h66FF1020, 32'h68FF1020,
        32'h6AFF1020, 32'h6CFF1020, 32'h6EFF1020, 32'h70FF1020, 32'h72FF1020, 32'h74FF1020, 32'h76FF1020, 32'h78FF1020,
        32'h7AFF1020, 32'h7CFF1020, 32'h7EFF1020, 32'h80FF1020, 32'h82FF1020, 32'h84FF1020, 32'h86FF1020, 32'h88FF1020,
        32'h8AFF1020, 32'h8CFF1020, 32'h8EFF1020, 32'hFFFF0018, 32'hB0FFFF18, 32'h60FF1020, 32'h62FF1020, 32'h64FF1020,
        32'h66FF1020, 32'h68FF1020, 32'h6AFF1020, 32'h6CFF1020, 32'h6EFF1020, 32'h70FF1020, 32'h72FF1020, 32'h74FF1020,
        32'h76FF1020, 32'h78FF1020, 32'h7AFF1020, 32'h7CFF1020, 32'h7EFF1020, 32'h80FF1020, 32'h82FF1020, 32'h84FF1020,
        32'h86FF1020, 32'h88FF1020, 32'h8AFF1020, 32'h8CFF1020, 32'h8EFF1020, 32'h90FF1020, 32'h92FF1020, 32'hFFFF0018,
        32'hB8FFFF18, 32'h64FF1020, 32'h66FF1020, 32'h68FF1020, 32'h6AFF1020, 32'h6CFF1020, 32'h6EFF1020, 32'h70FF1020,
        32'h72FF1020, 32'h74FF1020, 32'h76FF1020, 32'h78FF1020, 32'h7AFF1020, 32'h7CFF1020, 32'h7EFF1020, 32'h80FF1020,
        32'h82FF1020, 32'h84FF1020, 32'h86FF1020, 32'h88FF1020, 32'h8AFF1020, 32'h8CFF1020, 32'h8EFF1020, 32'h90FF1020,
        32'h92FF1020, 32'h94FF1020, 32'h96FF1020, 32'hFFFF0018, 32'hC0FFFF18, 32'h68FF1020, 32'h6AFF1020, 32'h6CFF1020,
        32'h6EFF1020, 32'h70FF1020, 32'h72FF1020, 32'h74FF1020, 32'h76FF1020, 32'h78FF1020, 32'h7AFF1020, 32'h7CFF1020,
        32'h7EFF1020, 32'h80FF1020, 32'h82FF1020, 32'h84FF1020, 32'h86FF1020, 32'h88FF1020, 32'h8AFF1020, 32'h8CFF1020,
        32'h8EFF1020, 32'h90FF1020, 32'h92FF1020, 32'h94FF1020, 32'h96FF1020, 32'h98FF1020, 32'h9AFF1020, 32'hFFFF0018,
        32'hC8FFFF18, 32'h6CFF1020, 32'h6EFF1020, 32'h70FF1020, 32'h72FF1020, 32'h74FF1020, 32'h76FF1020, 32'h78FF1020,
        32'h7AFF1020, 32'h7CFF1020, 32'h7EFF1020, 32'h80FF1020, 32'h82FF1020, 32'h84FF1020, 32'h86FF1020, 32'h88FF1020,
        32'h8AFF1020, 32'h8CFF1020, 32'h8EFF1020, 32'h90FF1020, 32'h92FF1020, 32'h94FF1020, 32'h96FF1020, 32'h98FF1020,
        32'h9AFF1020, 32'h9CFF1020, 32'h9EFF1020, 32'hFFFF0018, 32'hD0FFFF18, 32'h70FF1020, 32'h72FF1020, 32'h74FF1020,
        32'h76FF1020, 32'h78FF1020, 32'h7AFF1020, 32'h7CFF1020, 32'h7EFF1020, 32'h80FF1020, 32'h82FF1020, 32'h84FF1020,
        32'h86FF1020, 32'h88FF1020, 32'h8AFF1020, 32'h8CFF1020, 32'h8EFF1020, 32'h90FF1020, 32'h92FF1020, 32'h94FF1020,
        32'h96FF1020, 32'h98FF1020, 32'h9AFF1020, 32'h9CFF1020, 32'h9EFF1020, 32'hA0FF1020, 32'hA2FF1020, 32'hFFFF0018,
        32'hD8FFFF18, 32'h74FF1020, 32'h76FF1020, 32'h78FF1020, 32'h7AFF1020, 32'h7CFF1020, 32'h7EFF1020, 32'h80FF1020,
        32'h82FF1020, 32'h84FF1020, 32'h86FF1020, 32'h88FF1020, 32'h8AFF1020, 32'h8CFF1020, 32'h8EFF1020, 32'h90FF1020,
        32'h92FF1020, 32'h94FF1020, 32'h96FF1020, 32'h98FF1020, 32'h9AFF1020, 32'h9CFF1020, 32'h9EFF1020, 32'hA0FF1020,
        32'hA2FF1020, 32'hA4FF1020, 32'hA6FF1020, 32'hFFFF0018, 32'hFFFFFF00, 32'hEAFF0C00, 32'hEEFF0C00, 32'hF2FF0C00,
        32'hF6FF0C00, 32'hFAFF0C00, 32'hFEFF0C00, 32'hFFFF0C00, 32'hFFFF0C00, 32'hFFFF0C00, 32'hFFFF0C00, 32'hFFFF0C00,
        32'hFFFF0C00, 32'hFFFF0C00, 32'hFFFF0C00, 32'hFFFF0C00, 32'hFFFF0C00, 32'hFFFF0C00, 32'hFFFF0C00, 32'hFFFF0C00,
        32'hFFFF0C00, 32'hFFFF0C00, 32'hFFFF0C00, 32'hFFFF0C00, 32'hFFFF0C00, 32'hFFFF0C00, 32'hFFFF0C00, 32'hFFFF0000
    }; */
    // Golden reference words, one 32-bit entry per output pixel; filled by
    // $readmemh in the load_expected_output block.
    reg [31:0] expected_output [0:783];
    // Storage for the +expected_hex=<path> plusarg string (256 bytes)
    reg [8*256-1:0] expected_path;
    // File handle used for existence probing, plus shared loop indices:
    // i = linear pixel index in the send loop, r/c = row/column in image init.
    integer expected_file, i, r, c;
    
    // Device under test. Only clock/control and the two streaming interfaces are
    // wired up; the kernels are hardcoded in the solution per spec.
    cnn_conv3x3 uut (
        // Clock and control
        .clk         (clk),
        .rst         (rst),
        .start       (start),
        .done        (done),
        // Input pixel stream
        .pixel_in    (pixel_in),
        .pixel_valid (pixel_valid),
        .pixel_ready (pixel_ready),
        // Output pixel stream
        .out_pixel   (out_pixel),
        .out_valid   (out_valid),
        .out_ready   (out_ready)
    );
    
    // Free-running clock: starts low and toggles every 5 time units
    // (10 ns period = 100 MHz with the 1ns timescale above).
    initial begin
        clk = 1'b0;
        forever begin
            #5;
            clk = ~clk;
        end
    end
    
    // Fill test_image in row-major order with the gradient 4*row + 2*col,
    // kept to 8 bits (equivalent to taking the value modulo 256).
    initial begin
        for (r = 0; r < IMG_H; r = r + 1)
            for (c = 0; c < IMG_W; c = c + 1)
                test_image[r * IMG_W + c] = (4 * r + 2 * c) & 32'h0000_00FF;
    end

    // Load golden outputs from file (Icarus-friendly replacement for array literals).
    //
    // Each candidate path is first probed with $fopen so that a missing file
    // produces a clear testbench message; $readmemh is only called on a path
    // that opened successfully.
    //
    // Path priority:
    //   1) +expected_hex=<path> (recommended for external runners). If the
    //      plusarg is given but the file cannot be opened, the run is aborted;
    //      the fallbacks below are NOT tried in that case.
    //   2) expected.hex (when run inside the benchmark folder)
    //   3) benchmarks/problems/004_cnn_conv3x3/expected.hex (repo root run)
    //   4) Absolute workspace path fallback (machine-specific)
    initial begin : load_expected_output
        if ($value$plusargs("expected_hex=%s", expected_path)) begin
            expected_file = $fopen(expected_path, "r");
            if (!expected_file) begin
                $display("ERROR: +expected_hex file not found: %0s", expected_path);
                $finish;
            end
            else begin
                // Keep the load in the else branch: a simulator may keep
                // executing this process after $finish, and the load must not
                // run (or report success) for a file that could not be opened.
                $fclose(expected_file);
                $readmemh(expected_path, expected_output);
                $display("Loaded expected output from %0s", expected_path);
            end
        end
        else begin
            expected_file = $fopen("expected.hex", "r");
            if (expected_file) begin
                $fclose(expected_file);
                $readmemh("expected.hex", expected_output);
                $display("Loaded expected output from expected.hex");
            end
            else begin
                expected_file = $fopen("benchmarks/problems/004_cnn_conv3x3/expected.hex", "r");
                if (expected_file) begin
                    $fclose(expected_file);
                    $readmemh("benchmarks/problems/004_cnn_conv3x3/expected.hex", expected_output);
                    $display("Loaded expected output from benchmarks/problems/004_cnn_conv3x3/expected.hex");
                end
                else begin
                    expected_file = $fopen("/home/bubshait/Desktop/FYP/evaluationPipeline/benchmarks/problems/004_cnn_conv3x3/expected.hex", "r");
                    if (expected_file) begin
                        $fclose(expected_file);
                        $readmemh("/home/bubshait/Desktop/FYP/evaluationPipeline/benchmarks/problems/004_cnn_conv3x3/expected.hex", expected_output);
                        $display("Loaded expected output from absolute workspace path");
                    end
                    else begin
                        $display("ERROR: Could not locate expected.hex.");
                        $display("ERROR: Tried +expected_hex, expected.hex, repo-relative, and absolute workspace paths.");
                        $finish;
                    end
                end
            end
        end
    end
    
    // Test Procedure
    //
    // Sequence:
    //   1. Drive idle values, hold rst high for 5 rising clock edges, release it
    //      and wait 2 more edges.
    //   2. Pulse start high from one falling edge to the next.
    //   3. In parallel (fork/join):
    //        - send_block streams test_image[0..IMG_SIZE-1] into the DUT, holding
    //          each pixel until pixel_ready is seen high on a rising edge;
    //        - recv_block compares every accepted output word with
    //          expected_output[] and counts mismatches in errors.
    //   4. Wait up to 100 cycles for done, print the summary and TEST_RESULT,
    //      then end the simulation.
    //
    // out_ready is held high for the whole run, so no output back-pressure is
    // exercised.
    initial begin
        // Initialize
        rst = 1;
        start = 0;
        pixel_in = 0;
        pixel_valid = 0;
        out_ready = 1;
        
        $display("===========================================");
        $display("  CNN 3x3 Convolution Testbench");
        $display("  28x28 input, 4 filters, stride=1, pad=1");
        $display("  Golden values from generate_golden.py");
        $display("===========================================");
        
        // Reset sequence
        repeat(5) @(posedge clk);
        rst = 0;
        repeat(2) @(posedge clk);
        
        // Start convolution (drive on negedge to avoid posedge race with DUT sampling)
        @(negedge clk);
        start = 1;
        @(negedge clk);
        start = 0;
        
        // Stream input pixels
        fork
            // Send pixels
            begin : send_block
                for (i = 0; i < IMG_SIZE; i = i + 1) begin
                    // Drive testbench outputs on negedge to avoid races with DUT posedge sampling.
                    @(negedge clk);
                    pixel_in = test_image[i];
                    pixel_valid = 1;

                    // Wait until this pixel is accepted: pixel_ready sampled high
                    // on a rising edge while pixel_valid is asserted.
                    timeout_count = 0;
                    begin : wait_for_accept
                        while (timeout_count < MAX_TIMEOUT) begin
                            @(posedge clk);
                            if (pixel_ready) begin
                                pixels_sent = pixels_sent + 1;
                                // Leave the wait loop before the counter is bumped,
                                // so an accepted pixel never looks like a timeout.
                                disable wait_for_accept;
                            end
                            timeout_count = timeout_count + 1;
                        end
                    end

                    // Only reachable with a saturated counter when the pixel was
                    // never accepted; abort the sender and let recv_block time out.
                    if (timeout_count >= MAX_TIMEOUT) begin
                        $display("ERROR: Timeout waiting for pixel_ready at pixel %0d", i);
                        errors = errors + 1;
                        disable send_block;
                    end
                end
                @(negedge clk);
                pixel_valid = 0;
                $display("All %0d input pixels sent", pixels_sent);
            end
            
            // Receive outputs
            begin : recv_block
                // Consecutive cycles without an accepted output; reset on every
                // transfer, so the limit is an inactivity timeout, not a total.
                integer local_timeout;
                local_timeout = 0;
                while (pixels_received < IMG_SIZE && local_timeout < MAX_TIMEOUT * 2) begin
                    @(posedge clk);
                    if (out_valid && out_ready) begin
                        // Check against expected from generate_golden.py
                        // (!== so that X/Z bits in either word count as a mismatch)
                        if (out_pixel !== expected_output[pixels_received]) begin
                            if (errors < 20) begin  // Limit error messages
                                $display("ERROR: Pixel %0d: Expected 0x%08X, got 0x%08X",
                                         pixels_received, expected_output[pixels_received], out_pixel);
                            end
                            errors = errors + 1;
                        end
                        
                        pixels_received = pixels_received + 1;
                        local_timeout = 0;
                    end else begin
                        local_timeout = local_timeout + 1;
                    end
                end
                
                // Every output that never arrived counts as one error.
                if (pixels_received < IMG_SIZE) begin
                    $display("ERROR: Timeout waiting for outputs. Received %0d/%0d",
                             pixels_received, IMG_SIZE);
                    errors = errors + (IMG_SIZE - pixels_received);
                end
            end
        join
        
        // Wait for done signal (at most 100 rising edges).
        // Compare with !== 1'b1 so that an X/Z done keeps waiting instead of
        // making the loop condition unknown and ending the wait immediately.
        timeout_count = 0;
        while (done !== 1'b1 && timeout_count < 100) begin
            @(posedge clk);
            timeout_count = timeout_count + 1;
        end

        // Report a missing done instead of ignoring it. This is a warning only:
        // it does not change errors, so the pass/fail criteria are unchanged.
        if (done !== 1'b1) begin
            $display("WARNING: done not observed high within %0d cycles after streaming completed",
                     timeout_count);
        end
        
        // Final Results
        $display("");
        $display("===========================================");
        $display("  Pixels Sent: %0d", pixels_sent);
        $display("  Pixels Received: %0d", pixels_received);
        $display("===========================================");
        
        if (errors == 0) begin
            $display("TEST_RESULT: PASS");
        end else begin
            $display("TEST_RESULT: FAIL (%0d errors)", errors);
        end
        
        $finish;
    end

endmodule
