`timescale 1ns / 1ps

module tb_cache_direct_mapped;

    // Clock and reset, both driven by this testbench.
    reg clk, rst;

    // CPU-side interface: the testbench plays the CPU and drives requests
    // into the cache; the cache drives ready/response back.
    reg         cpu_req_valid;            // request strobe
    reg         cpu_req_rw;               // 1 = write, 0 = read
    reg  [31:0] cpu_req_addr, cpu_req_data;
    wire        cpu_req_ready, cpu_resp_valid;
    wire [31:0] cpu_resp_data;

    // Memory-side interface: the cache issues requests; the behavioral
    // memory model in this testbench answers them.
    wire        mem_req_valid;
    wire        mem_req_rw;               // 1 = write, 0 = read
    wire [31:0] mem_req_addr, mem_req_data;
    reg         mem_req_ready, mem_resp_valid;
    reg  [31:0] mem_resp_data;

    // Bookkeeping for the test sequence.
    integer errors = 0, test_step = 0;

    // 4096 words x 32 bits = 16KB. Indexed by byte-address bits [13:2].
    reg [31:0] golden_mem [0:4095];
    
    
    // Instantiate DUT
    // Every port is connected by name to the identically named testbench
    // signal declared above.
    cache_direct_mapped uut (
        .clk(clk),
        .rst(rst),
        // CPU-facing request/response channel (stimulus comes from the
        // cpu_read / cpu_write tasks)
        .cpu_req_valid(cpu_req_valid),
        .cpu_req_rw(cpu_req_rw),
        .cpu_req_addr(cpu_req_addr),
        .cpu_req_data(cpu_req_data),
        .cpu_req_ready(cpu_req_ready),
        .cpu_resp_valid(cpu_resp_valid),
        .cpu_resp_data(cpu_resp_data),
        // Memory-facing request/response channel (served by the behavioral
        // memory model in this testbench)
        .mem_req_valid(mem_req_valid),
        .mem_req_rw(mem_req_rw),
        .mem_req_addr(mem_req_addr),
        .mem_req_data(mem_req_data),
        .mem_req_ready(mem_req_ready),
        .mem_resp_valid(mem_resp_valid),
        .mem_resp_data(mem_resp_data)
    );

    // Clock Generation
    // Free-running clock: starts low and toggles every 5 time units
    // (10-unit period; 100 MHz with the 1ns timescale).
    initial clk = 0;

    always #5 clk = ~clk;

    // Memory Simulation
    // Behavioral main-memory model with a fixed latency, backed by golden_mem.
    //
    // Protocol:
    //   * A request is accepted on a rising clock edge where mem_req_valid and
    //     mem_req_ready are both high. Writes update golden_mem at acceptance.
    //   * The model serves one access at a time. While an access is pending,
    //     mem_req_ready is held low so the requester stalls instead of having
    //     its request silently dropped (previously ready stayed high even
    //     though requests arriving during an access were ignored).
    //   * After the delay counter runs from MEM_LATENCY-1 down to 0, a read
    //     returns its data with a one-cycle mem_resp_valid pulse. Writes have
    //     no response on this interface; they simply complete.
    //
    // MEM_LATENCY must be in the range 1..16 (4-bit delay counter).
    parameter MEM_LATENCY = 4;
    reg [3:0] mem_delay_cnt;      // remaining wait cycles of the pending access
    reg       mem_processing;     // 1 while an access is pending
    reg       mem_is_write;       // latched mem_req_rw of the pending access
    reg [31:0] mem_addr_latched;  // latched mem_req_addr of the pending access

    initial begin
        mem_req_ready = 1; // Idle at start; dropped while an access is pending
        mem_resp_valid = 0;
        mem_resp_data = 0;
        mem_processing = 0;
    end

    always @(posedge clk) begin
        if (rst) begin
            mem_processing <= 0;
            mem_resp_valid <= 0;
            mem_req_ready <= 1;
        end else begin
            mem_resp_valid <= 0; // Default: response is a single-cycle pulse

            if (mem_processing) begin
                if (mem_delay_cnt > 0) begin
                    mem_delay_cnt <= mem_delay_cnt - 1;
                end else begin
                    // Access complete. Only reads produce a response.
                    if (!mem_is_write) begin
                        mem_resp_valid <= 1;
                        mem_resp_data <= golden_mem[mem_addr_latched[13:2]];
                    end
                    mem_processing <= 0;
                    mem_req_ready <= 1; // Ready for the next request
                end
            end else if (mem_req_valid && mem_req_ready) begin
                // Accept the request and go busy.
                mem_processing <= 1;
                mem_req_ready <= 0;
                mem_delay_cnt <= MEM_LATENCY - 1;
                mem_is_write <= mem_req_rw;
                mem_addr_latched <= mem_req_addr;

                if (mem_req_rw) begin
                    // Word-addressed store: byte-address bits [13:2] pick the word.
                    golden_mem[mem_req_addr[13:2]] <= mem_req_data;
                    // $display("TIME=%0t MEM_WRITE addr=%x data=%x", $time, mem_req_addr, mem_req_data);
                end else begin
                    // $display("TIME=%0t MEM_READ_REQ addr=%x", $time, mem_req_addr);
                end
            end
        end
    end

    // Timeout mechanism
    // Watchdog: if the main test sequence has not called $finish within
    // 100000 time units, assume a hang. The failure is reported with the same
    // TEST_RESULT line the normal path prints, so a log that is scanned for
    // TEST_RESULT always carries an explicit verdict.
    initial begin
        #100000;
        $display("ERROR: Simulation Timed Out");
        $display("TEST_RESULT: FAIL");
        $finish;
    end

    // Tasks
    // cpu_read: issue one CPU read request and check the returned data.
    //   addr     - byte address driven on cpu_req_addr
    //   exp_data - value cpu_resp_data must match (4-state compare)
    // A mismatch prints an error and increments the module-level error count.
    task cpu_read(input [31:0] addr, input [31:0] exp_data);
        begin
            // Poll (in 10-unit steps) until the DUT is not reporting "not ready".
            while (cpu_req_ready == 0) begin
                #10;
            end

            // Present the read request on a clock edge.
            @(posedge clk);
            cpu_req_addr  <= addr;
            cpu_req_rw    <= 0;
            cpu_req_valid <= 1;

            // Keep the request asserted for at least one edge, and further
            // until an edge where the DUT is ready.
            @(posedge clk);
            while (cpu_req_ready == 0) @(posedge clk);

            cpu_req_valid <= 0;

            // Stall until the response strobe is no longer low.
            while (cpu_resp_valid == 0) @(posedge clk);

            // !== also flags X/Z bits in the response as a mismatch.
            if (cpu_resp_data !== exp_data) begin
                $display("ERROR: Read Mismatch at %x. Exp: %x, Got: %x", addr, exp_data, cpu_resp_data);
                errors = errors + 1;
            end
        end
    endtask

    // cpu_write: issue one CPU write request and block until the DUT reports
    // ready again.
    //   addr - byte address driven on cpu_req_addr
    //   data - 32-bit value driven on cpu_req_data
    //
    // The task deliberately does NOT store into golden_mem. That array is the
    // backing store of the memory model, which updates it whenever the DUT
    // issues a memory write. Writing it here as well would plant the data in
    // memory behind the DUT's back, so a cache that never forwards the write
    // would still return the right value on a later refill and the bug would
    // go undetected.
    task cpu_write;
        input [31:0] addr;
        input [31:0] data;
        begin
            // Wait for Ready
            while (cpu_req_ready == 0) #10;

            // Present the write request on a clock edge.
            @(posedge clk);
            cpu_req_valid <= 1;
            cpu_req_rw <= 1;
            cpu_req_addr <= addr;
            cpu_req_data <= data;

            // Wait for handshake: hold the request for at least one edge and
            // until an edge where the DUT is ready.
            do begin
                @(posedge clk);
            end while (cpu_req_ready == 0);

            cpu_req_valid <= 0;

            // Wait for ready to return (write complete)
            while (cpu_req_ready == 0) @(posedge clk);
        end
    endtask

    integer i;

    // Main stimulus: preload memory, apply reset, run the directed tests and
    // print the final verdict.
    initial begin
        // Drive reset and park the CPU interface in an idle state.
        rst = 1;
        cpu_req_valid = 0;
        cpu_req_rw = 0;
        cpu_req_addr = 0;
        cpu_req_data = 0;

        // Preload memory so that every word holds its own word index.
        for (i = 0; i < 4096; i = i + 1) begin
            golden_mem[i] = i;
        end

        // Hold reset for 100 time units, then allow 20 more before stimulus.
        #100 rst = 0;
        #20;

        $display("--- Starting Tests ---");

        // Test 1: cold read miss.
        // Byte address 0x1000 is word index 0x400, preloaded with 0x400.
        cpu_read(32'h0000_1000, 32'h0000_0400);

        // Test 2: same address again, expected to hit.
        cpu_read(32'h0000_1000, 32'h0000_0400);

        // Test 3: write to the cached address.
        cpu_write(32'h0000_1000, 32'hDEAD_BEEF);

        // Test 4: read back the written value (expected hit).
        cpu_read(32'h0000_1000, 32'hDEAD_BEEF);

        // Test 5: conflict miss (same index, different tag).
        // Assuming the DUT indexes with addr[9:2] (256 entries of 4 bytes),
        // addresses 1024 bytes apart share an index. 0x1000 has index 0, and
        // so does 0x1400 = 0x1000 + 0x400. Byte address 0x1400 (5120) is
        // word index 5120 / 4 = 1280 = 0x500, preloaded with 0x500.
        cpu_read(32'h0000_1400, 32'h0000_0500);

        // Test 6: the first address should have been evicted by Test 5, so
        // this read is expected to miss and must still return the written data.
        cpu_read(32'h0000_1000, 32'hDEAD_BEEF);

        // Final verdict based on the accumulated mismatch count.
        if (errors == 0) begin
            $display("TEST_RESULT: PASS");
        end else begin
            $display("TEST_RESULT: FAIL");
        end
        $finish;
    end

endmodule
