// Direct-mapped, write-through, no-write-allocate cache.
//
// Organisation: 256 lines of one 32-bit word each, indexed by address bits
// [9:2] and tagged with address bits [31:10]. Address bits [1:0] are ignored,
// so every access is treated as a full aligned word (there are no byte
// enables).
//
// CPU side:
//   * A request is taken in the idle state whenever cpu_req_valid is high.
//   * Read hit : cpu_resp_valid pulses for one cycle on the next clock edge
//                with the cached word on cpu_resp_data; cpu_req_ready stays
//                high.
//   * Read miss: cpu_req_ready drops, the word is fetched from memory, the
//                line is filled, and cpu_resp_valid pulses with the fetched
//                data as cpu_req_ready is raised again.
//   * Write    : always forwarded to memory (write-through). The cached word
//                is updated only when the line already holds that address
//                (no allocation on a write miss). No cpu_resp_valid pulse is
//                generated for writes; completion is signalled by
//                cpu_req_ready returning high.
//
// Memory side:
//   * mem_req_valid is held high, with mem_req_rw / mem_req_addr /
//     mem_req_data stable, until a clock edge at which mem_req_ready is also
//     high. A memory that is always ready therefore sees a one-cycle pulse.
//   * Read data is taken on mem_resp_valid.
//
// Reset is synchronous and active high; it clears every valid bit.
module cache_direct_mapped (
    input  wire        clk,
    input  wire        rst,
    
    // CPU Interface
    input  wire        cpu_req_valid,
    input  wire        cpu_req_rw,      // 0=Read, 1=Write
    input  wire [31:0] cpu_req_addr,
    input  wire [31:0] cpu_req_data,
    output reg         cpu_req_ready,
    output reg         cpu_resp_valid,
    output reg  [31:0] cpu_resp_data,

    // Memory Interface
    output reg         mem_req_valid,
    output reg         mem_req_rw,
    output reg  [31:0] mem_req_addr,
    output reg  [31:0] mem_req_data,
    input  wire        mem_req_ready,
    input  wire        mem_resp_valid,
    input  wire [31:0] mem_resp_data
);

    // Cache storage: 256 entries of {valid, 22-bit tag, 32-bit data}.
    reg [31:0] data_mem [0:255];
    reg [21:0] tag_mem  [0:255];
    reg        valid_mem [0:255];

    // Address decode of the incoming (not yet latched) request:
    //   [31:10] Tag    (22 bits)
    //   [9:2]   Index  (8 bits)
    //   [1:0]   Offset (2 bits, unused)
    wire [7:0]  index = cpu_req_addr[9:2];
    wire [21:0] tag   = cpu_req_addr[31:10];

    // State machine encoding. 2'd3 is unused and handled by the default arm.
    localparam STATE_IDLE = 2'd0;
    localparam STATE_WAIT_MEM_READ = 2'd1;
    localparam STATE_WAIT_MEM_WRITE = 2'd2;
    
    reg [1:0] state;

    // Request latched when it is taken in the idle state. Only the address is
    // consumed afterwards (to select the line to fill on a read miss);
    // r_cpu_data and r_cpu_rw are captured but not read by the state machine.
    reg [31:0] r_cpu_addr;
    reg [31:0] r_cpu_data;
    reg        r_cpu_rw;
    wire [7:0]  r_index = r_cpu_addr[9:2];
    wire [21:0] r_tag   = r_cpu_addr[31:10];

    // Loop variable for the reset sweep. Declared at module scope so the file
    // is accepted by Verilog-2001 tools as well as SystemVerilog ones.
    integer i;

    always @(posedge clk) begin
        if (rst) begin
            state <= STATE_IDLE;
            cpu_req_ready <= 1'b1;
            cpu_resp_valid <= 1'b0;
            mem_req_valid <= 1'b0;
            // Invalidate every line; tags and data need no reset because they
            // are only trusted when the matching valid bit is set.
            for (i = 0; i < 256; i = i + 1) valid_mem[i] <= 1'b0;
        end else begin
            // Defaults: the response strobe is a one-cycle pulse, and the
            // memory request drops unless a wait state below re-asserts it
            // because the memory has not accepted it yet.
            cpu_resp_valid <= 1'b0;
            mem_req_valid <= 1'b0;

            case (state)
                STATE_IDLE: begin
                    if (cpu_req_valid) begin
                        // Latch request
                        r_cpu_addr <= cpu_req_addr;
                        r_cpu_data <= cpu_req_data;
                        r_cpu_rw <= cpu_req_rw;

                        if (!cpu_req_rw) begin
                            if (valid_mem[index] && (tag_mem[index] == tag)) begin
                                // Read hit: answer from the cache, stay ready.
                                cpu_resp_valid <= 1'b1;
                                cpu_resp_data <= data_mem[index];
                                cpu_req_ready <= 1'b1;
                            end else begin
                                // Read miss: stall the CPU and fetch the word.
                                cpu_req_ready <= 1'b0;
                                mem_req_valid <= 1'b1;
                                mem_req_rw <= 1'b0;
                                mem_req_addr <= cpu_req_addr;
                                state <= STATE_WAIT_MEM_READ;
                            end
                        end else begin
                            // Write-through: every write goes to memory.
                            cpu_req_ready <= 1'b0;
                            mem_req_valid <= 1'b1;
                            mem_req_rw <= 1'b1;
                            mem_req_addr <= cpu_req_addr;
                            mem_req_data <= cpu_req_data;
                            state <= STATE_WAIT_MEM_WRITE;

                            // Keep a resident line coherent with memory; a
                            // write miss does not allocate a line.
                            if (valid_mem[index] && (tag_mem[index] == tag)) begin
                                data_mem[index] <= cpu_req_data;
                            end
                        end
                    end
                end

                STATE_WAIT_MEM_READ: begin
                    if (mem_req_valid && !mem_req_ready) begin
                        // Memory has not accepted the read yet: keep the
                        // request asserted instead of letting it drop.
                        mem_req_valid <= 1'b1;
                    end else if (mem_resp_valid) begin
                        // Fill the line selected by the latched address.
                        valid_mem[r_index] <= 1'b1;
                        tag_mem[r_index] <= r_tag;
                        data_mem[r_index] <= mem_resp_data;

                        // Forward the fetched word to the CPU.
                        cpu_resp_valid <= 1'b1;
                        cpu_resp_data <= mem_resp_data;

                        // Accept new requests again.
                        cpu_req_ready <= 1'b1;
                        state <= STATE_IDLE;
                    end
                end

                STATE_WAIT_MEM_WRITE: begin
                    if (mem_req_valid && !mem_req_ready) begin
                        // Write not accepted yet: hold it and keep the CPU
                        // stalled so the store cannot be lost.
                        mem_req_valid <= 1'b1;
                    end else begin
                        // Write accepted by memory; there is no write
                        // response channel, so the access is complete.
                        cpu_req_ready <= 1'b1;
                        state <= STATE_IDLE;
                    end
                end

                default: begin
                    // Unused encoding: recover to idle rather than locking up
                    // with no state arm selected.
                    cpu_req_ready <= 1'b1;
                    state <= STATE_IDLE;
                end
            endcase
        end
    end

endmodule
