library(ggplot2)
library(readr) # read_csv
library(dplyr) # filter, mutate
library(tidyr) # complete
library(scales)

# Usage: Rscript single_heatmap.r <experiment_dir> [faults_file]
#   faults_file is optional and defaults to "faults.csv" inside experiment_dir

# =============================================================================
# CONFIG
# =============================================================================

# Bytes per heatmap row to start with; make_heatmap() may double this.
row_width_init <- 16L

# Upper bound on occupied rows; above it the row width is doubled.
max_rows <- 64L

# Desired number of tick labels along the x-axis.
n_x_ticks <- 16L

# Target plotting area in inches, margins excluded (width, height).
target_w <- 10.0
target_h <- 6.0

# Cap on the edge length of one tile (inches) so sparse grids stay compact.
max_tile <- 0.5

# One heatmap is generated per (benchmark, marker) combination.
benchmarks <- c("ip", "mem", "regs")
markers <- c(
  "OK_MARKER", "FAIL_MARKER", "DETECTED_MARKER",
  "ACCESS_OUTERSPACE", "WRITE_TEXTSEGMENT", "GROUP1_MARKER",
  "TRAP", "TIMEOUT"
)

# Memory regions to outline on the heatmap.
# Each entry pairs a display label with the names of the symbols that mark
# the region's start and end (the _start/_end symbols from linker.ld).
# The result is an unnamed list of list(label, start, end).
regions <- local({
  # One region per row: label, start symbol, end symbol
  region_table <- matrix(
    c(
      "WAMR AOT", "_wamr_aot_start", "_wamr_aot_end",
      "WAMR os_mmap", "_wamr_mmap_start", "_wamr_mmap_end",
      "WAMR runtime mem", "_wamr_runtime_pool_start", "_wamr_runtime_pool_end",
      "WAMR linear mem", "_wamr_linear_pool_start", "_wamr_linear_pool_end",
      "WAMR global heap", "_wamr_global_heap_start", "_wamr_global_heap_end",
      "IWASM AOT runtime", "_iwasm_aot_runtime_start", "_iwasm_aot_runtime_end",
      "IWASM bh/util", "_iwasm_bh_start", "_iwasm_bh_end",
      "IWASM mem_alloc", "_iwasm_mem_alloc_start", "_iwasm_mem_alloc_end",
      "IWASM platform", "_iwasm_platform_init_start", "_iwasm_platform_init_end",
      "IWASM exec_env", "_iwasm_exec_env_start", "_iwasm_exec_env_end",
      "IWASM interp", "_iwasm_interp_classic_start", "_iwasm_interp_classic_end",
      "IWASM memory", "_iwasm_memory_start", "_iwasm_memory_end",
      "IWASM native", "_iwasm_native_start", "_iwasm_native_end",
      "IWASM runtime", "_iwasm_runtime_start", "_iwasm_runtime_end",
      "TEXT", "_text_start", "_text_end",
      "BSS", "_sbss", "_ebss"
    ),
    ncol = 3L,
    byrow = TRUE
  )

  lapply(seq_len(nrow(region_table)), function(i) {
    list(
      label = region_table[i, 1L],
      start = region_table[i, 2L],
      end = region_table[i, 3L]
    )
  })
})

# =============================================================================
# HELPER
# =============================================================================

# Number of distinct heatmap rows that a set of addresses falls into.
# - addr_ints: vector of addresses
# - rw: row width (addresses per row)
# Each address is binned with %/% and mapped back to the base address of its
# row with *; the count of distinct base addresses is returned.
n_occupied_rows <- function(addr_ints, rw) {
  bin_index <- addr_ints %/% rw
  row_bases <- bin_index * rw
  sum(!duplicated(row_bases))
}

# =============================================================================
# CLI
# =============================================================================

# Positional arguments:
#   1. experiment directory (required)
#   2. faults CSV file name inside that directory (optional, "faults.csv")
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1) {
  stop(
    "Usage: Rscript single_heatmap.r <experiment_dir> [faults_file]",
    call. = FALSE
  )
}

experiment <- args[1]
faults_file <- if (length(args) >= 2) args[2] else "faults.csv"

# Output-name suffix: the CSV file name minus a leading "faults" and a
# trailing ".csv" (e.g. "faults_v2.csv" -> "_v2", "faults.csv" -> "").
# basename() keeps directory components of faults_file out of the suffix so
# they cannot end up inside the generated heatmap file names.
suffix <- gsub("^faults|\\.csv$", "", basename(faults_file))

# =============================================================================
# INPUT DATA  (read once)
# =============================================================================

# Locate the faults CSV inside the experiment directory and abort early when
# it is missing.
datafile <- file.path(experiment, faults_file)
if (!file.exists(datafile)) {
  stop("Input file not found: ", datafile)
}

# Column types are pinned for the four columns used below; fault_address is
# kept as text (hex string such as "0x10001A") and converted later.
raw <- datafile |>
  read_csv(
    col_types = cols(
      benchmark = col_character(),
      resulttype = col_character(),
      faults = col_double(),
      fault_address = col_character()
    )
  )

# =============================================================================
# ELF SYMBOLS  (parsed once)
# =============================================================================

# Named vector: sym_addr["_text_start"] = 0x10001a
# Addresses are stored as doubles: strtoi() returns NA for anything above
# 0x7FFFFFFF, which would silently drop regions located at high addresses.
# Stays empty when system.elf is missing or nm yields no usable output.
sym_addr <- setNames(numeric(0), character(0))

elf_file <- file.path(experiment, "system.elf")

if (!file.exists(elf_file)) {
  message("system.elf not found")
} else {
  # Parse elf regions
  # shQuote() keeps a path containing spaces or shell metacharacters as a
  # single argument; system2() pastes its args into one command line.
  nm_lines <- tryCatch(
    system2("nm", args = shQuote(elf_file), stdout = TRUE, stderr = FALSE),
    error = function(e) {
      message("nm failed (", conditionMessage(e), ")")
      NULL
    }
  )

  if (!is.null(nm_lines) && length(nm_lines) > 0) {
    # Each nm line: "0010001a T _text_start"
    # Groups: 1 = hex address, 2 = symbol name
    # Lines without an address (e.g. undefined symbols) do not match.
    pattern <- "^([0-9a-fA-F]+)\\s+\\S+\\s+(\\S+)"
    matches <- regmatches(nm_lines, regexec(pattern, nm_lines))
    valid <- Filter(function(m) length(m) == 3, matches)

    # vapply() always yields character vectors, also for zero matches
    sym_hex <- vapply(valid, function(m) m[[2L]], character(1))
    sym_names <- vapply(valid, function(m) m[[3L]], character(1))

    # Drop leading zeros, then convert via R's hex-constant parsing.
    # At most 13 significant hex digits (< 2^52) are converted so every value
    # is exactly representable as a double; longer addresses become NA.
    sym_hex <- sub("^0+(?=[0-9a-fA-F])", "", sym_hex, perl = TRUE)
    exact <- nchar(sym_hex) <= 13L
    sym_values <- rep(NA_real_, length(sym_hex))
    sym_values[exact] <- as.numeric(paste0("0x", sym_hex[exact]))

    sym_addr <- setNames(sym_values, sym_names)
  }
}

# =============================================================================
# HEATMAP
# =============================================================================

# Convert hex address strings to numeric values.
# - Surrounding whitespace and an optional "0x"/"0X" prefix are removed
# - Doubles are returned because strtoi() yields NA above 0x7FFFFFFF
# - At most 13 significant hex digits (< 2^52) are accepted so every value is
#   exactly representable; everything else (including NA input) becomes NA
hex_to_numeric <- function(hex) {
  digits <- sub("^0[xX]", "", trimws(hex))
  digits <- sub("^0+(?=[0-9a-fA-F])", "", digits, perl = TRUE)
  parsable <- !is.na(digits) & grepl("^[0-9a-fA-F]{1,13}$", digits)
  values <- rep(NA_real_, length(digits))
  values[parsable] <- as.numeric(paste0("0x", digits[parsable]))
  values
}

# Format non-negative whole numbers as "0x..." upper-case hex strings.
# sprintf("%X") only accepts values in integer range, so the address is split
# into a high part and a 24-bit low part that are formatted separately.
numeric_to_hex <- function(addr) {
  high <- addr %/% 16777216
  low <- addr %% 16777216
  ifelse(
    high > 0,
    sprintf("0x%X%06X", high, low),
    sprintf("0x%X", low)
  )
}

# Render and save one heatmap of fault counts per address.
#
# Arguments:
# - target_resulttype: value of the resulttype column to plot
# - target_benchmark: value of the benchmark column to plot
#
# Uses the script-level objects raw, sym_addr, regions, experiment, suffix and
# the CONFIG values. Writes
# "heatmap_<resulttype>_<benchmark><suffix>.svg" into the experiment
# directory. Returns NULL invisibly; combinations without (usable) data are
# skipped without error.
make_heatmap <- function(target_resulttype, target_benchmark) {
  # ===========================================================================
  # FILTER
  # ===========================================================================

  filtered <- raw |>
    filter(
      .data$resulttype == target_resulttype,
      .data$benchmark == target_benchmark
    )

  # Nothing to draw — skip without error
  if (nrow(filtered) == 0) {
    return(invisible(NULL))
  }

  # ===========================================================================
  # ADDRESS HEX -> NUMBER
  # ===========================================================================

  # "0x10001A" -> 1048602 (column keeps the name addr_int, holds doubles)
  aggregated <- filtered |>
    select(fault_address, faults) |>
    mutate(addr_int = hex_to_numeric(.data$fault_address))

  # Rows whose address cannot be parsed have no place on the grid
  unparsable <- is.na(aggregated$addr_int)
  if (any(unparsable)) {
    message(sprintf(
      "Note: [%s/%s] dropped %d row(s) with unparsable fault_address",
      target_resulttype,
      target_benchmark,
      sum(unparsable)
    ))
    aggregated <- aggregated |>
      filter(!is.na(.data$addr_int))
  }

  if (nrow(aggregated) == 0) {
    return(invisible(NULL))
  }

  # ===========================================================================
  # SCALE ROWS
  # ===========================================================================

  # Local copy so different combinations can scale independently.
  row_width <- row_width_init

  # Double row_width until occupied rows <= max_rows (capped at 65536)
  while (
    row_width < 65536L &&
      n_occupied_rows(aggregated$addr_int, row_width) > max_rows
  ) {
    row_width <- row_width * 2L
  }

  if (row_width > row_width_init) {
    message(sprintf(
      "Note: [%s/%s] row_width auto-scaled to %d (%d occupied rows)",
      target_resulttype,
      target_benchmark,
      row_width,
      n_occupied_rows(aggregated$addr_int, row_width)
    ))
  }

  # ===========================================================================
  # GRID COORDINATES
  # ===========================================================================

  # col = addr %% row_width -> byte offset within the row (0 ... row_width-1)
  # row = (addr %/% row_width) * row_width -> base address of the row
  # col is always below 65536, so the integer conversion is lossless.
  grid_data <- aggregated |>
    mutate(
      col = as.integer(.data$addr_int %% row_width),
      row = (.data$addr_int %/% row_width) * row_width
    )

  # ===========================================================================
  # GAPS
  # ===========================================================================

  rows_sorted <- sort(unique(grid_data$row))
  n_data_rows <- length(rows_sorted)

  # - diff() returns the successive differences between consecutive elements
  # - has_gap_before[i] = TRUE when that distance > row_width
  # - First row never has a predecessor, so it's FALSE
  has_gap_before <- c(FALSE, diff(rows_sorted) > row_width)

  # - cumsum(has_gap_before) counts how many gaps precede each row
  # - Adding that offset to 1...n gives the row_idx values with gap slots
  cumulative_gaps <- cumsum(has_gap_before)
  row_order <- tibble(
    row = rows_sorted,
    row_idx = seq_len(n_data_rows) + cumulative_gaps,
    has_gap_before = has_gap_before
  )

  # The slot directly above a row that follows a gap holds the gap marker
  gap_marker_indices <- row_order$row_idx[has_gap_before] - 1L
  total_slots <- n_data_rows + sum(has_gap_before)

  # ===========================================================================
  # FILL EMPTY CELLS
  # ===========================================================================

  # - complete() adds a row for every missing (row, col) tuple
  # - left_join attaches row_idx to every cell
  grid_complete <- grid_data |>
    complete(row, col = 0L:(row_width - 1L)) |>
    left_join(row_order, by = "row")

  # ===========================================================================
  # GAP TILES
  # ===========================================================================

  gap_markers <- data.frame(row_idx = gap_marker_indices)

  # ===========================================================================
  # ELF REGION RECTANGLES
  # ===========================================================================

  region_rects <- data.frame(
    label = character(0),
    ymin = numeric(0),
    ymax = numeric(0)
  )

  if (length(sym_addr) > 0) {
    rects_list <- lapply(regions, function(reg) {
      s <- sym_addr[reg$start]
      e <- sym_addr[reg$end]

      # Unknown symbols index as NA; empty or inverted regions are skipped
      if (is.na(s) || is.na(e) || s >= e) {
        return(NULL)
      }

      # Row with base address r covers bytes r ... r + row_width - 1.
      # Overlap if r < e && r + row_width > s
      overlapping <- row_order[
        row_order$row < e & (row_order$row + row_width) > s,
        ,
        drop = FALSE
      ]

      if (nrow(overlapping) == 0) {
        return(NULL)
      }

      data.frame(
        label = reg$label,
        ymin = min(overlapping$row_idx) - 0.5,
        ymax = max(overlapping$row_idx) + 0.5
      )
    })

    rects_list <- Filter(Negate(is.null), rects_list)
    if (length(rects_list) > 0) {
      region_rects <- do.call(rbind, rects_list)
    }
  }

  # ===========================================================================
  # TILE SIZE
  # ===========================================================================

  # Largest square tile that fits the target area, capped at max_tile
  tile_size <- min(target_w / row_width, target_h / total_slots, max_tile)

  # ===========================================================================
  # X-AXIS TICKS
  # ===========================================================================

  # Minimum step to keep labels from overlapping at this tile size
  min_tick_step <- as.integer(ceiling(0.25 / tile_size))

  # Snap to a power of 2 so labels stay round
  x_tick_step <- max(1L, row_width %/% n_x_ticks)
  x_tick_step <- 2L^as.integer(
    ceiling(log2(max(x_tick_step, min_tick_step, 1L)))
  )
  col_tick_values <- seq(0L, row_width - 1L, by = x_tick_step)
  col_tick_labels <- sprintf("+0x%X", col_tick_values)

  # ===========================================================================
  # Y-AXIS TICKS
  # ===========================================================================

  # Show 15 labels max (gaps are ignored)
  label_step <- max(1L, ceiling(n_data_rows / 15L))
  label_at <- row_order[seq(1L, n_data_rows, by = label_step), ]

  # ===========================================================================
  # PLOT
  # ===========================================================================

  heatmap <- ggplot(
    grid_complete,
    aes(
      x = col,
      y = .data$row_idx,
      fill = .data$faults
    )
  ) +

    # One rectangle per (col, row_idx) tuple
    geom_tile(width = 1, height = 1, colour = NA) +

    # Separators at address gaps
    geom_rect(
      data = gap_markers,
      aes(ymin = .data$row_idx - 0.5, ymax = .data$row_idx + 0.5),
      xmin = -0.5,
      xmax = row_width - 0.5,
      fill = "grey40",
      colour = NA,
      inherit.aes = FALSE
    ) +

    # Heatmap color ramp
    scale_fill_viridis_c(
      name = "Faults",
      trans = "log1p",
      na.value = "grey85",
      option = "viridis"
    ) +

    # X-axis hex labels
    scale_x_continuous(
      breaks = col_tick_values,
      labels = col_tick_labels,
      limits = c(-0.5, row_width - 0.5),
      expand = c(0, 0)
    ) +

    # Y-axis hex labels, lowest address at the top
    scale_y_reverse(
      breaks = label_at$row_idx,
      labels = numeric_to_hex(label_at$row),
      limits = c(total_slots + 0.5, 0.5), # includes gaps
      expand = c(0, 0)
    ) +

    # Title + axis labels
    labs(
      title = paste(target_resulttype, "/", target_benchmark),
      subtitle = paste(
        "Total:",
        # scientific = FALSE: otherwise round totals print as e.g. "1e+05"
        format(
          sum(aggregated$faults, na.rm = TRUE),
          big.mark = ",",
          scientific = FALSE
        )
      ),
      x = "Byte Offset",
      y = "Base Address"
    ) +

    # Theme
    theme_minimal() +
    theme(
      axis.text.x = element_text(
        family = "mono",
        angle = 45,
        hjust = 1,
        size = 9
      ),
      axis.text.y = element_text(family = "mono", size = 9),
      panel.grid = element_blank(),
      panel.border = element_rect(colour = "grey50", fill = NA, linewidth = 0.5)
    ) +

    # Force square tiles
    coord_fixed(ratio = 1)

  # ELF region borders
  if (nrow(region_rects) > 0) {
    # Dark2 provides 8 colours only; further regions would get an NA
    # (invisible) border, so fall back to the hue scale beyond that.
    n_region_labels <- length(unique(region_rects$label))
    region_scale <- if (n_region_labels <= 8L) {
      scale_colour_brewer(name = "Region", palette = "Dark2")
    } else {
      scale_colour_hue(name = "Region")
    }

    heatmap <- heatmap +
      geom_rect(
        data = region_rects,
        aes(ymin = .data$ymin, ymax = .data$ymax, colour = .data$label),
        xmin = -0.5,
        xmax = row_width - 0.5,
        fill = NA,
        linewidth = 1.5,
        inherit.aes = FALSE
      ) +
      region_scale
  }

  # ===========================================================================
  # SAVE
  # ===========================================================================

  # Panel size plus fixed allowances (inches) for axes, title and legends
  fig_w <- row_width * tile_size + 4.5
  fig_h <- total_slots * tile_size + 2.5

  outfile <- file.path(
    experiment,
    paste0(
      "heatmap_",
      target_resulttype,
      "_",
      target_benchmark,
      suffix,
      ".svg"
    )
  )

  ggsave(outfile, plot = heatmap, width = fig_w, height = fig_h, units = "in")
  message(sprintf("Saved: %s", basename(outfile)))
  invisible(NULL)
}

# =============================================================================
# GENERATE HEATMAPS
# =============================================================================

# One heatmap per (benchmark, marker) pair; make_heatmap() itself skips pairs
# that have no data. expand.grid() varies its first column fastest, so all
# markers are processed for one benchmark before moving on to the next.
combos <- expand.grid(
  marker = markers,
  benchmark = benchmarks,
  stringsAsFactors = FALSE
)
for (i in seq_len(nrow(combos))) {
  make_heatmap(combos$marker[i], combos$benchmark[i])
}