WIP: heatmap chart

2026-06-11 21:17:31 +02:00
parent 47f3132859
commit 6f3d11128c
1 changed files with 301 additions and 0 deletions
--- a/scripts/charts/single_heatmap.r
+++ b/scripts/charts/single_heatmap.r
@ -0,0 +1,301 @@
+library(ggplot2)
+library(readr) # read_csv
+library(dplyr) # filter, mutate, ...
+library(tidyr) # complete
+library(scales)
+
+# Usage: Rscript single_heatmap.r exp_abspath marker benchmark
+
+# =============================================================================
+# CONFIGURATION
+# =============================================================================
+
+# Starting row width (automatically scaled up)
+row_width <- 16L
+
+# Maximum number of rows before row_width is doubled
+max_rows <- 64L
+
+# How many x-axis tick labels to show regardless of row_width
+n_x_ticks <- 16L
+
+# Target size in inches (without margins)
+target_w <- 10.0
+target_h <- 6.0
+
+# Limit tile size so small grids don't produce huge tiles
+max_tile <- 0.5
+
+# =============================================================================
+# COMMAND-LINE ARGUMENTS
+# =============================================================================
+
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 3) {
+  stop("Usage: Rscript single_heatmap.r <experiment_dir> <marker> <bench>")
+}
+
+# Which experiment to display
+experiment <- args[1]
+
+# Which marker to display
+target_resulttype <- args[2]
+
+# Which benchmark to display
+target_benchmark <- args[3]
+
+# =============================================================================
+# INPUT DATA
+# =============================================================================
+
+datafile <- file.path(experiment, "faults.csv")
+if (!file.exists(datafile)) {
+  stop(paste("Input file not found:", datafile))
+}
+
+raw <- read_csv(datafile, col_types = cols(
+  benchmark     = col_character(),
+  resulttype    = col_character(),
+  faults        = col_double(),
+  fault_address = col_character() # hex string "0x10001A"; converted below
+))
+
+# =============================================================================
+# FILTER
+# =============================================================================
+
+# Keep only rows matching the marker type and benchmark
+filtered <- raw |>
+  filter(
+    resulttype == target_resulttype,
+    benchmark == target_benchmark
+  )
+
+if (nrow(filtered) == 0) {
+  avail_rt <- paste(sort(unique(raw$resulttype)), collapse = ", ")
+  avail_bm <- paste(sort(unique(raw$benchmark)), collapse = ", ")
+  stop(paste0(
+    "No data for resulttype='", target_resulttype,
+    "' + benchmark='", target_benchmark, "'.\n",
+    "Available resulttypes: ", avail_rt, "\n",
+    "Available benchmarks:  ", avail_bm
+  ))
+}
+
+# We're only interested in addresses and count after filtering
+aggregated <- filtered |>
+  select(fault_address, faults)
+
+# =============================================================================
+# ADDRESS HEX -> INT
+# =============================================================================
+
+# "0x10001A" -> substr strips "0x" -> strtoi parses base-16 -> integer
+aggregated <- aggregated |>
+  mutate(addr_int = strtoi(
+    substr(fault_address, 3L, nchar(fault_address)),
+    16L
+  ))
+
+# =============================================================================
+# SCALE ROWS
+# =============================================================================
+
+# Count the number of rows/"bins" required:
+#  - (addr_ints %/% rw) is the bin
+#  - Multiply by rw to get the base address
+#  - Count the unique base addresses to get the number of occupied rows/bins
+n_occupied_rows <- function(addr_ints, rw) {
+  length(unique((addr_ints %/% rw) * rw))
+}
+
+# Double row_width until the number of occupied rows/bins is <= max_rows
+while (row_width < 65536L && n_occupied_rows(
+  aggregated$addr_int, row_width
+) > max_rows) {
+  row_width <- row_width * 2L
+}
+
+if (row_width > 16L) {
+  message(sprintf(
+    "Note: row_width auto-scaled to %d (%d occupied rows, max_rows=%d)",
+    row_width,
+    n_occupied_rows(aggregated$addr_int, row_width),
+    max_rows
+  ))
+}
+
+# =============================================================================
+# GRID COORDINATES
+# =============================================================================
+
+# col = addr %% row_width -> byte offset within the row (0 ... row_width-1)
+# row = (addr %/% row_width) * row_width -> base address of the row
+grid_data <- aggregated |>
+  mutate(
+    col = addr_int %% row_width,
+    row = (addr_int %/% row_width) * row_width
+  )
+
+# =============================================================================
+# GAPS
+# =============================================================================
+
+# Assign sequential indices to each row to mark gaps
+rows_sorted <- sort(unique(grid_data$row))
+n_data_rows <- length(rows_sorted)
+
+# - diff() returns the successive differences between consecutive elements
+# - has_gap_before[i] = TRUE when that distance > row_width
+# - First row never has a predecessor, so it's FALSE
+has_gap_before <- c(FALSE, diff(rows_sorted) > row_width)
+
+# - cumsum(has_gap_before) counts how many gaps are before each row
+# - Adding the offset to 1...n gives the row indices with gaps
+cumulative_gaps <- cumsum(has_gap_before)
+row_order <- tibble(
+  row            = rows_sorted,
+  row_idx        = seq_len(n_data_rows) + cumulative_gaps,
+  has_gap_before = has_gap_before
+)
+
+# Mark one slot before each row that has a gap preceding it
+gap_marker_indices <- row_order$row_idx[has_gap_before] - 1L
+
+# Total y-axis slots = data rows + gap markers
+total_slots <- n_data_rows + sum(has_gap_before)
+
+# =============================================================================
+# FILL EMPTY CELLS
+# =============================================================================
+
+# - complete() adds a row for every missing (row, col) tuple
+# - left_join adds row_idx and has_gap_before to every row
+grid_complete <- grid_data |>
+  complete(row, col = 0L:(row_width - 1L)) |>
+  left_join(row_order, by = "row")
+
+# =============================================================================
+# GAP TILES
+# =============================================================================
+
+# Create one rectangle per gap spanning the full width
+gap_markers <- data.frame(row_idx = gap_marker_indices)
+
+# =============================================================================
+# TILE SIZE  (computed here so x-tick density can use it)
+# =============================================================================
+
+# Largest tile size fitting within target sizes
+tile_size <- min(target_w / row_width, target_h / total_slots, max_tile)
+
+# =============================================================================
+# X-AXIS TICKS
+# =============================================================================
+
+# Make sure labels don't overlap
+min_tick_step <- as.integer(ceiling(0.25 / tile_size))
+
+# Snap to a power of 2 so labels stay round
+x_tick_step <- max(1L, row_width %/% n_x_ticks) # Desired
+x_tick_step <- 2L^as.integer(ceiling(log2(max(x_tick_step, min_tick_step, 1L))))
+col_tick_values <- seq(0L, row_width - 1L, by = x_tick_step)
+col_tick_labels <- sprintf("+0x%X", col_tick_values)
+
+# =============================================================================
+# Y-AXIS TICKS
+# =============================================================================
+
+# Show at most 15 labels (gap slots are ignored)
+label_step <- max(1L, ceiling(n_data_rows / 15L))
+label_at <- row_order[seq(1L, n_data_rows, by = label_step), ]
+
+# =============================================================================
+# PLOT
+# =============================================================================
+
+plot <- ggplot(grid_complete, aes(x = col, y = row_idx, fill = faults)) +
+
+  # One filled rectangle per (col, row_idx) tuple
+  geom_tile(width = 1, height = 1, colour = NA) +
+
+  # Separators at address gaps
+  geom_rect(
+    data = gap_markers,
+    aes(ymin = row_idx - 0.5, ymax = row_idx + 0.5),
+    xmin = -0.5,
+    xmax = row_width - 0.5,
+    fill = "grey40",
+    colour = NA,
+    inherit.aes = FALSE
+  ) +
+
+  # Heatmap color ramp (dark -> yellow)
+  scale_fill_viridis_c(
+    name     = "Faults",
+    trans    = "log1p",
+    na.value = "grey85",
+    option   = "viridis"
+  ) +
+
+  # X-axis hex labels
+  scale_x_continuous(
+    breaks = col_tick_values,
+    labels = col_tick_labels,
+    limits = c(-0.5, row_width - 0.5),
+    expand = c(0, 0)
+  ) +
+
+  # Y-axis hex labels. Lowest address at the top
+  scale_y_reverse(
+    breaks = label_at$row_idx,
+    labels = sprintf("0x%X", label_at$row),
+    limits = c(total_slots + 0.5, 0.5), # total_slots includes gap-marker slots
+    expand = c(0, 0)
+  ) +
+
+  # Title and axis labels
+  labs(
+    title = paste(target_resulttype, "/", target_benchmark),
+    subtitle = paste(
+      "Total:",
+      format(sum(aggregated$faults, na.rm = TRUE), big.mark = ",")
+    ),
+    x = "Byte Offset",
+    y = "Base Address"
+  ) +
+
+  # Theme
+  theme_minimal() +
+  theme(
+    axis.text.x = element_text(
+      family = "mono", angle = 45, hjust = 1, size = 9
+    ),
+    axis.text.y = element_text(family = "mono", size = 9),
+    panel.grid = element_blank(),
+    panel.border = element_rect(colour = "grey50", fill = NA, linewidth = 0.5)
+  ) +
+
+  # Force square tiles
+  coord_fixed(ratio = 1)
+
+# =============================================================================
+# SAVE
+# =============================================================================
+
+# Margins
+fig_w <- row_width * tile_size + 4.5
+fig_h <- total_slots * tile_size + 2.5
+
+# Write to file
+outfile <- file.path(experiment, paste0(
+  target_resulttype, "_", target_benchmark, "_heatmap.svg"
+))
+
+ggsave(
+  outfile,
+  plot   = plot,
+  width  = fig_w,
+  height = fig_h,
+  units  = "in"
+)