diff --git a/scripts/charts/single_heatmap.r b/scripts/charts/single_heatmap.r new file mode 100644 index 0000000..d94be96 --- /dev/null +++ b/scripts/charts/single_heatmap.r @@ -0,0 +1,301 @@ +library(ggplot2) +library(readr) # read_csv +library(dplyr) # filter, mutate, ... +library(tidyr) # complete +library(scales) + +# Usage: Rscript single_heatmap.r exp_abspath marker benchmark + +# ============================================================================= +# CONFIGURATION +# ============================================================================= + +# Starting row width (automatically scaled up) +row_width <- 16L + +# Maximum number of rows before row_width is doubled +max_rows <- 64L + +# How many x-axis tick labels to show regardless of row_width +n_x_ticks <- 16L + +# Target size in inches (without margins) +target_w <- 10.0 +target_h <- 6.0 + +# Limit tile size so small grids don't produce huge tiles +max_tile <- 0.5 + +# ============================================================================= +# COMMAND-LINE ARGUMENTS +# ============================================================================= + +args <- commandArgs(trailingOnly = TRUE) +if (length(args) < 3) { + stop("Usage: Rscript single_heatmap.r ") +} + +# Which experiment to display +experiment <- args[1] + +# Which marker to display +target_resulttype <- args[2] + +# Which benchmark to display +target_benchmark <- args[3] + +# ============================================================================= +# INPUT DATA +# ============================================================================= + +datafile <- file.path(experiment, "faults.csv") +if (!file.exists(datafile)) { + stop(paste("Input file not found:", datafile)) +} + +raw <- read_csv(datafile, col_types = cols( + benchmark = col_character(), + resulttype = col_character(), + faults = col_double(), + fault_address = col_character() # hex string "0x10001A"; converted below +)) + +# ============================================================================= +# FILTER +# ============================================================================= + +# Keep only rows matching the marker type and benchmark +filtered <- raw |> + filter( + resulttype == target_resulttype, + benchmark == target_benchmark + ) + +if (nrow(filtered) == 0) { + avail_rt <- paste(sort(unique(raw$resulttype)), collapse = ", ") + avail_bm <- paste(sort(unique(raw$benchmark)), collapse = ", ") + stop(paste0( + "No data for resulttype='", target_resulttype, + "' + benchmark='", target_benchmark, "'.\n", + "Available resulttypes: ", avail_rt, "\n", + "Available benchmarks: ", avail_bm + )) +} + +# We're only interested in addresses and count after filtering +aggregated <- filtered |> + select(fault_address, faults) + +# ============================================================================= +# ADDRESS HEX -> INT +# ============================================================================= + +# "0x10001A" -> substr strips "0x" -> strtoi parses base-16 -> integer +aggregated <- aggregated |> + mutate(addr_int = strtoi( + substr(fault_address, 3L, nchar(fault_address)), + 16L + )) + +# ============================================================================= +# SCALE ROWS +# ============================================================================= + +# Count the number of rows/"bins" required: +# - (addr_ints %/% rw) is the bin +# - Multiply by rw to get the base address +# - Count the unique base addresses to get the number of occupied rows/bins +n_occupied_rows <- function(addr_ints, rw) { + length(unique((addr_ints %/% rw) * rw)) +} + +# Double row_width until the number of occupied rows/bins is <= max_rows +while (row_width < 65536L && n_occupied_rows( + aggregated$addr_int, row_width +) > max_rows) { + row_width <- row_width * 2L +} + +if (row_width > 16L) { + message(sprintf( + "Note: row_width auto-scaled to %d (%d occupied rows, max_rows=%d)", + row_width, + n_occupied_rows(aggregated$addr_int, row_width), + max_rows + )) +} + +# ============================================================================= +# GRID COORDINATES +# ============================================================================= + +# col = addr %% row_width -> byte offset within the row (0 ... row_width-1) +# row = (addr %/% row_width) * row_width -> base address of the row +grid_data <- aggregated |> + mutate( + col = addr_int %% row_width, + row = (addr_int %/% row_width) * row_width + ) + +# ============================================================================= +# GAPS +# ============================================================================= + +# Assign sequential indices to each row to mark gaps +rows_sorted <- sort(unique(grid_data$row)) +n_data_rows <- length(rows_sorted) + +# - diff() returns the successive differences between consecutive elements +# - has_gap_before[i] = TRUE when that distance > row_width +# - First row never has a predecessor, so it's FALSE +has_gap_before <- c(FALSE, diff(rows_sorted) > row_width) + +# - cumsum(has_gap_before) counts how many gaps are before each row +# - Adding the offset to 1...n gives the row indices with gaps +cumulative_gaps <- cumsum(has_gap_before) +row_order <- tibble( + row = rows_sorted, + row_idx = seq_len(n_data_rows) + cumulative_gaps, + has_gap_before = has_gap_before +) + +# Mark one slot before each row that has a gap preceding it +gap_marker_indices <- row_order$row_idx[has_gap_before] - 1L + +# Total y-axis slots = data rows + gap markers +total_slots <- n_data_rows + sum(has_gap_before) + +# ============================================================================= +# FILL EMPTY CELLS +# ============================================================================= + +# - complete() adds a row for every missing (row, col) tuple +# - left_join adds row_idx and has_gap_before to every row +grid_complete <- grid_data |> + complete(row, col = 0L:(row_width - 1L)) |> + left_join(row_order, by = "row") + +# ============================================================================= +# GAP TILES +# ============================================================================= + +# Create one rectangle per gap spanning the full width +gap_markers <- data.frame(row_idx = gap_marker_indices) + +# ============================================================================= +# TILE SIZE (computed here so x-tick density can use it) +# ============================================================================= + +# Largest tile size fitting within target sizes +tile_size <- min(target_w / row_width, target_h / total_slots, max_tile) + +# ============================================================================= +# X-AXIS TICKS +# ============================================================================= + +# Make sure labels don't overlap +min_tick_step <- as.integer(ceiling(0.25 / tile_size)) + +# Snap to a power of 2 so labels stay round +x_tick_step <- max(1L, row_width %/% n_x_ticks) # Desired +x_tick_step <- 2L^as.integer(ceiling(log2(max(x_tick_step, min_tick_step, 1L)))) +col_tick_values <- seq(0L, row_width - 1L, by = x_tick_step) +col_tick_labels <- sprintf("+0x%X", col_tick_values) + +# ============================================================================= +# Y-AXIS TICKS +# ============================================================================= + +# Show at most 15 labels (gap slots are ignored) +label_step <- max(1L, ceiling(n_data_rows / 15L)) +label_at <- row_order[seq(1L, n_data_rows, by = label_step), ] + +# ============================================================================= +# PLOT +# ============================================================================= + +plot <- ggplot(grid_complete, aes(x = col, y = row_idx, fill = faults)) + + + # One filled rectangle per (col, row_idx) tuple + geom_tile(width = 1, height = 1, colour = NA) + + + # Separators at address gaps + geom_rect( + data = gap_markers, + aes(ymin = row_idx - 0.5, ymax = row_idx + 0.5), + xmin = -0.5, + xmax = row_width - 0.5, + fill = "grey40", + colour = NA, + inherit.aes = FALSE + ) + + + # Heatmap color ramp (dark -> yellow) + scale_fill_viridis_c( + name = "Faults", + trans = "log1p", + na.value = "grey85", + option = "viridis" + ) + + + # X-axis hex labels + scale_x_continuous( + breaks = col_tick_values, + labels = col_tick_labels, + limits = c(-0.5, row_width - 0.5), + expand = c(0, 0) + ) + + + # Y-axis hex labels. Lowest address at the top + scale_y_reverse( + breaks = label_at$row_idx, + labels = sprintf("0x%X", label_at$row), + limits = c(total_slots + 0.5, 0.5), # total_slots includes gap-marker slots + expand = c(0, 0) + ) + + + # Title and axis labels + labs( + title = paste(target_resulttype, "/", target_benchmark), + subtitle = paste( + "Total:", + format(sum(aggregated$faults, na.rm = TRUE), big.mark = ",") + ), + x = "Byte Offset", + y = "Base Address" + ) + + + # Theme + theme_minimal() + + theme( + axis.text.x = element_text( + family = "mono", angle = 45, hjust = 1, size = 9 + ), + axis.text.y = element_text(family = "mono", size = 9), + panel.grid = element_blank(), + panel.border = element_rect(colour = "grey50", fill = NA, linewidth = 0.5) + ) + + + # Force square tiles + coord_fixed(ratio = 1) + +# ============================================================================= +# SAVE +# ============================================================================= + +# Margins +fig_w <- row_width * tile_size + 4.5 +fig_h <- total_slots * tile_size + 2.5 + +# Write to file +outfile <- file.path(experiment, paste0( + target_resulttype, "_", target_benchmark, "_heatmap.svg" +)) + +ggsave( + outfile, + plot = plot, + width = fig_w, + height = fig_h, + units = "in" +)