WIP: heatmap chart
This commit is contained in:
301
scripts/charts/single_heatmap.r
Normal file
301
scripts/charts/single_heatmap.r
Normal file
@ -0,0 +1,301 @@
|
||||
library(ggplot2)
|
||||
library(readr) # read_csv
|
||||
library(dplyr) # filter, mutate, ...
|
||||
library(tidyr) # complete
|
||||
library(scales)
|
||||
|
||||
# Usage: Rscript single_heatmap.r exp_abspath marker benchmark
|
||||
|
||||
# =============================================================================
|
||||
# CONFIGURATION
|
||||
# =============================================================================
|
||||
|
||||
# Starting row width (automatically scaled up)
|
||||
row_width <- 16L
|
||||
|
||||
# Maximum number of rows before row_width is doubled
|
||||
max_rows <- 64L
|
||||
|
||||
# How many x-axis tick labels to show regardless of row_width
|
||||
n_x_ticks <- 16L
|
||||
|
||||
# Target size in inches (without margins)
|
||||
target_w <- 10.0
|
||||
target_h <- 6.0
|
||||
|
||||
# Limit tile size so small grids don't produce huge tiles
|
||||
max_tile <- 0.5
|
||||
|
||||
# =============================================================================
|
||||
# COMMAND-LINE ARGUMENTS
|
||||
# =============================================================================
|
||||
|
||||
args <- commandArgs(trailingOnly = TRUE)
|
||||
if (length(args) < 3) {
|
||||
stop("Usage: Rscript single_heatmap.r <experiment_dir> <marker> <bench>")
|
||||
}
|
||||
|
||||
# Which experiment to display
|
||||
experiment <- args[1]
|
||||
|
||||
# Which marker to display
|
||||
target_resulttype <- args[2]
|
||||
|
||||
# Which benchmark to display
|
||||
target_benchmark <- args[3]
|
||||
|
||||
# =============================================================================
|
||||
# INPUT DATA
|
||||
# =============================================================================
|
||||
|
||||
datafile <- file.path(experiment, "faults.csv")
|
||||
if (!file.exists(datafile)) {
|
||||
stop(paste("Input file not found:", datafile))
|
||||
}
|
||||
|
||||
raw <- read_csv(datafile, col_types = cols(
|
||||
benchmark = col_character(),
|
||||
resulttype = col_character(),
|
||||
faults = col_double(),
|
||||
fault_address = col_character() # hex string "0x10001A"; converted below
|
||||
))
|
||||
|
||||
# =============================================================================
|
||||
# FILTER
|
||||
# =============================================================================
|
||||
|
||||
# Keep only rows matching the marker type and benchmark
|
||||
filtered <- raw |>
|
||||
filter(
|
||||
resulttype == target_resulttype,
|
||||
benchmark == target_benchmark
|
||||
)
|
||||
|
||||
if (nrow(filtered) == 0) {
|
||||
avail_rt <- paste(sort(unique(raw$resulttype)), collapse = ", ")
|
||||
avail_bm <- paste(sort(unique(raw$benchmark)), collapse = ", ")
|
||||
stop(paste0(
|
||||
"No data for resulttype='", target_resulttype,
|
||||
"' + benchmark='", target_benchmark, "'.\n",
|
||||
"Available resulttypes: ", avail_rt, "\n",
|
||||
"Available benchmarks: ", avail_bm
|
||||
))
|
||||
}
|
||||
|
||||
# We're only interested in addresses and count after filtering
|
||||
aggregated <- filtered |>
|
||||
select(fault_address, faults)
|
||||
|
||||
# =============================================================================
|
||||
# ADDRESS HEX -> INT
|
||||
# =============================================================================
|
||||
|
||||
# "0x10001A" -> substr strips "0x" -> strtoi parses base-16 -> integer
|
||||
aggregated <- aggregated |>
|
||||
mutate(addr_int = strtoi(
|
||||
substr(fault_address, 3L, nchar(fault_address)),
|
||||
16L
|
||||
))
|
||||
|
||||
# =============================================================================
|
||||
# SCALE ROWS
|
||||
# =============================================================================
|
||||
|
||||
# Count the number of rows/"bins" required:
|
||||
# - (addr_ints %/% rw) is the bin
|
||||
# - Multiply by rw to get the base address
|
||||
# - Count the unique base addresses to get the number of occupied rows/bins
|
||||
n_occupied_rows <- function(addr_ints, rw) {
|
||||
length(unique((addr_ints %/% rw) * rw))
|
||||
}
|
||||
|
||||
# Double row_width until the number of occupied rows/bins is <= max_rows
|
||||
while (row_width < 65536L && n_occupied_rows(
|
||||
aggregated$addr_int, row_width
|
||||
) > max_rows) {
|
||||
row_width <- row_width * 2L
|
||||
}
|
||||
|
||||
if (row_width > 16L) {
|
||||
message(sprintf(
|
||||
"Note: row_width auto-scaled to %d (%d occupied rows, max_rows=%d)",
|
||||
row_width,
|
||||
n_occupied_rows(aggregated$addr_int, row_width),
|
||||
max_rows
|
||||
))
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# GRID COORDINATES
|
||||
# =============================================================================
|
||||
|
||||
# col = addr %% row_width -> byte offset within the row (0 ... row_width-1)
|
||||
# row = (addr %/% row_width) * row_width -> base address of the row
|
||||
grid_data <- aggregated |>
|
||||
mutate(
|
||||
col = addr_int %% row_width,
|
||||
row = (addr_int %/% row_width) * row_width
|
||||
)
|
||||
|
||||
# =============================================================================
|
||||
# GAPS
|
||||
# =============================================================================
|
||||
|
||||
# Assign sequential indices to each row to mark gaps
|
||||
rows_sorted <- sort(unique(grid_data$row))
|
||||
n_data_rows <- length(rows_sorted)
|
||||
|
||||
# - diff() returns the successive differences between consecutive elements
|
||||
# - has_gap_before[i] = TRUE when that distance > row_width
|
||||
# - First row never has a predecessor, so it's FALSE
|
||||
has_gap_before <- c(FALSE, diff(rows_sorted) > row_width)
|
||||
|
||||
# - cumsum(has_gap_before) counts how many gaps are before each row
|
||||
# - Adding the offset to 1...n gives the row indices with gaps
|
||||
cumulative_gaps <- cumsum(has_gap_before)
|
||||
row_order <- tibble(
|
||||
row = rows_sorted,
|
||||
row_idx = seq_len(n_data_rows) + cumulative_gaps,
|
||||
has_gap_before = has_gap_before
|
||||
)
|
||||
|
||||
# Mark one slot before each row that has a gap preceding it
|
||||
gap_marker_indices <- row_order$row_idx[has_gap_before] - 1L
|
||||
|
||||
# Total y-axis slots = data rows + gap markers
|
||||
total_slots <- n_data_rows + sum(has_gap_before)
|
||||
|
||||
# =============================================================================
|
||||
# FILL EMPTY CELLS
|
||||
# =============================================================================
|
||||
|
||||
# - complete() adds a row for every missing (row, col) tuple
|
||||
# - left_join adds row_idx and has_gap_before to every row
|
||||
grid_complete <- grid_data |>
|
||||
complete(row, col = 0L:(row_width - 1L)) |>
|
||||
left_join(row_order, by = "row")
|
||||
|
||||
# =============================================================================
|
||||
# GAP TILES
|
||||
# =============================================================================
|
||||
|
||||
# Create one rectangle per gap spanning the full width
|
||||
gap_markers <- data.frame(row_idx = gap_marker_indices)
|
||||
|
||||
# =============================================================================
|
||||
# TILE SIZE (computed here so x-tick density can use it)
|
||||
# =============================================================================
|
||||
|
||||
# Largest tile size fitting within target sizes
|
||||
tile_size <- min(target_w / row_width, target_h / total_slots, max_tile)
|
||||
|
||||
# =============================================================================
|
||||
# X-AXIS TICKS
|
||||
# =============================================================================
|
||||
|
||||
# Make sure labels don't overlap
|
||||
min_tick_step <- as.integer(ceiling(0.25 / tile_size))
|
||||
|
||||
# Snap to a power of 2 so labels stay round
|
||||
x_tick_step <- max(1L, row_width %/% n_x_ticks) # Desired
|
||||
x_tick_step <- 2L^as.integer(ceiling(log2(max(x_tick_step, min_tick_step, 1L))))
|
||||
col_tick_values <- seq(0L, row_width - 1L, by = x_tick_step)
|
||||
col_tick_labels <- sprintf("+0x%X", col_tick_values)
|
||||
|
||||
# =============================================================================
|
||||
# Y-AXIS TICKS
|
||||
# =============================================================================
|
||||
|
||||
# Show at most 15 labels (gap slots are ignored)
|
||||
label_step <- max(1L, ceiling(n_data_rows / 15L))
|
||||
label_at <- row_order[seq(1L, n_data_rows, by = label_step), ]
|
||||
|
||||
# =============================================================================
|
||||
# PLOT
|
||||
# =============================================================================
|
||||
|
||||
plot <- ggplot(grid_complete, aes(x = col, y = row_idx, fill = faults)) +
|
||||
|
||||
# One filled rectangle per (col, row_idx) tuple
|
||||
geom_tile(width = 1, height = 1, colour = NA) +
|
||||
|
||||
# Separators at address gaps
|
||||
geom_rect(
|
||||
data = gap_markers,
|
||||
aes(ymin = row_idx - 0.5, ymax = row_idx + 0.5),
|
||||
xmin = -0.5,
|
||||
xmax = row_width - 0.5,
|
||||
fill = "grey40",
|
||||
colour = NA,
|
||||
inherit.aes = FALSE
|
||||
) +
|
||||
|
||||
# Heatmap color ramp (dark -> yellow)
|
||||
scale_fill_viridis_c(
|
||||
name = "Faults",
|
||||
trans = "log1p",
|
||||
na.value = "grey85",
|
||||
option = "viridis"
|
||||
) +
|
||||
|
||||
# X-axis hex labels
|
||||
scale_x_continuous(
|
||||
breaks = col_tick_values,
|
||||
labels = col_tick_labels,
|
||||
limits = c(-0.5, row_width - 0.5),
|
||||
expand = c(0, 0)
|
||||
) +
|
||||
|
||||
# Y-axis hex labels. Lowest address at the top
|
||||
scale_y_reverse(
|
||||
breaks = label_at$row_idx,
|
||||
labels = sprintf("0x%X", label_at$row),
|
||||
limits = c(total_slots + 0.5, 0.5), # total_slots includes gap-marker slots
|
||||
expand = c(0, 0)
|
||||
) +
|
||||
|
||||
# Title and axis labels
|
||||
labs(
|
||||
title = paste(target_resulttype, "/", target_benchmark),
|
||||
subtitle = paste(
|
||||
"Total:",
|
||||
format(sum(aggregated$faults, na.rm = TRUE), big.mark = ",")
|
||||
),
|
||||
x = "Byte Offset",
|
||||
y = "Base Address"
|
||||
) +
|
||||
|
||||
# Theme
|
||||
theme_minimal() +
|
||||
theme(
|
||||
axis.text.x = element_text(
|
||||
family = "mono", angle = 45, hjust = 1, size = 9
|
||||
),
|
||||
axis.text.y = element_text(family = "mono", size = 9),
|
||||
panel.grid = element_blank(),
|
||||
panel.border = element_rect(colour = "grey50", fill = NA, linewidth = 0.5)
|
||||
) +
|
||||
|
||||
# Force square tiles
|
||||
coord_fixed(ratio = 1)
|
||||
|
||||
# =============================================================================
|
||||
# SAVE
|
||||
# =============================================================================
|
||||
|
||||
# Margins
|
||||
fig_w <- row_width * tile_size + 4.5
|
||||
fig_h <- total_slots * tile_size + 2.5
|
||||
|
||||
# Write to file
|
||||
outfile <- file.path(experiment, paste0(
|
||||
target_resulttype, "_", target_benchmark, "_heatmap.svg"
|
||||
))
|
||||
|
||||
ggsave(
|
||||
outfile,
|
||||
plot = plot,
|
||||
width = fig_w,
|
||||
height = fig_h,
|
||||
units = "in"
|
||||
)
|
||||
Reference in New Issue
Block a user