library(ggplot2) library(readr) # read_csv library(dplyr) # filter, mutate, ... library(tidyr) # complete library(scales) # Usage: Rscript single_heatmap.r exp_abspath marker benchmark # ============================================================================= # CONFIGURATION # ============================================================================= # Starting row width (automatically scaled up) row_width <- 16L # Maximum number of rows before row_width is doubled max_rows <- 64L # How many x-axis tick labels to show regardless of row_width n_x_ticks <- 16L # Target size in inches (without margins) target_w <- 10.0 target_h <- 6.0 # Limit tile size so small grids don't produce huge tiles max_tile <- 0.5 # ============================================================================= # COMMAND-LINE ARGUMENTS # ============================================================================= args <- commandArgs(trailingOnly = TRUE) if (length(args) < 3) { stop("Usage: Rscript single_heatmap.r ") } # Which experiment to display experiment <- args[1] # Which marker to display target_resulttype <- args[2] # Which benchmark to display target_benchmark <- args[3] # ============================================================================= # INPUT DATA # ============================================================================= datafile <- file.path(experiment, "faults.csv") if (!file.exists(datafile)) { stop(paste("Input file not found:", datafile)) } raw <- read_csv(datafile, col_types = cols( benchmark = col_character(), resulttype = col_character(), faults = col_double(), fault_address = col_character() # hex string "0x10001A"; converted below )) # ============================================================================= # FILTER # ============================================================================= # Keep only rows matching the marker type and benchmark filtered <- raw |> filter( resulttype == target_resulttype, benchmark == target_benchmark ) if (nrow(filtered) == 0) { avail_rt <- paste(sort(unique(raw$resulttype)), collapse = ", ") avail_bm <- paste(sort(unique(raw$benchmark)), collapse = ", ") stop(paste0( "No data for resulttype='", target_resulttype, "' + benchmark='", target_benchmark, "'.\n", "Available resulttypes: ", avail_rt, "\n", "Available benchmarks: ", avail_bm )) } # We're only interested in addresses and count after filtering aggregated <- filtered |> select(fault_address, faults) # ============================================================================= # ADDRESS HEX -> INT # ============================================================================= # "0x10001A" -> substr strips "0x" -> strtoi parses base-16 -> integer aggregated <- aggregated |> mutate(addr_int = strtoi( substr(fault_address, 3L, nchar(fault_address)), 16L )) # ============================================================================= # SCALE ROWS # ============================================================================= # Count the number of rows/"bins" required: # - (addr_ints %/% rw) is the bin # - Multiply by rw to get the base address # - Count the unique base addresses to get the number of occupied rows/bins n_occupied_rows <- function(addr_ints, rw) { length(unique((addr_ints %/% rw) * rw)) } # Double row_width until the number of occupied rows/bins is <= max_rows while (row_width < 65536L && n_occupied_rows( aggregated$addr_int, row_width ) > max_rows) { row_width <- row_width * 2L } if (row_width > 16L) { message(sprintf( "Note: row_width auto-scaled to %d (%d occupied rows, max_rows=%d)", row_width, n_occupied_rows(aggregated$addr_int, row_width), max_rows )) } # ============================================================================= # GRID COORDINATES # ============================================================================= # col = addr %% row_width -> byte offset within the row (0 ... row_width-1) # row = (addr %/% row_width) * row_width -> base address of the row grid_data <- aggregated |> mutate( col = addr_int %% row_width, row = (addr_int %/% row_width) * row_width ) # ============================================================================= # GAPS # ============================================================================= # Assign sequential indices to each row to mark gaps rows_sorted <- sort(unique(grid_data$row)) n_data_rows <- length(rows_sorted) # - diff() returns the successive differences between consecutive elements # - has_gap_before[i] = TRUE when that distance > row_width # - First row never has a predecessor, so it's FALSE has_gap_before <- c(FALSE, diff(rows_sorted) > row_width) # - cumsum(has_gap_before) counts how many gaps are before each row # - Adding the offset to 1...n gives the row indices with gaps cumulative_gaps <- cumsum(has_gap_before) row_order <- tibble( row = rows_sorted, row_idx = seq_len(n_data_rows) + cumulative_gaps, has_gap_before = has_gap_before ) # Mark one slot before each row that has a gap preceding it gap_marker_indices <- row_order$row_idx[has_gap_before] - 1L # Total y-axis slots = data rows + gap markers total_slots <- n_data_rows + sum(has_gap_before) # ============================================================================= # FILL EMPTY CELLS # ============================================================================= # - complete() adds a row for every missing (row, col) tuple # - left_join adds row_idx and has_gap_before to every row grid_complete <- grid_data |> complete(row, col = 0L:(row_width - 1L)) |> left_join(row_order, by = "row") # ============================================================================= # GAP TILES # ============================================================================= # Create one rectangle per gap spanning the full width gap_markers <- data.frame(row_idx = gap_marker_indices) # ============================================================================= # TILE SIZE (computed here so x-tick density can use it) # ============================================================================= # Largest tile size fitting within target sizes tile_size <- min(target_w / row_width, target_h / total_slots, max_tile) # ============================================================================= # X-AXIS TICKS # ============================================================================= # Make sure labels don't overlap min_tick_step <- as.integer(ceiling(0.25 / tile_size)) # Snap to a power of 2 so labels stay round x_tick_step <- max(1L, row_width %/% n_x_ticks) # Desired x_tick_step <- 2L^as.integer(ceiling(log2(max(x_tick_step, min_tick_step, 1L)))) col_tick_values <- seq(0L, row_width - 1L, by = x_tick_step) col_tick_labels <- sprintf("+0x%X", col_tick_values) # ============================================================================= # Y-AXIS TICKS # ============================================================================= # Show at most 15 labels (gap slots are ignored) label_step <- max(1L, ceiling(n_data_rows / 15L)) label_at <- row_order[seq(1L, n_data_rows, by = label_step), ] # ============================================================================= # PLOT # ============================================================================= plot <- ggplot(grid_complete, aes(x = col, y = row_idx, fill = faults)) + # One filled rectangle per (col, row_idx) tuple geom_tile(width = 1, height = 1, colour = NA) + # Separators at address gaps geom_rect( data = gap_markers, aes(ymin = row_idx - 0.5, ymax = row_idx + 0.5), xmin = -0.5, xmax = row_width - 0.5, fill = "grey40", colour = NA, inherit.aes = FALSE ) + # Heatmap color ramp (dark -> yellow) scale_fill_viridis_c( name = "Faults", trans = "log1p", na.value = "grey85", option = "viridis" ) + # X-axis hex labels scale_x_continuous( breaks = col_tick_values, labels = col_tick_labels, limits = c(-0.5, row_width - 0.5), expand = c(0, 0) ) + # Y-axis hex labels. Lowest address at the top scale_y_reverse( breaks = label_at$row_idx, labels = sprintf("0x%X", label_at$row), limits = c(total_slots + 0.5, 0.5), # total_slots includes gap-marker slots expand = c(0, 0) ) + # Title and axis labels labs( title = paste(target_resulttype, "/", target_benchmark), subtitle = paste( "Total:", format(sum(aggregated$faults, na.rm = TRUE), big.mark = ",") ), x = "Byte Offset", y = "Base Address" ) + # Theme theme_minimal() + theme( axis.text.x = element_text( family = "mono", angle = 45, hjust = 1, size = 9 ), axis.text.y = element_text(family = "mono", size = 9), panel.grid = element_blank(), panel.border = element_rect(colour = "grey50", fill = NA, linewidth = 0.5) ) + # Force square tiles coord_fixed(ratio = 1) # ============================================================================= # SAVE # ============================================================================= # Margins fig_w <- row_width * tile_size + 4.5 fig_h <- total_slots * tile_size + 2.5 # Write to file outfile <- file.path(experiment, paste0( target_resulttype, "_", target_benchmark, "_heatmap.svg" )) ggsave( outfile, plot = plot, width = fig_w, height = fig_h, units = "in" )