library(ggplot2)
library(readr) # read_csv
library(dplyr) # filter, mutate
library(tidyr) # complete
library(scales)

# Usage: Rscript single_heatmap.r exp_abspath

# =============================================================================
# CONFIG
# =============================================================================

# Starting row width (might be scaled up)
row_width_init <- 16L

# Max number of occupied rows before row_width is doubled
max_rows <- 64L

# How many x-axis tick labels to show
n_x_ticks <- 16L

# Target size (inches, no margins)
target_w <- 10.0
target_h <- 6.0

# Max size so small grids don't produce huge tiles
max_tile <- 0.5

# One heatmap is generated per (benchmark, marker) combination
benchmarks <- c("ip", "mem", "regs")
markers <- c(
  "OK_MARKER", "FAIL_MARKER", "DETECTED_MARKER",
  "ACCESS_OUTERSPACE", "WRITE_TEXTSEGMENT",
  "GROUP1_MARKER", "TRAP", "TIMEOUT"
)

# Labels for _start/_end symbols from linker.ld
# Each entry: display label plus the names of the two boundary symbols
regions <- list(
  list(label = "WAMR AOT", start = "_wamr_aot_start", end = "_wamr_aot_end"),
  list(label = "WAMR os_mmap", start = "_wamr_mmap_start", end = "_wamr_mmap_end"),
  list(label = "WAMR runtime mem", start = "_wamr_runtime_pool_start", end = "_wamr_runtime_pool_end"),
  list(label = "WAMR linear mem", start = "_wamr_linear_pool_start", end = "_wamr_linear_pool_end"),
  list(label = "WAMR global heap", start = "_wamr_global_heap_start", end = "_wamr_global_heap_end"),
  list(label = "IWASM AOT runtime", start = "_iwasm_aot_runtime_start", end = "_iwasm_aot_runtime_end"),
  list(label = "IWASM bh/util", start = "_iwasm_bh_start", end = "_iwasm_bh_end"),
  list(label = "IWASM mem_alloc", start = "_iwasm_mem_alloc_start", end = "_iwasm_mem_alloc_end"),
  list(label = "IWASM platform", start = "_iwasm_platform_init_start", end = "_iwasm_platform_init_end"),
  list(label = "IWASM exec_env", start = "_iwasm_exec_env_start", end = "_iwasm_exec_env_end"),
  list(label = "IWASM interp", start = "_iwasm_interp_classic_start", end = "_iwasm_interp_classic_end"),
  list(label = "IWASM memory", start = "_iwasm_memory_start", end = "_iwasm_memory_end"),
  list(label = "IWASM native", start = "_iwasm_native_start", end = "_iwasm_native_end"),
  list(label = "IWASM runtime", start = "_iwasm_runtime_start", end = "_iwasm_runtime_end"),
  list(label = "TEXT", start = "_text_start", end = "_text_end"),
  list(label = "BSS", start = "_sbss", end = "_ebss")
)

# =============================================================================
# HELPER
# =============================================================================

# Count how many rows of width `rw` are occupied by the addresses `addr_ints`.
#
# Args:
#   addr_ints: integer vector of addresses (may contain NA).
#   rw:        row width in bytes.
#
# Returns: the number of distinct row bins as a length-1 integer.
#
# - %/% sorts every address into its row bin; distinct bins are distinct rows,
#   so mapping the bin back to a base address is not needed for counting
# - NA addresses have no position on the grid and therefore occupy no row
n_occupied_rows <- function(addr_ints, rw) {
  placed <- addr_ints[!is.na(addr_ints)]
  length(unique(placed %/% rw))
}

# =============================================================================
# CLI
# =============================================================================

args <- commandArgs(trailingOnly = TRUE)

# The experiment directory is the only (and required) argument
if (length(args) < 1) {
  stop("Usage: Rscript single_heatmap.r <exp_abspath>", call. = FALSE)
}

experiment <- args[1]

# =============================================================================
# INPUT DATA (read once)
# =============================================================================

datafile <- file.path(experiment, "faults.csv")

# NOTE(review): the column specification passed to cols() is not visible in
# the reviewed excerpt - confirm it against the full file before relying on it
raw <- read_csv(datafile, col_types = cols(
))

# =============================================================================
# ELF SYMBOLS (parsed once)
# =============================================================================

# Named vector: sym_addr["_text_start"] = 0x10001a
# Stays empty when system.elf is missing or nm yields nothing usable
sym_addr <- setNames(integer(0), character(0))

elf_file <- file.path(experiment, "system.elf")

if (!file.exists(elf_file)) {
  message("system.elf not found")
} else {
  # Shared handler: report the problem and continue without symbols
  nm_failed <- function(cond) {
    message("nm failed (", conditionMessage(cond), ")")
    NULL
  }

  # system2() does not quote its arguments, so the path is quoted here to
  # survive spaces in the experiment directory. A non-zero exit status of nm
  # arrives as a warning and is treated like any other failure.
  nm_lines <- tryCatch(
    system2("nm", args = shQuote(elf_file), stdout = TRUE, stderr = FALSE),
    error = nm_failed,
    warning = nm_failed
  )

  if (!is.null(nm_lines) && length(nm_lines) > 0) {
    # Each nm line: "0010001a T _text_start"
    # Groups: 1 = hex address, 2 = symbol name
    pattern <- "^([0-9a-fA-F]+)\\s+\\S+\\s+(\\S+)"
    matches <- regmatches(nm_lines, regexec(pattern, nm_lines))

    # Non-matching lines produce an empty match and are dropped here
    valid <- Filter(function(m) length(m) == 3, matches)

    # vapply() keeps the result a character vector even when `valid` is empty
    sym_addr <- setNames(
      strtoi(vapply(valid, `[[`, character(1), 2L), 16L),
      vapply(valid, `[[`, character(1), 3L)
    )
  }
}

# =============================================================================
# HEATMAP
# =============================================================================

# Render and save the fault heatmap for one (result type, benchmark) pair.
#
# Reads the script-level objects `raw`, `experiment`, `sym_addr`, `regions`
# and the CONFIG values (`row_width_init`, `max_rows`, `n_x_ticks`,
# `target_w`, `target_h`, `max_tile`).
#
# Args:
#   target_resulttype: value of `raw$resulttype` to plot.
#   target_benchmark:  value of `raw$benchmark` to plot.
#
# Returns: NULL, invisibly. As a side effect writes
#   heatmap_<resulttype>_<benchmark>.svg into the experiment directory.
#   A combination with no plottable record is skipped without error.
make_heatmap <- function(target_resulttype, target_benchmark) {
  # ===========================================================================
  # FILTER
  # ===========================================================================

  filtered <- raw |>
    filter(
      .data$resulttype == target_resulttype,
      .data$benchmark == target_benchmark
    )

  # Nothing to draw - skip without error
  if (nrow(filtered) == 0) {
    return(invisible(NULL))
  }

  # The subtitle total covers every matching record, including records that
  # cannot be placed on the grid below
  total_faults <- sum(filtered$faults, na.rm = TRUE)

  # ===========================================================================
  # ADDRESS HEX -> INT
  # ===========================================================================

  # "0x10001A" -> substr strips "0x" -> strtoi parses base-16 -> integer
  # Columns are selected by name; the .data pronoun is deprecated in select()
  aggregated <- filtered |>
    select("fault_address", "faults") |>
    mutate(addr_int = strtoi(
      substr(.data$fault_address, 3L, nchar(.data$fault_address)),
      16L
    ))

  # strtoi() returns NA for text it cannot parse and for values that do not
  # fit an integer; such records have no grid position and are left out
  unplaceable <- is.na(aggregated$addr_int)
  if (any(unplaceable)) {
    message(sprintf(
      "Note: [%s/%s] skipped %d record(s) with unparsable fault_address",
      target_resulttype, target_benchmark, sum(unplaceable)
    ))
    aggregated <- aggregated[!unplaceable, , drop = FALSE]
  }

  if (nrow(aggregated) == 0) {
    return(invisible(NULL))
  }

  # Merge records that share an address so every cell is drawn exactly once
  # (a no-op when each address already appears only once)
  aggregated <- aggregated |>
    group_by(addr_int = .data$addr_int) |>
    summarise(faults = sum(.data$faults), .groups = "drop")

  # ===========================================================================
  # SCALE ROWS
  # ===========================================================================

  # Local copy so different combinations can scale independently
  row_width <- row_width_init

  # Double row_width until occupied rows <= max_rows
  while (row_width < 65536L && n_occupied_rows(
    aggregated$addr_int, row_width
  ) > max_rows) {
    row_width <- row_width * 2L
  }

  if (row_width > row_width_init) {
    message(sprintf(
      "Note: [%s/%s] row_width auto-scaled to %d (%d occupied rows)",
      target_resulttype, target_benchmark,
      row_width, n_occupied_rows(aggregated$addr_int, row_width)
    ))
  }

  # ===========================================================================
  # GRID COORDINATES
  # ===========================================================================

  # col = addr %% row_width -> byte offset within the row (0 ... row_width-1)
  # row = (addr %/% row_width) * row_width -> base address of the row
  grid_data <- aggregated |>
    mutate(
      col = .data$addr_int %% row_width,
      row = (.data$addr_int %/% row_width) * row_width
    )

  # ===========================================================================
  # GAPS
  # ===========================================================================

  rows_sorted <- sort(unique(grid_data$row))
  n_data_rows <- length(rows_sorted)

  # - diff() returns the successive differences between consecutive elements
  # - has_gap_before[i] = TRUE when that distance > row_width
  # - First row never has a predecessor, so it's FALSE
  has_gap_before <- c(FALSE, diff(rows_sorted) > row_width)

  # - cumsum(has_gap_before) counts how many gaps precede each row
  # - Adding that offset to 1...n gives the row_idx values with gap slots
  cumulative_gaps <- cumsum(has_gap_before)
  row_order <- tibble(
    row = rows_sorted,
    row_idx = seq_len(n_data_rows) + cumulative_gaps,
    has_gap_before = has_gap_before
  )

  # The slot directly before a row that follows a gap holds the gap marker
  gap_marker_indices <- row_order$row_idx[has_gap_before] - 1L

  # Total y-axis slots = data rows + gap markers
  total_slots <- n_data_rows + sum(has_gap_before)

  # ===========================================================================
  # FILL EMPTY CELLS
  # ===========================================================================

  # - complete() adds a row for every missing (row, col) tuple
  # - left_join attaches row_idx to every cell
  grid_complete <- grid_data |>
    complete(row, col = 0L:(row_width - 1L)) |>
    left_join(row_order, by = "row")

  # ===========================================================================
  # GAP TILES
  # ===========================================================================

  gap_markers <- data.frame(row_idx = gap_marker_indices)

  # ===========================================================================
  # ELF REGION RECTANGLES
  # ===========================================================================

  # Empty frame is the fallback when no region touches the plotted rows
  region_rects <- data.frame(
    label = character(0),
    ymin = numeric(0),
    ymax = numeric(0)
  )

  if (length(sym_addr) > 0) {
    rects_list <- lapply(regions, function(reg) {
      s <- sym_addr[reg$start]
      e <- sym_addr[reg$end]

      # Skip regions whose symbols are missing or that are empty/inverted
      if (is.na(s) || is.na(e) || s >= e) {
        return(NULL)
      }

      # Row with base address r covers bytes r ... r + row_width - 1.
      # Overlap if r < e && r + row_width > s
      overlapping <- row_order[
        row_order$row < e & (row_order$row + row_width) > s, ,
        drop = FALSE
      ]

      if (nrow(overlapping) == 0) {
        return(NULL)
      }

      data.frame(
        label = reg$label,
        ymin = min(overlapping$row_idx) - 0.5,
        ymax = max(overlapping$row_idx) + 0.5
      )
    })

    rects_list <- Filter(Negate(is.null), rects_list)
    if (length(rects_list) > 0) {
      region_rects <- do.call(rbind, rects_list)
    }
  }

  # ===========================================================================
  # TILE SIZE
  # ===========================================================================

  # Largest tile that fits the target size, capped by max_tile
  tile_size <- min(target_w / row_width, target_h / total_slots, max_tile)

  # ===========================================================================
  # X-AXIS TICKS
  # ===========================================================================

  # Minimum step to keep labels from overlapping at this tile size
  min_tick_step <- as.integer(ceiling(0.25 / tile_size))

  # Snap to a power of 2 so labels stay round
  x_tick_step <- max(1L, row_width %/% n_x_ticks)
  x_tick_step <- 2L^as.integer(
    ceiling(log2(max(x_tick_step, min_tick_step, 1L)))
  )
  col_tick_values <- seq(0L, row_width - 1L, by = x_tick_step)
  col_tick_labels <- sprintf("+0x%X", col_tick_values)

  # ===========================================================================
  # Y-AXIS TICKS
  # ===========================================================================

  # Show 15 labels max (gaps are ignored)
  label_step <- max(1L, ceiling(n_data_rows / 15L))
  label_at <- row_order[seq(1L, n_data_rows, by = label_step), ]

  # ===========================================================================
  # PLOT
  # ===========================================================================

  heatmap <- ggplot(grid_complete, aes(
    x = .data$col, y = .data$row_idx, fill = .data$faults
  )) +

    # One rectangle per (col, row_idx) tuple
    geom_tile(width = 1, height = 1, colour = NA) +

    # Separators at address gaps
    geom_rect(
      data = gap_markers,
      aes(ymin = .data$row_idx - 0.5, ymax = .data$row_idx + 0.5),
      xmin = -0.5,
      xmax = row_width - 0.5,
      fill = "grey40",
      colour = NA,
      inherit.aes = FALSE
    ) +

    # Heatmap color ramp
    scale_fill_viridis_c(
      name = "Faults",
      trans = "log1p",
      na.value = "grey85",
      option = "viridis"
    ) +

    # X-axis hex labels
    scale_x_continuous(
      breaks = col_tick_values,
      labels = col_tick_labels,
      limits = c(-0.5, row_width - 0.5),
      expand = c(0, 0)
    ) +

    # Y-axis hex labels, lowest address at the top
    scale_y_reverse(
      breaks = label_at$row_idx,
      labels = sprintf("0x%X", label_at$row),
      limits = c(total_slots + 0.5, 0.5), # includes gaps
      expand = c(0, 0)
    ) +

    # Title + axis labels
    labs(
      title = paste(target_resulttype, "/", target_benchmark),
      subtitle = paste("Total:", format(total_faults, big.mark = ",")),
      x = "Byte Offset",
      y = "Base Address"
    ) +

    # Theme
    theme_minimal() +
    theme(
      axis.text.x = element_text(
        family = "mono", angle = 45, hjust = 1, size = 9
      ),
      axis.text.y = element_text(family = "mono", size = 9),
      panel.grid = element_blank(),
      panel.border = element_rect(colour = "grey50", fill = NA, linewidth = 0.5)
    ) +

    # Force square tiles
    coord_fixed(ratio = 1)

  # ELF region borders
  if (nrow(region_rects) > 0) {
    # Dark2 only has 8 colours, but more regions than that can be visible at
    # once; beyond 8 fall back to an evenly spaced hue palette so every region
    # still gets a colour
    region_labels <- sort(unique(region_rects$label))
    dark2 <- scales::brewer_pal(palette = "Dark2")(8L)
    region_colours <- if (length(region_labels) <= length(dark2)) {
      dark2[seq_along(region_labels)]
    } else {
      scales::hue_pal()(length(region_labels))
    }

    heatmap <- heatmap +
      geom_rect(
        data = region_rects,
        aes(ymin = .data$ymin, ymax = .data$ymax, colour = .data$label),
        xmin = -0.5,
        xmax = row_width - 0.5,
        fill = NA,
        linewidth = 1.0,
        inherit.aes = FALSE
      ) +
      scale_colour_manual(
        name = "Region",
        values = setNames(region_colours, region_labels)
      )
  }

  # ===========================================================================
  # SAVE
  # ===========================================================================

  # Extra inches leave room for axis labels, title and legends
  fig_w <- row_width * tile_size + 4.5
  fig_h <- total_slots * tile_size + 2.5

  outfile <- file.path(experiment, paste0(
    "heatmap_", target_resulttype, "_", target_benchmark, ".svg"
  ))

  ggsave(outfile, plot = heatmap, width = fig_w, height = fig_h, units = "in")
  message(sprintf("Saved: %s", basename(outfile)))
  invisible(NULL)
}

# =============================================================================
# GENERATE HEATMAPS
# =============================================================================

# Visit the full benchmark x marker grid: benchmarks vary slowest, markers
# fastest. Combinations with missing data are skipped by make_heatmap().
combinations <- expand.grid(
  marker = markers,
  benchmark = benchmarks,
  stringsAsFactors = FALSE
)

for (i in seq_len(nrow(combinations))) {
  make_heatmap(combinations$marker[i], combinations$benchmark[i])
}