library(ggplot2)
library(readr) # read_csv
library(dplyr) # filter, mutate, group_by, summarise, left_join
library(tidyr) # complete
library(scales)

# Renders one fault heatmap (SVG) per (benchmark, resulttype) combination.
#
# Usage: Rscript single_heatmap.r <exp_abspath> [faults_file]
#   exp_abspath  directory holding the faults CSV and (optionally) system.elf
#   faults_file  CSV name relative to exp_abspath (default: "faults.csv")

# =============================================================================
# CONFIG
# =============================================================================

# Starting row width (might be scaled up)
row_width_init <- 16L

# Max number of occupied rows before row_width is doubled
max_rows <- 64L

# How many x-axis tick labels to show
n_x_ticks <- 16L

# Target size (inches, no margins)
target_w <- 10.0
target_h <- 6.0

# Max size so small grids don't produce huge tiles
max_tile <- 0.5

# Generate all heatmaps with crossproduct of this
benchmarks <- c("ip", "mem", "regs")
markers <- c(
  "OK_MARKER",
  "FAIL_MARKER",
  "DETECTED_MARKER",
  "ACCESS_OUTERSPACE",
  "WRITE_TEXTSEGMENT",
  "GROUP1_MARKER",
  "TRAP",
  "TIMEOUT"
)

# Labels for _start/_end symbols from linker.ld
regions <- list(
  list(label = "WAMR AOT", start = "_wamr_aot_start", end = "_wamr_aot_end"),
  list(
    label = "WAMR os_mmap",
    start = "_wamr_mmap_start",
    end = "_wamr_mmap_end"
  ),
  list(
    label = "WAMR runtime mem",
    start = "_wamr_runtime_pool_start",
    end = "_wamr_runtime_pool_end"
  ),
  list(
    label = "WAMR linear mem",
    start = "_wamr_linear_pool_start",
    end = "_wamr_linear_pool_end"
  ),
  list(
    label = "WAMR global heap",
    start = "_wamr_global_heap_start",
    end = "_wamr_global_heap_end"
  ),
  list(
    label = "IWASM AOT runtime",
    start = "_iwasm_aot_runtime_start",
    end = "_iwasm_aot_runtime_end"
  ),
  list(
    label = "IWASM bh/util",
    start = "_iwasm_bh_start",
    end = "_iwasm_bh_end"
  ),
  list(
    label = "IWASM mem_alloc",
    start = "_iwasm_mem_alloc_start",
    end = "_iwasm_mem_alloc_end"
  ),
  list(
    label = "IWASM platform",
    start = "_iwasm_platform_init_start",
    end = "_iwasm_platform_init_end"
  ),
  list(
    label = "IWASM exec_env",
    start = "_iwasm_exec_env_start",
    end = "_iwasm_exec_env_end"
  ),
  list(
    label = "IWASM interp",
    start = "_iwasm_interp_classic_start",
    end = "_iwasm_interp_classic_end"
  ),
  list(
    label = "IWASM memory",
    start = "_iwasm_memory_start",
    end = "_iwasm_memory_end"
  ),
  list(
    label = "IWASM native",
    start = "_iwasm_native_start",
    end = "_iwasm_native_end"
  ),
  list(
    label = "IWASM runtime",
    start = "_iwasm_runtime_start",
    end = "_iwasm_runtime_end"
  ),
  list(label = "TEXT", start = "_text_start", end = "_text_end"),
  list(label = "BSS", start = "_sbss", end = "_ebss")
)

# =============================================================================
# HELPER
# =============================================================================

# Count the number of unique row base-addresses
# - %/% to sort to bins
# - * to map to base address
n_occupied_rows <- function(addr_ints, rw) {
  length(unique((addr_ints %/% rw) * rw))
}

# Parse hex strings ("0x10001A" or "0010001a") into doubles.
# - strtoi() yields NA above .Machine$integer.max, which would silently drop
#   every address >= 0x80000000; doubles are exact up to 2^53
# - Returns NA for anything that is not a pure hex number or exceeds 2^53
parse_hex <- function(hex) {
  digits <- sub("^0[xX]", "", trimws(hex))
  ok <- !is.na(digits) & grepl("^[0-9a-fA-F]+$", digits)
  out <- rep(NA_real_, length(digits))
  out[ok] <- as.numeric(paste0("0x", digits[ok]))
  out[!is.na(out) & out > 2^53] <- NA_real_
  out
}

# Format whole-valued, non-negative doubles (<= 2^53) as upper-case hex
# without prefix. sprintf("%X") only accepts values in integer range, so the
# number is split into a high part and a 24-bit low part.
format_hex <- function(x) {
  hi <- x %/% 16777216
  lo <- x %% 16777216
  ifelse(hi > 0, sprintf("%X%06X", hi, lo), sprintf("%X", lo))
}

# =============================================================================
# CLI
# =============================================================================

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1) {
  stop("Usage: Rscript single_heatmap.r <exp_abspath> [faults_file]")
}
experiment <- args[1]
faults_file <- if (length(args) >= 2) args[2] else "faults.csv"

# Output-name suffix: "faults_foo.csv" -> "_foo". basename() keeps a directory
# component of faults_file out of the generated file name.
suffix <- gsub("^faults|\\.csv$", "", basename(faults_file))

# =============================================================================
# INPUT DATA (read once)
# =============================================================================

datafile <- file.path(experiment, faults_file)
if (!file.exists(datafile)) {
  stop(paste("Input file not found:", datafile))
}

raw <- read_csv(
  datafile,
  col_types = cols(
    benchmark = col_character(),
    resulttype = col_character(),
    faults = col_double(),
    fault_address = col_character() # hex string "0x10001A"; converted below
  )
)

# =============================================================================
# ELF SYMBOLS (parsed once)
# =============================================================================

# Named vector: sym_addr["_text_start"] = 0x10001a
sym_addr <- setNames(numeric(0), character(0))

elf_file <- file.path(experiment, "system.elf")
if (!file.exists(elf_file)) {
  message("system.elf not found")
} else {
  # Parse elf regions. The path is quoted because system2() pastes its args
  # into a shell command line.
  nm_lines <- tryCatch(
    system2("nm", args = shQuote(elf_file), stdout = TRUE, stderr = FALSE),
    error = function(e) {
      message("nm failed (", conditionMessage(e), ")")
      NULL
    }
  )

  if (!is.null(nm_lines) && length(nm_lines) > 0) {
    # Each nm line: "0010001a T _text_start"
    # Groups: 1 = hex address, 2 = symbol name
    pattern <- "^([0-9a-fA-F]+)\\s+\\S+\\s+(\\S+)"
    matches <- regmatches(nm_lines, regexec(pattern, nm_lines))
    valid <- Filter(function(m) length(m) == 3, matches)
    # vapply keeps the result a character vector even when nothing matched
    sym_addr <- setNames(
      parse_hex(vapply(valid, `[[`, character(1), 2L)),
      vapply(valid, `[[`, character(1), 3L)
    )
  }
}

# =============================================================================
# HEATMAP
# =============================================================================

# Draw and save the heatmap for one (resulttype, benchmark) combination.
# - target_resulttype: value matched against the `resulttype` column of `raw`
# - target_benchmark:  value matched against the `benchmark` column of `raw`
# Writes heatmap_<resulttype>_<benchmark><suffix>.svg into `experiment`.
# Returns invisible(NULL); combinations without (usable) data are skipped.
make_heatmap <- function(target_resulttype, target_benchmark) {
  # ===========================================================================
  # FILTER
  # ===========================================================================
  filtered <- raw |>
    filter(
      .data$resulttype == target_resulttype,
      .data$benchmark == target_benchmark
    )

  # Nothing to draw — skip without error
  if (nrow(filtered) == 0) {
    return(invisible(NULL))
  }

  # ===========================================================================
  # ADDRESS HEX -> NUMBER
  # ===========================================================================
  parsed <- filtered |>
    select(fault_address, faults) |>
    mutate(addr_int = parse_hex(.data$fault_address))

  # Unparseable addresses cannot be placed on the grid
  n_bad <- sum(is.na(parsed$addr_int))
  if (n_bad > 0) {
    message(sprintf(
      "Note: [%s/%s] skipped %d row(s) with unparseable fault_address",
      target_resulttype,
      target_benchmark,
      n_bad
    ))
  }

  # One record per address: duplicates would otherwise be drawn as overlapping
  # tiles where only the last one is visible. All-NA groups stay NA.
  aggregated <- parsed |>
    filter(!is.na(.data$addr_int)) |>
    group_by(addr_int = .data$addr_int) |>
    summarise(
      faults = if (all(is.na(.data$faults))) {
        NA_real_
      } else {
        sum(.data$faults, na.rm = TRUE)
      },
      .groups = "drop"
    )

  if (nrow(aggregated) == 0) {
    return(invisible(NULL))
  }

  # ===========================================================================
  # SCALE ROWS
  # ===========================================================================
  # Local copy so different combinations can scale independently.
  row_width <- row_width_init

  # Double row_width until occupied rows <= max_rows
  while (
    row_width < 65536L &&
      n_occupied_rows(aggregated$addr_int, row_width) > max_rows
  ) {
    row_width <- row_width * 2L
  }

  if (row_width > row_width_init) {
    message(sprintf(
      "Note: [%s/%s] row_width auto-scaled to %d (%d occupied rows)",
      target_resulttype,
      target_benchmark,
      row_width,
      n_occupied_rows(aggregated$addr_int, row_width)
    ))
  }

  # ===========================================================================
  # GRID COORDINATES
  # ===========================================================================
  # col = addr %% row_width -> byte offset within the row (0 ... row_width-1)
  # row = (addr %/% row_width) * row_width -> base address of the row
  grid_data <- aggregated |>
    mutate(
      col = as.integer(.data$addr_int %% row_width),
      row = (.data$addr_int %/% row_width) * row_width
    )

  # ===========================================================================
  # GAPS
  # ===========================================================================
  rows_sorted <- sort(unique(grid_data$row))
  n_data_rows <- length(rows_sorted)

  # - diff() returns the successive differences between consecutive elements
  # - has_gap_before[i] = TRUE when that distance > row_width
  # - First row never has a predecessor, so it's FALSE
  has_gap_before <- c(FALSE, diff(rows_sorted) > row_width)

  # - cumsum(has_gap_before) counts how many gaps precede each row
  # - Adding that offset to 1...n gives the row_idx values with gap slots
  cumulative_gaps <- cumsum(has_gap_before)

  row_order <- tibble(
    row = rows_sorted,
    row_idx = seq_len(n_data_rows) + cumulative_gaps,
    has_gap_before = has_gap_before
  )

  # The slot directly above a row that follows a gap holds the gap marker
  gap_marker_indices <- row_order$row_idx[has_gap_before] - 1L
  total_slots <- n_data_rows + sum(has_gap_before)

  # ===========================================================================
  # FILL EMPTY CELLS
  # ===========================================================================
  # - complete() adds a row for every missing (row, col) tuple
  # - left_join attaches row_idx to every cell
  grid_complete <- grid_data |>
    complete(row, col = 0L:(row_width - 1L)) |>
    left_join(row_order, by = "row")

  # ===========================================================================
  # GAP TILES
  # ===========================================================================
  gap_markers <- data.frame(row_idx = gap_marker_indices)

  # ===========================================================================
  # ELF REGION RECTANGLES
  # ===========================================================================
  region_rects <- data.frame(
    label = character(0),
    ymin = numeric(0),
    ymax = numeric(0)
  )

  if (length(sym_addr) > 0) {
    rects_list <- lapply(regions, function(reg) {
      # Indexing by an unknown name yields NA, which skips the region
      s <- unname(sym_addr[reg$start])
      e <- unname(sym_addr[reg$end])
      if (is.na(s) || is.na(e) || s >= e) {
        return(NULL)
      }

      # Row with base address r covers bytes r ... r + row_width - 1.
      # Overlap if r < e && r + row_width > s
      overlapping <- row_order[
        row_order$row < e & (row_order$row + row_width) > s,
        ,
        drop = FALSE
      ]
      if (nrow(overlapping) == 0) {
        return(NULL)
      }

      data.frame(
        label = reg$label,
        ymin = min(overlapping$row_idx) - 0.5,
        ymax = max(overlapping$row_idx) + 0.5
      )
    })

    rects_list <- Filter(Negate(is.null), rects_list)
    if (length(rects_list) > 0) {
      region_rects <- do.call(rbind, rects_list)
    }
  }

  # ===========================================================================
  # TILE SIZE
  # ===========================================================================
  tile_size <- min(target_w / row_width, target_h / total_slots, max_tile)

  # ===========================================================================
  # X-AXIS TICKS
  # ===========================================================================
  # Minimum step to keep labels from overlapping at this tile size
  min_tick_step <- as.integer(ceiling(0.25 / tile_size))

  # Snap to a power of 2 so labels stay round
  x_tick_step <- max(1L, row_width %/% n_x_ticks)
  x_tick_step <- 2L^as.integer(
    ceiling(log2(max(x_tick_step, min_tick_step, 1L)))
  )

  col_tick_values <- seq(0L, row_width - 1L, by = x_tick_step)
  col_tick_labels <- sprintf("+0x%X", col_tick_values)

  # ===========================================================================
  # Y-AXIS TICKS
  # ===========================================================================
  # Show 15 labels max (gaps are ignored)
  label_step <- max(1L, ceiling(n_data_rows / 15L))
  label_at <- row_order[seq(1L, n_data_rows, by = label_step), ]

  # ===========================================================================
  # PLOT
  # ===========================================================================
  plot <- ggplot(
    grid_complete,
    aes(
      x = .data$col,
      y = .data$row_idx,
      fill = .data$faults
    )
  ) +
    # One rectangle per (col, row_idx) tuple
    geom_tile(width = 1, height = 1, colour = NA) +
    # Separators at address gaps
    geom_rect(
      data = gap_markers,
      aes(ymin = .data$row_idx - 0.5, ymax = .data$row_idx + 0.5),
      xmin = -0.5,
      xmax = row_width - 0.5,
      fill = "grey40",
      colour = NA,
      inherit.aes = FALSE
    ) +
    # Heatmap color ramp
    scale_fill_viridis_c(
      name = "Faults",
      trans = "log1p",
      na.value = "grey85",
      option = "viridis"
    ) +
    # X-axis hex labels
    scale_x_continuous(
      breaks = col_tick_values,
      labels = col_tick_labels,
      limits = c(-0.5, row_width - 0.5),
      expand = c(0, 0)
    ) +
    # Y-axis hex labels, lowest address at the top
    scale_y_reverse(
      breaks = label_at$row_idx,
      labels = paste0("0x", format_hex(label_at$row)),
      limits = c(total_slots + 0.5, 0.5), # includes gaps
      expand = c(0, 0)
    ) +
    # Title + axis labels
    labs(
      title = paste(target_resulttype, "/", target_benchmark),
      subtitle = paste(
        "Total:",
        format(
          sum(aggregated$faults, na.rm = TRUE),
          big.mark = ","
        )
      ),
      x = "Byte Offset",
      y = "Base Address"
    ) +
    # Theme
    theme_minimal() +
    theme(
      axis.text.x = element_text(
        family = "mono",
        angle = 45,
        hjust = 1,
        size = 9
      ),
      axis.text.y = element_text(family = "mono", size = 9),
      panel.grid = element_blank(),
      panel.border = element_rect(colour = "grey50", fill = NA, linewidth = 0.5)
    ) +
    # Force square tiles
    coord_fixed(ratio = 1)

  # ELF region borders
  if (nrow(region_rects) > 0) {
    # Dark2 only provides 8 colours; with more regions the surplus ones would
    # get no colour, so fall back to the evenly spaced hue scale.
    region_scale <- if (length(unique(region_rects$label)) <= 8L) {
      scale_colour_brewer(name = "Region", palette = "Dark2")
    } else {
      scale_colour_hue(name = "Region")
    }

    plot <- plot +
      geom_rect(
        data = region_rects,
        aes(ymin = .data$ymin, ymax = .data$ymax, colour = .data$label),
        xmin = -0.5,
        xmax = row_width - 0.5,
        fill = NA,
        linewidth = 1.5,
        inherit.aes = FALSE
      ) +
      region_scale
  }

  # ===========================================================================
  # SAVE
  # ===========================================================================
  # Fixed extra inches added on top of the tile grid (axes, title, legend)
  fig_w <- row_width * tile_size + 4.5
  fig_h <- total_slots * tile_size + 2.5

  outfile <- file.path(
    experiment,
    paste0(
      "heatmap_",
      target_resulttype,
      "_",
      target_benchmark,
      suffix,
      ".svg"
    )
  )

  ggsave(outfile, plot = plot, width = fig_w, height = fig_h, units = "in")
  message(sprintf("Saved: %s", basename(outfile)))

  invisible(NULL)
}

# =============================================================================
# GENERATE HEATMAPS
# =============================================================================

# Combinations with missing data are skipped
for (bm in benchmarks) {
  for (marker in markers) {
    make_heatmap(marker, bm)
  }
}