library(ggplot2)
library(ggalluvial)

# Usage: Rscript combined_comparion.r exp_abspath1 exp_abspath2
# Exactly two experiment directories are expected; each must contain faults.csv.

# Command-line handling: the script takes exactly two experiment directories,
# and each of them must contain a faults.csv file.
args <- commandArgs(trailingOnly = TRUE)
argc <- length(args)

if (argc != 2) {
  # Carry the explanation in the error condition itself (stderr, non-zero exit
  # under Rscript) instead of printing to stdout and stopping with an empty
  # message.
  stop(
    "Expecting two input files (experiment directories containing ",
    "faults.csv), got ", argc,
    call. = FALSE
  )
}

for (experiment in args) {
  datafile <- file.path(experiment, "faults.csv")
  if (!file.exists(datafile)) {
    stop(paste("Input file", datafile, "is missing"), call. = FALSE)
  }
}

# Maps the raw result-type identifiers stored in faults.csv to the short
# labels used for the strata of the plot.
resulttype_labels <- c(
  OK_MARKER = "OK",
  FAIL_MARKER = "FAIL",
  DETECTED_MARKER = "DETECTED",
  TIMEOUT = "TIMEOUT",
  TRAP = "TRAP",
  ACCESS_OUTERSPACE = "OUTERSPC",
  WRITE_TEXTSEGMENT = "WRITETXT"
)

# Load one faults.csv file and normalise the two columns the script rewrites.
#
# datafile: path of the CSV file to read.
# labels:   named character vector mapping raw result types to display labels.
#
# Returns the table produced by readr::read_csv() with `fault_address`
# converted by strtoi() and `resulttype` replaced by its display label.
# Warns (without aborting) when either conversion yields NA for a value that
# was present in the file.
read_faults <- function(datafile, labels = resulttype_labels) {
  faults <- readr::read_csv(datafile)

  addresses <- strtoi(faults$fault_address)
  unparsed <- is.na(addresses) & !is.na(faults$fault_address)
  if (any(unparsed)) {
    # strtoi() returns NA for values it cannot represent as an R integer.
    warning(
      sum(unparsed), " fault_address value(s) in ", datafile,
      " could not be converted to integer and are now NA",
      call. = FALSE
    )
  }
  faults$fault_address <- addresses

  unmapped <- setdiff(faults$resulttype, names(labels))
  if (length(unmapped) > 0) {
    # Unmapped types become NA labels; the aggregate() formula call later in
    # this script omits NA rows by default, so surface them here.
    warning(
      "Result type(s) without a label in ", datafile, ": ",
      paste(unmapped, collapse = ", "),
      call. = FALSE
    )
  }
  faults$resulttype <- labels[faults$resulttype]

  faults
}

# Read data
datafile1 <- file.path(args[1], "faults.csv")
data1 <- read_faults(datafile1)
# tibble::glimpse(data1)

datafile2 <- file.path(args[2], "faults.csv")
data2 <- read_faults(datafile2)
# tibble::glimpse(data2)

# Build the flow table for the alluvial plot further down.
# ggalluvial reference: https://corybrunson.github.io/ggalluvial/

# Pair the rows of both benchmarks on the fault address; the shared column
# names are disambiguated with the _bench1 / _bench2 suffixes.
# NOTE(review): if an address occurs on several rows of an input, merge()
# yields every pairing of the matching rows - confirm that is intended.
compared_columns <- c("fault_address", "resulttype", "faults")
joined <- merge(
  x = data1[, compared_columns],
  y = data2[, compared_columns],
  by = "fault_address",
  suffixes = c("_bench1", "_bench2")
)

# One row per (bench1 result, bench2 result) combination, carrying the summed
# fault count of the second benchmark.
# NOTE(review): faults_bench1 is not used in the sum - confirm against the
# intended weighting.
streams <- setNames(
  aggregate(
    faults_bench2 ~ resulttype_bench1 + resulttype_bench2,
    data = joined,
    FUN = sum
  ),
  c("bench1", "bench2", "faults")
)

# Two-axis alluvial (Sankey-style) diagram: the left axis shows the result
# type in the first benchmark, the right axis the result type in the second,
# and the band heights are the aggregated fault counts.
flow_mapping <- aes(axis1 = bench1, axis2 = bench2, y = faults)

sankey_plot <- ggplot(streams, mapping = flow_mapping) +
  # Bands are coloured by the result type they start from.
  geom_alluvium(aes(fill = bench1)) +
  geom_stratum() +
  geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
  # TODO: Name the benchmarks
  # scale_x_discrete(limits = c(args[1], args[2]))
  scale_x_discrete(limits = c("Bench A", "Bench B")) +
  labs(x = "Benchmark", y = "Faults") +
  theme_minimal() +
  # The strata are labelled directly, so the fill legend is hidden.
  theme(legend.position = "none")

# TODO: Name the file according to the benchmarks
# The image is written next to (one level above) the second experiment
# directory.
ggsave(
  filename = paste0(args[2], "/../sankey.svg"),
  plot = sankey_plot
)