R/sampling_distance.R

Defines functions sampling_distance

Documented in sampling_distance

#' Sampling distance Plot
#'
#' @description
#' The `sampling_distance` function creates a plot that displays the mean of the 
#' distance between the configurations that were executed in each iteration.
#' 
#' For categorical parameters the distance is calculated as the hamming distance, 
#' for numerical parameters a equality interval is defined by a threshold
#' specified by argument t and hamming distance is calculated using this interval. 
#'
#' @template arg_irace_results
#' @param type
#' String, (default "boxplot") Type of plot to be produces, either "line", "boxplot" 
#' or "both". The "boxplot" setting shows a boxplot of the mean distance of all
#' configurations, "line" shows the mean distance of the solution population in each 
#' iteration, "both" shows both plots.
#' 
#' @param t
#' Numeric, (default 0.05) percentage factor that will determine a distance to 
#' define equal numerical parameter values. If the numerical parameter values to be 
#' compared are v1 and v2 they are considered equal if `|v1-v2| <= |ub-lb|*t`. 
#' 
#' @param file_name
#' String, File name to save plot (example: "~/patch/example/file_name.png")
#'
#' @return line or box plot
#'
#' @export
#'
#' @examples
#' # sampling_distance(iraceResults)
#' # sampling_distance(iraceResults, type = "boxplot", t=0.07)
sampling_distance <- function(irace_results, type = "boxplot", t = 0.05, file_name = NULL) {
  if (!(type == "line" | type == "boxplot" | type == "both")) {
    cat("Error: The type parameter entered is incorrect\n")
    stop()
  }

  # variable assignment
  media <- allconf <- valor <- iterations <- tabla_box <- iteration <- vectorP <- NULL
  allconf <- irace_results$allConfigurations
  n_param <- length(allconf) - 2
  niterations <- length(irace_results$allElites)
  
  all.distance <- matrix(0, ncol=nrow(allconf), nrow=nrow(allconf))
  a <- 1:nrow(allconf)
  for (confid in allconf$.ID.) {
    all.distance[confid,a[-confid]] <- distance_config(irace_results, id_configuration=confid, t=t)
  }

  # The value of the distance between each configuration is created
  for (i in 1:niterations) {
    ids <- unique(irace_results$experimentLog[irace_results$experimentLog[,"iteration"] %in% i, "configuration"])
    iterations <- c(iterations, i)

    dd <- all.distance[ids,ids]
    distance <- dd[upper.tri(dd)]
    it <- i
    datos <- data.frame(it, distance)
    tabla_box <- rbind(tabla_box, datos)
    tabla_box[,"it"] <- as.character(tabla_box[,"it"])
    media <-  c(media, mean(distance))
  }
  tabla_box[,"it"] <- factor(tabla_box[,"it"] , levels=as.character(1:niterations))

  # A graph of points and lines is created
  if (type == "line" | type == "both") {
    tabla <- as.data.frame(cbind(iterations, media))
    colnames(tabla) <- c("iterations", "media")
    p <- ggplot(tabla, aes(x = iterations, y = media)) +
      geom_point() +
      geom_line() +
      scale_y_continuous(
        limits = c(0, n_param),
        breaks = seq(0, n_param, 2)
      ) +
      scale_x_continuous(
        limits = c(1, niterations),
        breaks = seq(1, niterations, by=1)
      ) +
      scale_color_viridis_c() +
      labs(y = "Distance", x = "iteration", color = "IT.") + 
      theme (legend.position = "none")
    vectorP[1] <- list(p)

    # A box plot is created
  }
  if (type == "boxplot" | type == "both") {
    p <- ggplot(tabla_box, aes(x = it, y = distance, group = it, color = it)) +
      geom_boxplot(na.rm = TRUE) +
      scale_color_viridis_d() +
      scale_y_continuous(
        limits = c(0, n_param),
        breaks = seq(0, n_param, 2)
      ) +
      labs(x = "iteration", y = "Distance", color = "IT.") +
      theme (legend.position = "none")
    vectorP[2] <- list(p)
  }

  # If the value in file_name is added the pdf file is created
  if (!is.null(file_name)) {
    if (type == "both") {
      pdf(paste0(file_name, ".pdf"), width = 12)
      do.call("grid.arrange", c(vectorP[1], ncol = 1))
      do.call("grid.arrange", c(vectorP[2], ncol = 1))
      dev.off()
    } else {
      ggsave(file_name, plot = p)
    }

    # If you do not add the value of file_name, the plot is displayed
  } else {
    if (type == "both") {
      do.call("grid.arrange", c(vectorP, nrow = 2))
    } else {
      p
      return(p)
    }
  }
}
pabloOnate/iraceplot documentation built on Aug. 29, 2021, 2:32 a.m.