# R/aleatory_analysis_plotting.R
# In spartan: Simulation Parameter Analysis R Toolkit ApplicatioN: 'spartan'

# Documented in aa_graphATestsForSampleSize aa_graphSampleSizeSummary

#' Produce a plot for each sample size, showing the A-Test scores for each set of that size
#'
#' Draws one line per simulation output measure, plotting the A-Test score
#' (y-axis, fixed to the range 0-1) against the \code{Sample} column of
#' \code{ATESTS} (x-axis). Dashed reference lines are drawn at 0.5
#' ("no difference") and at \code{LARGEDIFFINDICATOR} either side of 0.5
#' ("large difference"). The plot is written as a PDF named
#' \code{GRAPHOUTPUTNAME} within \code{FILEPATH}.
#'
#' @inheritParams aa_summariseReplicateRuns
#' @param ATESTS Table of calculated A-Test scores (a data frame, or an
#' object that \code{data.frame} can coerce). It must contain a column named
#' \code{Sample} and, for every entry of \code{MEASURES}, a column named
#' \code{ATest<measure>}. The value in the first row of the first column is
#' reported in the graph title as the sample size
#' @param LARGEDIFFINDICATOR The A-Test determines there is a large difference between two sets if the result is greater than 0.2 either side of the 0.5 line.  Should this not be suitable, this can be changed here
#' @param GRAPHOUTPUTNAME Name of the graph to be output for each sample size. Should be in PDF format
#' @param TIMEPOINT Timepoint for which this plot is being created. If NULL,
#' no timepoint is mentioned in the graph title
#'
#' @return The value returned by \code{dev.off()} when the PDF device is
#' closed. The function is called for its side effect of writing the PDF.
#'
#' @export
#'
#' @importFrom grDevices dev.off pdf
#' @importFrom graphics abline axis legend lines par plot text title
aa_graphATestsForSampleSize <- function(FILEPATH, ATESTS, MEASURES,
                                        LARGEDIFFINDICATOR, GRAPHOUTPUTNAME,
                                        TIMEPOINT, TIMEPOINTSCALE) {

  ATESTS <- data.frame(ATESTS, check.names = FALSE)
  num_sets <- nrow(ATESTS)

  # Where the resulting graph should go
  GRAPHFILE <- make_path(c(FILEPATH, GRAPHOUTPUTNAME))
  pdf(GRAPHFILE, width = 15, height = 7)

  # Guarantee the PDF device is closed even if a plotting call below fails
  # (e.g. a measure column is absent from ATESTS). The flag prevents a second
  # dev.off() once the device has been closed normally at the end.
  device_open <- TRUE
  on.exit(if (device_open) dev.off(), add = TRUE)

  # xpd = NA lets the legend be drawn in the widened right-hand margin
  par(xpd = NA, mar = c(4, 4, 4, 8))

  # The first measure creates the plot; the remaining measures are overlaid.
  # `[name][, 1]` is kept deliberately: it raises an error for an unknown
  # column, whereas `[[name]]` would silently yield NULL.
  measure_label <- paste0("ATest", MEASURES[1])
  plot(ATESTS["Sample"][, 1], ATESTS[measure_label][, 1], type = "o", lty = 1,
       ylim = c(0, 1), pch = 1, xlab = "Run Subset / Parameter Value (Dummy)",
       ylab = "A Test Score", xaxt = "n", xlim = c(2, num_sets + 1))

  for (l in seq_along(MEASURES)[-1]) {
    measure_label <- paste0("ATest", MEASURES[l])
    lines(ATESTS["Sample"][, 1], ATESTS[measure_label][, 1],
          type = "o", lty = 5, pch = l)
  }

  # Title: the timepoint is only mentioned when one has been supplied
  graph_title <- paste0("A-Test Scores for ", num_sets,
                        " Dummy Parameters where \n Sample Size = ",
                        ATESTS[1, 1])
  if (!is.null(TIMEPOINT)) {
    graph_title <- paste0(graph_title, " at Timepoint: ", TIMEPOINT, " ",
                          TIMEPOINTSCALE)
  }
  title(main = graph_title)

  axis(1, at = seq(2, num_sets + 1, by = 2))
  # Legend anchored at the top-right corner of the plot region, so it sits
  # in the right-hand margin
  legend(par("usr")[2], par("usr")[4], title = "MEASURES", MEASURES,
         pch = seq_along(MEASURES), cex = 0.7, ncol = 1)
  # Clip to the plot region again so the reference lines stay inside it
  par(xpd = FALSE)

  # Reference lines: 0.5 is "no difference", with the user-supplied
  # large-difference threshold drawn either side of it
  label_x <- num_sets / 2
  abline(a = 0.5, b = 0, lty = 4)
  text(label_x, 0.52, "no difference", col = "blue")
  abline(a = (0.5 + LARGEDIFFINDICATOR), b = 0, lty = 4)
  text(label_x, (0.5 + LARGEDIFFINDICATOR + 0.02),
       "large difference", col = "blue")
  abline(a = (0.5 - LARGEDIFFINDICATOR), b = 0, lty = 4)
  text(label_x, (0.5 - LARGEDIFFINDICATOR - 0.02),
       "large difference", col = "blue")

  # Normal completion: close the device here and disarm the on.exit handler
  device_open <- FALSE
  dev.off()
}


#' Plots a comparison of the maximum A-Test score for each sample size
#'
#' Produces a full graph of the data generated by \code{aa_sampleSize_Summary}
#' (by full, we mean the y-axis (the A-Test score) goes from 0.5-1, and the
#' x axis contains all sample sizes examined), making it easy to see how
#' uncertainty reduces with an increase in sample size. This graph is named
#' as stated in the parameter GRAPHOUTPUTFILE, with the timepoint appended
#' if the analysis is for multiple timepoints.
#'
#' The summary data must contain a column named \code{samplesize} and, for
#' every entry of \code{MEASURES}, a column named \code{<measure>MaxA}. If
#' both \code{SAMPLESUMMARY_FILE} and \code{SAMPLESUMMARY_OBJECT} are given,
#' the file is used. If neither is given, or the file does not exist, a
#' message is emitted and no graph is produced. When \code{TIMEPOINTS} is not
#' NULL the summary is always read from file (one file per timepoint) and
#' \code{SAMPLESUMMARY_OBJECT} is ignored.
#'
#' @param FILEPATH Directory where the results of the simulation runs, in folders or in single CSV file format, can be found
#' @param MEASURES An array containing the names of the simulation output measures to be analysed.
#' @param MAXSAMPLESIZE The highest number of samples used.
#' @param SMALL The figure (>0.5) which is deemed a "small difference" between two sets being compared.  Vargha-Delaney set this value to 0.56 - but this can be altered here
#' @param MEDIUM The figure (>0.5) which is deemed a "medium difference" between two sets being compared.  Vargha-Delaney set this value to 0.66 - but this can be altered here
#' @param LARGE The figure (>0.5) which is deemed a "large difference" between two sets being compared.  Vargha-Delaney set this value to 0.73 - but this can be altered here
#' @param SAMPLESUMMARY_OBJECT An R object (a data frame, or an object that
#' \code{as.data.frame} can coerce) containing the summary A-Test scores for
#' this sample size
#' @param SAMPLESUMMARY_FILE The name of the CSV containing the summary A-Test scores for this sample size
#' @param GRAPHOUTPUTFILE Filename that should be given to the generated summary graph. This must have a PDF file extension
#' @param TIMEPOINTS Implemented so this method can be used when analysing multiple simulation timepoints. If only analysing one timepoint, this should be set to NULL. If not, this should be an array of timepoints, e.g. c(12,36,48,60)
#' @param TIMEPOINTSCALE Implemented so this method can be used when analysing multiple simulation timepoints. Sets the scale of the timepoints being analysed, e.g. "Hours"
#' @param GRAPHLABEL Used internally by the \code{getATestResults} method when producing graphs for multiple timepoints. Should not be set in function call
#'
#' @return Invisibly, NULL. The function is called for its side effect of
#' writing the summary graph(s) to PDF.
#'
#' @export
#'
#' @importFrom grDevices dev.off pdf
#' @importFrom graphics abline axis legend lines par plot text title
#' @importFrom utils read.csv
aa_graphSampleSizeSummary <- function(FILEPATH, MEASURES, MAXSAMPLESIZE, SMALL,
                                     MEDIUM, LARGE, GRAPHOUTPUTFILE,
                                     SAMPLESUMMARY_OBJECT = NULL,
                                     SAMPLESUMMARY_FILE = NULL,
                                     TIMEPOINTS = NULL, TIMEPOINTSCALE = NULL,
                                     GRAPHLABEL = NULL) {

  # ---- Multiple timepoints: one recursive single-timepoint call each ----
  if (!is.null(TIMEPOINTS)) {
    for (n in seq_along(TIMEPOINTS)) {

      current_time <- TIMEPOINTS[n]
      message(paste0("Processing Timepoint: ", current_time))

      # Derive the per-timepoint input and output file names
      summaryfilename_full <- append_time_to_argument(
        SAMPLESUMMARY_FILE, current_time,
        check_file_extension(SAMPLESUMMARY_FILE))

      graphfilename_full <- append_time_to_argument(
        GRAPHOUTPUTFILE, current_time,
        check_file_extension(GRAPHOUTPUTFILE))

      timepoint_label <- paste(current_time, TIMEPOINTSCALE, sep = " ")

      # All optional arguments are passed by name so the label cannot be
      # bound to the wrong parameter if the signature is ever extended
      aa_graphSampleSizeSummary(
        FILEPATH, MEASURES, MAXSAMPLESIZE, SMALL, MEDIUM, LARGE,
        graphfilename_full,
        SAMPLESUMMARY_OBJECT = NULL, SAMPLESUMMARY_FILE = summaryfilename_full,
        TIMEPOINTS = NULL, TIMEPOINTSCALE = NULL,
        GRAPHLABEL = timepoint_label)
    }
    return(invisible(NULL))
  }

  # ---- Single timepoint: obtain the summary data ----
  if (is.null(SAMPLESUMMARY_OBJECT) && is.null(SAMPLESUMMARY_FILE)) {
    message(paste0("No A-Test summary object or file specified, ",
                   "so no graph has been produced"))
    return(invisible(NULL))
  }

  if (!is.null(SAMPLESUMMARY_FILE)) {
    # A named file takes precedence over an in-memory object
    summary_path <- make_path(c(FILEPATH, SAMPLESUMMARY_FILE))
    if (!file.exists(summary_path)) {
      message("Specified A-Test Summary File does not exist")
      return(invisible(NULL))
    }
    atest_results <- read.csv(summary_path, header = TRUE,
                              check.names = FALSE)
  } else {
    atest_results <- SAMPLESUMMARY_OBJECT
  }

  # ---- Draw the graph ----
  message("Creating Summary Graph")
  atest_results <- as.data.frame(atest_results)

  # Where the resulting graph should go
  graph_file <- make_path(c(FILEPATH, GRAPHOUTPUTFILE))
  pdf(graph_file, width = 12, height = 7)

  # Guarantee the PDF device is closed even if a plotting call below fails
  # (e.g. a measure column is absent). The flag prevents a second dev.off()
  # once the device has been closed normally.
  device_open <- TRUE
  on.exit(if (device_open) dev.off(), add = TRUE)

  # Wide right-hand margin leaves room for the legend
  par(xpd = NA, mar = c(4, 4, 2, 17))

  # The first measure creates the plot; the remaining measures are overlaid.
  # `[name][, 1]` raises an error for an unknown column, which is wanted here.
  measure_label <- paste0(MEASURES[1], "MaxA")
  plot(atest_results$samplesize, atest_results[measure_label][, 1],
       type = "o", lty = 1, ylim = c(0.5, 1.0),
       pch = 1, xlab = "Sample Size",
       ylab = "A Test Score", xaxt = "n", yaxt = "n")

  for (l in seq_along(MEASURES)[-1]) {
    measure_label <- paste0(MEASURES[l], "MaxA")
    lines(atest_results$samplesize, atest_results[measure_label][, 1],
          type = "o", lty = 5, pch = l)
  }

  # Title depends on whether this graph is for one timepoint of many
  if (is.null(GRAPHLABEL)) {
    title("Maximum A-Test Scores for each Sample Size")
  } else {
    title(paste0("Maximum A-Test Scores for each Sample Size ",
                 "(Timepoint: ", GRAPHLABEL, ")"))
  }

  axis(1, at = seq(0, MAXSAMPLESIZE, by = 100))
  axis(2, at = seq(0.5, 1.0, by = 0.05))

  # Legend anchored at the top-right corner of the plot region, so it sits
  # in the right-hand margin
  par(xpd = TRUE)
  legend(par("usr")[2], par("usr")[4], title = "MEASURES",
         MEASURES, pch = seq_along(MEASURES), cex = 0.7, ncol = 1)

  # Clip to the plot region again so the effect lines stay inside it
  par(xpd = FALSE)

  # Horizontal lines marking the A-Test effect-size thresholds
  label_x <- MAXSAMPLESIZE / 2
  abline(h = SMALL, lty = 4)
  text(label_x, SMALL - 0.01, "SMALL effect", col = "blue")
  abline(h = LARGE, lty = 4)
  text(label_x, LARGE + 0.01, "LARGE effect", col = "blue")
  abline(h = MEDIUM, lty = 4)
  text(label_x, MEDIUM + 0.02, "MEDIUM effect", col = "blue")

  # Normal completion: close the device here and disarm the on.exit handler
  device_open <- FALSE
  dev.off()

  message(paste0("Summary Graph output to ", graph_file))
  invisible(NULL)
}