R/robustness_plotting.R

#' Takes each parameter in turn and creates a plot showing A-Test score against parameter value.
#'
#' This makes it easy to determine how the effect that changing the parameter
#' has had on simulation results. Graph for each parameter is output as a PDF
#'
#' @inheritParams oat_processParamSubsets
#' @inheritParams oat_csv_result_file_analysis
#' @param ATESTSIGLEVEL The A-Test determines if there is a large difference between two sets if the result is greater than 0.21 either side of the 0.5 line. Should this not be suitable, this can be changed here
#' @param output_types Files types of graph to produce (pdf,png,bmp etc)
#'
#' @export
#'
#' @importFrom graphics abline par plot lines axis legend plot text title
oat_graphATestsForSampleSize <- function(FILEPATH, PARAMETERS, MEASURES,
                                         ATESTSIGLEVEL, ATESTRESULTFILENAME,
                                         BASELINE, PMIN = NULL, PMAX = NULL,
                                         PINC = NULL, PARAMVALS = NULL,
                                         TIMEPOINTS = NULL,
                                         TIMEPOINTSCALE = NULL,
                                         output_types=c("pdf")) {

  if (is.null(TIMEPOINTS) || length(TIMEPOINTS) == 1) {
    # NOTE THAT OUTPUT_FOLDER AND BASELINE PARAMETERS ADDED IN SPARTAN 2.0
    # IN VERSION 2.0, WE PROCESS ONE FILE, NOT AN ATEST FILE FOR EACH PARAMETER
    # THE GRAPHS GO IN SAME DIRECTORY AS THIS FOLDER, NOT IN THE TOP LEVEL

    if (file.exists(FILEPATH)) {
      message("Creating graphs of A-Test results (oat_graphATestsForSampleSize)")

      # FIRSTLY READ IN THE ATESTS FILE (NOW ALL IN ONE FILE)
      RESULT <- read.csv(make_path(c(FILEPATH, ATESTRESULTFILENAME)),
                         sep = ",", header = TRUE, check.names = FALSE)

      for (PARAM in 1:length(PARAMETERS)) {
        message(paste("Creating graph for Parameter ", PARAMETERS[PARAM],
                    sep = ""))

        # NEED TO RECOVER THE A-TESTS FOR THIS PARAMETER FROM THE RESULTS
        EXP_PARAMS <- as.character(BASELINE)

        # GET THE LIST OF PARAMETER VALUES BEING EXPLORED FOR THIS PARAMETER
        # NOTE CONVERSION TO NUMBERS: GETS RID OF TRAILING ZEROS MADE BY SEQ
        val_list <- as.numeric(prepare_parameter_value_list(PMIN, PMAX, PINC,
                                                                  PARAMVALS,
                                                                  PARAM))

        PARAM_ATESTS <- NULL

        # NOW WE ITERATE THROUGH THE VALUES IN THIS LIST
        for (PARAMVAL in 1:length(val_list)) {
          # NOW TO RECOVER THE EXPERIMENTS RUN UNDER THIS PARAMETER SET
          ATESTS <- RESULT

          # SET THE VALUE OF THIS PARAMETER TO BE THAT WE ARE PROCESSING
          EXP_PARAMS[PARAM] <- as.character(val_list[PARAMVAL])

          for (PARAMOFINT in 1:length(PARAMETERS))
            ATESTS <- subset(ATESTS, ATESTS[[PARAMETERS[PARAMOFINT]]]
                             == as.numeric(EXP_PARAMS[PARAMOFINT]))

          # KEEP ALL THE ATESTS RESULTS TOGETHER FOR THIS PARAMETER
          PARAM_ATESTS <- rbind(PARAM_ATESTS, ATESTS)

        }

        graph_frame<-data.frame()
        for(measure in 1:length(MEASURES))
        {
          measure_set<-data.frame(rep(MEASURES[measure],nrow(PARAM_ATESTS[PARAMETERS[PARAM]])),PARAM_ATESTS[PARAMETERS[PARAM]],PARAM_ATESTS[paste0("ATest",MEASURES[measure])])
          colnames(measure_set)<-c("Measure","ParameterValue","ATestScore")
          graph_frame<-rbind(graph_frame,measure_set)
        }

        for(out in output_types)
        {
          # Now we can plot this
          ggplot2::ggplot(data=graph_frame, ggplot2::aes(x=graph_frame$ParameterValue, y=graph_frame$ATestScore, group=graph_frame$Measure)) +
            ggplot2::geom_line(ggplot2::aes(linetype=graph_frame$Measure)) + ggplot2::geom_point(ggplot2::aes(shape=graph_frame$Measure)) +
            ggplot2::theme(legend.position="bottom") + ggplot2::xlab("Parameter Value")+ggplot2::ylab("A-Test Score") + ggplot2::ylim(0,1) +
            ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 65, hjust = 1, size=ggplot2::rel(0.95))) +
            ggplot2::ggtitle(paste0("A-Test Scores when adjusting parameter\n",PARAMETERS[PARAM])) + ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size=ggplot2::rel(0.90))) +
            ggplot2::geom_hline(yintercept=0.5, linetype="dashed", color="black") +
            ggplot2::geom_hline(yintercept=0.5 + ATESTSIGLEVEL, linetype="dashed", color="black") +
            ggplot2::geom_hline(yintercept=0.5 - ATESTSIGLEVEL, linetype="dashed", color="black") +
            ggplot2::scale_x_continuous(breaks = seq(min(graph_frame$ParameterValue), max(graph_frame$ParameterValue), by = PINC[PARAM])) +
            ggplot2::annotate("text",x=(max(val_list) + min(val_list))/2, y=0.52, label="No Difference", color="blue", size=3) +
            ggplot2::annotate("text",x=(max(val_list) + min(val_list))/2, y=(0.5 + ATESTSIGLEVEL+ 0.02), label="Large Difference", color="blue",size=3) +
            ggplot2::annotate("text",x=(max(val_list) + min(val_list))/2, y=(0.5 - ATESTSIGLEVEL- 0.02), label="Large DIfference", color="blue", size=3)


            if(is.null(TIMEPOINTS))
            {
              ggplot2::ggsave(file.path(FILEPATH,paste0(PARAMETERS[PARAM],".",out)))
            } else {
              ggplot2::ggsave(file.path(FILEPATH,paste0(PARAMETERS[PARAM],"_",TIMEPOINTS,".",out)))
            }
        }
      }
    } else {
      message("The directory specified in FILEPATH does not exist.
            No graph created")
    }
  } else {
    # PROCESS EACH TIMEPOINT, AMENDING FILENAMES AND RECALLING THIS FUNCTION
    for (n in 1:length(TIMEPOINTS)) {
      current_time <- TIMEPOINTS[n]
      message(paste("PROCESSING TIMEPOINT: ", current_time, sep = ""))

      atest_result_filename_format <- check_file_extension(ATESTRESULTFILENAME)
      ATESTRESULTFILENAME_FULL <- paste(substr(ATESTRESULTFILENAME, 0,
                                               nchar(ATESTRESULTFILENAME) - 4),
                                        "_", current_time, ".",
                                        atest_result_filename_format, sep = "")

      oat_graphATestsForSampleSize(FILEPATH, PARAMETERS, MEASURES,
                                   ATESTSIGLEVEL, ATESTRESULTFILENAME_FULL,
                                   BASELINE, PMIN, PMAX, PINC, PARAMVALS,
                                   TIMEPOINTS = current_time, TIMEPOINTSCALE
                                   = TIMEPOINTSCALE)
    }
  }
}

#' For stochastic simulations plots the distribution of results for each parameter value
#'
#' Only applicable for stochastic simulations where the results are provided in
#' the folder structure: this takes each parameter in turn, and creates a boxplot
#' for each output measure, showing the result distribution for each value of that
#' parameter.
#'
#' @inheritParams oat_processParamSubsets
#' @inheritParams oat_csv_result_file_analysis
#' @param MEASURE_SCALE An array containing the measure used for each of the output measures (i.e. microns, microns/min).  Used to label graphs
#' @param output_types File formats in which graphs should be produced
#' @param outliers Whether outliers int he data should be removed
#'
#' @export
#'
#' @importFrom graphics boxplot
oat_plotResultDistribution <- function(FILEPATH, PARAMETERS, MEASURES,
                                       MEASURE_SCALE, CSV_FILE_NAME, BASELINE,
                                       PMIN = NULL, PMAX = NULL, PINC = NULL,
                                       PARAMVALS = NULL, TIMEPOINTS = NULL,
                                       TIMEPOINTSCALE = NULL, output_types=c("pdf"),
                                       outliers=FALSE) {

  if (is.null(TIMEPOINTS) || length(TIMEPOINTS) == 1) {

    # RoboSpartan had issues with checking the filepath existed, so for the moment this check
    # has been removed
    #if (file.exists(FILEPATH)) {
      message("Plotting result distribution for each parameter (oat_plotResultDistribution)")

      # NEW TO SPARTAN VERSION 2
      # READS SIMULATION RESPONSES FROM A CSV FILE, IN THE FORMAT: PARAMETER
      # VALUES (COLUMNS), SIMULATION OUTPUT MEASURES IN A CHANGE TO SPARTAN 1,
      # THE FIRST FUNCTION THAT PROCESSES SIMULATION RESPONSES CREATES THIS
      # FILE, NOT MEDIANS FOR EACH PARAMETER AS IT USED TO. THIS WAY WE ARE
      # NOT DEALING WITH TWO METHODS OF SIMULATION RESULT SPECIFICATION
      # READ IN THE OAT RESULT FILE
      RESULT <- read.csv(make_path(c(FILEPATH, CSV_FILE_NAME)), sep = ",",
                                   header = TRUE, check.names = FALSE)

      for (PARAM in 1:length(PARAMETERS)) {
        message(paste("Creating Output Responses Box Plot Graph for Parameter ",
                    PARAMETERS[PARAM], sep = ""))

        # THE RESULTS OF THE OAT ANALYSIS IS IN ONE PLACE. THUS WE NEED TO
        # REFER TO THE CORRECT BASELINE RESULT FOR PARAMETERS THAT ARE
        # NOT BEING CHANGED SO WE USE THE VARIABLE EXP_PARAMS WHEN WE START
        #A NEW VARIABLE - WE SET THE PARAMS TO THE BASELINE AND THEN ONLY
        # ALTER THE ONE BEING CHANGED
        EXP_PARAMS <- as.character(BASELINE)

        # NOW GET THE LIST OF PARAMETER VALUES BEING EXPLORED FOR THIS
        # PARAMETER. NOTE CONVERSION TO NUMBERS: GETS RID OF TRAILING
        # ZEROS MADE BY SEQ
        val_list <- as.numeric(prepare_parameter_value_list(PMIN, PMAX, PINC,
                                                            PARAMVALS, PARAM))

        ALLRESULTS <- NULL

        for (PARAMVAL in 1:length(val_list)) {
          EXP_PARAMS[PARAM] <- as.character(val_list[PARAMVAL])
          PARAM_RESULT <- subset_results_by_param_value_set(PARAMETERS,
                                                            RESULT, EXP_PARAMS)

          VALUE_RESULT <- cbind(PARAM_RESULT[PARAMETERS[PARAM]])

          # NOW ADD ALL MEASURES
          for (MEASURE in 1:length(MEASURES)) {
            VALUE_RESULT <- cbind(VALUE_RESULT,
                                  PARAM_RESULT[MEASURES[MEASURE]])
          }

          ALLRESULTS <- rbind(ALLRESULTS, VALUE_RESULT)
        }

        for (MEASURE in 1:length(MEASURES)) {
          # NOW DO THE BOXPLOTS FOR EACH MEASURE
          # BOXPLOT THE MEASURE
          if (is.null(TIMEPOINTS)) {

            GRAPHTITLE <- paste("Distribution of ", MEASURES[MEASURE],
                                " Responses \n when altering parameter ",
                              PARAMETERS[PARAM], sep = "")
          } else {

            GRAPHTITLE <- paste("Distribution of ", MEASURES[MEASURE],
                              " Responses \n when altering parameter ",
                              PARAMETERS[PARAM], " at Timepoint ",
                              TIMEPOINTS, " ", TIMEPOINTSCALE,
                              sep = "")
          }

          ALLRESULTS[PARAMETERS[PARAM]] <-as.factor(as.matrix(ALLRESULTS[PARAMETERS[PARAM]]))
          if(!outliers)
          {
            outlier_flag=NA
          } else {
            outlier_flag = 1
          }

          ggplot2::ggplot(ALLRESULTS, ggplot2::aes(x=stats::reorder(ALLRESULTS[,1],sort(as.numeric(ALLRESULTS[,1]))), y=ALLRESULTS[, MEASURE+1])) +
            ggplot2::geom_boxplot(notch=TRUE, outlier.shape = outlier_flag) + ggplot2::labs(title=GRAPHTITLE,x="Parameter Value",
                                  y = paste0(MEASURES[MEASURE], " (",MEASURE_SCALE[PARAM], ")")) +
            ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) +
            ggplot2::stat_summary(fun.y=median, geom="point", shape=23, size=2)

          for(output in output_types)
          {
            ggplot2::ggsave(file.path(FILEPATH,paste0("BP_",PARAMETERS[PARAM],"_",MEASURES[MEASURE],TIMEPOINTS,".",output)))
            message(paste0("Box Plots Generated and output as ",
                          file.path(FILEPATH,paste0("BP_",PARAMETERS[PARAM],"_",MEASURES[MEASURE],TIMEPOINTS,".",output))))
          }
        }
      }
    #} else {
    #  message("The directory specified in FILEPATH does not exist.
    #        No graph created")
    #}
  } else {
    # PROCESS EACH TIMEPOINT, AMENDING FILENAMES AND RECALLING THIS FUNCTION
    for (n in 1:length(TIMEPOINTS)) {
      current_time <- TIMEPOINTS[n]
      message(paste("Processing Timepoint: ", current_time, sep = ""))

      csv_file_name_format <- check_file_extension(CSV_FILE_NAME)
      CSV_FILE_NAME_FULL <- paste(substr(CSV_FILE_NAME, 0,
                                         nchar(CSV_FILE_NAME) - 4),
                                  "_", current_time, ".",
                                  csv_file_name_format, sep = "")

      oat_plotResultDistribution(FILEPATH, PARAMETERS, MEASURES, MEASURE_SCALE,
                                 CSV_FILE_NAME_FULL, BASELINE, PMIN, PMAX,
                                 PINC, PARAMVALS, TIMEPOINTS = current_time,
                                 TIMEPOINTSCALE = TIMEPOINTSCALE)
    }
  }
}
kalden/spartan documentation built on May 31, 2019, 11:52 p.m.