# R/functions.R
#
# Defines functions: get_simulation_results, plot_confint, plot_scatter, plot_box, get_statistics_summary
#
# Documented in: get_simulation_results, get_statistics_summary, plot_box, plot_confint, plot_scatter

## [![Coverage Status](http://img.shields.io/codecov/c/github/pedroliman/arena2r/master.svg)](https://codecov.io/github/pedroliman/arena2r?branch=master)

#' Get Results from Arena CSV Files
#'
#' Reads every csv file found in the provided source and returns a single tidy data.frame with the simulation runs, consolidated.
#' You should provide a path containing only csv files generated by Arena, with the same number of replications.
#' Each scenario is named after its csv file (without the ".csv" extension), so name your csv files after your scenarios.
#'
#' @param source The path where the csv files are stored, or the list returned by a shiny fileInput. If you do not provide a value, the current working directory is used.
#' @param source_type String that describes where the data is coming from. "path" stands for a path that contains all csv files. Any other value (e.g. "shinyInput") is treated as the list object returned by fileInput in the ShinyApp, which must provide the `name` and `datapath` elements.
#'
#' @return a tidy dataframe with simulation results, with the columns Scenario (a factor), Statistic, Replication (numeric) and Value. NULL is returned when `source_type` is not "path" and `source` is NULL. An error is raised when no csv file is found.
#' @export
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom utils read.csv
#' @examples
#' # Define the path where your csv files are:
#' path <- system.file("extdata", package = "arena2r")
#' simulation_results = get_simulation_results(path)
#' head(simulation_results)

get_simulation_results = function(source, source_type = "path") {

  if (missing(source)) {
    source = getwd()
  }

  if (source_type == "path") {
    # The source files are all the csv files in the path. The extension is
    # escaped and anchored so that only names ending in ".csv" are picked up
    # (an unescaped ".csv" would also match e.g. "notes.csv.txt").
    csv_files = list.files(path = source, pattern = "\\.csv$")
    file_paths = file.path(source, csv_files)
  } else {

    # Nothing was uploaded yet in the shiny app.
    if (is.null(source)) {
      return(NULL)
    }

    # The source files come from an object in shiny.
    csv_files = source$name
    file_paths = source$datapath
  }

  # Only the trailing extension is removed, so a scenario called
  # "run_csv_1.csv" keeps its full name "run_csv_1".
  scenario_names = sub("\\.csv$", "", csv_files)

  if (length(file_paths) == 0) {
    stop("No csv files were found in the provided source.", call. = FALSE)
  }

  # Read each csv file once, tagging its rows with the scenario name, and bind
  # everything in a single step instead of growing a data.frame in a loop.
  scenario_tables = lapply(seq_along(file_paths), function(i) {
    scenario_data = utils::read.csv(file = file_paths[i])
    scenario_data$Scenario = scenario_names[i]
    scenario_data
  })
  all_data = do.call(rbind, scenario_tables)

  # After moving Scenario to the front, the replication columns are expected
  # to start at the 7th position.
  n_cols = ncol(all_data)
  if (n_cols < 7) {
    stop("The csv files must contain at least one replication column after the first five columns.",
         call. = FALSE)
  }

  # Bring the Scenario column (currently the last one) to the front.
  all_data = all_data[, c(n_cols, seq_len(n_cols - 1))]

  # Stack the replication columns into Replication / Value pairs.
  stacked_data = tidyr::gather(all_data, key = "Replication", value = "Value", 7:ncol(all_data))

  final_data = stacked_data %>%
    # Strings are used here because the .data pronoun is deprecated inside
    # tidyselect contexts such as select().
    dplyr::select("Scenario", "Statistic.Name", "Replication", "Value") %>%
    dplyr::mutate(Scenario = sub(".SummaryStats", "", .data$Scenario, fixed = TRUE),
                  # Column names look like "Rep.1": drop the prefix and keep the number.
                  Replication = as.numeric(sub("^Rep[^0-9]*", "", .data$Replication))) %>%
    dplyr::arrange(.data$Scenario, .data$Statistic.Name, .data$Replication)

  # Scenario is stored as a factor.
  final_data$Scenario = as.factor(final_data$Scenario)

  names(final_data) = c("Scenario", "Statistic", "Replication", "Value")

  final_data

}


#### Plots

#' Confidence Interval Plot
#'
#' Plots the confidence interval for a response variable, across different simulated scenarios.
#' Individual replications are drawn as jittered points, the mean of each scenario as a point
#' and its confidence interval (computed by ggplot2::mean_cl_normal) as an error bar.
#'
#' @param sim_results The data.frame generated by get_simulation_results()
#' @param response_variable A character string indicating the Statistic to be plotted.
#'
#' @return a confidence interval plot using ggplot2.
#' @export
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @examples
#' library(arena2r)
#'
#' plot_confint(arena_results, "Entity 1.WaitTime")
plot_confint = function(sim_results, response_variable) {

  # One column per Statistic. Strings are used because the .data pronoun is
  # deprecated inside tidyselect contexts such as spread().
  wide_results = tidyr::spread(sim_results, key = "Statistic", value = "Value")

  # .data[[response_variable]] looks the column up by name at draw time, which
  # also works for names that are not syntactic (e.g. "Entity 1.WaitTime").
  ggplot2::ggplot(
    wide_results,
    ggplot2::aes(x = .data$Scenario, y = .data[[response_variable]], color = .data$Scenario)
  ) +
    ggplot2::geom_jitter(height = 0, width = 0.1, alpha = 0.2) +
    ggplot2::stat_summary(fun.data = ggplot2::mean_cl_normal, geom = "errorbar", lwd = 1) +
    # `fun` replaces the deprecated `fun.y`; `size` is the aesthetic that
    # controls how large the mean point is drawn.
    ggplot2::stat_summary(fun = mean, geom = "point", size = 2, group = 1) +
    # Explicit labels keep the axis titles equal to the column names.
    ggplot2::labs(x = "Scenario", y = response_variable) +
    ggplot2::theme(legend.position = "none")

}

#' Scatter Plot
#'
#' Plots one Statistic against another, one point per replication, colored by Scenario.
#'
#' @param sim_results The data.frame generated by get_simulation_results()
#' @param x_variable The name of the Statistic to be placed on the x axis
#' @param y_variable The name of the Statistic to be placed on the y axis
#'
#' @return a scatter plot showing individual replication results
#' @export
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @examples
#' library(arena2r)
#'
#' plot_scatter(arena_results, "Entity 1.NumberOut","Entity 1.WaitTime")
plot_scatter = function(sim_results, x_variable, y_variable) {

  # One column per Statistic. Strings are used because the .data pronoun is
  # deprecated inside tidyselect contexts such as spread().
  wide_results = tidyr::spread(sim_results, key = "Statistic", value = "Value")

  # The .data pronoun looks the columns up by name at draw time, which also
  # works for names that are not syntactic (e.g. "Entity 1.WaitTime").
  ggplot2::ggplot(
    data = wide_results,
    mapping = ggplot2::aes(x = .data[[x_variable]], y = .data[[y_variable]], color = .data$Scenario)
  ) +
    ggplot2::geom_point() +
    # Explicit labels keep the axis and legend titles equal to the column names.
    ggplot2::labs(x = x_variable, y = y_variable, color = "Scenario")

}

#' Box Plot
#'
#' Plots a box plot for a response variable, across different simulated scenarios.
#'
#' @param sim_results The data.frame generated by get_simulation_results()
#' @param response_variable A character string indicating the Statistic to be plotted.
#'
#' @return a box plot using ggplot2.
#' @export
#' @importFrom rlang .data
#' @importFrom magrittr %>%
#' @examples
#' library(arena2r)
#'
#' plot_box(arena_results, "Entity 1.NumberOut")
plot_box = function(sim_results, response_variable) {

  # One column per Statistic. Strings are used because the .data pronoun is
  # deprecated inside tidyselect contexts such as spread().
  wide_results = tidyr::spread(sim_results, key = "Statistic", value = "Value")

  # .data[[response_variable]] looks the column up by name at draw time, which
  # also works for names that are not syntactic (e.g. "Entity 1.NumberOut").
  ggplot2::ggplot(
    wide_results,
    ggplot2::aes(x = .data$Scenario, y = .data[[response_variable]])
  ) +
    ggplot2::geom_boxplot() +
    # Explicit labels keep the axis titles equal to the column names.
    ggplot2::labs(x = "Scenario", y = response_variable) +
    ggplot2::theme(legend.position = "none")

}



#' Get Statistics Summary
#'
#' Makes a summary table for every statistic available, by Scenario: mean, standard deviation,
#' minimum, maximum, coefficient of variation and the confidence interval of the mean.
#'
#' The half width of the confidence interval is the t quantile times the standard error
#' (SD / sqrt(n)), where n is the number of distinct replications found in `sim_results`.
#'
#' @param sim_results The data.frame generated by get_simulation_results()
#' @param confidence The confidence of the CI, a single number strictly between 0 and 1.
#'
#' @return a data.frame with a summary for every Statistic, with the columns Scenario, Statistic, Mean, SD, Min, Max, CV, HalfWidth, LowerLimit and UpperLimit.
#' @export
#' @importFrom magrittr %>%
#' @importFrom stats qt
#' @importFrom stats sd
#' @importFrom rlang .data
#' @examples
#' library(arena2r)
#'
#' statistics_summary = get_statistics_summary(arena_results)
#' head(statistics_summary)
get_statistics_summary = function(sim_results, confidence = 0.95) {

  if (!is.numeric(confidence) || length(confidence) != 1 ||
      is.na(confidence) || confidence <= 0 || confidence >= 1) {
    stop("`confidence` must be a single number between 0 and 1.", call. = FALSE)
  }

  n = length(unique(sim_results$Replication))

  # Two-sided critical value of the t distribution, e.g. the 0.975 quantile
  # for a 95% confidence interval.
  t_critical = stats::qt(confidence + (1 - confidence) / 2, df = n - 1)

  sim_results %>%
    dplyr::group_by(.data$Scenario, .data$Statistic) %>%
    dplyr::summarise(Mean = mean(.data$Value),
                     SD = stats::sd(.data$Value),
                     Min = min(.data$Value),
                     Max = max(.data$Value)) %>%
    dplyr::mutate(CV = .data$SD / .data$Mean,
                  # The standard error alone is not a confidence interval half
                  # width: it has to be scaled by the t critical value.
                  HalfWidth = t_critical * .data$SD / sqrt(n)) %>%
    dplyr::mutate(LowerLimit = .data$Mean - .data$HalfWidth,
                  UpperLimit = .data$Mean + .data$HalfWidth) %>%
    dplyr::arrange(.data$Scenario, .data$Statistic)

}


#' Test Dataset with Arena Results
#'
#' A dataset containing test data from an Arena simulation model, in the same
#' four-column layout returned by get_simulation_results(). It is the dataset
#' used in the examples of the plotting and summary functions.
#'
#' @format A data frame with 2280 rows and 4 variables:
#' \describe{
#'   \item{Scenario}{The Scenario Name}
#'   \item{Statistic}{The Statistic's description}
#'   \item{Replication}{The Replication Number}
#'   \item{Value}{The numeric value of the statistic within the replication and scenario}
#' }
"arena_results"

# Try the arena2r package in your browser
#
# Any scripts or data that you put into this service are public.
#
# arena2r documentation built on May 2, 2019, 3:47 p.m.