R/ards.R
In ards: Creates Analysis Results Datasets

Documented in add_ards get_ards init_ards

# Introductory Documentation ----------------------------------------------

#' @title ards: A package for creating Analysis Results Datasets
#'
#' @description An Analysis Results Dataset (ARDS) is commonly used in the pharma-biotech industry
#' to capture the results of an analysis in a tabular data structure.  The \strong{ards}
#' package helps create the ARDS. \strong{ards} functions can be called from inside 
#' a report program or a data preparation program.  The functions use a 
#' "bucketing" approach, whereby data can be added to the ARDS in multiple 
#' calls and from multiple intermediate data sets.    
#'     
#' @details 
#' The \strong{ards} package allows you to easily create an ARDS dataset
#' in a standard clinical reporting or data preparation program.  The 
#' \strong{ards} package contains only three functions, and each will
#' be used to create the ARDS dataset.  The functions will be called in the 
#' following order, and for the described purpose:
#' \itemize{
#'   \item \code{\link{init_ards}}: Initialize an ARDS dataset
#'   \item \code{\link{add_ards}}: Add data to an ARDS dataset
#'   \item \code{\link{get_ards}}: Extract the completed ARDS
#' }
#' Click on the links above for more information about each function.
#' 
#' The following figure describes the structure of the ARDS dataset.  This
#' structure is recommended by CDISC.
#' \figure{structure.png}
#' 
#' @docType package
#' @keywords internal
#' @name ards
NULL

# Globals -----------------------------------------------------------------



ardsenv <- new.env()

ardsenv$ards <- NULL
ardsenv$template <- NULL



# Functions ---------------------------------------------------------------



#' @title Initialize the Analysis Results Dataset
#' @description A function to initialize the Analysis Results Dataset (ARDS).
#' This function will
#' first create a data template in the desired structure, and then
#' populate common values across the dataset from that template.  
#' These common values will be
#' repeated on each row of the analysis data frame for subsequent inserts
#' from the \code{\link{add_ards}} function.
#' @param studyid The study for which the analysis was performed. 
#' This parameter is optional.
#' @param tableid A table identifier to use for the results. This value 
#' identifies the table within the study.  Optional string
#' value.
#' @param adsns A vector of source dataset names.  This parameter is used to 
#' identify the input data for the analysis.  This parameter is optional.
#' @param population A description of the analysis population.  This parameter
#' is used to identify the population for analysis.  This parameter is optional.
#' @param time A description of the time frame used in the analysis.  For example, 
#' in a clinical study, the "time" value may identify the visit on which the
#' analysis is based.
#' @param where An optional description of the criteria used to subset the 
#' data for analysis.
#' @param reset If true, clears out the existing ARDS dataset and replaces with
#' an empty template.  Otherwise, just assign new parameter values to the 
#' existing template.  The default
#' value is TRUE, meaning the ARDS in memory will be cleared every time 
#' \code{init_ards}
#' is called.  If you wish to assign new initialization values, but 
#' keep appending to the existing ARDS dataset, set this parameter to FALSE.  
#' This feature is used when you are creating two different tables in the 
#' same program.
#' @return The initialized analysis dataset.
#' @family ards
#' @examples 
#' library(ards)
#' library(dplyr)
#' 
#' # Initialize the ARDS
#' # - These values will be common through the dataset
#' init_ards(studyid = "MTCARS",
#'           tableid = "01", adsns = "mtcars",
#'           population = "all cars",
#'           time = "1973")
#' 
#' # Perform analysis on MPG
#' # - Using cylinders as a by-group
#' analdf <- mtcars |> 
#'   select(cyl, mpg) |> 
#'   group_by(cyl) |> 
#'   summarize(n = n(),
#'             mean = mean(mpg),
#'             std = sd(mpg),
#'             min = min(mpg),
#'             max = max(mpg))
#' 
#' # View analysis data
#' analdf
#' #     cyl     n  mean   std   min   max
#' #   <dbl> <int> <dbl> <dbl> <dbl> <dbl>
#' # 1     4    11  26.7  4.51  21.4  33.9
#' # 2     6     7  19.7  1.45  17.8  21.4
#' # 3     8    14  15.1  2.56  10.4  19.2
#' 
#' # Add analysis data to ARDS
#' # - These values will be unique per row
#' add_ards(analdf, 
#'          statvars = c("n", "mean", "std", "min", "max"),
#'          anal_var = "mpg", trtvar = "cyl")
#' 
#' 
#' # Get the ARDS
#' ards <- get_ards() 
#' 
#' # Uncomment to view ards
#' # View(ards)
#' @export
init_ards <- function(studyid = NA,
                      tableid = NA, adsns = NA,
                      population = NA, time = NA, where = NA, reset = TRUE) {


  if (reset) {
    if (!is.null(ardsenv$ards)) {
      ardsenv$ards <- NULL
    }
  }
  
  # Collapse analysis data
  if (all(is.na(adsns) == FALSE))
    adsns <- paste(adsns, sep ="", collapse = "|")

  # Create template record
  ardsenv$template <- data.frame(studyid = studyid, resultid = 0,
                                 tableid = tableid,
                                 adsns = adsns,
                                 population = population, time = time,
                                 where = where, byvar1 = NA, byvar2 = NA,
                                 byvar3 = NA, byvar4 = NA, byvar5 = NA,
                                 byvar6 = NA, byvar7 = NA, byvar8 = NA,
                                 byvar9 = NA, byval1 = NA, byval2 = NA,
                                 byval3 = NA, byval4 = NA, byval5 = NA,
                                 byval6 = NA, byval7 = NA, byval8 = NA,
                                 byval9 = NA, trtvar = NA, trtval = NA,
                                 paramcd = NA, anal_var = NA, anal_val = NA,
                                 statname = NA, statval = NA, statdesc = NA)

  if (reset) {
    # Assign empty template to ards
    ardsenv$ards <- ardsenv$template[0, ]
  }

  return(ardsenv$template)

}



#' @title Adds data to an Analysis Results Dataset
#' @description The \code{add_ards} function dumps data from an input analysis 
#' dataset
#' to the ARDS dataset.  The function is designed to be pipe-friendly, and will
#' return the input dataset unaltered.  The parameters on the function
#' define how to extract the desired data from the analysis dataset.
#' The "statvars" parameter defines which columns contain desired
#' analysis results.  The values in these columns will be used to populate the
#' "statval" variable in the output dataset.  Other parameters are used to
#' define identifying information for the statistics values, and are optional.
#' 
#' The \code{add_ards} function should be called immediately after any
#' calculations, while the analysis results are still in numeric form.  This 
#' recommendation is to ensure that the ARDS will contain full precision of the
#' analysis values.  Once the analysis values are dumped into the ARDS, you
#' may proceed to transform and format your analysis data, without affecting
#' the values captured in the ARDS. 
#' @param data The input data to create analysis results for.  This parameter
#' is required.
#' @param statvars  A vector of column names that identify the desired results.
#' Statvar columns must be numeric.  This parameter is required.
#' @param statdesc A vector of values or a column name that identifies a description 
#' for each statvar. If passed as a vector of values, the number of values
#' should correspond to the number of 'statvar' variables.  
#' @param byvars A vector of column names to use for by variables.
#' @param trtvar A column name to use for the treatment variable.
#' @param paramcd A character string that describes the analysis parameter 
#' code or column name that contains the parameter code. If supplied as a
#' column name, the function will populate the 'paramcd' column in the ARDS
#' with the value of the 'paramcd' column.
#' @param anal_var A column name for the analysis variable or a string
#' that identifies the analysis variable.
#' @param anal_val The analysis variable value.  Can be identified by a column
#' name or a vector of values. By default, the analysis values will be taken
#' from the values of the variable passed in 'anal_var'.  This parameter 
#' exists so that you may pass in the values from a different variable, if desired.
#' @family ards
#' @return The input data frame, unaltered.
#' @examples 
#' library(ards)
#' library(dplyr)
#' 
#' # Initialize the ARDS
#' init_ards(studyid = "MTCARS",
#'           tableid = "01", adsns = "mtcars",
#'           population = "all cars",
#'           time = "1973")
#' 
#' # Perform analysis on MPG
#' # - Add to ARDS from within continuous variable pipeline
#' mpgdf <- mtcars |> 
#'   select(cyl, mpg) |> 
#'   group_by(cyl) |> 
#'   summarize(n = n(),
#'             mean = mean(mpg),
#'             std = sd(mpg),
#'             min = min(mpg),
#'             max = max(mpg)) |> 
#'   add_ards(statvars = c("n", "mean", "std", "min", "max"),
#'          anal_var = "mpg", trtvar = "cyl")
#'             
#' # Perform analysis on GEAR
#' # - Add to ARDS from within categorical variable pipeline
#' geardf <- mtcars |> 
#'   mutate(denom = n()) |> 
#'   select(cyl, gear, denom) |> 
#'   group_by(cyl, gear) |> 
#'   summarize(cnt = n(), 
#'             denom = max(denom)) |>
#'   mutate(pct = cnt / denom * 100) |> 
#'   add_ards(statvars = c("cnt", "pct", "denom"),
#'          anal_var = "gear", trtvar = "cyl")
#' 
#' # Get the ARDS
#' ards <- get_ards() 
#' 
#' # Uncomment to view ards
#' # View(ards)
#' @export
add_ards <- function(data, statvars, statdesc = NULL,
                     byvars = NULL, trtvar = NULL, paramcd = NULL,
                     anal_var = NULL, anal_val = NULL) {

  #browser()
  
  if (is.null(ardsenv$ards)) {
    
    stop("ARDS dataset is not initialized.") 
  }

  nms <- names(data)

  for (i in seq_len(length(statvars))) {

    # Start with template and turn into list
    ret <- unclass(ardsenv$template)

    # Populate statistics value
    ret$statval <- data[[statvars[[i]]]]

    # Populate statistic name from variable name
    ret$statname <- statvars[[i]]

    # Determine starting point of ResultID
    if (nrow(ardsenv$ards) == 0) {
      strt <- 1
    } else {
      strt <- max(ardsenv$ards$resultid) + 1
    }

    # Populate ResultID
    ret$resultid <- seq(strt, strt + nrow(data) - 1)

    # Populate Stat Description
    if (!is.null(statdesc)) {
      if (all(statdesc %in% nms)) {
        if (length(statdesc) == 1) {

          ret$statdesc <- data[[statdesc]]
        } else if (length(statdesc) >= i) {
          ret$statdesc <- data[[statdesc[[i]]]]
        }
      } else {
        if (length(statdesc) == 1) {
          ret$statdesc <- statdesc
        } else if (length(statdesc) >= i) {
          ret$statdesc <- statdesc[[i]]
        }
      }
    }

    # Populate By variables and values
    if (!is.null(byvars)) {
      for (j in seq_along(length(byvars))) {
        ret[[paste0("byvar", j)]] <- byvars[[j]]

        if (!is.null(data[[byvars[[j]]]]))
          ret[[paste0("byval", j)]] <- data[[byvars[[j]]]]

      }
    }

    # Populate Analysis Variable and Value
    if (!is.null(anal_var)) {
      ret[["anal_var"]] <- anal_var
      if (is.null(anal_val)) {
        if (all(anal_var %in% nms)) {

          ret[["anal_val"]] <- data[[anal_var]]
        }
      } else if (all(anal_val %in% nms)) {
        ret[["anal_val"]] <- data[[anal_val]]
      } else {

        ret[["anal_val"]] <- anal_var
      }
    }


    # Populate Treatment Variable
    if (!is.null(trtvar)) {
      if (trtvar %in% nms) {

        ret[["trtvar"]] <- trtvar
        ret[["trtval"]] <- data[[trtvar]]

      } else {

        ret[["trtval"]] <- trtvar
      }
    }
    
    # Populate PARAM Variable
    if (!is.null(paramcd)) {
      if (paramcd %in% nms) {
        
        ret[["paramcd"]] <- paramcd
        ret[["paramcd"]] <- data[[paramcd]]
        
      } else {
        
        ret[["paramcd"]] <- paramcd
      }
    }



    # Convert to data frame and recycle template values.
    ret <- data.frame(ret)

    # Append to ards
    ardsenv$ards <- rbind(ardsenv$ards, ret)

  }

  return(data)

}

#' @title Returns the current Analysis Results Dataset
#' @description The \code{get_ards} function returns the current state
#' of the Analysis Results Dataset (ARDS) as an R data frame. 
#' This data frame may be saved to disk, saved in
#' a database, or examined from code.  The function takes no parameters.
#' @return A data frame of the current analysis results.
#' @family ards
#' @examples 
#' library(ards)
#' library(dplyr)
#' 
#' # Initialize the ARDS
#' # - These values will be common through the dataset
#' init_ards(studyid = "IRIS",
#'           tableid = "01", adsns = "iris",
#'           population = "all flowers",
#'           time = "1973")
#' 
#' # Perform analysis on Petal.Length
#' # - Using Species as a by-group
#' analdf1 <- iris |> 
#'   select(Petal.Length, Species) |> 
#'   group_by(Species) |> 
#'   summarize(n = n(),
#'             mean = mean(Petal.Length),
#'             std = sd(Petal.Length),
#'             min = min(Petal.Length),
#'             max = max(Petal.Length)) |> 
#'   add_ards(statvars = c("n", "mean", "std", "min", "max"),
#'            statdesc = c("Count", "Mean", "STD", "Minimum", "Maximum"),
#'            anal_var = "Petal.Length", trtvar = "Species")
#'            
#' # Perform analysis on Petal.Width
#' # - Using Species as a by-group
#' analdf2 <- iris |> 
#'   select(Petal.Width, Species) |> 
#'   group_by(Species) |> 
#'   summarize(n = n(),
#'             mean = mean(Petal.Width),
#'             std = sd(Petal.Width),
#'             min = min(Petal.Width),
#'             max = max(Petal.Width)) |> 
#'   add_ards(statvars = c("n", "mean", "std", "min", "max"),
#'            statdesc = c("Count", "Mean", "STD", "Minimum", "Maximum"),
#'            anal_var = "Petal.Width", trtvar = "Species")
#' 
#' # Get the ARDS
#' ards <- get_ards() 
#' 
#' # Uncomment to view ards
#' # View(ards)
#' @export
get_ards <- function() {

 return(ardsenv$ards)

}