clinicaltrials: A Collection of Functions for use in CCS

Documented in crtlibn

#' Load in CCS datasets
#' 
#' Designed to have similar functionality to \%crtlibn() in SAS
#'
#' The function loads all of the datasets from where \%crtlibn() does into your global directory.
#' 
#' The function can load the analysis, rave, or stats datasets. Simply include dir = "..." to
#' the function call.
#' 
#' You can specify the prefix for the dataset to allow, for example, loading both analysis and
#' rave datasets simultaneously without overwriting any datasets.
#' 
#' Added functionality to load RadOnc Studies by using the RAO number. Formats work as well.
#' 
#' @param d The study number 
#' @param dir The datasets you want to load (i.e. analysis, rave, stats, tst, uat, etc.). The default is analysis. Other directories are case specific.
#' @param env Environment the data is located in. Current data is located in "prod", pre-PLM data located in "pre_plm"
#' @param prefix A character string specifying a prefix for the dataset names. The default is no prefix
#' @param fmts A data.frame specifying the formats specification, or a file path to such a file. Passing
#'   NULL (the default) doesn't give formats. If dir="rave" and fmts=TRUE, loads the formats for the RAVE datasets
#' @param envir An environment in which to put the datasets. By default, the calling environment (e.g., globalenv())
#' @param ... Other arguments passed to read.all()
#' @param alt If TRUE allows for entering of a different study path than default. The default is FALSE
#' @param studpath If alt=TRUE, use to specify where the SAS datasets are located
#' @return This function returns the names of the datasets invisibly
#' @examples 
#' 
#' crtlibn(d = "A041703", dir = "rave", prefix = "rave.") # loads rave data without formats
#' 
#' crtlibn(d = "A041703") # loads the analysis files
#' 
#' #crtlibn(d = 'AFT-38') # Specific permissions necessary
#' 
#' crtlibn(d = "RAO1802", dir = "rave", fmts=TRUE) # loads Rave data with formats
#' 
#' @author Sawyer Jacobson
#' @author Ethan Heinzen
#' @author Stephanie Duong
#' 
#' @export
# Edited: 1/7/2020 Added CALGB functionality
# Edited: 1/10/2020 Added the ability to access all folders, including tst, uat, etc., in the study folder 
#'

crtlibn <- function(d, dir = "analysis", env = "prod", prefix = "", fmts = FALSE, 
                    envir = parent.frame(), alt = FALSE, studpath = "", ...) {
  #AFT studies EXCLUDING PALLAS (AFT-05)...PALLAS uses SAS exclusively, so I didn't add AFT-05 paths (the study has 2 directories)
  if ( stringr::str_sub(tolower(d),1,3)=='aft'){
    path <- "/bigdata/ccsstat/aft/AFT_"
    study <- stringr::str_match( d, '[0-9][0-9]')
    studylib <- paste0(path, study,'/')
    
    # capture the datasets in a new environment
    e <- new.env()
    # assign the datasets to that environment
    rlocal::read.all(studylib, csv = FALSE, txt = FALSE, sas = TRUE, excel = FALSE, tab = FALSE, rds = FALSE, RData = FALSE, envir = e)
    # loop through the environment and make the column names lowercase
    datasets <- eapply(e, sdms::cn_tolower, USE.NAMES = TRUE)
    
  } 
  else if( stringr::str_sub(tolower(d), 1, 3)=='rao'){
    path <- "~ccsicprd/cc-sas-all/sasdata/mart/protocol/"
    studylib <- paste0(path, toupper(d),'/rave')
    
    # capture the datasets in a new environment
    e <- new.env()
    # assign the datasets to that environment
    rlocal::read.all(studylib, csv = FALSE, txt = FALSE, sas = TRUE, excel = FALSE, tab = FALSE, rds = FALSE, RData = FALSE, envir = e)
    # loop through the environment and make the column names lowercase
    datasets <- eapply(e, sdms::cn_tolower, USE.NAMES = TRUE)
    rave_fmts <- paste0(studylib, "/codes.sas7bdat")
  }
  else if( stringr::str_sub(tolower(d), 1, 5)=='calgb'){
    #protref <- haven::read_sas("~ccsicprd/cc-sas-all/sasdata/mart/xstudy/protref.sas7bdat")
    #study <- protref[protref$dc_num == toupper(as.character(d)), ]
    #protstud <- as.character(study$study)
    path <- "~ccsicprd/cc-sas-all/sasdata/mart/protocol/"
    studylib <- paste0(path, toupper(d), "/rave/")#, if(dir %in% c("stats", "rave", "dev", "DEV", "uat", "testing", "tst")) dir, "/")
    
    # capture the datasets in a new environment
    e <- new.env()
    # assign the datasets to that environment
    rlocal::read.all(studylib, csv = FALSE, txt = FALSE, sas = TRUE, excel = FALSE, tab = FALSE, rds = FALSE, RData = FALSE, envir = e)
    # loop through the environment and make the column names lowercase
    datasets <- eapply(e, sdms::cn_tolower, USE.NAMES = TRUE)
    rave_fmts <- paste0(studylib, "codes.sas7bdat")
  }
  else if(tolower(env) == "pre_plm") {
    path <- "~ccsicprd/cc-sas-all/sasdata/mart/study/"
    protref <- haven::read_sas("~ccsicprd/cc-sas-all/sasdata/mart/xstudy/protref.sas7bdat") %>%
      sdms::cn_tolower()
    study <- protref[protref$dc_num == toupper(as.character(d)), ]
    protstud <- as.character(study$study)
    
    #dir <- tolower(dir)
    #dir %in% c("stats", "rave", "dev", "DEV", "uat", "UAT", "testing", "tst"))  I don't think this is necessary since we know/can find which specific directories are there.
    #They will just be case specific. JK, just need to specify
    if(alt == TRUE) studylib = studpath else studylib = paste0(path, protstud, "/", if( dir == "analysis" ) '' else  dir, "/")
    
    # Have to leave like this because formats don't work for the analysis files right now
    if(fmts == TRUE & dir != "analysis") rave_fmts <- paste0(studylib, "/codes.sas7bdat") #else rave_fmts <- paste0(path, protstud, "/", dir, "/codes.sas7bdat")
    
    # Allow loading of alternate study path locations, Rad Onc for example
    #if(alt == TRUE){studylib = studpath}
    # capture the datasets in a new environment
    e <- new.env()
    # assign the datasets to that environment
    rlocal::read.all(studylib, csv = FALSE, txt = FALSE, sas = TRUE, excel = FALSE, tab = FALSE, rds = FALSE, RData = FALSE, ..., envir = e)
    # loop through the environment and make the column names lowercase
    datasets <- eapply(e, sdms::cn_tolower, USE.NAMES = TRUE)
    
  }
  else{
    path <- "~ccsicprd/cc-sas-all/sasdata/mart/protocol/"
    study <- toupper(d)
    if(alt == TRUE) studylib = studpath else studylib = paste0(path, study, "/Prod/", if( dir == "rave" ) '' else  dir, "/")
    
    # Have to leave like this because formats don't work for the analysis files right now
    if(fmts == TRUE & dir != "analysis") rave_fmts <- paste0(studylib, "/codes.sas7bdat") #else rave_fmts <- paste0(path, protstud, "/", dir, "/codes.sas7bdat")
    
    # Allow loading of alternate study path locations, Rad Onc for example
    #if(alt == TRUE){studylib = studpath}
    # capture the datasets in a new environment
    e <- new.env()
    # assign the datasets to that environment
    rlocal::read.all(studylib, csv = FALSE, txt = FALSE, sas = TRUE, excel = FALSE, tab = FALSE, rds = FALSE, RData = FALSE, ..., envir = e)
    # loop through the environment and make the column names lowercase
    datasets <- eapply(e, sdms::cn_tolower, USE.NAMES = TRUE)
    
  }
  
  # now apply formats
  if(fmts) # if formats are desired for rave data
  {
    if(!is.data.frame(fmts) & dir == "rave"){ fmts <- haven::read_sas(rave_fmts)
    datasets <- lapply(datasets, sdms::apply_formats, fmts = fmts)
    }
    else if(!is.data.frame(fmts)){ fmts <- haven::read_sas(fmts)
    datasets <- lapply(datasets, sdms::apply_formats, fmts = fmts)
    }
    
  }
  
  # change the names
  names(datasets) <- paste0(prefix, names(datasets))
  
  # now assign them to the "global" environment
  list2env(datasets, envir = envir)
  
  # return the character names of the datsets invisibly
  invisible(names(datasets))
}


# read.all <- function (dir, control = NULL, ..., exclude.files = character(), 
#                       envir = parent.frame()) 
# {
#   log_func("read.all")
#   control <- c(list(...), control)
#   control <- do.call("read.all.control", control[!duplicated(names(control))])
#   if (!is.character(exclude.files)) 
#     stop("'exclude.files' must be character.")
#   types <- setdiff(unlist(filetypemap()), control$skip.types)
#   if (length(types) == 0) 
#     stop("No file types selected.")
#   ptn <- paste0("\\.", gsub(".", "\\.", types, fixed = TRUE), 
#                 "$", collapse = "|")
#   filenames <- setdiff(list.files(dir, pattern = ptn), exclude.files)
#   readtype <- function(filename, dir, env) {
#     nm <- tools::file_path_sans_ext(filename)
#     file_ext2 <- function(x) {
#       pos <- regexpr("\\.([[:alnum:]]+)(\\.gz)*$", x)
#       ifelse(pos > -1L, substring(x, pos + 1L), "")
#     }
#     ext <- file_ext2(filename)
#     fullname <- file.path(dir, filename)
#     mapping <- filetypemap()
#     if (ext %in% control$skip.types) 
#       invisible(NULL)
#     dat <- if (ext %in% mapping$csv) {
#       do.call(utils::read.csv, c(control$csv, file = fullname))
#     }
#     else if (ext %in% mapping$txt && ext %nin% control$skip.types) {
#       do.call(utils::read.table, c(control$txt, file = fullname))
#     }
#     else if (ext %in% mapping$sas) {
#       do.call(haven::read_sas, c(control$sas, data_file = fullname))
#     }
#     else if (ext %in% mapping$excel) {
#       do.call(readxl::read_excel, c(control$excel, path = fullname))
#     }
#     else if (ext %in% mapping$tab) {
#       do.call(utils::read.delim, c(control$tab, file = fullname))
#     }
#     else if (ext %in% mapping$rds) {
#       do.call(base::readRDS, c(control$rds, file = fullname))
#     }
#     else if (ext %in% mapping$RData) {
#       do.call(base::load, c(control$RData, file = fullname))
#       return(NULL)
#     }
#     else {
#       stop(paste0("'", fullname, "' wasn't read."))
#     }
#     out <- assign(nm, dat, envir = env)
#     attr(out, "file_ext") <- ext
#   }
#   tryreadtype <- function(filename, dir, env) {
#     tryCatch({
#       readtype(filename, dir, env)
#     }, error = function(e) {
#       warning(paste0("'", file.path(dir, filename), "' wasn't read."))
#       print(e)
#     })
#   }
#   lapply(filenames, tryreadtype, dir = dir, env = envir)
#   invisible(filenames)
# }
# 
# 
# read.all.control <- function (csv = list(), txt = list(), sas = "haven" %in% rownames(utils::installed.packages()), 
#                               excel = "readxl" %in% rownames(utils::installed.packages()), 
#                               tab = list(), rds = list(), RData = list(envir = globalenv()), 
#                               ...) 
# {
#   check.list <- function(x) {
#     xchar <- deparse(substitute(x))
#     if (!is.list(x) && !identical(x, FALSE) && !identical(x, 
#                                                           TRUE)) 
#       stop("'", xchar, "' is not a list.")
#   }
#   check.list(csv)
#   check.list(txt)
#   check.list(sas)
#   check.list(excel)
#   check.list(tab)
#   check.list(rds)
#   check.list(RData)
#   fmls <- function(x, f) {
#     xchar <- deparse(substitute(x))
#     if (identical(x, FALSE)) 
#       return(x)
#     if (identical(x, TRUE)) 
#       return(list())
#     fchar <- deparse(substitute(f))
#     vars <- names(formals(f))
#     if (any(!(names(x) %in% vars))) {
#       warning(paste0("Options specified for filetype '", 
#                      xchar, "' but don't appear in '", fchar, "()'."))
#     }
#     x[names(x) %in% vars[-1]]
#   }
#   csv <- fmls(csv, utils::read.csv)
#   txt <- fmls(txt, utils::read.table)
#   sas <- fmls(sas, haven::read_sas)
#   excel <- fmls(excel, readxl::read_excel)
#   tab <- fmls(tab, utils::read.delim)
#   rds <- fmls(rds, base::readRDS)
#   RData <- fmls(RData, base::load)
#   out <- list(csv = csv, txt = txt, sas = sas, excel = excel, 
#               tab = tab, rds = rds, RData = RData)
#   out$skip.types <- unname(unlist(filetypemap()[vapply(out, 
#                                                        identical, logical(1), FALSE)]))
#   return(structure(out, class = "read.all.control"))
# }
# 
# cn_tolower <- function (x) 
# {
#   colnames(x) <- tolower(colnames(x))
#   x
# }
# 
# 
# apply_formats <- function (dat, fmts, nm = NULL) 
# {
#   if (is.null(nm)) 
#     nm <- deparse(substitute(dat))
#   log_func("apply_formats")
#   fmtcols <- vapply(dat, function(x) !is.null(attr(x, "format.sas")), 
#                     logical(1))
#   fmts <- cn_tolower(fmts)
#   fmts$fmtname <- tolower(fmts$fmtname)
#   fmts$start <- trimws(fmts$start)
#   fmts$start[fmts$start == "."] <- NA
#   dat[fmtcols] <- lapply(dat[fmtcols], apply_format, f = fmts, 
#                          n = nm)
#   dat
# }