R/fctr.R

Defines functions fctr_info fctr_id_check fctr_check

#' Factor tests
#'
#' @param data data frame to check factors in
#' @param .names names of variables in `data` to consider.
#'
#' @return check functions 'return' errors and the intent is
#'   to return nothing if nothing is wrong,
#'   so hopefully nothing is returned.
#'
#' @noRd
#'
fctr_check <- function(data, .names){

 # using select_cols in case data is a data.table object.
 # I take the intersection of .names with names of data b/c
 # I don't want this function to throw an error if the user
 # specified a variable that was not in the dataframe.
 # (there is another check function dedicated to that kind of error)
 chr_index <- which(
  vapply(X = select_cols(data, intersect(.names, names(data))),
         FUN = is.character,
         FUN.VALUE = logical(1),
         USE.NAMES = FALSE)
 )

 if(is_empty(chr_index)) return(NULL)

 chrs <- .names[chr_index]

 chrs <- paste_collapse(chrs, last = ' and ')

 stop(
  "character variables in data should be converted to factors.\n",
  "Here are the character variables I detected: ", chrs, call.= FALSE
 )


}

#' Factor tests
#'
#' There is a good chance that someone using Surv(time, status) ~ .
#'   will forget that inside of the '.' sits an ID variable.
#'   Catching that and sending an informative error will likely
#'   be appreciated.
#'
#' @param data data frame to check factors in
#' @param .names names of variables in `data` to consider.
#'
#' @return check functions 'return' errors and the intent is
#'   to return nothing if nothing is wrong,
#'   so hopefully nothing is returned.
#'
#' @noRd
#'
fctr_id_check <- function(data, .names){

 for(.name in .names) {

  if(is.factor(data[[.name]])){

   if(length(levels(data[[.name]])) == nrow(data)){
    stop("factor variable ", .name, " has as many levels as there ",
         "are rows in the training data. Is ", .name, " an id variable?",
         call. = FALSE)
   }

  }

 }



}


#' Factor information
#'
#' @param data data frame to check factors in
#' @param .names names of variables in `data` to consider.
#' @param fctr_sep how to separate factor variable names from levels.
#'
#' @return a list describing factor variables in `data`.
#'
#' @noRd
#'
#' @example
#' fctr_info(pbc_orsf, .names = c('sex','stage'))

fctr_info <- function(data, .names, fctr_sep = '_'){

 fctr_check(data, .names)

 fctrs <- vector(mode = 'character')
 ordrd <- vector(mode = 'logical')

 for( .name in .names ){
  if(is.factor(data[[.name]])){
   fctrs <- c(fctrs, .name)
   ordrd <- c(ordrd, is.ordered(data[[.name]]))
  }
 }

 fctr_info <- vector(mode = 'list', length = 4L)
 names(fctr_info) <- c('cols', 'lvls', 'keys', 'ordr')

 # dont waste time if there aren't any factors
 if(is_empty(fctrs)) return(fctr_info)

 fctr_info$cols <- fctrs
 fctr_info$ordr <- ordrd
 fctr_info$lvls <- vector(mode = 'list', length = length(fctrs))
 fctr_info$keys <- vector(mode = 'list', length = length(fctrs))

 names(fctr_info$lvls) <- fctrs
 names(fctr_info$keys) <- fctrs

 for( i in seq_along(fctrs) ){

  fctr <- fctrs[i]

  lvls <- levels(data[[fctr]])
  fctr_info$lvls[[fctr]] <- lvls

  if(!fctr_info$ordr[i])
   fctr_info$keys[[fctr]] <- paste(fctr, lvls, sep = fctr_sep)

 }

 fctr_info

}

Try the aorsf package in your browser

Any scripts or data that you put into this service are public.

aorsf documentation built on Oct. 26, 2023, 5:08 p.m.