R/SurvTypes.R
In Colossus: "Risk Model Regression and Analysis with Complex Non-Linear Models"

Documented in ColossusCoxSurv ColossusLogitSurv ColossusPoisSurv get_form get_form_joint

#' Interprets a Poisson joint formula and makes necessary changes to data
#'
#' \code{get_form_joint} uses two event formula, a shared formula,
#' and data.table, to fully describe the model for a joint Poisson model.
#'
#' @inheritParams R_template
#'
#' @return returns a class fully describing the model and the updated data
#' @family Formula Interpretation
#' @export
get_form_joint <- function(formula_list, df, nthreads = as.numeric(detectCores()) / 2) {
  # ------------------------------------------------------------------------------ #
  # Make data.table use the set number of threads too
  if ((identical(Sys.getenv("TESTTHAT"), "true")) || (identical(Sys.getenv("TESTTHAT_IS_CHECKING"), "true"))) {
    nthreads <- min(c(2, nthreads))
  }
  thread_0 <- setDTthreads(nthreads) # save the old number and set the new number
  # ------------------------------------------------------------------------------ #
  if (class(df)[[1]] != "data.table") {
    tryCatch(
      {
        setDT(df)
      },
      error = function(e) {
        df <- data.table(df)
      }
    )
  }
  if (is.list(formula_list)) {
    if ("shared" %in% names(formula_list)) {
      formula_shared <- formula_list$shared
      formula_list <- formula_list[names(formula_list) != "shared"]
    } else {
      formula_shared <- .~.
    }
  } else if (is(formula_list, "formula")) {
    formula_list <- list(formula_list)
    formula_shared <- .~.
  } else {
    stop("Error: Joint formula list wasn't a list or a formula")
  }
  model_list <- list()
  for (formula_i in seq_along(formula_list)) {
    #
    formula <- formula_list[[formula_i]]
    surv_obj <- Reduce(paste, deparse(formula[[2]]))
    model_obj <- paste(Reduce(paste, deparse(formula[[3]])), collapse = " ")
    surv_obj <- gsub(" ", "", surv_obj)
    model_obj <- gsub(" ", "", model_obj)
    res <- get_form_list(surv_obj, model_obj, df)
    model <- res$model
    df <- res$data
    if (!grepl("pois", model$surv_model_type)) {
      stop("Error: Atleast one model was not a poisson model")
    }
    model_temp <- list(model)
    model_list <- c(model_list, model_temp)
  }
  #
  model_obj <- paste(Reduce(paste, deparse(formula_shared[[3]])), collapse = " ")
  if (model_obj != ".") {
    res <- get_form_risk(model_obj, df)
    model_share <- res$model
    df <- res$data
  } else {
    model_share <- list(tform = c(), term_n = c(), names = c(), modelform = "M", gmix_theta = 0, gmix_term = c())
  }
  #
  # Now we need to check that the values are similar
  model_1 <- model_list[[1]]
  for (model_i in 2:length(model_list)) {
    model_2 <- model_list[[model_i]]
    # The pyr should all be the same
    if (model_1$pyr != model_2$pyr) {
      stop(paste("Error: The joint models need to use the same person-year column. Instead they use ", model_1$pyr, " and ", model_2$pyr, ".", sep = ""))
    }
    # The strata should match
    if (any(model_1$strata != model_2$strata)) {
      stop(paste("Error: The joint models need to use the same stratification.", sep = ""))
    }
    # The modelform should match
    if (model_1$modelform != model_2$modelform) {
      stop(paste("Error: The joint models need to use the same modelform. Instead they use ", model_1$modelform, " and ", model_2$modelform, ".", sep = ""))
    }
    if (model_1$gmix_theta != model_2$gmix_theta) {
      stop(paste("Error: The joint models need to use the same geometric mixture theta value. Instead they use ", model_1$gmix_theta, " and ", model_2$gmix_theta, ".", sep = ""))
    }
  }
  if (length(model_share$tform) != 0) {
    if (model_1$modelform != model_share$modelform) {
      stop(paste("Error: The joint models and the shared model need to use the same modelform. Instead they use ", model_1$modelform, " and ", model_share$modelform, ".", sep = ""))
    }
    if (model_1$gmix_theta != model_share$gmix_theta) {
      stop(paste("Error: The joint models and the shared model need to use the same geometric mixture theta value. Instead they use ", model_1$gmix_theta, " and ", model_share$gmix_theta, ".", sep = ""))
    }
  } else {
    model_share$modelform <- model_1$modelform
    model_share$gmix_theta <- model_1$gmix_theta
  }
  # Now we pull everything out and get the joint model built
  # Pull it all out
  events <- c()
  name_list <- list(
    "shared" = model_share$names
  )
  term_n_list <- list(
    "shared" = model_share$term_n
  )
  tform_list <- list(
    "shared" = model_share$tform
  )
  for (model_i in seq_along(model_list)) {
    model <- model_list[[model_i]]
    event_temp <- model$event
    name_temp <- list()
    termn_temp <- list()
    tform_temp <- list()
    name_temp[[event_temp]] <- model$names
    termn_temp[[event_temp]] <- model$term_n
    tform_temp[[event_temp]] <- model$tform
    #
    events <- c(events, event_temp)
    term_n_list <- c(term_n_list, termn_temp)
    name_list <- c(name_list, name_temp)
    tform_list <- c(tform_list, tform_temp)
  }
  keep_constant_list <- list()
  a_n_list <- list()
  # Combine it
  val <- Joint_Multiple_Events(
    df, events, name_list,
    term_n_list, tform_list,
    keep_constant_list, a_n_list
  )
  df <- val$df
  names <- val$names
  term_n <- val$term_n
  tform <- val$tform
  keep_constant <- val$keep_constant
  a_n <- val$a_n
  #
  pyr <- model_1$pyr
  strata <- model_1$strata
  modelform <- model_1$modelform
  #
  gmix_term <- model_1$gmix_term
  gmix_theta <- model_1$gmix_theta
  if (modelform == "GMIX") {
    # We need to combine the results
    gmix_term_1 <- model_1$gmix_term
    gmix_term_2 <- model_2$gmix_term
    gmix_term_s <- model_share$gmix_term
    #
    len_1 <- length(gmix_term_1)
    len_2 <- length(gmix_term_2)
    len_s <- length(gmix_term_s)
    if (len_1 < len_2) {
      if (len_s < len_1) {
        # 2 is longest
        if (any(gmix_term_s != gmix_term_2[1:len_s])) {
          stop("Error: Second model and shared model have different geometric mixture term values.")
        }
        if (any(gmix_term_1 != gmix_term_2[1:len_1])) {
          stop("Error: Second model and first model have different geometric mixture term values.")
        }
        gmix_term <- gmix_term_2
      }
    }
  }
  #
  null <- FALSE
  model <- poismodel(pyr, "events", strata, null, term_n, tform, names, modelform, gmix_term, gmix_theta, a_n, keep_constant, df)
  # Revert data.table core change
  thread_1 <- setDTthreads(thread_0) # revert the old number
  # ------------------------------------------------------------------------------ #
  list(
    "model" = model, "data" = df
  )
}


#' Interprets a Colossus formula and makes necessary changes to data
#'
#' \code{get_form} uses a formula and data.table, to fully describe the model
#' for a Colossus regression function.
#'
#' @inheritParams R_template
#'
#' @return returns a class fully describing the model and the updated data
#' @family Formula Interpretation
#' @export
#' @examples
#' library(data.table)
#' ## basic example code reproduced from the starting-description vignette
#' df <- data.table::data.table(
#'   "UserID" = c(112, 114, 213, 214, 115, 116, 117),
#'   "Starting_Age" = c(18, 20, 18, 19, 21, 20, 18),
#'   "Ending_Age" = c(30, 45, 57, 47, 36, 60, 55),
#'   "Cancer_Status" = c(0, 0, 1, 0, 1, 0, 0),
#'   "a" = c(0, 1, 1, 0, 1, 0, 1),
#'   "b" = c(1, 1.1, 2.1, 2, 0.1, 1, 0.2),
#'   "c" = c(10, 11, 10, 11, 12, 9, 11),
#'   "d" = c(0, 0, 0, 1, 1, 1, 1),
#'   "e" = c(0, 0, 1, 0, 0, 0, 1)
#' )
#' formula <- Cox(Starting_Age, Ending_Age, Cancer_Status) ~
#'   loglinear(a, b, c, 0) + plinear(d, 0) + multiplicative()
#' model <- get_form(formula, df, 1)
get_form <- function(formula, df, nthreads = as.numeric(detectCores()) / 2) {
  # ------------------------------------------------------------------------------ #
  # Make data.table use the set number of threads too
  if ((identical(Sys.getenv("TESTTHAT"), "true")) || (identical(Sys.getenv("TESTTHAT_IS_CHECKING"), "true"))) {
    nthreads <- min(c(2, nthreads))
  }
  thread_0 <- setDTthreads(nthreads) # save the old number and set the new number
  # ------------------------------------------------------------------------------ #
  if (length(lapply(strsplit(Reduce(paste, deparse(formula)), ""), function(x) which(x == "~"))[[1]]) != 1) {
    stop("Error: The formula contained multiple '~', invalid formula")
  }
  surv_obj <- Reduce(paste, deparse(formula[[2]]))
  model_obj <- paste(Reduce(paste, deparse(formula[[3]])), collapse = " ")
  surv_obj <- gsub(" ", "", surv_obj)
  model_obj <- gsub(" ", "", model_obj)
  res <- get_form_list(surv_obj, model_obj, df)
  model <- res$model
  df <- res$data
  #
  surv_model_type <- model$surv_model_type
  tstart <- model$tstart
  tend <- model$tend
  pyr <- model$pyr
  trials <- model$trials
  event <- model$event
  strata <- model$strata
  weight <- model$weight
  #
  term_n <- model$term_n
  tform <- model$tform
  names <- model$names
  modelform <- model$modelform
  gmix_term <- model$gmix_term
  gmix_theta <- model$gmix_theta
  null <- model$null
  #
  expres_calls <- model$expres_calls
  #
  if (null) {
    names <- c("CONST")
    term_n <- c(0)
    tform <- c("loglin")
    modelform <- "M"
  }
  #
  if (grepl("cox", surv_model_type)) {
    model <- coxmodel(tstart, tend, event, strata, weight, null, term_n, tform, names, modelform, gmix_term, gmix_theta, c(), c(), df, expres_calls)
  } else if (grepl("finegray", surv_model_type)) {
    model <- coxmodel(tstart, tend, event, strata, weight, null, term_n, tform, names, modelform, gmix_term, gmix_theta, c(), c(), df, expres_calls)
  } else if (grepl("pois", surv_model_type)) {
    model <- poismodel(pyr, event, strata, null, term_n, tform, names, modelform, gmix_term, gmix_theta, c(), c(), df, expres_calls)
    if (all(strata != "NONE")) {
      if (!all(strata %in% names(df))) {
        stop("Error: One of the strata was not in the original data.")
      }
      #   Check_Strata_Model(term_n, tform, modelform, gmix_term, gmix_theta)
    } # verifies that a stratified model can be used
  } else if ((grepl("casecon", surv_model_type)) || (grepl("case_con", surv_model_type))) {
    model <- caseconmodel(tstart, tend, event, strata, null, term_n, tform, names, modelform, gmix_term, gmix_theta, c(), c(), df, expres_calls)
  } else if ((grepl("logit", surv_model_type)) || (grepl("logistic", surv_model_type))) {
    model <- logitmodel(trials, event, strata, term_n, tform, names, modelform, gmix_term, gmix_theta, c(), c(), df, expres_calls)
  } else {
    stop("Error: Bad survival model type passed")
  }
  # Revert data.table core change
  thread_1 <- setDTthreads(thread_0) # revert the old number
  # ------------------------------------------------------------------------------ #
  list(
    "model" = model, "data" = df
  )
}

#' Interprets a Colossus formula and makes necessary changes to data, returns list not class
#'
#' \code{get_form_list} uses a formula and data.table, to fully describe the model
#' for a Colossus regression function and returns a list
#'
#' @param surv_obj output from get_form_surv, list of survival values
#' @param model_obj output from get_form_risk, list of risk factor model values
#' @inheritParams R_template
#'
#' @noRd
#' @return returns the list of model values
#' @family Formula Interpretation
get_form_list <- function(surv_obj, model_obj, df) {
  if (class(df)[[1]] != "data.table") {
    tryCatch(
      {
        setDT(df)
      },
      error = function(e) {
        df <- data.table(df)
      }
    )
  }
  surv_obj <- gsub(" ", "", surv_obj)
  model_obj <- gsub(" ", "", model_obj)
  #
  surv_list <- get_form_surv(surv_obj, df)
  df <- surv_list$data
  surv_vals <- surv_list$surv
  surv_model_type <- surv_vals$surv_model_type
  tstart <- surv_vals$tstart
  tend <- surv_vals$tend
  pyr <- surv_vals$pyr
  event <- surv_vals$event
  strata <- surv_vals$strata
  weight <- surv_vals$weight
  trials <- surv_vals$trials
  #
  model_vals <- get_form_risk(model_obj, df)
  model <- model_vals$model
  term_n <- model$term_n
  tform <- model$tform
  names <- model$names
  modelform <- model$modelform
  gmix_term <- model$gmix_term
  gmix_theta <- model$gmix_theta
  null <- model$null
  df <- model_vals$data
  expres_calls <- model$expres_calls
  #
  list(
    "model" = list(
      surv_model_type = surv_model_type,
      tstart = tstart,
      tend = tend,
      pyr = pyr,
      trials = trials,
      event = event,
      strata = strata,
      weight = weight,
      term_n = term_n,
      tform = tform,
      names = names,
      modelform = modelform,
      gmix_term = gmix_term,
      gmix_theta = gmix_theta,
      null = null,
      expres_calls = expres_calls
    ),
    "data" = df
  )
}

#' Interprets the survival side of a formula
#'
#' \code{get_form_surv} interprets the LHS of a formula
#' @param surv_obj output from get_form_surv, list of survival values
#' @inheritParams R_template
#' @noRd
#' @return returns the left hand side of the formula reading
#' @family Formula Interpretation
get_form_surv <- function(surv_obj, df) {
  surv_obj <- gsub(" ", "", surv_obj)
  surv_obj <- gsub("\"", "", surv_obj) # remove literal strings if needed
  surv_model_type <- "NONE"
  tstart <- "NONE"
  tend <- "NONE"
  pyr <- "NONE"
  event <- "NONE"
  strata <- "NONE"
  weight <- "NONE"
  trials <- "NONE"
  #
  # split the survival model type
  second_split <- lapply(strsplit(surv_obj, ""), function(x) which(x == "("))[[1]]
  if (length(second_split) == 0) {
    stop("Error: The left hand side did not contain (")
  }
  surv_type <- tolower(substr(surv_obj, 1, second_split - 1))
  surv_paras <- substr(surv_obj, second_split + 1, nchar(surv_obj) - 1)
  surv_paras <- nested_split(surv_paras)
  # assign survival model values
  surv_model_type <- surv_type
  surv_para_list <- list()
  for (i in seq_along(surv_paras)) {
    para_cur <- surv_paras[i]
    para_break <- lapply(strsplit(para_cur, ""), function(x) which(x == "="))[[1]]
    if (length(para_break) == 0) {
      # no name, just add to list
      surv_para_list[[i]] <- para_cur
    } else {
      item_name <- substr(para_cur, 1, para_break - 1)
      item_value <- substr(para_cur, para_break + 1, nchar(para_cur))
      surv_para_list[[item_name]] <- item_value
    }
  }
  if (surv_type %in% c("cox", "coxph")) {
    res <- do.call(ColossusCoxSurv, surv_para_list)
    tstart <- res$tstart
    tend <- res$tend
    event <- res$event
  } else if (surv_type %in% c("cox_strata", "coxph_strata")) {
    res <- do.call(ColossusCoxStrataSurv, surv_para_list)
    tstart <- res$tstart
    tend <- res$tend
    event <- res$event
    strata <- res$strata
  } else if (surv_type %in% c("finegray", "fine_gray", "fg")) {
    res <- do.call(ColossusFineGraySurv, surv_para_list)
    tstart <- res$tstart
    tend <- res$tend
    event <- res$event
    weight <- res$weight
  } else if (surv_type %in% c("finegray_strata", "fine_gray_strata", "fg_strata")) {
    res <- do.call(ColossusFineGrayStrataSurv, surv_para_list)
    tstart <- res$tstart
    tend <- res$tend
    event <- res$event
    strata <- res$strata
    weight <- res$weight
  } else if (surv_type %in% c("poisson", "pois")) {
    res <- do.call(ColossusPoisSurv, surv_para_list)
    pyr <- res$pyr
    event <- res$event
    if (any(res$strata != "NULL")) {
      stop("Error: Too many columns passed to non-stratified Poisson model")
    }
  } else if (surv_type %in% c("poisson_strata", "pois_strata")) {
    res <- do.call(ColossusPoisSurv, surv_para_list)
    pyr <- res$pyr
    event <- res$event
    strata <- res$strata
    if (length(res$strata) == 1) {
      if (res$strata == "NULL") {
        stop("Error: Too few columns passed to non-stratified Poisson model")
      }
    }
  } else if (surv_type %in% c("casecon", "casecontrol", "case_control")) {
    res <- do.call(ColossusCaseConSurv, surv_para_list)
    event <- res$event
  } else if (surv_type %in% c("casecon_time", "casecontrol_time", "case_control_time")) {
    res <- do.call(ColossusCaseConTimeSurv, surv_para_list)
    tstart <- res$tstart
    tend <- res$tend
    event <- res$event
  } else if (surv_type %in% c("casecon_strata", "casecontrol_strata", "case_control_strata")) {
    res <- do.call(ColossusCaseConStrataSurv, surv_para_list)
    event <- res$event
    strata <- res$strata
  } else if (surv_type %in% c("casecon_strata_time", "casecontrol_strata_time", "case_control_strata_time", "casecon_time_strata", "casecontrol_time_strata", "case_control_time_strata")) {
    res <- do.call(ColossusCaseConTimeStrataSurv, surv_para_list)
    tstart <- res$tstart
    tend <- res$tend
    event <- res$event
    strata <- res$strata
  } else if (surv_type %in% c("logit", "logistic")) {
    res <- do.call(ColossusLogitSurv, surv_para_list)
    trials <- res$trials
    event <- res$event
    if ("CONST" == trials) {
      if ("CONST" %in% names(df)) {
        # fine
      } else {
        df$CONST <- 1
      }
    }
  } else {
    stop("Error: Invalid survival model type")
  }
  #
  if (tstart == "%trunc%") {
    if (tend == "%trunc%") {
      stop("Error: Both endpoints are truncated, not acceptable")
    }
    tmin <- min(df[[tend]]) - 1
    if (!("right_trunc" %in% names(df))) {
      df[, ":="(right_trunc = tmin)]
    }
    tstart <- "right_trunc"
  } else if (tend == "%trunc%") {
    tmax <- max(df[[tstart]]) + 1
    if (!("left_trunc" %in% names(df))) {
      df[, ":="(left_trunc = tmax)]
    }
    tend <- "left_trunc"
  }
  #
  list(
    "surv" = list(
      surv_model_type = surv_model_type,
      tstart = tstart,
      tend = tend,
      trials = trials,
      pyr = pyr,
      event = event,
      strata = strata,
      weight = weight
    ),
    "data" = df
  )
}

#' Interprets the risk factor side of a formula
#'
#' \code{get_form_risk} interprets the RHS of a formula
#' @param model_obj output from get_form_risk, list of risk factor model values
#' @inheritParams R_template
#' @noRd
#' @return returns the right hand side of a formula reading
#' @family Formula Interpretation
get_form_risk <- function(model_obj, df) {
  #
  model_obj <- gsub(" ", "", model_obj)
  term_n <- c()
  tform <- c()
  names <- c()
  gmix_theta <- 0
  gmix_term <- c()
  modelform <- "NONE"
  null <- FALSE
  expres_calls <- list() # storing any variables transformed, ie factor, ns, bs, etc. to be recalled when the data is reloaded
  #
  right_model_terms <- strsplit(model_obj, "\\+")[[1]]
  for (term_i in seq_along(right_model_terms)) {
    # seperate the term type or model-formula from parameters
    third_split <- lapply(strsplit(right_model_terms[term_i], ""), function(x) which(x == "("))[[1]]
    if (length(third_split) == 0) {
      stop(paste('Error: right hand side element "', right_model_terms[term_i], '" did not contain (', sep = ""))
    }
    model_type <- tolower(substr(right_model_terms[term_i], 1, third_split - 1))
    model_type <- gsub("_", "-", model_type)
    tform_acceptable <- c(
      "plin", "lin", "loglin", "loglin-dose", "lin-dose",
      "lin-quad-dose", "lin-exp-dose", "plinear", "product-linear", "linear",
      "exponential", "exp",
      "loglinear", "log-linear", "loglinear-dose", "log-linear-dose", "linear-dose", "linear-piecewise",
      "quadratic", "quad", "quad-dose", "quadratic-dose",
      "step-dose", "step-piecewise",
      "linear-quadratic-dose", "linear-quadratic-piecewise",
      "linear-exponential-dose", "linear-exponential-piecewise"
    )
    modelform_acceptable <- c(
      "multiplicative", "multiplicative-excess", "additive", "product-additive",
      "product-additive-excess", "a", "pa", "pae", "m", "me",
      "gmix", "geometric-mixture", "gmix-r", "relative-geometric-mixture",
      "gmix-e", "excess-geometric-mixture"
    )
    if (model_type == "null") {
      null <- TRUE
    } else if (model_type %in% tform_acceptable) {
      model_paras <- substr(right_model_terms[term_i], third_split + 1, nchar(right_model_terms[term_i]) - 1)
      model_paras <- nested_split(model_paras)
      last_entry <- model_paras[length(model_paras)]
      if (is.na(suppressWarnings(as.integer(last_entry)))) {
        # the last element isn't an integer
        term_num <- 0
      } else {
        model_paras <- model_paras[1:(length(model_paras) - 1)]
        term_num <- as.integer(last_entry)
      }
      for (subterm_i in seq_along(model_paras)) {
        # add element
        # check if the element is a function
        if (grepl("\\(", model_paras[subterm_i])) {
          # Some function is being used
          if (substr(model_paras[subterm_i], 1, 7) == "factor(") {
            # baseline is set by using factor(column;baseline=level)
            factor_args <- substr(model_paras[subterm_i], 8, nchar(model_paras[subterm_i]) - 1)
            factor_args <- nested_split(factor_args)
            ##
            factor_arg_list <- list()
            for (i in seq_along(factor_args)) {
              para_cur <- factor_args[i]
              para_break <- lapply(strsplit(para_cur, ""), function(x) which(x == "="))[[1]]
              if (length(para_break) == 0) {
                # no name, just add to list
                factor_arg_list[[i]] <- para_cur
              } else {
                item_name <- substr(para_cur, 1, para_break - 1)
                item_value <- substr(para_cur, para_break + 1, nchar(para_cur))
                factor_arg_list[[item_name]] <- parse_literal_string(item_value)
              }
            }
            repeat_list <- c(list("_exp_type" = "factor"), copy(factor_arg_list)) # The arguements needed to repeat the processing
            # Either the item is named x, or is it the first
            if ("x" %in% names(factor_arg_list)) {
              factor_col <- factor_arg_list$x
              factor_arg_list[["x"]] <- copy(df[[factor_arg_list$x]])
            } else {
              factor_col <- factor_arg_list[[1]]
              names(factor_arg_list)[[1]] <- "x"
              factor_arg_list[["x"]] <- copy(df[[factor_arg_list$x]])
            }
            if (!(factor_col %in% names(df))) {
              stop(paste("Error: Column: ", factor_col, " not in data", sep = ""))
            }
            xtemp <- do.call(factor, factor_arg_list)
            if (!("levels" %in% names(repeat_list))) { # using the levels will recreate the same factoring
              repeat_list[["levels"]] <- levels(xtemp)
            }
            df[[factor_col]] <- xtemp
            val <- factorize(df, factor_col)
            df <- val$df
            col_name <- val$cols
            level_ref <- paste(factor_col, levels(xtemp)[1], sep = "_")
            col_name <- col_name[col_name != level_ref]
            expres_calls[[length(expres_calls) + 1]] <- repeat_list
          } else if (substr(model_paras[subterm_i], 1, 2) == "I(") {
            factor_args <- substr(model_paras[subterm_i], 3, nchar(model_paras[subterm_i]) - 1)
            repeat_list <- c(list("_exp_type" = "power"), list(factor_args))
            vals <- strsplit(factor_args, "\\^")[[1]]
            if (length(vals) != 2) {
              stop("Error: I() currently only available for I(var^n)")
            }
            col <- vals[1]
            raised <- vals[2]
            if (!(col %in% names(df))) {
              stop(paste("Error: Column: ", col, " not in data", sep = ""))
            }
            options(warn = -1)
            if (all(vapply(raised, function(x) grepl("^[\\-]{0,1}[0-9]*\\.{0,1}[0-9]*$", x), logical(1))) || all(vapply(raised, function(x) grepl("^[\\-]{0,1}[0-9]+e[\\-]{0,1}[0-9]+$", x), logical(1)))) {
              options(warn = 0) # checks for an integer, decimal, decimal places or scientific notation
              raised <- as.numeric(raised)
            } else {
              stop("Error: Column was not raised to a numeric power")
            }
            col_name <- factor_args
            df[[col_name]] <- df[[col]]^raised
            expres_calls[[length(expres_calls) + 1]] <- repeat_list
          } else if (substr(model_paras[subterm_i], 1, 3) == "ns(") {
            # natural cubic spline
            factor_args <- substr(model_paras[subterm_i], 4, nchar(model_paras[subterm_i]) - 1)
            factor_args <- nested_split(factor_args)
            ##
            factor_arg_list <- list()
            for (i in seq_along(factor_args)) {
              para_cur <- factor_args[i]
              para_break <- lapply(strsplit(para_cur, ""), function(x) which(x == "="))[[1]]
              if (length(para_break) == 0) {
                # no name, just add to list
                factor_arg_list[[i]] <- para_cur
              } else {
                item_name <- substr(para_cur, 1, para_break - 1)
                item_value <- substr(para_cur, para_break + 1, nchar(para_cur))
                factor_arg_list[[item_name]] <- parse_literal_string(item_value)
              }
            }
            repeat_list <- c(list("_exp_type" = "ns"), copy(factor_arg_list)) # The arguements needed to repeat the processing
            # Either the item is named x, or is it the first
            if ("x" %in% names(factor_arg_list)) {
              factor_col <- factor_arg_list$x
              factor_arg_list[["x"]] <- copy(df[[factor_arg_list$x]])
            } else {
              factor_col <- factor_arg_list[[1]]
              names(factor_arg_list)[[1]] <- "x"
              factor_arg_list[["x"]] <- copy(df[[factor_arg_list$x]])
            }
            if (system.file(package = "splines") == "") {
              stop("Error: Attempted to use ns(), but splines not detected on system.")
            }
            if (!(factor_col %in% names(df))) {
              stop(paste("Error: Column: ", factor_col, " not in data", sep = ""))
            }
            xtemp <- do.call(splines::ns, factor_arg_list)
            #
            ns_att <- attributes(xtemp)
            # Applying the same knots, boundary, and intercept gives the same transformation
            if (!("knots" %in% names(repeat_list))) {
              repeat_list[["knots"]] <- ns_att$knots
            }
            if (!("Boundary.knots" %in% names(repeat_list))) {
              repeat_list[["Boundary.knots"]] <- ns_att$Boundary.knots
            }
            if (!("intercept" %in% names(repeat_list))) {
              repeat_list[["intercept"]] <- ns_att$intercept
            }
            #
            col_name <- c()
            for (i in seq_len(ncol(xtemp))) {
              x_col <- paste(factor_col, "_ns", i, sep = "")
              if (!(x_col %in% names(df))) {
                df[[x_col]] <- xtemp[, i]
              }
              col_name <- c(col_name, x_col)
            }
            expres_calls[[length(expres_calls) + 1]] <- repeat_list
            ##
          } else if (substr(model_paras[subterm_i], 1, 3) == "bs(") {
            # b-spline for polynomial spline
            factor_args <- substr(model_paras[subterm_i], 4, nchar(model_paras[subterm_i]) - 1)
            factor_args <- nested_split(factor_args)
            ##
            factor_arg_list <- list()
            for (i in seq_along(factor_args)) {
              para_cur <- factor_args[i]
              para_break <- lapply(strsplit(para_cur, ""), function(x) which(x == "="))[[1]]
              if (length(para_break) == 0) {
                # no name, just add to list
                factor_arg_list[[i]] <- para_cur
              } else {
                item_name <- substr(para_cur, 1, para_break - 1)
                item_value <- substr(para_cur, para_break + 1, nchar(para_cur))
                factor_arg_list[[item_name]] <- parse_literal_string(item_value)
              }
            }
            repeat_list <- c(list("_exp_type" = "bs"), copy(factor_arg_list)) # The arguements needed to repeat the processing
            # Either the item is named x, or is it the first
            if ("x" %in% names(factor_arg_list)) {
              factor_col <- factor_arg_list$x
              factor_arg_list[["x"]] <- copy(df[[factor_arg_list$x]])
            } else {
              factor_col <- factor_arg_list[[1]]
              names(factor_arg_list)[[1]] <- "x"
              factor_arg_list[["x"]] <- copy(df[[factor_arg_list$x]])
            }
            if (system.file(package = "splines") == "") {
              stop("Error: Attempted to use bs(), but splines not detected on system.")
            }
            if (!(factor_col %in% names(df))) {
              stop(paste("Error: Column: ", factor_col, " not in data", sep = ""))
            }
            xtemp <- do.call(splines::bs, factor_arg_list)
            #
            bs_att <- attributes(xtemp)
            # Applying the same degree, knots, boundary, and intercept gives the same transformation
            if (!("degree" %in% names(repeat_list))) {
              repeat_list[["degree"]] <- bs_att$degree
            }
            if (!("knots" %in% names(repeat_list))) {
              repeat_list[["knots"]] <- bs_att$knots
            }
            if (!("Boundary.knots" %in% names(repeat_list))) {
              repeat_list[["Boundary.knots"]] <- bs_att$Boundary.knots
            }
            if (!("intercept" %in% names(repeat_list))) {
              repeat_list[["intercept"]] <- bs_att$intercept
            }
            col_name <- c()
            for (i in seq_len(ncol(xtemp))) {
              x_col <- paste(factor_col, "_bs", i, sep = "")
              if (!(x_col %in% names(df))) {
                df[[x_col]] <- xtemp[, i]
              }
              col_name <- c(col_name, x_col)
            }
            expres_calls[[length(expres_calls) + 1]] <- repeat_list
            ##
          } else {
            # ----------------------------------------------------------------------------------- #
            # generic function call
            para_split <- lapply(strsplit(model_paras[subterm_i], ""), function(x) which(x == "("))[[1]]
            exp_name <- substr(model_paras[subterm_i], 1, para_split - 1)
            factor_args <- substr(model_paras[subterm_i], para_split + 1, nchar(model_paras[subterm_i]) - 1)
            factor_args <- nested_split(factor_args)
            ##
            factor_arg_list <- list()
            for (i in seq_along(factor_args)) {
              para_cur <- factor_args[i]
              para_break <- lapply(strsplit(para_cur, ""), function(x) which(x == "="))[[1]]
              if (length(para_break) == 0) {
                # no name, just add to list
                factor_arg_list[[i]] <- para_cur
              } else {
                item_name <- substr(para_cur, 1, para_break - 1)
                item_value <- substr(para_cur, para_break + 1, nchar(para_cur))
                factor_arg_list[[item_name]] <- parse_literal_string(item_value)
              }
            }
            repeat_list <- c(list("_exp_type" = exp_name), copy(factor_arg_list)) # The arguements needed to repeat the processing
            # Either the item is named x, or is it the first
            factor_col <- factor_arg_list[[1]]
            factor_vals <- unlist(factor_arg_list, use.names = FALSE)
            for (i in seq_along(factor_vals)) {
              if (factor_vals[[i]] %in% names(df)) {
                factor_arg_list[[i]] <- copy(df[[factor_vals[[i]]]])
              }
            }
            if (!(factor_col %in% names(df))) {
              stop(paste("Error: Column: ", factor_col, " not in data", sep = ""))
            }
            xtemp <- do.call(exp_name, factor_arg_list)
            #
            col_name <- c()
            if (is.atomic(xtemp)) {
              x_col <- paste(factor_col, "_", exp_name, sep = "")
              if (!(x_col %in% names(df))) {
                df[[x_col]] <- xtemp
              }
              col_name <- c(col_name, x_col)
            } else {
              for (i in seq_len(ncol(xtemp))) {
                x_col <- paste(factor_col, "_", exp_name, i, sep = "")
                if (!(x_col %in% names(df))) {
                  df[[x_col]] <- xtemp[, i]
                }
                col_name <- c(col_name, x_col)
              }
            }
            expres_calls[[length(expres_calls) + 1]] <- repeat_list
          }
        } else if (grepl("\\*", model_paras[subterm_i])) {
          # interaction element
          repeat_list <- c(list("_exp_type" = "interaction"), model_paras[subterm_i]) # The arguements needed to repeat the processing
          #
          # split into the columns
          cols <- strsplit(model_paras[subterm_i], "\\*")[[1]]
          for (col in cols) {
            if (!(col %in% names(df))) {
              # good, it is in there
              stop(paste("Error: Interaction column missing: ", col, sep = ""))
            }
          }
          interact_tables <- do.call(c, lapply(seq_along(cols), combn, x = cols, simplify = FALSE))
          col_name <- c()
          for (i_table in interact_tables) {
            vals <- unlist(i_table, use.names = FALSE)
            if (length(vals) == 1) {
              if (is.factor(df[[vals]])) {
                # factor
                val <- factorize(df, vals)
                df <- val$df
                fac_names <- val$cols
                level_ref <- paste(vals, levels(df[[vals]])[1], sep = "_")
                col_name <- c(col_name, fac_names[fac_names != level_ref])
              } else {
                # not factor
                col_name <- c(col_name, vals)
              }
            } else {
              # there are multiple to combine
              # get the levels for each element
              element_levels <- list()
              for (term_i in seq_along(vals)) {
                factor_col <- vals[term_i]
                if (is.factor(df[[factor_col]])) {
                  val <- factorize(df, factor_col)
                  df <- val$df
                  i_levels <- paste(factor_col, levels(df[[factor_col]]), sep = "_")
                  level_ref <- paste(factor_col, levels(df[[factor_col]])[1], sep = "_")
                  i_levels <- i_levels[i_levels != level_ref]
                  element_levels[[term_i]] <- i_levels
                } else {
                  element_levels[[term_i]] <- factor_col
                }
              }
              combs <- c()
              for (i in element_levels[[1]]) {
                combs <- c(combs, paste(i))
              }
              for (i in 2:length(element_levels)) {
                comb_temp <- copy(combs)
                combs <- c()
                for (k in element_levels[[i]]) {
                  combs <- c(combs, paste(comb_temp, k, sep = ":"))
                }
              }
              for (comb in combs) {
                #
                entries <- strsplit(comb, ":")[[1]]
                df[[comb]] <- 1
                for (j in entries) {
                  df[[comb]] <- df[[comb]] * df[[j]]
                }
                if (max(df[[comb]]) != min(df[[comb]])) {
                  col_name <- c(col_name, comb)
                }
              }
            }
          }
          expres_calls[[length(expres_calls) + 1]] <- repeat_list
        } else {
          # check if the column is actually a factor
          element_col <- model_paras[subterm_i]
          if ("CONST" == element_col) {
            if ("CONST" %in% names(df)) {
              # fine
            } else {
              df$CONST <- 1
            }
          }
          if (!element_col %in% names(df)) {
            stop(paste("Error: Subterm column missing: ", col, sep = ""))
          }
          if (is.null(levels(df[[element_col]]))) {
            col_name <- c(element_col)
          } else {
            # it is a factor
            factor_arg_list <- list()
            factor_arg_list[[1]] <- element_col
            repeat_list <- c(list("_exp_type" = "factor"), copy(factor_arg_list))
            repeat_list[["levels"]] <- levels(df[[element_col]])
            expres_calls[[length(expres_calls) + 1]] <- repeat_list
            val <- factorize(df, element_col)
            df <- val$df
            col_name <- val$cols
            level_ref <- paste(element_col, levels(df[[element_col]])[1], sep = "_")
            col_name <- col_name[col_name != level_ref]
          }
        }
        if (length(col_name) == 0) {
          stop("Error: Subterm missing element, was a single-valued factor used?")
        }
        col_name <- unique(col_name)
        # convert subterm formula
        model_terms <- c(model_type)
        if (model_type %in% c("plin", "plinear", "product-linear")) {
          model_terms <- c("plin")
        } else if (model_type %in% c("lin", "linear")) {
          model_terms <- c("lin")
        } else if (model_type %in% c("loglin", "loglinear", "log-linear", "exponential", "exp")) {
          model_terms <- c("loglin")
        } else if (model_type %in% c("loglin-dose", "loglinear-dose", "log-linear-dose")) {
          model_terms <- c("loglin_slope", "loglin_top")
        } else if (model_type %in% c("lin-dose", "linear-dose", "linear-piecewise")) {
          model_terms <- c("lin_slope", "lin_int")
        } else if (model_type %in% c("quadratic", "quad", "quad-dose", "quadratic-dose")) {
          model_terms <- c("quad_slope")
        } else if (model_type %in% c("step-dose", "step-piecewise")) {
          model_terms <- c("step_slope", "step_int")
        } else if (model_type %in% c("lin-quad-dose", "linear-quadratic-dose", "linear-quadratic-piecewise")) {
          model_terms <- c("lin_quad_slope", "lin_quad_int")
        } else if (model_type %in% c("lin-exp-dose", "linear-exponential-dose", "linear-exponential-piecewise")) {
          model_terms <- c("lin_exp_slope", "lin_exp_int", "lin_exp_exp_slope")
        } else {
          stop(paste("Error: Unknown subterm type used, ", model_type, sep = ""))
        }
        for (col in col_name) {
          for (model_term in model_terms) {
            if (grepl("_int", model_term)) {
              # we want to create a second column, for the intercept, to be normalized differently
              new_col <- paste(col, ":intercept", sep = "")
              if (!(new_col %in% names(df))) {
                df[, new_col] <- df[, col, with = FALSE]
              }
              names <- c(names, new_col)
            } else {
              names <- c(names, col)
            }
            tform <- c(tform, model_term)
            term_n <- c(term_n, term_num)
          }
        }
      }
    } else if (model_type %in% modelform_acceptable) {
      if (model_type %in% c("m", "multiplicative")) {
        model_type <- "M"
      } else if (model_type %in% c("me", "multiplicative-excess")) {
        model_type <- "ME"
      } else if (model_type %in% c("a", "additive")) {
        model_type <- "A"
      } else if (model_type %in% c("pa", "product-additive")) {
        model_type <- "PA"
      } else if (model_type %in% c("pae", "product-additive-excess")) {
        model_type <- "PAE"
      } else if (model_type %in% c("gm", "gmix", "geometric-mixture")) {
        model_type <- "GMIX"
        model_paras <- substr(right_model_terms[term_i], third_split + 1, nchar(right_model_terms[term_i]) - 1)
        model_paras <- gsub("\"", "", model_paras) # remove literal strings if needed
        #
        if (is.na(model_paras) || model_paras == "") {
          # Nothing, just give the defaults
          gmix_theta <- 0.5
          gmix_term <- c()
        } else {
          # There is something, but what?
          if (grepl(",", model_paras)) {
            # Multiple items
            model_paras <- tolower(strsplit(model_paras, ",")[[1]])
            # we need to start by checking if the first thing is a number
            if (all(vapply(model_paras[1], function(x) grepl("^[\\-]{0,1}[0-9]*\\.{0,1}[0-9]*$", x), logical(1))) || all(vapply(model_paras[1], function(x) grepl("^[\\-]{0,1}[0-9]+e[\\-]{0,1}[0-9]+$", x), logical(1)))) {
              # Good, the first item is a number
              gmix_theta <- as.numeric(model_paras[1])
              para_else <- model_paras[2:length(model_paras)]
              # Check if they are all valid
              if (all(vapply(para_else, function(x) grepl(x, "er"), logical(1)))) {
                gmix_term <- ifelse(para_else == "e", 1, 0)
              } else {
                # Remaining entry was wrong
                stop("Error: Gmix term had an invalid option after the theta value. Please only use 'e/r'")
              }
            } else {
              # wasn't a number
              gmix_theta <- 0.5
              para_else <- model_paras[1:length(model_paras)]
              # Check if they are all valid
              if (all(vapply(para_else, function(x) grepl(x, "er"), logical(1)))) {
                gmix_term <- ifelse(para_else == "e", 1, 0)
              } else {
                # Remaining entry was wrong
                stop("Error: Gmix term had an invalid option. Please only use a number or 'e/r'")
              }
            }
          } else {
            # Single Item
            if (all(vapply(model_paras, function(x) grepl("^[\\-]{0,1}[0-9]*\\.{0,1}[0-9]*$", x), logical(1))) || all(vapply(model_paras, function(x) grepl("^[\\-]{0,1}[0-9]+e[\\-]{0,1}[0-9]+$", x), logical(1)))) {
              # It is a number, make it the gmix_theta
              gmix_theta <- as.numeric(model_paras)
              gmix_term <- c()
            } else {
              # it is not a number, is it a 'e' or 'r'?
              if (grepl(tolower(model_paras), "er")) {
                # it is an option!
                gmix_theta <- 0.5
                gmix_term <- ifelse(tolower(model_paras) == "e", 1, 0)
              } else {
                # Doesn't match anything
                stop("Error: Gmix term had an invalid option. Please only use a number or 'e/r'")
              }
            }
          }
        }
        #
      } else if (model_type %in% c("gmix-r", "relative-geometric-mixture")) {
        model_type <- "GMIX-R"
        model_paras <- substr(right_model_terms[term_i], third_split + 1, nchar(right_model_terms[term_i]) - 1)
        if (is.na(model_paras) || model_paras == "") {
          # Nothing, just give the defaults
          gmix_theta <- 0.5
        } else {
          model_paras <- tolower(strsplit(model_paras, ",")[[1]])
          gmix_theta <- as.numeric(model_paras[1])
        }
      } else if (model_type %in% c("gmix-e", "excess-geometric-mixture")) {
        model_type <- "GMIX-E"
        model_paras <- substr(right_model_terms[term_i], third_split + 1, nchar(right_model_terms[term_i]) - 1)
        if (is.na(model_paras) || model_paras == "") {
          # Nothing, just give the defaults
          gmix_theta <- 0.5
        } else {
          model_paras <- tolower(strsplit(model_paras, ",")[[1]])
          gmix_theta <- as.numeric(model_paras[1])
        }
      }
      if (modelform == "NONE") {
        modelform <- model_type
      } else {
        stop("Error: modelform defined twice")
      }
    } else {
      stop(paste("Error: Unknown option encountered, ", model_type, sep = ""))
    }
  }
  if (!null) {
    term_tot <- max(term_n) + 1
    if (modelform == "NONE") {
      modelform <- "ME"
    } else if (modelform == "GMIX-R") {
      modelform <- "GMIX"
      gmix_term <- rep(0, term_tot)
    } else if (modelform == "GMIX-E") {
      modelform <- "GMIX"
      gmix_term <- rep(1, term_tot)
    }
    if (length(gmix_term) < term_tot) {
      gmix_term <- c(gmix_term, rep(1.0, term_tot - length(gmix_term)))
    } else if (length(gmix_term) > term_tot) {
      stop("Error: The gmix option was used with more values than terms")
    }
  }
  # We want to check that it is in the data and not a character
  all_name <- unique(names)
  for (col in all_name) {
    # Make sure it exists
    if (!(col %in% names(df))) {
      stop(paste("Error: Column missing from data: ", col, sep = ""))
    }
    # Make sure it can be a number
    if (!is.numeric(df[[col]])) {
      stop(paste("Error: Column was not numeric: ", col, sep = ""))
    }
  }
  #
  if (length(tform) > 1) {
    og_index <- seq_along(tform)
    all_remove <- rep(0, length(tform))
    for (u_tform in unique(tform)) {
      for (u_term_n in unique(term_n)) {
        new_name <- names[(tform == u_tform) & (term_n == u_term_n)]
        new_index <- og_index[(tform == u_tform) & (term_n == u_term_n)]
        if (length(new_name) > 1) {
          df_temp <- df[, new_name, with = FALSE]
          removed <- c(0)
          for (i in 2:length(names(df_temp))) {
            new_rank <- qr(df_temp[, 1:i])$rank
            if (new_rank == i - sum(removed)) {
              # all good
              removed <- c(removed, 0)
            } else {
              # the column was linearlly dependent
              removed <- c(removed, 1)
              all_remove[new_index[i]] <- 1
            }
          }
        }
      }
    }
    term_n <- term_n[all_remove == 0]
    tform <- tform[all_remove == 0]
    names <- names[all_remove == 0]
  }
  #
  model <- list(
    "term_n" = term_n,
    "tform" = tform,
    "names" = names,
    "modelform" = modelform,
    "gmix_term" = gmix_term,
    "gmix_theta" = gmix_theta,
    "null" = null,
    "expres_calls" = expres_calls
  )
  list(
    "model" = model, "data" = df
  )
}

#' Applies a list of function calls from a model evaluation
#'
#' \code{ColossusExpressionCall} Uses a list of function call info and a data.table, and applies the function calls
#'
#' @param calls List from 'model$expres' calls formatted as the type of function call, and then the arguments
#' @param df dataset, to be modified by the function calls
#' @noRd
#' @return returns the transformed data
#' @family Formula Interpretation
ColossusExpressionCall <- function(calls, df) {
  for (call in calls) {
    if (call[["_exp_type"]] == "factor") {
      factor_arg_list <- call[names(call) != "_exp_type"]
      if ("x" %in% names(factor_arg_list)) {
        factor_col <- factor_arg_list$x
        factor_arg_list[["x"]] <- copy(df[[factor_arg_list$x]])
      } else {
        factor_col <- factor_arg_list[[1]]
        names(factor_arg_list)[[1]] <- "x"
        factor_arg_list[["x"]] <- copy(df[[factor_arg_list$x]])
      }
      xtemp <- do.call(factor, factor_arg_list)
      df[[factor_col]] <- xtemp
      val <- factorize(df, factor_col)
      df <- val$df
    } else if (call[["_exp_type"]] == "power") {
      factor_args <- call[names(call) != "_exp_type"][[1]]
      vals <- strsplit(factor_args, "\\^")[[1]]
      col <- vals[1]
      raised <- vals[2]
      raised <- as.numeric(raised)
      df[[factor_args]] <- df[[col]]^raised
    } else if (call[["_exp_type"]] == "ns") {
      # natural cubic spline
      factor_arg_list <- call[names(call) != "_exp_type"]
      if ("x" %in% names(factor_arg_list)) {
        factor_col <- factor_arg_list$x
        factor_arg_list[["x"]] <- copy(df[[factor_arg_list$x]])
      } else {
        factor_col <- factor_arg_list[[1]]
        names(factor_arg_list)[[1]] <- "x"
        factor_arg_list[["x"]] <- copy(df[[factor_arg_list$x]])
      }
      if (system.file(package = "splines") == "") {
        stop("Error: Attempted to use ns(), but splines not detected on system.")
      }
      xtemp <- do.call(splines::ns, factor_arg_list)
      #
      for (i in seq_len(ncol(xtemp))) {
        x_col <- paste(factor_col, "_ns", i, sep = "")
        if (!(x_col %in% names(df))) {
          df[[x_col]] <- xtemp[, i]
        }
      }
      ##
    } else if (call[["_exp_type"]] == "bs") {
      # b-spline for polynomial spline
      factor_arg_list <- call[names(call) != "_exp_type"]
      if ("x" %in% names(factor_arg_list)) {
        factor_col <- factor_arg_list$x
        factor_arg_list[["x"]] <- copy(df[[factor_arg_list$x]])
      } else {
        factor_col <- factor_arg_list[[1]]
        names(factor_arg_list)[[1]] <- "x"
        factor_arg_list[["x"]] <- copy(df[[factor_arg_list$x]])
      }
      if (system.file(package = "splines") == "") {
        stop("Error: Attempted to use bs(), but splines not detected on system.")
      }
      xtemp <- do.call(splines::bs, factor_arg_list)
      #
      for (i in seq_len(ncol(xtemp))) {
        x_col <- paste(factor_col, "_bs", i, sep = "")
        if (!(x_col %in% names(df))) {
          df[[x_col]] <- xtemp[, i]
        }
      }
      ##
    } else if (call["_exp_type"] == "interaction") {
      # split into the columns
      cols <- strsplit(call[[2]], "\\*")[[1]]
      #      recur_interact <- function(x, y) {
      #        if (length(y) == 1) {
      #          return(paste(x, y[[1]], sep = ":"))
      #        } else {
      #          for (i in y[[1]]) {
      #            y0 <- paste(x, i, sep = ":")
      #            return(recur_interact(y0, y[2:length(y)]))
      #          }
      #        }
      #      }
      interact_tables <- do.call(c, lapply(seq_along(cols), combn, x = cols, simplify = FALSE))
      for (i_table in interact_tables) {
        vals <- unlist(i_table, use.names = FALSE)
        if (length(vals) == 1) {
          if (is.factor(df[[vals]])) {
            # factor
            val <- factorize(df, vals)
            df <- val$df
          }
        } else {
          # there are multiple to combine
          # get the levels for each element
          element_levels <- list()
          for (term_i in seq_along(vals)) {
            factor_col <- vals[term_i]
            if (is.factor(df[[factor_col]])) {
              i_levels <- paste(factor_col, levels(df[[factor_col]]), sep = "_")
              level_ref <- paste(factor_col, levels(df[[factor_col]])[1], sep = "_")
              i_levels <- i_levels[i_levels != level_ref]
              element_levels[[term_i]] <- i_levels
            } else {
              element_levels[[term_i]] <- factor_col
            }
          }
          combs <- c()
          for (i in element_levels[[1]]) {
            combs <- c(combs, paste(i))
          }
          for (i in 2:length(element_levels)) {
            comb_temp <- copy(combs)
            combs <- c()
            for (k in element_levels[[i]]) {
              combs <- c(combs, paste(comb_temp, k, sep = ":"))
            }
          }
          for (comb in combs) {
            #
            entries <- strsplit(comb, ":")[[1]]
            df[[comb]] <- 1
            for (j in entries) {
              df[[comb]] <- df[[comb]] * df[[j]]
            }
          }
        }
      }
    } else {
      # --------------------------------------------------------------------- #
      exp_name <- call[["_exp_type"]]
      factor_arg_list <- call[names(call) != "_exp_type"]
      # Either the item is named x, or is it the first
      factor_col <- factor_arg_list[[1]]
      factor_vals <- unlist(factor_arg_list, use.names = FALSE)
      for (i in seq_along(factor_vals)) {
        if (factor_vals[[i]] %in% names(df)) {
          factor_arg_list[[i]] <- copy(df[[factor_vals[[i]]]])
        }
      }

      xtemp <- do.call(exp_name, factor_arg_list)
      #
      if (is.atomic(xtemp)) {
        x_col <- paste(factor_col, "_", exp_name, sep = "")
        if (!(x_col %in% names(df))) {
          df[[x_col]] <- xtemp
        }
      } else {
        for (i in seq_len(ncol(xtemp))) {
          x_col <- paste(factor_col, "_", exp_name, i, sep = "")
          if (!(x_col %in% names(df))) {
            df[[x_col]] <- xtemp[, i]
          }
        }
      }
    }
  }
  df
}

#' Interprets basic cox survival formula RHS
#'
#' \code{ColossusCoxSurv} assigns and interprets interval columns for cox model.
#' This functions is called using the arguments for Cox in the right-hand side of
#' the formula. Uses an interval start time, end time, and event status. These are
#' expected to be in order or named: tstart, tend, and event.
#' The Fine-Gray and Stratified versions use strata and weight named options
#' or the last two entries.
#'
#' @param ... entries for a cox survival object, tstart, tend, and event. Either in order or named. If unnamed and two entries, tend and event are assumed.
#'
#' @export
#' @return returns list with interval endpoints and event
#' @family Formula Interpretation
ColossusCoxSurv <- function(...) {
  args <- list(...)
  argName <- names(args)
  if (length(args) < 2) {
    stop("Error: Too few entries in Cox survival object")
  }
  if (length(args) > 3) {
    stop("Error: Too many entries in Cox survival object")
  }
  tstart <- "%trunc%"
  tend <- "%trunc%"
  event <- "NULL"
  indx <- pmatch(argName[argName != ""], c("tstart", "tend", "event"), nomatch = 0L)
  if (any(indx == 0L)) {
    stop(gettextf(
      "Error: Argument '%s' not matched in survival object",
      argName[argName != ""][indx == 0L]
    ), domain = NA)
  }
  if (all(argName == "")) {
    # If none have names, assume (start, end, event) or (end, event)
    if (length(args) == 2) {
      tstart <- "%trunc%"
      tend <- args[[1]]
      event <- args[[2]]
    } else if (length(args) == 3) {
      tstart <- args[[1]]
      tend <- args[[2]]
      event <- args[[3]]
    } else {
      stop("Error: Incorrect number of arguments to survival object")
    }
  } else if (any(argName == "")) {
    # start by directly assigning what is available
    if ("tstart" %in% argName) {
      tstart <- args$tstart
    }
    if ("tend" %in% argName) {
      tend <- args$tend
    }
    if ("event" %in% argName) {
      event <- args$event
    }
    # now determine what is left
    # start by guessing by position
    if (length(args) == 2) {
      # either end/event or start/event
      if (event != "NULL") {
        # assume end/event
        tstart <- "%trunc%"
        tend <- args[[1]]
      } else {
        event <- args[[2]]
      }
    } else if (length(args) == 3) {
      if ((tstart == "%trunc%") && (argName[1] == "")) {
        tstart <- args[[1]]
      }
      if ((tend == "%trunc%") && (argName[2] == "")) {
        tend <- args[[2]]
      }
      if ((event == "NULL") && (argName[3] == "")) {
        event <- args[[3]]
      }
    } else {
      stop("Error: Incorrect number of arguments to survival object")
    }
  } else {
    if ("tstart" %in% argName) {
      tstart <- args$tstart
    }
    if ("tend" %in% argName) {
      tend <- args$tend
    }
    if ("event" %in% argName) {
      event <- args$event
    }
  }
  list("tstart" = tstart, "tend" = tend, "event" = event)
}

#' Interprets basic cox survival formula RHS with strata
#'
#' \code{ColossusCoxStrataSurv} assigns and interprets interval columns for cox model with stratification
#'
#' @param ... entries for a cox survival object with strata, tstart, tend, event, and strata. Either in order or named. If unnamed, the last is assumed to be strata and the rest are passed to ColossusCoxSurv.
#'
#' @noRd
#' @return returns list with interval endpoints, event, and strata column
#' @family Formula Interpretation
ColossusCoxStrataSurv <- function(...) {
  args <- list(...)
  argName <- names(args)
  if (length(args) < 3) {
    stop("Error: Too few entries in Cox Strata survival object")
  }
  if (length(args) > 4) {
    stop("Error: Too many entries in Cox Strata survival object")
  }
  strata <- "NULL"
  # Is stata a named entry?
  if ("strata" %in% argName) {
    strata <- parse_literal_string(args$strata)
    res <- do.call(ColossusCoxSurv, args[names(args) != "strata"])
  } else if (all(argName == "")) {
    strata <- parse_literal_string(args[[length(args)]])
    res <- do.call(ColossusCoxSurv, args[seq_len(length(args) - 1)])
  } else {
    if (argName[length(args)] == "") {
      strata <- parse_literal_string(args[[length(args)]])
      res <- do.call(ColossusCoxSurv, args[seq_len(length(args) - 1)])
    } else {
      stop("Error: Final entry of Cox Strata object was not named correctly")
    }
  }
  res[["strata"]] <- strata
  res
}

#' Interprets basic fine-gray survival formula RHS
#'
#' \code{ColossusFineGraySurv} assigns and interprets interval columns for fine-gray model
#'
#' @param ... entries for a Fine_Gray object, tstart, tend, event, and weighting. Either in order or named. If unnamed, the last is assumed to be weighting and the rest are passed to ColossusCoxSurv.
#'
#' @noRd
#' @return returns list with interval endpoints, event, and weighting columns
#' @family Formula Interpretation
ColossusFineGraySurv <- function(...) {
  args <- list(...)
  argName <- names(args)
  if (length(args) < 3) {
    stop("Error: Too few entries in FineGray survival object")
  }
  if (length(args) > 4) {
    stop("Error: Too many entries in FineGray survival object")
  }
  weight <- "NULL"
  # Is stata a named entry?
  if ("weight" %in% argName) {
    weight <- args$weight
    res <- do.call(ColossusCoxSurv, args[names(args) != "weight"])
  } else if (all(argName == "")) {
    weight <- args[[length(args)]]
    res <- do.call(ColossusCoxSurv, args[seq_len(length(args) - 1)])
  } else {
    if (argName[length(args)] == "") {
      weight <- args[[length(args)]]
      res <- do.call(ColossusCoxSurv, args[seq_len(length(args) - 1)])
    } else {
      stop("Error: Final entry of FineGray object was not named correctly")
    }
  }
  res["weight"] <- weight
  res
}

#' Interprets basic fine-gray survival formula RHS with strata
#'
#' \code{ColossusFineGraySurv} assigns and interprets interval columns for fine-gray model and stratification
#'
#' @param ... entries for a Fine_Gray object with strata, tstart, tend, event, strata, and weighting. Either in order or named. If unnamed, the last is assumed to be weighting and the rest are passed to ColossusCoxStrataSurv.
#'
#' @noRd
#' @return returns list with interval endpoints, event, strata, and weighting columns
#' @family Formula Interpretation
ColossusFineGrayStrataSurv <- function(...) {
  args <- list(...)
  argName <- names(args)
  if (length(args) < 4) {
    stop("Error: Too few entries in FineGray Strata survival object")
  }
  if (length(args) > 5) {
    stop("Error: Too many entries in FineGray Strata survival object")
  }
  weight <- "NULL"
  # Is stata a named entry?
  if ("weight" %in% argName) {
    weight <- args$weight
    res <- do.call(ColossusCoxStrataSurv, args[names(args) != "weight"])
  } else if (all(argName == "")) {
    weight <- args[[length(args)]]
    res <- do.call(ColossusCoxStrataSurv, args[seq_len(length(args) - 1)])
  } else {
    if (argName[length(args)] == "") {
      weight <- args[[length(args)]]
      res <- do.call(ColossusCoxStrataSurv, args[seq_len(length(args) - 1)])
    } else {
      stop("Error: Final entry of FineGray object was not named correctly")
    }
  }
  res["weight"] <- weight
  res
}

# --------------------------------------------------------------------------------- #
#' Interprets basic poisson survival formula RHS
#'
#' \code{ColossusPoisSurv} assigns and interprets interval columns for poisson model.
#' This functions is called using the arguments for Poisson or Poisson_Strata in the
#' right-hand side of the formula. Uses an person-year column, number of events, and
#' any strata columns. The first two are expected to be in order or named: pyr and event.
#' Anything beyond the event name is assumed to be strata if Poisson_Strata is used.
#'
#' @param ... entries for a Poisson object with or without strata, pyr, event, and any strata columns. Either in order or named. The first two are assumed to be pyr and event, the rest assumed to be strata columns
#'
#' @export
#' @return returns list with duration, strata if used, and event
#' @family Formula Interpretation
ColossusPoisSurv <- function(...) {
  args <- list(...)
  argName <- names(args)
  if (length(args) < 2) {
    stop("Error: Too few entries in Poisson survival object")
  }
  pyr <- "NULL"
  event <- "NULL"
  strata <- "NULL"
  indx <- pmatch(argName[argName != ""], c("pyr", "event"), nomatch = 0L)
  if (any(indx == 0L)) {
    stop(gettextf(
      "Error: Argument '%s' not matched in survival object",
      argName[argName != ""][indx == 0L]
    ), domain = NA)
  }
  if (all(argName == "")) {
    # If none have names, assume (pyr, event, and all strata)
    if (length(args) == 2) {
      pyr <- args[[1]]
      event <- args[[2]]
    } else {
      pyr <- args[[1]]
      event <- args[[2]]
      strata <- unlist(args[3:length(args)], use.names = FALSE)
    }
  } else if (any(argName == "")) {
    # start by directly assigning what is available
    if ("pyr" %in% argName) {
      pyr <- args$pyr
    }
    if ("event" %in% argName) {
      event <- args$event
    }
    # now determine what is left
    # start by guessing by position
    if (length(args) == 2) {
      if (event != "NULL") {
        pyr <- args[[1]]
      } else {
        event <- args[[2]]
      }
    } else {
      if ((pyr == "NULL") && (argName[1] == "")) {
        pyr <- args[[1]]
      }
      if ((event == "NULL") && (argName[2] == "")) {
        event <- args[[2]]
      }
      strata <- unlist(args[3:length(args)], use.names = FALSE)
    }
  } else {
    pyr <- args$pyr
    event <- args$event
  }
  list("pyr" = pyr, "event" = event, "strata" = strata)
}

# --------------------------------------------------------------------------------- #
#' Interprets basic case-control survival formula RHS with no grouping
#'
#' \code{ColossusCaseConSurv} assigns and interprets interval columns for case-control model without grouping.
#'
#' @param ... entries for a Case-Control object, only the event column
#'
#' @noRd
#' @return returns list with event
#' @family Formula Interpretation
ColossusCaseConSurv <- function(...) {
  # expects one arguement, as the event column
  args <- list(...)
  argName <- names(args)
  if (length(args) < 1) {
    stop("Error: Too few entries in Case-Control survival object")
  }
  if (length(args) > 1) {
    stop("Error: Too many entries in Case-Control survival object")
  }
  if ((argName[[1]] == "event") || is.null(argName[[1]])) {
    event <- args[[1]]
  } else {
    stop("Error: Entry must be unnamed or named 'event'")
  }
  list("event" = event)
}

#' Interprets basic case-control survival formula RHS with grouping by risk group
#'
#' \code{ColossusCaseConTimeSurv} assigns and interprets interval columns for case-control model with grouping by risk group.
#'
#' @param ... entries for a Case-Control object, with risk group info. Similar to basic Cox survival, interval start, end, and the event column. Either named or in order.
#'
#' @noRd
#' @return returns list with event
#' @family Formula Interpretation
ColossusCaseConTimeSurv <- function(...) {
  # expects time and event arguements
  args <- list(...)
  argName <- names(args)
  if (length(args) < 2) {
    stop("Error: Too few entries in Case-Control survival object matched on time")
  }
  if (length(args) > 3) {
    stop("Error: Too many entries in Case-Control survival object matched on time")
  }
  tstart <- "%trunc%"
  tend <- "%trunc%"
  event <- "NULL"
  indx <- pmatch(argName[argName != ""], c("tstart", "tend", "event"), nomatch = 0L)
  if (any(indx == 0L)) {
    stop(gettextf(
      "Error: Argument '%s' not matched in survival object",
      argName[argName != ""][indx == 0L]
    ), domain = NA)
  }
  if (all(argName == "")) {
    # If none have names, assume (start, end, event) or (end, event)
    if (length(args) == 2) {
      tstart <- "%trunc%"
      tend <- args[[1]]
      event <- args[[2]]
    } else if (length(args) == 3) {
      tstart <- args[[1]]
      tend <- args[[2]]
      event <- args[[3]]
    } else {
      stop("Error: Incorrect number of arguments to survival object")
    }
  } else if (any(argName == "")) {
    # start by directly assigning what is available
    if ("tstart" %in% argName) {
      tstart <- args$tstart
    }
    if ("tend" %in% argName) {
      tend <- args$tend
    }
    if ("event" %in% argName) {
      event <- args$event
    }
    # now determine what is left
    # start by guessing by position
    if (length(args) == 2) {
      # either end/event or start/event
      if (event != "NULL") {
        # assume end/event
        tstart <- "%trunc%"
        tend <- args[[1]]
      } else {
        event <- args[[2]]
      }
    } else if (length(args) == 3) {
      if ((tstart == "%trunc%") && (argName[1] == "")) {
        tstart <- args[[1]]
      }
      if ((tend == "%trunc%") && (argName[2] == "")) {
        tend <- args[[2]]
      }
      if ((event == "NULL") && (argName[3] == "")) {
        event <- args[[3]]
      }
    } else {
      stop("Error: Incorrect number of arguments to survival object")
    }
  } else {
    if ("tstart" %in% argName) {
      tstart <- args$tstart
    }
    if ("tend" %in% argName) {
      tend <- args$tend
    }
    if ("event" %in% argName) {
      event <- args$event
    }
  }
  list("tstart" = tstart, "tend" = tend, "event" = event)
}

#' Interprets basic case-control survival formula RHS with grouping by strata
#'
#' \code{ColossusCaseConStrataSurv} assigns and interprets interval columns for case-control model with grouping by strata.
#'
#' @param ... entries for a Case-Control object, with strata. Expects an event column and strata column, either named or in order.
#'
#' @noRd
#' @return returns list with event
#' @family Formula Interpretation
ColossusCaseConStrataSurv <- function(...) {
  # expects two arguements, the event and strata columns
  args <- list(...)
  argName <- names(args)
  if (length(args) < 2) {
    stop("Error: Too few entries in Case-Control survival object matched on strata")
  }
  if (length(args) > 2) {
    stop("Error: Too many entries in Case-Control survival object matched on strata")
  }
  indx <- pmatch(argName[argName != ""], c("strata", "event"), nomatch = 0L)
  if (any(indx == 0L)) {
    stop(gettextf(
      "Error: Argument '%s' not matched in survival object",
      argName[argName != ""][indx == 0L]
    ), domain = NA)
  }
  #
  if ("event" %in% argName) {
    event <- args$event
    strata <- parse_literal_string(args[names(args) != "event"][[1]])
  } else if ("strata" %in% argName) {
    strata <- parse_literal_string(args$strata)
    event <- args[names(args) != "strata"][[1]]
  } else {
    event <- args[[1]]
    strata <- parse_literal_string(args[[2]])
  }
  #
  list("event" = event, "strata" = strata)
}

#' Interprets basic case-control survival formula RHS with grouping by strata and risk group
#'
#' \code{ColossusCaseConTimeStrataSurv} assigns and interprets interval columns for case-control model with grouping by strata and risk group.
#'
#' @param ... entries for a Case-Control object, with strata and interval info. Similar to the stratified Cox survival object. Expects interval start, end, event, and strata in order or named. Removes strata and calls the standard Case-Control survival object grouped by risk group.
#'
#' @noRd
#' @return returns list with event
#' @family Formula Interpretation
ColossusCaseConTimeStrataSurv <- function(...) {
  args <- list(...)
  argName <- names(args)
  if (length(args) < 3) {
    stop("Error: Too few entries in Case-Control survival object matched on strata and time")
  }
  if (length(args) > 4) {
    stop("Error: Too many entries in Case-Control survival object matched on strata and time")
  }
  strata <- "NULL"
  # Is stata a named entry?
  if ("strata" %in% argName) {
    strata <- parse_literal_string(args$strata)
    res <- do.call(ColossusCaseConTimeSurv, args[names(args) != "strata"])
  } else if (all(argName == "")) {
    strata <- parse_literal_string(args[[length(args)]])
    res <- do.call(ColossusCaseConTimeSurv, args[seq_len(length(args) - 1)])
  } else {
    if (argName[length(args)] == "") {
      strata <- parse_literal_string(args[[length(args)]])
      res <- do.call(ColossusCaseConTimeSurv, args[seq_len(length(args) - 1)])
    } else {
      stop("Error: Final entry of Case Control Strata and Time object was not named correctly")
    }
  }
  res[["strata"]] <- strata
  res
}

#' Interprets basic logistic survival formula RHS with no grouping
#'
#' \code{ColossusLogitSurv} assigns and interprets columns for trials and events in logistic model with no grouping.
#'
#' @param ... entries for a Logistic object, trials and events. trials not provided assumes one trial per row.
#'
#' @export
#' @return returns list with event
#' @family Formula Interpretation
ColossusLogitSurv <- function(...) {
  args <- list(...)
  argName <- names(args)
  #
  if (length(args) < 1) {
    stop("Error: Too few entries in Logistic survival object")
  }
  if (length(args) > 2) {
    stop("Error: Too many entries in Logistic survival object")
  }
  trials <- "NULL"
  events <- "NULL"
  indx <- pmatch(argName[argName != ""], c("trials", "event", "events"), nomatch = 0L)
  if (any(indx == 0L)) {
    stop(gettextf(
      "Error: Argument '%s' not matched in survival object",
      argName[argName != ""][indx == 0L]
    ), domain = NA)
  }
  if (("event" %in% argName) && ("events" %in% argName)) {
    stop("Error: Cannot provide both 'event' and 'events'")
  }
  if (all(argName == "")) {
    if (length(args) == 1) {
      # should just be the events
      # assume only one trial
      events <- args[[1]]
      trials <- "CONST"
    } else {
      # should be events and then cases
      events <- args[[2]]
      trials <- args[[1]]
    }
  } else {
    # atleast one is named
    if (any(argName == "")) {
      # one is named
      if ("trials" %in% argName) {
        trials <- args$trials
        events <- args[names(args) != "trials"][[1]]
      }
      if ("event" %in% argName) {
        events <- args$event
        trials <- args[names(args) != "event"][[1]]
      }
      if ("events" %in% argName) {
        events <- args$events
        trials <- args[names(args) != "events"][[1]]
      }
    } else {
      trials <- args$trials
      events <- args[names(args) != "trials"][[1]]
    }
  }
  list("event" = events, "trials" = trials)
}
Any scripts or data that you put into this service are public.
Colossus documentation built on Feb. 20, 2026, 5:10 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
Colossus
"Risk Model Regression and Analysis with Complex Non-Linear Models"

R/SurvTypes.R
In Colossus: "Risk Model Regression and Analysis with Complex Non-Linear Models"

Documented in ColossusCoxSurv ColossusLogitSurv ColossusPoisSurv get_form get_form_joint

Try the Colossus package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

Colossus "Risk Model Regression and Analysis with Complex Non-Linear Models"

R/SurvTypes.R In Colossus: "Risk Model Regression and Analysis with Complex Non-Linear Models"

Documented in ColossusCoxSurv ColossusLogitSurv ColossusPoisSurv get_form get_form_joint

Try the Colossus package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

Colossus
"Risk Model Regression and Analysis with Complex Non-Linear Models"

R/SurvTypes.R
In Colossus: "Risk Model Regression and Analysis with Complex Non-Linear Models"