R/Utility.R

Defines functions Interpret_Output Convert_Model_Eq Event_Time_Gen Event_Count_Gen Check_Verbose System_Version Rcpp_version Rcomp_version gcc_version get_os Joint_Multiple_Events Time_Since Date_Shift gen_time_dep Check_Trunc Check_Dupe_Columns Likelihood_Ratio_Test interact_them factorize_par factorize Linked_Lin_Exp_Para Linked_Dose_Formula Def_Control_Guess Def_model_control Def_modelform_fix Def_Control Replace_Missing Correct_Formula_Order Gather_Guesses_CPP

Documented in Check_Dupe_Columns Check_Trunc Check_Verbose Convert_Model_Eq Correct_Formula_Order Date_Shift Def_Control Def_Control_Guess Def_model_control Def_modelform_fix Event_Count_Gen Event_Time_Gen factorize factorize_par Gather_Guesses_CPP gcc_version gen_time_dep get_os interact_them Interpret_Output Joint_Multiple_Events Likelihood_Ratio_Test Linked_Dose_Formula Linked_Lin_Exp_Para Rcomp_version Rcpp_version Replace_Missing System_Version Time_Since

#' Performs checks to gather a list of guesses and iterations
#'
#' \code{Gather_Guesses_CPP} called from within R, uses a list of options and the model definition to generate a list of parameters and iterations that do not produce errors
#'
#' @inheritParams R_template
#' @param a_n_default center of parameter distribution guessing scope
#' @param dfc vector matching subterm number to matrix column
#' @param x_all covariate matrix
#'
#' @return returns a list of the final results
#' @export
#' @examples
#' library(data.table)
#' a <- c(0, 1, 2, 3, 4, 5, 6)
#' b <- c(1, 2, 3, 4, 5, 6, 7)
#' c <- c(0, 1, 0, 0, 0, 1, 0)
#' d <- c(3, 4, 5, 6, 7, 8, 9)
#' df <- data.table::data.table("a" = a, "b" = b, "c" = c, "d" = d)
#' time1 <- "a"
#' time2 <- "b"
#' event <- "c"
#' names <- c("d")
#' term_n <- c(0)
#' tform <- c("loglin")
#' keep_constant <- c(0)
#' a_n <- c(-0.1)
#' a_n_default <- a_n
#' modelform <- "M"
#' control <- list(
#'   "ncores" = 2, "lr" = 0.75, "maxiter" = -1,
#'   "halfmax" = 5, "epsilon" = 1e-9,
#'   "deriv_epsilon" = 1e-9, "abs_max" = 1.0,
#'   "dose_abs_max" = 100.0, "verbose" = FALSE, "ties" = "breslow",
#'   "double_step" = 1
#' )
#' guesses_control <- list()
#' model_control <- list()
#' all_names <- unique(names(df))
#' dfc <- match(names, all_names)
#' term_tot <- max(term_n) + 1
#' x_all <- as.matrix(df[, all_names, with = FALSE])
#' control <- Def_Control(control)
#' guesses_control <- Def_Control_Guess(guesses_control, a_n)
#' model_control <- Def_model_control(model_control)
#' options(warn = -1)
#' Gather_Guesses_CPP(
#'   df, dfc, names, term_n, tform, keep_constant,
#'   a_n, x_all, a_n_default,
#'   modelform, control, guesses_control
#' )
#' @importFrom rlang .data
Gather_Guesses_CPP <- function(df, dfc, names, term_n, tform, keep_constant, a_n, x_all, a_n_default, modelform, control, guesses_control, model_control = list()) {
  tryCatch(
    {
      df <- setDT(df)
    },
    error = function(e) {
      df <- data.table(df)
    }
  )
  if (typeof(a_n) != "list") {
    a_n <- list(a_n)
  }
  term_tot <- max(term_n) + 1
  a_ns <- c(NaN)
  maxiters <- c(NaN)
  model_control <- Def_model_control(model_control)
  val <- Def_modelform_fix(control, model_control, modelform, term_n)
  modelform <- val$modelform
  model_control <- val$model_control
  val <- Correct_Formula_Order(
    term_n, tform, keep_constant, a_n_default,
    names
  )
  term_n <- val$term_n
  tform <- val$tform
  keep_constant <- val$keep_constant
  a_n_default <- val$a_n
  names <- val$names
  if (length(a_n_default) != length(names)) {
    stop(paste("Error: Default Parameters used: ", length(a_n_default),
      ", Covariates used: ", length(names),
      sep = ""
    ))
  }
  for (i in seq_along(tform)) {
    if (grepl("_int", tform[i], fixed = FALSE)) {
      # fine
    } else if (grepl("lin_exp_exp_slope", tform[i], fixed = FALSE)) {
      # fine
    } else if (grepl("_slope", tform[i], fixed = FALSE)) {
      # fine
    } else if (grepl("loglin", tform[i], fixed = FALSE)) {
      # fine
    } else if (grepl("lin", tform[i], fixed = FALSE)) {
      # fine
    } else {
      stop(paste("Error: tform not implemented ", tform[i], sep = ""))
    }
  }
  for (i in seq_len(length(a_n))) {
    a_n0 <- a_n[[i]]
    if (length(a_n0) != length(names)) {
      stop(paste("Error: Parameters used: ", length(a_n0),
        ", Covariates used: ", length(names),
        sep = ""
      ))
    }
    if (length(term_n) < length(names)) {
      stop(paste("Error: Terms used: ", length(term_n),
        ", Covariates used: ", length(names),
        sep = ""
      ))
    } else if (length(term_n) > length(names)) {
      stop(paste("Error: Terms used: ", length(term_n),
        ", Covariates used: ", length(names),
        sep = ""
      ))
    }
    if (length(tform) < length(names)) {
      stop(paste("Error: Term types used: ", length(tform),
        ", Covariates used: ", length(names),
        sep = ""
      ))
    } else if (length(tform) > length(names)) {
      stop(paste("Error: Term types used: ", length(tform),
        ", Covariates used: ", length(names),
        sep = ""
      ))
    }
    if (length(keep_constant) < length(names)) {
      keep_constant <- c(keep_constant, rep(
        0.0,
        length(names) - length(keep_constant)
      ))
    } else if (length(keep_constant) > length(names)) {
      keep_constant <- keep_constant[seq_len(length(names))]
    }
    rmin <- guesses_control$rmin
    rmax <- guesses_control$rmax
    if (length(rmin) != length(rmax)) {
      warning("Warning: rmin and rmax lists not equal size, defaulting to lin and loglin min/max values")
    }
    keep <- risk_check_transition(
      term_n, tform, a_n0, dfc, x_all,
      0, modelform,
      control, model_control,
      keep_constant, term_tot
    )
    if (keep) {
      if (is.nan(maxiters[1])) {
        a_ns <- c(a_n0)
        maxiters <- c(guesses_control$maxiter)
      } else {
        a_ns <- c(a_ns, a_n0)
        maxiters <- c(maxiters, guesses_control$maxiter)
      }
    }
  }
  while (length(maxiters) - length(a_n) < guesses_control$guesses - 1) {
    if (guesses_control$verbose >= 3) {
      message(paste("Note: ", length(maxiters), " valid guesses",
        sep = ""
      )) # nocov
    }
    if (length(rmin) != length(rmax)) {
      for (i in seq_along(tform)) {
        if (guesses_control$guess_constant[i] == 0) {
          if (grepl("_int", tform[i], fixed = FALSE)) {
            a_n0[i] <- runif(1,
              min = guesses_control$intercept_min,
              max = guesses_control$intercept_max
            ) +
              a_n_default[i]
          } else if (grepl("lin_exp_exp_slope", tform[i],
            fixed = FALSE
          )) {
            a_n0[i] <- runif(1,
              min = guesses_control$exp_slope_min,
              max = guesses_control$exp_slope_max
            ) +
              a_n_default[i]
          } else if (grepl("_slope", tform[i], fixed = FALSE)) {
            a_n0[i] <- runif(1,
              min = guesses_control$lin_min,
              max = guesses_control$lin_max
            ) +
              a_n_default[i]
          } else if (grepl("loglin", tform[i], fixed = FALSE)) {
            a_n0[i] <- runif(1,
              min = guesses_control$exp_min,
              max = guesses_control$exp_max
            ) +
              a_n_default[i]
          } else if (grepl("lin", tform[i], fixed = FALSE)) {
            a_n0[i] <- runif(1,
              min = guesses_control$lin_min,
              max = guesses_control$lin_max
            ) +
              a_n_default[i]
          } else {
            stop(paste("Error: tform not implemented ", tform[i],
              sep = ""
            ))
          }
        } else {
          a_n0[i] <- a_n_default[i]
        }
      }
    } else {
      for (i in seq_along(tform)) {
        if (guesses_control$guess_constant[i] == 0) {
          a_n0[i] <- runif(1,
            min = guesses_control$rmin[i],
            max = guesses_control$rmax[i]
          ) +
            a_n_default[i]
        } else {
          a_n0[i] <- a_n_default[i]
        }
      }
    }
    keep <- risk_check_transition(
      term_n, tform, a_n0, dfc, x_all,
      0, modelform, control, model_control,
      keep_constant, term_tot
    )
    if (keep) {
      if (is.nan(maxiters[1])) {
        a_ns <- c(a_n0)
        maxiters <- c(guesses_control$maxiter)
      } else {
        a_ns <- c(a_ns, a_n0)
        maxiters <- c(maxiters, guesses_control$maxiter)
      }
    }
  }
  list("a_ns" = a_ns, "maxiters" = maxiters)
}

#' Corrects the order of terms/formula/etc
#'
#' \code{Correct_Formula_Order} checks the order of formulas given and corrects any ordering issues, orders alphabetically, by term number, etc.
#'
#' @inheritParams R_template
#'
#' @return returns the corrected lists
#' @export
#' @family Data Cleaning Functions
#' @examples
#' library(data.table)
#' ## basic example code reproduced from the starting-description vignette
#' term_n <- c(0, 1, 1, 0, 0)
#' tform <- c("loglin", "quad_slope", "lin", "lin_int", "lin_slope")
#' keep_constant <- c(0, 0, 0, 1, 0)
#' a_n <- c(1, 2, 3, 4, 5)
#' names <- c("a", "a", "a", "a", "a")
#' val <- Correct_Formula_Order(term_n, tform, keep_constant,
#'   a_n, names,
#'   cons_mat = matrix(c(0)),
#'   cons_vec = c(0)
#' )
#' term_n <- val$term_n
#' tform <- val$tform
#' keep_constant <- val$keep_constant
#' a_n <- val$a_n
#' names <- val$names
#'
Correct_Formula_Order <- function(term_n, tform, keep_constant, a_n, names, cons_mat = matrix(c(0)), cons_vec = c(0), verbose = FALSE, model_control = list()) {
  #
  verbose <- Check_Verbose(verbose)
  if ("para_number" %in% names(model_control)) {
    # pass
  } else {
    model_control["para_number"] <- 0
  }
  if (is.matrix(cons_mat)) {
    # pass
  } else {
    cons_mat <- as.matrix(cons_mat)
    warning("Warning: Constraint Matrix was not a matrix, converted")
  }
  #
  if (min(keep_constant) < 0) {
    stop(paste("Error: keep_constant expects 0/1 values, minimum value was ",
      min(keep_constant),
      sep = ""
    ))
  }
  if (max(keep_constant) > 1) {
    stop(paste("Error: keep_constant expects 0/1 values, maximum value was ",
      max(keep_constant),
      sep = ""
    ))
  }
  if (any(keep_constant != round(keep_constant))) {
    stop(paste("Error: keep_constant expects 0/1 values, atleast one value was noninteger",
      sep = ""
    ))
  }
  if (any(term_n != round(term_n))) {
    stop(paste("Error: term_n expects integer values, atleast one value was noninteger",
      sep = ""
    ))
  }
  if (min(term_n) != 0) {
    if (verbose >= 2) {
      warning(paste("Warning: term_n expects nonnegative integer values and a minimum of 0, minimum value was ",
        min(term_n),
        ". Minimum value set to 0, others shifted by ",
        -1 * min(term_n),
        sep = ""
      ))
    }
    term_n <- term_n - min(term_n)
  }
  if (length(sort(unique(term_n))) != length(min(term_n):max(term_n))) {
    stop(paste("Error: term_n expects no missing integer values. Term numbers range from ", min(term_n),
      " to ", max(term_n), " but term_n has ",
      length(unique(term_n)), " unique values instead of ",
      length(min(term_n):max(term_n)),
      sep = ""
    ))
  }
  if (length(keep_constant) < length(names)) {
    keep_constant <- c(keep_constant, rep(0.0, length(names) -
      length(keep_constant)))
  } else if (length(keep_constant) > length(names)) {
    keep_constant <- keep_constant[seq_len(length(names))]
  }
  if (length(term_n) < length(names)) {
    if (verbose >= 2) {
      warning(paste("Warning: Terms used: ", length(term_n),
        ", Covariates used: ", length(names),
        sep = ""
      ))
    }
    term_n <- c(term_n, rep(0, length(names) -
      length(term_n)))
  } else if (length(term_n) > length(names)) {
    if (verbose >= 2) {
      warning(paste("Warning: Terms used: ", length(term_n),
        ", Covariates used: ", length(names),
        sep = ""
      ))
    }
    term_n <- term_n[seq_len(length(names))]
  }
  if (length(tform) < length(names)) {
    if (verbose >= 2) {
      warning(paste("Warning: Term types used: ", length(tform),
        ", Covariates used: ", length(names),
        sep = ""
      ))
    }
    tform <- c(tform, rep("loglin", length(names) -
      length(tform)))
  } else if (length(tform) > length(names)) {
    if (verbose >= 2) {
      warning(paste("Warning: Term types used: ", length(tform),
        ", Covariates used: ", length(names),
        sep = ""
      ))
    }
    tform <- tform[seq_len(length(names))]
  }
  col_to_cons <- c()
  for (i in keep_constant) {
    if (i == 0) {
      if (length(col_to_cons) == 0) {
        col_to_cons <- c(1)
      } else {
        col_to_cons <- c(col_to_cons, max(col_to_cons) + 1)
      }
    } else {
      col_to_cons <- c(col_to_cons, 0)
    }
  }
  if (ncol(cons_mat) > 1) {
    if (ncol(cons_mat) != (length(keep_constant) - sum(keep_constant))) {
      stop("Error: Constraint matrix has incorrect number of columns")
    }
    if (nrow(cons_mat) != length(cons_vec)) {
      stop("Error: Constraint rows and constant lengths do not match")
    }
  }
  if (min(keep_constant) > 0) {
    stop("Error: Atleast one parameter must be free")
  }
  tform <- tolower(tform)
  for (i in 1:length(tform)) {
    if (tform[i] %in% c("plin", "plinear", "product-linear")) {
      tform[i] <- "plin"
    } else if (tform[i] %in% c("lin", "linear")) {
      tform[i] <- "lin"
    } else if (tform[i] %in% c("loglin", "loglinear", "log-linear")) {
      tform[i] <- "loglin"
    }
  }
  tform_order <- c(
    "loglin", "lin", "plin", "loglin_slope", "loglin_top",
    "lin_slope", "lin_int", "quad_slope",
    "step_slope", "step_int",
    "lin_quad_slope", "lin_quad_int", "lin_exp_slope",
    "lin_exp_int", "lin_exp_exp_slope"
  )
  tform_iden <- match(tform, tform_order)
  if (any(is.na(tform_iden))) {
    stop(paste("Missing tform option ", tform[is.na(tform_iden)],
      ", ",
      sep = ""
    ))
  }
  if (((typeof(a_n) == "list") && (length(a_n) == 1)) || (typeof(a_n) != "list")) {
    if (typeof(a_n) == "list") {
      a_n <- a_n[[1]]
    }
    if (length(a_n) < length(names)) {
      if (verbose >= 2) {
        warning(paste("Warning: Parameters used: ",
          length(a_n), ", Covariates used: ",
          length(names), ", Remaining filled with 0.01",
          sep = ""
        ))
      }
      a_n <- c(a_n, rep(0.01, length(names) - length(a_n)))
    } else if (length(a_n) > length(names)) {
      stop(paste("Error: Parameters used: ", length(a_n),
        ", Covariates used: ", length(names),
        sep = ""
      ))
    }
    df <- data.table::data.table(
      "term_n" = term_n, "tform" = tform,
      "keep_constant" = keep_constant,
      "a_n" = a_n, "names" = names,
      "iden_const" = rep(0, length(names)),
      "para_num" = rep(0, length(names)),
      "current_order" = seq_len(length(tform)),
      "constraint_order" = col_to_cons
    )
    df$para_num[[model_control[["para_number"]] + 1]] <- 1
    df$tform_order <- tform_iden
    keycol <- c("term_n", "names", "tform_order")
    data.table::setorderv(df, keycol)
    a_n <- df$a_n
  } else {
    a_0 <- a_n[[1]]
    for (a_i in a_n) {
      if (length(a_i) != length(a_0)) {
        stop(paste("Error: Parameters used in first option: ",
          length(a_0),
          ", Parameters used in different option: ",
          length(a_i),
          ", please fix parameter length",
          sep = ""
        ))
      }
    }
    if (length(a_0) < length(names)) {
      if (verbose >= 2) {
        warning(paste("Warning: Parameters used: ", length(a_0),
          ", Covariates used: ", length(names),
          ", Remaining filled with 0.01",
          sep = ""
        ))
      }
      for (i in seq_len(length(a_n))) {
        a_n[[i]] <- c(
          a_n[[i]],
          rep(0.01, length(names) - length(a_n[[i]]))
        )
      }
    } else if (length(a_0) > length(names)) {
      stop(paste("Error: Parameters used: ", length(a_0),
        ", Covariates used: ", length(names),
        sep = ""
      ))
    }
    df <- data.table::data.table(
      "term_n" = term_n, "tform" = tform,
      "keep_constant" = keep_constant,
      "names" = names, "iden_const" = rep(0, length(names)),
      "para_num" = rep(0, length(names)),
      "current_order" = seq_along(tform),
      "constraint_order" = col_to_cons
    )
    df$para_num[[model_control[["para_number"]] + 1]] <- 1
    for (i in seq_len(length(a_n))) {
      df[[paste("a_", i, sep = "")]] <- a_n[[i]]
    }
    df$tform_order <- tform_iden
    keycol <- c("term_n", "names", "tform_order")
    data.table::setorderv(df, keycol)
    for (i in seq_len(length(a_n))) {
      a_n[[i]] <- df[[paste("a_", i, sep = "")]]
    }
  }
  a <- df$tform
  for (i in seq_len(length(a))) {
    if (i < length(a)) {
      if ((a[i] == "loglin_slope")) {
        if (a[i + 1] != "loglin_top") {
          stop("Error: loglin_top missing")
        }
      } else if ((a[i] == "lin_slope")) {
        if (a[i + 1] != "lin_int") {
          stop("Error: lin_int missing")
        }
      } else if ((a[i] == "step_slope")) {
        if (a[i + 1] != "step_int") {
          stop("Error: step_int missing")
        }
      } else if ((a[i] == "lin_quad_slope")) {
        if (a[i + 1] != "lin_quad_int") {
          stop("Error: lin_quad_int missing")
        }
      } else if ((a[i] == "lin_exp_slope")) {
        if (a[i + 1] != "lin_exp_int") {
          stop("Error: lin_exp_int missing")
        }
      } else if ((a[i] == "lin_exp_int")) {
        if (a[i + 1] != "lin_exp_exp_slope") {
          stop("Error: lin_exp_exp_slope missing")
        }
      }
    }
    if (i > 1) {
      if ((a[i] == "lin_int")) {
        if (a[i - 1] != "lin_slope") {
          stop("Error: lin_slope missing")
        }
      } else if ((a[i] == "step_int")) {
        if (a[i - 1] != "step_slope") {
          stop("Error: step_slope missing")
        }
      } else if ((a[i] == "lin_quad_int")) {
        if (a[i - 1] != "lin_quad_slope") {
          stop("Error: lin_quad_slope missing")
        }
      } else if ((a[i] == "lin_exp_int")) {
        if (a[i - 1] != "lin_exp_slope") {
          stop("Error: lin_exp_slope missing")
        }
      } else if ((a[i] == "lin_exp_exp_slope")) {
        if (a[i - 1] != "lin_exp_int") {
          stop("Error: lin_exp_int missing")
        }
      }
    }
  }
  col_to_cons <- df$constraint_order
  cons_order <- c()
  for (i in col_to_cons) {
    if (i > 0) {
      cons_order <- c(cons_order, i)
    }
  }
  if (ncol(cons_mat) > 1) {
    colnames(cons_mat) <- seq_len(ncol(cons_mat))
    r0 <- nrow(cons_mat)
    c0 <- ncol(cons_mat)
    cons_mat <- as.matrix(cons_mat[, cons_order])
    if ((nrow(cons_mat) == r0) && (ncol(cons_mat) == c0)) {
      # all good
    } else if ((nrow(cons_mat) == c0) && (ncol(cons_mat) == r0)) {
      cons_mat <- t(cons_mat)
    } else {
      stop("Matrix reordering failed") # nocov
    }
  }
  a_temp <- df$iden_const
  b_temp <- df$para_num
  para_num <- which(b_temp == 1) - 1
  list(
    "term_n" = df$term_n, "tform" = df$tform, "keep_constant" = df$keep_constant,
    "a_n" = a_n, "names" = df$names,
    "Permutation" = df$current_order,
    "cons_mat" = unname(cons_mat), "cons_vec" = cons_vec,
    "para_num" = para_num
  )
}

#' Automatically assigns missing values in listed columns
#'
#' \code{Replace_Missing} checks each column and fills in NA values
#'
#' @inheritParams R_template
#' @family Data Cleaning Functions
#' @return returns a filled datatable
#' @export
#' @examples
#' library(data.table)
#' ## basic example code reproduced from the starting-description vignette
#' df <- data.table::data.table(
#'   "UserID" = c(112, 114, 213, 214, 115, 116, 117),
#'   "Starting_Age" = c(18, 20, 18, 19, 21, 20, 18),
#'   "Ending_Age" = c(30, 45, NA, 47, 36, NA, 55),
#'   "Cancer_Status" = c(0, 0, 1, 0, 1, 0, 0)
#' )
#' df <- Replace_Missing(df, c("Starting_Age", "Ending_Age"), 70)
Replace_Missing <- function(df, name_list, msv, verbose = FALSE) {
  tryCatch(
    {
      df <- setDT(df)
    },
    error = function(e) {
      df <- data.table(df)
    }
  )
  verbose <- Check_Verbose(verbose)
  if (is.na(msv)) {
    stop("Error: The missing-value replacement is also NA")
  }
  for (j in name_list) {
    #
    if (j %in% names(df)) {
      # fine
    } else {
      stop(paste("Error: ", j, " missing from column names", sep = ""))
    }
    if (sum(is.na(df[[j]]))) {
      data.table::set(df, which(is.na(df[[j]])), j, msv)
      if (verbose >= 3) {
        message(paste("Note: Column ", j, " had replaced values",
          sep = ""
        )) # nocov
      }
    }
  }
  return(df)
}

#' Automatically assigns missing control values
#'
#' \code{Def_Control} checks and assigns default values
#'
#' @inheritParams R_template
#' @family Data Cleaning Functions
#' @return returns a filled list
#' @export
#' @examples
#' library(data.table)
#' control <- list(
#'   "ncores" = 2, "lr" = 0.75, "maxiter" = 5,
#'   "ties" = "breslow", "double_step" = 1
#' )
#' control <- Def_Control(control)
#'
Def_Control <- function(control) {
  control_def <- list(
    "verbose" = 0, "lr" = 0.75, "maxiter" = 20,
    "halfmax" = 5, "epsilon" = 1e-4,
    "deriv_epsilon" = 1e-4, "abs_max" = 1.0,
    "change_all" = TRUE, "dose_abs_max" = 100.0,
    "ties" = "breslow", "double_step" = 1,
    "ncores" = as.numeric(detectCores())
  )
  names(control) <- tolower(names(control))
  if ((identical(Sys.getenv("TESTTHAT"), "true")) || (identical(Sys.getenv("TESTTHAT_IS_CHECKING"), "true"))) {
    control_def$ncores <- min(c(2, as.numeric(detectCores())))
  }
  sys_config <- System_Version()
  OpenMP <- sys_config[["OpenMP Enabled"]]
  os <- sys_config[["Operating System"]]
  cpp_compiler <- sys_config[["Default c++"]]
  R_compiler <- sys_config[["R Compiler"]]
  if (!OpenMP) {
    warning("Warning: OpenMP not detected, cores set to 1")
    control$ncores <- 1 # nocov
  }
  if (Sys.getenv("R_COLOSSUS_NOT_CRAN") == "") {
    if (os == "linux") {
      if (cpp_compiler == "gcc") {
        if (R_compiler != "gcc") { # nocov
          control$ncores <- 1 # nocov
          warning("Warning: linux machine not using gcc, cores set to 1. Set R_COLOSSUS_NOT_CRAN environemnt variable to skip check")
        }
      } else if (cpp_compiler == "clang") { # nocov
        control$ncores <- 1 # nocov
        warning("Warning: linux machine using clang, cores set to 1. Set R_COLOSSUS_NOT_CRAN environemnt variable to skip check")
      }
    }
  }
  for (nm in names(control_def)) {
    if (nm %in% names(control)) {
      if (nm == "ncores") {
        if (control$ncores > control_def$ncores) {
          stop(paste("Error: Cores Requested:", control["ncores"],
            ", Cores Available:", control_def["ncores"],
            sep = " "
          )) # nocov
        }
      } else if (nm == "verbose") {
        control$verbose <- Check_Verbose(control$verbose)
      }
    } else {
      control[nm] <- control_def[nm]
    }
  }
  control["ties"] <- tolower(control["ties"])
  control_min <- list(
    "verbose" = 0, "lr" = 0.0, "maxiter" = -1,
    "halfmax" = 0, "epsilon" = 0.0,
    "deriv_epsilon" = 0.0, "abs_max" = 0.0,
    "dose_abs_max" = 0.0
  )
  for (nm in names(control_min)) {
    if (control[[nm]] < control_min[[nm]]) {
      control[nm] <- control_min[nm]
    }
  }
  control_int <- list(
    "verbose" = 0, "maxiter" = -1,
    "halfmax" = 0
  )
  for (nm in names(control_int)) {
    control[nm] <- as.integer(control[nm])
  }
  return(control)
}

#' Automatically assigns geometric-mixture values and checks that a valid modelform is used
#'
#' \code{Def_modelform_fix} checks and assigns default values for modelform options
#'
#' @inheritParams R_template
#' @family Data Cleaning Functions
#' @return returns a filled list
#' @export
#' @examples
#' library(data.table)
#' control <- list(
#'   "ncores" = 2, "lr" = 0.75, "maxiter" = 5,
#'   "ties" = "breslow", "double_step" = 1
#' )
#' control <- Def_Control(control)
#' model_control <- list("single" = TRUE)
#' model_control <- Def_model_control(model_control)
#' term_n <- c(0, 1, 1)
#' modelform <- "a"
#' val <- Def_modelform_fix(control, model_control, modelform, term_n)
#' model_control <- val$model_control
#' modelform <- val$modelform
#'
Def_modelform_fix <- function(control, model_control, modelform, term_n) {
  term_tot <- max(term_n) + 1
  modelform <- toupper(modelform)
  acceptable <- toupper(c(
    "A", "PA", "PAE", "M", "ME", "GMIX", "GMIX-R", "GMIX-E",
    "multiplicative", "multiplicative-excess", "additive",
    "product-additive", "product-additive-excess"
  ))
  if (modelform %in% acceptable) {
    if (modelform %in% c("ME", "MULTIPLICATIVE", "MULTIPLICATIVE-EXCESS")) {
      modelform <- "M"
    } else if (modelform == "ADDITIVE") {
      modelform <- "A"
    } else if (modelform == "PRODUCT-ADDITIVE") {
      modelform <- "PA"
    } else if (modelform == "PRODUCT-ADDITIVE-EXCESS") {
      modelform <- "PAE"
    } else if (modelform == "GMIX-R") {
      model_control$gmix_term <- rep(0, term_tot)
      modelform <- "GMIX"
    } else if (modelform == "GMIX-E") {
      model_control$gmix_term <- rep(1, term_tot)
      modelform <- "GMIX"
    }
  } else {
    stop(paste("Error: Model formula ", modelform,
      " not in acceptable list",
      sep = ""
    ))
  }
  if (modelform == "GMIX") {
    gmix_term <- model_control$gmix_term
    if (length(gmix_term) != term_tot) {
      stop(paste("Error: Terms used:", term_tot,
        ", Terms with gmix types available:",
        length(gmix_term),
        sep = " "
      ))
    }
  }
  return(list("modelform" = modelform, "model_control" = model_control))
}

#' Automatically assigns missing model control values
#'
#' \code{Def_model_control} checks and assigns default values
#'
#' @inheritParams R_template
#' @family Data Cleaning Functions
#' @return returns a filled list
#' @export
#' @examples
#' library(data.table)
#' control <- list("single" = TRUE)
#' control <- Def_model_control(control)
#'
Def_model_control <- function(control) {
  names(control) <- tolower(names(control))
  control_def_names <- c(
    "single", "basic", "null", "cr", "linear_err",
    "gradient", "constraint", "strata", "surv",
    "schoenfeld", "risk",
    "risk_subset", "log_bound", "pearson", "deviance",
    "mcml", "oberved_info", "time_risk"
  )
  for (nm in control_def_names) {
    if (nm %in% names(control)) {
      # fine
    } else {
      control[nm] <- FALSE
    }
  }
  if (control["null"] == TRUE) {
    control["single"] <- TRUE
  }
  if ("step_size" %in% names(control)) {
    # fine
  } else {
    control["step_size"] <- 0.5
  }
  if ("unique_values" %in% names(control)) {
    # fine
  } else {
    control["unique_values"] <- 2
  }
  if ("gmix_theta" %in% names(control)) {
    # fine
  } else {
    control["gmix_theta"] <- 0.5
  }
  if ("gmix_term" %in% names(control)) {
    # fine
  } else {
    control["gmix_term"] <- c(0)
  }
  if ("conditional_threshold" %in% names(control)) {
    # fine
  } else {
    control["conditional_threshold"] <- 50
  }
  if (control[["log_bound"]]) {
    if ("qchi" %in% names(control)) {
      # fine
    } else {
      if ("alpha" %in% names(control)) {
        control["qchi"] <- qchisq(1 - control[["alpha"]], df = 1) / 2
      } else {
        control["alpha"] <- 0.05
        control["qchi"] <- qchisq(1 - control[["alpha"]], df = 1) / 2
      }
    }
    if ("para_num" %in% names(control)) {
      warning("R Warning: para_num detected in model_control, did you mean para_number?")
      control["para_number"] <- control["para_num"]
    }
    if ("para_number" %in% names(control)) {
      # fine
    } else {
      control["para_number"] <- 0
    }
    if ("maxstep" %in% names(control)) {
      # fine
    } else {
      control["maxstep"] <- 10
    }
    if ("manual" %in% names(control)) {
      # fine
    } else {
      control["manual"] <- FALSE
    }
    if ("search_mult" %in% names(control)) {
      # fine
    } else {
      control["search_mult"] <- 1.0
    }
    if ("step_size" %in% names(control)) {
      # fine
    } else {
      control["search_mult"] <- 0.5
    }
  }
  if (control[["gradient"]]) {
    control_def_names <- c(
      "momentum", "adadelta", "adam"
    )
    for (nm in control_def_names) {
      if (nm %in% names(control)) {
        # fine
      } else {
        control[nm] <- FALSE
      }
    }
    # add different parameters
    if ("momentum_decay" %in% names(control)) {
      # fine
    } else {
      control["momentum_decay"] <- 0.9
    }
    if ("learning_decay" %in% names(control)) {
      # fine
    } else {
      control["learning_decay"] <- 0.999
    }
    if ("epsilon_decay" %in% names(control)) {
      # fine
    } else {
      control["epsilon_decay"] <- 1e-4
    }
  }
  return(control)
}

#' Automatically assigns missing guessing control values
#'
#' \code{Def_Control_Guess} checks and assigns default values
#'
#' @inheritParams R_template
#' @family Data Cleaning Functions
#' @return returns a filled list
#' @export
#' @examples
#' library(data.table)
#' guesses_control <- list(
#'   "maxiter" = 10, "guesses" = 10,
#'   "loglin_min" = -1, "loglin_max" = 1, "loglin_method" = "uniform"
#' )
#' a_n <- c(0.1, 2, 1.3)
#' guesses_control <- Def_Control_Guess(guesses_control, a_n)
#'
Def_Control_Guess <- function(guesses_control, a_n) {
  names(guesses_control) <- tolower(names(guesses_control))
  if ("verbose" %in% names(guesses_control)) { # determines extra printing
    guesses_control$verbose <- Check_Verbose(guesses_control$verbose)
  } else {
    guesses_control$verbose <- 0
  }
  if ("maxiter" %in% names(guesses_control)) { # determines the iterations for each guess
    # fine
  } else {
    guesses_control$maxiter <- 5
  }
  if ("guesses" %in% names(guesses_control)) { # determines the number of guesses
    # fine
  } else {
    guesses_control$guesses <- 10
  }
  if ("exp_min" %in% names(guesses_control)) { # minimum exponential parameter change
    # fine
  } else {
    guesses_control$exp_min <- -1
  }
  if ("exp_max" %in% names(guesses_control)) { # maximum exponential parameter change
    # fine
  } else {
    guesses_control$exp_max <- 1
  }
  if ("intercept_min" %in% names(guesses_control)) { # minimum intercept parameter change
    # fine
  } else {
    guesses_control$intercept_min <- -1
  }
  if ("intercept_max" %in% names(guesses_control)) { # maximum intercept parameter change
    # fine
  } else {
    guesses_control$intercept_max <- 1
  }
  if ("lin_min" %in% names(guesses_control)) { # minimum linear parameter change
    # fine
  } else {
    guesses_control$lin_min <- 0.1
  }
  if ("lin_max" %in% names(guesses_control)) { # maximum linear parameter change
    # fine
  } else {
    guesses_control$lin_max <- 1
  }
  if ("exp_slope_min" %in% names(guesses_control)) { # minimum exp_slope parameter change
    # fine
  } else {
    guesses_control$exp_slope_min <- 0.001
  }
  if ("exp_slope_max" %in% names(guesses_control)) { # maximum exp_slope parameter change
    # fine
  } else {
    guesses_control$exp_slope_max <- 2
  }
  if ("term_initial" %in% names(guesses_control)) { # list of term numbers for tiered guessing
    # fine
  } else {
    guesses_control$term_initial <- c(0)
  }
  if ("guess_constant" %in% names(guesses_control)) {
    # binary values for if a parameter is distributed (0) or not (1)
    if (length(guesses_control$guess_constant) < length(a_n)) {
      guesses_control$guess_constant <- c(
        guesses_control$guess_constant,
        rep(0, length(a_n) - length(guesses_control$guess_constant))
      )
    }
  } else {
    guesses_control$guess_constant <- rep(0, length(a_n))
  }
  if ("guesses_start" %in% names(guesses_control)) {
    # number of guesses for first part of tiered guessing
    # fine
  } else {
    guesses_control$guesses_start <- guesses_control$guesses
  }
  if ("strata" %in% names(guesses_control)) { # if stratification is used
    # fine
  } else {
    guesses_control$strata <- FALSE
  }
  if (("rmin" %in% names(guesses_control)) && ("rmax" %in% names(guesses_control))) {
    # if rmin/rmax used is used
    # fine
  } else {
    # if they are unequal length, the default is used
    guesses_control$rmin <- c(-1)
    guesses_control$rmax <- c(-1, -1)
  }
  return(guesses_control)
}

#' Calculates Full Parameter list for Special Dose Formula
#'
#' \code{Linked_Dose_Formula} Calculates all parameters for linear-quadratic and linear-exponential linked formulas
#'
#' @inheritParams R_template
#'
#' @return returns list of full parameters
#' @export
#' @examples
#' library(data.table)
#' tforms <- list("cov_0" = "quad", "cov_1" = "exp")
#' paras <- list("cov_0" = c(1, 3.45), "cov_1" = c(1.2, 4.5, 0.1))
#' full_paras <- Linked_Dose_Formula(tforms, paras)
#'
Linked_Dose_Formula <- function(tforms, paras, verbose = 0) {
  verbose <- Check_Verbose(verbose)
  full_paras <- list()
  for (nm in names(tforms)) {
    if (tforms[nm] == "quad") {
      plist <- unlist(paras[nm], use.names = FALSE)
      a0 <- plist[1]
      y <- plist[2]
      if (a0 < 0) {
        stop("Error: a0 arguement was negative")
      }
      if (is.numeric(a0)) {
        # fine
      } else {
        stop("Error: a0 arguement was not a number")
      }
      if (is.numeric(y)) {
        # fine
      } else {
        stop("Error: threshold arguement was not a number") # nocov
      }
      a1 <- a0 / 2 / y
      b1 <- a0 * y / 2
      full_paras[[nm]] <- c(y, a0, a1, b1)
    } else if (tforms[nm] == "exp") {
      plist <- unlist(paras[nm], use.names = FALSE)
      a0 <- plist[1]
      y <- plist[2]
      b1 <- plist[3]
      if (a0 < 0) {
        stop("Error: a0 arguement was negative")
      }
      if (is.numeric(a0)) {
        # fine
      } else {
        stop("Error: a0 arguement was not a number") # nocov
      }
      if (is.numeric(y)) {
        # fine
      } else {
        stop("Error: threshold arguement was not a number") # nocov
      }
      if (is.numeric(b1)) {
        # fine
      } else {
        stop("Error: exponential arguement was not a number") # nocov
      }
      c1 <- log(a0) - log(b1) + b1 * y
      a1 <- a0 * y + exp(c1 - b1 * y)
      full_paras[[nm]] <- c(y, a0, a1, b1, c1)
    }
  }
  return(full_paras)
}

#' Calculates The Additional Parameter For a linear-exponential formula with known maximum
#'
#' \code{Linked_Lin_Exp_Para} Calculates what the additional parameter would be for a desired maximum
#'
#' @inheritParams R_template
#'
#' @return returns parameter used by Colossus
#' @export
#' @examples
#' library(data.table)
#' y <- 7.6
#' a0 <- 1.2
#' a1_goal <- 15
#' full_paras <- Linked_Lin_Exp_Para(y, a0, a1_goal)
#'
Linked_Lin_Exp_Para <- function(y, a0, a1_goal, verbose = 0) {
  verbose <- Check_Verbose(verbose)
  b1 <- 10
  lr <- 1.0
  if (a0 < 0) {
    stop("Error: a0 arguement was negative")
  }
  if (a1_goal > y * a0) {
    # fine
  } else {
    stop("Error: goal is too low")
  }
  iter_i <- 0
  while (iter_i < 100) {
    iter_i <- iter_i + 1
    c1 <- log(a0 / b1) + b1 * y
    a1 <- a0 * y + exp(c1 - b1 * y)
    a_dif <- a1 - a1_goal
    if (abs(a_dif) < 1e-3) {
      break
    }
    da1 <- -1 / b1 * exp(c1 - b1 * y)
    db1 <- (a1_goal - a1) / da1
    if (-1 * db1 > b1) {
      db1 <- -0.9 * b1
    }
    b1 <- b1 + lr * db1
  }
  return(b1)
}

#' Splits a parameter into factors
#'
#' \code{factorize} uses user provided list of columns to define new parameter for each unique value and update the data.table.
#' Not for interaction terms
#'
#' @inheritParams R_template
#' @family Data Cleaning Functions
#' @return returns a list with two named fields. df for the updated dataframe, and cols for the new column names
#' @export
#' @examples
#' library(data.table)
#' a <- c(0, 1, 2, 3, 4, 5, 6)
#' b <- c(1, 2, 3, 4, 5, 6, 7)
#' c <- c(0, 1, 2, 1, 0, 1, 0)
#' df <- data.table::data.table("a" = a, "b" = b, "c" = c)
#' col_list <- c("c")
#' val <- factorize(df, col_list)
#' df <- val$df
#' new_col <- val$cols
#'
factorize <- function(df, col_list, verbose = 0) {
  tryCatch(
    {
      df <- setDT(df)
    },
    error = function(e) {
      df <- data.table(df)
    }
  )
  verbose <- Check_Verbose(verbose)
  cols <- c()
  col0 <- names(df)
  tnum <- c()
  for (i in seq_len(length(col_list))) {
    col <- col_list[i]
    x <- sort(unlist(as.list(unique(df[, col, with = FALSE])),
      use.names = FALSE
    ))
    for (j in x) {
      newcol <- c(paste(col, j, sep = "_"))
      df[, newcol] <- 1 * (df[, col, with = FALSE] == j)
      cols <- c(cols, newcol)
      tnum <- c(tnum, i)
    }
  }
  cols <- setdiff(names(df), col0)
  if (length(col_list) > 1) {
    cols <- Check_Dupe_Columns(df, cols, rep(0, length(cols)), verbose, TRUE)
  }
  if (verbose >= 3) {
    message(paste("Note: Number of factors:", length(cols), sep = "")) # nocov
  }
  list("df" = df, "cols" = cols)
}

#' Splits a parameter into factors in parallel
#'
#' \code{factorize_par} uses user provided list of columns to define new parameter for each unique value and update the data.table.
#' Not for interaction terms
#'
#' @inheritParams R_template
#' @family Data Cleaning Functions
#' @return returns a list with two named fields. df for the updated dataframe, and cols for the new column names
#' @export
#' @examples
#' library(data.table)
#' a <- c(0, 1, 2, 3, 4, 5, 6)
#' b <- c(1, 2, 3, 4, 5, 6, 7)
#' c <- c(0, 1, 2, 1, 0, 1, 0)
#' df <- data.table::data.table("a" = a, "b" = b, "c" = c)
#' col_list <- c("c")
#' val <- factorize_par(df, col_list, FALSE, 2)
#' df <- val$df
#' new_col <- val$cols
#'
factorize_par <- function(df, col_list, verbose = 0, nthreads = as.numeric(detectCores())) {
  tryCatch(
    {
      df <- setDT(df)
    },
    error = function(e) {
      df <- data.table(df)
    }
  )
  verbose <- Check_Verbose(verbose)
  cols <- c()
  vals <- c()
  names <- c()
  if ((identical(Sys.getenv("TESTTHAT"), "true")) || (identical(Sys.getenv("TESTTHAT_IS_CHECKING"), "true"))) {
    nthreads <- 2
  }
  for (i in seq_len(length(col_list))) {
    col <- col_list[i]
    x <- sort(unlist(as.list(unique(df[, col, with = FALSE])),
      use.names = FALSE
    ))
    for (j in x) {
      newcol <- c(paste(col, j, sep = "_"))
      names <- c(names, newcol)
      vals <- c(vals, j)
      cols <- c(cols, i - 1)
    }
  }
  df0 <- Gen_Fac_Par(
    as.matrix(df[, col_list, with = FALSE]), vals,
    cols, nthreads
  )
  df0 <- data.table::as.data.table(df0)
  names(df0) <- names
  col_keep <- Check_Dupe_Columns(
    df0, names, rep(0, length(cols)),
    verbose, TRUE
  )
  list("df" = cbind(df, df0), "cols" = col_keep)
}

#' Defines Interactions
#'
#' \code{interact_them} uses user provided interactions define interaction terms and update the data.table. assumes interaction is "+" or "*" and applies basic anti-aliasing to avoid duplicates
#'
#' @inheritParams R_template
#' @family Data Cleaning Functions
#' @return returns a list with two named fields. df for the updated dataframe, and cols for the new column names
#' @export
#' @examples
#' library(data.table)
#' a <- c(0, 1, 2, 3, 4, 5, 6)
#' b <- c(1, 2, 3, 4, 5, 6, 7)
#' c <- c(0, 1, 2, 1, 0, 1, 0)
#' df <- data.table::data.table("a" = a, "b" = b, "c" = c)
#' interactions <- c("a?+?b", "a?*?c")
#' new_names <- c("ab", "ac")
#' vals <- interact_them(df, interactions, new_names)
#' df <- vals$df
#' new_col <- vals$cols
interact_them <- function(df, interactions, new_names, verbose = 0) {
  tryCatch(
    {
      df <- setDT(df)
    },
    error = function(e) {
      df <- data.table(df)
    }
  )
  verbose <- Check_Verbose(verbose)
  cols <- c()
  for (i in seq_len(length(interactions))) {
    interac <- interactions[i]
    formula <- unlist(strsplit(interac, "[?]"), use.names = FALSE)
    if (length(formula) != 3) {
      stop(paste(
        "Error: Iteration:", interac, "has incorrect length of",
        length(formula), "but should be 3."
      ))
    }
    newcol <- paste(formula[1], formula[2], formula[3], sep = "")
    if (new_names[i] != "") {
      newcol <- new_names[i]
    }
    col1 <- formula[1]
    col2 <- formula[3]
    if (paste(formula[1], "?", formula[2], "?", formula[3], sep = "") %in% interactions[i + seq_len(length(interactions))]) {
      if (verbose >= 2) {
        warning(paste("Warning: interation ", i, "is duplicated")) # nocov
      }
    } else if (paste(formula[3], "?", formula[2], "?", formula[1], sep = "") %in% interactions[i + seq_len(length(interactions))]) {
      if (verbose >= 2) {
        warning(paste(
          "Warning: the reverse of interation ", i,
          "is duplicated"
        )) # nocov
      }
    } else {
      if (formula[2] == "+") {
        df[, newcol] <- df[, col1, with = FALSE] +
          df[, col2, with = FALSE]
        cols <- c(cols, newcol)
      } else if (formula[2] == "*") {
        df[, newcol] <- df[, col1, with = FALSE] *
          df[, col2, with = FALSE]
        cols <- c(cols, newcol)
      } else {
        stop(paste("Error: Incorrect operation of", formula[2]))
      }
    }
  }
  list("df" = df, "cols" = cols)
}

#' Defines the likelihood ratio test
#'
#' \code{Likelihood_Ratio_Test} uses two models and calculates the ratio
#'
#' @inheritParams R_template
#'
#' @return returns the score statistic
#' @export
#' @examples
#' library(data.table)
#' # In an actual example, one would run two seperate RunCoxRegression regressions,
#' #    assigning the results to e0 and e1
#' e0 <- list("name" = "First Model", "LogLik" = -120)
#' e1 <- list("name" = "New Model", "LogLik" = -100)
#' score <- Likelihood_Ratio_Test(e1, e0)
#'
Likelihood_Ratio_Test <- function(alternative_model, null_model) {
  if (("LogLik" %in% names(alternative_model)) && ("LogLik" %in% names(null_model))) {
    return(2 * (unlist(alternative_model["LogLik"], use.names = FALSE) - unlist(null_model["LogLik"], use.names = FALSE)))
  }
  stop("Error: models input did not contain LogLik values")
  return(NULL) # nocov
}

#' checks for duplicated column names
#'
#' \code{Check_Dupe_Columns} checks for duplicated columns, columns with the same values, and columns with single value. Currently not updated for multi-terms
#'
#' @inheritParams R_template
#' @family Data Cleaning Functions
#' @return returns the usable columns
#' @export
#' @examples
#' library(data.table)
#' a <- c(0, 1, 2, 3, 4, 5, 6)
#' b <- c(1, 2, 3, 4, 5, 6, 7)
#' c <- c(0, 1, 2, 1, 0, 1, 0)
#' df <- data.table::data.table("a" = a, "b" = b, "c" = c)
#' cols <- c("a", "b", "c")
#' term_n <- c(0, 0, 1)
#' unique_cols <- Check_Dupe_Columns(df, cols, term_n)
#'
Check_Dupe_Columns <- function(df, cols, term_n, verbose = 0, factor_check = FALSE) {
  tryCatch(
    {
      df <- setDT(df)
    },
    error = function(e) {
      df <- data.table(df)
    }
  )
  verbose <- Check_Verbose(verbose)
  if (length(cols) > 1) {
    features_pair <- combn(cols, 2, simplify = FALSE) # list all column pairs
    terms_pair <- combn(term_n, 2, simplify = FALSE) # list all term pairs
    toRemove <- c() # init a vector to store duplicates
    for (pair_n in seq_len(length(features_pair))) {
      # put the pairs for testing into temp objects
      pair <- unlist(features_pair[pair_n])
      term <- unlist(terms_pair[pair_n])
      f1 <- pair[1]
      f2 <- pair[2]
      if (!(f1 %in% names(df))) {
        stop(paste("Error: ", f1, " not in data.table", sep = "")) # nocov
      }
      if (!(f2 %in% names(df))) {
        stop(paste("Error: ", f2, " not in data.table", sep = "")) # nocov
      }
      t1 <- term[1]
      t2 <- term[2]
      checked_factor <- TRUE
      if (factor_check) { #
        if ((is.numeric(df[[f1]])) && (is.numeric(df[[f2]]))) {
          if (sum(df[[f1]] * df[[f2]]) == 0) {
            checked_factor <- FALSE
          }
        } else if (is.numeric(df[[f1]]) != is.numeric(df[[f2]])) {
          checked_factor <- FALSE # nocov
        }
      }
      if ((t1 == t2) && (checked_factor)) {
        if (!(f1 %in% toRemove) && !(f2 %in% toRemove)) {
          if (all(df[[f1]] == df[[f2]])) { # test for duplicates
            if (verbose >= 2) {
              warning(paste("Warning: ", f1, " and ", f2,
                " are equal",
                sep = ""
              ))
            }
            toRemove <- c(toRemove, f2) # build the list of duplicates
          }
          if (min(df[[f2]]) == max(df[[f2]])) {
            if (min(df[[f2]]) == 0) {
              toRemove <- c(toRemove, f2) # remove zero values
            }
          }
        }
      }
    }
    newcol <- setdiff(cols, toRemove)
    if (length(newcol) == 1) {
      if (min(df[, newcol, with = FALSE]) == max(df[, newcol, with = FALSE])) {
        return(c()) # nocov
      } else {
        return(newcol)
      }
    }
    return(newcol)
  } else if (length(cols) == 1) {
    f1 <- cols[1]
    if (!(f1 %in% names(df))) {
      stop(paste("Error: ", f1, " not in data.table", sep = ""))
    }
    if (min(df[, cols, with = FALSE]) == max(df[, cols, with = FALSE])) {
      if (min(df[, cols, with = FALSE]) == 0) {
        return(c())
      } else {
        return(cols)
      }
    } else {
      return(cols)
    }
  } else {
    return(c())
  }
  return(c()) # nocov
}

#' Applies time duration truncation limits to create columns for Cox model
#'
#' \code{Check_Trunc} creates columns to use for truncation
#'
#' @inheritParams R_template
#' @family Data Cleaning Functions
#' @return returns the updated data and time period columns
#' @export
#' @examples
#' library(data.table)
#' df <- data.table::data.table(
#'   "UserID" = c(112, 114, 213, 214, 115, 116, 117),
#'   "Starting_Age" = c(18, 20, 18, 19, 21, 20, 18),
#'   "Ending_Age" = c(30, 45, 57, 47, 36, 60, 55),
#'   "Cancer_Status" = c(0, 0, 1, 0, 1, 0, 0)
#' )
#' # For the interval case
#' time1 <- "Starting_Age"
#' time2 <- "Ending_Age"
#' ce <- c("%trunc%", "Ending_Age")
#' val <- Check_Trunc(df, ce)
#' df <- val$df
#' ce <- val$ce
#'
Check_Trunc <- function(df, ce, verbose = 0) {
  tryCatch(
    {
      df <- setDT(df)
    },
    error = function(e) {
      df <- data.table(df)
    }
  )
  verbose <- Check_Verbose(verbose)
  if (ce[1] == "%trunc%") {
    if (ce[2] == "%trunc%") {
      stop("Error: Both endpoints are truncated, not acceptable")
    }
    tname <- ce[2]
    tmin <- min(df[, get(tname)]) - 1
    if (!("right_trunc" %in% names(df))) {
      df[, ":="(right_trunc = tmin)]
    }
    ce[1] <- "right_trunc"
  } else if (ce[2] == "%trunc%") {
    tname <- ce[1]
    tmax <- max(df[, get(tname)]) + 1
    if (!("left_trunc" %in% names(df))) {
      df[, ":="(left_trunc = tmax)]
    }
    ce[2] <- "left_trunc"
  }
  return(list("df" = df, "ce" = ce))
}

#' Applies time dependence to parameters
#'
#' \code{gen_time_dep} generates a new dataframe with time dependent covariates by applying a grid in time
#'
#' @inheritParams R_template
#' @family Data Cleaning Functions
#' @return returns the updated dataframe
#' @export
#' @examples
#' library(data.table)
#' # Adapted from the tests
#' a <- c(20, 20, 5, 10, 15)
#' b <- c(1, 2, 1, 1, 2)
#' c <- c(0, 0, 1, 1, 1)
#' df <- data.table::data.table("a" = a, "b" = b, "c" = c)
#' time1 <- "%trunc%"
#' time2 <- "a"
#' event <- "c"
#' control <- list(
#'   "lr" = 0.75, "maxiter" = -1, "halfmax" = 5, "epsilon" = 1e-9,
#'   "deriv_epsilon" = 1e-9, "abs_max" = 1.0,
#'   "dose_abs_max" = 100.0,
#'   "verbose" = FALSE, "ties" = "breslow", "double_step" = 1
#' )
#' grt_f <- function(df, time_col) {
#'   return((df[, "b"] * df[, get(time_col)])[[1]])
#' }
#' func_form <- c("lin")
#' df_new <- gen_time_dep(
#'   df, time1, time2, event, TRUE, 0.01, c("grt"), c(),
#'   c(grt_f), paste("test", "_new.csv", sep = ""), func_form, 2
#' )
#' file.remove("test_new.csv")
#'
gen_time_dep <- function(df, time1, time2, event0, iscox, dt, new_names, dep_cols, func_form, fname, tform, nthreads = as.numeric(detectCores())) {
  tryCatch(
    {
      df <- setDT(df)
    },
    error = function(e) {
      df <- data.table(df)
    }
  )
  dfn <- names(df)
  ce <- c(time1, time2, event0)
  t_check <- Check_Trunc(df, ce)
  df <- t_check$df
  ce <- t_check$ce
  time1 <- ce[1]
  time2 <- ce[2]
  dfn_same <- dfn[!(dfn %in% dep_cols)]
  dfn_dep <- c()
  for (i in seq_len(length(new_names))) {
    name0 <- paste(new_names[i], 0, sep = "_")
    name1 <- paste(new_names[i], 1, sep = "_")
    func <- func_form[i]
    df[, name0] <- lapply(func, function(f) f(df, time1))
    df[, name1] <- lapply(func, function(f) f(df, time2))
    dfn_dep <- c(dfn_dep, name0, name1)
  }
  if (length(new_names) != length(func_form)) {
    stop(paste("Error: new_names vector should be the same size as the list of functions applied",
      sep = ""
    ))
  }
  if (length(new_names) != length(tform)) {
    stop(paste("Error: new_names vector should be the same size as the list of interpolation method used",
      sep = ""
    ))
  }
  for (i in seq_len(length(tform))) {
    temp <- tform[i]
    if (temp != "lin") {
      a <- substr(temp, 1, 5)
      if (a != "step?") {
        stop(paste("Error: Interpolation method not recognized: ",
          temp,
          sep = ""
        ))
      }
    }
  }
  dfn_time <- c(time1, time2)
  dfn_event <- c(event0)
  dfn_same <- dfn_same[!(dfn_same %in% dfn_time)]
  dfn_same <- dfn_same[!(dfn_same %in% dfn_event)]
  dfend <- df[get(event0) == 1, ]
  tu <- sort(unlist(unique(dfend[, time2, with = FALSE]), use.names = FALSE))
  if (iscox) {
    df <- df[get(time2) >= min(tu), ]
    df <- df[get(time1) <= max(tu), ]
  }
  x_time <- as.matrix(df[, dfn_time, with = FALSE])
  x_dep <- as.matrix(df[, dfn_dep, with = FALSE])
  x_same <- as.matrix(df[, dfn_same, with = FALSE])
  x_event <- as.matrix(df[, dfn_event, with = FALSE])
  if (grepl(".csv", fname, fixed = TRUE)) {
    # fine
  } else {
    fname <- paste(fname, ".csv", sep = "_")
  }
  if ((identical(Sys.getenv("TESTTHAT"), "true")) || (identical(Sys.getenv("TESTTHAT_IS_CHECKING"), "true"))) {
    nthreads <- 2
  }
  Write_Time_Dep(
    x_time, x_dep, x_same, x_event, dt, fname,
    tform, tu, iscox, nthreads
  )
  df_new <- data.table::fread(fname,
    data.table = TRUE,
    header = FALSE, nThread = nthreads,
    col.names = c(time1, time2, new_names, dfn_same, event0)
  )
  data.table::setkeyv(df_new, c(event0, time2, time1))
  return(df_new)
}

#' Automates creating a date difference column
#'
#' \code{Date_Shift} generates a new dataframe with a column containing time difference in a given unit
#'
#' @inheritParams R_template
#' @param dcol0 list of starting month, day, and year
#' @param dcol1 list of ending month, day, and year
#' @family Data Cleaning Functions
#' @return returns the updated dataframe
#' @export
#' @examples
#' library(data.table)
#' m0 <- c(1, 1, 2, 2)
#' m1 <- c(2, 2, 3, 3)
#' d0 <- c(1, 2, 3, 4)
#' d1 <- c(6, 7, 8, 9)
#' y0 <- c(1990, 1991, 1997, 1998)
#' y1 <- c(2001, 2003, 2005, 2006)
#' df <- data.table::data.table("m0" = m0, "m1" = m1, "d0" = d0, "d1" = d1, "y0" = y0, "y1" = y1)
#' df <- Date_Shift(df, c("m0", "d0", "y0"), c("m1", "d1", "y1"), "date_since")
#'
Date_Shift <- function(df, dcol0, dcol1, col_name, units = "days") {
  tryCatch(
    {
      df <- setDT(df)
    },
    error = function(e) {
      df <- data.table(df)
    }
  )
  def_cols <- names(df)
  df$dt0 <- paste(df[[match(dcol0[1], names(df))]],
    df[[match(dcol0[2], names(df))]],
    df[[match(dcol0[3], names(df))]],
    sep = "-"
  )
  df$dt1 <- paste(df[[match(dcol1[1], names(df))]],
    df[[match(dcol1[2], names(df))]],
    df[[match(dcol1[3], names(df))]],
    sep = "-"
  )
  # TO NOT ENCOUNTER DAYLIGHT SAVINGS ISSUES, THE UTC TIMEZONE IS USED
  # IF NOT USED THEN RESULTS MAY HAVE TIMES OFF BY 1/24 decimals
  df[, col_name] <- difftime(
    strptime(df$dt1,
      format = "%m-%d-%Y",
      tz = "UTC"
    ),
    strptime(df$dt0, format = "%m-%d-%Y"),
    units = units,
    tz = "UTC"
  )
  def_cols <- c(def_cols, col_name)
  return(df[, def_cols, with = FALSE])
}

#' Automates creating a date since a reference column
#'
#' \code{Time_Since} generates a new dataframe with a column containing time since a reference in a given unit
#'
#' @inheritParams R_template
#' @param dcol0 list of ending month, day, and year
#' @family Data Cleaning Functions
#' @return returns the updated dataframe
#' @export
#' @examples
#' library(data.table)
#' m0 <- c(1, 1, 2, 2)
#' m1 <- c(2, 2, 3, 3)
#' d0 <- c(1, 2, 3, 4)
#' d1 <- c(6, 7, 8, 9)
#' y0 <- c(1990, 1991, 1997, 1998)
#' y1 <- c(2001, 2003, 2005, 2006)
#' df <- data.table::data.table(
#'   "m0" = m0, "m1" = m1,
#'   "d0" = d0, "d1" = d1,
#'   "y0" = y0, "y1" = y1
#' )
#' tref <- strptime("3-22-1997", format = "%m-%d-%Y", tz = "UTC")
#' df <- Time_Since(df, c("m1", "d1", "y1"), tref, "date_since")
#'
Time_Since <- function(df, dcol0, tref, col_name, units = "days") {
  tryCatch(
    {
      df <- setDT(df)
    },
    error = function(e) {
      df <- data.table(df)
    }
  )
  def_cols <- names(df)
  df$dt0 <- paste(df[[match(dcol0[1], names(df))]], df[[match(
    dcol0[2],
    names(df)
  )]], df[[match(dcol0[3], names(df))]], sep = "-")
  df[, col_name] <- lapply(df$dt0, function(x) {
    (difftime(
      strptime(x,
        format = "%m-%d-%Y", tz = "UTC"
      ), tref,
      units = units
    ))
  })
  def_cols <- c(def_cols, col_name)
  return(df[, def_cols, with = FALSE])
}

#' Automates creating data for a joint competing risks analysis
#'
#' \code{Joint_Multiple_Events} generates input for a regression with multiple non-independent events and models
#'
#' @inheritParams R_template
#' @param events vector of event column names
#' @param term_n_list list of vectors for term numbers for event specific or shared model elements, defaults to term 0
#' @param tform_list list of vectors for subterm types for event specific or shared model elements, defaults to loglinear
#' @param keep_constant_list list of vectors for constant elements for event specific or shared model elements, defaults to free (0)
#' @param a_n_list list of vectors for parameter values for event specific or shared model elements, defaults to term 0
#' @param name_list list of vectors for columns for event specific or shared model elements, required
#' @family Data Cleaning Functions
#' @return returns the updated dataframe and model inputs
#' @export
#' @examples
#' library(data.table)
#' a <- c(0, 0, 0, 1, 1, 1)
#' b <- c(1, 1, 1, 2, 2, 2)
#' c <- c(0, 1, 2, 2, 1, 0)
#' d <- c(1, 1, 0, 0, 1, 1)
#' e <- c(0, 1, 1, 1, 0, 0)
#' df <- data.table("t0" = a, "t1" = b, "e0" = c, "e1" = d, "fac" = e)
#' time1 <- "t0"
#' time2 <- "t1"
#' df$pyr <- df$t1 - df$t0
#' pyr <- "pyr"
#' events <- c("e0", "e1")
#' names_e0 <- c("fac")
#' names_e1 <- c("fac")
#' names_shared <- c("t0", "t0")
#' term_n_e0 <- c(0)
#' term_n_e1 <- c(0)
#' term_n_shared <- c(0, 0)
#' tform_e0 <- c("loglin")
#' tform_e1 <- c("loglin")
#' tform_shared <- c("quad_slope", "loglin_top")
#' keep_constant_e0 <- c(0)
#' keep_constant_e1 <- c(0)
#' keep_constant_shared <- c(0, 0)
#' a_n_e0 <- c(-0.1)
#' a_n_e1 <- c(0.1)
#' a_n_shared <- c(0.001, -0.02)
#' name_list <- list("shared" = names_shared, "e0" = names_e0, "e1" = names_e1)
#' term_n_list <- list("shared" = term_n_shared, "e0" = term_n_e0, "e1" = term_n_e1)
#' tform_list <- list("shared" = tform_shared, "e0" = tform_e0, "e1" = tform_e1)
#' keep_constant_list <- list(
#'   "shared" = keep_constant_shared,
#'   "e0" = keep_constant_e0, "e1" = keep_constant_e1
#' )
#' a_n_list <- list("shared" = a_n_shared, "e0" = a_n_e0, "e1" = a_n_e1)
#' val <- Joint_Multiple_Events(
#'   df, events, name_list, term_n_list,
#'   tform_list, keep_constant_list, a_n_list
#' )
#'
Joint_Multiple_Events <- function(df, events, name_list, term_n_list = list(), tform_list = list(), keep_constant_list = list(), a_n_list = list()) {
  tryCatch(
    {
      df <- setDT(df)
    },
    error = function(e) {
      df <- data.table(df)
    }
  )
  # filling missing values
  for (i in names(name_list)) {
    temp0 <- unlist(name_list[i], use.names = FALSE)
    if (i %in% names(term_n_list)) {
      temp1 <- unlist(term_n_list[i], use.names = FALSE)
      if (length(temp0) != length(temp1)) {
        stop(paste("Error: item ", i, " in name_list has ",
          length(temp0),
          " items, but same item in term_n_list has ",
          length(temp1),
          " items. Omit entry in term_n_list to set to default of term 0 or add missing values",
          sep = ""
        ))
      }
    } else {
      temp <- list(rep(0, length(temp0)))
      names(temp) <- i
      term_n_list <- c(term_n_list, temp)
    }
    if (i %in% names(tform_list)) {
      temp1 <- unlist(tform_list[i], use.names = FALSE)
      if (length(temp0) != length(temp1)) {
        stop(paste("Error: item ", i, " in name_list has ",
          length(temp0),
          " items, but same item in tform_list has ",
          length(temp1),
          " items. Omit entry in tform_list to set to default of 'loglin' or add missing values",
          sep = ""
        ))
      }
    } else {
      temp <- list(rep("loglin", length(temp0)))
      names(temp) <- i
      tform_list <- c(tform_list, temp)
    }
    if (i %in% names(keep_constant_list)) {
      temp1 <- unlist(keep_constant_list[i], use.names = FALSE)
      if (length(temp0) != length(temp1)) {
        stop(paste("Error: item ", i, " in name_list has ",
          length(temp0),
          " items, but same item in keep_constant_list has ",
          length(temp1),
          " items. Omit entry in tform_list to set to default of 0 or add missing values",
          sep = ""
        ))
      }
    } else {
      temp <- list(rep(0, length(temp0)))
      names(temp) <- i
      keep_constant_list <- c(keep_constant_list, temp)
    }
    if (i %in% names(a_n_list)) {
      temp1 <- unlist(a_n_list[i], use.names = FALSE)
      if (length(temp0) != length(temp1)) {
        stop(paste("Error: item ", i, " in name_list has ",
          length(temp0),
          " items, but same item in a_n_list has ",
          length(temp1),
          " items. Omit entry in a_n_list to set to default of 0 or add missing values",
          sep = ""
        ))
      }
    } else {
      temp <- list(rep(0, length(temp0)))
      names(temp) <- i
      a_n_list <- c(a_n_list, temp)
    }
  }
  df0 <- data.table()
  for (i in names(df)) {
    if (i %in% events) {
      if (i == events[1]) {
        temp <- c()
        for (j in events) {
          temp <- c(temp, unlist(df[, j, with = FALSE], use.names = FALSE))
        }
        df0[, "events"] <- temp
      }
      temp <- c()
      for (j in events) {
        if (i == j) {
          temp <- c(temp, rep(1, nrow(df)))
        } else {
          temp <- c(temp, rep(0, nrow(df)))
        }
      }
      df0[, i] <- temp
    } else {
      temp <- rep(unlist(df[, i, with = FALSE], use.names = FALSE), length(events))
      df0[, i] <- temp
    }
  }
  names <- c()
  term_n <- c()
  tform <- c()
  keep_constant <- c()
  a_n <- c()
  if ("shared" %in% names(name_list)) {
    names <- c(names, unlist(name_list["shared"], use.names = FALSE))
    term_n <- c(term_n, unlist(term_n_list["shared"], use.names = FALSE))
    tform <- c(tform, unlist(tform_list["shared"], use.names = FALSE))
    keep_constant <- c(keep_constant, unlist(keep_constant_list["shared"],
      use.names = FALSE
    ))
    a_n <- c(a_n, unlist(a_n_list["shared"], use.names = FALSE))
  }
  for (i in events) {
    if (i %in% names(name_list)) {
      interactions <- c()
      new_names <- c()
      for (j in unlist(name_list[i], use.names = FALSE)) {
        interactions <- c(interactions, paste(j, "?*?", i, sep = ""))
        new_names <- c(new_names, paste(j, "_", i, sep = ""))
      }
      vals <- interact_them(df0, interactions, new_names)
      df0 <- vals$df
      new_names <- vals$cols
      names <- c(names, new_names)
      term_n <- c(term_n, unlist(term_n_list[i], use.names = FALSE))
      tform <- c(tform, unlist(tform_list[i], use.names = FALSE))
      keep_constant <- c(keep_constant, unlist(keep_constant_list[i],
        use.names = FALSE
      ))
      a_n <- c(a_n, unlist(a_n_list[i], use.names = FALSE))
    }
  }
  return(list("df" = df0, "names" = names, "term_n" = term_n, "tform" = tform, "keep_constant" = keep_constant, "a_n" = a_n))
}

#' Checks system OS
#'
#' \code{get_os} checks the system OS, part of configuration script
#'
#' @return returns a string representation of OS
get_os <- function() {
  sysinf <- Sys.info()
  if (!is.null(sysinf)) {
    os <- sysinf["sysname"]
    if (os == "Darwin") {
      os <- "osx" # nocov
    }
  } else { ## mystery machine
    os <- .Platform$OS.type # nocov
    if (grepl("^darwin", R.version$os)) { # nocov
      os <- "osx" # nocov
    }
    if (grepl("linux-gnu", R.version$os)) { # nocov
      os <- "linux" # nocov
    }
  }
  tolower(os)
}

#' Checks default c++ compiler
#'
#' \code{gcc_version} Checks default c++ compiler, part of configuration script
#'
#' @return returns a string representation of gcc, clang, or c++ output
gcc_version <- function() {
  out <- tryCatch(run("c++", "-v", stderr_to_stdout = TRUE),
    error = function(cnd) list(stdout = "")
  )
  out0 <- str_match(out$stdout, "gcc version")[1]
  if (!is.na(out0)) {
    out <- "gcc"
  } else {
    out0 <- str_match(out$stdout, "clang version")[1] # nocov
    if (!is.na(out0)) { # nocov
      out <- "clang" # nocov
    } else {
      out <- out$stdout # nocov
    }
  }
  out
}

#' Checks how R was compiled
#'
#' \code{Rcomp_version} Checks how R was compiled, part of configuration script
#'
#' @return returns a string representation of gcc, clang, or R CMD config CC output
Rcomp_version <- function() {
  out <- rcmd("config", "CC")
  out0 <- str_match(out$stdout, "clang")[1]
  if (!is.na(out0)) {
    out <- "clang" # nocov
  } else {
    out0 <- str_match(out$stdout, "gcc")[1]
    if (!is.na(out0)) {
      out <- "gcc"
    } else {
      out <- out$stdout # nocov
    }
  }
  out
}

#' Checks default R c++ compiler
#'
#' \code{Rcpp_version} checks ~/.R/Makevars script for default compilers set, part of configuration script
#'
#' @return returns a string representation of gcc, clang, or head ~/.R/Makevars
Rcpp_version <- function() {
  out <- tryCatch(run("head", "~/.R/Makevars", stderr_to_stdout = TRUE),
    error = function(cnd) list(stdout = "")
  )
  out0 <- str_match(out$stdout, "clang")[1]
  if (!is.na(out0)) {
    out <- "clang" # nocov
  } else {
    out0 <- str_match(out$stdout, "gcc")[1]
    if (!is.na(out0)) {
      out <- "gcc" # nocov
    } else {
      out <- out$stdout # nocov
    }
  }
  out
}

#' Checks OS, compilers, and OMP
#'
#' \code{System_Version} checks OS, default R c++ compiler, and if OMP is enabled
#'
#' @return returns a list of results
#' @export
#' @family Output and Information Functions
System_Version <- function() {
  os <- get_os()
  gcc <- gcc_version()
  Rcomp <- Rcomp_version()
  OMP <- OMP_Check()
  list(
    "Operating System" = os, "Default c++" = gcc, "R Compiler" = Rcomp,
    "OpenMP Enabled" = OMP
  )
}

#' General purpose verbosity check
#'
#' \code{Check_Verbose} checks and assigns verbosity values
#'
#' @inheritParams R_template
#' @family Data Cleaning Functions
#' @return returns correct verbose value
#'
Check_Verbose <- function(verbose) {
  if (verbose %in% c(0, 1, 2, 3, 4)) {
    verbose <- as.integer(verbose)
  } else if (verbose %in% c(TRUE, FALSE)) {
    if (verbose) {
      verbose <- 3 # nocov
    } else {
      verbose <- 0 # nocov
    }
  } else {
    stop("Error: verbosity arguement not valid")
  }
  verbose
}

#' uses a table, list of categories, and list of event summaries to generate person-count tables
#'
#' \code{Event_Count_Gen} generates event-count tables
#'
#' @inheritParams R_template
#' @param table dataframe with every category/event column needed
#' @param categ list with category columns and methods, methods can be either strings or lists of boundaries
#' @param events list of columns to summarize, supports counts and means and renaming the summary column
#'
#' @return returns a grouped table and a list of category boundaries used
#' @family Data Cleaning Functions
#' @export
#' @examples
#' library(data.table)
#' a <- c(0, 1, 2, 3, 4, 5, 6)
#' b <- c(1, 2, 3, 4, 5, 6, 7)
#' c <- c(0, 1, 0, 0, 0, 1, 0)
#' table <- data.table::data.table(
#'   "a" = a,
#'   "b" = b,
#'   "c" = c
#' )
#' categ <- list(
#'   "a" = "0/3/5]7",
#'   "b" = list(
#'     lower = c(-1, 3, 6),
#'     upper = c(3, 6, 10),
#'     name = c("low", "medium", "high")
#'   )
#' )
#' event <- list(
#'   "c" = "count AS cases",
#'   "a" = "mean", "b" = "mean"
#' )
#' e <- Event_Count_Gen(table, categ, event, T)
#'
Event_Count_Gen <- function(table, categ, events, verbose = FALSE) {
  df <- as_tibble(table)
  `%>%` <- dplyr::`%>%`
  #
  categ_cols <- c()
  categ_bounds <- list()
  for (cat in names(categ)) {
    cat_str <- ""
    if (!cat %in% names(table)) {
      stop(paste(cat, " not in table", sep = ""))
    }
    if (length(categ[[cat]]) > 1) { # list of bounds
      temp0 <- categ[[cat]]$lower
      temp1 <- categ[[cat]]$upper
      if ("name" %in% names(categ[[cat]])) { # assign names to the levels
        temp2 <- categ[[cat]]$name
      } else { # number the categories
        temp2 <- seq_len(length(temp0)) # 1:length(temp0)
      }
      num_categ <- length(temp0) # number of categories
      cat_col <- paste(cat, "category", sep = "_") # name of the category
      categ_cols <- c(categ_cols, cat_col) # add to list
      df <- df %>% mutate("{cat_col}" := "Unassigned") # add to tibble
      for (i in 1:num_categ) { # for each category
        L <- as.numeric(temp0[i]) # lower bound
        if (grepl("]", temp1[i], fixed = TRUE)) {
          U <- as.numeric(gsub("]", "", temp1[i])) # assign upper
          a_col_categ <- case_when(df[[cat]] <= U & df[[cat]] >= L & df[[cat_col]] == "Unassigned" ~ as.character(temp2[i]), .default = df[[cat_col]]) # rows within the bounds and unassigned are assigned the name
          cat_str <- paste(cat_str, paste("[", L, ", ", U, "]", sep = ""), sep = " ") # add to list of intervals
        } else {
          U <- as.numeric(temp1[i]) # assign upper
          if (L == U) { # discrete case
            a_col_categ <- case_when(df[[cat]] == U & df[[cat_col]] == "Unassigned" ~ as.character(temp2[i]), .default = df[[cat_col]])
            cat_str <- paste(cat_str, paste("[", L, ", ", U, "]", sep = ""), sep = " ")
          } else { # interval case
            a <- case_when(df[[cat]] < U & df[[cat]] >= L & df[[cat_col]] == "Unassigned" ~ as.character(temp2[i]), .default = df[[cat_col]])
            cat_str <- paste(cat_str, paste("[", L, ", ", U, ")", sep = ""), sep = " ")
          }
        }
        df[[cat_col]] <- a_col_categ # update column
      }
      categ_bounds[[cat_col]] <- cat_str # add to list of intervals
    } else { # string of bounds
      cat_str <- ""
      temp <- categ[[cat]]
      temp <- gsub("/", " / ", temp)
      temp <- gsub("]", " ] ", temp)
      temp <- strsplit(temp, "\\s+")[[1]] # adding and splitting by spaces
      num_categ <- (length(temp) - 1) / 2 # get number of categories
      cat_col <- paste(cat, "category", sep = "_")
      categ_cols <- c(categ_cols, cat_col)
      df <- df %>% mutate("{cat_col}" := "Unassigned") # add to tibble
      for (i in 1:num_categ) {
        L <- as.numeric(temp[2 * i - 1]) # get upper and lower bounds
        U <- as.numeric(temp[2 * i + 1])
        if (L == U) { # dicrete value case
          a_col_categ <- case_when(df[[cat]] == U & df[[cat_col]] == "Unassigned" ~ as.character(i), .default = df[[cat_col]])
          cat_str <- paste(cat_str, paste("[", L, ", ", U, "]", sep = ""), sep = " ")
        } else if (temp[2 * i] == "/") { # strict less than upper bound
          a_col_categ <- case_when(df[[cat]] < U & df[[cat]] >= L & df[[cat_col]] == "Unassigned" ~ as.character(i), .default = df[[cat_col]])
          cat_str <- paste(cat_str, paste("[", L, ", ", U, ")", sep = ""), sep = " ")
        } else { # within and including bounds
          a_col_categ <- case_when(df[[cat]] <= U & df[[cat]] >= L & df[[cat_col]] == "Unassigned" ~ as.character(i), .default = df[[cat_col]])
          cat_str <- paste(cat_str, paste("[", L, ", ", U, "]", sep = ""), sep = " ")
        }
        df[[cat_col]] <- a_col_categ # update column
      }
      categ_bounds[[cat_col]] <- cat_str # add interval boundaries
    }
  }
  df_group <- df %>%
    group_by(across(all_of(categ_cols))) %>%
    summarize("COUNT" = n(), .groups = "drop") # group by columns and summarize by counts
  for (evt in names(events)) { # iterate through events
    if (grepl(" AS ", events[[evt]], fixed = TRUE)) { # get method and updated name
      temp <- gsub(" AS ", " ", events[[evt]])
      temp <- strsplit(temp, "\\s+")[[1]]
      col_name <- temp[2]
      method <- temp[1]
    } else {
      col_name <- evt
      temp <- strsplit(events[[evt]], "\\s+")[[1]]
      method <- temp[[length(temp)]]
    }
    if (method == "count") { # summarize by count
      df_temp <- df %>%
        group_by(across(all_of(categ_cols))) %>%
        summarize("{col_name}" := sum(.data[[evt]]), .groups = "drop")
    } else if (method == "mean") { # summarize by mean
      df_temp <- df %>%
        group_by(across(all_of(categ_cols))) %>%
        summarize("{col_name}" := mean(.data[[evt]]), .groups = "drop")
    }
    df_group[[col_name]] <- df_temp[[col_name]] # add to grouped tibble
  }
  # return the grouped list and list of catgegory bounds
  list(df = as.data.table(df_group), bounds = categ_bounds)
}

#' uses a table, list of categories, list of summaries, list of events, and person-year information to generate person-time tables
#'
#' \code{Event_Time_Gen} generates event-time tables
#'
#' @inheritParams R_template
#' @param table dataframe with every category/event column needed
#' @param pyr list with entry and exit lists, containing day/month/year columns in the table
#' @param categ list with category columns and methods, methods can be either strings or lists of boundaries, includes a time category or entry/exit are both required for the pyr list
#' @param summaries list of columns to summarize, supports counts, means, and weighted means by person-year and renaming the summary column
#' @param events list of events or interests, checks if events are within each time interval
#'
#' @return returns a grouped table and a list of category boundaries used
#' @family Data Cleaning Functions
#' @export
#' @examples
#' library(data.table)
#' a <- c(0, 1, 2, 3, 4, 5, 6)
#' b <- c(1, 2, 3, 4, 5, 6, 7)
#' c <- c(0, 1, 0, 0, 0, 1, 0)
#' d <- c(1, 2, 3, 4, 5, 6, 7)
#' e <- c(2, 3, 4, 5, 6, 7, 8)
#' f <- c(
#'   1900, 1900, 1900, 1900,
#'   1900, 1900, 1900
#' )
#' g <- c(1, 2, 3, 4, 5, 6, 7)
#' h <- c(2, 3, 4, 5, 6, 7, 8)
#' i <- c(
#'   1901, 1902, 1903, 1904,
#'   1905, 1906, 1907
#' )
#' table <- data.table::data.table(
#'   "a" = a, "b" = b, "c" = c,
#'   "d" = d, "e" = e, "f" = f,
#'   "g" = g, "h" = h, "i" = i
#' )
#' categ <- list(
#'   "a" = "-1/3/5]7",
#'   "b" = list(
#'     lower = c(-1, 3, 6), upper = c(3, 6, 10),
#'     name = c("low", "medium", "high")
#'   ),
#'   "time AS time" = list(
#'     "day" = c(1, 1, 1, 1, 1),
#'     "month" = c(1, 1, 1, 1, 1),
#'     "year" = c(1899, 1903, 1910)
#'   )
#' )
#' summary <- list(
#'   "c" = "count AS cases",
#'   "a" = "mean",
#'   "b" = "weighted_mean"
#' )
#' events <- list("c")
#' pyr <- list(
#'   entry = list(year = "f", month = "e", day = "d"),
#'   exit = list(year = "i", month = "h", day = "g"),
#'   unit = "years"
#' )
#' e <- Event_Time_Gen(table, pyr, categ, summary, events, T)
#'
Event_Time_Gen <- function(table, pyr, categ, summaries, events, verbose = FALSE) {
  df <- as_tibble(table)
  `%within%` <- lubridate::`%within%`
  `%>%` <- dplyr::`%>%`
  #
  categ_cols <- c()
  categ_bounds <- list()
  for (cat in names(categ)) { # for each category
    cat_str <- ""
    if (grepl(" AS ", cat, fixed = TRUE)) { # get the column and name
      temp <- strsplit(gsub(" AS ", " ", cat), "\\s+")[[1]]
      cat_col <- temp[2]
      cat_df <- temp[1]
    } else {
      cat_df <- cat
      cat_col <- paste(cat, "category", sep = "_")
    }
    if (cat_col %in% names(df)) { # check that the category doesn't already exist in the original dataframe
      stop(paste(cat_col, " already exists, use ' AS ' to rename if needed", sep = ""))
    }
    if (length(categ[[cat]]) > 1) { # boundary as lists
      if ("lower" %in% names(categ[[cat]])) { # lower and upper boundary intervals
        if (!cat_df %in% names(table)) {
          stop(paste(cat_df, " not in table", sep = ""))
        }
        temp0 <- categ[[cat]]$lower
        temp1 <- categ[[cat]]$upper
        if ("name" %in% names(categ[[cat]])) { # check for names for each level
          temp2 <- categ[[cat]]$name
        } else {
          temp2 <- seq_len(length(temp0)) # 1:length(temp0)
        }
        num_categ <- length(temp0)
        categ_cols <- c(categ_cols, cat_col)
        df <- df %>% mutate("{cat_col}" := "Unassigned") # initialize the tibble
        for (i in 1:num_categ) { # for each level
          L <- as.numeric(temp0[i])
          if (grepl("]", temp1[i], fixed = TRUE)) { # check for including the upper limit
            U <- as.numeric(gsub("]", "", temp1[i])) # get upper limit
            a_col_categ <- case_when(df[[cat_df]] <= U & df[[cat_df]] >= L & df[[cat_col]] == "Unassigned" ~ as.character(temp2[i]), .default = df[[cat_col]]) # assign the level to unassigned rows
            cat_str <- paste(cat_str, paste("[", L, ", ", U, "]", sep = ""), sep = " ") # add boundary information to list of intervals
          } else {
            U <- as.numeric(temp1[i]) # get upper limit
            if (L == U) { # discrete case
              a_col_categ <- case_when(df[[cat_df]] == U & df[[cat_col]] == "Unassigned" ~ as.character(temp2[i]), .default = df[[cat_col]])
              cat_str <- paste(cat_str, paste("[", L, ", ", U, "]", sep = ""), sep = " ")
            } else { # interval case
              a_col_categ <- case_when(df[[cat_df]] < U & df[[cat_df]] >= L & df[[cat_col]] == "Unassigned" ~ as.character(temp2[i]), .default = df[[cat_col]])
              cat_str <- paste(cat_str, paste("[", L, ", ", U, ")", sep = ""), sep = " ")
            }
          }
          df[[cat_col]] <- a_col_categ # update tibble
        }
      } else { # calender time scale
        if ("day" %in% names(categ[[cat]])) { # determine the day, month, year data for the category
          day <- categ[[cat]]$day # nocov
          if ("month" %in% names(categ[[cat]])) { # nocov
            month <- categ[[cat]]$month # nocov
          } else { # nocov
            month <- rep(1, length(day)) # nocov
          }
          if ("year" %in% names(categ[[cat]])) { # nocov
            year <- categ[[cat]]$year # nocov
          } else {
            year <- rep(1900, length(day)) # nocov
          }
        } else if ("month" %in% names(categ[[cat]])) { # nocov
          month <- categ[[cat]]$month # nocov
          day <- rep(1, length(month)) # nocov
          if ("year" %in% names(categ[[cat]])) { # nocov
            year <- categ[[cat]]$year # nocov
          } else { # nocov
            year <- rep(1900, length(month)) # nocov
          }
        } else if ("year" %in% names(categ[[cat]])) { # nocov
          year <- categ[[cat]]$year # nocov
          day <- rep(1, length(year)) # nocov
          month <- rep(1, length(month)) # nocov
        } else {
          stop("calender category missing 'day', 'month', and 'year'") # all three are required
        }
        num_categ <- length(year) - 1
        categ_cols <- c(categ_cols, cat_col)
        df <- df %>% mutate("{cat_col}" := "Unassigned") # initialize the tibble
        if ("PYR" %in% names(df)) {
          stop("either multiple time categorizations used or 'PYR' column already exists")
        }
        df <- df %>% mutate("PYR" := 0)
        interval <- "unassigned"
        if ("exit" %in% names(pyr)) { # format the exit as date column
          pyr_exit <- pyr$exit
          if ("year" %in% names(pyr_exit)) { # nocov
            if ("month" %in% names(pyr_exit)) { # nocov
              if ("day" %in% names(pyr_exit)) { # nocov
                exit <- make_date(year = table[[pyr_exit$year]], month = table[[pyr_exit$month]], day = table[[pyr_exit$day]]) # nocov
              } else { # nocov
                exit <- make_date(year = table[[pyr_exit$year]], month = table[[pyr_exit$month]]) # nocov
              }
            } else { # nocov
              if ("day" %in% names(pyr_exit)) { # nocov
                exit <- make_date(year = table[[pyr_exit$year]], day = table[[pyr_exit$day]]) # nocov
              } else { # nocov
                exit <- make_date(year = table[[pyr_exit$year]]) # nocov
              }
            }
          } else {
            if ("month" %in% names(pyr_exit)) { # nocov
              if ("day" %in% names(pyr_exit)) { # nocov
                exit <- make_date(month = table[[pyr_exit$month]], day = table[[pyr_exit$day]]) # nocov
              } else { # nocov
                exit <- make_date(month = table[[pyr_exit$month]]) # nocov
              }
            } else { # nocov
              if ("day" %in% names(pyr_exit)) { # nocov
                exit <- make_date(day = table[[pyr_exit$day]]) # nocov
              } else {
                stop("person-year exit missing day, month, and year")
              }
            }
          }
          if ("entry" %in% names(pyr)) { # format the entry as date column
            pyr_entry <- pyr$entry
            interval <- "interval"
            if ("year" %in% names(pyr_entry)) { # nocov
              if ("month" %in% names(pyr_entry)) { # nocov
                if ("day" %in% names(pyr_entry)) { # nocov
                  entry <- make_date(year = table[[pyr_entry$year]], month = table[[pyr_entry$month]], day = table[[pyr_entry$day]]) # nocov
                } else { # nocov
                  entry <- make_date(year = table[[pyr_entry$year]], month = table[[pyr_entry$month]]) # nocov
                }
              } else { # nocov
                if ("day" %in% names(pyr_entry)) { # nocov
                  entry <- make_date(year = table[[pyr_entry$year]], day = table[[pyr_entry$day]]) # nocov
                } else { # nocov
                  entry <- make_date(year = table[[pyr_entry$year]]) # nocov
                }
              }
            } else { # nocov
              if ("month" %in% names(pyr_entry)) { # nocov
                if ("day" %in% names(pyr_entry)) { # nocov
                  entry <- make_date(month = table[[pyr_entry$month]], day = table[[pyr_entry$day]]) # nocov
                } else { # nocov
                  entry <- make_date(month = table[[pyr_entry$month]]) # nocov
                }
              } else { # nocov
                if ("day" %in% names(pyr_entry)) { # nocov
                  entry <- make_date(day = table[[pyr_entry$day]]) # nocov
                } else {
                  stop("person-year entry missing day, month, and year")
                }
              }
            }
          } else {
            pyr_entry <- list()
            interval <- "left trunc" # if there is an exit and no entry then the data is left truncated
          }
        } else if ("entry" %in% names(pyr)) {
          pyr_entry <- pyr$entry # format the entry as date column
          if ("year" %in% names(pyr_entry)) { # nocov
            if ("month" %in% names(pyr_entry)) { # nocov
              if ("day" %in% names(pyr_entry)) { # nocov
                entry <- make_date(year = table[[pyr_entry$year]], month = table[[pyr_entry$month]], day = table[[pyr_entry$day]]) # nocov
              } else { # nocov
                entry <- make_date(year = table[[pyr_entry$year]], month = table[[pyr_entry$month]]) # nocov
              }
            } else { # nocov
              if ("day" %in% names(pyr_entry)) { # nocov
                entry <- make_date(year = table[[pyr_entry$year]], day = table[[pyr_entry$day]]) # nocov
              } else { # nocov
                entry <- make_date(year = table[[pyr_entry$year]]) # nocov
              }
            }
          } else { # nocov
            if ("month" %in% names(pyr_entry)) { # nocov
              if ("day" %in% names(pyr_entry)) { # nocov
                entry <- make_date(month = table[[pyr_entry$month]], day = table[[pyr_entry$day]]) # nocov
              } else { # nocov
                entry <- make_date(month = table[[pyr_entry$month]]) # nocov
              }
            } else { # nocov
              if ("day" %in% names(pyr_entry)) { # nocov
                entry <- make_date(day = table[[pyr_entry$day]]) # nocov
              } else {
                stop("person-year entry missing day, month, and year")
              }
            }
          }
          pyr_exit <- list()
          interval <- "right trunc" # entry and no exit is right truncated
        } else {
          stop("Date columns given for category, but person-year data not in date format") # pyr does not have exit or entry values
        }
        df_added <- tibble()
        pyr_unit <- "years" # default person-years to years
        if ("unit" %in% names(pyr)) {
          pyr_unit <- pyr$unit
        }
        for (time_i in 1:num_categ) { # for every time interval
          istart <- make_date(year = year[time_i], month = month[time_i], day = day[time_i]) # interval start
          iend <- make_date(year = year[time_i + 1], month = month[time_i + 1], day = day[time_i + 1]) # interval end
          cat_str <- paste(cat_str, paste("[", istart, " to ", iend, "]", sep = ""), sep = " ") # prepare the interval info
          categ_interval <- interval(istart, iend) # define as date interval
          c_categ <- list()
          if (interval == "left trunc") {
            # only exit
            a_categ <- case_when(exit %within% categ_interval & df[[cat_col]] == "Unassigned" ~ as.character(time_i), .default = "0") # exit within interval, set to i
            for (evt in events) {
              c_categ[[evt]] <- case_when(exit %within% categ_interval & df[[cat_col]] == "Unassigned" ~ df[[evt]], .default = 0) # exit within interval set to event column value
            }
            b_categ <- case_when(a_categ == as.character(time_i) ~ as.numeric(as.duration(interval(istart, exit)), pyr_unit), .default = 0) # for every row with exit in interval, track the duration from interval start
            risk_interval <- interval(iend, exit) # interval from interval end to row end
            a_categ <- case_when(as.numeric(as.duration(risk_interval)) > 0 & df[[cat_col]] == "Unassigned" ~ as.character(time_i), .default = a_categ) # track rows which end after the interval, set to i
            b_categ <- case_when(as.numeric(as.duration(risk_interval)) > 0 & df[[cat_col]] == "Unassigned" ~ as.numeric(as.duration(categ_interval), pyr_unit), .default = b_categ) # rows which end after interval are at risk the full interval
          } else if (interval == "right trunc") {
            # only entry
            a_categ <- case_when(entry %within% categ_interval & df[[cat_col]] == "Unassigned" ~ as.character(time_i), .default = "0") #  start during the interval, set to i
            for (evt in events) {
              c_categ[[evt]] <- case_when(entry %within% categ_interval & df[[cat_col]] == "Unassigned" ~ df[[evt]], .default = 0) # start during interval, set to event value
            }
            b_categ <- case_when(entry %within% categ_interval & df[[cat_col]] == "Unassigned" ~ as.numeric(as.duration(interval(entry, iend)), pyr_unit), .default = 0) # every row which starts during the interval, tracks duration from entry to interval end
            risk_interval <- interval(entry, istart)
            a_categ <- case_when(as.numeric(as.duration(risk_interval)) > 0 & df[[cat_col]] == "Unassigned" ~ as.character(time_i), .default = a_categ) # tracks which rows start before the interval
            b_categ <- case_when(as.numeric(as.duration(risk_interval)) > 0 & df[[cat_col]] == "Unassigned" ~ as.numeric(as.duration(categ_interval), pyr_unit), .default = b_categ) # rows which start before the interval are at risk the full interval
          } else {
            # both entry and exit
            risk_interval <- interval(entry, exit)
            a_categ <- case_when(istart %within% risk_interval & iend %within% risk_interval & df[[cat_col]] == "Unassigned" ~ as.character(time_i), .default = "0") # interval fully contained, set to i
            a_categ <- case_when(iend %within% risk_interval & df[[cat_col]] == "Unassigned" ~ as.character(time_i), .default = a_categ) # interval ends during row interval, set to i
            a_categ <- case_when(istart %within% risk_interval & df[[cat_col]] == "Unassigned" ~ as.character(time_i), .default = a_categ) # interval starts during row interval, set to i
            a_categ <- case_when(entry %within% categ_interval & exit %within% categ_interval & df[[cat_col]] == "Unassigned" ~ as.character(time_i), .default = a_categ) # row interval fully contained in category interval, set to i
            for (evt in events) {
              c_categ[[evt]] <- case_when(exit %within% categ_interval & df[[cat_col]] == "Unassigned" ~ df[[evt]], .default = 0) # row ends during category interval, set to event value
            }
            b_categ <- case_when(exit %within% categ_interval & df[[cat_col]] == "Unassigned" ~ as.numeric(as.duration(interval(istart, exit)), pyr_unit), .default = 0) # rows which end during the category interval, track category interval start to row end
            b_categ <- case_when(entry %within% categ_interval & df[[cat_col]] == "Unassigned" ~ as.numeric(as.duration(interval(entry, iend)), pyr_unit), .default = b_categ) # rows which enter during the category interval, track entry to category interval end
            b_categ <- case_when(istart %within% risk_interval & iend %within% risk_interval & df[[cat_col]] == "Unassigned" ~ as.numeric(as.duration(categ_interval), pyr_unit), .default = b_categ) # category interval fully in row interval, track full category interval
            b_categ <- case_when(entry %within% categ_interval & exit %within% categ_interval & df[[cat_col]] == "Unassigned" ~ as.numeric(as.duration(risk_interval), pyr_unit), .default = b_categ) # row interval fully in category interval, track full row interval
          }
          index_kept <- seq_len(nrow(df))
          index_kept <- index_kept[a_categ == time_i] # indexes which contain the category interval to some level
          b_categ <- b_categ[a_categ == time_i] # durations for kept indexes
          row_kept <- slice(df, index_kept) # dataframe at kept rows
          row_kept <- row_kept %>% mutate("{cat_col}" := as.character(time_i)) # time category value
          row_kept <- row_kept %>% mutate("PYR" := b_categ) # duration value
          for (evt in events) {
            d_categ <- c_categ[[evt]]
            d_categ <- d_categ[a_categ == time_i]
            row_kept <- row_kept %>% mutate("{evt}" := d_categ) # event values
          }
          df_added <- bind_rows(df_added, row_kept) # new updates dataset
        }
        df <- df_added
      }
      categ_bounds[[cat_col]] <- cat_str # update the list of category boundaries
    } else { # boundary as string, not a time category
      if (!cat_df %in% names(table)) {
        stop(paste(cat_df, " not in table", sep = ""))
      }
      cat_str <- ""
      temp <- categ[[cat]]
      temp <- gsub("/", " / ", temp)
      temp <- gsub("]", " ] ", temp)
      temp <- strsplit(temp, "\\s+")[[1]] # seperate values and delimiters
      num_categ <- (length(temp) - 1) / 2
      categ_cols <- c(categ_cols, cat_col)
      df <- df %>% mutate("{cat_col}" := "Unassigned") # initialize column
      for (time_i in 1:num_categ) {
        L <- as.numeric(temp[2 * time_i - 1])
        U <- as.numeric(temp[2 * time_i + 1]) # get lower and upper bounds
        if (L == U) { # discrete case
          a_categ <- case_when(df[[cat_df]] == U & df[[cat_col]] == "Unassigned" ~ as.character(time_i), .default = df[[cat_col]])
          cat_str <- paste(cat_str, paste("[", L, ", ", U, "]", sep = ""), sep = " ")
        } else if (temp[2 * time_i] == "/") { # strictly below upper bound
          a_categ <- case_when(df[[cat_df]] < U & df[[cat_df]] >= L & df[[cat_col]] == "Unassigned" ~ as.character(time_i), .default = df[[cat_col]])
          cat_str <- paste(cat_str, paste("[", L, ", ", U, ")", sep = ""), sep = " ")
        } else { # including both bounds
          a_categ <- case_when(df[[cat_df]] <= U & df[[cat_df]] >= L & df[[cat_col]] == "Unassigned" ~ as.character(time_i), .default = df[[cat_col]])
          cat_str <- paste(cat_str, paste("[", L, ", ", U, "]", sep = ""), sep = " ")
        }
        df[[cat_col]] <- a_categ # update tibble
      }
      categ_bounds[[cat_col]] <- cat_str
    }
  }
  if ("PYR" %in% names(df)) {
    # good, we need a person-years measure
  } else {
    if ("exit" %in% names(pyr)) { # format the exit as date column
      pyr_exit <- pyr$exit
      if ("year" %in% names(pyr_exit)) { # nocov
        if ("month" %in% names(pyr_exit)) { # nocov
          if ("day" %in% names(pyr_exit)) { # nocov
            exit <- make_date(year = table[[pyr_exit$year]], month = table[[pyr_exit$month]], day = table[[pyr_exit$day]]) # nocov
          } else { # nocov
            exit <- make_date(year = table[[pyr_exit$year]], month = table[[pyr_exit$month]]) # nocov
          }
        } else { # nocov
          if ("day" %in% names(pyr_exit)) { # nocov
            exit <- make_date(year = table[[pyr_exit$year]], day = table[[pyr_exit$day]]) # nocov
          } else { # nocov
            exit <- make_date(year = table[[pyr_exit$year]]) # nocov
          }
        }
      } else {
        if ("month" %in% names(pyr_exit)) { # nocov
          if ("day" %in% names(pyr_exit)) { # nocov
            exit <- make_date(month = table[[pyr_exit$month]], day = table[[pyr_exit$day]]) # nocov
          } else { # nocov
            exit <- make_date(month = table[[pyr_exit$month]]) # nocov
          }
        } else { # nocov
          if ("day" %in% names(pyr_exit)) { # nocov
            exit <- make_date(day = table[[pyr_exit$day]]) # nocov
          } else {
            stop("person-year exit missing day, month, and year")
          }
        }
      }
      if ("entry" %in% names(pyr)) { # format the entry as date column
        pyr_entry <- pyr$entry
        interval <- "interval"
        if ("year" %in% names(pyr_entry)) { # nocov
          if ("month" %in% names(pyr_entry)) { # nocov
            if ("day" %in% names(pyr_entry)) { # nocov
              entry <- make_date(year = table[[pyr_entry$year]], month = table[[pyr_entry$month]], day = table[[pyr_entry$day]]) # nocov
            } else { # nocov
              entry <- make_date(year = table[[pyr_entry$year]], month = table[[pyr_entry$month]]) # nocov
            }
          } else { # nocov
            if ("day" %in% names(pyr_entry)) { # nocov
              entry <- make_date(year = table[[pyr_entry$year]], day = table[[pyr_entry$day]]) # nocov
            } else { # nocov
              entry <- make_date(year = table[[pyr_entry$year]]) # nocov
            }
          }
        } else { # nocov
          if ("month" %in% names(pyr_entry)) { # nocov
            if ("day" %in% names(pyr_entry)) { # nocov
              entry <- make_date(month = table[[pyr_entry$month]], day = table[[pyr_entry$day]]) # nocov
            } else { # nocov
              entry <- make_date(month = table[[pyr_entry$month]]) # nocov
            }
          } else { # nocov
            if ("day" %in% names(pyr_entry)) { # nocov
              entry <- make_date(day = table[[pyr_entry$day]]) # nocov
            } else {
              stop("person-year entry missing day, month, and year")
            }
          }
        }
      } else {
        stop("Both entry and exit needed to calculate person-years, without a time category for reference")
      }
      risk_interval <- interval(entry, exit)
      pyr_unit <- "years" # default person-years to years
      if ("unit" %in% names(pyr)) {
        pyr_unit <- pyr$unit
      }
      df <- df %>% mutate("PYR" := as.numeric(as.duration(risk_interval), pyr_unit))
    } else {
      stop("Both entry and exit needed to calculate person-years, without a time category for reference")
    }
  }
  # check that required columns are not already in use
  if ("PYR" %in% names(summaries)) { # storing person-year durations
    stop("'PYR' listed as a event column, either remove or rename with ' AS '")
  }
  if ("AT_RISK" %in% names(summaries)) { # storing number at risk
    stop("'AT_RISK' listed as a event column, either remove or rename with ' AS '")
  }
  df_group <- df %>%
    group_by(across(all_of(categ_cols))) %>%
    summarize("AT_RISK" = n(), "PYR" = sum(.data[["PYR"]]), .groups = "drop") # group by categories and define the durations and counts
  for (evt in names(summaries)) { # for each event summary
    if (grepl(" AS ", summaries[[evt]], fixed = TRUE)) { # get the method and column name
      temp <- gsub(" AS ", " ", summaries[[evt]])
      temp <- strsplit(temp, "\\s+")[[1]]
      col_name <- temp[2]
      method <- temp[1]
    } else {
      col_name <- evt
      temp <- strsplit(summaries[[evt]], "\\s+")[[1]]
      method <- temp[[length(temp)]]
    }
    if (method == "count") { # sum of event across each category combination
      df_temp <- df %>%
        group_by(across(all_of(categ_cols))) %>%
        summarize("{col_name}" := sum(.data[[evt]]), .groups = "drop")
    } else if (method == "mean") { # mean value of event across each category combination
      df_temp <- df %>%
        group_by(across(all_of(categ_cols))) %>%
        summarize("{col_name}" := mean(.data[[evt]]), .groups = "drop")
    } else if (method == "weighted_mean") { # mean value weighted by person-years
      df_temp <- df %>%
        group_by(across(all_of(categ_cols))) %>%
        summarize("{col_name}" := weighted.mean(.data[[evt]], .data[["PYR"]]), .groups = "drop")
    }
    df_group[[col_name]] <- df_temp[[col_name]]
  }
  #
  list(df = as.data.table(df_group), bounds = categ_bounds)
}

#' Converts a string equation to regression model inputs
#'
#' \code{Convert_Model_Eq} Converts a string expression of a risk model into the vectors used by different Colossus regression functions
#'
#' @inheritParams R_template
#'
#' @return returns a list of regression inputs
#' @export
#' @family Data Cleaning Functions
#' @examples
#' library(data.table)
#' a <- c(0, 1, 2, 3, 4, 5, 6)
#' b <- c(1, 2, 3, 4, 5, 6, 7)
#' c <- c(0, 1, 0, 0, 0, 1, 0)
#' d <- c(1, 2, 3, 4, 5, 6, 7)
#' e <- c(2, 3, 4, 5, 6, 7, 8)
#' table <- data.table::data.table(
#'   "a" = a, "b" = b, "c" = c,
#'   "d" = d, "e" = e
#' )
#' Model_Eq <- "cox(a,b, c) ~ loglinear(d, factor(e), 0) + multiplicative()"
#' e <- Convert_Model_Eq(Model_Eq, table)
#'
Convert_Model_Eq <- function(Model_Eq, df) {
  tryCatch(
    {
      df <- setDT(df)
    },
    error = function(e) {
      df <- data.table(df)
    }
  )
  # values to assign to
  term_n <- c()
  tform <- c()
  names <- c()
  modelform <- "NONE"
  surv_model_type <- "NONE"
  tstart <- "NONE"
  tend <- "NONE"
  pyr <- "NONE"
  event <- "NONE"
  strata <- "NONE"
  # split into the survival model type and the rest of the model terms
  Model_Eq <- gsub(" ", "", Model_Eq)
  first_split <- lapply(strsplit(Model_Eq, ""), function(x) which(x == "~"))[[1]]
  if (length(first_split) == 0) {
    stop("Error: The model equation did not contain ~")
  }
  surv_obj <- substr(Model_Eq, 1, first_split - 1)
  model_obj <- substr(Model_Eq, first_split + 1, nchar(Model_Eq))
  # split the survival model type
  second_split <- lapply(strsplit(surv_obj, ""), function(x) which(x == "("))[[1]]
  if (length(second_split) == 0) {
    stop("Error: The left hand side did not contain (")
  }
  surv_type <- tolower(substr(surv_obj, 1, second_split - 1))
  surv_paras <- substr(surv_obj, second_split + 1, nchar(surv_obj) - 1)
  surv_paras <- strsplit(surv_paras, ",")[[1]]
  # assign survival model values
  surv_model_type <- surv_type
  if (surv_type == "cox") {
    if (length(surv_paras) == 2) {
      # tend and event
      tend <- surv_paras[1]
      event <- surv_paras[2]
    } else if (length(surv_paras) == 3) {
      # tstart, tend, and event
      tstart <- surv_paras[1]
      tend <- surv_paras[2]
      event <- surv_paras[3]
    } else {
      stop("Error: Invalid number of parameters for cox model")
    }
  } else if (surv_type == "cox_strata") {
    if (length(surv_paras) == 3) {
      # tend and event, then strata
      tend <- surv_paras[1]
      event <- surv_paras[2]
      strata <- surv_paras[3]
    } else if (length(surv_paras) == 4) {
      # tstart, tend, and event, then strata
      tstart <- surv_paras[1]
      tend <- surv_paras[2]
      event <- surv_paras[3]
      strata <- surv_paras[4]
    } else {
      stop("Error: Invalid number of parameters for cox_strata model")
    }
  } else if (surv_type == "poisson") {
    if (length(surv_paras) == 2) {
      pyr <- surv_paras[1]
      event <- surv_paras[2]
    } else {
      stop("Error: Invalid number of parameters for poisson model")
    }
  } else if (surv_type == "poisson_strata") {
    if (length(surv_paras) > 2) {
      pyr <- surv_paras[1]
      event <- surv_paras[2]
      strata <- surv_paras[3:length(surv_paras)]
    } else {
      stop("Error: Invalid number of parameters for poisson model with stratification")
    }
  } else {
    stop("Error: Invalid survival model type for poisson")
  }
  #
  right_model_terms <- strsplit(model_obj, "\\+")[[1]]
  for (term_i in seq_along(right_model_terms)) {
    # seperate the term type or model-formula from parameters
    third_split <- lapply(strsplit(right_model_terms[term_i], ""), function(x) which(x == "("))[[1]]
    if (length(third_split) == 0) {
      stop(paste('Error: right hand side element "', right_model_terms[term_i], '" did not contain (', sep = ""))
    }
    model_type <- tolower(substr(right_model_terms[term_i], 1, third_split - 1))
    model_type <- gsub("_", "-", model_type)
    tform_acceptable <- c(
      "plin", "lin", "loglin", "loglin-dose", "lin-dose",
      "lin-quad-dose", "lin-exp-dose", "plinear", "product-linear", "linear",
      "loglinear", "log-linear", "loglinear-dose", "log-linear-dose", "linear-dose", "linear-piecewise",
      "quadratic", "quad", "quad-dose", "quadratic-dose",
      "step-dose", "step-piecewise",
      "linear-quadratic-dose", "linear-quadratic-piecewise",
      "linear-exponential-dose", "linear-exponential-piecewise"
    )
    modelform_acceptable <- c(
      "multiplicative", "multiplicative-excess", "additive", "product-additive",
      "product-additive-excess", "a", "pa", "pae", "m", "me",
      "gmix", "geometric-mixture", "gmix-r", "relative-geometric-mixture",
      "gmix-e", "excess-geometric-mixture"
    )
    if (model_type %in% tform_acceptable) {
      model_paras <- substr(right_model_terms[term_i], third_split + 1, nchar(right_model_terms[term_i]) - 1)
      model_paras <- strsplit(model_paras, ",")[[1]]
      last_entry <- model_paras[length(model_paras)]
      if (is.na(suppressWarnings(as.integer(last_entry)))) {
        # the last element isn't an integer
        term_num <- 0
      } else {
        model_paras <- model_paras[1:(length(model_paras) - 1)]
        term_num <- as.integer(last_entry)
      }
      for (subterm_i in seq_along(model_paras)) {
        # add element
        if (substr(model_paras[subterm_i], 1, 7) == "factor(") {
          # baseline is set by using factor(column;baseline=level)
          col_name <- substr(model_paras[subterm_i], 8, nchar(model_paras[subterm_i]) - 1)
          base_split <- lapply(strsplit(col_name, ""), function(x) which(x == ";"))[[1]]
          if (length(base_split) == 1) {
            col_names <- substr(col_name, 1, base_split - 1)
            base_line <- substr(col_name, base_split + 1 + 9, nchar(col_name))
            base_level <- paste(col_names, base_line, sep = "_")
            #
            val <- factorize(df, col_names)
            df <- val$df
            col_name <- val$cols
            if (base_level %in% names(df)) {
              df <- df[, (base_level) := NULL]
              col_name <- col_name[!col_name == base_level]
            } else {
              warning(paste("Warning: Baseline level ", base_level, " not found.", sep = ""))
            }
          } else {
            val <- factorize(df, col_name)
            df <- val$df
            col_name <- val$cols
          }
        } else {
          col_name <- c(model_paras[subterm_i])
        }
        # convert subterm formula
        model_terms <- c(model_type)
        if (model_type %in% c("plin", "plinear", "product-linear")) {
          model_terms <- c("plin")
        } else if (model_type %in% c("lin", "linear")) {
          model_terms <- c("lin")
        } else if (model_type %in% c("loglin", "loglinear", "log-linear")) {
          model_terms <- c("loglin")
        } else if (model_type %in% c("loglin-dose", "loglinear-dose", "log-linear-dose")) {
          model_terms <- c("loglin_slope", "loglin_top")
        } else if (model_type %in% c("lin-dose", "linear-dose", "linear-piecewise")) {
          model_terms <- c("lin_slope", "lin_int")
        } else if (model_type %in% c("quadratic", "quad", "quad-dose", "quadratic-dose")) {
          model_terms <- c("quad_slope")
        } else if (model_type %in% c("step-dose", "step-piecewise")) {
          model_terms <- c("step_slope", "step_int")
        } else if (model_type %in% c("lin-quad-dose", "linear-quadratic-dose", "linear-quadratic-piecewise")) {
          model_terms <- c("lin_quad_slope", "lin_quad_int")
        } else if (model_type %in% c("lin-exp-dose", "linear-exponential-dose", "linear-exponential-piecewise")) {
          model_terms <- c("lin_exp_slope", "lin_exp_int", "lin_exp_exp_slope")
        } else {
          stop(paste("Error: Unknown subterm type used, ", model_type, sep = ""))
        }
        for (col in col_name) {
          for (model_term in model_terms) {
            names <- c(names, col)
            tform <- c(tform, model_term)
            term_n <- c(term_n, term_num)
          }
        }
      }
    } else if (model_type %in% modelform_acceptable) {
      if (model_type %in% c("m", "me", "multiplicative", "multiplicative-excess")) {
        model_type <- "M"
      } else if (model_type %in% c("a", "additive")) {
        model_type <- "A"
      } else if (model_type %in% c("pa", "product-additive")) {
        model_type <- "PA"
      } else if (model_type %in% c("pae", "product-additive-excess")) {
        model_type <- "PAE"
      } else if (model_type %in% c("gmix", "geometric-mixture")) {
        model_type <- "GMIX"
      } else if (model_type %in% c("gmix-r", "relative-geometric-mixture")) {
        model_type <- "GMIX-R"
      } else if (model_type %in% c("gmix-e", "excess-geometric-mixture")) {
        model_type <- "GMIX-E"
      }
      if (modelform == "NONE") {
        modelform <- model_type
      } else {
        stop("Error: modelform defined twice")
      }
    } else {
      stop(paste("Error: Unknown option encountered, ", model_type, sep = ""))
    }
  }
  if (modelform == "NONE") {
    modelform <- "M"
  }
  return(list(
    "term_n" = term_n, "tform" = tform, "names" = names, "modelform" = modelform,
    "survival_model_type" = surv_model_type,
    "start_age" = tstart, "end_age" = tend, "person_year" = pyr, "event" = event, "strata" = strata,
    "data" = df
  ))
}

#' Prints a regression output clearly
#'
#' \code{Interpret_Output} uses the list output from a regression, prints off a table of results and summarizes the score and convergence.
#'
#' @inheritParams R_template
#'
#' @return return nothing, prints the results to console
#' @export
#' @family Output and Information Functions
#' @examples
#' library(data.table)
#' ## basic example code reproduced from the starting-description vignette
#' df <- data.table::data.table(
#'   "UserID" = c(112, 114, 213, 214, 115, 116, 117),
#'   "Starting_Age" = c(18, 20, 18, 19, 21, 20, 18),
#'   "Ending_Age" = c(30, 45, 57, 47, 36, 60, 55),
#'   "Cancer_Status" = c(0, 0, 1, 0, 1, 0, 0),
#'   "a" = c(0, 1, 1, 0, 1, 0, 1),
#'   "b" = c(1, 1.1, 2.1, 2, 0.1, 1, 0.2),
#'   "c" = c(10, 11, 10, 11, 12, 9, 11),
#'   "d" = c(0, 0, 0, 1, 1, 1, 1),
#'   "e" = c(0, 0, 1, 0, 0, 0, 1)
#' )
#' # For the interval case
#' time1 <- "Starting_Age"
#' time2 <- "Ending_Age"
#' event <- "Cancer_Status"
#' names <- c("a", "b", "c", "d")
#' a_n <- list(c(1.1, -0.1, 0.2, 0.5), c(1.6, -0.12, 0.3, 0.4))
#' # used to test at a specific point
#' term_n <- c(0, 1, 1, 2)
#' tform <- c("loglin", "lin", "lin", "plin")
#' modelform <- "M"
#' keep_constant <- c(0, 0, 0, 0)
#' control <- list(
#'   "ncores" = 2, "lr" = 0.75, "maxiters" = c(5, 5, 5),
#'   "halfmax" = 5, "epsilon" = 1e-3, "deriv_epsilon" = 1e-3,
#'   "abs_max" = 1.0, "dose_abs_max" = 100.0,
#'   "verbose" = FALSE,
#'   "ties" = "breslow", "double_step" = 1, "guesses" = 2
#' )
#' e <- RunCoxRegression_Omnibus(df, time1, time2, event,
#'   names, term_n, tform, keep_constant,
#'   a_n, modelform, control,
#'   model_control = list(
#'     "single" = FALSE,
#'     "basic" = FALSE, "cr" = FALSE, "null" = FALSE
#'   )
#' )
#' Interpret_Output(e)
#'
Interpret_Output <- function(out_list, digits = 2) {
  tryCatch(
    {
      df <- setDT(df)
    },
    error = function(e) {
      df <- data.table(df)
    }
  )
  # make sure the output isn't an error
  passed <- out_list$Status
  message("|-------------------------------------------------------------------|")
  if (!is.na(passed)) {
    if ("Likelihood_Goal" %in% names(out_list)) {
      # likelihood boundary output
      limits <- out_list$Parameter_Limits
      neg <- out_list$Negative_Risk_Limit_Hit
      conv <- out_list$Limit_Converged
      lik_bound <- out_list$Likelihood_Boundary
      lik_goal <- out_list$Likelihood_Goal
      message("Likelihood Boundary Results")
      if (neg[1]) {
        message("Lower limit was not found")
      } else {
        if (conv[1]) {
          message(paste("Lower limit converged to at ", round(limits[1], digits), " at a score of ", round(lik_bound[1], digits), " with of goal of ", round(lik_goal, digits), sep = ""))
        } else {
          message(paste("Lower limit reached ", round(limits[1], digits), " at a score of ", round(lik_bound[1], digits), " with of goal of ", round(lik_goal, digits), " but did not converge", sep = ""))
        }
      }
      if (neg[2]) {
        message("Upper limit was not found")
      } else {
        if (conv[2]) {
          message(paste("Upper limit converged to at ", round(limits[2], digits), " at a score of ", round(lik_bound[2], digits), " with of goal of ", round(lik_goal, digits), sep = ""))
        } else {
          message(paste("Upper limit reached ", round(limits[2], digits), " at a score of ", round(lik_bound[2], digits), " with of goal of ", round(lik_goal, digits), " but did not converge", sep = ""))
        }
      }
    } else {
      # Check if its a multidose problem
      if (out_list$Survival_Type == "Cox_Multidose") {
        message("Currently the multiple realization code is not setup for printing results, due to the potentially large number of realizations")
      } else if (out_list$Survival_Type == "CaseControl") {
        # case control output
        # get the model details
        names <- out_list$Parameter_Lists$names
        tforms <- out_list$Parameter_Lists$tforms
        term_n <- out_list$Parameter_Lists$term_n
        beta_0 <- out_list$beta_0
        stdev <- out_list$Standard_Deviation
        pval <- pnorm(-abs(beta_0 / stdev))
        strata_odds <- out_list$StrataOdds
        res_table <- data.table(
          "Covariate" = names,
          "Subterm" = tforms,
          "Term Number" = term_n,
          "Central Estimate" = beta_0,
          "Standard Error" = stdev,
          "1-tail p-value" = pval
        )
        message("Final Results")
        print(res_table)
        deviance <- out_list$Deviance
        iteration <- out_list$Control_List$Iteration
        step_max <- out_list$Control_List$`Maximum Step`
        deriv_max <- out_list$Control_List$`Derivative Limiting`
        converged <- out_list$Converged
        #
        freepara <- out_list$FreeParameters
        freestrata <- out_list$FreeSets
        #
        message("\nMatched Case-Control Model Used")
        message(paste("Deviance: ", round(deviance, digits), sep = ""))
        message(paste(freestrata, " out of ", length(strata_odds), " matched sets used Unconditional Likelihood", sep = ""))
        if (!is.null(converged)) {
          message(paste("Iterations run: ", iteration, "\nmaximum step size: ", formatC(step_max, format = "e", digits = digits), ", maximum first derivative: ", formatC(deriv_max, format = "e", digits = digits), sep = ""))
          if (converged) {
            message("Analysis converged")
          } else {
            message("Analysis did not converge, check convergence criteria or run further")
          }
        }
      } else {
        # get the model details
        names <- out_list$Parameter_Lists$names
        tforms <- out_list$Parameter_Lists$tforms
        term_n <- out_list$Parameter_Lists$term_n
        beta_0 <- out_list$beta_0
        stdev <- out_list$Standard_Deviation
        pval <- 2 * pnorm(-abs(beta_0 / stdev))
        res_table <- data.table(
          "Covariate" = names,
          "Subterm" = tforms,
          "Term Number" = term_n,
          "Central Estimate" = beta_0,
          "Standard Error" = stdev,
          "2-tail p-value" = pval
        )
        message("Final Results")
        print(res_table)
        # get the model results
        LogLik <- out_list$LogLik
        AIC <- out_list$AIC
        BIC <- out_list$BIC
        deviation <- out_list$Deviation
        iteration <- out_list$Control_List$Iteration
        step_max <- out_list$Control_List$`Maximum Step`
        deriv_max <- out_list$Control_List$`Derivative Limiting`
        converged <- out_list$Converged
        if (is.null(deviation)) {
          # cox model
          message("\nCox Model Used")
          message(paste("-2*Log-Likelihood: ", round(-2 * LogLik, digits), ",  AIC: ", round(AIC, digits), sep = ""))
        } else {
          # poisson model
          message("\nPoisson Model Used")
          message(paste("-2*Log-Likelihood: ", round(-2 * LogLik, digits), ",  Deviation: ", round(deviation, digits), ",  AIC: ", round(AIC, digits), ",  BIC: ", round(BIC, digits), sep = ""))
        }
        if (!is.null(converged)) {
          message(paste("Iterations run: ", iteration, "\nmaximum step size: ", formatC(step_max, format = "e", digits = digits), ", maximum first derivative: ", formatC(deriv_max, format = "e", digits = digits), sep = ""))
          if (converged) {
            message("Analysis converged")
          } else {
            message("Analysis did not converge, check convergence criteria or run further")
          }
        }
      }
    }
  } else {
    message(paste("Regression Failed"))
  }
  if ("RunTime" %in% names(out_list)) {
    run_time_sec <- as.numeric(out_list$RunTime, units = "secs")
    run_time_min <- as.numeric(out_list$RunTime, units = "mins")
    run_time_hour <- as.numeric(out_list$RunTime, units = "hours")
    if (run_time_sec < 60) {
      message(paste("Run finished in ", round(run_time_sec, digits), " seconds", sep = ""))
    } else if (run_time_min < 60) {
      message(paste("Run finished in ", round(run_time_min, digits), " minutes", sep = ""))
    } else {
      message(paste("Run finished in ", round(run_time_hour, digits), " hours", sep = ""))
    }
    #    message(paste("Run finished in ", out_list$RunTime))
  }
  message("|-------------------------------------------------------------------|")
}

Try the Colossus package in your browser

Any scripts or data that you put into this service are public.

Colossus documentation built on June 8, 2025, 1:10 p.m.