R/fit_model.R

Defines functions fit_null_cox_model fit_cox_model_from_files

Documented in fit_cox_model_from_files fit_null_cox_model

#' Step 2: Fit Cox Model from Files
#'
#' Implements Step 2 of the CoxMK workflow: fitting a null Cox proportional hazards model
#' by reading phenotype and covariate data from files. This function is designed for 
#' batch processing and large-scale analysis where data is stored in separate files.
#'
#' @param phenotype_file Path to CSV file with columns: IID, time, status
#' @param covariate_file Path to CSV file with columns: IID, covar1, covar2, ...
#' @param output_file Path to RDS file to save the fitted null model (default: temporary directory)
#' @param use_spacox Legacy parameter, kept for compatibility (ignored)
#' @return Invisible path to the output file
#' @export
#' @examples
#' # \donttest{
#' # Prepare example data files
#' pheno_data <- data.frame(
#'   IID = paste0("ID", 1:100),
#'   time = rexp(100, 0.1),
#'   status = rbinom(100, 1, 0.3)
#' )
#' covar_data <- data.frame(
#'   IID = paste0("ID", 1:100),
#'   age = rnorm(100, 50, 10),
#'   sex = rbinom(100, 1, 0.5)
#' )
#' 
#' # Use temporary directory for file operations to comply with CRAN policies
#' temp_dir <- tempdir()
#' pheno_file <- file.path(temp_dir, "phenotype.csv")
#' covar_file <- file.path(temp_dir, "covariates.csv")
#' output_file <- file.path(temp_dir, "null_model.rds")
#' 
#' write.csv(pheno_data, pheno_file, row.names = FALSE)
#' write.csv(covar_data, covar_file, row.names = FALSE)
#' 
#' # Step 2: Fit null Cox model from files
#' fit_cox_model_from_files(
#'   phenotype_file = pheno_file,
#'   covariate_file = covar_file, 
#'   output_file = output_file
#' )
#' 
#' # Load the fitted model for Step 3
#' model_info <- readRDS(output_file)
#' }
fit_cox_model_from_files <- function(phenotype_file, covariate_file, output_file = NULL,
                                   use_spacox = TRUE) {

  #
  if (!file.exists(phenotype_file)) {
    stop("Phenotype file not found: ", phenotype_file)
  }
  
  if (!file.exists(covariate_file)) {
    stop("Covariate file not found: ", covariate_file)
  }
  
  # Set default output file to temporary directory if not specified
  if (is.null(output_file)) {
    output_file <- file.path(tempdir(), "null_model.rds")
    cat("Output file not specified, using temporary directory:", output_file, "\n")
  }
  
  #
  cat("Loading phenotype data from:", phenotype_file, "\n")
  pheno_data <- read.csv(phenotype_file, stringsAsFactors = FALSE)
  
  # Check required columns
  required_cols <- c("IID", "time", "status")
  missing_cols <- setdiff(required_cols, colnames(pheno_data))
  if (length(missing_cols) > 0) {
    stop("Missing required columns in phenotype file: ", paste(missing_cols, collapse = ", "))
  }

  cat("  - Loaded", nrow(pheno_data), "samples\n")
  cat("  - Events:", sum(pheno_data$status), "/", nrow(pheno_data), 
      "(", round(100 * sum(pheno_data$status) / nrow(pheno_data), 1), "%)\n")

  #
  cat("Loading covariate data from:", covariate_file, "\n")
  covar_data <- read.csv(covariate_file, stringsAsFactors = FALSE)

  # Check IID column exists
  if (!"IID" %in% colnames(covar_data)) {
    stop("IID column not found in covariate file")
  }

  cat("  - Loaded", nrow(covar_data), "samples\n")
  cat("  - Covariates:", paste(setdiff(colnames(covar_data), "IID"), collapse = ", "), "\n")

  #
  cat("Merging phenotype and covariate data...\n")
  merged_data <- merge(pheno_data, covar_data, by = "IID", all.x = TRUE)

  # Check for missing data after merge
  n_complete <- sum(complete.cases(merged_data))
  if (n_complete < nrow(merged_data)) {
    cat("  Warning:", nrow(merged_data) - n_complete, "samples have missing covariate data\n")
    merged_data <- merged_data[complete.cases(merged_data), ]
    cat("  Using", nrow(merged_data), "complete samples\n")
  }

  #
  cat("Fitting null Cox model...\n")

  # Prepare formula
  covar_names <- setdiff(colnames(merged_data), c("IID", "time", "status"))
  if (length(covar_names) > 0) {
    formula_str <- paste("Surv(time, status) ~", paste(covar_names, collapse = " + "))
  } else {
    formula_str <- "Surv(time, status) ~ 1"
  }

  cat("  Formula:", formula_str, "\n")
  cat("  Using standard Cox regression...\n")

  # Fit standard Cox model
  null_model <- survival::coxph(as.formula(formula_str), data = merged_data)
  model_type <- "Standard Cox"
  
  #
  cat("Saving fitted model to:", output_file, "\n")
  
  # Add metadata to the model
  model_info <- list(
    model = null_model,
    model_type = model_type,
    formula = formula_str,
    n_samples = nrow(merged_data),
    n_events = sum(merged_data$status),
    n_covariates = length(setdiff(colnames(merged_data), c("IID", "time", "status"))),
    covariate_names = setdiff(colnames(merged_data), c("IID", "time", "status")),
    fit_time = Sys.time(),
    r_version = R.version.string
  )
  
  # Save to RDS file
  saveRDS(model_info, file = output_file)
  
  #
  cat("\n=== Model Fitting Summary ===\n")
  cat("Model type:", model_type, "\n")
  cat("Formula:", formula_str, "\n")
  cat("Samples used:", nrow(merged_data), "\n")
  cat("Events:", sum(merged_data$status), "\n")
  cat("Covariates:", length(model_info$covariate_names), "\n")
  if (length(model_info$covariate_names) > 0) {
    cat("  -", paste(model_info$covariate_names, collapse = ", "), "\n")
  }
  cat("Output file:", output_file, "\n")
  cat("Fit completed at:", format(Sys.time()), "\n")
  
  # Print model summary if available
  if (model_type == "Standard Cox" && inherits(null_model, "coxph")) {
    cat("\nModel Summary:\n")
    print(summary(null_model))
  }
  
  cat("\nModel fitting completed successfully!\n")
  
  invisible(output_file)
}

# This is an internal function.
#
# @param time Survival times
# @param status Event indicators
# @param covariates Covariate data frame (optional)
# @return Fitted Cox model object (SPACox or coxph)
# @examples
# \donttest{
# # Example with covariates
# data(example_phenotype)
# data(example_covariates)
# 
# null_model <- fit_null_cox_model(
#   time = example_phenotype$time,
#   status = example_phenotype$status,
#   covariates = example_covariates
# )
# 
# # Example without covariates
# null_model <- fit_null_cox_model(
#   time = example_phenotype$time,
#   status = example_phenotype$status
# )
# }

#' Fit Null Cox Model
#' 
#' @param time Numeric vector of survival times
#' @param status Numeric vector of censoring indicators
#' @param covariates Optional covariate data frame
#' @param use_spacox Logical, whether to use SPACox (ignored in CRAN version)
#' @return Fitted Cox model object
#' @export
fit_null_cox_model <- function(time, status, covariates = NULL, use_spacox = TRUE) {
  
  # Input validation
  if (length(time) != length(status)) {
    stop("time and status must have the same length")
  }
  
  if (sum(status) < 2) {
    stop("Need at least 2 events to fit Cox model")
  }
  
  # Prepare data
  if (!is.null(covariates)) {
    if (nrow(covariates) != length(time)) {
      stop("Number of rows in covariates must match length of time/status")
    }
    
    # Create merged data
    model_data <- data.frame(
      time = time,
      status = status,
      covariates
    )
    
    # Remove incomplete cases
    model_data <- model_data[complete.cases(model_data), ]
    
    if (nrow(model_data) < 10) {
      stop("Too few complete cases for model fitting")
    }
    
    covar_names <- colnames(covariates)
    formula_str <- paste("Surv(time, status) ~", paste(covar_names, collapse = " + "))
    
  } else {
    # No covariates
    model_data <- data.frame(time = time, status = status)
    formula_str <- "Surv(time, status) ~ 1"
  }
  
  # Use standard Cox regression only
  if (!requireNamespace("survival", quietly = TRUE)) {
    stop("survival package is required")
  }
  
  null_model <- survival::coxph(as.formula(formula_str), data = model_data)
  
  # Add metadata
  attr(null_model, "model_type") <- "Standard Cox"
  attr(null_model, "formula") <- formula_str
  attr(null_model, "n_samples") <- nrow(model_data)
  attr(null_model, "n_events") <- sum(model_data$status)
  
  return(null_model)
}

Try the CoxMK package in your browser

Any scripts or data that you put into this service are public.

CoxMK documentation built on Sept. 9, 2025, 5:24 p.m.