Nothing
#' Step 2: Fit Cox Model from Files
#'
#' Implements Step 2 of the CoxMK workflow: fitting a null Cox proportional hazards model
#' by reading phenotype and covariate data from files. This function is designed for
#' batch processing and large-scale analysis where data is stored in separate files.
#'
#' @param phenotype_file Path to CSV file with columns: IID, time, status
#' @param covariate_file Path to CSV file with columns: IID, covar1, covar2, ...
#' @param output_file Path to RDS file to save the fitted null model (default: temporary directory)
#' @param use_spacox Legacy parameter, kept for compatibility (ignored)
#' @return Invisible path to the output file
#' @export
#' @examples
#' # \donttest{
#' # Prepare example data files
#' pheno_data <- data.frame(
#' IID = paste0("ID", 1:100),
#' time = rexp(100, 0.1),
#' status = rbinom(100, 1, 0.3)
#' )
#' covar_data <- data.frame(
#' IID = paste0("ID", 1:100),
#' age = rnorm(100, 50, 10),
#' sex = rbinom(100, 1, 0.5)
#' )
#'
#' # Use temporary directory for file operations to comply with CRAN policies
#' temp_dir <- tempdir()
#' pheno_file <- file.path(temp_dir, "phenotype.csv")
#' covar_file <- file.path(temp_dir, "covariates.csv")
#' output_file <- file.path(temp_dir, "null_model.rds")
#'
#' write.csv(pheno_data, pheno_file, row.names = FALSE)
#' write.csv(covar_data, covar_file, row.names = FALSE)
#'
#' # Step 2: Fit null Cox model from files
#' fit_cox_model_from_files(
#' phenotype_file = pheno_file,
#' covariate_file = covar_file,
#' output_file = output_file
#' )
#'
#' # Load the fitted model for Step 3
#' model_info <- readRDS(output_file)
#' }
fit_cox_model_from_files <- function(phenotype_file, covariate_file, output_file = NULL,
use_spacox = TRUE) {
#
if (!file.exists(phenotype_file)) {
stop("Phenotype file not found: ", phenotype_file)
}
if (!file.exists(covariate_file)) {
stop("Covariate file not found: ", covariate_file)
}
# Set default output file to temporary directory if not specified
if (is.null(output_file)) {
output_file <- file.path(tempdir(), "null_model.rds")
cat("Output file not specified, using temporary directory:", output_file, "\n")
}
#
cat("Loading phenotype data from:", phenotype_file, "\n")
pheno_data <- read.csv(phenotype_file, stringsAsFactors = FALSE)
# Check required columns
required_cols <- c("IID", "time", "status")
missing_cols <- setdiff(required_cols, colnames(pheno_data))
if (length(missing_cols) > 0) {
stop("Missing required columns in phenotype file: ", paste(missing_cols, collapse = ", "))
}
cat(" - Loaded", nrow(pheno_data), "samples\n")
cat(" - Events:", sum(pheno_data$status), "/", nrow(pheno_data),
"(", round(100 * sum(pheno_data$status) / nrow(pheno_data), 1), "%)\n")
#
cat("Loading covariate data from:", covariate_file, "\n")
covar_data <- read.csv(covariate_file, stringsAsFactors = FALSE)
# Check IID column exists
if (!"IID" %in% colnames(covar_data)) {
stop("IID column not found in covariate file")
}
cat(" - Loaded", nrow(covar_data), "samples\n")
cat(" - Covariates:", paste(setdiff(colnames(covar_data), "IID"), collapse = ", "), "\n")
#
cat("Merging phenotype and covariate data...\n")
merged_data <- merge(pheno_data, covar_data, by = "IID", all.x = TRUE)
# Check for missing data after merge
n_complete <- sum(complete.cases(merged_data))
if (n_complete < nrow(merged_data)) {
cat(" Warning:", nrow(merged_data) - n_complete, "samples have missing covariate data\n")
merged_data <- merged_data[complete.cases(merged_data), ]
cat(" Using", nrow(merged_data), "complete samples\n")
}
#
cat("Fitting null Cox model...\n")
# Prepare formula
covar_names <- setdiff(colnames(merged_data), c("IID", "time", "status"))
if (length(covar_names) > 0) {
formula_str <- paste("Surv(time, status) ~", paste(covar_names, collapse = " + "))
} else {
formula_str <- "Surv(time, status) ~ 1"
}
cat(" Formula:", formula_str, "\n")
cat(" Using standard Cox regression...\n")
# Fit standard Cox model
null_model <- survival::coxph(as.formula(formula_str), data = merged_data)
model_type <- "Standard Cox"
#
cat("Saving fitted model to:", output_file, "\n")
# Add metadata to the model
model_info <- list(
model = null_model,
model_type = model_type,
formula = formula_str,
n_samples = nrow(merged_data),
n_events = sum(merged_data$status),
n_covariates = length(setdiff(colnames(merged_data), c("IID", "time", "status"))),
covariate_names = setdiff(colnames(merged_data), c("IID", "time", "status")),
fit_time = Sys.time(),
r_version = R.version.string
)
# Save to RDS file
saveRDS(model_info, file = output_file)
#
cat("\n=== Model Fitting Summary ===\n")
cat("Model type:", model_type, "\n")
cat("Formula:", formula_str, "\n")
cat("Samples used:", nrow(merged_data), "\n")
cat("Events:", sum(merged_data$status), "\n")
cat("Covariates:", length(model_info$covariate_names), "\n")
if (length(model_info$covariate_names) > 0) {
cat(" -", paste(model_info$covariate_names, collapse = ", "), "\n")
}
cat("Output file:", output_file, "\n")
cat("Fit completed at:", format(Sys.time()), "\n")
# Print model summary if available
if (model_type == "Standard Cox" && inherits(null_model, "coxph")) {
cat("\nModel Summary:\n")
print(summary(null_model))
}
cat("\nModel fitting completed successfully!\n")
invisible(output_file)
}
# This is an internal function.
#
# @param time Survival times
# @param status Event indicators
# @param covariates Covariate data frame (optional)
# @return Fitted Cox model object (SPACox or coxph)
# @examples
# \donttest{
# # Example with covariates
# data(example_phenotype)
# data(example_covariates)
#
# null_model <- fit_null_cox_model(
# time = example_phenotype$time,
# status = example_phenotype$status,
# covariates = example_covariates
# )
#
# # Example without covariates
# null_model <- fit_null_cox_model(
# time = example_phenotype$time,
# status = example_phenotype$status
# )
# }
#' Fit Null Cox Model
#'
#' @param time Numeric vector of survival times
#' @param status Numeric vector of censoring indicators
#' @param covariates Optional covariate data frame
#' @param use_spacox Logical, whether to use SPACox (ignored in CRAN version)
#' @return Fitted Cox model object
#' @export
fit_null_cox_model <- function(time, status, covariates = NULL, use_spacox = TRUE) {
# Input validation
if (length(time) != length(status)) {
stop("time and status must have the same length")
}
if (sum(status) < 2) {
stop("Need at least 2 events to fit Cox model")
}
# Prepare data
if (!is.null(covariates)) {
if (nrow(covariates) != length(time)) {
stop("Number of rows in covariates must match length of time/status")
}
# Create merged data
model_data <- data.frame(
time = time,
status = status,
covariates
)
# Remove incomplete cases
model_data <- model_data[complete.cases(model_data), ]
if (nrow(model_data) < 10) {
stop("Too few complete cases for model fitting")
}
covar_names <- colnames(covariates)
formula_str <- paste("Surv(time, status) ~", paste(covar_names, collapse = " + "))
} else {
# No covariates
model_data <- data.frame(time = time, status = status)
formula_str <- "Surv(time, status) ~ 1"
}
# Use standard Cox regression only
if (!requireNamespace("survival", quietly = TRUE)) {
stop("survival package is required")
}
null_model <- survival::coxph(as.formula(formula_str), data = model_data)
# Add metadata
attr(null_model, "model_type") <- "Standard Cox"
attr(null_model, "formula") <- formula_str
attr(null_model, "n_samples") <- nrow(model_data)
attr(null_model, "n_events") <- sum(model_data$status)
return(null_model)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.