Nothing
#' Identify Recurrent Copy Number Variations by Risk Group
#'
#' Filters a CNV data file for samples belonging to a specified risk group
#' and identifies genomic regions that recur across multiple samples at or
#' above a given threshold. Results are saved as a CSV file.
#'
#' @param x A named list of sample ID vectors, as returned by
#'   \code{\link{classify_risk}}. Each element name corresponds to a risk
#'   group label (e.g., \code{"low_risk"}, \code{"intermediate_risk"},
#'   \code{"high_risk"}).
#' @param risk_level Character. The risk group to analyse. Must be a name
#'   present in \code{x}.
#' @param cnv_data_file Character. Path to the CNV data file
#'   (whitespace-delimited, with a header). Must contain columns:
#'   \code{Sample}, \code{Chromosome}, \code{Start}, \code{End},
#'   \code{Num_Probes}, \code{Segment_Mean}.
#' @param threshold Numeric. Minimum number of distinct samples a CNV region
#'   must appear in to be considered recurrent. Default is \code{2}.
#'
#' @return Character. The file path of the saved CSV file containing the
#'   recurrent CNV regions for the specified risk group.
#'
#' @details
#' Sample IDs, both in the CNV file and in \code{x[[risk_level]]}, are
#' trimmed to 12 characters and hyphens are replaced with dots before they
#' are compared, so either ID spelling matches. A region is identified by
#' its \code{Chromosome}, \code{Start} and \code{End} values, and its
#' recurrence is the number of distinct samples carrying it; repeated rows
#' for the same sample and region count once. All rows of a recurrent region
#' are written to the output. The output CSV is saved inside a timestamped
#' subdirectory under \code{recurrent_cnv/} in the temporary directory.
#'
#' An error is raised when \code{risk_level} is not a name in \code{x}, when
#' \code{cnv_data_file} is an empty string, when a required column is
#' missing, when no CNV row belongs to the requested risk group, or when the
#' output directory cannot be created.
#'
#' @examples
#' sample_file <- system.file("extdata", "sample_data.csv", package = "RiskyCNV")
#' cnv_file <- system.file("extdata", "cnv_data.txt", package = "RiskyCNV")
#' risk_result <- classify_risk(
#'   file_path = sample_file,
#'   column_name = "gleason_score",
#'   disease_type = "prostate",
#'   output_dir = tempdir()
#' )
#' output_path <- recurrent(
#'   x = risk_result,
#'   risk_level = "low_risk",
#'   cnv_data_file = cnv_file,
#'   threshold = 2
#' )
#' print(output_path)
#'
#' @export
recurrent <- function(x, risk_level, cnv_data_file, threshold = 2) {
  required_cols <- c("Sample", "Chromosome", "Start", "End",
                     "Num_Probes", "Segment_Mean")

  # ---- Argument validation ----
  if (!is.character(risk_level) || length(risk_level) != 1L ||
      is.na(risk_level)) {
    stop("`risk_level` must be a single character string.", call. = FALSE)
  }
  if (!risk_level %in% names(x)) {
    stop("`risk_level` '", risk_level, "' is not a name in `x`. Available: ",
         paste(names(x), collapse = ", "), call. = FALSE)
  }
  if (!is.numeric(threshold) || length(threshold) != 1L || is.na(threshold)) {
    stop("`threshold` must be a single non-missing number.", call. = FALSE)
  }
  # read.table(file = "") reads from stdin; system.file() returns "" when the
  # requested file does not exist, so reject that case explicitly.
  if (identical(cnv_data_file, "")) {
    stop("`cnv_data_file` is an empty path.", call. = FALSE)
  }

  # Same normalisation for both sides of the comparison: first 12 characters,
  # hyphens turned into dots.
  normalise_id <- function(id) {
    gsub("-", ".", substr(as.character(id), 1, 12), fixed = TRUE)
  }

  # ---- Load and filter the CNV table ----
  cnv_data <- utils::read.table(cnv_data_file, header = TRUE)
  missing_cols <- setdiff(required_cols, names(cnv_data))
  if (length(missing_cols) > 0) {
    stop("CNV data file is missing required column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  cnv_data$Sample <- normalise_id(cnv_data$Sample)
  group_ids <- normalise_id(x[[risk_level]])
  sample_cnv_data <- cnv_data[cnv_data$Sample %in% group_ids, , drop = FALSE]
  if (nrow(sample_cnv_data) == 0) {
    stop("No matching samples found in CNV data for risk level: ", risk_level,
         call. = FALSE)
  }

  # ---- Count distinct samples per region ----
  region_key <- paste(sample_cnv_data$Chromosome,
                      sample_cnv_data$Start,
                      sample_cnv_data$End)
  # Keep one (sample, region) pair per sample so that duplicated rows within
  # a single sample do not inflate the recurrence count.
  first_occurrence <- !duplicated(
    data.frame(sample = sample_cnv_data$Sample, region = region_key)
  )
  samples_per_region <- table(region_key[first_occurrence])
  recurrent_regions <- names(samples_per_region)[
    samples_per_region >= threshold
  ]
  recurrent_df <- sample_cnv_data[
    region_key %in% recurrent_regions,
    required_cols,
    drop = FALSE
  ]

  # ---- Write the result ----
  timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
  output_dir <- file.path(tempdir(), "recurrent_cnv", timestamp)
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
  }
  if (!dir.exists(output_dir)) {
    stop("Could not create output directory: ", output_dir, call. = FALSE)
  }
  output_file <- file.path(output_dir,
                           paste0("recurrent_cnvs_", risk_level, ".csv"))
  utils::write.csv(recurrent_df, file = output_file, row.names = FALSE)
  message("Recurrent CNVs for ", risk_level, " saved to: ", output_file)
  output_file
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.