Nothing
#' Correlate CNV Profiles with Gene Expression Data
#'
#' Computes Pearson correlations between CNV segment means and RNA
#' expression values for each gene present in both datasets. RNA data is
#' log2-transformed prior to analysis. Three result files are written:
#' all correlations, those with p-value < 0.05, and those with both
#' p-value < 0.05 and correlation coefficient > 0.8.
#'
#' @param cnv_file Character. Path to the CNV matrix CSV file (output of
#' \code{\link{create_CNVMatrix}}). Rows are samples; first column is
#' sample IDs; remaining columns are gene symbols.
#' @param rna_file Character. Path to the RNA expression CSV file. Rows
#' are genes; first column is gene names; remaining columns are sample
#' IDs (trimmed to 12 characters for TCGA-style matching).
#'
#' @return A named list with three data frames:
#' \describe{
#' \item{all_correlations}{All computed Pearson correlations with
#' columns \code{gene}, \code{cor_val}, \code{p.value}.}
#' \item{significant}{Subset where \code{p.value < 0.05}.}
#' \item{high_correlation}{Subset where \code{p.value < 0.05} AND
#' \code{cor_val > 0.8}.}
#' }
#' Results are also written to CSV files in the temporary directory.
#'
#' @details
#' Sample IDs in the RNA file are trimmed to 12 characters to match
#' TCGA-style identifiers. Infinite values from \code{log2(0)} are
#' replaced with 0. Pearson correlation is computed using
#' \code{stats::cor.test} with \code{use = "complete.obs"}. This
#' function is cancer-type agnostic.
#'
#' @references
#' Chin L, et al. (2011). Making sense of cancer genomic data.
#' \emph{Genes Dev}, 25(6):534-555.
#'
#' @examples
#' cnv_file <- system.file("extdata", "cnv_matrix.csv", package = "RiskyCNV")
#' rna_file <- system.file("extdata", "rna_data.csv", package = "RiskyCNV")
#' results <- correlate_with_expr(
#' cnv_file = cnv_file,
#' rna_file = rna_file
#' )
#' head(results$all_correlations)
#'
#' @export
correlate_with_expr <- function(cnv_file, rna_file) {
cnv <- utils::read.csv(cnv_file)
rna <- utils::read.csv(rna_file)
rownames(cnv) <- make.names(cnv[, 1], unique = TRUE)
cnv <- cnv[, -1]
rownames(rna) <- rna[, 1]
rna <- rna[, -1]
colnames(rna) <- substring(colnames(rna), 1, 12)
sample_intersect <- intersect(colnames(rna), rownames(cnv))
genes_intersect <- intersect(rownames(rna), colnames(cnv))
cnv.subset <- cnv[sample_intersect, genes_intersect, drop = FALSE]
rna.subset <- rna[genes_intersect, sample_intersect, drop = FALSE]
cnv.subset <- as.data.frame(
lapply(cnv.subset, function(x) as.numeric(as.character(x))),
row.names = rownames(cnv.subset)
)
rna.subset <- as.data.frame(
lapply(rna.subset, function(x) as.numeric(as.character(x))),
row.names = rownames(rna.subset)
)
rna.subset <- log2(rna.subset)
rna.subset <- as.data.frame(
apply(rna.subset, 2, function(x) { x[is.infinite(x)] <- 0; x })
)
cor_val <- numeric()
p.value <- numeric()
gene <- character()
common_genes <- intersect(rownames(rna.subset), colnames(cnv.subset))
message("Common genes found: ", length(common_genes))
for (i in common_genes) {
R <- as.numeric(unlist(rna.subset[i, ]))
C <- as.numeric(unlist(cnv.subset[, i]))
if (length(R) == length(C) && length(R) > 0) {
cor_result <- tryCatch(
stats::cor.test(R, C, method = "pearson", use = "complete.obs"),
error = function(e) NULL
)
if (!is.null(cor_result)) {
gene <- c(gene, i)
cor_val <- c(cor_val, cor_result$estimate)
p.value <- c(p.value, cor_result$p.value)
}
}
}
all_correlations <- stats::na.omit(
data.frame(gene = gene, cor_val = cor_val, p.value = p.value)
)
significant <- all_correlations[all_correlations$p.value < 0.05, ]
high_correlation <- significant[significant$cor_val > 0.8, ]
out_dir <- tempdir()
utils::write.csv(all_correlations,
file.path(out_dir, "all_correlations.csv"),
row.names = FALSE)
utils::write.csv(significant,
file.path(out_dir, "significant_corr.csv"),
row.names = FALSE)
utils::write.csv(high_correlation,
file.path(out_dir, "high_correlation.csv"),
row.names = FALSE)
message("Correlation results saved to: all_correlations.csv, ",
"significant_corr.csv, high_correlation.csv")
return(list(
all_correlations = all_correlations,
significant = significant,
high_correlation = high_correlation
))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.