# Tools for processing CNV data
#' Determine if the segment mean indicates an amplification or a deletion
#'
#' @param values A numeric vector - the log2Ratio values for copy number changes.
#' @param cutoff A numeric vector of length 2 - the cutoff for calling amplifications or deletions
#' @param return_val A string - if \code{"logic"}, return \code{c(-1,0,1)}-based logical\cr
#' value; if \code{"num"}, return original numeric value; if anything else, \cr
#' return \code{c("Loss",NA,"Gain")}-based string. Default value is \code{"logic"}.
#' @param to_val A vector - the text return value. Default value is \code{c("Gain", NA, "Loss")}.
#'
#' @return A numeric vector of the same length with \code{values}
#' @export
test_cnv <- function(values,
cutoff = c(-0.3, 0.3),
return_val = "logic",
to_val = c("Gain", NA, "Loss")) {
if (cutoff[2] <= cutoff[1]) {
stop("Cutoff values not accepted")
}
if (return_val == "num") {
return(values)
} else {
test_1 <- (values > cutoff[2])
test_2 <- (values >= cutoff[1])
test <- test_1 + test_2 - 1
if (return_val == "logic") {
return(test)
} else {
test <- plyr::mapvalues(x = test,
from = c(1, 0, -1),
to = to_val)
return(test)
}
}
}
#' Get segmentation data from raw.txt files generated by Nexus
#'
#' @param file A string vector - list of raw.txt file names
#' @param sample_name A string vector - matching the names of \code{file}. Default value is \code{NULL}.
#' @param pattern A string - the pattern used to generate sample names if \code{sample_name} is not specified.
#'
#' @return A dataframe of segmentation data
#' @export
log2ratio_to_segment <- function(file, sample_name = NULL, pattern = NULL) {
if (is.null(sample_name) & (is.null(pattern))) {
warning("No sample names or pattern specified. Sample names will be automatically generated.",
immediate. = TRUE)
sample_name <- 1:length(file)
} else if (is.null(sample_name)) {
sample_name <- stringr::str_extract(string = file, pattern = pattern)
} else if (length(file) != length(sample_name)) {
stop("Lengths of files and sample names should be the same")
} else {}
data_list <- list()
for (i in seq_along(1:length(file))) {
df <- utils::read.delim(file[i], header = TRUE)
cna <- DNAcopy::CNA(genomdat = df$Log2Ratio,
chrom = df$Chromosome,
maploc = df$Position,
data.type = "logratio",
sampleid = sample_name[i])
cna <- DNAcopy::smooth.CNA(cna)
cna <- DNAcopy::segment(cna)
data_list[[i]] <- cna$output
}
return(as.data.frame(do.call("rbind", data_list)))
}
#' Get CNV status from segmentation data
#'
#' @param df A dataframe - segmentation data
#' @param gene_list A string vector - list of human gene symbols. Default value is \code{NULL}, indicating the whole genome.
#' @param as_matrix A logical value - if the result table is cast to matrix format. Default value is \code{FALSE}.
#' @param ... Additional parameters passed to \code{test_cnv()}
#'
#' @return A dataframe of CNV status
#' @importFrom dplyr filter mutate
#' @importFrom magrittr %>% %<>%
#' @importFrom rlang .data
#' @export
segment_to_cnv <- function(df,
gene_list = NULL,
as_matrix = FALSE,
...) {
df <- as.data.frame(df)
cnseg <- CNTools::CNSeg(df)
if (!is.null(gene_list)) {
gene_info <- geneInfo %>%
filter(.data$genename %in% gene_list)
} else {
gene_info <- geneInfo
}
rdByGene <- CNTools::getRS(cnseg,
by = "gene",
imput = FALSE,
XY = FALSE,
geneMap = as.data.frame(gene_info),
what = "median")
cnv_res <- rdByGene@rs %>%
`[`(6:ncol(rdByGene@rs)) %>%
t()
colnames(cnv_res) <- rdByGene@rs$genename
if (as_matrix) {
cnv_res %<>%
test_cnv(...) %>%
t()
} else {
cnv_res <- reshape2::melt(cnv_res)
colnames(cnv_res) <- c("Sample", "Gene", "CNV")
cnv_res %<>%
mutate(CNV = test_cnv(.data$CNV, ...))
}
return(cnv_res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.