R/cnv.R

Defines functions segment_to_cnv log2ratio_to_segment test_cnv

Documented in log2ratio_to_segment segment_to_cnv test_cnv

# Tools for processing CNV data

#' Determine if the segment mean indicates an amplification or a deletion
#'
#' @param values A numeric vector - the log2Ratio values for copy number changes.
#' @param cutoff A numeric vector of length 2 - the cutoff for calling amplifications or deletions
#' @param return_val A string - if \code{"logic"}, return \code{c(-1,0,1)}-based logical\cr
#' value; if \code{"num"}, return original numeric value; if anything else, \cr
#' return \code{c("Loss",NA,"Gain")}-based string. Default value is \code{"logic"}.
#' @param to_val A vector - the text return value. Default value is \code{c("Gain", NA, "Loss")}.
#'
#' @return A numeric vector of the same length with \code{values}
#' @export

test_cnv <- function(values,
                     cutoff = c(-0.3, 0.3),
                     return_val = "logic",
                     to_val = c("Gain", NA, "Loss")) {

    if (cutoff[2] <= cutoff[1]) {
        stop("Cutoff values not accepted")
    }

    if (return_val == "num") {
        return(values)
    } else {

        test_1 <- (values > cutoff[2])
        test_2 <- (values >= cutoff[1])

        test <- test_1 + test_2 - 1

        if (return_val == "logic") {
            return(test)
        } else {
            test <- plyr::mapvalues(x = test,
                                    from = c(1, 0, -1),
                                    to = to_val)
            return(test)
        }
    }
}


#' Get segmentation data from raw.txt files generated by Nexus
#'
#' @param file A string vector - list of raw.txt file names
#' @param sample_name A string vector - matching the names of \code{file}. Default value is \code{NULL}.
#' @param pattern A string - the pattern used to generate sample names if \code{sample_name} is not specified.
#'
#' @return A dataframe of segmentation data
#' @export

log2ratio_to_segment <- function(file, sample_name = NULL, pattern = NULL) {

    if (is.null(sample_name) & (is.null(pattern))) {

        warning("No sample names or pattern specified. Sample names will be automatically generated.",
                immediate. = TRUE)

        sample_name <- 1:length(file)

    } else if (is.null(sample_name)) {

        sample_name <- stringr::str_extract(string = file, pattern = pattern)

    } else if (length(file) != length(sample_name)) {

        stop("Lengths of files and sample names should be the same")

    } else {}


    data_list <- list()

    for (i in seq_along(1:length(file))) {

        df <- utils::read.delim(file[i], header = TRUE)

        cna <- DNAcopy::CNA(genomdat = df$Log2Ratio,
                            chrom = df$Chromosome,
                            maploc = df$Position,
                            data.type = "logratio",
                            sampleid = sample_name[i])

        cna <- DNAcopy::smooth.CNA(cna)
        cna <- DNAcopy::segment(cna)

        data_list[[i]] <- cna$output
    }

    return(as.data.frame(do.call("rbind", data_list)))
}


#' Get CNV status from segmentation data
#'
#' @param df A dataframe - segmentation data
#' @param gene_list A string vector - list of human gene symbols. Default value is \code{NULL}, indicating the whole genome.
#' @param as_matrix A logical value - if the result table is cast to matrix format. Default value is \code{FALSE}.
#' @param ... Additional parameters passed to \code{test_cnv()}
#'
#' @return A dataframe of CNV status
#' @importFrom dplyr filter mutate
#' @importFrom magrittr %>% %<>%
#' @importFrom rlang .data
#' @export

segment_to_cnv <- function(df,
                           gene_list = NULL,
                           as_matrix = FALSE,
                           ...) {

    df <- as.data.frame(df)

    cnseg <- CNTools::CNSeg(df)

    if (!is.null(gene_list)) {
        gene_info <- geneInfo %>%
            filter(.data$genename %in% gene_list)
    } else {
        gene_info <- geneInfo
    }

    rdByGene <- CNTools::getRS(cnseg,
                               by = "gene",
                               imput = FALSE,
                               XY = FALSE,
                               geneMap = as.data.frame(gene_info),
                               what = "median")

    cnv_res <- rdByGene@rs %>%
        `[`(6:ncol(rdByGene@rs)) %>%
        t()

    colnames(cnv_res) <- rdByGene@rs$genename

    if (as_matrix) {

        cnv_res %<>%
            test_cnv(...) %>%
            t()

    } else {
        cnv_res <- reshape2::melt(cnv_res)
        colnames(cnv_res) <- c("Sample", "Gene", "CNV")
        cnv_res %<>%
            mutate(CNV = test_cnv(.data$CNV, ...))
    }

    return(cnv_res)
}
xmc811/xmcutil documentation built on June 4, 2021, 10:48 a.m.