#' calc_dcorr_GC_content_counts
#'
#' Calculates the distance correlation between normalized counts and the GC content of sgRNAs in a given library.
#'
#' The library is first passed through `fgcQC::calc_GC_percent_library()`. The library and
#' the counts are then restricted to the sgRNA IDs they have in common, and
#' `energy::dcor2d()` (U-statistic form, `type = "U"`) is evaluated once per sample column
#' against the GC percentage of the corresponding sgRNAs.
#'
#' @param counts A data frame of normalized counts for each sample in the study (samples as columns, gRNAs as rows).
#'   It must contain an `sgRNA` column holding the sgRNA IDs; every column from the third onwards is treated as a sample.
#' @param library A data frame containing the library file in which the first column gives the sgRNA sequence and the second column gives the sgRNA ID.
#'   After `fgcQC::calc_GC_percent_library()` it is expected to expose the sgRNA ID as `V2` and the GC content as `GC_percent`.
#'
#' @return A data frame with a `SampleName` column and a `distcorr_GC_content_counts` column (one row per sample column of `counts`).
#' @author Alex T. Kalinka, \email{alex.kalinka@@cancer.org.uk}
#' @importFrom dplyr %>% mutate rowwise ungroup filter arrange
#' @importFrom energy dcor2d
#' @importFrom magrittr %<>%
#' @references Szekely, G.J., Rizzo, M.L., and Bakirov, N.K. (2007), Measuring and Testing Dependence by Correlation of Distances, Annals of Statistics, Vol. 35 No. 6, pp. 2769-2794.
#' @export
calc_dcorr_GC_content_counts <- function(counts, library) {
  tryCatch({
    # Without at least one column after the first two there is nothing to
    # correlate; fail early instead of indexing backwards with `3:ncol(counts)`.
    if (ncol(counts) < 3L) {
      stop("'counts' must contain at least one sample column after the first two columns")
    }

    # Annotate the library with GC content and keep only guides present in the counts.
    library %<>%
      fgcQC::calc_GC_percent_library() %>%
      dplyr::filter(V2 %in% counts$sgRNA) %>%
      dplyr::arrange(V2)

    # Keep only guides that are present in the (filtered) library.
    counts %<>%
      dplyr::filter(sgRNA %in% library$V2) %>%
      dplyr::arrange(sgRNA)

    sample_names <- colnames(counts)[-c(1L, 2L)]

    # GC content aligned to the row order of `counts`; this does not depend on
    # the sample, so compute it once rather than once per row.
    gc_percent <- library$GC_percent[match(counts$sgRNA, library$V2)]

    ret <- data.frame(SampleName = sample_names, stringsAsFactors = FALSE) %>%
      dplyr::rowwise() %>%
      dplyr::mutate(
        # `[[` always yields the column as a vector, whether `counts` is a
        # base data frame or a tibble.
        distcorr_GC_content_counts = energy::dcor2d(
          counts[[SampleName]],
          gc_percent,
          type = "U"
        )
      ) %>%
      dplyr::ungroup()
  },
  error = function(e) {
    # Report only the underlying message rather than the coerced condition object.
    stop(
      paste(
        "calc_dcorr_GC_content_counts: unable to calculate dist corr for GC vs counts:",
        conditionMessage(e)
      ),
      call. = FALSE
    )
  })
  ret
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.