#' plot correlation scatterplot for bw files
#' @description correlation between two replicates of samples with correlation
#' value.
#' @author pooja sethiya
#' @param dat Absolute path of count matrix. The input file can be generated by
#' deeptools \code{multiBigwigSummary bins} command. The output should be a
#' counts matrix \code{--outRawCounts} with first three columns:\code{chr
#' start end} and later sample wise counts. Alternatively, a matrix of
#' expression values with first column gene-name and later expression values.
#' @param multiBigwigSummary_output Logical, whether the input matrix is
#' \code{multiBigwigSummary bins} out put or not.\code{ default:TRUE}
#' @param pattern A character vector, defining pattern to split the columns in
#' replicates. \code{ default: E.g. sampleA_set1, sampleA_set2, sampleB_set1,
#' sampleB_set2. pattern="(.*)_(.*)". will have groups of sample: sampleA, sampleB
#' and replicate: set1, set2}
#'
#' @return a multi-sample scatterplot of log2 values with correlation value
#' @export
#' @importFrom dplyr select
#' @importFrom dplyr mutate
#' @importFrom tidyr as_tibble
#' @importFrom tidyr pivot_longer
#' @importFrom dplyr summarise
#' @importFrom dplyr group_by
#' @importFrom tidyr spread
#' @import ggplot2
#'
#' @examples
#' \dontrun{
#'
#' dat_path <- system.file("extdata//correlation_data/an_histone_spore_count_mat.tab" , package = "FungalSporeAnalysis")
#' dat <- read.delim(dat_path,sep="\t", header = TRUE)
#' gp <- bw_corr(dat, pattern = ".*_spore_(.*)_(.*)")
#' ggsave(gp, filename = "An_spore_histone_bwscatterplot.png", height=5, width=5, device = "png", units = "in")
#'
#' }
bw_corr <- function(dat,multiBigwigSummary_output=TRUE, pattern=NULL ){
if(multiBigwigSummary_output==TRUE){
dd <- dat %>% dplyr::select(c(4:ncol(dat)))
}
else{
message("First column should be gene-name and later should be expression value")
dd <- dat %>% dplyr::select(c(2:ncol(dat)))
}
if(is.null(pattern)==FALSE){
pattern <- pattern
# ".*_spore_(.*)_(.*)"
}
else{
pattern <- "(.*)_(.*)"
}
dd <- dd %>%
dplyr::mutate(genes=paste("genes", 1:nrow(.), sep = "")) %>%
tidyr::as_tibble() %>%
tidyr::pivot_longer(names_pattern = pattern,names_to = c("sample", "replicate"), values_to = "values", -genes) %>%
dplyr::mutate(values=tidyr::replace_na(values, 0), values=log2(values+1)) %>%
tidyr::spread(replicate, values)
# compute correlation
cors <- dd %>%
dplyr::group_by(sample) %>%
dplyr::summarise(cor=round(cor(set1, set2), 2), max=max(c(set1, set2)), min=min(c(set1, set2)))
max_lim <- max(cors$max)
min_lim <- min(cors$min)
x_cor <- max_lim - 1
y_cor <- min_lim + 1
gp <- dd %>% ggplot2::ggplot(ggplot2::aes(set1, set2))+
ggplot2::geom_point() +
ggplot2::facet_wrap(~sample, scales = "free") +
ggplot2::xlim(min_lim, max_lim)+
ggplot2::ylim(min_lim, max_lim)+
ggplot2::geom_text(data=cors, aes(label=paste("r=", cor, sep="")), x=x_cor, y=y_cor) + ggplot2::theme_bw()+
ggplot2::theme(axis.text = element_text(color="black", size=10))
return(gp)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.