R/bw_corr.R

Defines functions bw_corr

Documented in bw_corr

#' plot correlation scatterplot for bw files
#' @description correlation between two replicates of samples with correlation
#'   value.
#' @author pooja sethiya
#' @param dat Absolute path of count matrix. The input file can be generated by
#'   deeptools \code{multiBigwigSummary bins} command. The output should be a
#'   counts matrix \code{--outRawCounts} with first three columns:\code{chr
#'   start end} and later sample wise counts.  Alternatively, a matrix of
#'   expression values with first column gene-name and later expression values.
#' @param multiBigwigSummary_output Logical, whether the input matrix is
#'   \code{multiBigwigSummary bins} out put or not.\code{ default:TRUE}
#' @param pattern A character vector, defining pattern to split the columns in
#'   replicates. \code{ default: E.g. sampleA_set1, sampleA_set2, sampleB_set1,
#'   sampleB_set2. pattern="(.*)_(.*)". will have groups of sample: sampleA, sampleB
#'   and replicate: set1, set2}
#'
#' @return a multi-sample scatterplot of log2 values with correlation value
#' @export
#' @importFrom dplyr select
#' @importFrom dplyr mutate
#' @importFrom tidyr as_tibble
#' @importFrom tidyr pivot_longer
#' @importFrom dplyr summarise
#' @importFrom dplyr group_by
#' @importFrom tidyr spread
#' @import ggplot2
#'
#' @examples
#' \dontrun{
#'
#' dat_path <- system.file("extdata//correlation_data/an_histone_spore_count_mat.tab" , package = "FungalSporeAnalysis")
#' dat <- read.delim(dat_path,sep="\t", header = TRUE)
#' gp <- bw_corr(dat, pattern = ".*_spore_(.*)_(.*)")
#' ggsave(gp, filename = "An_spore_histone_bwscatterplot.png", height=5, width=5, device = "png", units = "in")
#'
#' }
bw_corr <- function(dat,multiBigwigSummary_output=TRUE, pattern=NULL ){

          if(multiBigwigSummary_output==TRUE){
                    dd <- dat %>% dplyr::select(c(4:ncol(dat)))
          }
          else{
                    message("First column should be gene-name and later should be expression value")
                    dd <- dat %>% dplyr::select(c(2:ncol(dat)))
          }

          if(is.null(pattern)==FALSE){

                    pattern <- pattern

                    # ".*_spore_(.*)_(.*)"
          }

          else{
                    pattern <- "(.*)_(.*)"
          }

                  dd <-   dd %>%
                              dplyr::mutate(genes=paste("genes", 1:nrow(.), sep = "")) %>%
                              tidyr::as_tibble() %>%
                              tidyr::pivot_longer(names_pattern = pattern,names_to = c("sample", "replicate"), values_to = "values", -genes) %>%
                              dplyr::mutate(values=tidyr::replace_na(values, 0), values=log2(values+1)) %>%
                              tidyr::spread(replicate, values)

          # compute correlation

          cors <- dd %>%
                    dplyr::group_by(sample) %>%
                    dplyr::summarise(cor=round(cor(set1, set2), 2), max=max(c(set1, set2)), min=min(c(set1, set2)))

          max_lim <- max(cors$max)
          min_lim <- min(cors$min)
          x_cor <- max_lim - 1
          y_cor <- min_lim + 1

          gp <-  dd %>% ggplot2::ggplot(ggplot2::aes(set1, set2))+
                              ggplot2::geom_point() +
                              ggplot2::facet_wrap(~sample, scales = "free") +
                              ggplot2::xlim(min_lim, max_lim)+
                              ggplot2::ylim(min_lim, max_lim)+
                              ggplot2::geom_text(data=cors, aes(label=paste("r=", cor, sep="")), x=x_cor, y=y_cor) + ggplot2::theme_bw()+
                              ggplot2::theme(axis.text = element_text(color="black", size=10))

         return(gp)


}
sethiyap/FungalSporeAnalysis documentation built on July 31, 2020, 10:26 p.m.