R/msc.depth.R

Defines functions msc.depth

Documented in msc.depth

#' Check the read depth of assembled minicircles
#'
#' The msc.depth function allows you to analyze the read depth of assembled minicircles using depth statistics generated by KOMICS. These statistics provide information such as the average, median, minimum, and maximum read depth per site for each minicircle contig. By standardizing the median read depths per minicircle contig to the median genome-wide read depths, you can estimate minicircle copy numbers.
#'
#' @usage msc.depth(depthstats, groups, HCN = NULL)
#' @param depthstats character vector containing the file names of the depth statistics generated by KOMICS. Each file should be in the format "sample_name.depthstats.txt" (e.g., sampleA.depthstats.txt, sampleB.depthstats.txt,...).
#' @param groups a vector specifying the groups (e.g., species) to which the samples belong.
#' @param HCN an optional numeric vector containing haploid copy numbers of the corresponding samples. This argument is set to null by default.
#' @return 
#'   \item{all}{a table that merges the depth statistics of all samples. The table includes the average, median, minimum, and maximum per site read depth.}
#'   \item{plots}{a plot per sample that visualizes the median read depth distribution.}
#'   \item{medianRD}{a graph summarizing the median read depth distribution of all samples.}
#'   \item{CN}{a graph summarizing the copy number (if HCN is not null) of all samples.}
#' @examples
#' require(ggpubr)
#' data(exData)
#'
#' ### run function
#' \donttest{
#' depth <- msc.depth(depthstats = system.file("extdata", 
#'                   exData$depthstats, package = "rKOMICS"), groups = exData$species,
#'                   HCN = exData$medGWD/2)
#' 
#' ### visualize results 
#' hist(depth$all[,"MEDIAN.DEPTH"], breaks=100, 
#'     main="Global median depth distribution",xlab = (''))
#' 
#' ### alter plot 
#' annotate_figure(depth$plots$CUM29A1, fig.lab = "CUM29A1", 
#'                fig.lab.pos = "bottom.right", fig.lab.face = 'italic')
#' }
#' @importFrom utils read.table read.csv
#' @importFrom ggpubr ggarrange
#' @importFrom dplyr group_by summarise
#' @import ggplot2
#' @export

msc.depth <- function(depthstats, groups, HCN = NULL) {
  
  
  #############   0   Tests   #############
  
  
  if (sum(file.exists(depthstats))!=length(depthstats)) 
    stop("One or more files don't exist")
  if (!is.null(HCN)) {
    if (length(depthstats) != length(HCN)) 
      stop("the HCN vector and depthstats vector should be of equal length.")
  }
  if (length(depthstats)!=length(groups)) 
    stop('groups factor and depthstats are not of equal length. ')
  
  
  #############   0   Define global functions or variables   #############
  
  
  MEDIAN.DEPTH <- CN <- sample <- group <- NULL
  groups <- as.factor(groups)
  
  
  #############   1   Depth stats per minicircle contig   #############
  
  
  theme_set(theme_minimal() + 
              theme(axis.title = element_text(face = 'bold')))
  
  depths <- depth_plts <- hists_MEDIAN.DEPTH <- box_MEDIAN.DEPTH <- box_CN <- list()
  
  for (n in 1:length(depthstats)) {
    
    depth_file <- utils::read.csv(depthstats[n], sep=' ', header=T, row.names = 1)[,1:4]
    depth_file[,"sample"] <- gsub("_contig.*", "", rownames(depth_file)[1])
    depth_file[,"group"] <- groups[n]
    
    hists_MEDIAN.DEPTH[[n]] <- ggplot(depth_file, aes(x=MEDIAN.DEPTH)) + 
      geom_histogram(bins = 50, color="black", fill="gray") + 
      ylab('frequency') + 
      xlab('median depth') + 
      ggtitle(depth_file[1,5])
    
    box_MEDIAN.DEPTH[[n]] <- ggplot(depth_file, aes(x=MEDIAN.DEPTH)) + 
      geom_boxplot() + 
      xlab('median depth') + 
      theme(axis.text.y = element_blank()) 

    
    if (!is.null(HCN)) {
      
      depth_file$HCN <- HCN[n]
      depth_file$CN <- depth_file$MEDIAN.DEPTH/HCN[n]
      depth_file$MIN.CN <- depth_file$MIN.DEPTH/HCN[n]
      depth_file$MAX.CN <- depth_file$MAX.DEPTH/HCN[n]
      
      depths[[n]] <- depth_file
      
      box_CN[[n]] <- ggplot(depth_file, aes(x=CN)) +
        geom_boxplot() +
        xlab('copy number') +
        theme(axis.text.y = element_blank())
      
      depth_plts[[n]] <- ggpubr::ggarrange(hists_MEDIAN.DEPTH[[n]],
                                           box_CN[[n]],
                                           nrow=2,
                                           heights = c(3,1),
                                           align = "v")
  
    } else {
      
      depth_file$HCN <- depth_file$CN <- depth_file$MIN.CN <- depth_file$MAX.CN <- NA
      
      depth_plts[[n]] <- ggpubr::ggarrange(hists_MEDIAN.DEPTH[[n]],
                                           box_MEDIAN.DEPTH[[n]],
                                           nrow=2,
                                           heights = c(3,1),
                                           align = "v")
    }

  }
  
  depths <- data.frame(do.call("rbind", depths))
  names(depth_plts) <- unique(depths$sample)
  
  mean.RD <- depths %>%
    dplyr::group_by(sample) %>%
    dplyr::summarise(mean(depths$MEDIAN.DEPTH), .groups = 'drop')

  
  box_MEDIAN.DEPTH <- ggplot(depths, aes(y=MEDIAN.DEPTH, x=sample, fill=group, color=group)) +
    geom_boxplot(alpha=0.5, show.legend = F) + 
    xlab('') + 
    theme(axis.text.x = element_text(angle=90, hjust=1)) + 
    ylab('Minicircle median read depth') + 
    facet_grid(. ~ group, scales="free", space="free")
  
  
  box_CN <- NULL
  if (!is.null(HCN)) {
    box_CN <- ggplot(depths, aes(y=CN, x=sample, fill=group, color=group)) +
      geom_boxplot(alpha=0.5, show.legend = F) +
      xlab('') +
      theme(axis.text.x = element_text(angle=90, hjust=1)) +
      ylab('Minicircle copy number') +
      facet_grid(. ~ group, scales="free", space="free")
  }
  
  
  #############   2   Return read depth results    #############
  
  
  return(list("all" = depths,
              "plots" = depth_plts,
              "medianRD" = box_MEDIAN.DEPTH,
              "CN" = box_CN))
  
}

Try the rKOMICS package in your browser

Any scripts or data that you put into this service are public.

rKOMICS documentation built on July 9, 2023, 7:46 p.m.