Nothing
#' Check the read depth of assembled minicircles
#'
#' The msc.depth function allows you to analyze the read depth of assembled minicircles using depth statistics generated by KOMICS. These statistics provide information such as the average, median, minimum, and maximum read depth per site for each minicircle contig. By standardizing the median read depths per minicircle contig to the median genome-wide read depths, you can estimate minicircle copy numbers.
#'
#' @usage msc.depth(depthstats, groups, HCN = NULL)
#' @param depthstats character vector containing the file names of the depth statistics generated by KOMICS. Each file should be in the format "sample_name.depthstats.txt" (e.g., sampleA.depthstats.txt, sampleB.depthstats.txt,...).
#' @param groups a vector specifying the groups (e.g., species) to which the samples belong.
#' @param HCN an optional numeric vector containing haploid copy numbers of the corresponding samples. This argument is set to null by default.
#' @return
#' \item{all}{a table that merges the depth statistics of all samples. The table includes the average, median, minimum, and maximum per site read depth.}
#' \item{plots}{a plot per sample that visualizes the median read depth distribution.}
#' \item{medianRD}{a graph summarizing the median read depth distribution of all samples.}
#' \item{CN}{a graph summarizing the copy number (if HCN is not null) of all samples.}
#' @examples
#' require(ggpubr)
#' data(exData)
#'
#' ### run function
#' \donttest{
#' depth <- msc.depth(depthstats = system.file("extdata",
#' exData$depthstats, package = "rKOMICS"), groups = exData$species,
#' HCN = exData$medGWD/2)
#'
#' ### visualize results
#' hist(depth$all[,"MEDIAN.DEPTH"], breaks=100,
#' main="Global median depth distribution",xlab = (''))
#'
#' ### alter plot
#' annotate_figure(depth$plots$CUM29A1, fig.lab = "CUM29A1",
#' fig.lab.pos = "bottom.right", fig.lab.face = 'italic')
#' }
#' @importFrom utils read.table read.csv
#' @importFrom ggpubr ggarrange
#' @importFrom dplyr group_by summarise
#' @import ggplot2
#' @export
msc.depth <- function(depthstats, groups, HCN = NULL) {
############# 0 Tests #############
if (sum(file.exists(depthstats))!=length(depthstats))
stop("One or more files don't exist")
if (!is.null(HCN)) {
if (length(depthstats) != length(HCN))
stop("the HCN vector and depthstats vector should be of equal length.")
}
if (length(depthstats)!=length(groups))
stop('groups factor and depthstats are not of equal length. ')
############# 0 Define global functions or variables #############
MEDIAN.DEPTH <- CN <- sample <- group <- NULL
groups <- as.factor(groups)
############# 1 Depth stats per minicircle contig #############
theme_set(theme_minimal() +
theme(axis.title = element_text(face = 'bold')))
depths <- depth_plts <- hists_MEDIAN.DEPTH <- box_MEDIAN.DEPTH <- box_CN <- list()
for (n in 1:length(depthstats)) {
depth_file <- utils::read.csv(depthstats[n], sep=' ', header=T, row.names = 1)[,1:4]
depth_file[,"sample"] <- gsub("_contig.*", "", rownames(depth_file)[1])
depth_file[,"group"] <- groups[n]
hists_MEDIAN.DEPTH[[n]] <- ggplot(depth_file, aes(x=MEDIAN.DEPTH)) +
geom_histogram(bins = 50, color="black", fill="gray") +
ylab('frequency') +
xlab('median depth') +
ggtitle(depth_file[1,5])
box_MEDIAN.DEPTH[[n]] <- ggplot(depth_file, aes(x=MEDIAN.DEPTH)) +
geom_boxplot() +
xlab('median depth') +
theme(axis.text.y = element_blank())
if (!is.null(HCN)) {
depth_file$HCN <- HCN[n]
depth_file$CN <- depth_file$MEDIAN.DEPTH/HCN[n]
depth_file$MIN.CN <- depth_file$MIN.DEPTH/HCN[n]
depth_file$MAX.CN <- depth_file$MAX.DEPTH/HCN[n]
depths[[n]] <- depth_file
box_CN[[n]] <- ggplot(depth_file, aes(x=CN)) +
geom_boxplot() +
xlab('copy number') +
theme(axis.text.y = element_blank())
depth_plts[[n]] <- ggpubr::ggarrange(hists_MEDIAN.DEPTH[[n]],
box_CN[[n]],
nrow=2,
heights = c(3,1),
align = "v")
} else {
depth_file$HCN <- depth_file$CN <- depth_file$MIN.CN <- depth_file$MAX.CN <- NA
depth_plts[[n]] <- ggpubr::ggarrange(hists_MEDIAN.DEPTH[[n]],
box_MEDIAN.DEPTH[[n]],
nrow=2,
heights = c(3,1),
align = "v")
}
}
depths <- data.frame(do.call("rbind", depths))
names(depth_plts) <- unique(depths$sample)
mean.RD <- depths %>%
dplyr::group_by(sample) %>%
dplyr::summarise(mean(depths$MEDIAN.DEPTH), .groups = 'drop')
box_MEDIAN.DEPTH <- ggplot(depths, aes(y=MEDIAN.DEPTH, x=sample, fill=group, color=group)) +
geom_boxplot(alpha=0.5, show.legend = F) +
xlab('') +
theme(axis.text.x = element_text(angle=90, hjust=1)) +
ylab('Minicircle median read depth') +
facet_grid(. ~ group, scales="free", space="free")
box_CN <- NULL
if (!is.null(HCN)) {
box_CN <- ggplot(depths, aes(y=CN, x=sample, fill=group, color=group)) +
geom_boxplot(alpha=0.5, show.legend = F) +
xlab('') +
theme(axis.text.x = element_text(angle=90, hjust=1)) +
ylab('Minicircle copy number') +
facet_grid(. ~ group, scales="free", space="free")
}
############# 2 Return read depth results #############
return(list("all" = depths,
"plots" = depth_plts,
"medianRD" = box_MEDIAN.DEPTH,
"CN" = box_CN))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.