Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup, warning=FALSE, message=FALSE--------------------------------------
library(longreadvqs)
library(ggplot2)
#Load sample 1.
sample1 <- system.file("extdata", "sample1.fasta", package = "longreadvqs")
## ----fig0, warning=FALSE, fig.height = 3, fig.width = 7.2, fig.align = "center"----
#Check which % cut-off could effectively minimize noises by assessing % singleton haplotypes.
x <- pctopt(sample1, pctsing = 0, label = "sample1")
ggplot(x, aes(x=pct, y=pctsingleton)) + geom_line() + geom_point()
## ----cmd0, warning=FALSE, message=FALSE---------------------------------------
#VQS diversity metrics of noise minimized (10% cut-off) read alignment
vqssub(sample1, pct = 10, label = "sample1")
## ----cmd1, warning=FALSE, message=FALSE---------------------------------------
#Load samples 2 and 3.
sample2 <- system.file("extdata", "sample2.fasta", package = "longreadvqs")
sample3 <- system.file("extdata", "sample3.fasta", package = "longreadvqs")
#Noise minimization (10% cut-off) and down-sampling (depth of 300)
a <- vqssub(sample1, pct = 10, samsize = 300, label = "sample1")
b <- vqssub(sample2, pct = 10, samsize = 300, label = "sample2")
c <- vqssub(sample3, pct = 10, samsize = 300, label = "sample3")
#Compare VQS diversity across three samples after Noise minimization and down-sampling.
rbind(a, b, c)
## ----cmd2, warning=FALSE, message=FALSE---------------------------------------
#Randomly down-sample from depth of 655 to 300, and 100 (10 iterations for each sample size).
set.seed(123)
c_full <- vqssub(sample3, pct = 10, label = "full_depth") #non-sampled alignment
c_300 <- vqsresub(sample3, iter = 10, pct = 10,
samsize = 300, label = "depth_300") #down-sample to depth of 300
c_100 <- vqsresub(sample3, iter = 10, pct = 10,
samsize = 100, label = "depth_100") #down-sample to depth of 100
all_c <- rbind(c_full, c_300, c_100) #combine data
#Load packages for visualization.
library(cowplot)
all_c$depth <- as.character(all_c$depth)
depthorder <- c("655", "300", "100")
#Plot box-plots of three VQS metrics variation through different sampling depths.
haplotypes <- ggplot(all_c, aes(x = factor(depth, level=depthorder), y = haplotypes,
group = interaction(depth, label), fill = label)) + geom_boxplot() +
xlab("depth") + theme(legend.position = "none")
shannon <- ggplot(all_c, aes(x = factor(depth, level=depthorder), y = shannon,
group = interaction(depth, label), fill = label)) + geom_boxplot() +
xlab("depth") + theme(legend.position = "none")
gini_simpson <- ggplot(all_c, aes(x = factor(depth, level=depthorder), y = gini_simpson,
group = interaction(depth, label), fill = label)) + geom_boxplot() +
xlab("depth") + theme(legend.position = "none")
## ----fig1, fig.height = 3, fig.width = 7.2, fig.align = "center"--------------
plot_grid(haplotypes, shannon, gini_simpson, nrow = 1)
## ----cmd3, warning=FALSE, message=FALSE---------------------------------------
#Generate complete VQS data for further comparison for each sample.
s1 <- vqsassess(sample1, pct = 10, samsize = 300, label = "sample1")
s2 <- vqsassess(sample2, pct = 10, samsize = 300, label = "sample2")
s3 <- vqsassess(sample3, pct = 10, samsize = 300, label = "sample3")
## ----fig2, fig.height = 6, fig.width = 7.2, fig.align = "center"--------------
#Compare SNV profile between three samples.
snvcompare(samplelist = list(s1, s2, s3), ncol = 1)
## ----cmd4, warning=FALSE, message=FALSE---------------------------------------
#Load sample 4.
sample4 <- system.file("extdata", "mock.fasta", package = "longreadvqs")
s4 <- vqsassess(sample4, pct = 10, samsize = 300, label = "sample4")
## ----fig3, warning=FALSE, fig.height = 2, fig.width = 7.2, fig.align = "center"----
#SNV profile of sample 4
s4$snv
## ----cmd5, warning=FALSE, message=FALSE---------------------------------------
#Use "vqscustompct" function to increase % cut-off after position 972 to 30%.
s4_fix <- vqscustompct(sample4, pct = 10, brkpos = c("1:971","972:982"), lspct = c(10,30),
samsize = 300, label = "sample4")
## ----fig4, warning=FALSE, fig.height = 2, fig.width = 7.2, fig.align = "center"----
#SNV profile of sample 4 after % cut-off adjustment
s4_fix$snv
## ----cmd6, warning=FALSE, message=FALSE---------------------------------------
#List outputs from "vqsassess" or "vqscustompct" that we want to compare into "vqscompare" function.
#Set the number of new OTU groups based on k-means clustering to 10 groups (kmeans.n = 10).
set.seed(1234)
comp <- vqscompare(samplelist = list(s1, s2, s3, s4_fix),
lab_name = "Sample", kmeans.n = 10)
## ----fig5, warning=FALSE, fig.height = 6, fig.width = 7.2, fig.align = "center"----
#The most important output of the "vqscompare" function is the summary plot.
comp$summaryplot
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.