Nothing
## ----dontrun-basics-vignette, eval=FALSE--------------------------------------
# vignette("phyloseq-basics")
## ----load-packages, message=FALSE, warning=FALSE------------------------------
library("phyloseq")
library("ggplot2")
## ----ggplot2-themes-----------------------------------------------------------
theme_set(theme_bw())
## -----------------------------------------------------------------------------
data(GlobalPatterns)
## -----------------------------------------------------------------------------
# prune OTUs that are not present in at least one sample
GP <- prune_taxa(taxa_sums(GlobalPatterns) > 0, GlobalPatterns)
# Define a human-associated versus non-human categorical variable:
human <- get_variable(GP, "SampleType") %in% c("Feces", "Mock", "Skin", "Tongue")
# Add new human variable to sample data:
sample_data(GP)$human <- factor(human)
## ----richness_estimates0, fig.width=13, fig.height=7--------------------------
alpha_meas = c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson")
(p <- plot_richness(GP, "human", "SampleType", measures=alpha_meas))
## ----richness_estimates, fig.width=13,height=7--------------------------------
p + geom_boxplot(data=p$data, aes(x=human, y=value, color=NULL), alpha=0.1)
## -----------------------------------------------------------------------------
GP.chl <- subset_taxa(GP, Phylum=="Chlamydiae")
## ----GP-chl-tree, fig.width=15, fig.height=7, message=FALSE, warning=FALSE----
plot_tree(GP.chl, color="SampleType", shape="Family", label.tips="Genus", size="Abundance")
## -----------------------------------------------------------------------------
data(enterotype)
## ----EntAbundPlot, fig.height=6, fig.width=8----------------------------------
par(mar = c(10, 4, 4, 2) + 0.1) # make more room on bottom margin
N <- 30
barplot(sort(taxa_sums(enterotype), TRUE)[1:N]/nsamples(enterotype), las=2)
## -----------------------------------------------------------------------------
rank_names(enterotype)
## -----------------------------------------------------------------------------
TopNOTUs <- names(sort(taxa_sums(enterotype), TRUE)[1:10])
ent10 <- prune_taxa(TopNOTUs, enterotype)
print(ent10)
## -----------------------------------------------------------------------------
sample_variables(ent10)
## ----entbarplot0, fig.height=6, fig.width=10----------------------------------
plot_bar(ent10, "SeqTech", fill="Enterotype", facet_grid=~Genus)
## ----GPheatmap----------------------------------------------------------------
data("GlobalPatterns")
gpac <- subset_taxa(GlobalPatterns, Phylum=="Crenarchaeota")
(p <- plot_heatmap(gpac, "NMDS", "bray", "SampleType", "Family"))
## ----GPheatmap-rename-axes----------------------------------------------------
p$scales$scales[[1]]$name <- "My X-Axis"
p$scales$scales[[2]]$name <- "My Y-Axis"
print(p)
## ----plot_sample_network, fig.width=11, fig.height=7, message=FALSE, warning=FALSE----
data(enterotype)
plot_net(enterotype, maxdist=0.4, color="SeqTech", shape="Enterotype")
## ----eval=FALSE---------------------------------------------------------------
# my.physeq <- import("Biom", BIOMfilename="myBiomFile.biom")
# my.ord <- ordinate(my.physeq)
# plot_ordination(my.physeq, my.ord, color="myFavoriteVarible")
## ----help-import, eval=FALSE--------------------------------------------------
# help(import)
# help(ordinate)
# help(distance)
# help(plot_ordination)
## ----GP-data-load-------------------------------------------------------------
data(GlobalPatterns)
## ---- eval=FALSE--------------------------------------------------------------
# GPUF <- UniFrac(GlobalPatterns)
## ----load-precomputed-UF------------------------------------------------------
load(system.file("doc", "Unweighted_UniFrac.RData", package="phyloseq"))
## -----------------------------------------------------------------------------
GloPa.pcoa = ordinate(GlobalPatterns, method="PCoA", distance=GPUF)
## ----PCoAScree, fig.width=6, fig.height=4-------------------------------------
plot_scree(GloPa.pcoa, "Scree plot for Global Patterns, UniFrac/PCoA")
## ----GPfig5ax1213-------------------------------------------------------------
(p12 <- plot_ordination(GlobalPatterns, GloPa.pcoa, "samples", color="SampleType") +
geom_point(size=5) + geom_path() + scale_colour_hue(guide = FALSE) )
(p13 <- plot_ordination(GlobalPatterns, GloPa.pcoa, "samples", axes=c(1, 3),
color="SampleType") + geom_line() + geom_point(size=5) )
## ----GP_UF_NMDS0--------------------------------------------------------------
# (Re)load UniFrac distance matrix and GlobalPatterns data
data(GlobalPatterns)
load(system.file("doc", "Unweighted_UniFrac.RData", package="phyloseq"))
# perform NMDS, set to 2 axes
GP.NMDS <- ordinate(GlobalPatterns, "NMDS", GPUF)
(p <- plot_ordination(GlobalPatterns, GP.NMDS, "samples", color="SampleType") +
geom_line() + geom_point(size=5) )
## ----GPCAscree0, fig=FALSE----------------------------------------------------
data(GlobalPatterns)
# Take a subset of the GP dataset, top 200 species
topsp <- names(sort(taxa_sums(GlobalPatterns), TRUE)[1:200])
GP <- prune_taxa(topsp, GlobalPatterns)
# Subset further to top 5 phyla, among the top 200 OTUs.
top5ph <- sort(tapply(taxa_sums(GP), tax_table(GP)[, "Phylum"], sum), decreasing=TRUE)[1:5]
GP <- subset_taxa(GP, Phylum %in% names(top5ph))
# Re-add human variable to sample data:
sample_data(GP)$human <- factor(human)
## ----GPCAscree, fig.width=8, fig.height=5-------------------------------------
# Now perform a unconstrained correspondence analysis
gpca <- ordinate(GP, "CCA")
# Scree plot
plot_scree(gpca, "Scree Plot for Global Patterns Correspondence Analysis")
## ----GPCA1234-----------------------------------------------------------------
(p12 <- plot_ordination(GP, gpca, "samples", color="SampleType") +
geom_line() + geom_point(size=5) )
(p34 <- plot_ordination(GP, gpca, "samples", axes=c(3, 4), color="SampleType") +
geom_line() + geom_point(size=5) )
## ----GPCAspecplot0------------------------------------------------------------
p1 <- plot_ordination(GP, gpca, "species", color="Phylum")
(p1 <- ggplot(p1$data, p1$mapping) + geom_point(size=5, alpha=0.5) +
facet_wrap(~Phylum) + scale_colour_hue(guide = FALSE) )
## ----GPCAspecplotTopo0--------------------------------------------------------
(p3 <- ggplot(p1$data, p1$mapping) + geom_density2d() +
facet_wrap(~Phylum) + scale_colour_hue(guide = FALSE) )
## ----GPCAjitter0--------------------------------------------------------------
library("reshape2")
# Melt the species-data.frame, DF, to facet each CA axis separately
mdf <- melt(p1$data[, c("CA1", "CA2", "Phylum", "Family", "Genus")],
id=c("Phylum", "Family", "Genus") )
# Select some special outliers for labelling
LF <- subset(mdf, variable=="CA2" & value < -1.0)
# build plot: boxplot summaries of each CA-axis, with labels
p <- ggplot(mdf, aes(Phylum, value, color=Phylum)) +
geom_boxplot() +
facet_wrap(~variable, 2) +
scale_colour_hue(guide = FALSE) +
theme_bw() +
theme( axis.text.x = element_text(angle = -90, vjust = 0.5) )
# Add the text label layer, and render ggplot graphic
(p <- p + geom_text(data=subset(LF, !is.na(Family)),
mapping = aes(Phylum, value+0.1, color=Phylum, label=Family),
vjust=0,
size=2))
## ----GPtaxaplot0--------------------------------------------------------------
plot_bar(GP, x="human", fill="SampleType", facet_grid= ~ Phylum)
## ----GPdpcoa01----------------------------------------------------------------
# Perform ordination
GP.dpcoa <- ordinate(GP, "DPCoA")
# Generate default ordination bi-plot
pdpcoa <-
plot_ordination(
physeq = GP,
ordination = GP.dpcoa,
type="biplot",
color="SampleType",
shape="Phylum")
# Adjust the shape scale manually
# to make taxa hollow and samples filled (advanced)
shape.fac <- pdpcoa$data$Phylum
man.shapes <- c(19, 21:25)
names(man.shapes) <- c("Samples", levels(shape.fac)[levels(shape.fac)!="Samples"])
p2dpcoa <- pdpcoa + scale_shape_manual(values=man.shapes)
p2dpcoa
## ----GPdpcoa02----------------------------------------------------------------
# Show just Samples or just Taxa
plot_ordination(GP, GP.dpcoa, type="taxa", shape="Phylum")
plot_ordination(GP, GP.dpcoa, type="samples", color="SampleType")
# Split
plot_ordination(GP, GP.dpcoa, type="split",
color="SampleType", shape="Phylum") +
ggplot2::scale_colour_discrete()
## ----distancefun--------------------------------------------------------------
data(esophagus)
distance(esophagus, "bray")
distance(esophagus, "wunifrac") # weighted UniFrac
distance(esophagus, "jaccard") # vegdist jaccard
distance(esophagus, "g") # betadiver method option "g"
## ----eval=FALSE, echo=TRUE----------------------------------------------------
# data(esophagus)
# distance(esophagus, "wUniFrac")
# distance(esophagus, "uUniFrac")
## -----------------------------------------------------------------------------
# (Re)load UniFrac distance matrix and GlobalPatterns data
data(GlobalPatterns)
load(system.file("doc", "Unweighted_UniFrac.RData", package="phyloseq"))
# Manually define color-shading vector based on sample type.
colorScale <- rainbow(length(levels(get_variable(GlobalPatterns, "SampleType"))))
cols <- colorScale[get_variable(GlobalPatterns, "SampleType")]
GP.tip.labels <- as(get_variable(GlobalPatterns, "SampleType"), "character")
# This is the actual hierarchical clustering call, specifying average-link clustering
GP.hclust <- hclust(GPUF, method="average")
plot(GP.hclust, col=cols)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.