knitr::opts_chunk$set(echo = TRUE, fig.width = 7, fig.height = 4, fig.align = 'center', message = FALSE, warning = FALSE)
library(phyloseq) library(tidyverse) library(genefilter) #KOverA library(ggrepel) # geom_text_repel library(randomcoloR)# distinctColorPalette(n) library(adaptiveGPCA) library(igraph) library(phyloseqGraphTest) devtools::load_all()
theme_set(theme_minimal()) theme_update( text = element_text(size = 10), legend.text = element_text(size = 10) )
data("psE_BARBI") threshold <- kOverA(2, A = 25) psE_BARBI <- phyloseq::filter_taxa( psE_BARBI, threshold, TRUE) psE_BARBI ps <- psE_BARBI rm(psE_BARBI) ps <- prune_taxa(taxa_sums(ps) > 0, ps) ps
We edit specimen names and identify Asteraceae and non-Asteraceae plants.
sam_names <- str_replace(sample_names(ps), "E106", "E-106") sam_names <- str_replace(sam_names, "_F_filt.fastq.gz", "") sam_names <- str_replace(sam_names, "Connor-", "E") sample_names(ps) <- sam_names sample_data(ps)$X <- sam_names sample_data(ps)$unique_names <- sam_names aster <- c("142","143","15","ST","22","40") non_aster <- c("33", "71", "106") paired_aster <- c("E-142-1", "E142-1", "E-142-5", "E142-5", "E-142-10", "E142-10", "E-143-2", "E143-2", "E-143-7", "E143-7", "E-15-1", "E15-1", "ST-CAZ-4-R-O", "ST-CAZ-4-R-M", "ST-SAL-22-R-O", "ST-SAL-22-R-M", "ST-TRI-10-R-O", "ST-TRI-10-R-M") paired_non_aster <- c("E33-7", "E-33-7", "E33-8", "E-33-8", "E33-9", "E-33-9", "E71-10", "E-71-10","E71-2","E-71-2" ,"E71-3", "E-71-3", "E106-1", "E-106-1", "E106-3", "E-106-3", "E106-4", "E-106-4") paired_specimens <- c(paired_aster, paired_non_aster)
# We will use 9 colors for 9 different plants plant_colors <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "purple")
ps_log <- transform_sample_counts( ps, function(x) log(1 + x) ) out.wuf.log <- ordinate( ps_log, method = "MDS", distance = "wunifrac" ) evals <- out.wuf.log$values$Eigenvalues mds_wuni <- plot_ordination( ps_log, out.wuf.log, color = "species_names", shape = "pna" ) + labs( col = "Plant type" ) + coord_fixed( sqrt(evals[2] / evals[1]) ) + scale_colour_manual( values = plant_colors ) + facet_grid(~Type) + geom_point(size = 3) # adding only paired-specimen names df_paired <- mds_wuni$data %>% dplyr::filter( unique_names %in% paired_specimens ) mds_wuni + geom_text_repel( data = df_paired, aes( x = Axis.1, y = Axis.2, label = unique_names ), color = "black" )
We see that Centaurea solstitialis specimens are outlier that were sampled in a field across three countries, while other plants are grown from seeds. We label the paired specimens in all plants. The paired specimens are relatively close to each other, except (E-143-7, E143-7), (E-142-5, E142-5), (E-71-2, E71-2), and (E-71-3, E71-3) that are in positive and negative axes of the first two principal axes.
Fitzpatrick et al. (2018) concluded that there is no effect in within and between specimen bacterial diversity when they used universal or Aster-modified pPNA types.
The above Figure shows that specimens from the aster plants that are paired with universal or Aster-modified pPNA types make clusters.
An MDS plot using Bray-Curtis distance between specimens.
out.bc.log <- ordinate( ps_log, method = "MDS", distance = "bray" ) evals <- out.bc.log$values$Eigenvalues mds_bc <- plot_ordination( ps_log, out.bc.log, color = "species_names", shape = "pna" ) + coord_fixed( sqrt(evals[2] / evals[1]) ) + labs( col = "Plant type", shape = "pna" ) + scale_colour_manual( values = plant_colors ) + facet_grid(~Type) + geom_point(size=3) df_paired <- mds_bc$data %>% dplyr::filter( unique_names %in% paired_specimens ) mds_bc + geom_text_repel( data = df_paired, aes(x = Axis.1, y = Axis.2, label = unique_names ), color = "black" )
The ordination of specimens in MDS with BC is similar to MDS with weighted unifrac.
A DPCoA plot incorporates phylogenetic information.
The DPCoA specimens positions can be interpreted with respect to the ASVs coordinates in this display.
data("ps_ans") out.dpcoa.log <- ordinate( ps_ans, method = "DPCoA" ) evals <- out.dpcoa.log$eig dpcoa_sam <- plot_ordination( ps_ans, out.dpcoa.log, color = "species_names", shape = "pna" ) + coord_fixed( sqrt(evals[2] / evals[1]) ) + labs( col = "Plant type", shape = "pna" ) + scale_colour_manual( values = plant_colors ) + facet_grid(~Type) + geom_point(size = 3) df_paired <- dpcoa_sam$data %>% dplyr::filter( unique_names %in% paired_specimens ) dpcoa_sam + geom_text_repel( data = df_paired, aes( x = CS1, y = CS2, label = unique_names ), color = "black" ) dpcoa_taxa <- plot_ordination( ps_ans, out.dpcoa.log, type = "species", color = "Class" ) df_taxa <- dplyr::filter( dpcoa_taxa$data, (CS1 > 0.63 | CS1 < -0.14 | CS2 > 0.29 | CS2 < -0.13) & ( !is.na(Class) ) ) dpcoa_taxa$data <- df_taxa dpcoa_taxa + coord_fixed( sqrt(evals[2] / evals[1]) ) + geom_point(size = 2) + scale_color_manual( values = distinctColorPalette( length( unique( tax_table(ps_ans)[, "Class"] ) ) ) )
pp <- processPhyloseq( ps ) out.agpca <- adaptivegpca( pp$X, pp$Q, k = 2 ) agPCA_samples <- ggplot( data.frame( out.agpca$U, sample_data(ps) ) ) + geom_point( aes(x = Axis1, y = Axis2, color = species_names, shape = pna), size = 3 ) + labs( x = "Axis 1", y = "Axis 2", col = "Plant type" ) + scale_colour_manual( values = plant_colors ) + facet_grid(~ Type) agPCA_samples
agPCA_asv <- ggplot( data.frame( out.agpca$QV, tax_table(ps) ) ) + geom_point( aes( x = Axis1, y = Axis2, color = Class ), size = 3 ) + xlab( "Axis 1" ) + ylab( "Axis 2" ) main_class <- c("Bacilli", "Actinobacteria", "Cytophagia", "Gammaproteobacteria", "Betaproteobacteria", "Deltaproteobacteria", "Alphaproteobacteria", "Opitutae") df_taxa <- dplyr::filter( agPCA_asv$data, Class %in% main_class ) agPCA_asv$data <- df_taxa agPCA_asv <- agPCA_asv + scale_color_manual( values = distinctColorPalette( length( unique( tax_table(ps_ans)[, "Class"]) ) ) ) agPCA_asv
Centaurea solstitialis specimens are outliers among Asteraceae plants on the left of Axis 1. Axis 2 explains the microbial variability in plant types.
ps_ca <- ps out.ca <- ordinate( ps_ca, method = "CCA" ) evals <- out.ca$CA$eig evals_prop <- evals/sum(evals) ca_plot <- plot_ordination( ps_ca, out.ca, type = "sample", color = "species_names", shape = "pna") + labs( col = "Plant type", shape = "pna" ) + scale_color_manual( values = plant_colors ) + facet_grid(~Type) + geom_point(size=2) + geom_jitter()+ coord_fixed(0.4) + theme_update( text = element_text(size = 10), legend.text = element_text(size = 10), panel.spacing = unit(2, "lines") ) ca_plot
ca_plot_taxa <- plot_ordination( ps_ca, out.ca, type = "species", color = "Class" ) main_class <- c("Bacilli", "Actinobacteria", "Cytophagia", "Gammaproteobacteria", "Betaproteobacteria", "Deltaproteobacteria", "Alphaproteobacteria", "Opitutae", "Sphingobacteriia") df_taxa <- dplyr::filter( ca_plot_taxa$data, Class %in% main_class ) df_taxa$Class <- factor(df_taxa$Class) ca_plot_taxa$data <- df_taxa ca_plot_taxa <- ca_plot_taxa + coord_fixed(0.4) + labs(col = "Class") + geom_point(size=3) + geom_jitter() + scale_color_manual( values = distinctColorPalette( length( unique( main_class ) ) ) ) ca_plot_taxa
ps <- subset_samples( ps, unique_names %in% paired_specimens ) ps <- prune_taxa( taxa_sums(ps) > 0, ps ) ps
net <- make_network( ps, max.dist = 0.8 ) sampledata <- sample_data(ps) %>% data.frame() V(net)$unique_names <- sampledata[names(V(net)), "unique_names"] V(net)$pna <- sampledata[names(V(net)), "pna"] V(net)$species_names <- sampledata[names(V(net)), "species_names"] p_net <- plot_network( net, ps, color = "species_names", shape = "pna" ) + labs( col = "Species names" ) + scale_colour_manual( values = plant_colors ) p_net
We first perform a test using an Minimum spanning tree (MST) with Jaccard dissimilarity. We want to know whether the universal or Asteraceae-modified pPNAs come from the same distribution in paired-specimens.
Since there is a grouping in the data by specimen, we can’t simply permute all the labels, we need to maintain this nested structure – this is what the grouping argument does. Here we permute the pna labels but keep the plant type structure intact.
set.seed(15) gt <- graph_perm_test( ps, sampletype = "pna", grouping = "unique_names", distance = "jaccard", type = "mst") gt$pval
This test has a larger p-value, and we do not reject the null hypothesis that the paired-specimens come from the same distribution.
This result mirrors the observations from the PCA after the truncated-rank transformation, adaptive gPCA, MDS, and DPCoA that paired-specimens have similar microbial variability.
rm(df_paired, df_taxa, dpcoa_sam, dpcoa_taxa, mds_bc, mds_wuni, out.bc.log, out.dpcoa.log, out.wuf.log, ps, ps_log, agPCA_asv, agPCA_samples, out.agpca, pp, aster, evals, main_class, non_aster, paired_aster, paired_non_aster, paired_specimens, plant_colors, sam_names, ca_plot, out.ca, ps_ca, evals_prop, ca_plot_taxa, gt, net, p_net, sampledata)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.