Microbiome analytics report

Project attributes

Author : r author

Project : r projectname

This report includes exploratory analysis of data (taxa summary, read distribution, quality control checks, etc.), composition analysis, diversity analysis, ordination analysis.
Throught the document the code chunks can be checked by clicking in the code button on the left side.

A random seed is generated for the analysis done in the report.

#generate a random number 
n <- runif(1, 0, 10^6) # keep this number stored as below instead of XXX when re running the codes.

message("random number set.seed is below")
print(n)

set.seed(n)

Input parameters

Below are the user provided input parameters used for the analysis.

parameters <- c("otufile", "mapping_file", "taxonomy", "treefilename","type","work_dir","out_dir","VariableA","VariableB","UnConstOrd","heatmap","filterCount","filterPrev","col.palette","filterpseq","samsize","projectname", "author" )

if(is.null(taxonomy )){

  Taxanomy_file <- paste("No seperate taxonomy file")

} else{

  Taxanomy_file <- taxonomy
}

# samplesize
if(is.na(samsize )){

  sample_size <- paste("Is set to NA")

} else{

  sample_size <- samsize
}

parameters_setting <- c(otufile, mapping,Taxanomy_file, treefilename, type, work_dir, out_dir, VariableA, VariableB, UnConstOrd, heatmap, filterCount, filterPrev , col.palette ,filterpseq,sample_size, projectname , author)

dft <- as.data.frame(cbind(parameters, parameters_setting))
colnames(dft) <- c("Parameters", "User input")

DT::datatable(dft)

Note: If the output directory is default, then the working directory is where all the files will be saved.

The following directories were created for saving the outputs at each step.

message("Working directory is ", work_dir)

if(file.exists("QC")) {
  message("QC folder already exists, data will be overwritten")
} else{
  message("QC folder will be created in ", out_dir)
  dir.create(paste0(out_dir, "/QC"))
}

if(file.exists("AlphaDiversity")) {
  message("AlphaDiversity folder already exists, data will be overwritten")
} else{
  message("AlphaDiversity folder will be created in ", out_dir)
  dir.create(paste0(out_dir, "/AlphaDiversity"))
}

if(file.exists("BetaDiversity")) {
  message("BetaDiversity folder already exists, data will be overwritten")
} else{
  message("BetaDiversity folder will be created in ", out_dir)
  dir.create(paste0(out_dir, "/BetaDiversity"))
}

if(file.exists("Others")) {
  message("Others folder already exists, data will be overwritten")
} else{
  message("Others folder will be created in ", out_dir)
  dir.create(paste0(out_dir, "/Others"))
}

if(file.exists("PhyloseqObjects")) {
  message("PhyloseqObjects folder already exists, data will be overwritten")
} else{
  message("PhyloseqObjects folder will be created in ", out_dir)
  dir.create(paste0(out_dir, "/PhyloseqObjects"))
}

R packages

To check for the main R packages used in the analysis were, click on the code button on the left side.

library(microbiome)
library(microbiomeutilities)
# install.packages("picante",repos="http://R-Forge.R-project.org") 
# library(picante)
library(picante)
library(data.table)
library(DT)
library(RColorBrewer)
library(phyloseq)
library(tibble)
library(ggpubr)

Initiate

This code chunk reads the input files and creates a phyloseq object.

ps0 <- read_phyloseq(otu.file = otufile, 
                     metadata.file = mapping,
                     taxonomy.file = taxonomy, 
                     type = type)

if (!is.na(treefilename)){
  tree <- read.tree(treefilename)

ps0 <- merge_phyloseq(ps0, tree)
} else{
  message("No tree available")
}


saveRDS(ps0, paste0(out_dir, "/PhyloseqObjects/ps_raw.rds"))

message("Raw phyloseq object, confirm the number of samples and variables (as in columns of mapping file)")
message("Below is the content of raw phyloseqobject stored as ps_raw.rds")

print_ps(ps0)

Taxonomic assignments

Below is the percentage of taxonomic assignments at each level.
Note: Only patterns such as [g] or similar is expected. [g] or [g__unclassified] not considered.

DT::datatable(percent_classified(ps0))

Taxonomic summary

An overview of Phylum level abundances in total dataset.

DT::datatable(taxa_summary(ps0, "Phylum"))

QC {.tabset}

print("Below is the summary of the phyloseq object")

print_ps(ps0)

Library sizes (density plot)

Check the library sizes for each of the samples within the main variable. Do you think it is okay or there is difference in library sizes and will this affect you downstream statistical analysis?

SeqDepth <- colSums(otu_table(ps0))

sample_data(ps0)$SeqDepth <- SeqDepth

meta.df <- meta(ps0)

qc_plot1 <- plot_read_distribution(ps0, groups= VariableA, plot.type= 'density')

print(qc_plot1)

ggsave(paste0(out_dir,"/QC/ReadDistribution_density.pdf"))

message("QC plots for Read Distribution stored in QC folder as ReadDistribution.pdf")

Library sizes (histogram plot)

message("Investigating library sizes")
SeqDepth <- colSums(otu_table(ps0))

sample_data(ps0)$SeqDepth <- SeqDepth

meta.df <- meta(ps0)

lib.hist <- ggplot(meta.df, aes(x = SeqDepth)) + geom_histogram() + 
  facet_wrap(~meta.df[,VariableA]) +
  xlab("Library size")

print(lib.hist)

ggsave(paste0(out_dir,"/QC/ReadDistribution_density_hist.pdf"))

message("QC plots for library sizes stored in QC folder as ReadDistribution_density_hist.pdf")

Sample size filtering (conditional)

If any sample has less than 2000 reads, it will be removed

if(min(sample_sums(ps0)) < 2000){
  print("There are sample(s) less than 2000 reads, these will be removed")

  # Check sample names before filtering 


  samples_b4_filter <- phyloseq::sample_names(ps0)

  ps0 <- prune_samples(sample_sums(ps0)>=2000, ps0)

  samples_af4_filter <- phyloseq::sample_names(ps0)

  samples_removed <- setdiff(samples_b4_filter, samples_af4_filter)
  print(paste("Following samples were had less than 2000 reads- ", samples_removed))
} else {

  print("No samples below 2000 reads")
  print_ps(ps0)
}

OTU counts distribution (density)

message("Investigating OTU counts distribution")

taxasums = rowSums(otu_table(ps0))
taxatable <- as.data.frame.matrix(tax_table(ps0))


tax_plot1 <- ggplot(taxatable, aes(x = taxasums, color = taxatable[,"Phylum"])) + 
  geom_line(size = 1.5, stat = "density") +
  xlab("OTU Counts") + theme_bw() + scale_x_log10() 
print(tax_plot1)

ggsave(paste0(out_dir,"/QC/Distribution_OTU_Counts_by_phyla.pdf"), height = 6, width = 14)

OTU counts distribution (histogram)

message("Investigating OTU counts distribution")

tax_plot2 <- ggplot(taxatable, aes(x = taxasums, fill = taxatable[,"Phylum"])) + 
   geom_histogram(bins = 30, alpha = 0.5, position = "identity") +
  xlab("OTU Counts") + theme_bw() + scale_x_log10() 
print(tax_plot2)

ggsave(paste0(out_dir,"/QC/Distribution_OTU_Counts_by_phyla_hist.pdf"), height = 6, width = 10)
message("QC plots for library sizes stored in QC folder as Distribution_OTU_Counts.pdf")

Taxa prevalence

Check which of the OTUs are present in low abundance and low prevalence. You might want to remove them depending on the research question.

# for sanity
prev.plot <- plot_taxa_prevalence(ps0, "Phylum")
prev.plot

ggsave(paste0(out_dir,"/QC/OTU_prevalence_phyla.pdf"), height = 8, width = 16)
# for sanity
ps1 <- prune_taxa(taxa_sums(ps0) > 0, ps0)

Variance (raw)

This is variance for all OTU counts without filtering for min number of reads/OTU and prevalence.

Variance.plot.a <- qplot(log10(apply(otu_table(ps1), 1, var)), 
                         xlab = "log10(variance)", 
                         main = "Variance in OTUs") + 
  ggtitle("before filtering") + theme_minimal()

print(Variance.plot.a)

ggsave(paste0(out_dir, "/QC/Variance before filtering.pdf"))

message("QC plots for OTU variance stored in QC folder as Variance before filtering.pdf")

Variance (filtered)

This is variance for all OTU counts after filtering for min number of reads/OTU and prevalence.

if (filterpseq == TRUE) {
message(paste0("Filtering OTUs with less than ", filterCount, " counts"))
message(paste0("in at least ", filterPrev*100, " % of the samples "))

ps2 <- filter_taxa(ps1, function(x) sum(x > filterCount) > (filterPrev * length(x)), TRUE)

message("Saving the transformed phyloseq object as ps_filtered.rds")

saveRDS(ps2, paste0(out_dir,"/PhyloseqObjects/ps_filtered.rds"))

message("Below is the content of filtered phyloseqobject (based on filterCount and filterPrev) stored as ps_filtered.rds")

print_ps(ps2) 

Variance.plot.b <- qplot(log10(apply(otu_table(ps2), 1, var)), 
                         xlab = "log10(variance)", 
                         main = "Variance in OTUs") + 
  ggtitle("after filtering") + 
  theme_minimal()

print(Variance.plot.b)
ggsave(paste0(out_dir,"/QC/Variance After filtering.pdf"))

} else 
  {

    message("filterpseq was false. Did not filter and hence will not save the filtered phyloseq")

    ps2 <- ps1

}

Coefficient of variation (raw)

This is coefficient of variation for all OTU counts without filtering for min number of reads/OTU and prevalence.

cv_plot <- plot_taxa_cv(ps1, "hist")
print(cv_plot)

ggsave(paste0(out_dir, "/QC/Coefficient of variation before filtering.pdf"))

message("QC plots for OTU variance stored in QC folder as Variance before filtering.pdf")

Coefficient of variation (filtered)

This is coefficient of variation for all OTU counts after filtering for min number of reads/OTU and prevalence.

if (filterpseq == TRUE) {
message(paste0("Filtering OTUs with less than ", filterCount, " counts"))
message(paste0("in at least ", filterPrev*100, " % of the samples "))

ps2 <- filter_taxa(ps1, function(x) sum(x > filterCount) > (filterPrev * length(x)), TRUE)

message("Using the filtered phyloseq object as ps_filtered.rds")

message("Below is the content of filtered phyloseqobject (based on filterCount and filterPrev) stored as ps_filtered.rds")

print_ps(ps2) 

cv_plot <- plot_taxa_cv(ps2, "hist")
print(cv_plot)
ggsave(paste0(out_dir,"/QC/Coefficient of variation After filtering.pdf"))

} else 
  {

    message("filterpseq was false. Did not filter and hence will not save the filtered phyloseq")

    ps2 <- ps1

}

Measuring the kurtosis

Kurtosis is essentially a measure of how much weight is at the tails of the distribution relative to the weight around the location.
If you have large differences in your library sizes then you may have to normalise your data.

Kurtosis

message("Using the raw phyloseq to check for kurtosis in library size")

df <- data.table(NumberReads = sample_sums(ps0), SampleID = sample_names(ps0))
require(moments)
n <- kurtosis(df$NumberReads) 

if (n > 3) {
    message("Your library size is heavily tailed, considering normalising them for further analysis")
  }  else {

    message("The variation in library sizes is below kurtosis value of 3 may indicate no need for rarefying")

}

Alpha diversity {.tabset}

Alpha diversity measures are standard calculations in microbial ecology. The differences in richness and eveness between groups may have importance to understanding the ecology. There are numerous measures, we use the defaults from phyloseq R package and also the phylogenetic diversity from picanteR package.

The caculations can be done on rarefied or non-rarefied data which can be specified by the samsize option in "Set project attributes" option above.

Non-phylogenetic diversity

if (!is.na(samsize)) {

  ps3 <- rarefy_even_depth(ps2, sample.size = samsize)

  saveRDS(ps3, paste0(out_dir, "/phyloseqObjects/ps_rarefyied.rds"))
} else{
  ps3 <- ps2
}

metadf <- meta(ps3)
metadf$sam_rep_nw <- rownames(metadf)

adiv.meta <- estimate_richness(ps3)
colnames(adiv.meta)

adiv.meta$sam_rep_nw <- rownames(adiv.meta)

adiv.nw <- reshape2::melt(adiv.meta)
colnames(adiv.nw) <- c("sam_rep_nw","Diversity","div.val")
meta_df_nw <- reshape2::melt(metadf)
meta_adiv <- merge.data.frame(meta_df_nw, adiv.nw, by = "sam_rep_nw")
colnames(meta_adiv)

p <- ggqqplot(meta_adiv, "div.val", 
              facet.by = c("Diversity", VariableA), 
              color = VariableA)
p <- facet(p , facet.by = c("Diversity", VariableA), scales = "free")
print(p)

#Create 2x2 plot environment 
ggsave(paste0(out_dir,"/AlphaDiversity/Non-phylogenetic_alpha_diversity_qqnorm.pdf"), height = 8, width= 12)


#shapiro.test
shaOb <- shapiro.test(adiv.meta$Observed)

shaChao1 <- shapiro.test(adiv.meta$Chao1)
shaACE <- shapiro.test(adiv.meta$ACE)
shaShannon <- shapiro.test(adiv.meta$Shannon)
shaSimpson <- shapiro.test(adiv.meta$Simpson)
shaInvSimpson <- shapiro.test(adiv.meta$InvSimpson)
shaFisher <- shapiro.test(adiv.meta$Fisher)

Diversity_Metric <- c("Observed", "Chao1","ACE","Shannon","Simpson","InvSimpson","Fisher")
shapiro.test.statistic <- c(shaOb$statistic, shaChao1$statistic, shaACE$statistic, shaShannon$statistic, shaSimpson$statistic, shaInvSimpson$statistic, shaFisher$statistic)
shapiro.test.p.value <- c(shaOb$p.value, shaChao1$p.value, shaACE$p.value, shaShannon$p.value, shaSimpson$p.value, shaInvSimpson$p.value, shaFisher$p.value)


divtab <- cbind(Diversity_Metric, round(shapiro.test.statistic, 3), shapiro.test.p.value)
rownames(divtab) <- paste(seq(1:7))
colnames(divtab) <- c("Diversity_Metric","shapiro.test.statistic","shapiro.test.p.value")

DT::datatable(divtab)

alpha_div <- plot_richness(ps3, x = VariableA,
                           color = VariableB, 
                           measures = c("Observed", 
                                        "Chao1", 
                                        "Shannon", 
                                        "InvSimpson")) + geom_boxplot()

alpha_div <- alpha_div + theme_bw() + geom_point(size = 2) + ggtitle("Non phylogenetic diversity") + scale_color_brewer(palette = col.palette) + rotate_x_text()

print(alpha_div)
ggsave(paste0(out_dir,"/AlphaDiversity/Non-phylogenetic_alpha_diversity.pdf"), 
              height = 6, width = 18)

if (!is.na(samsize)){
  message("Non-phylogenetic_alpha_diversity on RAREFIED data stored in AlphaDiversity folder")
  message("Non-phylogenetic_alpha_diversity.pdf")
} else{
  message("Non-phylogenetic_alpha_diversity on NON-RAREFIED data stored  in AlphaDiversity folder")
  message("Non-phylogenetic_alpha_diversity.pdf")
}

For a more comphrensive diversity measures check Microbiome:Diversities

Phylogenetic diversity measures

For more on this check Picante.

if (!is.na(treefilename)){

   message("If sam.size was provided then rarefyied phyloseq object will be used to calculate PD")
  print(ps3)
  otu_table_ps3 <- as.data.frame(ps3@otu_table)
  metadata_table_ps3  <- meta(ps3)

 message("include.root in pd is set to FALSE by default")

 df.pd <- pd(t(otu_table_ps3), tree,include.root=F) # t(ou_table) transposes the table for use in picante and the tre file comes from the  first code chunck we used to read tree file (see making a phyloseq object section).


 datatable(df.pd)
# now we need to plot PD
# check above how to get the metadata file from phyloseq object.
# We will add the results of PD to this file and then plot.
 select.meta <- metadata_table_ps3[,c(VariableA,VariableB)] #, "Phyogenetic_diversity"]
 select.meta$Phyogenetic_diversity <- df.pd$PD 
 colnames(select.meta) <- c("VariableA", "VariableB", "Phyogenetic_diversity")

 shapiro.test(select.meta$Phyogenetic_diversity)

 qqnorm(select.meta$Phyogenetic_diversity)

 plot.pd <- ggplot(select.meta, aes(VariableA, Phyogenetic_diversity)) + 
   geom_boxplot(aes(fill = VariableB)) + geom_point(size = 2) +  
   theme(axis.text.x = element_text(size=14, angle = 90)) + 
   theme_bw() + scale_fill_brewer(palette = col.palette)

 print(plot.pd)

 ggsave(paste0(out_dir, "/AlphaDiversity/Phylogenetic_diversityon_nonRafrefied_data.pdf"), 
         plot = plot.pd, 
         height = 6, width = 18)

} else{

  message("No tree supplied, PD cannot be calculated")

}

Community composition {.tabset}

The composition plots are limited to phylum and family level taxonomic ranks.

Phylum (Barplot)

ps3.com <- ps1 # create a new pseq object
# We need to set Palette
taxic <- as.data.frame(ps3.com@tax_table)  # this will help in setting large color options

colourCount = length(unique(taxic$Phylum))  #define number of variable colors based on number of Family (change the level accordingly to phylum/class/order)
getPalette = colorRampPalette(brewer.pal(12, col.palette))  # change the palette as well as the number of colors will change according to palette.

# now edit the unclassified taxa
tax_table(ps3.com)[tax_table(ps3.com)[, "Phylum"] == "p__", "Phylum"] <- "f__Unclassified Phylum"
# We will also remove the 'p__' patterns for cleaner labels
tax_table(ps3.com)[, colnames(tax_table(ps3.com))] <- gsub(tax_table(ps3.com)[, 
    colnames(tax_table(ps3.com))], pattern = "[a-z]__", replacement = "")

otu.df <- as.data.frame(otu_table(ps3.com))  # make a dataframe for OTU information.
# head(otu.df) # check the rows and columns

taxic$OTU <- row.names.data.frame(otu.df)  # Add the OTU ids from OTU table into the taxa table at the end.
colnames(taxic)  # You can see that we now have extra taxonomy levels.

taxmat <- as.matrix(taxic)  # convert it into a matrix.
new.tax <- tax_table(taxmat)  # convert into phyloseq compaitble file.
tax_table(ps3.com) <- new.tax  # incroporate into phyloseq Object


# it would be nice to have the Taxonomic names in italics.
# for that we set this
guide_italics <- guides(fill = guide_legend(label.theme = element_text(size = 15, 
    face = "italic", colour = "Black", angle = 0)))


## Now we need to plot at family level, We can do it as follows:

# first remove the phy_tree

ps3.com@phy_tree <- NULL

lev0 = "Phylum"
tax_table(ps3.com)[,lev0][is.na(tax_table(ps3.com)[,lev0])] <- paste0(tolower(substring(lev0, 1, 1)), "__")


ps3.com.phy <- aggregate_taxa(ps3.com, "Phylum")

ps3.com.phy.rel <- microbiome::transform(ps3.com.phy, "compositional")

plot.composition.relAbun.phy <- plot_composition(ps3.com.phy.rel) + theme(legend.position = "bottom") + 
    scale_fill_manual(values = getPalette(colourCount)) + theme_bw() + 
    theme(axis.text.x = element_text(angle = 90)) + 
  ggtitle("Relative abundance Phylum level") + guide_italics 

plot.composition.relAbun.phy

if (nrow(metadf) > 30) {
  ggsave(paste0(out_dir, "/Others/compositionbarplot_Phylum.pdf"), plot = plot.composition.relAbun.phy, height = 8, width = 28)

} else {
    ggsave(paste0(out_dir, "/Others/compositionbarplot_Phylum.pdf"), plot = plot.composition.relAbun.phy, 
      height = 8, width = 18)
}

Phylum (Boxplot)

Top 5 Phyla are shown below:

pn0 <- plot_taxa_boxplot(ps3.com, 
                        taxonomic.level = "Phylum", 
                        top.otu = 6, VariableA, 
                        title = "Relative abundance Phylum level", color = "Paired")
pn0
ggsave(paste0(out_dir,"/Others/compositionboxplot_Phylum.pdf"), height = 6, width = 12)

Family (Barplot)

colourCount = length(unique(taxic$Family))  #define number of variable colors based on number of Family (change the level accordingly to phylum/class/order)
getPalette = colorRampPalette(brewer.pal(12, col.palette))  # change the palette as well as the number of colors will change according to palette.
lev = "Family"
tax_table(ps3.com)[,lev][is.na(tax_table(ps3.com)[,lev])] <- paste0(tolower(substring(lev, 1, 1)), "__")

ps3.com.fam <- aggregate_taxa(ps3.com, "Family")

ps3.com.fam.rel <- microbiome::transform(ps3.com.fam, "compositional")

plot.composition.relAbun.fam <- plot_composition(ps3.com.fam.rel) + theme(legend.position = "bottom") + 
    scale_fill_manual(values = getPalette(colourCount)) + theme_bw() + 
    theme(axis.text.x = element_text(angle = 90)) + 
  ggtitle("Relative abundance Family level") + guide_italics 

plot.composition.relAbun.fam

if (nrow(metadf) > 30) {
  ggsave(paste0(out_dir,"/Others/compositionbarplot_Family.pdf"), plot = plot.composition.relAbun.fam, height = 8, width = 28)

} else {
    ggsave(paste0(out_dir,"/Others/compositionbarplot_Family.pdf"), plot = plot.composition.relAbun.fam, 
      height = 8, width = 18)
}

Family (Boxplot)

Top 10 Families are shown below:

pn1 <- plot_taxa_boxplot(ps3.com, 
                        taxonomic.level = "Family", 
                        top.otu = 10, VariableA, 
                        title = "Relative abundance Family level", color = "Paired")

pn1
ggsave(paste0(out_dir,"/Others/compositionboxplot_Family.pdf"), height = 6, width = 16)

Beta Diversity (Ordinations) {.tabset}

Bray-Curtis distance PCoA

The counts are compositionally tranformed and then used for ordinations.

ps3.rel <- microbiome::transform(ps3, "compositional")

bc.pcoa <- phyloseq::ordinate(ps3.rel, method = "PCoA", distance = "bray")

bc.pcoa.plot <- plot_ordination(ps3.rel, bc.pcoa, 
                                type = "split", axes = 1:2,
                                color = VariableA, shape = VariableB, 
                                label = NULL, 
                                title = "Bray-Curtis distance PCoA",
                                justDF = FALSE)
bc.pcoa.plot <- bc.pcoa.plot + theme_bw() + geom_point(size = 2)
print(bc.pcoa.plot)

ggsave(paste0(out_dir,"/BetaDiversity/Bray-Curtis distance PCoA.pdf"), plot = bc.pcoa.plot, 
      height = 6, width = 10)


# Calculate bray curtis distance matrix
ps3_bray <- phyloseq::distance(ps3.rel, method = "bray")

# use meta data from phylogenetic div code chunk.
metadata_table_ps3  <- meta(ps3.rel)
select.meta2 <- metadata_table_ps3[,c(VariableA,VariableB)] 

colnames(select.meta2) <- c("VariableA", "VariableB")

# Adonis test
adonis(ps3_bray ~ VariableA, data = select.meta2)


# Homogeneity of dispersion test
beta.bray <- betadisper(ps3_bray, select.meta2$VariableA)
permutest(beta.bray)

Weighted Unifrac distance PCoA

The counts are converted to relative abundance and then used for ordinations.

if (!is.na(treefilename)){

wunifrac.pcoa <- ordinate(ps3.rel, method = "PCoA", distance = "wunifrac")

wunifrac.pcoa.plot <- plot_ordination(ps3.rel, wunifrac.pcoa, 
                                type = "split", axes = 1:2,
                                color = VariableA, shape = VariableB, 
                                label = NULL, 
                                title = "Weighted Unifrac distance PCoA",
                                justDF = FALSE)
wunifrac.pcoa.plot <- wunifrac.pcoa.plot + theme_bw() + geom_point(size = 2)
print(wunifrac.pcoa.plot)

ggsave(paste0(out_dir,"/BetaDiversity/Weighted Unifrac distance PCoA.pdf"), plot = wunifrac.pcoa.plot, 
      height = 6, width = 10)

# Calculate bray curtis distance matrix
ps3_wunifrac <- phyloseq::distance(ps3.rel, method = "wunifrac")

# use meta data from phylogenetic div code chunk.

# Adonis test
adonis(ps3_wunifrac ~ VariableA, data = select.meta2)


# Homogeneity of dispersion test
beta.wunifrac <- betadisper(ps3_wunifrac, select.meta2$VariableA)
permutest(beta.wunifrac)
} else {
  message("No tree supplied, cannot calculate unifrac distances")
}

Unweighted Unifrac distance PCoA

The counts are converted to relative abundnces and then used for ordinations.

if (!is.na(treefilename)){

unifrac.pcoa <- ordinate(ps3.rel, method = "PCoA", distance = "unifrac")

unifrac.pcoa.plot <- plot_ordination(ps3.rel, unifrac.pcoa, 
                                type = "split", axes = 1:2,
                                color = VariableA, shape = VariableB, 
                                label = NULL, 
                                title = "Unweighted Unifrac distance PCoA",
                                justDF = FALSE)
unifrac.pcoa.plot <- unifrac.pcoa.plot + theme_bw() + geom_point(size = 2)
print(unifrac.pcoa.plot)

ggsave(paste0(out_dir,"/BetaDiversity/Unweighted Unifrac distance PCoA.pdf"), plot = unifrac.pcoa.plot, 
      height = 6, width = 10)

message("Unweighted Unifrac distance gives negative values and standard anova and anoism cannot be used")

} else {
  message("No tree supplied, cannot calculate unifrac distances")
}

Heatmap

if (heatmap == TRUE) {

heat.sample <- plot_taxa_heatmap(ps3, subset.top = 50,
                                 VariableA = VariableA,
                                 heatcolors =brewer.pal(9, "Blues"),
                                 transformation = "compositional",
                                 file = "./Others/Heatmap rel abun top 50 OTUs.tiff",
                                 height = 9, width = 10)
print(paste0("heatmap saved in ", out_dir))

# Duplicate for printing in the HTML file  
heat.sample.dup <- plot_taxa_heatmap(ps3, subset.top = 50,
                                 VariableA = VariableA,
                                 heatcolors =brewer.pal(9, "Blues"),
                                 transformation = "compositional")


}

If you find this useful please cite:

Shetty SA, Lahti L, et al. (2018-). microbiomeutilities: An R package for utilities to guide in-depth marker gene amplicon data analysis.
Leo Lahti, Sudarshan Shetty et al. (Bioconductor, 2017). Tools for microbiome analysis in R.

R session information and parameters

The versions of the R software and Bioconductor packages used for this analysis are listed below. It is important to save them if one wants to re-perform the analysis in the same conditions. Please cite the key R packages listed below.

sessionInfo()


microsud/microbiomeutilities documentation built on Nov. 29, 2022, 12:18 a.m.