htmltools::img(src = "https://raw.githubusercontent.com/ningzhibin/rmdocpu/master/inst/rmd/iMetaReport.png", 
               alt = 'logo', 
               style = 'position:absolute; top:0; right:0; padding:10px;width:150px;height:150px;')
# enviroment setup
knitr::opts_chunk$set(echo = FALSE,warning = FALSE, message = FALSE, cache = FALSE)

library(tidyverse)
library(ggplot2)
library(plotly)
library(readxl)
library(pheatmap)
library(reshape2)
library(vegan)
library(ggdendro)
# library(gridExtra)

#library(plotly)
# library(DT)
# for interactive data table display 

# local usage test and debug
# rmarkdown::render("MQ_report_taxonomy_ocpu.Rmd", params = list(summary_file_tbl =  your_readin_tbl))
# on local machine, your_readin_tbl is a data table, while on ocpu, your_readin_tbl is a json formatted table

# version control
# 20190819
tidy_MQ_taxonomy <- function(df_taxonomy.txt){

    extract_species_gg <- melt(data = df_taxonomy.txt[df_taxonomy.txt$Rank == 'Species', -c(2:10)],               
                            id.vars = c("Name"), variable.name = "Sample", value.name = "Intensity")
    extract_genus_gg <- melt(data = df_taxonomy.txt[df_taxonomy.txt$Rank == 'Genus', -c(2:10)],               
                            id.vars = c("Name"), variable.name = "Sample", value.name = "Intensity")
    extract_family_gg <- melt(data = df_taxonomy.txt[df_taxonomy.txt$Rank == 'Family', -c(2:10)],               
                            id.vars = c("Name"), variable.name = "Sample", value.name = "Intensity")
    extract_order_gg <- melt(data = df_taxonomy.txt[df_taxonomy.txt$Rank == 'Order', -c(2:10)],               
                            id.vars = c("Name"), variable.name = "Sample", value.name = "Intensity")
    extract_class_gg <- melt(data = df_taxonomy.txt[df_taxonomy.txt$Rank == 'Class', -c(2:10)],               
                            id.vars = c("Name"), variable.name = "Sample", value.name = "Intensity")
    extract_phylum_gg <- melt(data = df_taxonomy.txt[df_taxonomy.txt$Rank == 'Phylum', -c(2:10)],               
                            id.vars = c("Name"), variable.name = "Sample", value.name = "Intensity")

    extract_species <- df_taxonomy.txt[df_taxonomy.txt$Rank == 'Species', -c(2:10)]  %>%  remove_rownames %>% column_to_rownames(var="Name")
    extract_genus <- df_taxonomy.txt[df_taxonomy.txt$Rank == 'Genus', -c(2:10)]  %>%  remove_rownames %>% column_to_rownames(var="Name")
    extract_family <- df_taxonomy.txt[df_taxonomy.txt$Rank == 'Family', -c(2:10)]  %>%  remove_rownames %>% column_to_rownames(var="Name")
    extract_order <- df_taxonomy.txt[df_taxonomy.txt$Rank == 'Order', -c(2:10)]  %>%  remove_rownames %>% column_to_rownames(var="Name")
    extract_class <- df_taxonomy.txt[df_taxonomy.txt$Rank == 'Class', -c(2:10)]  %>%  remove_rownames %>% column_to_rownames(var="Name")
    extract_phylum <- df_taxonomy.txt[df_taxonomy.txt$Rank == 'Phylum', -c(2:10)]  %>%  remove_rownames %>% column_to_rownames(var="Name")

    return(list("extract_species_gg" = extract_species_gg,
                "extract_genus_gg" = extract_genus_gg,
                "extract_family_gg" = extract_family_gg,
                "extract_order_gg" = extract_order_gg,
                "extract_class_gg" = extract_class_gg,
                "extract_phylum_gg" = extract_phylum_gg,
                "extract_species" = extract_species,
                "extract_genus" = extract_genus,
                "extract_family" = extract_family,
                "extract_order" = extract_order,
                "extract_class" = extract_class,
                "extract_phylum" = extract_phylum
    ))

}
# input
if(is.null(params$input_datatable)){
  # test with local test with local files in the same dir,
  #df_taxonomy.txt  <- read_excel("BuiltIn.taxa.refine.csv", sheet = 2)
  df_taxonomy.txt <- read.delim("BuiltIn.taxa.refine.csv", header = TRUE,sep = ",",check.names = FALSE, stringsAsFactors = FALSE) 

}else{
  # opencpu render from data table by parametrized input
  df_taxonomy.txt <- params$input_datatable
}


# Note: The folling analysis with meta info assumes that
# 1st columns as sample name, 2nd column as experiment name, 3rd column and after as grouping

meta_table <- params$meta_table 
# if there is any null value in the meta colum, set the meta as NULL
if(any(is.na(meta_table$meta1))){
  meta_table <- NULL
}


# use if(!is.null(meta_table))  to deal with data with meta 



# tidy

summary_file_taxonomy <- tidy_MQ_taxonomy(df_taxonomy.txt)

Intro

This report provides some basic description and visualization of the MetaLab taxonomy results. The report is based on the MetaLab_taxonomy.xlsx, without defined experimental design/grouping information. Users can use this to quickly check the taxonomic profile of the dataset at each taxonomic level.

Sample overview

Identification per sample

  idnumber <- specnumber(t(summary_file_taxonomy$extract_species))
  idnumber <- as.data.frame(idnumber)
  idnumber <- as.data.frame(cbind(names = rownames(idnumber), idnumber))

  p1 <- ggplotly(ggplot(data=idnumber) +
        geom_col(aes(x=names, y=idnumber)) + theme_bw() +coord_flip()+
        xlab("Sample labels") +
        ylab("Number of species identified"))

  idnumberg <- specnumber(t(summary_file_taxonomy$extract_genus))
  idnumberg <- as.data.frame(idnumberg)
  idnumberg <- as.data.frame(cbind(names = rownames(idnumberg), idnumberg))

  p2 <- ggplotly(ggplot(data=idnumberg) +
        geom_col(aes(x=names, y=idnumberg)) + theme_bw() +coord_flip()+ theme(axis.text.y=element_blank()) +
        ylab("Number of genera identified"))

  idnumberf <- specnumber(t(summary_file_taxonomy$extract_family))
  idnumberf <- as.data.frame(idnumberf)
  idnumberf <- as.data.frame(cbind(names = rownames(idnumberf), idnumberf))

  p3 <- ggplotly(ggplot(data=idnumberf) +
        geom_col(aes(x=names, y=idnumberf)) + theme_bw() +coord_flip()+ 
        xlab("Sample labels") + ylab("Number of families identified"))

  idnumbero <- specnumber(t(summary_file_taxonomy$extract_order))
  idnumbero <- as.data.frame(idnumbero)
  idnumbero <- as.data.frame(cbind(names = rownames(idnumbero), idnumbero))

  p4 <- ggplotly(ggplot(data=idnumbero) +
        geom_col(aes(x=names, y=idnumbero)) + theme_bw() +coord_flip()+ 
        xlab("Sample labels") + ylab("Number of orders identified"))

  idnumberc <- specnumber(t(summary_file_taxonomy$extract_class))
  idnumberc <- as.data.frame(idnumberc)
  idnumberc <- as.data.frame(cbind(names = rownames(idnumberc), idnumberc))

  p5 <- ggplotly(ggplot(data=idnumberc) +
        geom_col(aes(x=names, y=idnumberc)) + theme_bw() +coord_flip()+ 
        xlab("Sample labels") + ylab("Number of classes identified"))

  idnumberp <- specnumber(t(summary_file_taxonomy$extract_phylum))
  idnumberp <- as.data.frame(idnumberp)
  idnumberp <- as.data.frame(cbind(names = rownames(idnumberp), idnumberp))

  p6 <- ggplotly(ggplot(data=idnumberp) +
        geom_col(aes(x=names, y=idnumberp)) + theme_bw() +coord_flip()+ 
        xlab("Sample labels") + ylab("Number of phyla identified"))

  subplot(p1,p2,p3, nrows=1, shareY = TRUE, shareX = TRUE)
  subplot(p4,p5,p6, nrows=1, shareY = TRUE, shareX = TRUE)

Alpha diversity

The figure below displays the alpha-diversity (Shannon-Wiener index) on the species level.

    diversity <- round(diversity(t(summary_file_taxonomy$extract_species),index = "shannon"),3)
    Alpha_diversity <- as.data.frame(diversity)
    Alpha_diversity <- as.data.frame(cbind(names = rownames(Alpha_diversity), Alpha_diversity))

    ggplotly(ggplot(data=Alpha_diversity) +
      geom_col(aes(x=names, y=diversity)) + theme_bw() +coord_flip()+
      xlab("Sample labels") + ylab("Shannon-Wiener index")
    )

r if(ncol(summary_file_taxonomy$extract_species) >2){"# Beta diversity \n**The figure below displays the beta-diversity on the species level, visualized using PCoA**"}

The figure below displays the beta-diversity on the species level, visualized using PCoA.

data_matrix_t <- t(summary_file_taxonomy$extract_species)
d.bray <- vegan::vegdist(data_matrix_t)
show.d.bray <- as.matrix(d.bray)

if(nrow(data_matrix_t) >3){
  pc.bray <- cmdscale(d.bray, k=3, eig = TRUE)

  beta_diversity <- as.data.frame(pc.bray$points)
  beta_diversity <- as.data.frame(cbind(names = rownames(beta_diversity), beta_diversity))

 plot_ly(beta_diversity, x = ~V1, y = ~V2, z = ~V3, color = ~names, colors = c("red","orange","yellow","green","cyan","blue","purple","black","gray50","gray80")) %>%
  add_markers() %>%
  add_text(text = ~names) %>%
  hide_legend()

}else if (nrow(data_matrix_t) ==3){
  pc.bray <- cmdscale(d.bray, k=2, eig = TRUE)

  beta_diversity <- as.data.frame(pc.bray$points)
  beta_diversity <- as.data.frame(cbind(names = rownames(beta_diversity), beta_diversity))

 plot_ly(beta_diversity, x = ~V1, y = ~V2, color = ~names, colors = c("red","orange","yellow","green","cyan","blue","purple","black","gray50","gray80")) %>%
  add_markers() %>%
  add_text(text = ~names) %>%
  hide_legend()
}

Sample Clustering

Visualize sample clustering based on the species-level composition. Distance measure used is "euclidean", and agglomeration method used is "ward.D". (Clustering analysis will be performed when there are more than 2 samples)

if(ncol(summary_file_taxonomy$extract_species) >2){
  distance <- dist(t(summary_file_taxonomy$extract_species), method = "euclidean")
  fit <- hclust(distance, method= "ward.D")
  ggdendrogram(fit, rotate = TRUE, theme_dendro = FALSE, size = 2) + xlab("Samples") + ylab("Distance") +
               theme_bw()

}

Taxonomic composition bar plots

Combosition bar plot, species-level:

  ggplotly(ggplot(summary_file_taxonomy$extract_species_gg, aes(x = Sample, y = Intensity, fill = Name)) +
             geom_bar(stat='identity') + theme_bw() +
             ylab("Intensity") + theme(axis.text.x = element_text(angle = 45, hjust = 1))
  )

Combosition bar plot, genus-level:

  ggplotly(ggplot(summary_file_taxonomy$extract_genus_gg, aes(x = Sample, y = Intensity, fill = Name)) +
             geom_bar(stat='identity') + theme_bw() +
             ylab("Intensity") + theme(axis.text.x = element_text(angle = 45, hjust = 1))
  )

Combosition bar plot, family-level:

  ggplotly(ggplot(summary_file_taxonomy$extract_family_gg, aes(x = Sample, y = Intensity, fill = Name)) +
             geom_bar(stat='identity') + theme_bw() +
             ylab("Intensity") + theme(axis.text.x = element_text(angle = 45, hjust = 1))
  )

Combosition bar plot, order-level:

  ggplotly(ggplot(summary_file_taxonomy$extract_order_gg, aes(x = Sample, y = Intensity, fill = Name)) +
             geom_bar(stat='identity') + theme_bw() +
             ylab("Intensity") + theme(axis.text.x = element_text(angle = 45, hjust = 1))
  )

Combosition bar plot, class-level:

  ggplotly(ggplot(summary_file_taxonomy$extract_class_gg, aes(x = Sample, y = Intensity, fill = Name)) +
             geom_bar(stat='identity') + theme_bw() +
             ylab("Intensity") + theme(axis.text.x = element_text(angle = 45, hjust = 1))
  )

Combosition bar plot, phylum-level:

  ggplotly(ggplot(summary_file_taxonomy$extract_phylum_gg, aes(x = Sample, y = Intensity, fill = Name)) +
             geom_bar(stat='identity') + theme_bw() +
             ylab("Intensity") + theme(axis.text.x = element_text(angle = 45, hjust = 1))
  )


ningzhibin/rmdocpu documentation built on Feb. 3, 2022, 9:30 p.m.