load data

library(data.table)
library(rmeta)
library(tibble)
library(reshape)
library(phyloseq)
library(ggplot2)
library(dplyr)
source("../R/convert_data_into_phyloseq.R")
source("../R/config.R")
## profile 
tax <- fread("../data/metaphlan3.merged.abundance.profile.all.tsv", stringsAsFactors = F, data.table = F)
rownames(tax) <- tax$clade_name
tax <- tax[,-c(1:2)]
splittax <- splitMetaphlan2(tax, prefix = "pmv")

## function 
humann2 <- fread("../data/humann2_pathabundance_joined_unstratified.tsv", stringsAsFactors = F, data.table = F, check.names = F, quote = "")
rownames(humann2) <- humann2[,1]
humann2 <- humann2[,-1]
colnames(humann2) <- gsub("_Abundance", "", colnames(humann2))
# rm unmapped & unintergrate & scale
humann2f <- humann2[-c(1:2),]
humann2ft <- apply(humann2f, 2, function(x){x/sum(x)})
pathway_name <- data.frame(pathway_name = rownames(humann2ft)) # rename the feature id 
rownames(pathway_name) <- rownames(humann2ft) <- paste0("pathway", seq(1:nrow(pathway_name)))

## ko
ko <- fread("../data/humann2.uniref90_ko.joined_unstratified.tsv", stringsAsFactors = F, data.table = F, check.names = F, quote = "")
rownames(ko) <- ko[,1]
ko <- ko[,-1]
colnames(ko) <- gsub("_Abundance-RPKs", "", colnames(ko))
ko2 <- ko[-c(1:2), ]


## grid 
grid <- fread("../data/grid.merged.reliable.tsv", stringsAsFactors = F, data.table = F, check.names = F, quote = "")
rownames(grid) <- grid[,1]
grid <- grid[,-1]



## metadata 
metadata <- openxlsx::read.xlsx("../data/metadata.xlsx", sheet = 2)
rownames(metadata) <- metadata$PID
metadata$Group <- ifelse(metadata$Group == 0, "g0", "g1")

## generate phyloseq

phylo <- data2phyloseq(metadata = metadata, micro = splittax$pmv_genus, occ = 0.1)
phylo2 <- data2phyloseq(metadata = metadata, micro = splittax$pmv_species, occ = 0.1)
phylo3 <- data2phyloseq(metadata = metadata, micro = humann2ft, occ = 0.1)
phylo4 <- data2phyloseq(metadata = metadata, micro = ko2, occ = 0.1)
phylo5 <- data2phyloseq(metadata = metadata, micro = grid, occ = 0.1)

00.global view

Alpha-diversity

source("../R/alpha_diversity.R")
source("../R/plot_alpha_diversity.R")

genus_diver <- alpha_diversity(phylo, method = "all", paired = F)[, c(2:5,8)]
species_diver <- alpha_diversity(phylo2, method = "all", paired = F)[, c(2:5,8)]
humann_diver <- alpha_diversity(phylo3, method = "all", paired = F)[, c(2:5,8)]
ko_diver <- alpha_diversity(phylo4, method = "all", paired = F)[, c(2:5,8)]
# plot 

plot_alpha_diversity(species_diver, pair = F)
plot_alpha_diversity(genus_diver, pair = F)
plot_alpha_diversity(humann_diver, pair = F)
plot_alpha_diversity(ko_diver, pair = F)

Beta-diversity

source("../R/ordination.R")
source("../R/plot_ordination.R")
# genus 
genus_ord <- ordination(physeq = phylo, which_distance = "bray", method = "NMDS",
                        grouping_column = group_varname)

plot_ordination(ordination.res = genus_ord, phylores = phylo, method = "NMDS", 
                        grouping_column = group_varname, paired = F)

# species 
species_ord <- ordination(physeq = phylo2, which_distance = "bray", method = "NMDS",
                        grouping_column = group_varname)

plot_ordination(ordination.res = species_ord, phylores = phylo, method = "NMDS", 
                        grouping_column = group_varname, paired = F)

# humann2 / pathway 

humann2_ord <- ordination(physeq = phylo3, which_distance = "bray", method = "NMDS",
                        grouping_column = group_varname)

plot_ordination(ordination.res = humann2_ord, phylores = phylo, method = "NMDS", 
                        grouping_column = group_varname, paired = F)

# humann2 / module 



# humann2 / ko 
ko_ord <- ordination(physeq = phylo4, which_distance = "bray", method = "NMDS",
                        grouping_column = group_varname)

plot_ordination(ordination.res = ko_ord, phylores = phylo, method = "NMDS", 
                        grouping_column = group_varname, paired = F)

Composition

## Top10 Genus 
source("../R/plot_ordination.R")
library(ggplot2)
library(RColorBrewer)
library(ggpubr)
library(rmeta)
microbiotaPair::comtaxTop(dat = splittax$pmv_genus, group = metadata, top = 10, group_var = "Group", orderSum = F)
## Top10 Species 
comtaxTop(dat = splittax$pmv_species, group = metadata, top = 10, group_var = "Group")


## Generate 2 group mean 
metadata_s <- data.frame(Group = c("g0", "g1"))
rownames(metadata_s) <- c("g0", "g1")
pmv_genus_g <- as.data.frame(t(apply(splittax$pmv_genus, 1, function(x){tapply(x, metadata$Group, mean)})))

comtaxTop(dat = pmv_genus_g, group = metadata_s, top = 10, group_var = "Group")

Network

### sparCC (spearman)
source("../R/network.R")
library(Matrix)
rankclass <- read.table("../data/metaphlan2.rankclass.tab", sep = "\t")
rownames(rankclass) <- rankclass$V7

microdata <- otu_table(phylo2)
# >0.5
occindex <- apply(microdata, 1, function(x){sum(x!=0)/length(x)})>0.5
microdata <- microdata[occindex, ]
rankfilter <- tax_table(rankclass[rownames(microdata), ])
# result 
set.seed(000)
rownames(microdata) <- rownames(rankfilter)
gonet <- sparCCnetwork(microdata  = microdata, rank = rankfilter, 
                       phemeta = sample_data(phylo2), group = "g0", group_var = "Group")
g1net <- sparCCnetwork(microdata =  microdata, rank = rankfilter, 
                       phemeta = sample_data(phylo2), group = "g1", group_var = "Group")

# phemeta2 <- sample_data(phylo2)
# phemeta2$Group2 <- "all"
# allnet <- sparCCnetwork(microdata = otu_table(phylo2), rank = rankfilter, 
#                        phemeta = sample_data(phylo2), group = "all", group_var = "Group2")
# stat

# plot
cowplot::plot_grid(gonet[[1]], g1net[[1]],
  labels = c("A", "B"), ncol = 2, align = "h")

# degree distribution 
graph <- adj2igraph(Matrix(gonet[[2]], sparse=TRUE))
graph2 <- adj2igraph(Matrix(g1net[[2]], sparse=TRUE))
degreeres <- data.frame(g0 = degree(graph), g1 = degree(graph2), tax = rankfilter[,7])
order(apply(degreeres[,1:2], 1,sum), decreasing = T) -> orderindex
qdat2 <- melt(degreeres[orderindex[1:20],])
p <- ggplot(qdat2, aes(x = ta7, y =  value ,  group= variable, color = variable))+geom_line()+geom_point()+ scale_color_manual(values = time_colour)+
     theme_classic()+theme(axis.text.x = element_text(angle = 60, hjust = 0.5, vjust = 0.5))+ylab("Degree")
p  
### BAnOCC 
library(banocc)
library(ggraph)


source("../R/network.R")
rankclass <- read.table("../data/metaphlan2.rankclass.tab", sep = "\t")
rownames(rankclass) <- rankclass$V7
species_d <- otu_table(phylo2)
species_input <- renorm(t(species_d))

g0 <- species_input[metadata[metadata$Group=="g0", "PID"], ]
g1 <- species_input[metadata[metadata$Group=="g1", "PID"], ]


############ g0 #############
compiled_banocc_model <- rstan::stan_model(model_code = banocc::banocc_model) 

g0_fit     <- banocc::run_banocc(C = g0, compiled_banocc_model=compiled_banocc_model, init = init)
g0_ig <- banocc::get_banocc_output(banoccfit=g0_fit,
                                       eval_convergence = FALSE)
g0_output <- g0_ig$Estimates.median
#write.csv(g0_ig$Estimates.median, "g0_igraph.csv", quote = F)

# here output edge ;range; degree; betwness ; closeness
g0_sta <- network_sta(g0_output, cutoff = 0.3) 

# here plot the network
g0_net2 <- network_tran(g0_output, g0, rankclass[,"V2", drop=F])
graph_data <- tidygraph::tbl_graph(nodes = g0_net2[[2]], edges =g0_net2[[1]], 
                                   directed = FALSE)
node_name <- g0_net2[[2]]$node
g0_networkplot <- ggraph(graph = graph_data, layout = "auto", circular = TRUE) +
  geom_edge_arc(aes(edge_colour = edge.colour, edge_width = edge.width)) +
scale_edge_colour_gradient2(low = "#155F83FF", mid = "white", high = "#800000FF") +
  scale_edge_width_continuous(range = c(0.2,2)) +
  geom_node_point(aes(colour = colour, size = node.size)) +
  scale_size_continuous(range = c(5,10)) +
  #scale_colour_manual(values = c("Class A" = "#8A9045FF", "Class B" = "#155F83FF")) +
  theme_void() +
  geom_node_text(aes(x = x,
                     y = y,
                     label = node_name,
                     colour = colour), 
                 size = 3.5) 
ggsave(g0_networkplot, "g0_network.pdf", width = 20, height = 20)

############ g1 ################

g1_fit     <- banocc::run_banocc(C = g1, compiled_banocc_model=compiled_banocc_model, init = init)
g1_ig <- banocc::get_banocc_output(banoccfit=g1_fit,
                                       eval_convergence = FALSE)
g1_output <- g1_ig$Estimates.median
#write.csv(g1_ig$Estimates.median, "g1_igraph.csv", quote = F)

# here output edge ;range; degree; betwness ; closeness
g1_sta <- network_sta(g1_output, cutoff = 0.3) 

# here plot the network
g1_net2 <- network_tran(g1_output, g1, rankclass[,"V2", drop=F])
graph_data <- tidygraph::tbl_graph(nodes = g1_net2[[2]], edges =g1_net2[[1]], 
                                   directed = FALSE)
node_name <- g1_net2[[2]]$node
g1_networkplot <- ggraph(graph = graph_data, layout = "auto", circular = TRUE) +
  geom_edge_arc(aes(edge_colour = edge.colour, edge_width = edge.width)) +
scale_edge_colour_gradient2(low = "#155F83FF", mid = "white", high = "#800000FF") +
  scale_edge_width_continuous(range = c(0.2,2)) +
  geom_node_point(aes(colour = colour, size = node.size)) +
  scale_size_continuous(range = c(5,10)) +
  #scale_colour_manual(values = c("Class A" = "#8A9045FF", "Class B" = "#155F83FF")) +
  theme_void() +
  geom_node_text(aes(x = x,
                     y = y,
                     label = node_name,
                     colour = colour), 
                 size = 3.5) 
ggsave(g1_networkplot, "g1_network.pdf", width = 20, height = 20)


############ 
save(list(g0sta, g1sta), file = "network.stat.RData")

01.Compare analysis

Taxanomy

source("../R/Wilcoxon_Sign_Rank_Test.R")
### genus 
com_genus <- compare_two(physeq = phylo, PID = NULL, GROUP = "Group", grp1 = "g0",
                         grp2 =  "g1", paired = F, occ = 0.1)

write.csv(com_genus, "genus_compare.csv", quote = F)
### species 
com_species <- compare_two(physeq = phylo2, PID = NULL, GROUP = "Group", grp1 = "g0",
                         grp2 =  "g1", paired = F, occ = 0.1)
write.csv(com_species, "species_compare.csv", quote = F)

### grid
com_grid <- compare_two(physeq = phylo5, PID = NULL, GROUP = "Group", grp1 = "g0",
                         grp2 =  "g1", paired = F, occ = 0.1)
write.csv(com_grid, "grid_compare.csv", quote = F)

Function

### humann2 / pathway 
com_humann2 <- compare_two(physeq = phylo3, PID = NULL, GROUP = "Group", grp1 = "g0",grp2 =  "g1", paired = F, occ = 0.1)
write.csv(com_humann2, "humann2_compare.csv", quote = F)

### humann2 / module 



### humann2 / ko
com_ko <- compare_two(physeq = phylo3, PID = NULL, GROUP = "Group", grp1 = "g0",grp2 =  "g1", paired = F, occ = 0.1)
write.csv(com_ko, "ko_compare.csv", quote = F)

02.Correlation analysis

Species vs clinical

microdata <-  otu_table(phylo2) %>% t() %>%as.data.frame()
occindex <- apply(microdata, 1, function(x){sum(x!=0)/length(x)})>0.1 # select the occrance >0.5 species 
microdataf <- microdata[ ,occindex]

metadata2 <- openxlsx::read.xlsx("../data/metadata.xlsx", sheet = 3, rowNames=T)
rownames(metadata2) <- metadata2$PID
metadata2 <- metadata2[, -1]
########## spearman correlation #####################
source("../R/basic.R")
res1_spearman <- myspearman(phe = metadata2, species =t(microdataf), method = "s")
write.csv(res1_spearman, "clinical_species_spearman.csv", quote = F)

######### partial spearman correlation ##############
source("../R/pcorPair.R")
library(ppcor)

metadataVar <- colnames(metadata2)[-1]
confounder <- c("BMI")
# # split g0 & g1
# res <- PcorPair(microbiota = microdataf, metadata = metadata, metadataVar = metadataVar, confounder = confounder,method = "s", time_varname = "Group")
# all 
metadata2$Group2 <- "all"
res2_pcor <- PcorPair(microbiota = microdataf, metadata = metadata2, metadataVar = metadataVar, confounder = confounder,method = "s", time_varname = "Group2")
write.csv(res2_pcor, "clinical_species_pcor.csv", quote = F)


# plot 
library(pheatmap)
pdf("clinical.species.spearman.pdf", width = 20, height = 40)
corPlot(res1_spearman, cutoff = 1, adjust = F, tr=F)
dev.off()



pdf("clinical.species.pcor.pdf", width = 20, height = 40)
corPlot(res2_pcor[[1]], cutoff = 1, adjust = F, tr=F)
dev.off()

Pathway vs clinical

pathData <- otu_table(phylo3) %>% t()%>%as.data.frame()
metadata2 <- openxlsx::read.xlsx("../data/metadata.xlsx", sheet = 3, rowNames=T)
rownames(metadata2) <- metadata2$PID
metadata2 <- metadata2[, -1]
########## spearman correlation #####################
source("../R/basic.R")
res1_spearman <- myspearman(phe = metadata2, species =t(pathData), method = "s")
rownames(res1_spearman) <- as.character(pathway_name[rownames(res1_spearman),])
openxlsx::write.xlsx(res1_spearman, "clinical_pathway_spearman.xlsx", rowNames=T)


######### patital spearmna correlation ##############
source("../R/pcorPair.R")
library(ppcor)
# split g0 & g1
# res <- PcorPair(microbiota = otu_table(phylo3) %>% t() %>%as.data.frame(), metadata = metadata, metadataVar = metadataVar, confounder = confounder,method = "s", time_varname = "Group")

# all 
confounder <- "BMI"
metadataVar <- colnames(metadata2)[-1]

metadata2$Group2 <- "all"
res2_pcor <- PcorPair(microbiota = pathData, metadata = metadata2, metadataVar = metadataVar, confounder = confounder,method = "s", time_varname = "Group2")
rownames(res2_pcor[[1]]) <- as.character(pathway_name[rownames(res2_pcor[[1]]),])

openxlsx::write.xlsx(res2_pcor[[1]], "clinical_pathway_pcor.xlsx", rowNames=T)

# plot 
library(pheatmap)
pdf("clinical.pathway.spearman.pdf", width = 20, height = 50)
corPlot(res1_spearman, cutoff = 1, adjust = F, tr=F)
dev.off()



pdf("clinical.pathway.pcor.pdf", width = 20, height = 50)
corPlot(res2_pcor[[1]], cutoff = 1, adjust = F, tr=F)
dev.off()

03. Model

RF based on species & Pathway

source("../R/predict.R")
library(caret)
# to generate the  classified model 
sele_spe <- as.data.frame(t(splittax$pmv_species[com_species[com_species$Pvalue <= 0.05, "type"], ]))  
sele_human <- t(humann2ft[com_humann2[com_humann2$Pvalue <= 0.05, "type"], ])

# species model
phe <- metadata[rownames(sele_spe), ]
res_species <- randomForestTwo(data = sele_spe, metadata = phe, response = "Group", repeatNum = 10, foldNum = 5, factorLev = c("g0", "g1"))

library(plotROC)
library(pROC)
perfPlot(res_species, title = "Group")


# pathway model 
# res_pathway <- randomForestTwo(data = all, metadata = phe, response = "Group", repeatNum = 100, foldNum = "leaveone", factorLev = c("g0", "g1"))
# plot2 <- perfPlot(res_pathway, title = "Group")


rusher321/microbiotaPair documentation built on July 24, 2024, 8:40 p.m.