rm(list=ls())
library (vegan)
library(ape)
library(ggplot2)
library(dplyr)
library(data.table)
merge_data <- function(df1, df2){
tab <- df1[match(df1$SampleID, as.character(df2$SampleID)),] #match the data.frames on the SampleID
out <- merge(tab, df2, by.x="SampleID", by.y = "SampleID")
return(out)
}
perform_cap_scale <- function(idata, norm_dat, dist_mat){
# set up full and null models for ordistep
cap1 <- capscale(norm_dat ~ ., data=idata, dist=as.character(dist_mat))
cap0 <- capscale(norm_dat ~ 1, data=idata, dist=as.character(dist_mat))
#perform forward and backward selection of explanatory variables
env <- ordistep(cap0, scope=formula(cap1))
anova_res <- env$anova
return(list(env, cap0, cap1, anova))
}
#TODO have to disentagle this function and maybe work with two different frames rather combined
peform_varpart <- function(df, env_vars, limit_OTU_table, normalization="hellinger"){
normi <- decostand(df[limit_OTU_table[1]:nrow(df),limit_OTU_table[2]:ncol(df)], method = normalization)
df_subset <- df[, env_vars]
df_subset <- droplevels(df_subset)
return(normi, df_subset)
}
delete_neg_mock <- function(df, names_mock, name_neg){
df_clean <- as.data.table(df)
result <- df_clean[!Sample_ID %like% get(names_mock) & !Sample_ID %like% name_neg]
return(result)
}
#read in Mapping file with the Details for each Sample
map <- read.csv("/home/robert/Projects/microbiome-a.incompertus/data/Metadata_with_Host.tsv", sep="\t")
names(map)[1]<-"SampleID"
#read in OTU table which i transposed in Python before
OTU <- read.csv("/home/robert/Projects/microbiome-a.incompertus/data/OTU_table_bacteria_forward_Rhizo_unassigned.csv", sep=",", header=TRUE)
#OTU table has a weird column name, fix that
names(OTU)[1]<-"SampleID"
#merge two data.frames based on ID
bacteria <- merge_data(map, OTU)
#scaledOTU <- scale(bacteria[1:nrow(bacteria),24:ncol(bacteria)], scale=T)
#summary(scaledOTU, display=NULL)
#mean(scaledOTU)
test <- delete_neg_mock(bacteria)
#delete the Mock-Community and negatives
bacteria_new <- bacteria[-c(63,64,65,66),]
bacteria_new$Location<-factor(bacteria_new$Location)
bacteria_new$Raffaelea<-factor(bacteria_new$Raffaelea)
##########################################################################################################################################################################
######################################################## Variationpartitioning ###########################################################################################
#OTU table merged with the metaData
#normalize the data with decostand
normi <- decostand(bacteria_new[1:nrow(bacteria_new),24:ncol(bacteria_new)], method="hellinger")
#Get the environmental variable I thnk are a key factor
ambrosia.env <- bacteria_new[, c('Location', 'Gender', 'Tree', 'Host', 'Raffaelea','Collected', 'Alive_Dead')]
#the tree level has cooccuring patterns between sites so fix that
ambrosia.env$Tree <- factor(with(ambrosia.env, paste(Location, Tree, sep ='_')))
ambrosia.env <- droplevels(ambrosia.env)
# The column collected should be a factor
ambrosia.env$Collected <- factor(ambrosia.env$Collected)
# set up full and null models for ordistep
bacteria.cap1 <- capscale(normi ~ ., data=ambrosia.env, dist="bray")
bacteria.cap0 <- capscale(normi ~ 1, data=ambrosia.env, dist="bray")
summary(ambrosia.env)
#perform forward and backward selection of explanatory variables
step.env <- ordistep(bacteria.cap0, scope=formula(bacteria.cap1))
step.env$anova
#exclude tree
new.ambrosia.env <- ambrosia.env[, c('Location','Gender', 'Host', 'Raffaelea', 'Collected', 'Alive_Dead')]
new.ambrosia.env <- droplevels(new.ambrosia.env)
summary(new.ambrosia.env)
# set up full and null models for ordistep
bacteria.cap1.new <- capscale(normi ~ ., data=new.ambrosia.env, dist="bray")
bacteria.cap0.new <- capscale(normi ~ 1, data=new.ambrosia.env, dist="bray")
#perform forward and backward selection of explanatory variables
step.env.new <- ordistep(bacteria.cap0.new, scope=formula(bacteria.cap1.new))
step.env.new$anova
plot(step.env.new)
#Select the OTU table
OTU.var <- bacteria_new[1:nrow(bacteria_new),24:ncol(bacteria_new)]
#Do the variation partitioning with these variables
bacteria.var <- varpart(OTU.var, ~Location, ~Alive_Dead, ~Host, ~Collected, data=new.ambrosia.env)
plot(bacteria.var, bg=1:3, Xnames=c('Location', 'Alive Dead', 'Host', 'Collected'))
title(main = "Variation for Bacteria explained")
bacteria.var
#Check for significance of the partioned variables
bacteria.var.rda.Location <- rda(OTU.var ~ Location + Condition(Host) + Condition(Collected) + Condition(Alive_Dead), data=new.ambrosia.env)
anova(bacteria.var.rda.Location)
bacteria.var.rda.Host <- rda(OTU.var ~ Host + Condition(Location) + Condition(Collected) + Condition(Alive_Dead), data=new.ambrosia.env)
anova(bacteria.var.rda.Host)
bacteria.var.rda.Collected <- rda(OTU.var ~ Collected + Condition(Host) + Condition(Location) + Condition(Alive_Dead), data=new.ambrosia.env)
anova(bacteria.var.rda.Collected)
bacteria.var.rda.Alive_Dead <- rda(OTU.var ~ Alive_Dead + Condition(Host) + Condition(Collected) + Condition(Location), data=new.ambrosia.env)
anova(bacteria.var.rda.Alive_Dead)
#include the distance of trees in the variation partitioning
spatial_Ai <- subset(bacteria_new, select=c("Longitude", "Latidude"))
ambrosia.pcnm <- as.data.frame(scores(pcnm(dist(spatial_Ai))))
dim(ambrosia.pcnm)
ambrosia.var <- varpart(OTU.var, ambrosia.pcnm, ~Location, ~Alive_Dead, ~Host, data=ambrosia.env)
plot(ambrosia.var, bg=1:3, Xnames=c('space', 'Location', 'Alive or Dead', 'Host'))
#test for the significance
sig.rda.Location <- rda(OTU.var, new.ambrosia.env$Location, cbind(new.ambrosia.env$Alive_Dead, new.ambrosia.env$Host))
anova(sig.rda.Location)
plot(sig.rda.Location)
################################################### Variation partitioning and Spatial Analysis on each site ####################################################################################################
################################################## Olney State Forest #################################################################################################
#subset the table for the specific site, in this case Olney
Olney <- subset(bacteria_new, Location=='Olney')
#grep the spatial patterns and transform them into a distance matrix
O.spatial_Ai <- subset(Olney, select=c("Longitude", "Latidude"))
Olney.pcnm <- as.data.frame(scores(pcnm(dist(O.spatial_Ai))))
dim(Olney.pcnm)
#Get only the OTUs
OTU.spatial_olney <- Olney[1:nrow(Olney),24:ncol(Olney)]
#normalize
normi.olney <- decostand(OTU.spatial_olney, method="hellinger")
#Get the variable I am interested in
ambrosia.env.Olney <- Olney[, c('Gender', 'Tree', 'Host', 'Raffaelea','Collected', 'Alive_Dead')]
#the tree level has cooccuring patterns between sites so fix that
ambrosia.env.Olney <- droplevels(ambrosia.env.Olney)
# The column collected should be a factor
ambrosia.env.Olney$Collected <- factor(ambrosia.env.Olney$Collected)
ambrosia.env.Olney$Tree <- factor(ambrosia.env.Olney$Tree)
# set up full and null models for ordistep
bacteria.olney.cap1 <- capscale(normi.olney ~ ., data=ambrosia.env.Olney, dist="bray")
bacteria.olney.cap0 <- capscale(normi.olney ~ 1, data=ambrosia.env.Olney, dist="bray")
summary(ambrosia.env.Olney)
#perform forward and backward selection of explanatory variables
step.olney.env <- ordistep(bacteria.olney.cap0, scope=formula(bacteria.olney.cap1))
step.olney.env$anova
ambrosia_Olney.var <- varpart(OTU.spatial_olney, Olney.pcnm, ~Tree, data=Olney)
plot(ambrosia_Olney.var, bg=1:3, Xnames=c('space', 'Tree'))
ambrosia_Olney.var
#Redundancy analysis for the site.
ambrosia_Olney.rda <- rda(OTU.spatial_olney ~ Tree + Condition(Olney.pcnm[, 1]) + Condition(Olney.pcnm[, 2]) + Condition(Olney.pcnm[, 3]), data=Olney)
anova(ambrosia_Olney.rda)
############################################### Dampier State Forest ######################################################################
#subset the table for the specific site, in this case Dampier
Dampier <- subset(bacteria_new, Location=='Dampier_State_Forest')
#grep the spatial patterns and transform them into a distance matrix
Dampier.spatial_Ai <- subset(Dampier, select=c("Longitude", "Latidude"))
Dampier.pcnm <- as.data.frame(scores(pcnm(dist(Dampier.spatial_Ai))))
dim(Dampier.pcnm)
#Get only the OTUs
OTU.spatial_dampier <- Dampier[1:nrow(Dampier),24:ncol(Dampier)]
#normalize
normi.dampier <- decostand(OTU.spatial_dampier, method="hellinger")
#Get the variable I am interested in
ambrosia.env.Dampier <- Dampier[, c('Gender', 'Tree', 'Host', 'Raffaelea','Collected', 'Alive_Dead')]
#the tree level has cooccuring patterns between sites so fix that
ambrosia.env.Dampier <- droplevels(ambrosia.env.Dampier)
# The column collected should be a factor
ambrosia.env.Dampier$Collected <- factor(ambrosia.env.Dampier$Collected)
ambrosia.env.Dampier$Tree <- factor(ambrosia.env.Dampier$Tree)
# set up full and null models for ordistep
bacteria.dampier.cap1 <- capscale(normi.dampier ~ ., data=ambrosia.env.Dampier, dist="bray")
bacteria.dampier.cap0 <- capscale(normi.dampier ~ 1, data=ambrosia.env.Dampier, dist="bray")
summary(ambrosia.env.Dampier)
#perform forward and backward selection of explanatory variables
step.dampier.env <- ordistep(bacteria.dampier.cap0, scope=formula(bacteria.dampier.cap1))
step.dampier.env$anova
ambrosia_dampier.var <- varpart(OTU.spatial_dampier, Dampier.pcnm, ~Tree, data=Dampier)
plot(ambrosia_dampier.var, bg=1:3, Xnames=c('space', 'Tree'))
ambrosia_dampier.var
#Redundancy analysis for the site.
ambrosia_dampier.rda <- rda(OTU.spatial_dampier ~ Tree + Condition(Dampier.pcnm[, 1]) + Condition(Dampier.pcnm[, 2]) + Condition(Dampier.pcnm[, 3]), data=Dampier)
anova(ambrosia_dampier.rda)
############################################### Dorrigo National Park ######################################################################
#subset the table for the specific site, in this case Dorrigo
dorrigo <- subset(bacteria_new, Location=='Dorrigo')
#grep the spatial patterns and transform them into a distance matrix
dorrigo.spatial_Ai <- subset(dorrigo, select=c("Longitude", "Latidude"))
dorrigo.pcnm <- as.data.frame(scores(pcnm(dist(dorrigo.spatial_Ai))))
dim(dorrigo.pcnm)
#Get only the OTUs
OTU.spatial_dorrigo <- dorrigo[1:nrow(dorrigo),24:ncol(dorrigo)]
#normalize
normi.dorrigo <- decostand(OTU.spatial_dorrigo, method="hellinger")
#Get the variable I am interested in
ambrosia.env.dorrigo <- dorrigo[, c('Gender', 'Tree', 'Host', 'Raffaelea','Collected', 'Alive_Dead')]
ambrosia.env.dorrigo <- droplevels(ambrosia.env.dorrigo)
# The column collected should be a factor
ambrosia.env.dorrigo$Collected <- factor(ambrosia.env.dorrigo$Collected)
ambrosia.env.dorrigo$Tree <- factor(ambrosia.env.dorrigo$Tree)
# set up full and null models for ordistep
bacteria.dorrigo.cap1 <- capscale(normi.dorrigo ~ ., data=ambrosia.env.dorrigo, dist="bray")
bacteria.dorrigo.cap0 <- capscale(normi.dorrigo ~ 1, data=ambrosia.env.dorrigo, dist="bray")
summary(ambrosia.env.dorrigo)
#perform forward and backward selection of explanatory variables
step.dorrigo.env <- ordistep(bacteria.dorrigo.cap0, scope=formula(bacteria.dorrigo.cap1))
step.dorrigo.env$anova
ambrosia_dorrigo.var <- varpart(OTU.spatial_dorrigo, dorrigo.pcnm, ~Gender, data=dorrigo)
plot(ambrosia_dorrigo.var, bg=1:3, Xnames=c('space', 'Gender'))
ambrosia_dorrigo.var
#Redundancy analysis for the site.
ambrosia_dorrigo.rda <- rda(OTU.spatial_dorrigo, ambrosia_dorrigo.var$Gender, OTU.spatial_dorrigo, data=dorrigo)
anova(ambrosia_dorrigo.rda)
############################################### Wombeyan National Park ######################################################################
#subset the table for the specific site, in this case Wombeyan
wombeyan <- subset(bacteria_new, Location=='Wombeyan_Caves')
#grep the spatial patterns and transform them into a distance matrix
wombeyan.spatial_Ai <- subset(wombeyan, select=c("Longitude", "Latidude"))
wombeyan.pcnm <- as.data.frame(scores(pcnm(dist(wombeyan.spatial_Ai))))
dim(wombeyan.pcnm)
#Get only the OTUs
OTU.spatial_wombeyan <- wombeyan[1:nrow(wombeyan),24:ncol(wombeyan)]
#normalize
normi.wombeyan <- decostand(OTU.spatial_wombeyan, method="hellinger")
#Get the variable I am interested in
ambrosia.env.wombeyan <- wombeyan[, c('Gender', 'Tree', 'Host', 'Raffaelea','Collected', 'Alive_Dead')]
ambrosia.env.wombeyan <- droplevels(ambrosia.env.wombeyan)
# The column collected should be a factor
ambrosia.env.wombeyan$Collected <- factor(ambrosia.env.wombeyan$Collected)
ambrosia.env.wombeyan$Tree <- factor(ambrosia.env.wombeyan$Tree)
# set up full and null models for ordistep
bacteria.wombeyan.cap1 <- capscale(normi.wombeyan ~ ., data=ambrosia.env.wombeyan, dist="bray")
bacteria.wombeyan.cap0 <- capscale(normi.wombeyan ~ 1, data=ambrosia.env.wombeyan, dist="bray")
summary(ambrosia.env.wombeyan)
#perform forward and backward selection of explanatory variables
step.wombeyan.env <- ordistep(bacteria.wombeyan.cap0, scope=formula(bacteria.wombeyan.cap1))
step.wombeyan.env$anova
ambrosia_wombeyan.var <- varpart(OTU.spatial_wombeyan, wombeyan.pcnm, ~Tree, data=wombeyan)
plot(ambrosia_wombeyan.var, bg=1:3, Xnames=c('space', 'Host', 'Raffaelea'))
ambrosia_wombeyan.var
#Redundancy analysis for the site.
ambrosia_wombeyan.rda <- rda(OTU.spatial_wombeyan, ambrosia_wombeyan.var$Gender, OTU.spatial_wombeyan, data=wombeyan)
anova(ambrosia_wombeyan.rda)
############################################### Mount Wilson ######################################################################
#subset the table for the specific site, in this case Mount Wilson
wilson <- subset(bacteria_new, Location=='Mount_Wilson')
#grep the spatial patterns and transform them into a distance matrix
wilson.spatial_Ai <- subset(wilson, select=c("Longitude", "Latidude"))
wilson.pcnm <- as.data.frame(scores(pcnm(dist(wilson.spatial_Ai))))
dim(wilson.pcnm)
#Get only the OTUs
OTU.spatial_wilson <- wilson[1:nrow(wilson),24:ncol(wilson)]
#normalize
normi.wilson <- decostand(OTU.spatial_wilson, method="hellinger")
#Get the variable I am interested in
ambrosia.env.wilson <- wilson[, c('Gender', 'Tree', 'Host', 'Raffaelea','Collected', 'Alive_Dead')]
ambrosia.env.wilson <- droplevels(ambrosia.env.wilson)
# The column collected should be a factor
ambrosia.env.wilson$Collected <- factor(ambrosia.env.wilson$Collected)
ambrosia.env.wilson$Tree <- factor(ambrosia.env.wilson$Tree)
# set up full and null models for ordistep
bacteria.wilson.cap1 <- capscale(normi.wilson ~ ., data=ambrosia.env.wilson, dist="bray")
bacteria.wilson.cap0 <- capscale(normi.wilson ~ 1, data=ambrosia.env.wilson, dist="bray")
summary(ambrosia.env.wilson)
#perform forward and backward selection of explanatory variables
step.wilson.env <- ordistep(bacteria.wilson.cap0, scope=formula(bacteria.wilson.cap1))
step.wilson.env$anova
ambrosia_wilson.var <- varpart(OTU.spatial_wilson, wilson.pcnm, ~Tree, data=wilson)
plot(ambrosia_wilson.var, bg=1:3, Xnames=c('space', 'Tree'))
ambrosia_wilson.var
############################################### Termeil State Forest ######################################################################
#subset the table for the specific site, in this case Termeil State Forest
termeil <- subset(bacteria_new, Location=='Termeil_State_Forest')
#grep the spatial patterns and transform them into a distance matrix
termeil.spatial_Ai <- subset(termeil, select=c("Longitude", "Latidude"))
termeil.pcnm <- as.data.frame(scores(pcnm(dist(termeil.spatial_Ai))))
dim(termeil.pcnm)
#Get only the OTUs
OTU.spatial_termeil <- termeil[1:nrow(termeil),24:ncol(termeil)]
#normalize
normi.termeil <- decostand(OTU.spatial_termeil, method="hellinger")
#Get the variable I am interested in
ambrosia.env.termeil <- termeil[, c('Gender', 'Tree', 'Host', 'Raffaelea','Collected', 'Alive_Dead')]
ambrosia.env.termeil <- droplevels(ambrosia.env.termeil)
# The column collected should be a factor
ambrosia.env.termeil$Collected <- factor(ambrosia.env.termeil$Collected)
ambrosia.env.termeil$Tree <- factor(ambrosia.env.termeil$Tree)
# set up full and null models for ordistep
bacteria.termeil.cap1 <- capscale(normi.termeil ~ ., data=ambrosia.env.termeil, dist="bray")
bacteria.termeil.cap0 <- capscale(normi.termeil ~ 1, data=ambrosia.env.termeil, dist="bray")
summary(ambrosia.env.termeil)
#perform forward and backward selection of explanatory variables
step.termeil.env <- ordistep(bacteria.termeil.cap0, scope=formula(bacteria.termeil.cap1))
step.termeil.env$anova
ambrosia_termeil.var <- varpart(OTU.spatial_termeil, termeil.pcnm, ~Tree, data=termeil)
plot(ambrosia_termeil.var, bg=1:3, Xnames=c('space', 'Tree'))
ambrosia_termeil.var
#Redundancy analysis for the site.
ambrosia_termeil.rda <- rda(OTU.spatial_wombeyan, ambrosia_wombeyan.var$Tree, OTU.spatial_wombeyan, data=wombeyan)
anova(ambrosia_termeil.rda)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.