requires : - datasets : spdiv, spinfo (spinfo is the file containing information about all species, spdiv is the collected data.) - plotNAset (the names of plots which are included)

Load and merge dataset

Note : this script is based on the Exploratoreis diversity dataset, which is read in with the script analysis_nonpublic.R. It reads in the two datasets spdiv and spinfo.

# Load requirements from <..>nonpublic.R file
sections_to_be_loaded <- c("raw_diversity")
source("vignettes/analysis_nonpublic.R")
# this loads all datasets (updated versions.)
require(betapart) # needed in order to run beta.pair_zerospecies()

Dataset Cleaning

From cleaning, getting numbers : - 202944 species in the dataset - 20 different trophic levels - type = presenceabsence : plant pathogen, ant omnivore

# spdiv[, c("Dataversion", "DataID") := NULL]
# This analysis requires complete data (NA s in the diversity data can not be accepted because 
# they can not be imputed).
# Therefore remove all rows with NA.
spdiv <- spdiv[!is.na(value)]

# Set to presence-absence
spdiv_abund <- spdiv # keep abundance data for later (rarefaction)
spdiv[value != 0, value := 1] # convert to presence-absence

Select trophic levels

Group to 18 trophic levels as needed for analysis. (see also info_diversity.ods)

#TODO DIESES SKRIPT
# - übersichten über die trophic levels, number of species etc. ev. nach unten schieben --> 
#    zu dem Part, wo das Diversity dataset parat ist.

#TODO delete Oct 2023
# new names are all already there.
# # rename trophic levels with names from the previous version of the diversity dataset
# # (old ID : 190802, new ID : 201124)
# spdiv <- merge(spdiv, names_trlevel, by = "Trophic_level", all.x = T)
# spdiv[, Trophic_level := NULL]
# setnames(spdiv, old = "old_Trophic_level", new = "Trophic_level")
#TODO change names or NOT? if not, delete this chunk and the dataset names_trlevel

# take out trophic levels which are not used in analysis
# not looked at in this script
exclude <- c("unknown.protist", "unknown.soilfungi", "unknown.acari", "herbivore.bird", "protist.parasite.nonplant", "mainlybacterivore.protist", "decomposer.collembola", "omnivore.snail", "omnivore.arthropod")
# herbivore.bird, former vertebrate herbivore vert.herb : only 104 plots
# protist.parasite.nonplant :  probably wise to remove, we don't expect to affect any function directly
# mainlybacterivore.protist : less studied --> less information about their roles. expected to 
#      have similar role than bacterivores.
# decomposer.collembola : data from 2019, newer than the newest function (from 2017)

# take out trophic leels which are not used in analysis
# but are looked at in this script (can be outcommented to reproduce reason of exclusion)
#TODO potentially uncomment below (Oct 2023)
exclude <- c(exclude, "omnivore.snail", "omnivore.ant", "omnivore.arthropod", "decomposer.acari", "carnivore.acari", "protist.bacterivore", "herbivore.snail", "pollinator.arthropod")
# pollinator.arthropod : see reason below
# omnivores : represent interactions between secondary.consumers and herbivores
# carnivore.acari : only 1 species
# herbivore.snail : too few plots
# decomposers : from 2019, functions are until 2017 - too young.

spdiv <- spdiv[!Trophic_level %in% exclude, ]
spdiv_backup <- spdiv # TODO remove later

overview dataset

unique(spdiv[, .(Trophic_level, Year)])
length(unique(spdiv$Species)) # 183649 species in the dataset (incl. OTU)
# Get overview of amount of species per trophic level
for(t in unique(spdiv$Trophic_level)){
  print(c(t, length(unique(spdiv[Trophic_level == t, Species]))))
}

[1] "bacteria.RNA" "175021"
[1] "herbivore.snail" "39"
[1] "autotroph" "494"
[1] "decomposer.acari" "46"
[1] "tertiary.consumer.birdbat" "78"
[1] "pollinator.arthropod" "806"
[1] "omnivore.snail" "21"
[1] "herbivore.arthropod" "681"
[1] "plantpathogen.fungi" "84"
[1] "secondary.consumer.arthropodsoillarvae" "17"
[1] "decomposer.myriapod" "4"
[1] "herbivore.arthropodsoillarvae" "11"
[1] "omnivore.ant" "20"
[1] "secondary.consumer.arthropod" "376"
[1] "omnivore.arthropod" "25"
[1] "carnivore.acari" "1"
[1] "secondary.consumer.myriapod" "4"
[1] "omnivore.protist" "444"
[1] "eukaryvore.protist" "217"
[1] "plantparasite.protist" "96"
[1] "bacterivore.protist" "886"
[1] "decomposer.soilfungi" "4069"
[1] "symbiont.soilfungi" "710"
[1] "pathotroph.soilfungi" "461"
[1] "decomposer.arthropod" "133"

Store trophic levels in vector

trlevels <- sort(unique(spdiv$Trophic_level))
# store complete set of plots
all_plots <- unique(spdiv$Plot)
length(all_plots) == 150

Check if diversity across years is correlated

Create new column with trophic level and year

#TODO : move this chunk below
spdiv[, trlevel_year := paste(Trophic_level, Year, sep = "")]

Correlation plot of all trophic levels and years? Or better betadiversity among years?

# m <- cor(spdiv[, .(Species, value, trlevel_year)], use = "pariwise.complete.obs")
# 
# M <- cor(small_grlfuns[, !colnames(grlfuns) %in% c("Explo","Plotn", "Plot"), with=F], use="pairwise.complete.obs")
# corrplot::corrplot(M,type="lower",addCoef.col = "black",method="color",diag=F, tl.srt=1, tl.col="black", mar=c(1,0,1,0), number.cex=0.6)

# testing with 1 trophic level, 2 years
# for presence absence
for(trlevel in c("autotroph", "secondary.consumer", "tertiary.consumer.birdbat")){
  pdf(paste("corr_diversity_across_years_", trlevel, ".pdf", sep=""), paper = "a4r")
  par(mfrow=c(12,12))
  test <- spdiv[Trophic_level == trlevel, .(Species, value, trlevel_year, Plot, Year)]
  for(p in unique(test$Plot)){
    a <- data.table::dcast.data.table(test[Plot == p], Year ~ Species, fill = 0)
    a[is.na(a)] <- 0
    a <- t(a)
    colnames(a) <- a[1,]
    a <- a[-1,]
    a <- apply(a, 2, as.numeric)
    # corrplot::corrplot(cor(a), type="lower", diag = F)
    a[a != 0] <- 1
    if(sum(is.na(cor(a))) >= 1) next
    corrplot::corrplot(cor(a), type="lower", diag = T, method = "square", tl.col = "black", order = "alphabet", tl.cex = 0.7, main = p)
  }
  dev.off()
}
# note : not possible OTU, because it can't be compared across years. The OTU identities always change.
# note : omnivore, herbivores different years are different species --> no overlap

# tertiary consumers : special case. take the 67 species measured in 2008, 2011 and 2012 and take the 11 suppl. species out of years 2009 and 2010.
tert <- spdiv[Trophic_level == "tertiary.consumer", .(Species, value, trlevel_year, Plot, Year)]
# find the 11 species which are only present in 2009 and 2010
unique(tert[Year == "2009", Species])[which(!unique(tert[Year == "2009", Species]) %in% unique(tert[Year == "2008", Species]))]

Clean Diversity data

Combine years if possible, check presence of species and if a given group of species can be included in the analysis or not.

autotrophs

Years 2008 to 2016. Lichens and Bryophytes only 2009 (119 species, 137 plots). Note : this dataset is referred to as "lichens" in this script, but it always includes mosses, too.

year | no. species | no. Plots | -------| ------------ | --------- | 2008 | 342 | 147 | 2009 | 485 | 150 | 2009 l | 119 | 150 | lichen & bryophyte dataset 2009 p | 342 | 150 | 2010 | 342 | 150 | 2011 | 342 | 150 | 2012 | 342 | 149 | 2013 | 342 | 149 | 2014 | 342 | 148 | 2015 | 342 | 150 | 2016 | 342 | 150 | 2017 | 365 | 150 | 2018 | 365 | 144 |

autotroph <- spdiv[Trophic_level == "autotroph"]
autotroph <- autotroph[Year %in% c("2009", "2008", "2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017", "2018")]
# find the species in autotroph which are never present in the years
test <- data.table::data.table(aggregate(value ~ Species, autotroph, sum))
exclude <- test[value == 0, Species]
# exclude them from the analysis
autotroph <- autotroph[!Species %in% exclude]

# get numbers for table above :
# how many species and how many plots were measured in the different years?
# length(unique(autotroph[Year == "2008", Plot])) # Changed year and tested for all years for reporting in the table

# get vector with lichen species to examine lichens separately
lichenspecies <- setdiff(unique(autotroph[Year == "2009", Species]), unique(autotroph[Year == "2008", Species]))
  # examine lichens and plants separately - same number of plots and expected number of species?
length(unique(autotroph[!Species %in% lichenspecies, Species]))
length(unique(autotroph[!Species %in% lichenspecies, Plot]))

Plots identities

# are the same plots missing in all years?
testplotpresence <- unique(autotroph[, .(Year, Plot)])
visdat::vis_miss(data.table::dcast(testplotpresence, Plot ~ Year, value.var = "Plot"))
# no, different plots are missing.
rm(testplotpresence)
# note : this chunk depends on plotNAset, which is not available from the first round on. (is created by this script and updated by another later. (circularity in scripts))
# are the same plots missing in all years?
testplotpresence <- unique(autotroph[Plot %in% plotNAset, .(Year, Plot)])
visdat::vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
# no, different plots are missing.
rm(testplotpresence)

If a plant species is present in at least 1 year, it is considered to be present. Underlying assumption : plant species composition is quite stable, therefore it is representative to assess their presence in a large time span.

If a plant species is missing in a year, rather the year than the plot is removed. Underlying assumption : if a plot is missing a year, the diversity was not measured as many times as in the other plots.

Years 2009, 2010, 2011, 2015, 2016, 2017 are complete.

Assessing presence of plant species in all years. Lichen and Bryophyte species are taken directly from year 2009, as this is the only year.

autotroph <- autotroph[Year %in% c("2009", "2010", "2011", "2015", "2016", "2017")]
plants <- autotroph[!Species %in% lichenspecies, ]
lichens <- autotroph[Species %in% lichenspecies,]

# if a species is present in at least one year, it is considered to be present
# calculate the mean of all value (species presence/ absence). If it is not zero, the species was present in at least one year and the value can be set to 1.
plants <- aggregate(value ~ Species + Plot, plants, function(x) mean(x, na.rm=T))
plants <- data.table::data.table(plants)
# Check how many plant species have been present / absent in how many years
hist(plants$value, main="Presence of plant species among years", sub="The plot indicates if a plant species was either present in no year, \n in 1 years , ... , in all 6 years. Most species are never present", xlab = "")

# set the numbers which are not 0 or 1 to 1 (if value > 0, that means that plant has been present at least in one year)
plants[value != 0, value := 1]

# add the lichens dataset to the plants dataset
autotroph <- rbind(plants, lichens[, .(Species, Plot, value)])
nrow(autotroph) == 150 * 365 + 150 * 119 # TRUE : there are 119 lichen and 365 plant species in all 150 plots.

Dataset IDs : 5522 27386 4140 (all 3 datasets are version 2) Years : "2009" "2010" "2011" "2015" "2016" "2017" Plots : 150

hist(autotroph$value)
hist(lichens$value)
hist(plants$value)

Although working with a range of 6 years, still many plant species are not present in many plots. This underpins the the assumption of stable autotroph species composition.

# remove lichens and plants, not used any more
rm(plants) ; rm(lichens); rm(lichenspecies); rm(test)
# take out autotrophs from spdiv
spdiv <- spdiv[!Trophic_level == "autotroph"]

herbivore

candidates to include : herbivore.arthropod and herbivore.snail note: code with eval=F is describing the reasons why a specific group of species is excluded or included.

herbivore <- spdiv[Trophic_level %in% c("herbivore.arthropod")]
# note that currently, herbivore.snail is excluded (in head part of script)
# herbivore.snail <- spdiv[Trophic_level %in% c("herbivore.snail")]

first check plot set for arthropods, then check which plots are covered by snail data as well.

herbivore.arthropod

consist of 4 groups ("Hymenoptera" "Hemiptera" "Coleoptera" "Orthoptera"), some contain missing plots. Chose the plot intersect with complete coverage.

# Arthropod herbivores consist of 3 Groups, 2 of them contain missing data, one not.
# We can only take the plots present in all groups --> reduce the number of plots
unique(herbivore[Trophic_level == "herbivore.arthropod", Group_broad])
length(unique(herbivore[Group_broad == "Hemiptera", Species])) # 313
temp_1 <- aggregate(herbivore, Species ~ Group_broad + Year, function(x) length(unique(x)))
temp_2 <- aggregate(herbivore, Plot ~ Group_broad + Year, function(x) length(unique(x)))
temp <- merge(temp_1, temp_2, by = c("Group_broad", "Year"))
rm(temp_1, temp_2)
# knitr::kable(temp) # print table nicely in markdown format

|Group_broad |Year | Species| Plot| |:-----------|:----|-------:|----:| |Coleoptera |2008 | 343| 146| |Coleoptera |2009 | 343| 149| |Coleoptera |2010 | 343| 149| |Coleoptera |2011 | 343| 143| |Coleoptera |2012 | 343| 148| |Coleoptera |2013 | 343| 149| |Coleoptera |2014 | 343| 134| |Coleoptera |2015 | 343| 144| |Coleoptera |2016 | 343| 110| |Coleoptera |2017 | 343| 148| |:-----------|:----|-------:|----:| |Hemiptera |2008 | 313| 146| |Hemiptera |2009 | 313| 149| |Hemiptera |2010 | 313| 149| |Hemiptera |2011 | 313| 143| |Hemiptera |2012 | 313| 148| |Hemiptera |2013 | 313| 149| |Hemiptera |2014 | 313| 134| |Hemiptera |2015 | 313| 144| |Hemiptera |2016 | 313| 110| |Hemiptera |2017 | 313| 148| |:-----------|:----|-------:|----:| |Hymenoptera |2008 | 4| 139| |:-----------|:----|-------:|----:| |Orthoptera |2008 | 21| 146| |Orthoptera |2009 | 21| 149| |Orthoptera |2010 | 21| 149| |Orthoptera |2011 | 21| 143| |Orthoptera |2012 | 21| 148| |Orthoptera |2013 | 21| 149| |Orthoptera |2014 | 21| 134| |Orthoptera |2015 | 21| 144| |Orthoptera |2016 | 21| 110| |Orthoptera |2017 | 21| 148|

herbivore <- herbivore[!Group_broad %in% "Hymenoptera", ]
herbivore <- herbivore[!Year %in% c("2014", "2016"), ]
herbivore <- herbivore[Year %in% c("2009", "2008", "2010", "2011", "2012", "2013", "2015", "2017"), ]
# find the species in herbivore arthropod which are never present in any of the years
test <- data.table::data.table(aggregate(value ~ Species, herbivore, sum))
exclude <- test[value == 0, Species]
# exclude them from the analysis
herbivore <- herbivore[!Species %in% exclude]
testplotpresence_orthoptera <- data.table::dcast(unique(herbivore[Group_broad == "Orthoptera", .(Plot, Year)]), Plot ~ Year, value.var = "Plot")
testplotpresence_coleoptera <- data.table::dcast(unique(herbivore[Group_broad == "Coleoptera", .(Plot, Year)]), Plot ~ Year, value.var = "Plot")
testplotpresence_hemiptera <- data.table::dcast(unique(herbivore[Group_broad == "Hemiptera", .(Plot, Year)]), Plot ~ Year, value.var = "Plot")

plot_grid(visdat::vis_miss(testplotpresence_orthoptera), visdat::vis_miss(testplotpresence_coleoptera),
          visdat::vis_miss(testplotpresence_hemiptera))
rm(testplotpresence_orthoptera, testplotpresence_coleoptera, testplotpresence_hemiptera)
# if a species is present in at least one year, it is considered to be present
# calculate the mean of all value (species presence/ absence). If it is not zero, the species was present in at least one year and the value can be set to 1.
herbivore <- data.table(aggregate(value ~ Species + Plot, herbivore, function(x) mean(x, na.rm=T)))
length(unique(herbivore$Species)) # 651 species

# Check how many herbivore species have been present / absent in how many years
hist(herbivore$value, main="Presence of herbivore species among years", sub="The plot indicates if a arhtropod species was either present in no year, \n in 1 years , ... , in all 8 years. Most species are never present", xlab = "")
# Years : "2008" "2009" "2010" "2011" "2012" "2013" "2015" "2017"

herbivore[value > 0, value := 1]
hist(herbivore$value)
length(unique(herbivore$Plot)) # 150 plots

herbivore[, Trophic_level := "herbivore.arthropod"]

herbivore.snail REMOVED

testplotpresence <- unique(rbindlist(list(herbivore[, .(Trophic_level, Plot)], herbivore.snail[, .(Trophic_level, Plot)])))
visdat::vis_miss(data.table::dcast(testplotpresence, Plot ~ Trophic_level, value.var = "Plot"))
rm(testplotpresence)

Snails were only measured in 134 plots. They contain 39 species. Insects were measured in 134 plots, they contain 401 species. Snails seem to be missing mostly in one region : in SEG, 16 plots are missing, no missings in the other regions.

length(grep("AEG", unique(herbivore.snail[Trophic_level == "herbivore.snail", Plot]), value = T)) # 50
length(grep("SEG", unique(herbivore.snail[Trophic_level == "herbivore.snail", Plot]), value = T)) # 34
length(grep("HEG", unique(herbivore.snail[Trophic_level == "herbivore.snail", Plot]), value = T)) # 50

16 Plots in Schorfheide are missing.

134 plots contain both arthropods and snails.

length(unique(herbivore[, Species])) # 681
length(unique(herbivore.snail[, Species])) # 39
length(intersect(herbivore[, Plot], herbivore.snail[Trophic_level == "herbivore.snail", Plot])) # 134

We have arthropod data on all plots. Snails are only from 1 year, and 16 plots are missing, all in one of the exploratories. We can not include them.

herbivore

Too many missing plots in snails, and too few species. Take arthropod dataset only.

herbivore.arthropod <- data.table::copy(herbivore)
herbivore.arthropod <- herbivore.arthropod[, .(Species, Plot, value)]
spdiv <- spdiv[!Trophic_level %in% c("herbivore.arthropod")]
# summary : 651 species in 150 plots
rm(herbivore)

Secondary consumers

arthropods (2008) and myriapods (2011)

secondary.consumer <- spdiv[Trophic_level %in% c("secondary.consumer.arthropod", "secondary.consumer.myriapod")]
# myria.sc <- spdiv[Trophic_level == "secondary.consumer.myriapod"]

testplotpresence <- unique(secondary.consumer[, .(Trophic_level, Plot)])
# testplotpresence <- unique(secondary.consumer[Plot %in% plotNAset, .(Year, Plot)])
visdat::vis_miss(data.table::dcast(testplotpresence, Plot ~ Trophic_level, value.var = "Plot"))
rm(testplotpresence)

No missing values.

length(unique(secondary.consumer[Trophic_level == "secondary.consumer.myriapod", Species]))
# any(unique(secondary.consumer[Trophic_level == "secondary.consumer.arthropod", Species]) %in% unique(secondary.consumer[Trophic_level == "secondary.consumer.myriapod", Species])) # no overlap

Year | arthropods | myriapods | ---------- | ----------- | --------- | no Plots | 150 | 150 | no Species | 376 | 4 |

Species set does not overlap.

Assess arthropod temporal data

consist of 5 groups ("Neuroptera" "Coleoptera" "Araneae" "Hemiptera" "Orthoptera"), some contain missing plots. Chose the plot intersect with complete coverage.

# Arthropod herbivores consist of 3 Groups, 2 of them contain missing data, one not.
# We can only take the plots present in all groups --> reduce the number of plots
unique(secondary.consumer[Trophic_level == "secondary.consumer.arthropod", Group_broad])
length(unique(secondary.consumer[Group_broad == "Neuroptera", Species])) # 6
temp_1 <- aggregate(secondary.consumer, Species ~ Group_broad + Year, function(x) length(unique(x)))
temp_2 <- aggregate(secondary.consumer, Plot ~ Group_broad + Year, function(x) length(unique(x)))
temp <- merge(temp_1, temp_2, by = c("Group_broad", "Year"))
rm(temp_1, temp_2)
# knitr::kable(temp) # print table nicely in markdown format

|Group_broad |Year | Species| Plot| |:-----------|:----|-------:|----:| |Araneae |2008 | 174| 146| |Araneae |2009 | 174| 149| |Araneae |2010 | 174| 149| |Araneae |2011 | 174| 143| |Araneae |2012 | 174| 148| |Araneae |2013 | 174| 149| |Araneae |2014 | 174| 134| |Araneae |2015 | 174| 144| |Araneae |2016 | 174| 110| |Araneae |2017 | 174| 148| |:-----------|:----|-------:|----:| |Coleoptera |2008 | 160| 146| |Coleoptera |2009 | 160| 149| |Coleoptera |2010 | 160| 149| |Coleoptera |2011 | 160| 143| |Coleoptera |2012 | 160| 148| |Coleoptera |2013 | 160| 149| |Coleoptera |2014 | 160| 134| |Coleoptera |2015 | 160| 144| |Coleoptera |2016 | 160| 110| |Coleoptera |2017 | 160| 148| |:-----------|:----|-------:|----:| |Hemiptera |2008 | 29| 146| |Hemiptera |2009 | 29| 149| |Hemiptera |2010 | 29| 149| |Hemiptera |2011 | 29| 143| |Hemiptera |2012 | 29| 148| |Hemiptera |2013 | 29| 149| |Hemiptera |2014 | 29| 134| |Hemiptera |2015 | 29| 144| |Hemiptera |2016 | 29| 110| |Hemiptera |2017 | 29| 148| |:-----------|:----|-------:|----:| |Myriapoda |2011 | 4| 150| |:-----------|:----|-------:|----:| |Neuroptera |2008 | 6| 139| |:-----------|:----|-------:|----:| |Orthoptera |2008 | 7| 146| |Orthoptera |2009 | 7| 149| |Orthoptera |2010 | 7| 149| |Orthoptera |2011 | 7| 143| |Orthoptera |2012 | 7| 148| |Orthoptera |2013 | 7| 149| |Orthoptera |2014 | 7| 134| |Orthoptera |2015 | 7| 144| |Orthoptera |2016 | 7| 110| |Orthoptera |2017 | 7| 148| |:-----------|:----|-------:|----:|

proceed as for herbivore arthropods :

secondary.consumer <- secondary.consumer[!Year %in% c("2014", "2016"), ]
secondary.consumer <- secondary.consumer[Year %in% c("2009", "2008", "2010", "2011", "2012", "2013", "2015", "2017"), ]
secondary.consumer <- secondary.consumer[!Group_broad %in% "Neuroptera"]
# find the species in secondary consumer arthropod which are never present in any of the years
test <- data.table::data.table(aggregate(value ~ Species, secondary.consumer, sum))
exclude <- test[value == 0, Species]
# exclude them from the analysis
secondary.consumer <- secondary.consumer[!Species %in% exclude]

Check pattern of missing plots

testplotpresence_orthoptera <- data.table::dcast(unique(secondary.consumer[Group_broad == "Orthoptera", .(Plot, Year)]), Plot ~ Year, value.var = "Plot")
testplotpresence_coleoptera <- data.table::dcast(unique(secondary.consumer[Group_broad == "Coleoptera", .(Plot, Year)]), Plot ~ Year, value.var = "Plot")
testplotpresence_hemiptera <- data.table::dcast(unique(secondary.consumer[Group_broad == "Hemiptera", .(Plot, Year)]), Plot ~ Year, value.var = "Plot")
testplotpresence_aranea <- data.table::dcast(unique(secondary.consumer[Group_broad == "Araneae", .(Plot, Year)]), Plot ~ Year, value.var = "Plot")
testplotpresence_myriapoda<- data.table::dcast(unique(secondary.consumer[Group_broad == "Myriapoda", .(Plot, Year)]), Plot ~ Year, value.var = "Plot")

plot_grid(visdat::vis_miss(testplotpresence_orthoptera),
          visdat::vis_miss(testplotpresence_coleoptera),
          visdat::vis_miss(testplotpresence_hemiptera), 
          visdat::vis_miss(testplotpresence_aranea),
          visdat::vis_miss(testplotpresence_myriapoda))
# The groups have the same pattern of missingness, except myriapoda no missing plots.

rm(testplotpresence_orthoptera, testplotpresence_coleoptera, testplotpresence_hemiptera, testplotpresence_aranea, testplotpresence_myriapoda)
# if a species is present in at least one year, it is considered to be present
# calculate the mean of all value (species presence/ absence). If it is not zero, the species was present in at least one year and the value can be set to 1.
secondary.consumer <- data.table(aggregate(value ~ Species + Plot, secondary.consumer, function(x) mean(x, na.rm=T)))
length(unique(secondary.consumer$Species)) # 343 species

# Check how many herbivore species have been present / absent in how many years
hist(secondary.consumer$value, main="Presence of secondary consumer species among years", sub="The plot indicates if a arhtropod species was either present in no year, \n in 1 years , ... , in all 8 years. Most species are never present", xlab = "", breaks = 9)
# Years : "2008" "2009" "2010" "2011" "2012" "2013" "2015" "2017"

secondary.consumer[value > 0, value := 1]
hist(secondary.consumer$value)
length(unique(secondary.consumer$Plot)) # 150 plots

We combine secondary.consumer.arthropod and secondary.consumer.myriapod to one trophic group "secondary.consumer". Need to update in trlevels vector.

trlevels <- c(grep("secondary.consumer.arthropod$|secondary.consumer.myriapod", trlevels, value = T, invert = T), "secondary.consumer")

# get datasetID etc. information
test <- unique(spdiv[Species %in% secondary.consumer$Species, .(Year, DataID, Dataversion, Group_broad)])
test <- test[Year %in% c("2009", "2008", "2010", "2011", "2012", "2013", "2015", "2017"), ]
rm(test)
length(unique(secondary.consumer$Plot))
length(unique(secondary.consumer$Species))

spdiv <- spdiv[!Trophic_level == "secondary.consumer.arthropod"]
spdiv <- spdiv[!Trophic_level == "secondary.consumer.myriapod"]

Dataset IDs : 20146 (v2, myriapods), 21969 (v4) Years : "2011" (incl. Myriapoda) "2008" "2009" "2010" "2012" "2013" "2015" "2017" Plots : 150 Species : 343 Groups : "Orthoptera" "Coleoptera" "Hemiptera""Araneae" "Myriapoda"

decomposer

myriapoda and coleoptera. Myriapoda only 2011, Coleoptera from temporal arthropod dataset

decomposer <- spdiv[Trophic_level %in% c("decomposer.myriapod", "decomposer.arthropod")]
temp_1 <- aggregate(decomposer, Species ~ Group_broad + Year, function(x) length(unique(x)))
temp_2 <- aggregate(decomposer, Plot ~ Group_broad + Year, function(x) length(unique(x)))
temp <- merge(temp_1, temp_2, by = c("Group_broad", "Year"))
rm(temp_1, temp_2)
# knitr::kable(temp) # print table nicely in markdown format

|Group_broad |Year | Species| Plot| |:-----------|:----|-------:|----:| |Coleoptera |2008 | 133| 146| |Coleoptera |2009 | 133| 149| |Coleoptera |2010 | 133| 149| |Coleoptera |2011 | 133| 143| |Coleoptera |2012 | 133| 148| |Coleoptera |2013 | 133| 149| |Coleoptera |2014 | 133| 134| |Coleoptera |2015 | 133| 144| |Coleoptera |2016 | 133| 110| |Coleoptera |2017 | 133| 148| |Myriapoda |2011 | 4| 150|

proceed equally as for secondary consumers arthropod :

decomposer <- decomposer[!Year %in% c("2014", "2016"), ]
decomposer <- decomposer[Year %in% c("2009", "2008", "2010", "2011", "2012", "2013", "2015", "2017"), ]
# find the species in decomposer which are never present in any of the years
test <- data.table::data.table(aggregate(value ~ Species, decomposer, sum))
exclude <- test[value == 0, Species]
# exclude them from the analysis
decomposer <- decomposer[!Species %in% exclude]

Check pattern of missing plots

testplotpresence_myriapod <- data.table::dcast(unique(decomposer[Trophic_level == "decomposer.myriapod", .(Plot, Year)]), Plot ~ Year, value.var = "Plot")
testplotpresence_arthropod <- data.table::dcast(unique(decomposer[Trophic_level  == "decomposer.arthropod", .(Plot, Year)]), Plot ~ Year, value.var = "Plot")

plot_grid(visdat::vis_miss(testplotpresence_myriapod),
          visdat::vis_miss(testplotpresence_arthropod))

rm(testplotpresence_myriapod, testplotpresence_arthropod)
# if a species is present in at least one year, it is considered to be present
# calculate the mean of all value (species presence/ absence). If it is not zero, the species was present in at least one year and the value can be set to 1.
decomposer <- data.table(aggregate(value ~ Species + Plot, decomposer, function(x) mean(x, na.rm=T)))
length(unique(decomposer$Species)) # 130 species

# Check how many decomposer species have been present / absent in how many years
hist(decomposer$value, main="Presence of secondary consumer species among years", sub="The plot indicates if a arhtropod species was either present in no year, \n in 1 years , ... , in all 8 years. Most species are never present", xlab = "", breaks = 9)
# Years : "2008" "2009" "2010" "2011" "2012" "2013" "2015" "2017"

decomposer[value > 0, value := 1]
hist(decomposer$value)
length(unique(decomposer$Plot)) # 150 plots

We combine decomposer.arthropod and decomposer.myriapod to one trophic group "decomposer". Need to update in trlevels vector.

trlevels <- c(grep("decomposer.arthropod$|decomposer.myriapod", trlevels, value = T, invert = T), "decomposer")

# get datasetID etc. information
test <- unique(spdiv[Species %in% decomposer$Species, .(Year, DataID, Dataversion, Group_broad)])
test <- test[Year %in% c("2009", "2008", "2010", "2011", "2012", "2013", "2015", "2017"), ]
rm(test)
length(unique(decomposer$Plot))
length(unique(decomposer$Species))

spdiv <- spdiv[!Trophic_level == "decomposer.arthropod"]
spdiv <- spdiv[!Trophic_level == "decomposer.myriapod"]

Dataset IDs : 20146 (v2, myriapods), 21969 (v4) Years : "2011" (incl. Myriapoda) "2008" "2009" "2010" "2012" "2013" "2015" "2017" Plots : 150 Species : 130 Groups : "Coleoptera" "Myriapoda"

Tertiary consumer

only included birds, not bats. Bats are not expected to have direct effects on any of the included functions. (Tested with analysis, putting them together explains much more variance and possibly "unmasks" the "true bird effect".)

tertiary.consumer <- spdiv[Trophic_level == "tertiary.consumer.birdbat"]
testplotpresence <- unique(tertiary.consumer[, .(Year, Plot)])
visdat::vis_miss(data.table::dcast(testplotpresence, Plot ~ Year, value.var = "Plot"))
rm(testplotpresence)

Year | 2008 | 2009 | 2010 | 2011 | 2012 | ---------- | ------ | ---------- | ---------- | ------- | -------- | subset | birds | birds bats | birds bats | birds | birds | no Plot | 150 | 150+149 | 150+150 | 150 | 150 | no Species | 67 | 78= 67+11 | 78= 67+11 | 67 | 67 |

# unique(tertiary.consumer[Year == "2011", Group_fine])
# length(unique(tertiary.consumer[Year == "2009", Plot]))

# length(intersect(unique(tertiary.consumer[Year == "2009", Species]), unique(tertiary.consumer[Year == "2010", Species])))
# batspecies <- setdiff(unique(tertiary.consumer[Year == "2009", Species]), unique(tertiary.consumer[Year == "2011", Species]))

The Bird and Bat species of the different years are the same.

Take average of Species occurrence and check how often species occur. We assume that birds and bats are difficult to measure, therefore having a large time window makes sense. A species is considered to be present in a plot, if it is present at least in one year.

Dataset ID : 21446 21447 21448 21449 24690 25306 Years : "2008" "2009" "2010" "2011" "2012" "2018" Plots : 150

birds <- aggregate(value ~ Species + Plot, tertiary.consumer[Group_fine == "Birds"], function(x) mean(x, na.rm=T)) # combine years
birds <- data.table::data.table(birds)
# nrow(birds) == 67 * 150 # TRUE : 67 species in 150 plots
hist(birds$value, main="Presence of bird species among years")
# If species is present at least in one year, take it as present
birds[value > 0, value := 1]
nrow(birds) == 67 * 150 # still expected number of rows present (67 species in 150 plots)

tertiary.consumer <- data.table::copy(birds)
rm(birds); gc()

nrow(tertiary.consumer) == 150 * (67)

# find plots with 0 species, but don't remove
# test <- data.table::data.table(aggregate(value ~ Plot, tertiary.consumer, sum))
# barplot(height = test$value)
# test[value == 0, Plot] # 5 plots with 0 species

nrow(tertiary.consumer) == 150*67 # expected number of rows present

check betadiversity

# check betadiversity
beta.tertiary.consumer <- prepare_for_betapair(tertiary.consumer)
beta.tertiary.consumer <- BetaDivMultifun::beta.pair_zerospecies(beta.tertiary.consumer, index.family = "sorensen")
test <- beta.tertiary.consumer$beta.sor
test <- as.matrix(test)
test[!lower.tri(test)] <- 1000
test <- reshape2::melt(test, value.name = "tertiary.consumer")
test <- data.table(test)
test <- test[!tertiary.consumer == 1000, ]
plot(test$tertiary.consumer)
# check the betadiversiy = 0 cases
test[tertiary.consumer == 0, ] # 33 such cases.
all(tertiary.consumer[Plot %in% c("AEG06"), Species] == tertiary.consumer[Plot %in% c("AEG23"), Species])
# plot comparisons with 0 betadiversity both contain all species
rm(test)
trlevels[which(trlevels == "tertiary.consumer.birdbat")] <- "tertiary.consumer"
spdiv <- spdiv[!Trophic_level == "tertiary.consumer.birdbat"]

Bacteria RNA

bacteria.RNA <- spdiv[Trophic_level == "bacteria.RNA"]
# bacteria.RNA <- add_back_missing_plots_species_combinations(bacteria.RNA, all_plots) # 23276560 missing combinations were added.
testplotpresence <- unique(bacteria.RNA[, .(Year, Plot)])
visdat::vis_miss(data.table::dcast(testplotpresence, Plot ~ Year))
rm(testplotpresence)

Not visible in the plot, but there are only 148 plots for both years.

Check if OTU names are the same in both years. No obvious overlap.

grep("01ece679a6294aadf97b931ce39a539a", bacteria.RNA$Species, value = T)
unique(bacteria.RNA[grep("01ece679a6294aadf97b931ce39a539a", bacteria.RNA$Species), Year]) # species present in both years

The OTUs can be compared across the years. However, we will only work with the 2011 dataset in order to be more consistent with the years. Replace the prefix by bac_ instead of bac11_ and bac14_

| year | 2011 | 2014 | 2011 & 14 | | ---------- | ------- | --------- | -----------| | no Plots | 148 | 148 | 148 | | no OTUs | 140510 | 139691 | 174945 |

length(unique(bacteria.RNA[Year == "2014", Species])) # 139691
length(setdiff(unique(bacteria.RNA[Year == "2011", Species]), unique(bacteria.RNA[Year == "2014", Species]))) # 35254 unique to 2011, 34435 unique to 2014
length(intersect(unique(bacteria.RNA[Year == "2011", Species]), unique(bacteria.RNA[Year == "2014", Species]))) # 105256 present in both years

grid::grid.newpage()
VennDiagram::draw.pairwise.venn(area1 = 34435 + 105256, area2 = 35254 + 105256, cross.area = 105256, category = c("2014 OTU", "2011 OTU"))
# approx. overlap of 3/4

Select 2011 dataset.

bacteria.RNA <- bacteria.RNA[Year == "2011", .(Species, Plot, value)]
# The bacteria RNA dataset had been reduced: all species-plot combinations with 0 were removed.
# reconstruct them (code from Caterina, see https://github.com/biodiversity-exploratories-synthesis/Synthesis-dataset-manual/blob/main/Synthesis%20datasets%20%20How%20to%20use.md under "Missing" zeros)
# 2 plots are missing, exclude them (AEG33 and AEG34)
bac_plots <- all_plots[-which(all_plots %in% c("AEG33", "AEG34"))]
bacteria.RNA <- add_back_missing_plots_species_combinations(bacteria.RNA, bac_plots) # 19052152 missing combinations were added.
rm(bac_plots)

nrow(bacteria.RNA) == 148 * 140510 # expected number of plots present
spdiv <- spdiv[!Trophic_level == "bacteria.RNA"]

Data ID : 24866 Year : 2011 Plots : 148

protist.bacterivore

2011, 2017 datasets

protist.bacterivore <- spdiv[Trophic_level == "bacterivore.protist"]
testplotpresence <- unique(protist.bacterivore[, .(Year, Plot)])
visdat::vis_miss(data.table::dcast(testplotpresence, Plot ~ Year, value.var = "Plot"))
rm(testplotpresence)

Only 1 plot missing, in 2011

Are the OTU the same across years? Datasets 2011 and 2017 are comparable with each other.

length(setdiff(unique(protist.bacterivore[Year == "2011", Species]), unique(protist.bacterivore[Year == "2017", Species])))
length(intersect(unique(protist.bacterivore[Year == "2017", Species]), unique(protist.bacterivore[Year == "2011", Species])))
length(unique(protist.bacterivore[Year == "2011", Plot]))
length(unique(protist.bacterivore[, Species]))

| year | 2011 | 2017 | 2011 2017 | | ---------- | ------- | ------ | --------- | | no Plots | 149 | 150 | 150 | | no OTUs | 886 | 886 | 886 |

For BetaDivMultifun, only year 2011 is chosen :

bacterivore.protist <- protist.bacterivore[Year == "2011", .(Species, Plot, value)]
nrow(bacterivore.protist) == 149 * 886 # TRUE : The expected number of rows is present.
spdiv <- spdiv[!Trophic_level == "bacterivore.protist"]
rm(protist.bacterivore)

Dataset ID : 24426 Year : 2011 Plots : 149

protist.eukaryvore

protist.eukaryvore <- spdiv[Trophic_level == "eukaryvore.protist"]
testplotpresence <- unique(protist.eukaryvore[, .(Year, Plot)])
visdat::vis_miss(data.table::dcast(testplotpresence, Plot ~ Year, value.var = "Plot"))
rm(testplotpresence)

Only 1 plot missing, in 2011.

# interchange intersect with setdiff and modify years to obtain values in table below
# length(intersect(unique(protist.eukaryvore[Year == "2017", Species]), unique(protist.eukaryvore[Year == "2011", Species])))
length(unique(protist.eukaryvore[Year == "2017", Plot]))

| Year | 2011 | 2017 | both | | -------- | -------- | ------ | -------- | | no Plots | 149 | 150 | 150 | | no OTUs | 217 | 217 | 217 |

For BetaDivMultifun, only year 2011 is chosen :

eukaryvore.protist <- protist.eukaryvore[Year == "2011", .(Species, Plot, value)]
nrow(eukaryvore.protist) == 149 * 217 # TRUE : The expected number of rows is present.
spdiv <- spdiv[!Trophic_level == "eukaryvore.protist"]
rm(protist.eukaryvore)

Dataset ID : 24426 Year : 2011 Plots : 149 Species : 217

protist.omnivore

protist.omnivore <- spdiv[Trophic_level == "omnivore.protist"]
testplotpresence <- unique(protist.omnivore[, .(Year, Plot)])
visdat::vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
rm(testplotpresence)

Only 1 plot missing, in 2011.

# interchange intersect with setdiff and modify years to obtain values in table below
length(intersect(unique(protist.omnivore[Year == "2011", Species]), unique(protist.omnivore[Year == "2017", Species])))
length(unique(protist.omnivore[Year == "2017", Plot]))
length(unique(protist.omnivore[, Plot]))

| Year | 2011 | 2017 | both | | -------- | -------- | ------ | -------- | | no Plots | 149 | 150 | 150 | | no OTUs | 444 | 444 | 444 |

Only Year 2011 is chosen.

omnivore.protist <- protist.omnivore[Year == "2011", .(Species, Plot, value)]
nrow(omnivore.protist) == 149 * 444 # expected number of rows is present
spdiv <- spdiv[!Trophic_level == "omnivore.protist"]
rm(protist.omnivore)

Dataset ID : 24426 Plots : 149 Year : 2011

protist.plant.parasite

protist.plant.parasite <- spdiv[Trophic_level == "plantparasite.protist"]
testplotpresence <- unique(protist.plant.parasite[, .(Year, Plot)])
visdat::vis_miss(data.table::dcast(testplotpresence, Plot ~ Year, value.var = "Plot"))
rm(testplotpresence)

Only 1 plot missing, in 2011.

# interchange intersect with setdiff and modify years to obtain values in table below
length(intersect(unique(protist.plant.parasite[Year == "2017", Species]), unique(protist.plant.parasite[Year == "2011", Species])))
length(unique(protist.plant.parasite[Year == "2017", Species]))

| Year | 2011 | 2017 | both | | -------- | -------- | ------ | -------- | | no Plots | 149 | 150 | 149 | | no OTUs | 96 | 96 | 96 |

Only 2011 is chosen

plantparasite.protist <- protist.plant.parasite[Year == "2011", .(Species, Plot, value)]
nrow(plantparasite.protist) == 149 * 96
spdiv <- spdiv[!Trophic_level == "plantparasite.protist"]
rm(protist.plant.parasite)

Dataset ID : 24426 Plots : 149 Year : 2011

decomposer.soilfungi

Years 2011, 2014, 2017

decomposer.soilfungi <- spdiv[Trophic_level == "decomposer.soilfungi"]
# adding back the missing plot - species combinations . all species-plot combinations with 0 had been removed to store memory. note : only temporarily, after filtering for years, needs to be done again because the function can not add the years to missing combinations.
decomposer.soilfungi <- add_back_missing_plots_species_combinations(decomposer.soilfungi, all_plots)

testplotpresence <- unique(decomposer.soilfungi[, .(Year, Plot)])
visdat::vis_miss(data.table::dcast(testplotpresence, Plot ~ Year, value.var = "Plot"))
rm(testplotpresence)

Check if OTU need renaming and check overlap of OTUs

length(unique(decomposer.soilfungi[Year == "2011", Species]))
length(setdiff(unique(decomposer.soilfungi[Year == "2017", Species]), unique(decomposer.soilfungi[Year == "2014", Species])))

length(intersect(intersect(unique(decomposer.soilfungi[Year == "2011", Species]), unique(decomposer.soilfungi[Year == "2017", Species])), unique(decomposer.soilfungi[Year == "2014", Species])))
length(intersect(unique(decomposer.soilfungi[Year == "2014", Species]), unique(decomposer.soilfungi[Year == "2017", Species])))

| Year | 2011 | 2014 | 2017 | 11 14 | 11 17 | 14 17 | all | | -------- | ------ | ------ | ------ | ----- | ----- | ------| ----- | | no Plots | 150 | 150 | 150 | 150 | 150 | 150 | 150 | | no OTUs | 2332 | 1971 | 2353 | 1102 | 1245 | 1146 | 906 |

Only year 2011 is chosen

decomposer.soilfungi <- decomposer.soilfungi[Year == "2011", .(Species, Plot, value)]
# nrow(decomposer.soilfungi) # 8783
decomposer.soilfungi <- add_back_missing_plots_species_combinations(decomposer.soilfungi, all_plots)
#nrow(decomposer.soilfungi) # 349800
nrow(decomposer.soilfungi) == 150 * 2332 # TRUE : all expected rows are present
spdiv <- spdiv[!Trophic_level == "decomposer.soilfungi"]

Data ID : 26470 Plots : 150 Year : 2011

pathotroph.soilfungi

2011, 2014, 2017

pathotroph.soilfungi <- spdiv[Trophic_level == "pathotroph.soilfungi"]
pathotroph.soilfungi <- add_back_missing_plots_species_combinations(pathotroph.soilfungi, all_plots)
testplotpresence <- unique(pathotroph.soilfungi[, .(Year, Plot)])
visdat::vis_miss(data.table::dcast(testplotpresence, Plot ~ Year, value.var = "Plot"))
rm(testplotpresence)

missing plots in 2014

# code to fill table below
length(unique(pathotroph.soilfungi[Year == "2017", Species]))
length(setdiff(unique(pathotroph.soilfungi[Year == "2017", Species]), unique(pathotroph.soilfungi[Year == "2011", Species])))

length(intersect(intersect(unique(pathotroph.soilfungi[Year == "2011", Species]), unique(pathotroph.soilfungi[Year == "2017", Species])), unique(pathotroph.soilfungi[Year == "2014", Species])))

| Year | 2011 | 2014 | 2017 | 11 14 | 11 17 | 14 17 | all | | -------- | ------ | ------ | ------ | ---------| ------ | ------| ---- | | no Plots | 150 | 145 | 150 | 150 | 150 | 150 | 150 | | no OTUs | 285 | 222 | 277 | 132 | 155 | 149 | 113 |

Only 2011 data is used here.

pathotroph.soilfungi <- pathotroph.soilfungi[Year == "2011", .(Species, Plot, value)]
# after filtering for year, need to add missing plot x species combinations again
pathotroph.soilfungi <- add_back_missing_plots_species_combinations(pathotroph.soilfungi, all_plots = all_plots)
nrow(pathotroph.soilfungi) == 150 * 285 # TRUE : all expected rows present
spdiv <- spdiv[!Trophic_level == "pathotroph.soilfungi"]

Dataset ID : 26470 Year : 2011 Plots : 150

symbiont.soilfungi

2011, 2014, 2017

symbiont.soilfungi <- spdiv[Trophic_level == "symbiont.soilfungi"]
symbiont.soilfungi <- add_back_missing_plots_species_combinations(symbiont.soilfungi, all_plots)
# 97571 missing combinations were added
testplotpresence <- unique(symbiont.soilfungi[, .(Year, Plot)])
visdat::vis_miss(data.table::dcast(testplotpresence, Plot ~ Year, value.var = "Plot"))
rm(testplotpresence)

several missing plots x species combinations in all years, but no NA values.

length(unique(symbiont.soilfungi[Year == "2017", Species]))

length(setdiff(unique(symbiont.soilfungi[Year == "2011", Plot]), unique(symbiont.soilfungi[Year == "2014", Plot])))

length(intersect(intersect(unique(symbiont.soilfungi[Year == "2011", Species]), unique(symbiont.soilfungi[Year == "2017", Species])), unique(symbiont.soilfungi[Year == "2014", Species])))

| Year | 2011 | 2014 | 2017 | 11 14 | 11 17 | 14 17 | all | | -------- | ------ | ------ | ------ | ---------| ------ | ------| ---- | | no Plots | 150 | 150 | 150 | 150 | 150 | 150 | 150 | | no OTUs | 328 | 334 | 505 | 214 | 201 | 211 | 169 |

Only 2011 is chosen.

symbiont.soilfungi <- symbiont.soilfungi[Year == "2011", .(Species, Plot, value)]
# adding back missing plot-species combinations (code from Caterina)
symbiont.soilfungi <- add_back_missing_plots_species_combinations(symbiont.soilfungi, all_plots) # 46710 missing combinations were added
length(unique(symbiont.soilfungi$Plot)) == 150

nrow(symbiont.soilfungi) == 150 * 328 # all expected combinations present
spdiv <- spdiv[!Trophic_level == "symbiont.soilfungi"]

Data ID : 26470 Year : 2011 Plot : 150

check and select trophic levels with only one year

belowground herbivore

belowground.herbivore <- spdiv[Trophic_level == "herbivore.arthropodsoillarvae",]
# testplotpresence <- unique(belowground.herbivore[, .(Year, Plot)])
# visdat::vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
# rm(testplotpresence)
# length(unique(belowground.herbivore$Species)) # 11 species
nrow(belowground.herbivore) == 150 * 11 # expected number of rows present

All plots are present, only one year (simple situation - no plot is shown).

hist(belowground.herbivore$value, main="Presence of belowground herbivores, 11 species", sub="The plot indicates that most species - Plot combinations are not present. This is a first indicator for relatively high (acceptable high) beta-diversity.", xlab = "")
bh_overv <- aggregate(value ~ Plot, belowground.herbivore, sum)
barplot(height = bh_overv$value)
abline(h = mean(bh_overv$value), col = "red")
abline(h = median(bh_overv$value), col = "green")

Most plots contain 3 species. 5 Plots do not contain any species. No need to remove them from analysis, beta-diversity will be set to 0 or 1 accordingly (see below).

Calculate betadiversity to check wether plots differ in their species composition / turnover.

beta.belowground.herbivore <- prepare_for_betapair(belowground.herbivore)
beta.belowground.herbivore <- beta.pair_zerospecies(beta.belowground.herbivore, index.family = "sorensen")

corrplot::corrplot(as.matrix(beta.belowground.herbivore$beta.sor),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)

corrplot::corrplot(as.matrix(beta.belowground.herbivore$beta.sim),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)

corrplot::corrplot(as.matrix(beta.belowground.herbivore$beta.sne),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)

# note : betadiversity including plots without species produce NaN. They will be addressed later.

# cleaning
rm(beta.belowground.herbivore)

There is definitely some betadiversity among the plots.

nrow(belowground.herbivore) == 150 * 11 # expected number of plots present
herbivore.arthropodsoillarvae <- data.table::copy(belowground.herbivore[, .(Species, Plot, value)])
rm(belowground.herbivore)
spdiv <- spdiv[!Trophic_level == "herbivore.arthropodsoillarvae"]

Data ID : 16746 Year : 2011 Plots : 150 Species : 11

belowground predator

belowground.predator <- spdiv[Trophic_level == "secondary.consumer.arthropodsoillarvae"]
nrow(belowground.predator) == 17 * 150 # expected number of plots present
bp_overv <- aggregate(value ~ Plot, belowground.predator, sum)
barplot(height = bp_overv$value)
abline(h = mean(bp_overv$value), col = "red")
abline(h = median(bp_overv$value), col = "green")

There is 1 plot in belowground predators which does not contain any species.

calc betadiversity and plot

beta.belowground.predator <- prepare_for_betapair(belowground.predator)
beta.belowground.predator <- beta.pair_zerospecies(beta.belowground.predator, index.family = "sorensen")

corrplot::corrplot(as.matrix(beta.belowground.predator$beta.sor),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)

corrplot::corrplot(as.matrix(beta.belowground.predator$beta.sim),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)

corrplot::corrplot(as.matrix(beta.belowground.predator$beta.sne),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)
# cleaning
rm(beta.belowground.predator)
nrow(belowground.predator) == 150 * 17 # expected number of rows present
secondary.consumer.arthropodsoillarvae <- data.table::copy(belowground.predator[, .(Species, Plot, value)])
spdiv <- spdiv[!Trophic_level == "secondary.consumer.arthropodsoillarvae"]
rm(belowground.predator)

Data ID : 16746 Year : 2011 Plots : 150 Species : 17

plant pathogen

Year 2011

plantpathogen.fungi <- spdiv[Trophic_level == "plantpathogen.fungi"]
plantpathogen.fungi <- add_back_missing_plots_species_combinations(plantpathogen.fungi, all_plots)
bp_overv <- data.table::data.table(aggregate(value ~ Plot, plantpathogen.fungi, sum))
barplot(height = bp_overv$value)
abline(h = mean(bp_overv$value), col = "red")
abline(h = median(bp_overv$value), col = "green")
# Note : keep plots without species.

84 species in 150 plots

length(unique(plantpathogen.fungi$Plot))
nrow(plantpathogen.fungi) == 84 * 150

plantpathogen.fungi <- plantpathogen.fungi[, .(Species, Plot, value)]
spdiv <- spdiv[!Trophic_level == "plantpathogen.fungi"]

Dataset ID : 18548 Year : 2011 Plots : 150

Omnivore REMOVED

Consists of arthropods dataset from 2008, ants dataset from 2014_2015, and snails dataset from 2017. Removed biologically because they represent the interaction of herbivores and secondary consumers in the analysis. Including them makes it more difficult to differentiate between herbivores and carnivores.

omnivore <- spdiv[Trophic_level %in% c("omnivore.arthropod", "omnivore.ant", "omnivore.snail")]
testplotpresence <- unique(omnivore[, .(Year, Plot)])
# testplotpresence <- unique(omnivore[Plot %in% plotNAset, .(Year, Plot)])
visdat::vis_miss(data.table::dcast(testplotpresence, Plot ~ Year, value.var = "Plot"))
rm(testplotpresence)

Many plots are missing in 2014 and 2016.

Groups : Mollusca (snails), Formicidae (ants), Dictyoptera, Dermaptera, Opiliones, Hemiptera

# Arthropod omnivores consist of 4 Groups
# We can only take the plots present in all groups --> reduce the number of plots
temp_1 <- aggregate(omnivore, Species ~ Group_broad + Year, function(x) length(unique(x)))
temp_2 <- aggregate(omnivore, Plot ~ Group_broad + Year, function(x) length(unique(x)))
temp <- merge(temp_1, temp_2, by = c("Group_broad", "Year"))
rm(temp_1, temp_2)
# knitr::kable(temp) # print table nicely in markdown format

|Group_broad |Year | Species| Plot| |:-----------|:---------|-------:|----:| |Dermaptera |2008 | 1| 139| |:-----------|:---------|-------:|----:| |Dictyoptera |2008 | 1| 139| |:-----------|:---------|-------:|----:| |Formicidae |2014_2015 | 20| 110| |:-----------|:---------|-------:|----:| |Hemiptera |2008 | 22| 146| |Hemiptera |2009 | 22| 149| |Hemiptera |2010 | 22| 149| |Hemiptera |2011 | 22| 143| |Hemiptera |2012 | 22| 148| |Hemiptera |2013 | 22| 149| |Hemiptera |2014 | 22| 134| |Hemiptera |2015 | 22| 144| |Hemiptera |2016 | 22| 110| |Hemiptera |2017 | 22| 148| |:-----------|:---------|-------:|----:| |Mollusca |2017 | 21| 134| |:-----------|:---------|-------:|----:| |Opiliones |2008 | 1| 139| |:-----------|:---------|-------:|----:|

Molluscs would be assessed, if the dataset was kept.

proceed as for herbivore arthropods :

omnivore <- omnivore[!Year %in% c("2014", "2016"), ]
omnivore <- omnivore[Year %in% c("2009", "2008", "2010", "2011", "2012", "2013", "2015", "2017"), ]
omnivore <- omnivore[!Group_broad %in% c("Dermaptera", "Opiliones", "Dictyoptera", "Formicidae")]
# find the species which are never present in any of the years
test <- data.table::data.table(aggregate(value ~ Species, omnivore, sum))
exclude <- test[value == 0, Species]
# exclude them from the analysis
omnivore <- omnivore[!Species %in% exclude]

we keep Hemiptera and Mollusca

Check pattern of missing plots

testplotpresence_omnivore <- data.table::dcast(unique(
  omnivore[, .(Plot, Year, Group_broad)]), 
  Plot ~ Year + Group_broad, value.var = "Plot")

visdat::vis_miss(testplotpresence_hemiptera)

rm(testplotpresence_omnivore)
# if a species is present in at least one year, it is considered to be present
# calculate the mean of all value (species presence/ absence). If it is not zero, the species was present in at least one year and the value can be set to 1.
omnivore <- data.table(aggregate(value ~ Species + Plot, omnivore, function(x) mean(x, na.rm=T)))
length(unique(omnivore$Species)) # 41 species

# Check how many species have been present / absent in how many years
hist(omnivore$value, main="Presence of omnivore species among years", sub="The plot indicates if a arhtropod species was either present in no year, \n in 1 years , ... , in all 8 years. Most species are never present", xlab = "", breaks = 9)
# Years : "2008" "2009" "2010" "2011" "2012" "2013" "2015" "2017"

omnivore[value > 0, value := 1]
hist(omnivore$value)
length(unique(omnivore$Plot)) # 150 plots

We separate omnivore to omnivore.arthropod and omnivore.snail and assess separately.

# removed ants
trlevels <- grep("omnivore.ant$", trlevels, value = T, invert = T)
omnivore[Species %in% spdiv[Trophic_level == "omnivore.arthropod", Species], Trophic_level := "omnivore.arthropod"]
omnivore[Species %in% spdiv[Trophic_level == "omnivore.snail", Species], Trophic_level := "omnivore.snail"]
omnivore[is.na(Trophic_level), ] # no empty rows.

omnivore.snail <- omnivore[Trophic_level == "omnivore.snail", ]
omnivore.arthropod <- omnivore[Trophic_level == "omnivore.arthropod", ]
rm(omnivore)                       

# get datasetID etc. information
unique(spdiv[Species %in% omnivore.snail$Species, .(Year, DataID, Dataversion, Group_broad)])
unique(spdiv[Species %in% omnivore.arthropod$Species, .(Year, DataID, Dataversion, Group_broad)])

length(unique(omnivore.arthropod$Plot))
length(unique(omnivore.arthropod$Species))
length(unique(omnivore.snail$Plot))
length(unique(omnivore.snail$Species))

spdiv <- spdiv[!Trophic_level == "omnivore.arthropod"]
spdiv <- spdiv[!Trophic_level == "omnivore.snail"]

Dataset IDs : 24986 (v2, snails), 21969 (v4) Years : "2011" "2008" "2009" "2010" "2012" "2013" "2015" "2017" (incl. snails) Plots : 150 (arthropods), 134 (snails) Species : 20 (arhtropods), 21 (snails) Groups : "Orthoptera" "Coleoptera" "Hemiptera""Araneae" "Myriapoda"

pollinator REMOVED

pollinator <- spdiv[Trophic_level == "pollinator.arthropod"]

consist of 4 groups ("Diptera" "Lepidoptera" "Hymenoptera" "Coleoptera")

unique(pollinator[Trophic_level == "pollinator.arthropod", Group_broad])
temp_1 <- aggregate(pollinator, Species ~ Group_broad + Year, function(x) length(unique(x)))
temp_2 <- aggregate(pollinator, Plot ~ Group_broad + Year, function(x) length(unique(x)))
temp <- merge(temp_1, temp_2, by = c("Group_broad", "Year"))
rm(temp_1, temp_2)
# knitr::kable(temp) # print table nicely in markdown format

|Group_broad |Year | Species| Plot| |:-----------|:----|-------:|----:| |Coleoptera |2008 | 105| 146| |Coleoptera |2009 | 105| 149| |Coleoptera |2010 | 105| 149| |Coleoptera |2011 | 105| 143| |Coleoptera |2012 | 105| 148| |Coleoptera |2013 | 105| 149| |Coleoptera |2014 | 105| 134| |Coleoptera |2015 | 105| 144| |Coleoptera |2016 | 105| 110| |Coleoptera |2017 | 105| 148| |:-----------|:----|-------:|----:| |Diptera |2008 | 425| 119| |:-----------|:----|-------:|----:| |Hymenoptera |2008 | 178| 145| |:-----------|:----|-------:|----:| |Lepidoptera |2008 | 98| 137|

use year 2008

pollinator <- pollinator[!Group_broad %in% c("Diptera")]
pollinator <- pollinator[Year %in% c(2008)]

# find the species in pollintor arthropod which are never present in any of the years
test <- data.table::data.table(aggregate(value ~ Species, pollinator, sum))
exclude <- test[value == 0, Species]
# exclude them from the analysis
pollinator <- pollinator[!Species %in% exclude]
testplotpresence_poll <- data.table::dcast(unique(pollinator[, .(Plot, Year, Group_broad)]), Plot ~ Year + Group_broad, value.var = "Plot")

visdat::vis_miss(testplotpresence_poll)

rm(testplotpresence_orthoptera)

assess number of plots

# Created two datasets, one with lepidoptera and one without, in order to test how many plots we could include.
# Decided to exclude lepidoptera (see below)

# 134 plots
lepi_plots <- unique(intersect(intersect(
  unique(pollinator[Group_broad %in% "Coleoptera", Plot]),
  unique(pollinator[Group_broad %in% "Hymenoptera", Plot])),
  unique(pollinator[Group_broad %in% "Lepidoptera", Plot])))

# 142 plot without lepidoptera
nolepi_plots <- unique(intersect(
  unique(pollinator[Group_broad %in% "Coleoptera", Plot]),
  unique(pollinator[Group_broad %in% "Hymenoptera", Plot])))

# 150 plots if we would only use coleoptera

pollinator.withlepi <- pollinator[Plot %in% lepi_plots, ]
pollinator.withoutlepi <- pollinator[Plot %in% nolepi_plots &
                                       Group_broad %in% c("Coleoptera", "Hymenoptera")]

# update trlevels vector
trlevels <- c(grep("pollinator", trlevels, value = T, invert = T), "pollinator.withlepi", "pollinator.withoutlepi")

We decide to exclude pollinators. We do not have pollination as ecosystem function, and no hypothesis on how pollinator would affect other functions. There is some evidence that pollinators could affect biomass (at least of seeds and fruits), but this effect is via plants, which we included in the study. (Sources : https://www.sciencedirect.com/science/article/pii/S2468265922000166, https://www.international-climate-initiative.com/legacy/Dokumente/2020/205018_Publication_pollinators_protection.pdf)

acari REMOVED

decomposer.acari

decomp.acari <- spdiv[Trophic_level == "decomposer.acari"]
length(unique(decomp.acari$Plot))

bp_overv <- data.table(aggregate(value ~ Plot, decomp.acari, sum))
barplot(height = bp_overv$value, ylab = "number of pollinator species per plot")
abline(h = mean(bp_overv$value), col = "red")
abline(h = median(bp_overv$value), col = "green")
decomp.acari <- decomp.acari[!Plot %in% bp_overv[value == 0, Plot], ]
rm(bp_overv)

46 species, 140 plots

Find plot Selection

Find subset of plots without missing diversity data and store.

rm(spdiv); gc()

#check names of trlevels
names_trlevel <- names_trlevel[grep("ok", names_trlevel$inclusion), final_name]
all(names_trlevel %in% trlevels) & all(trlevels %in% names_trlevel) # same set of names in trlevels

Select the plots without missing information and store. This chunk of code generates plotNAset.rds, the selected set of plots.

plots <- list()
for(t in trlevels){
  tr <- get(t)
  plots[[t]] <- unique(tr$Plot)
}
common.plots <- Reduce(intersect,plots) # 147 plots
# take out HEG31 because this is taken out in functions
# common.plots <- common.plots[-which(common.plots == "HEG31")]
#TODO HERE remove later!! first do imputation and check if still needed!! (Oct 23)
saveRDS(common.plots, file = "plotNAset.rds")

redplotset <- c()
for(t in trlevels){
  redplotset[t] <- length(Reduce(intersect, plots[-which(names(plots) == t)]))
}

# show missing status of all trophic levels
testplotpresence <- data.table(t(rbindlist(lapply(plots, as.data.frame.list), fill = T, use.names = T)))
names(testplotpresence) <- names(plots)
visdat::vis_miss(testplotpresence)

par(mar = c(10, 4.1, 4.1, 2.1))
barplot(redplotset, ylim = c(0, 150), las = 3, cex.names = 0.8, main = "Does removal of \ngiven trophic level \nincrease number of plots?")
# abline(h = 114, col = "darkgreen")
abline(h = 147, col = "red")

length(grep("AEG", common.plots)) # 48
length(grep("HEG", common.plots)) # 50 --> will reduce to 49 without HEG31
length(grep("SEG", common.plots)) # 49

Based on above graph : keep 147 plots, do not remove bacteria because we want them.

Inspect common plot set

paste("AEG :", length(grep("A", common.plots)), "   HEG :", length(grep("H", common.plots)), "    SEG :", length(grep("S", common.plots)), sep = "")

| region | no. plots | | --------- | --------- | | all | 147 | | AEG | 48 | | HEG | 50 | | SEG | 49 |

Select Plots from diversity datasets

Only keep the selected plots

if(!length(plotNAset) > 1){plotNAset <- common.plots}
for(t in trlevels){
  tr <- get(t)
  tr <- tr[Plot %in% plotNAset, ]
  assign(t, tr)
}
# take out HEG31, is automatic as soon as the gapfilling is run and thus plotNAset is updated.. 
# that means that this betadiversity script needs to be run twice, once before and once after the gapfilling
#TODO HERE Nov 23 : still need to run

overvievs

sum <- 0
gb <- c()
for(t in trlevels){
  tr <- data.table::copy(get(t))
  # find number of group_broad
  tr <- merge(tr, spinfo, by = "Species", all.x = T, all.y = F)
  print(paste(t, ": ", length(unique(tr$Species)), "species."))
  print(paste("    Number of group broad : ", length(unique(tr$Group_broad)), " groups : ", paste(unique(tr$Group_broad), collapse = ", ")))

  sum <- sum + length(unique(tr$Species))
  gb <- c(gb, unique(tr$Group_broad))
}
sum
unique(gb)
length(unique(gb))
rm(tr); rm(t)

[1] "autotroph : 484 species." [1] " Number of group broad : 3 groups : Lichen, Plant, Moss" [1] "bacteria.RNA : 140510 species." [1] " Number of group broad : 1 groups : bacteria.RNA" [1] "bacterivore.protist : 886 species." [1] " Number of group broad : 1 groups : Protists" [1] "decomposer.soilfungi : 2332 species." [1] " Number of group broad : 1 groups : soilfungi" [1] "eukaryvore.protist : 217 species." [1] " Number of group broad : 1 groups : Protists" [1] "herbivore.arthropod : 647 species." [1] " Number of group broad : 4 groups : NA, Hemiptera, Coleoptera, Orthoptera" [1] "herbivore.arthropodsoillarvae : 11 species." [1] " Number of group broad : 1 groups : Insect.larvae" [1] "omnivore.protist : 444 species." [1] " Number of group broad : 1 groups : Protists" [1] "pathotroph.soilfungi : 285 species." [1] " Number of group broad : 1 groups : soilfungi" [1] "plantparasite.protist : 96 species." [1] " Number of group broad : 1 groups : Protists" [1] "plantpathogen.fungi : 84 species." [1] " Number of group broad : 1 groups : Plant.pathogen" [1] "secondary.consumer.arthropodsoillarvae : 17 species." [1] " Number of group broad : 1 groups : Insect.larvae" [1] "symbiont.soilfungi : 328 species." [1] " Number of group broad : 2 groups : NA, soilfungi" [1] "tertiary.consumer : 67 species." [1] " Number of group broad : 1 groups : Birds" [1] "secondary.consumer : 343 species." [1] " Number of group broad : 6 groups : NA, Coleoptera, Araneae, Hemiptera, Orthoptera, Myriapoda" [1] "decomposer : 130 species." [1] " Number of group broad : 3 groups : NA, Coleoptera, Myriapoda"

Total : 146'881 species in the dataset (6'375 species without bacteria) belonging to 16 broad taxonomic groups

Calculate betadiversities

Note : abundance-based betadiversity is calculated below, therefore this script needs to be run specifically for that purpose.

Set betadiversity to 0 or 1 in case plots do not contain any species, according to e.g. Carlo Ricotta https://doi.org/10.1002/ece3.2980 e.g. Plot "P1" contains 3 species, "P2" and "P3" do not contain any species :

P1 : 1 1 1 0 0 0 0 P2 : 0 0 0 0 0 0 0 P3 : 0 0 0 0 0 0 0

betadiversity(P1, P2) is set to beta.sne = 1, beta.sim = 0, beta.sor = 1 betadiversity(P2, P3) is set to beta.sor = 0 = beta.sne + beta.sim

This is done by the function beta.pair_zerospecies() from this package (BetaDivMultifun package)

for(t in trlevels){
  tr <- get(t)
  b.tr <- prepare_for_betapair(tr)
  b.tr <- beta.pair_zerospecies(b.tr, index.family = "sorensen")
  trsimname <- paste(t, ".beta.sim", sep = "")
  trnesname <- paste(t, ".beta.sne", sep = "")
  assign(trsimname, b.tr$beta.sim)
  assign(trnesname, b.tr$beta.sne)
}
rm(t); rm(tr); rm(b.tr); rm(trsimname); rm(trnesname)

# prepare for formatesitepair
# convert to long format
trlevels_components <- c(paste(trlevels, ".beta.sim", sep=""), paste(trlevels, ".beta.sne", sep=""))
# create super-dataframe to store all betadiversities
t <- trlevels_components[1] ; tr <- get(t) ; tr <- as.matrix(tr) ; tr[!lower.tri(tr)] <- 1000
tr <- reshape2::melt(tr, value.name = t)
tr <- tr[which(tr[,3] != 1000), ]
nrow(tr[which(tr[,3] != 1000), ]) ==  (135*135) - ((135*135 / 2) +135/2) # 9045 rows left
masterbeta <- tr

for(t in trlevels_components[-1]){
  tr <- get(t)
  tr <- as.matrix(tr)
  tr[!lower.tri(tr)] <- 1000
  tr <- reshape2::melt(tr, value.name = t)
  # tr <- tr[!is.na(tr[, 3]),] #TODO removed 
  tr <- tr[which(tr[,3] != 1000), ] 
  masterbeta <- merge(masterbeta, tr, by = c("Var1", "Var2"), all.x = T)
}
rm(tr); rm(t)

save betadiversity datasets

# for(t in trlevels){
#   tr <- get(t)
#   saveRDS(tr, file = paste(t, "betadiversity.rds", sep="_"))
# }
saveRDS(trlevels, "trlevels_vector.rds")
saveRDS(masterbeta, "master_beta.rds")

Calculate alpha diversities

Rarefaction of bacteria and soilfungi datasets TODO need to do??

#TODO HERE Oct 2023 : rarefy those two datasets
# Note : this rarefaction code is not helping : 
# The data is rarefied to a constant number of species (OTUs per plot). 
# this means alphadiversity is the same in all plots.
#
#TODO need to rarefy on abundance data.
# I do not have abuncance data on bacteria or fungi..

# dependencies 
if(!requireNamespace("BiocManager")){
  install.packages("BiocManager")
}
BiocManager::install("phyloseq")
library(phyloseq)

# is done on abundance data, not on presence-absence data!

dat <- bacteria.RNA
dat <- symbiont.soilfungi
dat <- pathotroph.soilfungi

# convert to matrix for phyloseq
dat <- data.table::dcast(dat, Species ~ Plot, value.var = "value")
spnames <- dat$Species
dat <- as.matrix(dat[, -1])
rownames(dat) <- spnames

# Code from https://github.com/biodiversity-exploratories-synthesis/Synthesis_dataset_manual/blob/main/Synthesis%20datasets%20%20How%20to%20use.md
# adapted from example code to rarefy a microbial dataset (thanks to Kezia Goldmann and Johannes Sikorski for guidance on this topic)

dat <- otu_table(dat, taxa_are_rows = TRUE) # get a phyloseq object step1
pylodat <- phyloseq(dat) # get a phyloseq object step2

rarefied.dat <- rarefy_even_depth(pylodat, rngseed = 1) 
# sample size should be the smallest number of sequences per sample (default)
# rngseed sets a seed to get reproducible results

# reformat to previous state
rarefied.dat <- data.frame(rarefied.dat)
rarefied.dat$Species <- rownames(rarefied.dat)
rarefied.dat <- data.table::melt(data.table(rarefied.dat), id.vars = c("Species"))
names(rarefied.dat) <- c("Species", "Plot", "value")

rarefied_alpha <- stats::aggregate(x = rarefied.dat, by = value ~ Plot, FUN = "sum")
unrarefied_alpha <- stats::aggregate(x = bacteria.RNA, by = value ~ Plot, FUN = "sum")
for(t in trlevels){
  tr <- get(t)
  alpha <- aggregate(value ~ Plot, tr, function(x) sum(x))
  alphaname <- paste(t, ".alpha", sep = "")
  setnames(alpha, old = c("Plot", "value"), new = c("Var1", alphaname))
  assign(alphaname, alpha)
}
rm(t); rm(tr); rm(alpha); rm(alphaname)

# put all alphas together to masteralpha
masteralpha <- data.table::copy(autotroph.alpha) # by hand for trlevels[1]
for(t in trlevels[-1]){
  alphaname <- paste(t, ".alpha", sep = "")
  tr <- get(alphaname)
  masteralpha <- merge(masteralpha, tr, by = "Var1", all.x = T)
}
# save alphadiversities
saveRDS(masteralpha, "master_alpha.rds")

# combine betadiversity masters with alphas
masterdiversity <- merge(masterbeta, masteralpha, by = "Var1", all.x = T)
backup <- colnames(masteralpha)
colnames(masteralpha) <- paste(colnames(masteralpha), "2", sep = ".")
setnames(masteralpha, old = "Var1.2", new = "Var2")
masterdiversity <- merge(masterdiversity, masteralpha, by = "Var2", all.x = T)

# save masterdiversity
saveRDS(masterdiversity, "master_diversity_alpha_beta.rds")

Calculate gamma diversity

# create results table with same format as masterbeta
mastergamma <- data.table(masterbeta)
mastergamma <- mastergamma[, .(Var1, Var2)]
# mastergamma <- mastergamma[, (trlevels) := NA] #TODO add ".gamma" suffix

for(t in trlevels){
  print(t)
  tr <- get(t)
  # preparing input for designdist
  tr <- dcast(tr, Plot ~ Species, fill = NA, value.var = "value")
  rownames(tr) <- tr$Plot
  plot_names <- tr$Plot
  tr$Plot <- NULL
  tr <- as.matrix(tr)
  # calculate gamma diversity with designdist
  tr <- as.matrix(vegan::designdist(tr, method = "A+B-J", alphagamma = F))
  # reshaping the result matrix
  colnames(tr) <- plot_names
  rownames(tr) <- plot_names
  tr[!lower.tri(tr)] <- 1000
  tr <- reshape2::melt(tr, value.name = paste(t, ".gamma", sep = ""))
  tr <- tr[which(tr[,3] != 1000), ] 
  # add to results data.table
  mastergamma <- merge(mastergamma, tr, by = c("Var1", "Var2"))
}
rm(t); rm(tr)

# save gammadiversity
saveRDS(mastergamma, "master_gamma.rds")
# add to masterdiversity
masterdiversity <- merge(masterdiversity, mastergamma, by = c("Var1", "Var2"), all.x = T)
saveRDS(masterdiversity, "master_diversity_alpha_beta_gamma.rds")

Calculate abundance-based betadiversity ABANDONED

# If this piece should be run : 
# (1) uncomment the line above that converts dataset to presence-absence
# (2) do not run beta- and alphadiversity calculations (set by hand eval = F)

for(t in trlevels){
  tr <- get(t)
  b.tr <- prepare_for_betapair(tr)
  b.tr <- beta.pair.abund_zerospecies(b.tr, index.family = "bray")
  trbalname <- paste(t, ".beta.bal", sep = "")
  trgraname <- paste(t, ".beta.gra", sep = "")
  assign(trbalname, b.tr$beta.bray.bal)
  assign(trgraname, b.tr$beta.bray.gra)
}
rm(t); rm(tr); rm(b.tr); rm(trsimname); rm(trnesname)


# prepare for formatesitepair
# convert to long format
trlevels_components <- c(paste(trlevels, ".beta.bal", sep=""), paste(trlevels, ".beta.gra", sep=""))
# create super-dataframe to store all betadiversities
t <- trlevels_components[1]
tr <- get(t)
tr <- as.matrix(tr)
tr[!lower.tri(tr)] <- 1000
tr <- reshape2::melt(tr, value.name = t)
tr <- tr[which(tr[,3] != 1000), ]
# select plots
tr <- data.table(tr)
tr <- tr[Var1 %in% plotNAset & Var2 %in% plotNAset, ]
nrow(tr[which(tr[,3] != 1000), ]) ==  (135*135) - ((135*135 / 2) +135/2) # 9045 rows left
masterbeta <- tr

for(t in trlevels_components[-1]){
  tr <- get(t)
  tr <- as.matrix(tr)
  tr[!lower.tri(tr)] <- 1000
  tr <- reshape2::melt(tr, value.name = t)
  # tr <- tr[!is.na(tr[, 3]),] #TODO removed 
  tr <- tr[which(tr[,3] != 1000), ] 
  # select plots
  tr <- data.table(tr)
  tr <- tr[Var1 %in% plotNAset & Var2 %in% plotNAset, ]
  masterbeta <- merge(masterbeta, tr, by = c("Var1", "Var2"), all.x = T)
}
rm(tr); rm(t)

# save
saveRDS(masterbeta, "master_beta_abundance.rds")

Cleaning

rm(redplotset); rm(plots)
rm(alphaname); rm(sum); rm(exclude); rm(bh_overv)
rm(masteralpha); rm(masterdiversity)
rm(list = trlevels)
rm(list = trlevels_components)


allanecology/multiFunBetadivLuiGDM documentation built on Nov. 12, 2023, 6:16 a.m.