This script runs the GDM models. In the first code section, the desired GDM model is chosen manually. The script writes the GDM input and output files and the summary output.

Dependencies

Data

sections_to_be_loaded <- c("functions_dissimilarity", "betadiversity", "LUI", "prepared_covariates")
source("vignettes/analysis_nonpublic.R")

output

select input

Depending on the model intented to run, chose the appropriate variables below.

# select a model to run, from the table `model_names` which is part of the R package data (see how to load in `analysis_nonpublic.R`)
if(exists("model_names_selection")){
  # model_names_selection is given from outside the script
  print("using selection of the model from outside the script :")
  print(model_names_selection)
} else {
  print("model selection is not given, default is EFturnover_0.7")
  model_names_selection <- model_names[which(model_names$modelname == "gdm_EFturnover_0.7_LUI"), ]
  print(c("working on model :", paste(model_names_selection, collapse = ", ")))
}

Implementation of input choice

chose selected input from mastertables

## column titles of the chosen LUI
lui <- c(model_names_selection$lui, paste("delta", model_names_selection$lui, sep = ""))
# can be e.g. : "deltaLUI", "Gstd", "Mstd", "Fstd", "deltaGstd", "deltaMstd", "deltaFstd"

# ecosystem funcitons
include <- c("Var1", "Var2", model_names_selection$funs)
EFmaster <- EFmaster[, ..include]
rm(include)

# lui
LUI <- LUI[, c("Plotn", lui)]
LUI <- data.table(LUI)

# geographic distances
geodist <- predictors[, c("Plotn", "Longitude_Dec_Plotcenter", "Latitude_Dec_Plotcenter")]
geodist <- data.table(geodist)

# soil covariates
predictors <- data.table(predictors)
predictors <- predictors[, .(Plotn, plot_isolation)]
predictors_soil <- data.table(predictors_soil)

prepare GDM input table

ecosystem functions

Distance in ecosystem functions is called "distance"

setnames(EFmaster, old = model_names_selection$funs, new = "distance")
gdminput <- data.table::copy(EFmaster)
gdminput[, weights := 1]
rm(EFmaster)

geographic distance

xcoord = Latitude ycoord = Longitude

setcolorder(geodist, c("Plotn", "Latitude_Dec_Plotcenter", "Longitude_Dec_Plotcenter"))
setnames(geodist, old = c("Plotn", "Latitude_Dec_Plotcenter","Longitude_Dec_Plotcenter"), new = c("Var1", "s1.xCoord", "s1.yCoord"))
gdminput <- merge(gdminput, geodist, by = c("Var1"))
setnames(geodist, old = c("Var1", "s1.xCoord", "s1.yCoord"), new = c("Var2", "s2.xCoord", "s2.yCoord"))
gdminput <- merge(gdminput, geodist, by = "Var2")

variant : by hand geographic distance calculation

Geographic distance can also be calculated with geosphere package and normally added to gdminput, when geo = F in gdm.

gdminput[, s1.geosphere_distgeo := geosphere::distGeo(p1 = as.matrix(gdminput[, .(s1.yCoord, s1.xCoord)]), 
                                                   p2 = as.matrix(gdminput[, .(s2.yCoord, s2.xCoord)]))]
gdminput[, s2.geosphere_distgeo := 0]

Note : based on sensitivity analysis (comparing by-hand and automatic distance calculation), this variant was not used.

TODO include conclusion why.

Note that this variant has code below as well.

betadiversities

Prepare for each betadiversity two columns : one "s1." containing the betadiversity values and one "s2." with 0. GDM expects two values per predictor, but as dissimilarities for betadivs are already calculated, the input is just the distance between the actual distance (beta diversity) and 0. Note that this is not the intented use of GDM and causes issues in permutation that need to be solved later on.

# add betadiversities
colnames(masterbeta)[-c(1,2)] <- paste("s1.", colnames(masterbeta)[-c(1,2)], sep = "")
gdminput <- merge(gdminput, masterbeta, by = c("Var1", "Var2"))
# add 0 columns
zeronames <- gsub("s1.", "s2.", colnames(masterbeta)[-c(1,2)])
gdminput[, (zeronames) := 0]

LUI

setnames(LUI, old = c("Plotn", colnames(LUI)[-1]), new = c("Var1", paste("s1.", colnames(LUI)[-1], sep = "")))
gdminput <- merge(gdminput, LUI, by = "Var1")
setnames(LUI, old = c("Var1", colnames(LUI)[-1]), new = c("Var2", gsub("s1.", "s2.", colnames(LUI)[-1])))
gdminput <- merge(gdminput, LUI, by = "Var2")

covariates

# create predictors table with edis_soil
setnames(predictors_soil, old = "edis_soil", new = "s1.edis_soil")
predictors_soil[, s2.edis_soil := 0]

# add plot isolation
setnames(predictors, old = c("Plotn" ,"plot_isolation"), new = c("Var1" ,"s1.plot_isolation"))
predictors_soil <- merge(predictors_soil, predictors, by = "Var1")
setnames(predictors, old = c("Var1" ,"s1.plot_isolation"), new = c("Var2" ,"s2.plot_isolation"))
predictors_soil <- merge(predictors_soil, predictors, by = "Var2")
rm(predictors)
gdminput <- merge(gdminput, predictors_soil, by = c("Var1", "Var2"))

finalising dataset for GDM

fix column order as required by gdm. The required columns, followed by s1.betadiversities, s1.LUI, s1.predictors, s2.betadiversities, s2.LUI, s2.predictors. convert to data.frame and assign rownames to be the plot combinations separated by dot.

GDM requires a well-defined table. Below, the names of the columns and their content are listed: - distance : distance between ecosystem functions - weights : column containing all 1 (because we weigh all comparisons the same) - s1.xCoord, s1.yCoord, s2.xCoord, s2.yCoord : 4 columns containting geographic location. GDM either includes them in modelling (geo = T) or excludes (geo = F). - s1.pred1, ..., s1.predn : here I fill in my predictor columns, diversity, LUI and covariates of the first plot. (all have s1. at the beginning) - s2.pred1, ..., s2.predn : here I fill in LUI and covariate values of plot 2, and columns of zero for betadiversity to imitate I would have 2 values.

### variant : by hand geographic distance calculation
# adjust colorder so it includes s1.geosphere_distgeo and s2.geosphere_distgeo
colorder_gdm_input <- append(colorder_gdm_input, "s1.geosphere_distgeo", after=max(grep("s1.", colorder_gdm_input)))
colorder_gdm_input <- append(colorder_gdm_input, "s2.geosphere_distgeo", after = max(grep("s2.", colorder_gdm_input)))
# select respective LUI measures from colorder
# grep(lui[1], colorder_gdm_input)
if(lui[1] == "LUI"){
  colorder_gdm_input <- colorder_gdm_input[-grep(paste(c("Gstd", "Mstd", "Fstd"), collapse = "|"), colorder_gdm_input)]
} else {
  colorder_gdm_input <- colorder_gdm_input[-grep("LUI", colorder_gdm_input)]
}
# check if all names of gdminput are in colorder_gdm_input and vice versa
all(colnames(gdminput) %in% colorder_gdm_input)
all(colorder_gdm_input %in% colnames(gdminput))
length(grep("geosphere_distgeo", colorder_gdm_input)) == 0
length(grep("geosphere_distgeo", colnames(gdminput))) == 0

gdminput <- data.table::setcolorder(gdminput, colorder_gdm_input)

gdminput[, rown := paste(Var1, Var2, sep = ".")]
gdminput[,c("Var1","Var2") := NULL]
gdminput <- as.data.frame(gdminput)
rownames(gdminput) <- gdminput$rown
gdminput$rown <- NULL

gdminput <- gdm::formatsitepair(bioData = gdminput, bioFormat = 4, predData = gdminput)

run GDM

gdm_output <- gdm::gdm(gdminput, geo = T)

save output

outname <- paste("gdm", model_names_selection$funs, lui[1], "output_summary.txt", sep = "_")
outnameRDS <- paste("gdm", model_names_selection$funs, lui[1], "output.Rds", sep = "_")

# save input and output
saveRDS(gdminput, file = paste("vignettes/out/", paste("gdm", model_names_selection$funs, lui[1], "input.Rds", sep = "_"), sep = ""))
write.csv(gdminput, file = paste("vignettes/out/", paste("gdm", model_names_selection$funs, lui[1], "input.CSV", sep = "_"), sep = ""))
saveRDS(gdm_output, paste("vignettes/out/", outnameRDS, sep = ""))
capture.output(summary(gdm_output), file = paste("vignettes/out/", outname, sep = ""))

# get model specifications
# model_specs <- paste(
# model_names_selection$modelname,
# paste("deviance of the fitted GDM model :", round(gdm_output$gdmdeviance, 3)),
# paste("deviance of the null model : ", round(gdm_output$nulldeviance, 3)),
# paste("percentage of null deviance \n explained by the fitted GDM model :", round(gdm_output$explained, 3)),
# "3 splines used, knots at min, max and 50% (default). ",
# paste("LUI knots at :", paste(round(gdm_output$knots[1:3], 3), collapse = ", ")),
# sep = "\n")
# capture.output(print(model_specs), file = paste(pathtoout, "/", model_names_selection$modelname, "_GDM_model_specs.txt", sep = ""))

model_specs <- data.table(modelname = model_names_selection$modelname,
                           "deviance of the fitted GDM model" = round(gdm_output$gdmdeviance, 3),
                           "deviance of the null model" = round(gdm_output$nulldeviance, 3),
                           "percentage of null dev expl" = round(gdm_output$explained, 3),
                           "splines knots" = paste(round(gdm_output$knots[1:3], 3), collapse = ", ")
)
fwrite(model_specs, paste(pathtoout, "/", model_names_selection$modelname, "_GDM_model_specs.csv", sep = ""))

next steps

rm(sections_to_be_loaded)


allanecology/BetaDivMultifun documentation built on Nov. 9, 2023, 8:47 p.m.