knitr::opts_chunk$set(echo = TRUE)
----------CLEAN THIS
Requirements :
- script is runned right after prepare_and_run_GDM.Rmd
. Requires its output gdminput
.
- note that gdminput
is also saved as .csv and .Rds from the above mentioned script.
- nicenames
for nice correlation plot
- model_names_selection
: a row of the dataframe model_names
The script calculates euclidean distances abs(simple substraction) of the covariates and LUI as well as the geographic distance based on coordinates and forms a table with all distances. A correlation plot is produced and a linear model fitted and both are saved. Variance inflation of the linear model is calculated and saved.
DEPRECIATED notice : linear model not used any more in analyis: assumption of independent observations (i.i.d. assumption) not met and therefore p-values not trustable. Also : nonlinearities are exptected. GAM used to check, but it also violates the i.i.d. assumption. Individual associations (correlation plot) does not correct for other predictors. Need to check associations among variables and GDM assumption of monotonic increase. --> all methods violate important assumptions --> compare the 3 methods and make the best of it.
Output :
- DEPRECIATED linear model output vignettes/out/<modelname>_linear_model_before_gdm.Rds
- DEPRECIATED variance inflation out/<modelname>_vif_before_gdm.txt
- DEPRECIATED linear model plot vignettes/out/<modelname>_linear_model_directions.pdf
Goal : - find negative and positive associations --> help interpretation - variance inflation
Requirement : - checkgdminput : calculated in previous script "check_GDM_input.Rmd"
require(mgcv)
# ## variance inflation in linear model # # note : p-values are not trustable, because the assumption of independent observations is not met! (matrix correlation) # # DEPRECIATED --> new method needed # # generate model # mod <- paste("distance ~ " ,paste(colnames(checkgdminput)[!colnames(checkgdminput) %in% c("distance")], collapse = " + ")) # m1 <- lm(formula = formula(mod), data = checkgdminput) # # save linear model # saveRDS(m1, file = paste("vignettes/out/", modelname, "_linear_model_before_gdm.Rds", sep = "")) # # save linear model output # capture.output(summary(m1), # file = paste("vignettes/out/", modelname, "_lm_summary.txt")) # # vif_output <- car::vif(m1) # vif_output[which(vif_output > 5)] # # vif_output[which(vif_output[, "GVIF"] > 5),] # capture.output(c(mod, # vif_output[which(vif_output > 5)]), # file = paste("vignettes/out/", modelname, "_vif_before_gdm.txt", sep = "")) # # # plot linear model # # m1$coefficients # test <- summary(m1) # coefs <- test$coefficients # coefs <- data.table(coefs) # coefs[, names := rownames(test$coefficients)] # coefs[`Pr(>|t|)` < 0.05, pval_symbol := "*"] # coefs[`Pr(>|t|)` > 0.05, pval_symbol := ""] # # linplot <- ggplot(coefs, aes(x = names, y = Estimate)) + # geom_bar(stat = "identity") + # coord_flip() + # geom_text(aes(label=pval_symbol), y=0.5, color="black", size=3.5) + # geom_errorbar(aes(ymin=Estimate-`Std. Error`, ymax=Estimate+`Std. Error`), width=.2, color = "red") + # background_grid() + # ggtitle(modelname) + # ylab("linear model estimates") + # xlab("") + # ylim(-0.4, 0.5) + # labs(caption = "error bars correspond to standard errors") # # ggsave(linplot, filename = paste("vignettes/out/", modelname, "_linear_model_directions.pdf", sep = ""), dpi = 400, # units = "cm", width = 29.7, height = 21)
# test model testmod <- mgcv::gam(distance ~ autotroph.beta.sim + s(bacteria.RNA.beta.sim) + s(omnivore.protist.beta.sim) + s(plantparasite.protist.beta.sim) + s(geosphere_distgeo) + s(LUI), data = checkgdminput) plot(testmod, all.terms = T) # also plot linear terms # full model mod <- paste(paste("distance ~ s(", paste(colnames(checkgdminput)[!colnames(checkgdminput) %in% c("distance")], collapse = ") + s("), sep = ""), ")", sep = "") # check if model is running. # mod1 : full model with all smooth terms mod1 <- mgcv::gam(distance ~ s(autotroph.beta.sim) + s(bacteria.RNA.beta.sim) + s(herbivore.arthropodsoillarvae.beta.sim) + s(herbivore.arthropod.beta.sim) + s(plantpathogen.fungi.beta.sim) + s(bacterivore.protist.beta.sim)+ s(eukaryvore.protist.beta.sim) + s(omnivore.protist.beta.sim) + s(plantparasite.protist.beta.sim) + s(secondary.consumer.beta.sim) + s(secondary.consumer.arthropodsoillarvae.beta.sim) + s(tertiary.consumer.beta.sim) + s(decomposer.soilfungi.beta.sim) + s(pathotroph.soilfungi.beta.sim) + s(symbiont.soilfungi.beta.sim) + s(autotroph.beta.sne) + s(bacteria.RNA.beta.sne) + s(herbivore.arthropodsoillarvae.beta.sne) + s(herbivore.arthropod.beta.sne) + s(plantpathogen.fungi.beta.sne) + s(bacterivore.protist.beta.sne) + s(eukaryvore.protist.beta.sne) + s(omnivore.protist.beta.sne) + s(plantparasite.protist.beta.sne) + s(secondary.consumer.beta.sne) + s(secondary.consumer.arthropodsoillarvae.beta.sne) + s(tertiary.consumer.beta.sne) + s(decomposer.soilfungi.beta.sne) + s(pathotroph.soilfungi.beta.sne) + s(symbiont.soilfungi.beta.sne) + s(geosphere_distgeo) + s(LUI) + s(deltaLUI) + s(edis_soil) + s(plot_isolation), data = checkgdminput) summary(mod1) # Rsq = 0.38, Devexpl = 39.2%, GCV = 0.01552 # if predictor edf (effective degrees of freedom) = 1 : gam works with linear estimator --> can leave out the s() part for the # given variable # no automatic model selection with "REML". Better : adjust manually # some smooth terms can probably be removed. mod1B <- update(mod1, .~.-s(plantpathogen.fungi.beta.sim) + plantpathogen.fungi.beta.sim -s(autotroph.beta.sne) + autotroph.beta.sne -s(omnivore.protist.beta.sne) + omnivore.protist.beta.sne -s(secondary.consumer.beta.sne) + secondary.consumer.beta.sne -s(symbiont.soilfungi.beta.sne) + symbiont.soilfungi.beta.sne -s(pathotroph.soilfungi.beta.sne) + pathotroph.soilfungi.beta.sne) summary(mod1B) # Rsq = 0.38, Devexpl = 29.2%, GCV = 0.015521 #TODO : discuss : omnivore protist still has negative effect # also omnivore protist beta sim! (which is nonlinearly modelled) # also plantparasite protist sim still has negative # ! plantparasite protist beta sne could also be positive in GAM nonlinear # ! decomposer soilfungi beta sim changed : had negative in linear model, now positive (hump-shaped) # ! decomposer soilfungi sne definitely changed to positive in nonlinear case # # distgeo : 3 regions visible? plot(mod1B, pages = 2) # gratia::draw(mod1B, residuals = T, pages = 2) # gratia is useful to plot mgcv results mgcv::gam.check(mod1B) # some significant terms! Problematic #TODO : why diagnostics not so good? because wrong assumption? # partial effects plot vis.gam(mod1B, view=c("LUI", "omnivore.protist.beta.sim"), plot.type='contour', color='topo', main='test') plot.gam(mod1B, select = 7, residuals = T) vis.gam(mod1B, view=c("LUI", "omnivore.protist.beta.sne"), plot.type='contour', color='topo', main='test') plot.gam(mod1B, all.terms = T, select = 32, residuals = T) # plot linear effect plot.gam(testmod, residuals = T, select = 1)
What happens if I use mean LUI and delta LUI = sd(LUI1, LUI2) as predictors for EFdist?
# NOTE : to construct the dataset checkgdminput : delete "lui" instead of "delcols" in the script "check_GDM_input.Rmd" # in order to have mean and sd LUI instead of euclidean distance # linear model # run code from check_GDM_input.Rmd mod <- paste(paste("distance ~ s(", paste(colnames(checkgdminput)[!colnames(checkgdminput) %in% c("distance")], collapse = ") + s("), sep = ""), ")", sep = "") # check if model is running. # mod1 : full model with all smooth terms mod1 <- mgcv::gam(distance ~ s(autotroph.beta.sim) + s(bacteria.RNA.beta.sim) + s(herbivore.arthropodsoillarvae.beta.sim) + s(herbivore.arthropod.beta.sim) + s(plantpathogen.fungi.beta.sim) + s(bacterivore.protist.beta.sim) + s(eukaryvore.protist.beta.sim) + s(omnivore.protist.beta.sim) + s(plantparasite.protist.beta.sim) + s(secondary.consumer.beta.sim) + s(secondary.consumer.arthropodsoillarvae.beta.sim) + s(tertiary.consumer.beta.sim) + s(decomposer.soilfungi.beta.sim) + s(pathotroph.soilfungi.beta.sim) + s(symbiont.soilfungi.beta.sim) + s(autotroph.beta.sne) + s(bacteria.RNA.beta.sne) + s(herbivore.arthropodsoillarvae.beta.sne) + s(herbivore.arthropod.beta.sne) + s(plantpathogen.fungi.beta.sne) + s(bacterivore.protist.beta.sne) + s(eukaryvore.protist.beta.sne) + s(omnivore.protist.beta.sne) + s(plantparasite.protist.beta.sne) + s(secondary.consumer.beta.sne) + s(secondary.consumer.arthropodsoillarvae.beta.sne) + s(tertiary.consumer.beta.sne) + s(decomposer.soilfungi.beta.sne) + s(pathotroph.soilfungi.beta.sne) + s(symbiont.soilfungi.beta.sne) + s(meanLUI) + s(sdLUI) + s(geosphere_distgeo) + s(edis_soil) + s(plot_isolation), data = checkgdminput)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.