Requirements :
- script is runned right after prepare_and_run_GDM.Rmd
. Requires its output gdminput
.
- note that gdminput
is also saved as .csv and .Rds from the above mentioned script.
- nicenames
for nice correlation plot
- model_names_selection
: a row of the dataframe model_names
The script calculates euclidean distances abs(simple substraction) of the covariates and LUI as well as the geographic distance based on coordinates and forms an unscaled and a scaled table with all distances. A correlation plot is produced.
For more exploration, see also the script check_GDM_input_LM_GAM.Rmd
with vif, linear models and GAMS. The i.i.d. assumption is not met with matrix input, therefore these explorations have been moved to additional results.
Output :
- datasets <modelname>_gdminput_distances_unscaled.rds
and <modelname>_gdminput_distances_zscores.rds
contains the distances of all variables in long format (unlike the GDM input data which is raw values for all non-beta predictors)
- correlation plot vignettes/out/<modelname>_corrplot_before_gdm.pdf
lui <- c(model_names_selection$lui, paste("delta", model_names_selection$lui, sep = "")) checkgdminput <- data.table::copy(gdminput) checkgdminput <- data.table(checkgdminput) # calc euclidean distance between each predictor pair of checkgdminput # diversity : take out s2. because they are all 0 dists <- colnames(checkgdminput)[grep("beta", colnames(checkgdminput))] dists <- grep("s2.", dists, value = T) checkgdminput[, (dists) := NULL] checkgdminput[, weights := NULL] # LUI luicols <- grep(paste(lui, collapse="|"), colnames(checkgdminput), value=TRUE) if(length(grep("Gstd", lui)) > 1){ print("Grazing") checkgdminput[, Gstd := abs(s1.Gstd - s2.Gstd)] checkgdminput[, deltaGstd := abs(s1.deltaGstd - s2.deltaGstd)] checkgdminput[, (luicols) := NULL] } else if(length(grep("Mstd", lui)) > 1) { print("Mowing") checkgdminput[, Mstd := abs(s1.Mstd - s2.Mstd)] checkgdminput[, deltaMstd := abs(s1.deltaMstd - s2.deltaMstd)] checkgdminput[, (luicols) := NULL] } else if(length(grep("Fstd", lui)) > 1) { print("Fertilisation") checkgdminput[, Fstd := abs(s1.Fstd - s2.Fstd)] checkgdminput[, deltaFstd := abs(s1.deltaFstd - s2.deltaFstd)] checkgdminput[, (luicols) := NULL] } else if(length(grep("LUI", lui)) > 1) { print("LUI") checkgdminput[, LUI := abs(s1.LUI - s2.LUI)] checkgdminput[, deltaLUI := abs(s1.deltaLUI - s2.deltaLUI)] checkgdminput[, meanLUI := rowMeans(.SD), .SDcols = c("s1.LUI", "s2.LUI")] checkgdminput[, sdLUI := apply(.SD, 1, sd), .SDcols = c("s1.LUI", "s2.LUI")] checkgdminput[, (luicols) := NULL] } #TODO keep an eye on distGeo, removed the suggestion to "raster" package which # depends on retired packages. checkgdminput[, geosphere_distgeo := geosphere::distGeo( p1 = as.matrix(checkgdminput[, .(s1.yCoord, s1.xCoord)]), p2 = as.matrix(checkgdminput[, .(s2.yCoord, s2.xCoord)]) )] geocol <- grep("Coord", colnames(checkgdminput), value = T) checkgdminput[, (geocol) := NULL] # checkgdminput[, s2.geosphere_distgeo := NULL] # predictors checkgdminput[, edis_soil := abs(s1.edis_soil - s2.edis_soil)] checkgdminput[, plot_isolation := abs(s1.plot_isolation - s2.plot_isolation)] del <- c("s1.edis_soil", "s2.edis_soil", "s1.plot_isolation", "s2.plot_isolation") checkgdminput[, (del) := NULL] # rename s1 from diversity columns colnames(checkgdminput) <- gsub("s1.", "", colnames(checkgdminput)) # compute z-score # observe correlations across the z-scores of variables, not the raw ones. to_scale <- grep("beta|distance", colnames(checkgdminput), invert = T, value = T) scaled_checkgminput <- data.table::copy(checkgdminput) scaled_checkgminput[, (to_scale) := lapply(.SD, scale, center = T, scale = T), .SDcols = to_scale] rm(to_scale) # save datasets saveRDS(checkgdminput, file = paste("vignettes/out/", model_names_selection$modelname, "_gdminput_distances_unscaled.rds", sep = "")) saveRDS(scaled_checkgminput, file = paste("vignettes/out/", model_names_selection$modelname, "_gdminput_distances_zscores.rds", sep = ""))
## correlation plot # use nice names for plotting, only those which are available c <- nicenames$nicenames names(c) <- nicenames$names c <- c[names(checkgdminput)] names(checkgdminput)[!is.na(c)] <- c[!is.na(c)] # names(checkgdminput)[is.na(c)] <- c("EFdist", "geographic distance") names(checkgdminput) <- gsub("\\\\narthropodsoillarvae", " a. larvae", names(checkgdminput)) # name : modelname "_corrplot_before_gdm.pdf" m <- cor(checkgdminput, method = "spearman") cairo_pdf(paste("vignettes/out/", model_names_selection$modelname, "_corrplot_before_gdm_main_models.pdf", sep = ""), width = 11.7, height = 8.3) corrplot::corrplot(m, type = "lower", addCoef.col = "black", method = "color", diag = F, tl.srt = 50, tl.col = "black", order = "hclust", tl.cex = 0.5, cl.cex = 0.3, number.cex = 0.25) dev.off() m[m < 0.5 & m > -0.5] <- 0 corrplot::corrplot(m, type = "lower", addCoef.col = "black", method = "color", diag = F, tl.srt = 10, tl.col = "black", order = "hclust", tl.cex = 0.5, cl.cex = 0.3, number.cex = 0.3) # output high correlations as a table h <- as.matrix(m) h[!lower.tri(h)] <- 1000 h <- reshape2::melt(h, value.name = "correlation") h <- data.table(h) h <- h[!correlation == 1000, ] h <- h[correlation > 0.7 | correlation < -0.7, ][order(correlation)] fwrite(h, file = paste("vignettes/out/", model_names_selection$modelname, "_correlations_before_gdm_table.csv", sep = ""))
Next script is plot_GDM.Rmd
.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.