Depends on clean_and_load_soil_covariates.Rmd
and analysis_nonpublic.R
.
# run clean_and_load_soil_covariates.Rmd sections_to_be_loaded <- c("assemble_covariates") source("vignettes/analysis_nonpublic.R")
# copy to keep glsoil variable soilcov <- data.table::copy(glsoil)
scale the columns which are neither binary nor factors. Scaled by centering around 0 (substracting the mean) and dividing by standard deviation.
# take out soil type as this is converted to dummy variables soilcov[, Soil.type := NULL] # create scaling vector T/F of which columns to scale in pcr scalecols <- c("Clay", "Fine.Silt", "Medium.Silt", "Coarse.Silt", "Fine.Sand", "Medium.Sand", "Coarse.Sand", "pH", "Soil.depth", "TWI") soilcov[, (scalecols) := lapply(.SD, function(x) scale(x, center = T)), .SDcols = scalecols] # note : Could be done within PCA computation with parameter `scale = TRUE`. mysubset <- names(soilcov)[!names(soilcov) %in% c("Plotn", "Soil.type")]
note : The binary categoric columns are not affected by this.
plot(soilcov$Rendzina) points(soilcov$Braunerde) points(soilcov$Pelosol) points(soilcov$Pseudogley) points(soilcov$Erdniedermoor) points(soilcov$Gley) points(soilcov$Mulmniedermoor) points(soilcov$Parabraunerde) points(soilcov$Fahlerde)
From 11 variables, 1 of those (soil type) is split into 9 variables --> 19 columns.
# Plot information is taken as rownames of a data.frame (instead of data.table). pc_glsoil <- as.data.frame(soilcov[, !"Plotn", with=FALSE]) rownames(pc_glsoil) <- soilcov$Plotn rm(soilcov) # run the pca pca_from_glsoil <- prcomp(pc_glsoil[, !(names(pc_glsoil) %in% "Soil.type")], scale = F) plot(pca_from_glsoil) temp <- cbind(pc_glsoil, "region" = sub("[0-9][0-9]", "", rownames(pc_glsoil))) pcaplots <- list(autoplot(pca_from_glsoil, data = temp, colour = "region"), autoplot(pca_from_glsoil, data = temp, colour = "region", x = 3, y = 4), autoplot(pca_from_glsoil, data = temp, colour = "region", x = 5, y = 6), autoplot(pca_from_glsoil, data = temp, colour = "region", x = 7, y = 8)) pcaplots2 <- list(autoplot(pca_from_glsoil, data = temp, colour = "pH"), autoplot(pca_from_glsoil, data = temp, colour = "pH", label = T, label.size = 2, shape = F, loadings = T), autoplot(pca_from_glsoil, data = temp, colour = "pH", loadings = T, loadings.label = T), autoplot(pca_from_glsoil, data = temp, colour = "pH", x =3, y = 4, loadings = T, loadings.label = T)) plot_grid(plotlist = pcaplots, nrow = 2) plot_grid(plotlist = pcaplots2, nrow = 2) # save plots as : "<date>_calc_covariates_soil_pca1.pdf" rm(temp); rm(pcaplots); rm(pcaplots2)
Take all axis until 90% cumulative variance explained : first 7 axes.
summary(pca_from_glsoil) # take axes 1 - 7
# color by soil type ggplot2::autoplot(pca_from_glsoil, data = pc_glsoil, colour = 'TWI') ggplot2::autoplot(pca_from_glsoil, data = pc_glsoil, colour = 'Soil.depth') ggplot2::autoplot(pca_from_glsoil, data = pc_glsoil, loadings = TRUE, loadings.colour = 'darkgray', loadings.label = TRUE, loadings.label.size = 2) ggplot2::autoplot(pca_from_glsoil, x=3, y=4, data=pc_glsoil, colour="TWI") ggplot2::autoplot(pca_from_glsoil, x=3, y=4, data=pc_glsoil, colour="pH")
Components 1 to 7 (as argued above).
pca_from_glsoil <- pca_from_glsoil$x[, 1:7]
With euclidian distance : edis_glsoil
# select plot set from matrix pca_from_glsoil <- data.frame(pca_from_glsoil) pca_from_glsoil <- pca_from_glsoil[which(rownames(pca_from_glsoil) %in% plotNAset), ] # pca_from_glsoil <- pca_from_glsoil[which(rownames(pca_from_glsoil) != "HEG31"), ] # not needed if plotNA set is in the first version. only run this line in case this is the first time you are running that script. # compute dissimilarities for the first 14 principal components (euclidian distance) edis_glsoil <- vegan::vegdist(pca_from_glsoil, method = "euclid") rm(pc_glsoil); rm(pca_from_glsoil)
# prepare for gdm (the desired format) # edis_glsoil <- as.matrix(edis_glsoil) # edis_glsoil <- cbind(row.names(edis_glsoil), edis_glsoil) # bring to long format for GDM and add to edis_glsoil table edis_glsoil <- as.matrix(edis_glsoil) edis_glsoil[!lower.tri(edis_glsoil)] <- NA edis_glsoil <- reshape2::melt(edis_glsoil, value.name = "edis_soil") edis_glsoil <- edis_glsoil[!is.na(edis_glsoil[, 3]),] nrow(edis_glsoil) == choose(127, 2) # store soil dissimilarity matrix separately if needed saveRDS(edis_glsoil, "predictors_soil.rds")
plt.sur <- unique(plt.sur) #for some areas each row has two values plt.sur <- plt.sur[,c("EP_PLOTID","G500")] data.table::setnames(plt.sur, "EP_PLOTID", "Plot") plt.sur <- merge(plt.sur, usefulplotids, by="Plot") plt.sur[, PLAND_G_500 := 100 - (G500 *100)] # change so more isolated plots have higher values plt.sur[, G500 := NULL] ; plt.sur[, Plot := NULL] # remove unused columns # select plot set plt.sur <- plt.sur[Plotn %in% plotNAset,] plt.sur <- as.data.frame(plt.sur)# convert to dataframe for analysis rownames(plt.sur) <- plt.sur[["Plotn"]] plt.sur <- plt.sur["PLAND_G_500"] colnames(plt.sur) <- "plot_isolation" # in old code, plt.sur is now as column in a variable called "predictors for GDM preparation"
landscape <- data.table(BEplotZeros(landscape, column = "EP_PlotID", plotnam = "EP")) setnames(landscape, old = c("EP_PlotID", "EP"), new = c("oldEP", "Plotn")) landscape <- landscape[, .(Plotn, Grassland.perm.1000, Arable.1000, Shannon.1000)] landscape <- landscape[Plotn %in% plotNAset, ] landscape2 <- data.table(BEplotZeros(landscape2, column = "plot_id", plotnam = "EP")) landscape2[, plot_id := NULL] setnames(landscape2, old = c("EP"), new = c("Plotn")) landscape2 <- landscape2[Year == 2012 & scale == 1000, .(Plotn, Land.te)] setnames(landscape2, old = "Land.te", new = "Land.te.1000") landscape <- merge(landscape, landscape2, by = "Plotn") rm(landscape2) # convert to data.frame data.table::setkey(landscape, Plotn) landscape <- data.frame(landscape) rownames(landscape) <- landscape$Plotn landscape <- landscape[, c("Grassland.perm.1000", "Arable.1000", "Shannon.1000", "Land.te.1000")]
plot.names <- names_gl[Landuse == "G", .(EP_PlotID, PlotID)] data.table::setnames(plot.names, old = "EP_PlotID", new = "Plot") plot.names <- merge(plot.names, usefulplotids, by = "Plot", all = T) geodist <- geodist[Landuse == "G", .(Plot_ID, Longitude_Dec_Plotcenter, Latitude_Dec_Plotcenter)] data.table::setnames(geodist, "Plot_ID", "PlotID") geodist <- merge(geodist, plot.names, by = "PlotID") geodist[, c("PlotID", "Plot") := NULL] # remove two duplicated rows # geodist[which(duplicated(geodist$Plotn)), ] # geodist[Plotn %in% c("AEG29", "AEG10"), ] # there is only very little difference - remove one of them # note : in the newest dataset BExIS ID 1000, plot coordinates are given as selected here. # (48.38/9.21 for AEG10 and 48.42/9.36 for AEG29) geodist <- geodist <- geodist[!which(duplicated(geodist$Plotn)),] # select plot set geodist <- geodist[Plotn %in% plotNAset] data.table::setkey(geodist,Plotn) geodist <- data.frame(geodist) rownames(geodist) <- geodist$Plotn geodist <- geodist[, c("Longitude_Dec_Plotcenter", "Latitude_Dec_Plotcenter")] rm(plot.names)
combine covariates to a predictor dataset
predictors <- merge(landscape, geodist, by = "row.names") colnames(predictors)[1] <- "Plotn" saveRDS(predictors, file = "predictors.rds") rm(edis_glsoil); rm(landscape); rm(geodist)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.