load, clean and join data
library(tidyverse) #load traits and aop setwd("..") nComp = 10 invisible(base_data <- readr::read_csv("./indir/ch3.csv")) traits_names <- c("ntrgnPr", "crbnPrc", "extrCBC", "extrcCC" , "extrCAC", "lgnnPrc", "clllsPr" #, "lm_frs_", "lm_dry_", "cl_lg__" , "leafAre" , "lfMssPA", "dryMssF") random_effects <- c("domanID", "nlcdCls") #taxonID, soilID, plantStatus local_environment <- c("ope", "ect", "DTM") plant_status <- c("CHM", "plntStt") reflectance <- colnames(base_data)[grep("hsi", colnames(base_data))] bands_clusters <- readr::read_csv("./indir/bands_clusters_kld.csv") # load climate trends invisible(climate_trends <- readr::read_csv("./indir/climate_features.csv")) colnames(bands_clusters) <- "KLDcluster"
#apply KL-D to the data and reduce hyperspectral info into 15 aggregated pseudo-bands reflectance_data <- base_data[colnames(base_data) %in% reflectance] %>% t %>% cbind.data.frame(bands_clusters) %>% group_by(KLDcluster) %>% summarise_all(list(mean)) %>% mutate_all(~ . /10000) %>% t %>% data.frame reflectance_data <- cbind.data.frame(base_data[["indvdID"]], reflectance_data[-1,]) colnames(reflectance_data)[1] <- "individualID"
log scale and normalize responses
# log scale and normalize responses leaf_traits_data <- base_data[colnames(base_data) %in% traits_names] %>% mutate_all(log)# %>% #scale leaf_traits_data <- cbind.data.frame(base_data[["indvdID"]], leaf_traits_data) colnames(leaf_traits_data) <- c( "individualID", "ntrgnPr", "crbnPrc", "extrCBC", "extrcCC" , "extrCAC", "lgnnPrc", "clllsPr" #, "lm_frs", "lm_dry", "cl_lg" , "leafAre","lfMssPA", "dryMssF" )
reduce climate data dimensionality using PCA
climate_features <- climate_trends %>% dplyr::filter(individualID %in% base_data[["indvdID"]]) prm <- FactoMineR::PCA(climate_features[-1], ncp=10) climate_features <- data.frame(climate_features[["individualID"]], prm$ind$coord) colnames(climate_features)[1] <- "individualID"
get soil categories by querying soilDB
#load data.frame with plots coordiantes in lat/long #setwd("..") cfc_df <- readr::read_csv("indir/cfc_field_data.csv") %>% dplyr::filter(individualID %in% base_data[["individualID"]]) %>% dplyr::select(individualID, plotID, siteID, taxonID, decimalLongitude, decimalLatitude) %>% unique soil_data <- list() for(st in unique(base_data$siteID)){ soil_data[[st]] <- cfc_df %>% dplyr::filter(siteID == st) %>% get_soil_features }
setwd("..") soil_data = readRDS("./indir/soil_data.rds") s_data <- do.call(rbind.data.frame, soil_data) %>% dplyr::select(individualID, areasymbol, spatialversion, musym, nationalmusym, mukey, muareaacres, mupolygonkey, geometry)
get species correaltion
setwd("..") # log scale and normalize responses species_list <- base_data %>% dplyr::select(taxonID, scntfcN) colnames(species_list)[2] <- "scientificName" sp_list2 <- readr::read_csv("./indir/vegetation_structure.csv") %>% dplyr::select(taxonID, scientificName) species_list <- rbind.data.frame(species_list, sp_list2) %>% unique species_list <- get_cophenetic_distance(species_list, sc_name = "scientificName") species_features <- base_data %>% dplyr::select(indvdID, scntfcN) species_features[["scntfcN"]] <- word(species_features[["scntfcN"]], 1,2) species_features[["scntfcN"]] <- str_replace(species_features[["scntfcN"]], " spp.", "") species_features[["scntfcN"]] <- str_replace(species_features[["scntfcN"]], " sp.", "") species_features[["scntfcN"]] <- tolower(species_features[["scntfcN"]]) species_features <-left_join(species_features, species_list$resolved_names, by = c("scntfcN" = "search_string")) %>% select(indvdID, taxonID) colnames(species_features) <- c("individualID", "taxonID") #get meaningful soil characters in.statement <- soilDB::format_SQL_in_statement((s_data$mukey)) q <- paste("SELECT component.mukey, taxclname, taxorder, taxsuborder, taxgrtgroup, taxsubgrp FROM legend INNER JOIN mapunit ON mapunit.lkey = legend.lkey LEFT OUTER JOIN component ON component.mukey = mapunit.mukey WHERE mapunit.mukey IN ", in.statement, sep="") # run query, process results, and return as data.frame object res <- soilDB::SDA_query(q) res <- res[complete.cases(res),] res$mukey local_soil <- c("mukey") soil_features <- s_data %>%dplyr::select(individualID, local_soil) soil_features <- unique(soil_features) soil_features <- left_join(soil_features, res)
get local environment features
# log scale and normalize responses site_features <- base_data[colnames(base_data) %in% local_environment] %>% scale site_features <- cbind.data.frame(base_data[["indvdID"]], site_features) colnames(site_features)[1] <- "individualID"
status_features <- base_data[colnames(base_data) %in% plant_status] status_features <- cbind.data.frame(base_data[["indvdID"]], status_features) colnames(status_features)[1] <- "individualID"
define random effects
random_effects_data <- base_data[colnames(base_data) %in% random_effects] random_effects_data <- cbind.data.frame(base_data[["indvdID"]], random_effects_data) colnames(random_effects_data)[1] <- "individualID"
train test split
# log scale and normalize responses cfc_df <- cfc_df[complete.cases(cfc_df),] train_data <- cfc_df %>% dplyr::select(individualID, taxonID, siteID) %>% group_by(taxonID, siteID) %>% sample_frac(0.8) test_data <- cfc_df %>% dplyr::select(individualID, taxonID, siteID) %>% filter(!individualID %in% train_data[["individualID"]])
train_dataset <- train_data %>% ungroup() %>% dplyr::select(-one_of("taxonID")) %>% inner_join(leaf_traits_data) %>% inner_join(climate_features) %>% inner_join(site_features) %>% inner_join(random_effects_data) %>% inner_join(reflectance_data) %>% inner_join(species_features) %>% inner_join(soil_features) %>% inner_join(status_features) %>% unique train_dataset<- train_dataset[!is.na(train_dataset[["taxonID"]]),] train_dataset[!train_dataset[["taxonID"]] %in% colnames(species_list$cov_taxa_eff),"taxonID"] %>% unique %>% print train_dataset[train_dataset[["taxonID"]]=="ABIES","taxonID"] = "ABBA" train_dataset[train_dataset[["taxonID"]]=="BETUL","taxonID"] = "BELE" train_dataset[train_dataset[["taxonID"]]=="BOURR","taxonID"] = "BOSU2" train_dataset[train_dataset[["taxonID"]]=="FRAXI","taxonID"] = "FRAM2" train_dataset[train_dataset[["taxonID"]]=="HALES","taxonID"] = "HACA3" train_dataset[train_dataset[["taxonID"]]=="NYSY","taxonID"] = "NYBI" train_dataset[train_dataset[["taxonID"]]=="OXYDE","taxonID"] = "OXAR" train_dataset[train_dataset[["taxonID"]]=="PINUS","taxonID"] = "PIEC2" train_dataset[train_dataset[["taxonID"]]=="SASSA","taxonID"] = "SAAL5" train_dataset = train_dataset[complete.cases(train_dataset),] train_dataset = mutate_if(train_dataset, is.numeric, scale)
imp_data <- mice::mice(train_dataset, m = 5, print = FALSE)
Now we can finally use all these variables to generate models! Let's check first how well everything but reflectance represents our traits data
library(brms) #define formula and build the model list_formulas <- paste(paste("mvbind(" , paste(colnames(leaf_traits_data)[-1], collapse = " , "), ") ~ ", sep = "") #climate variables , paste("s(",colnames(climate_features)[2:6], ")", collapse = " + ", sep = "") # , paste(colnames(climate_features)[2:6], collapse = " + ", sep = "") , " + " , paste(local_environment, collapse = " + ", sep = "") , " + " #, paste("(1 | soil| ",local_soil, ")", collapse = "+"), " + " # , paste("(1 | gr(",random_effects #, "(1 | reg | domanID)" , "+" , "(1 | hlt | plntStt)" , "+" , "(1 | soil | musym)" , "+" , "(1 | eco | nlcdCls)" , "+" , "(1 | evo | taxonID" , ")", collapse = "+") formula_reflectance_only <- paste(paste("mvbind(" , paste(colnames(leaf_traits_data)[-1], collapse = " , "), ") ~ ", sep = "") , paste("s(", colnames(reflectance_data)[-1], ")", collapse = " + ", sep = "") , " + " , "(1 | site | siteID" , ")", collapse = "+") #check if positive definite, in case slightly modify it to be invertible, according to: taxaPD <- Matrix::nearPD( species_list$cov_taxa_eff) fit <- brm(list_formulas, data = train_dataset, cores =2, chains = 2, iter = 2000, family=brmsfamily("gaussian"), cov_ranef = list(taxonID = as.matrix(taxaPD$mat))) refl_fit <- brm(formula_reflectance_only, data = train_dataset, cores =2, chains = 2, iter = 6000, family=brmsfamily("gaussian"), prior = prior(horseshoe(), class = "b")) arameters_summary <- VarCorr(refl_fit)
test dataset
test_dataset <- test_data %>% ungroup() %>% dplyr::select(-one_of("taxonID")) %>% inner_join(leaf_traits_data) %>% inner_join(climate_features) %>% inner_join(site_features) %>% inner_join(random_effects_data) %>% inner_join(reflectance_data) %>% inner_join(species_features) %>% inner_join(soil_features) %>% inner_join(status_features) %>% unique test_dataset<- test_dataset[!is.na(test_dataset[["taxonID"]]),] test_dataset[!test_dataset[["taxonID"]] %in% colnames(species_list$cov_taxa_eff),"taxonID"] %>% unique %>% print test_dataset[test_dataset[["taxonID"]]=="ABIES","taxonID"] = "ABBA" test_dataset[test_dataset[["taxonID"]]=="BETUL","taxonID"] = "BELE" test_dataset[test_dataset[["taxonID"]]=="BOURR","taxonID"] = "BOSU2" test_dataset[test_dataset[["taxonID"]]=="FRAXI","taxonID"] = "FRAM2" test_dataset[test_dataset[["taxonID"]]=="HALES","taxonID"] = "HACA3" test_dataset[test_dataset[["taxonID"]]=="NYSY","taxonID"] = "NYBI" test_dataset[test_dataset[["taxonID"]]=="OXYDE","taxonID"] = "OXAR" test_dataset[test_dataset[["taxonID"]]=="PINUS","taxonID"] = "PIEC2" test_dataset[test_dataset[["taxonID"]]=="SASSA","taxonID"] = "SAAL5" test_dataset = test_dataset %>% dplyr::filter(siteID %in% unique(train_dataset$siteID)) test_dataset = test_dataset[complete.cases(test_dataset),]
#test model bR2 <- bayes_R2(fit, newdata= test_dataset, robust = T) print(bR2) parameters_summary <- VarCorr(refl_fit) obj = list(mod = refl_fit, R2 = bR2, params =parameters_summary ) saveRDS(obj, "./outdir/refl_mod.rds")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.