General information

knitr::opts_chunk$set(echo = TRUE)
target_ESG <- "Semiarid_Warm_SandyUplands_LoamyUplands"
user <- "Duniway_et_al_2024" # use this user to get the exact data used for the Duniway et al 2024 STM paper
#user <- "Anna" # use one of these two options to get the most current data
#user <- "VPN"
output_figure_folder <- file.path("C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/ESG/STM/Figures", target_ESG) 

library(dplyr)
library(kableExtra)
library(sf)
# remove.packages("STMdevelopment")
# remotes::install_github("annack84/STMdevelopment@Duniway_et_al_2024") # have to use this branch to get exactly identical results to the manuscript analysis, because a handful of plots got dropped due to missing date problems
library(STMdevelopment)
# library(devtools);load_all()

# consider trying out this function to cite all the loaded packages here automatically
#knitr::write_bib()
# TODO insert dot graphic and text about the status of the ESD (e.g. "Provisional"). Might help
# to create a function to automatically insert the description based on the
# status input
EDIT_map(target_ESG = target_ESG, user = user, maxZoomStyle = "public")

Associated ecological site concepts

This Ecological Site Group (ESG) includes a variety of upland ecological sites that have deeper sandy soils without significant rock content. The group was defined by placing ecological sites into groups by separating soils, climate, and geomorphic features to best differentiate reference vegetation production values and documented ecological states as documented by Nauman et al. (2022).

esds <- esd_data_pull(user=user, target_ESG = target_ESG)
comps <- comp_data_pull(user=user, target_ESG = target_ESG)
num_esds <- nrow(esds)

This ESG includes r num_esds ecological sites. These sites were correlated to the group based on average soil, climate and geomorphic properties of the soil components linked to each ecological site.

linked_esd_names <- esds[,c("ecoclassid","ecoclassname")] %>%
  mutate(ecoclassid = paste0("[", ecoclassid, "](https://edit.jornada.nmsu.edu/catalogs/esd/",
                             substr(ecoclassid, 2, 5), "/", ecoclassid, ")"))

## Associated ecological sites
format_tables_EDIT_style(data = linked_esd_names,
                         col.names=c("ESD Code","ESD Name"),
                         caption = "Ecological Sites Descriptions (ESDs) associated with this Ecological Site Group",
                         row.names = F)
#knitr::kable(x = esds[,c("ecoclassid","ecoclassname")], col.names=c("ESD Code","ESD Name"), caption = NULL,row.names = F)
# create table of dominant plant functional groups and species
esg_prod_list <- esg_production_pull(user=user, target_ESG = target_ESG)

dom_plants_df <- dplyr::tribble(~`Functional group`, ~`Dominant plant species`,
                          "Trees", as.character(esg_prod_list[["Tree.df"]]$names_formatted)[1:5],
                          "Shrubs", as.character(esg_prod_list[["Shrub.df"]]$names_formatted)[1:5],
                          "Grasses",as.character(esg_prod_list[["Grass.df"]]$names_formatted)[1:5],
                          "Forbs", as.character(esg_prod_list[["Forb.df"]]$names_formatted)[1:5])

format_tables_EDIT_style(data = dom_plants_df,
                         caption = "Dominant plant species (top 5) by functional group based on reference state production values in the Ecological Sites associated with this ESG (based on tabulations of ESDs by Nauman et al., 2022).")
#knitr::kable(x = dom_plants_df, caption = "Table 1. Dominant plant species")
#TODO insert "Download full description" link to a PDF

Physiographic features

# TODO create table of landforms, elevations, slopes, water table depth, flooding duration, flooding frequency, ponding frequency
# ideally should be able to tab between US and metric system measurements
landforms <- unlist(strsplit(esds$landfrms[1], split=","))
for(e in 2:nrow(esds)){
  esdlfs <- unlist(strsplit(esds[e,c("landfrms")],split=","))
  landforms <- append(landforms,esdlfs)
}
landforms <- landforms[!landforms %in% "NA"]
lfsum <-  factor(landforms)
lfsum <- reorder(lfsum,lfsum,FUN=function(x) -length(x))
landforms_unique <- unique(landforms)

These sites occur on upland positions with mostly gentle slopes on a variety of landforms (n=r length(landforms_unique)). Some areas can have steeper slopes, but these are not typical of the group (based on tabulations of ESDs by Nauman et al., 2022).

# Slope
slope_ave <- mean(comps@site$slope_r.x)
slope_75th <- unname(quantile(comps@site$slope_r.x, probs = 0.75, na.rm = T))
slope_max <- max(comps@site$slope_r.x, na.rm = T)
slope_sd <- sd(comps@site$slope_r.x, na.rm = T)
slope_field <- paste("Slopes average",format(round(slope_ave, 1), nsmall = 1), 
                     "% with a standard deviation of",format(round(slope_sd, 1), nsmall = 1), 
                     "and are generally less than", format(round(slope_75th, 1), nsmall = 1),
                     "but can be up to", format(round(slope_max, 1), nsmall = 1),"%",sep=" ")

phys.df <- dplyr::tribble(~Item, ~Description,
                          "Landforms (Top 10)", as.character(unique(lfsum))[1:10],
                          "Slope", slope_field,
                          "Flooding Frequency","None",
                          "Aspect", "Aspect is not a significant factor")

format_tables_EDIT_style(data = phys.df, caption = "Representative physiographic features (Nauman et al., 2022; Soil Survey Staff, 2022)")
#knitr::kable(x = phys.df, caption = "Table 2. Representative physiographic features")

Climatic features

Climate is generally semiarid and warm with aridity index (precipitation / potential evapotranspiration) values averaging r format(round(mean(esds$aimean), 2), nsmall = 2) and ranging from r format(round(min(esds$aimean), 2), nsmall = 2) to r format(round(max(esds$aimean), 2), nsmall = 2) (based on tabulations of ESDs by Nauman et al., 2022). The average maximum temperatures (Celsius) of the hottest month range from r format(round(min(esds$maxtempmean), 2), nsmall = 2) to r format(round(max(esds$maxtempmean), 2), nsmall = 2), and minimum temperatures of the coldest month range from r format(round(min(esds$mintempmean), 2), nsmall = 2) to r format(round(max(esds$mintempmean), 2), nsmall = 2). Warm season (June to September) precipitation makes up r format(round(mean(esds$pptrt*100), 0), nsmall = 0)% of total precipitation on average, but can range from r format(round(min(esds$pptrt*100), 0), nsmall = 0)% to r format(round(max(esds$pptrt*100), 0), nsmall = 0)%.

clim.df <- dplyr::tribble(~Item, ~Description,
                          "Frost Free Period (days)", paste(format(round(min(comps@site$ffd_r), 0), nsmall = 0),format(round(mean(comps@site$ffd_r), 0), nsmall = 0),format(round(max(comps@site$ffd_r), 0), nsmall = 0),sep=", "),
                          "Mean Annual Precipitation (in)", paste(format(round(min(comps@site$reannualprecip_r/25.4,na.rm=T), 1), nsmall = 1),format(round(mean(comps@site$reannualprecip_r/25.4,na.rm=T), 1), nsmall = 1),format(round(max(comps@site$reannualprecip_r/25.4,na.rm=T), 1), nsmall = 1),sep=", "))

format_tables_EDIT_style(data = clim.df, caption = "Representative climatic features (min, mean, max, based on tabulations of ESDs by Nauman et al., 2022)")
#knitr::kable(x = clim.df, caption = "Table 3. Representative climatic features (min, mean, max)")
# TODO create line chart and bar charts of monthly high/low precip that you can tab between
# may need to do some zonal stats with PRISM to get monthly values table for all the ESGs
# TODO create line chart and bar charts of monthly high/low temps that you can tab between

Water features

These sites neither benefit significantly from run-in moisture nor experience excessive loss of moisture from runoff (Nauman et al. 2022).

Soil features

Soils in this group are moderately deep or deeper to bedrock and are composed primarily of alluvium and eolian sediments (based on tabulations of ESDs by Nauman et al., 2022). Surface horizons have sand, loamy sand, sandy loam, and loam textures and subsurface horizons are similar but can also include sandy clay loams. Soils are non-saline and non sodic and can have up to 20% calcium carbonate, but generally have less than 6% carbonates. Soil pH ranges from r min(c(round(min(comps@horizons[comps@horizons$hzdept_r<30,c("ph1to1h2o_r")],na.rm=T), 0), round(min(comps@horizons[comps@horizons$hzdept_r>30,c("ph1to1h2o_r")],na.rm=T), 0))) to r max(c(round(max(comps@horizons[comps@horizons$hzdept_r<30,c("ph1to1h2o_r")],na.rm=T), 1), round(max(comps@horizons[comps@horizons$hzdept_r>30,c("ph1to1h2o_r")],na.rm=T), 1))), but is generally closer to 8.0. Water erosion hazard is low and wind erosion hazard is severe.

## Prep soil variables for table
surftext <-  factor(esds$txtnm_surf)
surftext <- reorder(surftext,surftext,FUN=function(x) -length(x))
surftext_uni <- levels(surftext)
subtext <-  factor(esds$txtnm_sub)
subtext <- reorder(subtext,subtext,FUN=function(x) -length(x))
subtext_uni <- levels(subtext)
drainage <- factor(comps@site$drainagecl.x)
drainage <- reorder(drainage,drainage,FUN=function(x) -length(x))
drainage_uni <- levels(drainage)[1:3]

## Make Table of soil descriptors: 
# TODO Could update pH to be based on esd averages instead of comps. Also could update PM to grab ssurgo values
soil.df <- dplyr::tribble(~Item, ~Description,
                          "Parent Material", "alluvium and eolian sediments",
                          "Surface Texture (0-30cm)", surftext_uni,
                          "Subsurface Texture (>30cm)",subtext_uni,
                          "Drainage", drainage_uni,
                          "Soil Depth", paste(format(round(min(esds$depthmean,na.rm=T), 0), nsmall = 0),"-",format(round(max(esds$depthmean,na.rm=T), 0), nsmall = 0), " cm",sep=""),
                          "Surface Rock Content %vol (0-30cm)", paste(format(round(min(esds$rock_surf,na.rm=T), 0), nsmall = 0),"-",format(round(max(esds$rock_surf,na.rm=T), 0), nsmall = 0), "%",sep=""),
                          "Subsurface Rock Content %vol (>30cm)", paste(format(round(min(esds$rock_sub,na.rm=T), 0), nsmall = 0),"-",format(round(max(esds$rock_sub,na.rm=T), 0), nsmall = 0), "%",sep=""),
                          "Surface Electrical Conductivity (0-30cm)", paste(format(round(min(esds$ec_surf,na.rm=T), 0), nsmall = 0),"-",format(round(max(esds$ec_surf,na.rm=T), 1), nsmall = 1), " dS/m",sep=""),
                          "Subsurface Electrical Conductivity (>30cm)", paste(format(round(min(esds$ec_sub,na.rm=T), 0), nsmall = 0),"-",format(round(max(esds$ec_sub,na.rm=T), 1), nsmall = 1), " dS/m",sep=""),
                          "Surface Sodium Adsorption Ratio (0-30cm)", paste(format(round(min(esds$sar_surf,na.rm=T), 0), nsmall = 0),"-",format(round(max(esds$sar_surf,na.rm=T), 1), nsmall = 1),sep=""),
                          "Subsurface Sodium Adsorption Ratio (>30cm)", paste(format(round(min(esds$sar_sub,na.rm=T), 0), nsmall = 0),"-",format(round(max(esds$sar_sub,na.rm=T), 1), nsmall = 1),sep=""),
                          "Surface 1:1 pH (0-30cm)", paste(format(round(min(comps@horizons[comps@horizons$hzdept_r<30,c("ph1to1h2o_r")],na.rm=T), 0), nsmall = 0),"-",format(round(max(comps@horizons[comps@horizons$hzdept_r<30,c("ph1to1h2o_r")],na.rm=T), 1), nsmall = 1),sep=""),
                          "Subsurface 1:1 pH (>30cm)", paste(format(round(min(comps@horizons[comps@horizons$hzdept_r>30,c("ph1to1h2o_r")],na.rm=T), 0), nsmall = 0),"-",format(round(max(comps@horizons[comps@horizons$hzdept_r>30,c("ph1to1h2o_r")],na.rm=T), 1), nsmall = 1),sep=""),
                          "Surface Calcium Carbonate (0-30cm)", paste(format(round(min(esds$caco3_surf,na.rm=T), 0), nsmall = 0),"-",format(round(max(esds$caco3_surf,na.rm=T), 0), nsmall = 0), "%",sep=""),
                          "Subsurface Calcium Carbonate (>30cm)", paste(format(round(min(esds$caco3_sub,na.rm=T), 0), nsmall = 0),"-",format(round(max(esds$caco3_sub,na.rm=T), 0), nsmall = 0), "%",sep=""))

format_tables_EDIT_style(data = soil.df, caption = "Representative soil features (Nauman et al., 2022; Soil Survey Staff, 2022)")
#knitr::kable(x = soil.df, caption = "Table 4. Representative soil features")

Soil Component data

Soil types correlated to this group include r length(unique(comps@site$cokey)) different components mapped in SSURGO (Soil Survey Staff, 2022). Soil taxonomic units include:

cat(sort(unique(comps@site$compname)), sep = ", ")

Component level soil property depth plots

agg <- aqp::slab(comps, fm= ~ claytotal_r + silttotal_r + sandtotal_r + ec_r + caco3_r)

lattice::xyplot(top ~ p.q50 | variable, data=agg, ylab='Depth (cm)',
             xlab='median bounded by 25th and 75th percentiles',
             lower=agg$p.q25, upper=agg$p.q75, ylim=c(201,-2),
             panel=aqp::panel.depth_function,
             alpha=0.25, sync.colors=TRUE,
             par.settings=list(superpose.line=list(col='RoyalBlue', lwd=2)),
             prepanel=aqp::prepanel.depth_function,
             cf=agg$contributing_fraction, cf.col='black', cf.interval=20, 
             layout=c(5,1), strip=lattice::strip.custom(bg=grey(0.8)),
             scales=list(x=list(tick.number=4, alternating=3, relation='free'))
             )

Ecological dynamics

These sites have high risk for wind erosion and dune mobilization, especially with drought or large disturbances. Water erosion issues are possible. Compared to reference states, alternative states may have annual invasion, woody encroachment, and/or loss of perennial grasses (Nauman et al., 2022).

State and transition model development

# TODO update with the newly tabulated ESD STMs from Travis, then set to eval = TRUE
apriori_stms <- read.csv(data_file_paths(user)$apriori_stms, stringsAsFactors = F,
                         na.strings = c("NA", "", " "))

apriori_stms_target <- select(apriori_stms, any_of(c("State", "Plant_community", target_ESG)))
n_ESDs <- apriori_stms_target[apriori_stms_target$Plant_community=="n_ESDs", target_ESG]
colnames(apriori_stms_target) <- c("State", "Plant community", "Proportion of ESDs containing this plant community")

format_tables_EDIT_style(data = apriori_stms_target[-nrow(apriori_stms_target),],
                         caption = paste("State and plant community summary from Ecological Sites within this ESG.",
                                         n_ESDs, "complete STMs were available to include.")) %>%
  collapse_rows(columns = 1, valign = "top")

Reference state production indicators

## Plot of reference production indicators used in paper
resp_vars <- colnames(esds)[1:21]
resp_vars <- resp_vars[!resp_vars %in% c("ecoclassid")]
par(mar = c(4,8,4,2)) ## This works when knitted
boxplot(esds[,resp_vars], horizontal = TRUE,las = 1, main = "Reference Production (lbs/Acre)")
# by functional group cover
indicators <- c("AH_C3NativePerenGrassCover",
                "AH_C3IntroducedPerenGrassCover",
                "AH_C4NativePerenGrassCover",
                "AH_C4IntroducedPerenGrassCover",
                "AH_NativePerenForbCover", # Native perennial forbs
                "AH_IntroducedPerenForbCover", # Non-native perennial forbs
                "AH_NativeAnnGrassCover", # Native annual grasses
                "AH_IntroducedAnnGrassCover", # Non-native annual grasses
                "AH_NativeAnnForbCover", # Native annual forbs
                "AH_IntroducedAnnForbCover", # Non-native annual forbs
                "AH_ArtemisiaTridentataCover", # All big sagebrush species lumped together
                "BareSoilCover", # Bare soil
                "FH_TotalLitterCover", # Litter
                #"CA_percent_100plus", # Canopy gaps > 100 cm
                #"CA_percent_200plus",
                "FH_LichenCover", # Lichen + moss combined cover
                "FH_MossCover"#,
                #"SoilStab_all" # to represent cyano abundance... I think this is only marginally a community variable. If used in hierarchical clustering and Bray-Curtis NMDS, will need to standardize it to make units more similar to % cover units
                )

ann_grass_by_spp <- F
ann_forb_by_spp <- F
per_grass_by_spp <- F
per_forb_by_spp <- F
succulent_by_spp <- F
shrub_by_spp <- T # All shrubs and sub-shrubs by species
subshrub_by_spp <- T
tree_by_spp <- T # All trees by species
opuntia_combined <- T

data_sources <- c(#"BadgerWash",
                             #"CRCMonitoring", # drop if not needed for spatial representation
                             "IM_NCPN",
                             "LMF",
                             "NRI",
                             #"Parashant", # drop if not needed for spatial representation
                             #"JFSP",
                             "AIM"#, 
                             #"VanScoyocThesis" # drop if not needed for spatial representation
                           )

impute_gap_type <- NULL

plot_data <- plot_data_pull(user=user,
                             target_ESG = target_ESG,
                           data_sources = data_sources,
                           indicators = indicators,
                           ann_grass_by_spp = ann_grass_by_spp,
                           ann_forb_by_spp = ann_forb_by_spp,
                           per_grass_by_spp = per_grass_by_spp,
                           per_forb_by_spp = per_forb_by_spp,
                           succulent_by_spp = succulent_by_spp,
                           shrub_by_spp = shrub_by_spp,
                           subshrub_by_spp = subshrub_by_spp,
                           tree_by_spp = tree_by_spp,
                           opuntia_combined = opuntia_combined#,
                           #impute_gap_type = impute_gap_type
                           ) %>%
  filter(!is.na(Month))
# TODO update I&M species codes to match current USDA Plants codes! probably a step for the plot_data_pull function?

# remove incomplete rows - ordination won't work with NAs
plot_data_clean <- na.omit(plot_data) %>%
  # NRI has a separate plot code for each time a plot was sampled, but all other data sets have the same plot
  # code for all sampling times. Make a similar code for NRI.
  mutate(PlotCode_NoYear = ifelse(test = SourceKey == "NRI_UCRB",
                                  yes = paste0(SourceKey, "_",
                                               SiteName, "_",
                                               substr(PlotID, 5, nchar(PlotID))),
                                  no = PlotCode))

# Some plots were sampled multiple times - I'll include only the first year that a plot has complete data
plot_data_first <- plot_data_clean %>% #plot_data_clean %>%
  dplyr::group_by(PlotCode_NoYear) %>%
  dplyr::arrange(.data=., Year, .by_group = TRUE) %>%
  dplyr::filter(dplyr::row_number()==1) %>%
  dplyr::ungroup()

# remove plots with SGU probability (i.e. certainty of prediction) in the 10th percentile
SGU_prob_raster <- raster::raster(data_file_paths(user)$sgu_probability_raster)

file_paths <- data_file_paths(user)
plot_locations <- sf::st_read(dsn = file.path(file_paths$plotnet_processed, "PlotLocations"),
                              layer = "all_plot-years_2022-11-08",
                              quiet=TRUE) %>%
  distinct()

if("NRI" %in% data_sources){
  nri_locations <- sf::st_read(dsn = file_paths$nri,
                               layer = "NRI_UCRB_plot-years_2022-11-03",
                               quiet = TRUE)

  plot_locations <- dplyr::filter(plot_locations, !grepl(pattern = "^NRI_",
                                                         x=PlotCode)) %>%
    dplyr::bind_rows(., nri_locations) %>%
    distinct()
}

plot_SGU_prob <- sf::st_as_sf(raster::extract(x = SGU_prob_raster,
                               y = plot_locations,
                               sp = T))

plot_data_first <- left_join(plot_data_first, select(plot_SGU_prob, PlotCode, UCRB_SGUs_ProbMax)) %>%
  distinct() 

certainty_cutoff <- quantile(plot_data_first$UCRB_SGUs_ProbMax, probs = 0.1, na.rm = T)

plot_data_first <- filter(plot_data_first, UCRB_SGUs_ProbMax > certainty_cutoff)

# make the clustering and ordination data frame - can't include ANY columns except the actual variables!
ord.df <- dplyr::select(plot_data_first, -SourceKey, -PlotID, -SiteName, -PlotName,
                        -Year, -Longitude_NAD83, -Latitude_NAD83,
                        -Month, -Day,
                        -UCRB_SGUs_ProbMax, -geometry, -PlotCode_NoYear
                        ) %>%
  tibble::column_to_rownames("PlotCode") # keep an ID code for the plot as the row name to prevent data scrambling problems

# If using soil stability, standardize variables to make units more similar
# Here, dividing by the max possible value for each measurement type (cover values
# divided by 100, soil stability divided by 6)
if("SoilStab_all" %in% indicators){
  ord.df.cover <- select(ord.df, -SoilStab_all)/100 
  ord.df.soilstab <- select(ord.df, SoilStab_all)/6
  ord.df <- cbind(ord.df.cover, ord.df.soilstab)
}

# remove rare species
sp_pa <- mutate(rowwise(ord.df), across(everything(), function(x){if(x>0){1}else{0}})) # make a presence/absence data frame
sp_keep <- names(colSums(sp_pa)[which(colSums(sp_pa)>2)]) 
ord.df <- select(ord.df, all_of(sp_keep))

# remove unknown species if present
ord.df <- select(ord.df, -any_of(c("UNKS", "UNKPG", "UNKAF", "UNKPF", "PG1", "PG01", "AF1")))

# # save clustering data for other analyses - exclude location data to keep kosher
# # with the data sharing restrictions
# ord.df.share <- select(plot_data_first, all_of(c("SourceKey", "PlotCode", "Year", "Month", "Day", "UCRB_SGUs_ProbMax", colnames(ord.df))))
# write.csv(ord.df.share,
#           file = paste0("C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/ESG/STM/Data/Analysis_ready_data/contains_NRI_do_not_sync/",
#                         target_ESG, "_FANNYInputData_", Sys.Date(), ".csv"),
#           row.names = F)

Field data from were collected by field crews with the Natural Resource Conservation Service's (NRCS) National Resource Inventory (NRI) program (Nusser and Goebel, 1997), the Bureau of Land Management's (BLM) Assessment, Inventory, and Monitoring (AIM) and Landscape Monitoring Framework (LMF) programs (Toevs et al. 2011), and the National Park Service's (NPS) Inventory and Monitoring Northern Colorado Plateau Network (NCPN) program (Witwicki et al. 2017) between r min(plot_data_clean$Year) and r max(plot_data_clean$Year). Plots within the target ESG with SGU classification probability less than or equal to the 10th percentile (r certainty_cutoff%) were excluded, resulting in a total of r nrow(ord.df) plots for this ESG. Plant and soil cover were measured using the line-point intercept method (Herrick et al., 2017). Plot-level indicators (Table \@ref(tab:doc-plot-indicators)) were calculated using the terradactyl package in R (McCord et al., 2022). When plots were sampled in multiple years, only the first sampling year was used in STM development. r if(!is.null(impute_gap_type)){paste("For plots that were missing annual & perennial gap data but had perennial-only gap data (primarily I&M-NCPN), annual & perennial gap values were imputed from perennial-only gaps and annual herbaceous cover using a linear model built from LMF and NRI plots that had both types of gap (r-squared of 0.83 and 0.81 for 100-cm and 200-cm gap predictions, respectively).")}

rm(plot_data_clean, plot_data)

indicator_descriptions <- make_indicator_descriptions(indicators = indicators,
                                                      shrub_by_spp = shrub_by_spp,
                                                      subshrub_by_spp = subshrub_by_spp,
                                                      tree_by_spp = tree_by_spp,
                                                      opuntia_combined = opuntia_combined)


format_tables_EDIT_style(data = indicator_descriptions[,-1],
                         caption = "Indicators used in ordination and clustering analysis to develop plant communities") %>%
  collapse_rows(columns = 1, valign = "top")
# are the data clusterable? Use the Hopkin's statistic (Lawson, Richard, and Jurs, 1990; see https://www.datanovia.com/en/lessons/assessing-clustering-tendency/ for explanation)
# H values > 0.5 indicate clustering tendency (i.e. if H is below 0.5, the data set is uniformly distributed and clusters are not meaningful)

res_LoamyUplands <- factoextra::get_clust_tendency(data = ord.df, n=nrow(ord.df)-1, graph = FALSE)
res_LoamyUplands$hopkins_stat
# cluster and indicator species analysis
library(cluster) # for cluster analysis
library(dendextend)
library(labdsv)
library(foreach)
library(vegan)
library(ggplot2)

# initial clustering
dist.df <- vegan::vegdist(ord.df, method = "bray", na.rm = T) # creates Bray-Curtis distance matrix
# This chunk determines the best combination of number of clusters and fuzziness for the data. It is SLOOOOWWW so better to run it before knitting to get the right clustering parameters and then not run it when knitting the descriptive document.
library(parallel)
library(doParallel)

# get index values for all combos of membership exponent and number of clusters

indices_arg <- c("ch", #"duda", won't work with the ranking scheme 
                 "cindex",
             #"gamma", # takes a long time
             #"beale", # unclear how this one works
             "ccc", "ptbiserial",
             #"gplus", # takes a long time
             "db", "silhouette", "dunn" #,
             #"gap" # takes a long time 
             ) 

indices_name <- c("CH", #"Duda",
                  "Cindex",
             #"gamma", # takes a long time
             #"Beale",
             "CCC", "Ptbiserial",
             #"gplus", # takes a long time
             "DB", "Silhouette", "Dunn" #,
             #"gap" # takes a long time 
             )

memb_exps <- seq(1.1,
                 1.3, # start with 2.0, can reduce value to rerun if some larger values don't give results
                 by = 0.1)

cl <- makeCluster(spec = 3, # number of cores to use
                  type = "PSOCK",
                  methods = FALSE)
registerDoParallel(cl)

cluster_eval_fanny <- foreach(memb_exp=memb_exps,
                              .packages = c("STMdevelopment", "dplyr", "cluster"),
                              .combine = "rbind") %dopar% {
      nbmetrics_fanny_temp <- NbClust_fanny(data = ord.df,
              diss = dist.df, #chord.df,
              distance = NULL,
              min.nc = 2,
              max.nc = 12,
              method = "fanny",
              index = "all", # could also try alllong to get a few more metrics - this is SLOW
              alphaBeale = 0.1,
              memb.exp= memb_exp # 1.2
              )

    cluster_eval_fanny_temp <- nbmetrics_fanny_temp$All.index %>%
      as.data.frame() %>%
      mutate(Number_clusters = rownames(.),
             Memb_exp = memb_exp)
}

registerDoSEQ()
stopCluster(cl)

cluster_eval_fanny_indices <- select(cluster_eval_fanny, any_of(c("Memb_exp", "Number_clusters", indices_name)))

# calculate percent worst than best value for each memb. exp. X num. clusters combo for each index

pct_off_best_higher <- function(x, all_vals){
  best <- max(all_vals)
  pct_off <- ((best-x)/best)*100
  return(pct_off)
}

pct_off_best_lower <- function(x, all_vals){ 
  best <- min(all_vals)
  pct_off <- ((x-best)/best)*100
  return(pct_off)
}

cluster_eval_fanny_indices_rank <- cluster_eval_fanny_indices %>%
  rowwise() %>%
  mutate(CH_rank = pct_off_best_higher(CH, cluster_eval_fanny_indices$CH),
         Cindex_rank = pct_off_best_lower(Cindex, cluster_eval_fanny_indices$Cindex),
         CCC_rank = pct_off_best_higher(CCC, cluster_eval_fanny_indices$CCC),
         Ptbiserial_rank = pct_off_best_higher(Ptbiserial, cluster_eval_fanny_indices$Ptbiserial),
         DB_rank = pct_off_best_lower(DB, cluster_eval_fanny_indices$DB),
         Silhouette_rank = pct_off_best_higher(Silhouette, cluster_eval_fanny_indices$Silhouette),
         Dunn_rank = pct_off_best_higher(Dunn, cluster_eval_fanny_indices$Dunn)
         ) %>%
  mutate(overall_rank = sum(CH_rank, # lowest overall rank wins!
                            Cindex_rank,
                            CCC_rank,
                            Ptbiserial_rank,
                            DB_rank,
                            Silhouette_rank,
                            Dunn_rank,
                            na.rm = T))

best_n_clust <- as.numeric(cluster_eval_fanny_indices_rank[[which.min(cluster_eval_fanny_indices_rank$overall_rank), "Number_clusters"]])
best_memb_exp <- as.numeric(cluster_eval_fanny_indices_rank[[which.min(cluster_eval_fanny_indices_rank$overall_rank), "Memb_exp"]])

# VISUALLY CHECK for ties and ranking weirdness before proceeding!!
k_groups_fuzzy <- 4 # best 4 (rank 83), second best 10 (rank 87), third best 9 (rank 87), fourth best 7 (rank 92; all with 1.1 memb_exp)
memb_exp <- 1.1

set.seed(1)
clust_fuzz <- fanny(x=dist.df,
                    k = k_groups_fuzzy, # 2
                    diss = T,
                    #metric = "euclidean",
                    memb.exp = # this affects how crisp/fuzzy the clusters are - closer to 1 is crisper
                      memb_exp # 1.1
                    )


# grp_fuzz <- clust_fuzz$clustering
# plot_data_first$PlantCommunity_fuzzy_original <- as.factor(grp_fuzz)
# 
# k_groups_fuzzy <- 10 # best 4 (rank 83), second best 10 (rank 87), third best 9 (rank 87), fourth best 7 (rank 92; all with 1.1 memb_exp)
# memb_exp <- 1.1
# 
# set.seed(1)
# clust_fuzz <- fanny(x=dist.df,
#                     k = k_groups_fuzzy, # 2
#                     diss = T,
#                     #metric = "euclidean",
#                     memb.exp = # this affects how crisp/fuzzy the clusters are - closer to 1 is crisper
#                       memb_exp # 1.1
#                     )


grp_fuzz <- clust_fuzz$clustering
plot_data_first$PlantCommunity_fuzzy <- as.factor(grp_fuzz)

Analyses were performed in R 4.2.2 (R Core Team, 2022). Plots were clustered into ecological communities using the fuzzy analysis clustering algorithm (FANNY, Kaufman and Rousseeuw, 1990) in the "cluster" R package (Maechler et al., 2021) with Bray-Curtis distance ("vegan" package; Oksanen et al., 2022). A suite of clustering indices were used to determine the most appropriate number of clusters and degree of fuzziness (i.e. membership exponent), including Calinski and Harabasz (1974), C-index (Hubert and Levin, 1976), cubic clustering criteria (Sarle, 1983), point-biserial correlation (Milligan, 1980, 1981), Davies and Bouldin (1979), silhouette width (Rousseeuw, 1987), and Dunn's index (1974). Values between 1 and 12 possible clusters and 1.1 to 1.3 membership exponents were tested. The final clustering solution used r k_groups_fuzzy clusters with a membership exponent of r memb_exp.

# another slow chunk - run before knitting and then don't evaluate in the knit step.
NMDS_scree(ord.df)
ord_dims <- 3 # change manually based on the plot results

#run NMS ordination 
set.seed(1)
ord <- metaMDS(ord.df,
               k=ord_dims, # number of dimensions
               trymax = 30) 

The clustering results were visualized in ordination spaced, using non-metric multidimensional scaling (NMDS) with Bray-Curtis distance ("vegan" package in R; Oksanen et al., 2022). Based on testing ordination stress for 1 to 5 dimensions, ordination stress is acceptable with r ord_dims dimensions, and stress reduction is minimal with additional dimensions. The stress for the ordination with r ord_dims dimensions is r round(ord$stress, 3).

# 2D plotting
pal_veg2 <- c("#4E93CB", "#357832", "#66346D", "#FF9A35")
# pal_veg2 <- RColorBrewer::brewer.pal(n=k_groups_fuzzy+1, name = "Set1")[2:5]
# pal_veg2 <- RColorBrewer::brewer.pal(n=k_groups_fuzzy, name = "Dark2")
# pal_veg2 <- RColorBrewer::brewer.pal(n=k_groups_fuzzy, name = "Set3")

# manually update based on indicator species
group_labels_base <- c(paste0("PJ & gambel oak
woodland"),
                        paste0("Sagebrush & C3 perennial
grass shrubland"),
                        paste0("Non-native annual grass &
sagebrush shrubland"),
                        paste0("Perennial grassland with
scattered shrubs"))

group_labels_fuzzy <- paste0(group_labels_base,
                               " (n = ",
                               c(nrow(filter(plot_data_first, PlantCommunity_fuzzy==1)),
                                 nrow(filter(plot_data_first, PlantCommunity_fuzzy==2)),
                                 nrow(filter(plot_data_first, PlantCommunity_fuzzy==3)),
                                 nrow(filter(plot_data_first, PlantCommunity_fuzzy==4))
                                  ),
                               ")")

# group_labels_fuzzy <- c(paste0("PJ & gambel oak
# woodland (n = ", nrow(filter(plot_data_first, PlantCommunity_fuzzy==1)), ")"),
#                         paste0("Sagebrush & C3 perennial
# grass shrubland (n = ", nrow(filter(plot_data_first, PlantCommunity_fuzzy==2)), ")"),
#                         paste0("Non-native annual grass &
# sagebrush shrubland (n = ", nrow(filter(plot_data_first, PlantCommunity_fuzzy==3)), ")"),
#                         paste0("C3 perennial grassland
# (n = ", nrow(filter(plot_data_first, PlantCommunity_fuzzy==4)), ")"))

#  group_labels_fuzzy <- as.character(1:clust_fuzz$k.crisp)

par(mfrow=c(2,2))
# Axes 1x2
plot(ord, choices = c(1,2), type = "n", # plot the axes
     xlim = c(-1, 1),
     ylim = c(-1, 1))
points(ord, choices = c(1,2), display = "sites", # plot points - can choose "sites" or "species"
       col=pal_veg2[plot_data_first$PlantCommunity_fuzzy],
       pch = 21, cex = .6, bg=pal_veg2[plot_data_first$PlantCommunity_fuzzy])
ordiellipse(ord, plot_data_first$PlantCommunity_fuzzy, col=pal_veg2, lwd = 2, label = F,
            choices = c(1,2)) # plot your groups
# can use oriellipse, orihull, or ordispider to plot groups, depending on your needs

# Axes 3x2
plot(ord, choices = c(3,2), type = "n", 
     xlim = c(-1, 1),
     ylim = c(-1, 1))
points(ord, choices = c(3,2), display = "sites", 
       col=pal_veg2[plot_data_first$PlantCommunity_fuzzy],
       pch = 21, cex = .6, bg=pal_veg2[plot_data_first$PlantCommunity_fuzzy])
ordiellipse(ord, plot_data_first$PlantCommunity_fuzzy, col=pal_veg2, lwd = 2, label = F,
            choices = c(3,2)) 

# Axes 1x3
plot(ord, choices = c(1,3), type = "n",
     xlim = c(-1, 1),
     ylim = c(-1, 1))
points(ord, choices = c(1,3), display = "sites",
       col=pal_veg2[plot_data_first$PlantCommunity_fuzzy],
       pch = 21, cex = .6, bg=pal_veg2[plot_data_first$PlantCommunity_fuzzy])
ordiellipse(ord, plot_data_first$PlantCommunity_fuzzy, col=pal_veg2, lwd = 2, label = F,
            choices = c(1,3)) 

# Legend
plot(ord, type = "n", axes=FALSE,
     display = "sites",
     col=pal_veg2[plot_data_first$PlantCommunity_fuzzy],
     xlab = "",
     ylab = "")
legend(x="center",
       legend = group_labels_fuzzy,
       fill = pal_veg2,
       title = "Plant communities",
       bty = "n",
       y.intersp = 1.5)

par(mfrow=c(1,1))
# save clustering results
fuzzy_cluster_membership <- as.data.frame(clust_fuzz$membership)
fuzzy_cluster_membership$PlotCode <- row.names(clust_fuzz$membership)
fuzzy_cluster_membership$best_group <- clust_fuzz$clustering

colnames(fuzzy_cluster_membership) <- c(paste0("Group_", 1:k_groups_fuzzy, "_membership"), "PlotCode", "Best_group")

group_names <- data.frame(Best_group = 1:k_groups_fuzzy,
                          Best_group_name = c("PJ & gambel oak woodland",
                                         "Sagebrush & C3 perennial grass shrubland",
                                         "Non-native annual grass & sagebrush shrubland",
                                         "C3 perennial grassland")
                          # Best_group_name = as.character(1:k_groups_fuzzy)
                          )
fuzzy_cluster_membership <- left_join(fuzzy_cluster_membership, group_names)

# get the plot location and date sampled (use first date)
plot_locations_first <- plot_locations %>%
  mutate(DateSampled = as.Date(paste(Year, Month, Day, sep = "-"))) %>%
  arrange(DateSampled) %>%
  group_by(PlotCode) %>%
  summarise(DateSampled = first(DateSampled))

fuzzy_cluster_memb_sf <- left_join(fuzzy_cluster_membership, plot_locations_first) %>% 
  st_as_sf(., crs = st_crs(plot_locations))

# # save plots - usually only need to run this once
# fuzzy_cluster_memb_sf$DateSampled <- as.character(fuzzy_cluster_memb_sf$DateSampled)
# 
# sf::st_write(fuzzy_cluster_memb_sf,
#              dsn = "C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/ESG/Maps/contains_NRI_do_not_sync",
#              layer = paste0("SW_SandyLoamyUplands_fuzzyclusters_", Sys.Date(), ".shp"),
#              driver = "ESRI Shapefile")
# 
# fuzzy_cluster_memb_sf_shareable <- filter(fuzzy_cluster_memb_sf, !grepl(pattern = "NRI", x=PlotCode))
# 
# sf::st_write(fuzzy_cluster_memb_sf_shareable,
#              dsn = "C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/ESG/Maps",
#              layer = paste0("SW_SandyLoamyUplands_fuzzyclust_share_", Sys.Date(), ".shp"),
#              driver = "ESRI Shapefile")
#
# states <- sf::st_read("C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/GIS/Political boundaries/cb_2018_us_state_5m/cb_2018_us_state_5m.shp")
# fourcorners <- filter(states, NAME %in% c("Utah", "Colorado", "Arizona", "New Mexico"))
# 
# fuzzy_cluster_memb_sf_mike <- sf::st_filter(sf::st_as_sf(fuzzy_cluster_memb_sf, crs=sf::st_crs(plot_locations)),
#                                             sf::st_transform(fourcorners, crs=sf::st_crs(plot_locations))) %>%
#   mutate(Source = ifelse(test = grepl(x=PlotCode, pattern = "^AIM"),
#                          yes = "AIM",
#                          no = NA)) %>%
#   mutate(Source = ifelse(test = grepl(x=PlotCode, pattern = "^LMF"),
#                          yes = "LMF",
#                          no = Source)) %>%
#   mutate(Source = ifelse(test = grepl(x=PlotCode, pattern = "^IM_NCPN"),
#                          yes = "NPS",
#                          no = Source)) %>%
#   mutate(Source = ifelse(test = grepl(x=PlotCode, pattern = "^NRI"),
#                          yes = "NRI",
#                          no = Source))
# 
# sf::st_write(fuzzy_cluster_memb_sf_mike,
#              dsn = "C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/ESG/Maps/contains_NRI_do_not_sync",
#              layer = paste0("SW_SandyLoamyUplands_fuzzyclust_mike_", Sys.Date(), ".shp"),
#              driver = "ESRI Shapefile")
# 
# fuzzy_cluster_memb_sf_NRIonly <- filter(fuzzy_cluster_memb_sf, grepl(pattern = "NRI", x=PlotCode))
# 
# sf::st_write(fuzzy_cluster_memb_sf_NRIonly,
#              dsn = "C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/ESG/Maps/contains_NRI_do_not_sync",
#              layer = paste0("SW_SandyLoamyUplands_fuzzyclust_NRIonly_", Sys.Date(), ".shp"),
#              driver = "ESRI Shapefile")

State and transition model

CUSTOM DIAGRAM

# create custom diagram of states in PowerPoint. Save as a JPG to load in here.
# State/community naming conventions need to be determined.
# Species get listed in the descriptions in decreasing order of mean cover for that community
knitr::include_graphics("C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/ESG/STM/Analyses/Semiarid_Warm_SandyLoamyUplands_boxandarrow_Duniway_et_al_2023_supplemental.JPG")
knitr::include_graphics("C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/ESG/STM/Analyses/Semiarid_Warm_SandyLoamyUplands_boxandarrowlegend_Duniway_et_al_2023_supplemental.JPG")

The data used to develop this STM were sourced from large monitoring data sets (Nusser and Goebel, 1997; Toevs et al., 2011; Witwicki et al., 2017) designed to sample landscapes with a variety of current and historic land use. The model does not describe reference communities because these data do not include targeted sampling of relict plant communities with histories of little alteration to disturbance regimes. All states described here contain some introduced species (e.g. Bromus tectorum, Alyssum desertorum, Agropyron cristatum; see Tables \@ref(tab:comm1-1-dominant-spp), \@ref(tab:comm2-1-dominant-spp), \@ref(tab:comm3-1-dominant-spp), and \@ref(tab:comm4-1-dominant-spp) for details).

Transition and restoration pathways and drivers were derived by tabulating the transitions and drivers for similar states in the Ecological Site Description STMs linked to this ESG (Table \@ref(tab:assoc-eco-sites), Duniway et al. in review). For further information on reference states as well as drivers of transitions, see the Ecological Site Descriptions for the sites listed in Table \@ref(tab:assoc-eco-sites).

State descriptive figures

state_labels_base <- c(paste0("Open woodland
"),
                        paste0("Shrubland
"),
                        paste0("Invaded
"),
                        paste0("Grassland
"))

state_labels_fuzzy <- paste0(state_labels_base,
                               " (n = ",
                               c(nrow(filter(plot_data_first, PlantCommunity_fuzzy==1)),
                                 nrow(filter(plot_data_first, PlantCommunity_fuzzy==2)),
                                 nrow(filter(plot_data_first, PlantCommunity_fuzzy==3)),
                                 nrow(filter(plot_data_first, PlantCommunity_fuzzy==4))
                                  ),
                               ")")
# load descriptive data
plot_data_descriptive <- plot_data_pull(user=user,
                                        target_ESG = target_ESG,
                                        data_sources = data_sources,
                                        indicators = c("TotalFoliarCover",
                                                       "AH_WoodyCover",
                                                       "AH_ShrubCover",
                                                       "AH_SubShrubCover",
                                                       # TODO what to do about subshrubs??
                                                       "AH_TreeCover",
                                                       "AH_ForbGrassCover",
                                                       "AH_PerenForbGrassCover",
                                                       "AH_AnnForbGrassCover",
                                                       "SoilStab_all",
                                                       "AH_ArtemisiaTridentataCover",
                                                       "FH_CyanobacteriaCover",
                                                       "CA_percent_200plus",
                                                       "CA_percent_100plus",
                                                       "CP_percent_200plus",
                                                       "CP_percent_100plus",
                                                       "AH_NativeCover",
                                                       "AH_PerenGrassCover",
                                                       "AH_GrassCover",
                                                       "AH_AnnGrassCover",
                                                       "AH_ForbCover",
                                                       "BareSoilCover"
                                                       ),
                                        ann_grass_by_spp = T,
                                        ann_forb_by_spp = T,
                                        per_grass_by_spp = T,
                                        per_forb_by_spp = T,
                                        succulent_by_spp = T,
                                        shrub_by_spp = T,
                                        subshrub_by_spp = T,
                                        tree_by_spp = T,
                                        opuntia_combined = F,
                                        impute_gap_type = NULL #c("CA_percent_100plus", "CA_percent_200plus")
) %>%
  filter(!is.na(Month)) %>%
  left_join(select(plot_data_first, PlotCode, Year, PlantCommunity_fuzzy) , .)

colnames(plot_data_descriptive)[which(colnames(plot_data_descriptive)=="AH_ArtemisiaTridentataCover")] <- "ARTR2"

# summarize species occurrance
descriptive.df <- dplyr::select(plot_data_descriptive, -SourceKey, -PlotID, -SiteName, -PlotName,
                        -Year, -Longitude_NAD83, -Latitude_NAD83,
                        -Month, -Day, -PlantCommunity_fuzzy, -TotalFoliarCover,
                        -AH_WoodyCover, -AH_ShrubCover, -AH_SubShrubCover, -AH_TreeCover, -AH_ForbGrassCover, 
                        -AH_PerenForbGrassCover, -AH_AnnForbGrassCover,
                        -SoilStab_all, -FH_CyanobacteriaCover, -CA_percent_200plus, -CA_percent_100plus,
                        -CP_percent_200plus, -CP_percent_100plus,
                        -AH_NativeCover, -AH_PerenGrassCover, -AH_GrassCover,
                        -AH_AnnGrassCover, -AH_ForbCover, -BareSoilCover
                        ) %>%
  tibble::column_to_rownames("PlotCode") # keep an ID code for the plot as the row name to prevent data scrambling problems

sp_pa_desc <- mutate(rowwise(descriptive.df), across(everything(), function(x){if(x>0){1}else{0}})) # make a presence/absence data frame
sp_keep_desc <- names(colSums(sp_pa_desc)[which(colSums(sp_pa_desc)>0)]) 
descriptive.df <- select(descriptive.df, all_of(sp_keep_desc))

plot_data_descriptive <- select(plot_data_descriptive, SourceKey, PlotID, SiteName, PlotName,
                        Year, PlotCode, Longitude_NAD83, Latitude_NAD83,
                        Month, Day, PlantCommunity_fuzzy, TotalFoliarCover,
                        AH_WoodyCover, AH_ShrubCover, AH_SubShrubCover, AH_TreeCover, AH_ForbGrassCover, 
                        AH_PerenForbGrassCover, AH_AnnForbGrassCover,
                        SoilStab_all, FH_CyanobacteriaCover, CA_percent_100plus, CA_percent_200plus,
                        CP_percent_100plus, CP_percent_200plus,
                        AH_NativeCover, AH_PerenGrassCover, AH_GrassCover, AH_AnnGrassCover, 
                        AH_ForbCover, BareSoilCover, all_of(sp_keep_desc))

#ord.df <- select(ord.df, -any_of(c("UNKS", "UNKPG", "UNKAF", "UNKPF", "PG1", "PG01", "AF1")))

# estimate annual + perennial gaps for plots with only perennial gaps
plot_data_descriptive <- mutate(plot_data_descriptive,
                                CA_percent_100plus_est = estimate_allgaps(gap_type = "100plus",
                                                                          perennial_gaps = CP_percent_100plus,
                                                                          AG = AH_AnnGrassCover,
                                                                          PG = AH_PerenGrassCover,
                                                                          SH = AH_ShrubCover,
                                                                          Forb = AH_ForbCover),
                                CA_percent_200plus_est = estimate_allgaps(gap_type = "200plus",
                                                                          perennial_gaps = CP_percent_200plus,
                                                                          AG = AH_AnnGrassCover,
                                                                          PG = AH_PerenGrassCover,
                                                                          SH = AH_ShrubCover,
                                                                          Forb = AH_ForbCover)
                                ) %>% 
  mutate(CA_percent_100plus = ifelse(test = is.na(CA_percent_100plus),
                                     yes = ifelse(test = CA_percent_100plus_est >=0,
                                                  yes = CA_percent_100plus_est,
                                                  no = 0), # round negative estimates to 0
                                     no = CA_percent_100plus
                                     ),
         CA_percent_200plus = ifelse(test = is.na(CA_percent_200plus),
                                     yes = ifelse(test = CA_percent_200plus_est >=0,
                                                  yes = CA_percent_200plus_est,
                                                  no = 0), # round negative estimates to 0
                                     no = CA_percent_200plus
                                     )
         )

# report how many plots have estimated canopy gaps (rather than measure)
plot_data_descriptive %>% 
  group_by(PlantCommunity_fuzzy) %>% 
  summarise(n = n(),
            n_est = length(which(CA_percent_200plus==CA_percent_200plus_est)),
            n_NPS_est = length(which(CA_percent_200plus==CA_percent_200plus_est & SourceKey=="IM_NCPN")))

# NCPN woodland sampling stratum doesn't do gaps - how many plots are affected?
plot_data_descriptive %>% 
  group_by(SourceKey) %>% 
  summarise(n = n(),
            n_nogapdata = length(which(is.na(CA_percent_100plus))),
            pct_nogapdata = length(which(is.na(CA_percent_100plus)))/n())

## load climate data
AI_raster <- raster::raster(file_paths$ai_raster)
plot_AI <- sf::st_as_sf(raster::extract(x = AI_raster,
                               y = fuzzy_cluster_memb_sf, #plot_locations,
                               sp = T)) %>%
  mutate(AI = ai_et0_WesternUSEcoregions*0.0001 # AI rasters are actually AI*10,000 so that they
                  #can be stored as integers - need to put back into standard AI units
                  ) %>%
  sf::st_drop_geometry()
rm(AI_raster)

climate_rasters <- raster::stack(file_paths$prism_rasters)
prism_vals <- raster::extract(x=climate_rasters,
                              y= fuzzy_cluster_memb_sf, #plot_locations,
                              sp = T) %>%
  as(., "sf")
sf::st_geometry(prism_vals) <- NULL
colnames(prism_vals)[which(colnames(prism_vals) %in% c("PRISM_ppt_30yr_normal_800mM2_annual_asc",
                                                       "PRISM_ppt_30yr_normal_800mM2_06_asc",      
                                                       "PRISM_ppt_30yr_normal_800mM2_07_asc",
                                                       "PRISM_ppt_30yr_normal_800mM2_08_asc",
                                                       "PRISM_ppt_30yr_normal_800mM2_09_asc",
                                                       "PRISM_tmean_30yr_normal_800mM2_annual_asc",
                                                       "PRISM_tmax_30yr_normal_800mM2_annual_asc",
                                                       "PRISM_tmin_30yr_normal_800mM2_annual_asc"
                                                       ))] <- c("Ann_ppt_mm", "June_ppt_mm",
                                                                "July_ppt_mm", "Aug_ppt_mm",
                                                                "Sept_ppt_mm", "Ann_tmean",
                                                                "Ann_tmax", "Ann_tmin")
rm(climate_rasters)

prism_vals <- mutate(prism_vals,
                     Summer_ppt_ratio = (June_ppt_mm + July_ppt_mm + Aug_ppt_mm + Sept_ppt_mm)/Ann_ppt_mm)

plot_data_descriptive <- left_join(plot_data_descriptive,
                               select(plot_AI, PlotCode, #Year,
                                      AI)) %>%
  left_join(., 
            select(prism_vals, PlotCode, #Year,
                   Ann_ppt_mm, Summer_ppt_ratio, Ann_tmean, Ann_tmax, Ann_tmin))

## species richness and diversity
plot_data_descriptive$shannon_diversity <- vegan::diversity(x=descriptive.df, index = "shannon")
plot_data_descriptive$sp_richness <- vegan::specnumber(x=descriptive.df)

# AERO
aero <- read.csv(file_paths$aero)

aero_aim <- filter(aero, source=="AIM") %>%
  mutate(PlotKey = substr(PrimaryKey, 1, nchar(PrimaryKey)-10), # fix the Jornada's data storage errors
         Year = as.numeric(substr(PrimaryKey, nchar(PrimaryKey)-9, nchar(PrimaryKey)-6)),) %>%
  select(PlotKey, Year, horizontal_flux_total_MN, horizontal_flux_total_LPI, horizontal_flux_total_UPI) %>%
  rename("PlotID" = "PlotKey")

aero_lmf <- filter(aero, source=="LMF") %>%
  select(PlotKey, horizontal_flux_total_MN, horizontal_flux_total_LPI, horizontal_flux_total_UPI) %>%
  rename("PlotID" = "PlotKey")

plot_data_descriptive <- left_join(plot_data_descriptive, aero_aim) %>%
  rows_update(x = ., y = aero_lmf, unmatched = "ignore")

# RHEM
rhem_aim <- read.csv(file_paths$rhem$aim) %>%
  mutate(PrimaryKey_Jornada = as.character(PrimaryKey),
         Year = as.numeric(substr(PrimaryKey, nchar(PrimaryKey)-9, nchar(PrimaryKey)-6)),
         PlotID = substr(PrimaryKey, 1, nchar(PrimaryKey)-10)) %>% # make the Jornada's proprietary PrimaryKey system play nice with everyone else's
  select(PlotID, Year, Runoff_Long_Term_MEAN, Soil_Loss_Long_Term_MEAN, Sed_Yield_Long_Term_MEAN,
         any_of(contains("Runoff_Yearly_Max_Daily_")), any_of(contains("Soil_Loss_Yearly_Max_Daily_")),
         any_of(contains("SY_Yearly_Max_Daily_"))) %>%
  distinct()

rhem_lmf <- read.csv(file_paths$rhem$lmf) %>%
  select(PrimaryKey, Runoff_Long_Term_MEAN, Soil_Loss_Long_Term_MEAN, Sed_Yield_Long_Term_MEAN,
         any_of(contains("Runoff_Yearly_Max_Daily_")), any_of(contains("Soil_Loss_Yearly_Max_Daily_")),
         any_of(contains("SY_Yearly_Max_Daily_"))) %>%
  distinct() %>%
  rename("PlotID" = "PrimaryKey")

rhem_nri <- read.csv(file_paths$rhem$nri) %>%
  select(PrimaryKey, Runoff_Long_Term_MEAN, Soil_Loss_Long_Term_MEAN, Sed_Yield_Long_Term_MEAN,
         any_of(contains("Runoff_Yearly_Max_Daily_")), any_of(contains("Soil_Loss_Yearly_Max_Daily_")),
         any_of(contains("SY_Yearly_Max_Daily_"))) %>%
  distinct() %>%
  rename("PlotID" = "PrimaryKey")

plot_data_descriptive <- left_join(plot_data_descriptive, rhem_aim) %>%
  rows_update(x = ., y = rhem_lmf, unmatched = "ignore") %>%
  rows_update(x = ., y = rhem_nri, unmatched = "ignore")

# gridded soil data from Nauman and Duniway (2019) https://doi.org/10.5066/P9YBAKC2
# this part is slowww - about 10 minutes without parallelizing, 6 minutes with parallelizing
library(parallel)
library(doParallel)

cl <- makeCluster(spec = 3, # number of cores to use
                  type = "PSOCK",
                  methods = FALSE)
registerDoParallel(cl)

soil_pct_vals <- foreach(soil_attrib = c("clay", "sand",
                                         "fragments"#, "ec", "depthclass"
                                         ),
                              .packages = c("raster", "sf"),
                              .combine = "cbind") %dopar% {
      soil_pct_raster <- raster::stack(c(file_paths$soil_rasters[[soil_attrib]]))

      soil_pct_vals_temp <- raster::extract(x=soil_pct_raster,
                              y=fuzzy_cluster_memb_sf,
                              buffer = 50, # extract values from within 50 m of plot center
                              fun = mean, # calculate the weighted mean of the extracted values around each plot center
                              na.rm = T, # passed to "mean"
                              exact = T, # use fraction of each cell covered, even if cell center is not covered
                              weights = T, # do weighted mean
                              sp = T) %>%
  as(., "sf") 
}
registerDoSEQ()
stopCluster(cl)

soil_pct_vals <- mutate(soil_pct_vals,
                        clay_pct_mean = calc_soil_trapezoidal(value1 = claytotal_r_0_cm_2D_QRF_bt,
                                                              value2 = claytotal_r_5_cm_2D_QRF_bt,
                                                              value3 = claytotal_r_15_cm_2D_QRF_bt,
                                                              value4 = claytotal_r_30_cm_2D_QRF_bt),
                        sand_pct_mean = calc_soil_trapezoidal(value1 = sandtotal_r_0_cm_2D_QRF,
                                                              value2 = sandtotal_r_5_cm_2D_QRF,
                                                              value3 = sandtotal_r_15_cm_2D_QRF,
                                                              value4 = sandtotal_r_30_cm_2D_QRF),
                        fragment_pct_mean = calc_soil_trapezoidal(value1 = fragvol_r_0_cm_2D_QRF_bt_ART_SG100covs_bt,
                                                                  value2 = fragvol_r_5_cm_2D_QRF_bt_ART_SG100covs_bt,
                                                                  value3 = fragvol_r_15_cm_2D_QRF_bt_ART_SG100covs_bt,
                                                                  value4 = fragvol_r_30_cm_2D_QRF_bt_ART_SG100covs_bt)#,
                        # ec_mean = calc_soil_trapezoidal(value1 = ec_r_0_cm_2D_QRF_bt,
                        #                                 value2 = ec_r_5_cm_2D_QRF_bt,
                        #                                 value3 = ec_r_15_cm_2D_QRF_bt,
                        #                                 value4 = ec_r_30_cm_2D_QRF_bt)
)

# soil depth from Brungard et al. 2021, https://doi.org/10.1016/j.geoderma.2021.114998
soil_depth_raster <- raster::raster(file_paths$soil_rasters$depthclass)
soil_vals <- sf::st_as_sf(raster::extract(x = soil_depth_raster,
                               y = soil_pct_vals,
                               sp = T)) %>% 
  st_drop_geometry() %>% 
  mutate(soil_depth_class = stringr::str_replace_all(as.character(DepthClass_ensemble),
                                                     c("1" = "Bedrock",
                                                       "2" = "Very shallow",
                                                       "3" = "Shallow",
                                                       "4" = "Moderately deep",
                                                       "5" = "Deep",
                                                       "6" = "Very deep")) %>% 
           factor(x=., levels = c("Bedrock", "Very shallow", "Shallow",
                                  "Moderately deep", "Deep", "Very deep"),
                  ordered = T)
         ) 
rm(soil_depth_raster)

plot_data_descriptive <- left_join(plot_data_descriptive, select(soil_vals,
                                                                 PlotCode,
                                                                 clay_pct_mean,
                                                                 sand_pct_mean,
                                                                 fragment_pct_mean,
                                                                 soil_depth_class)
                                   )

# calculate proportion native cover
plot_data_descriptive$NativeRelativeCover <- (plot_data_descriptive$AH_NativeCover/plot_data_descriptive$TotalFoliarCover)*100
# # consider including species present in >10% of plots:
# names(colSums(sp_pa)[which(colSums(sp_pa)>(nrow(plot_data_first)*0.10))])
# # also consider including species with high mean cover across all plots:
# mean_cover <- summarise(plot_data_first, across(.cols = AH_C3IntroducedPerenGrassCover:AH_OpuntiaCover, .fns = mean)) %>%
#  select(-any_of(c("Longitude_NAD83", "Latitude_NAD83", "Year", "Month", "Day"))) %>%
#  t() %>%
#  as.data.frame()
# 
# row.names(mean_cover)[which(mean_cover$V1>=0.9)]

# Consider switching to plotly to allow users to hover and see exact quantile and outlier values, e.g.:
# plotly::plot_ly(x=plot_data_descriptive$PlantCommunity_fuzzy,
#                 y=plot_data_descriptive$horizontal_flux_total_UPI,
#                 split=plot_data_descriptive$PlantCommunity_fuzzy,
#                 type='box')

plot_data_first_tall <- select(plot_data_first,
                               SourceKey, PlotID, SiteName,
                               PlotName, Year, PlotCode,
                               #PlantCommunity,
                               PlantCommunity_fuzzy,
                               AH_C3IntroducedPerenGrassCover, 
                               AH_C3NativePerenGrassCover,     
                               AH_C4NativePerenGrassCover,
                               AH_IntroducedAnnForbCover,
                               AH_IntroducedAnnGrassCover,
                               AH_IntroducedPerenForbCover,    
                               AH_NativeAnnForbCover, 
                               AH_NativeAnnGrassCover,
                               AH_NativePerenForbCover, 
                               BareSoilCover,                  
                               FH_TotalLitterCover, 
                               AH_ArtemisiaTridentataCover,
                               #CA_percent_200plus, 
                               AH_C4IntroducedPerenGrassCover, 
                               FH_LichenMossCover,
                               #CA_percent_100plus, 
                               CHVI8,
                               SYRO,
                               ERNA10,
                               SAVE4,
                               PUTR2,
                               GUSA2,
                               PIED,
                               JUOS,
                               QUGA
                               ) %>%
  left_join(., select(plot_data_descriptive, PlotCode, CA_percent_100plus, CA_percent_200plus)) %>%
  tidyr::pivot_longer(cols = c(AH_C3IntroducedPerenGrassCover, 
                                             AH_C3NativePerenGrassCover,     
                                             AH_C4NativePerenGrassCover,
                                             AH_IntroducedAnnForbCover,
                                             AH_IntroducedAnnGrassCover,
                                             AH_IntroducedPerenForbCover,    
                                             AH_NativeAnnForbCover, 
                                             AH_NativeAnnGrassCover,
                                             AH_NativePerenForbCover, 
                                             BareSoilCover,                  
                                             FH_TotalLitterCover, 
                                             AH_ArtemisiaTridentataCover,
                                             CA_percent_200plus, 
                                             AH_C4IntroducedPerenGrassCover, 
                                             FH_LichenMossCover,
                                             CA_percent_100plus, 
                                             CHVI8,
                               SYRO,
                               ERNA10,
                               SAVE4,
                               PUTR2,
                               GUSA2,
                               PIED,
                               JUOS,
                               QUGA
                               ),
                      names_to = "CoverType",
                      values_to = "PctCover")


spp_list <- read.csv(file_paths$species_list)

cover_funcgrps <- sort(c("AH_C3IntroducedPerenGrassCover", 
                 "AH_C3NativePerenGrassCover",     
                 "AH_C4NativePerenGrassCover",
                 "AH_IntroducedAnnForbCover",
                 "AH_IntroducedAnnGrassCover",
                 "AH_IntroducedPerenForbCover",    
                 "AH_NativeAnnForbCover", 
                 "AH_NativeAnnGrassCover",
                 "AH_NativePerenForbCover", 
                 "BareSoilCover",                  
                 "FH_TotalLitterCover", 
                 "AH_ArtemisiaTridentataCover",
                 "CA_percent_200plus", 
                 "AH_C4IntroducedPerenGrassCover", 
                 "FH_LichenMossCover",
                 "CA_percent_100plus"
                 ))
funcgrps_names <- indicator_descriptions %>%
  bind_rows(., data.frame(Indicator_code = c("CA_percent_100plus", "CA_percent_200plus"),
                          Indicator = c("Ann. & peren. canopy
gaps > 100 cm", "Ann. & peren. canopy
gaps > 200 cm"))) %>%
  filter(., Indicator_code %in% cover_funcgrps) %>%
  arrange(., Indicator_code) %>%
  select(Indicator) %>%
  unlist()

names(cover_funcgrps) <- funcgrps_names

cover_spp <- sort(c("CHVI8",
                    "SYRO",
                    "ERNA10",
                    "SAVE4",
                    "PUTR2",
                    "GUSA2",
                    "PIED",
                    "JUOS",
                    "QUGA"
                    ))
spp_names <- paste0( filter(spp_list, SpeciesCode %in% cover_spp & SpeciesState=="AIM")$ScientificName,
                    "\n (", filter(spp_list, SpeciesCode %in% cover_spp & SpeciesState=="AIM")$CommonName, ")")
names(cover_spp) <- spp_names

cover_labels <- sort(c(cover_funcgrps, cover_spp))
names(cover_labels)[which(cover_labels=="AH_ArtemisiaTridentataCover")] <- "Artemisia tridentata\n (all subspecies)"

functypes_grps <- c("TotalFoliarCover", #"AH_WoodyCover", #AH_ForbGrassCover, 
                    #"AH_SubShrubCover",
                    "AH_ShrubCover", "AH_TreeCover",
                    "AH_PerenForbGrassCover", "AH_AnnForbGrassCover"
                )
functypes_cover_states <- ggplot(data = select(plot_data_descriptive, 
                                             any_of(c("PlantCommunity_fuzzy", functypes_grps))) %>%
                                   tidyr::pivot_longer(cols = all_of(functypes_grps),
                      names_to = "CoverType",
                      values_to = "PctCover"),
                             aes(x=PlantCommunity_fuzzy, y=PctCover, fill=CoverType)) +
  geom_boxplot() +
  scale_fill_brewer(type = "qual", palette = "Set3",
                    name = "Functional type",
                    labels = c("Annual herbaceous", "Perennial herbaceous",
                               #"Woody",
                               "Shrub",
                               #"SubShrub",
                               "Tree",
                               "Total foliar cover")
                    ) +
  scale_x_discrete(labels=state_labels_fuzzy) +
  xlab(NULL) +
  ylab("Cover (%)") +
  ggtitle("Plant functional groups") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 13),
        axis.title = element_text(color = "black", size = 11),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 9),
        axis.text.y = element_text(color = "black", size = 9),
        legend.title = element_text(color = "black", size = 11),
        legend.text = element_text(color = "black", size = 9))

woody_grps <- c("AH_ArtemisiaTridentataCover", "CHVI8", "SYRO",
                "ERNA10", "SAVE4", "PUTR2", "GUSA2",
                "PIED", "JUOS", "QUGA"
                )
woody_cover_states <- ggplot(data = filter(plot_data_first_tall, CoverType %in% woody_grps),
                             aes(x=PlantCommunity_fuzzy, y=PctCover, fill=CoverType)) +
  geom_boxplot() +
  scale_fill_brewer(type = "qual", palette = "Set3",
                    name = "Species",
                    labels = names(cover_labels[which(cover_labels %in% woody_grps)])) +
  scale_x_discrete(labels=state_labels_fuzzy) +
  xlab(NULL) +
  ylab("Cover (%)") +
  ggtitle("Woody plants") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 13),
        axis.title = element_text(color = "black", size = 11),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 9),
        axis.text.y = element_text(color = "black", size = 9),
        legend.title = element_text(color = "black", size = 11),
        legend.text = element_text(color = "black", size = 9))

perenherb_grps <- c("AH_C3IntroducedPerenGrassCover", "AH_C3NativePerenGrassCover", 
                    "AH_C4IntroducedPerenGrassCover", "AH_C4NativePerenGrassCover",
                    "AH_IntroducedPerenForbCover", "AH_NativePerenForbCover")
perenherb_cover_states <- ggplot(data = filter(plot_data_first_tall, CoverType %in% perenherb_grps),
                                 aes(x=PlantCommunity_fuzzy, y=PctCover, fill=CoverType)) +
  geom_boxplot() +
  scale_fill_brewer(type = "qual", palette = "Set3",
                    name = "Functional group",
                    labels = names(cover_labels[which(cover_labels %in% perenherb_grps)])) +
  scale_x_discrete(labels=state_labels_fuzzy) +
  xlab(NULL) +
  ylab("Cover (%)") +
  ggtitle("Perennial herbaceous plants") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 13),
        axis.title = element_text(color = "black", size = 11),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 9),
        axis.text.y = element_text(color = "black", size = 9),
        legend.title = element_text(color = "black", size = 11),
        legend.text = element_text(color = "black", size = 9))

annherb_grps <- c("AH_IntroducedAnnForbCover", "AH_IntroducedAnnGrassCover",
                  "AH_NativeAnnForbCover", "AH_NativeAnnGrassCover")
annherb_cover_states <- ggplot(data = filter(plot_data_first_tall, CoverType %in% annherb_grps),
                               aes(x=PlantCommunity_fuzzy, y=PctCover, fill=CoverType)) +
  geom_boxplot() +
  scale_fill_brewer(type = "qual", palette = "Set3",
                    name = "Functional group",
                    labels = names(cover_labels[which(cover_labels %in% annherb_grps)])) +
  scale_x_discrete(labels=state_labels_fuzzy) +
  xlab(NULL) +
  ylab("Cover (%)") +
  ggtitle("Annual herbaceous plants") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 13),
        axis.title = element_text(color = "black", size = 11),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 9),
        axis.text.y = element_text(color = "black", size = 9),
        legend.title = element_text(color = "black", size = 11),
        legend.text = element_text(color = "black", size = 9))

ground_grps <- c("BareSoilCover", "FH_TotalLitterCover", "CA_percent_200plus",
                 "FH_LichenMossCover", "CA_percent_100plus"
                 )
ground_cover_states <- ggplot(data = filter(plot_data_first_tall, CoverType %in% ground_grps),
                              aes(x=PlantCommunity_fuzzy, y=PctCover, fill=CoverType)) +
  geom_boxplot() +
  scale_fill_brewer(type = "qual", palette = "Set3",
                    name = "Functional group",
                    labels = names(cover_labels[which(cover_labels %in% ground_grps)])) +
  scale_x_discrete(labels=state_labels_fuzzy) +
  xlab("State") +
  ylab("Cover (%)") +
  ggtitle("Ground cover") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 13),
        axis.title = element_text(color = "black", size = 11),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 9),
        axis.text.y = element_text(color = "black", size = 9),
        legend.title = element_text(color = "black", size = 11),
        legend.text = element_text(color = "black", size = 9))

cover_states <- cowplot::plot_grid(functypes_cover_states + theme(legend.position = "none"),
                                   woody_cover_states + theme(legend.position = "none"),
                   perenherb_cover_states + theme(legend.position = "none"),
                   annherb_cover_states + theme(legend.position = "none"),
                   ground_cover_states + theme(legend.position = "none"),
                   ncol = 1, axis = "lr", labels = "auto")

cover_axes <- cowplot::plot_grid(cowplot::get_legend(functypes_cover_states),
                                 cowplot::get_legend(woody_cover_states),
                   cowplot::get_legend(perenherb_cover_states),
                   cowplot::get_legend(annherb_cover_states),
                   cowplot::get_legend(ground_cover_states),
                   ncol = 1)

cowplot::plot_grid(cover_states, cover_axes, ncol = 2, rel_widths = c(1, 0.5))
# group_labels_fuzzy <- paste0(group_labels_base,
#                                " (n = ",
#                                c(nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==1 & !is.na(AI))),
#                                  nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==2 & !is.na(AI))),
#                                  nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==3 & !is.na(AI))),
#                                  nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==4 & !is.na(AI)))
#                                   ),
#                                ")")

cowplot::plot_grid(
  cowplot::plot_grid(
ggplot(data = plot_data_descriptive,
       aes(x=PlantCommunity_fuzzy, y=AI, fill = PlantCommunity_fuzzy)) +
  geom_boxplot() +
  scale_x_discrete(labels=state_labels_fuzzy) +
  scale_fill_manual(name="State", labels=state_labels_fuzzy, values = pal_veg2) +
  xlab(NULL) +
  ylab("Aridity index") +
  ylim(c(0, 0.45)) +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 14),
        axis.title = element_text(color = "black", size = 12),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(color = "black", size = 10),
        legend.position = "none"),
ggplot(data = plot_data_descriptive,
       aes(x=PlantCommunity_fuzzy, y=Ann_ppt_mm, fill = PlantCommunity_fuzzy)) +
  geom_boxplot() +
  scale_x_discrete(labels=state_labels_fuzzy) +
  scale_fill_manual(name="State", labels=state_labels_fuzzy, values = pal_veg2) +
  xlab(NULL) +
  ylab("Ann. precip. (mm)") +
  ylim(c(0, 650)) +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 14),
        axis.title = element_text(color = "black", size = 11),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(color = "black", size = 10),
        legend.position = "none"),
nrow = 1, labels = c("a", "b")),
ggplot(data = select(plot_data_descriptive, PlantCommunity_fuzzy, Ann_tmean, Ann_tmax, Ann_tmin) %>%
         tidyr::pivot_longer(c(Ann_tmean, Ann_tmax, Ann_tmin), names_to = "temptype", values_to = "tempdegrees"),
       aes(fill=PlantCommunity_fuzzy, y=tempdegrees, x=temptype)) +
  geom_boxplot() +
  scale_fill_manual(name="State", labels=state_labels_fuzzy, values = pal_veg2) +
  scale_x_discrete(labels=c("Maximum", "Mean", "Minimum")) +
  xlab(NULL) +
  ylab("Temperature (C)") +
  theme_bw() +
  theme(plot.title = element_text(color = "black", size = 14),
        axis.title = element_text(color = "black", size = 12),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(color = "black", size = 10),
        legend.position = "none"),
nrow = 2, labels = c(NA, "c"))

# # summer precipitation ratio figure
# ggplot(data = plot_data_descriptive,
#         aes(x=PlantCommunity_fuzzy, y=Summer_ppt_ratio)) +
#      geom_boxplot() +
#      scale_x_discrete(labels=state_labels_fuzzy) +
#      xlab(NULL) +
#      ylab("Summer precipitation ratio") +
#      theme_bw() +
#      theme(axis.text.x = element_text(angle = 45, hjust = 1))
# soil depth figure
soil_depth_byclass <- plot_data_descriptive %>%
  # mutate(soil_depth_class = ifelse(test = soil_depth_class =="Bedrock",
  #                                         yes = "Very shallow",
  #                                         no = soil_depth_class)) %>%
  group_by(soil_depth_class) %>%
  summarise(n_OpenWoodland = length(which(PlantCommunity_fuzzy == "1")),
            n_Shrubland = length(which(PlantCommunity_fuzzy == "2")),
            n_Invaded = length(which(PlantCommunity_fuzzy == "3")),
            n_Grassland = length(which(PlantCommunity_fuzzy == "4"))) %>%
  tidyr::pivot_longer(data=.,
               cols = c(n_OpenWoodland,
                        n_Shrubland,
                        n_Invaded,
                        n_Grassland),
               names_to = "PlantCommunity",
               values_to = "Percent") %>%
  mutate(PlantCommunity_fuzzy = factor(PlantCommunity,
                                                 levels = c("n_OpenWoodland", "n_Shrubland", "n_Invaded",
                                                            "n_Grassland"),
         labels = c("Open woodland", "Shrubland",
                    "Invaded", "Grassland"),
         ordered = T))

soil_depth_fig <- ggplot(data = soil_depth_byclass,
                          aes(x=soil_depth_class, y=Percent, fill=PlantCommunity_fuzzy)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = pal_veg2,
                    name = "State",
                    labels = c("Open woodland",
                               "Shrubland",
                               "Invaded",
                               "Grassland")
                    ) +
  scale_x_discrete(labels=c(levels(soil_depth_byclass$soil_depth_class)[-1])) +
  xlab("Soil depth class") +
  ylab("No. plots") +
  theme_bw() +
  theme(axis.title = element_text(color = "black", size = 12),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(color = "black", size = 10),
        legend.position = "right") 

# boxplot panel
soiltexture_panel <- cowplot::plot_grid(
ggplot(data = plot_data_descriptive,
       aes(x=PlantCommunity_fuzzy, y=clay_pct_mean, fill = PlantCommunity_fuzzy)) +
  geom_boxplot() +
  scale_x_discrete(labels=state_labels_fuzzy) +
  scale_fill_manual(name="State", labels=state_labels_fuzzy, values = pal_veg2) +
  ylim(c(0, 100)) +
  xlab(NULL) +
  ylab("% clay 
(0-30 cm)") +
  theme_bw() +
  theme(axis.title = element_text(color = "black", size = 12),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(color = "black", size = 10),
        legend.position = "none"),
ggplot(data = plot_data_descriptive,
       aes(x=PlantCommunity_fuzzy, y=sand_pct_mean, fill = PlantCommunity_fuzzy)) +
  geom_boxplot() +
  scale_x_discrete(labels=state_labels_fuzzy) +
  scale_fill_manual(name="State", labels=state_labels_fuzzy, values = pal_veg2) +
  ylim(c(0, 100)) +
  xlab(NULL) +
  ylab("% sand
(0-30 cm)") +
  theme_bw() +
  theme(axis.title = element_text(color = "black", size = 12),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(color = "black", size = 10),
        legend.position = "none"),
ggplot(data = plot_data_descriptive,
       aes(x=PlantCommunity_fuzzy, y=fragment_pct_mean, fill = PlantCommunity_fuzzy)) +
  geom_boxplot() +
  scale_x_discrete(labels=state_labels_fuzzy) +
  scale_fill_manual(name="State", labels=state_labels_fuzzy, values = pal_veg2) +
  ylim(c(0, 100)) +
  xlab(NULL) +
  ylab("% rock frag.
(0-30 cm)") +
  theme_bw() +
  theme(axis.title = element_text(color = "black", size = 12),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(color = "black", size = 10),
        legend.position = "none"),
labels = "auto", nrow = 1
)

cowplot::plot_grid(soiltexture_panel,
                   soil_depth_fig,
                   nrow = 2, labels = c("", "d"))
# group_labels_fuzzy <- paste0(group_labels_base,
#                                " (n = ",
#                                c(nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==1 & !is.na(shannon_diversity))),
#                                  nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==2 & !is.na(shannon_diversity))),
#                                  nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==3 & !is.na(shannon_diversity))),
#                                  nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==4 & !is.na(shannon_diversity)))
#                                   ),
#                                ")")

cowplot::plot_grid(ggplot(data = plot_data_descriptive,
       aes(x=PlantCommunity_fuzzy, y=shannon_diversity, fill=PlantCommunity_fuzzy)) +
  geom_boxplot() +
  scale_x_discrete(labels=state_labels_fuzzy) +
  scale_fill_manual(name="State", labels=state_labels_fuzzy, values = pal_veg2) +
  xlab(NULL) +
  ylab("Shannon
diversity index") +
  theme_bw() +
  theme(axis.title = element_text(color = "black", size = 12),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(color = "black", size = 10),
        legend.position = "none"),
  ggplot(data = plot_data_descriptive,
       aes(x=PlantCommunity_fuzzy, y=sp_richness, fill = PlantCommunity_fuzzy)) +
  geom_boxplot() +
  scale_x_discrete(labels=state_labels_fuzzy) +
  scale_fill_manual(name="State", labels=state_labels_fuzzy, values = pal_veg2) +
  xlab(NULL) +
  ylab("Number of
species") +
  theme_bw() +
  theme(axis.title = element_text(color = "black", size = 12),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(color = "black", size = 10),
        legend.position = "none"),
  ggplot(data = plot_data_descriptive,
       aes(x=PlantCommunity_fuzzy, y=NativeRelativeCover, fill = PlantCommunity_fuzzy)) +
  geom_boxplot() +
  scale_x_discrete(labels=state_labels_fuzzy) +
  scale_fill_manual(name="State", labels=state_labels_fuzzy, values = pal_veg2) +
  xlab(NULL) +
  ylab("Relative native
plant cover (%)") +
  theme_bw() +
  theme(axis.title = element_text(color = "black", size = 12),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(color = "black", size = 10),
        legend.position = "none"),
  nrow = 1, labels = "auto")
state_labels_aero <- paste0(state_labels_base,
                               " (n = ",
                               c(nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==1 & !is.na(horizontal_flux_total_MN))),
                                 nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==2 & !is.na(horizontal_flux_total_MN))),
                                 nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==3 & !is.na(horizontal_flux_total_MN))),
                                 nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==4 & !is.na(horizontal_flux_total_MN)))
                                  ),
                               ")")

state_labels_soilstab <- paste0(state_labels_base,
                               " (n = ",
                               c(nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==1 & !is.na(SoilStab_all))),
                                 nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==2 & !is.na(SoilStab_all))),
                                 nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==3 & !is.na(SoilStab_all))),
                                 nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==4 & !is.na(SoilStab_all)))
                                  ),
                               ")")

state_labels_rhem <- paste0(state_labels_base,
                               " (n = ",
                               c(nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==1 & !is.na(Runoff_Long_Term_MEAN))),
                                 nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==2 & !is.na(Runoff_Long_Term_MEAN))),
                                 nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==3 & !is.na(Runoff_Long_Term_MEAN))),
                                 nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==4 & !is.na(Runoff_Long_Term_MEAN)))
                                  ),
                               ")")

# dust_tall <- plot_data_descriptive %>%
#   select(PlotID, PlantCommunity_fuzzy, horizontal_flux_total_MN, horizontal_flux_total_LPI, horizontal_flux_total_UPI) %>%
#   tidyr::pivot_longer(cols = c(horizontal_flux_total_MN, horizontal_flux_total_LPI, horizontal_flux_total_UPI),
#                       names_to = "dust_statistic",
#                       values_to = "dust_flux")


cowplot::plot_grid(
  # a
  ggplot(data = plot_data_descriptive,
       aes(x=PlantCommunity_fuzzy, y=horizontal_flux_total_MN, fill=PlantCommunity_fuzzy)) +
  geom_boxplot() +
  scale_x_discrete(labels=state_labels_aero) +
  scale_fill_manual(name="State", labels=state_labels_aero, values = pal_veg2) +
  xlab(NULL) +
  ylab("Sediment mass 
flux (Q, g/m/day)") +
  theme_bw() +
  theme(axis.title = element_text(color = "black", size = 12),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(color = "black", size = 10),
        legend.position = "none") + 
  scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
                labels = scales::trans_format("log10", scales::label_math(10^.x))),
  # b
  ggplot(data = plot_data_descriptive,
       aes(x=PlantCommunity_fuzzy, y=SoilStab_all, fill = PlantCommunity_fuzzy)) +
  geom_boxplot() +
  scale_x_discrete(labels=state_labels_soilstab) +
  scale_fill_manual(name="State", labels=state_labels_soilstab, values = pal_veg2) +
  ylim(c(0, 6)) +
  xlab(NULL) +
  ylab("Mean soil 
stability index") +
  theme_bw() +
  theme(axis.title = element_text(color = "black", size = 12),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(color = "black", size = 10),
        legend.position = "none"),
  # c
  ggplot(data = plot_data_descriptive,
       aes(x=PlantCommunity_fuzzy, y=Runoff_Long_Term_MEAN, fill = PlantCommunity_fuzzy)) +
  geom_boxplot() +
  scale_x_discrete(labels=state_labels_rhem) +
  scale_fill_manual(name="State", labels=state_labels_rhem, values = pal_veg2) +
  xlab(NULL) +
  ylab("Long-term mean 
runoff (mm/yr)") +
  theme_bw() +
  theme(axis.title = element_text(color = "black", size = 12),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(color = "black", size = 10),
        legend.position = "none") + 
  scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
                labels = scales::trans_format("log10", scales::label_math(10^.x))),
  # d
  ggplot(data = plot_data_descriptive,
       aes(x=PlantCommunity_fuzzy, y=Soil_Loss_Long_Term_MEAN, fill = PlantCommunity_fuzzy)) +
  geom_boxplot() +
  scale_x_discrete(labels=state_labels_rhem) +
  scale_fill_manual(name="State", labels=state_labels_rhem, values = pal_veg2) +
  xlab(NULL) +
  ylab("Long-term mean 
soil loss (ton/ha/yr)") +
  theme_bw() +
  theme(axis.title = element_text(color = "black", size = 12),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(color = "black", size = 10),
        legend.position = "none") +
  scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
                labels = scales::trans_format("log10", scales::label_math(10^.x))),
  # e
  ggplot(data = plot_data_descriptive,
       aes(x=PlantCommunity_fuzzy, y=Runoff_Yearly_Max_Daily_50_Yr, fill = PlantCommunity_fuzzy)) +
  geom_boxplot() +
  scale_x_discrete(labels=state_labels_rhem) +
  scale_fill_manual(name="State", labels=state_labels_rhem, values = pal_veg2) +
  xlab(NULL) +
  ylab("50 yr storm max 
daily runoff (mm)") +
  theme_bw() +
  theme(axis.title = element_text(color = "black", size = 12),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(color = "black", size = 10),
        legend.position = "none") + 
  scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
                labels = scales::trans_format("log10", scales::label_math(10^.x))),
  # f
  ggplot(data = plot_data_descriptive,
       aes(x=PlantCommunity_fuzzy, y=Soil_Loss_Yearly_Max_Daily_50_Yr, fill = PlantCommunity_fuzzy)) +
  geom_boxplot() +
  scale_x_discrete(labels=state_labels_rhem) +
  scale_fill_manual(name="State", labels=state_labels_rhem, values = pal_veg2) +
  xlab(NULL) +
  ylab("50 yr storm max
daily soil loss (tons/ha)") +
  theme_bw() +
  theme(axis.title = element_text(color = "black", size = 12),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(color = "black", size = 10),
        legend.position = "none") +
  scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
                labels = scales::trans_format("log10", scales::label_math(10^.x))),
  nrow = 3, labels = "auto")

State 1

Shrubland State

# TODO write state description

Community 1.1

Shrubland State: sagebrush and C3 perennial grass shrubland

# TODO write state description
dom_spp_grp2 <- get_dominant_spp(data = filter(plot_data_descriptive, PlantCommunity_fuzzy==2),
                                   species_cols = colnames(descriptive.df),
                                   user = user)

# fix weird names
dom_spp_grp2[dom_spp_grp2$SpeciesCode=="PSSP6", "ScientificName"] <- "Pseudoroegneria spicata"

dom_spp_grp2_pub <- filter(
  select(dom_spp_grp2, Indicator, ScientificName, CommonName, SpeciesCode, MeanCover, pct_plots),
               pct_plots >=10) %>%
  mutate(MeanCover = round(MeanCover, 2),
         pct_plots = round(pct_plots, 2),
         # make plant codes link to USDA Plants database
         SpeciesCode = paste0("[", SpeciesCode, "](https://plants.usda.gov/home/plantProfile?symbol=", SpeciesCode, ")")
         ) %>%
  setNames(c("Functional group", "Scientific name", "Common name", "USDA Plants code", "Mean cover (%)", "Percent of plots where present"))


format_tables_EDIT_style(data = dom_spp_grp2_pub,
                         caption = "Community 1.1 plant community composition. Species are listed by functional type in descending order of mean percent cover across all plot in Community 1.1. Mean cover and presence in plots were calculated based on LPI data (Herrick et al., 2017) from field plots (Nusser and Goebel, 1997; Toevs et al., 2011; Witwicki et al., 2017). For more information about each species, see the link to the USDA Plants database (USDA, NRCS, 2024).") %>%
  collapse_rows(columns = 1, valign = "top")
# TODO create pie charts of community composition by functional group. May need to change "production" to "percent cover" to fit with our data
# TODO create table of production (or percent cover) by functional type

# knitr::kable(x = dom_plant_spp_tab, caption = "Table 5. Annual production by plant type")
# TODO create table of ground cover by type

# knitr::kable(x = dom_plant_spp_tab, caption = "Table 6. Ground cover")
# TODO create table of func type percent cover at different heights

# knitr::kable(x = dom_plant_spp_tab, caption = "Table 7. Canopy structure (% cover)")

State 2

Open Woodland State

Community 2.1

Open woodland state: pinyon-juniper and gambel oak woodland

# TODO write state description
dom_spp_grp1 <- get_dominant_spp(data = filter(plot_data_descriptive, PlantCommunity_fuzzy==1),
                                   species_cols = colnames(descriptive.df),
                                   user = user)

# fix weird names
dom_spp_grp1[dom_spp_grp1$SpeciesCode=="PLPA2", "ScientificName"] <- "Plantago patagonica"

dom_spp_grp1_pub <- filter(
  select(dom_spp_grp1, Indicator, ScientificName, CommonName, SpeciesCode, MeanCover, pct_plots),
               pct_plots >=10) %>%
    mutate(MeanCover = round(MeanCover, 2),
         pct_plots = round(pct_plots, 2),
         # make plant codes link to USDA Plants database
         SpeciesCode = paste0("[", SpeciesCode, "](https://plants.usda.gov/home/plantProfile?symbol=", SpeciesCode, ")")
         ) %>%
  setNames(c("Functional group", "Scientific name", "Common name", "USDA Plants code", "Mean cover (%)", "Percent of plots where present"))


format_tables_EDIT_style(data = dom_spp_grp1_pub,
                         caption = "Community 2.1 plant community composition. Species are listed by functional type in descending order of mean percent cover across all plot in Community 2.1. Mean cover and presence in plots were calculated based on LPI data (Herrick et al., 2017) from field plots (Nusser and Goebel, 1997; Toevs et al., 2011; Witwicki et al., 2017). For more information about each species, see the link to the USDA Plants database (USDA, NRCS, 2024).") %>%
  collapse_rows(columns = 1, valign = "top")

State 3

Grassland State

# TODO write state description

Community 3.1

Grassland State: C3 perennial grassland

# TODO write state description
dom_spp_grp4 <- get_dominant_spp(data = filter(plot_data_descriptive, PlantCommunity_fuzzy==4),
                                   species_cols = colnames(descriptive.df),
                                   user = user)

# fix weird names
dom_spp_grp4[dom_spp_grp4$SpeciesCode=="OPUNT", "ScientificName"] <- "Opuntia spp."

dom_spp_grp4_pub <- filter(
  select(dom_spp_grp4, Indicator, ScientificName, CommonName, SpeciesCode, MeanCover, pct_plots),
               pct_plots >=10) %>%
  mutate(MeanCover = round(MeanCover, 2),
         pct_plots = round(pct_plots, 2),
         # make plant codes link to USDA Plants database
         SpeciesCode = paste0("[", SpeciesCode, "](https://plants.usda.gov/home/plantProfile?symbol=", SpeciesCode, ")")
         ) %>%
  setNames(c("Functional group", "Scientific name", "Common name", "USDA Plants code", "Mean cover (%)", "Percent of plots where present"))


format_tables_EDIT_style(data = dom_spp_grp4_pub,
                         caption = "Community 3.1 plant community composition. Species are listed by functional type in descending order of mean percent cover across all plot in Community 3.1. Mean cover and presence in plots were calculated based on LPI data (Herrick et al., 2017) from field plots (Nusser and Goebel, 1997; Toevs et al., 2011; Witwicki et al., 2017). For more information about each species, see the link to the USDA Plants database (USDA, NRCS, 2024).") %>%
  collapse_rows(columns = 1, valign = "top")

State 4

Invaded State

# TODO write state description

Community 4.1

Invaded State: Non-native annual grass and sagebrush shrubland

# TODO write state description
dom_spp_grp3 <- get_dominant_spp(data = filter(plot_data_descriptive, PlantCommunity_fuzzy==3),
                                   species_cols = colnames(descriptive.df),
                                   user = user)

# fix weird names
dom_spp_grp3[dom_spp_grp3$SpeciesCode=="PLPA2", "ScientificName"] <- "Plantago patagonica"
dom_spp_grp3[dom_spp_grp3$SpeciesCode=="CETE5", "ScientificName"] <- "Ceratocephala testiculata"

dom_spp_grp3_pub <- filter(
  select(dom_spp_grp3, Indicator, ScientificName, CommonName, SpeciesCode, MeanCover, pct_plots),
               pct_plots >=10) %>%
  mutate(MeanCover = round(MeanCover, 2),
         pct_plots = round(pct_plots, 2),
         # make plant codes link to USDA Plants database
         SpeciesCode = paste0("[", SpeciesCode, "](https://plants.usda.gov/home/plantProfile?symbol=", SpeciesCode, ")")
         ) %>%
  setNames(c("Functional group", "Scientific name", "Common name", "USDA Plants code", "Mean cover (%)", "Percent of plots where present"))


format_tables_EDIT_style(data = dom_spp_grp3_pub,
                         caption = "Community 4.1 plant community composition. Species are listed by functional type in descending order of mean percent cover across all plot in Community 4.1. Mean cover and presence in plots were calculated based on LPI data (Herrick et al., 2017) from field plots (Nusser and Goebel, 1997; Toevs et al., 2011; Witwicki et al., 2017). For more information about each species, see the link to the USDA Plants database (USDA, NRCS, 2024).") %>%
  collapse_rows(columns = 1, valign = "top")
# ### Additional community tables
# 
# # TODO create table of production (or percent cover) by species, with species linking to their USDA Plants profile
# 
# # knitr::kable(x = dom_plant_spp_tab, caption = "Table 8. Community 1.1 plant community composition")
# 
# 
# # Interpretations
# 
# ## Animal community
# 
# TODO write wildlife narrative
# 
# ## Hydrological functions
# 
# TODO write hydrology narrative
# 
# ## Recreational uses
# 
# TODO write recreation narrative
# 
# ## Wood products
# 
# TODO write wood production narrative (probably "None")
# 
# ## Other products
# 
# TODO grazing narrative could go here
# 
# # Supporting information

Any use of trade, firm, or product names is for descriptive purposes only and does not imply endorsement by the U.S. Government.

References

Brungard, C., Nauman, T., Duniway, M., Veblen, K., Nehring, K., White, D., Salley, S., and Anchang, J. (2021). Regional ensemble modeling reduces uncertainty for digital soil mapping. Geoderma 397:114998.

Calinski T., and Harabasz J. (1974). A Dendrite Method for Cluster Analysis. Communications in Statistics – Theory and Methods, 3(1), 1–27.

Davies D.L., and Bouldin D.W. (1979). A cluster separation measure. IEEE Transactions on Pattern Analysis and Machine Intelligence, 1(2), 224–227.

Duniway, M. C., A. Knight, T. Nauman, T. B. B. Bishop, S. E. McCord, N. P. Webb, C. J. Williams, and J. T. Humphries. In review. Quantifying regional ecological dynamics using agency monitoring data, ecological site descriptions, and ecological site groups. Rangeland Ecology and Management.

Dunn, J. (1974). Well separated clusters and optimal fuzzy partitions. Journal Cybernetics, 4(1), 95–104.

Edwards, B. L., N. P. Webb, M. S. Galloza, J. W. Van Zee, E. M. Courtright, B. F. Cooper, L. J. Metz, J. E. Herrick, G. S. Okin, M. C. Duniway, J. Tatarko, N. H. Tedala, D. N. Moriasi, B. A. Newingham, F. B. Pierson, D. Toledo, and R. S. Van Pelt. 2022. Parameterizing an aeolian erosion model for rangelands. Aeolian Research 54:100769.

Hernandez, M., M. A. Nearing, O. Z. Al-Hamdan, F. B. Pierson, G. Armendariz, M. A. Weltz, K. E. Spaeth, C. J. Williams, S. K. Nouwakpo, D. C. Goodrich, C. L. Unkrich, M. H. Nichols, and C. D. Holifield Collins. 2017. The Rangeland Hydrology and Erosion Model: A Dynamic Approach for Predicting Soil Loss on Rangelands. Water Resources Research 53:9368-9391.

Herrick, J. E., Van Zee, J. W., McCord, S. E., Courtright, E. M., Karl, J. W., & Burkett, L. M. (2017). Monitoring Manual for Grassland, Shrubland, and Savanna Ecosystems (Second ed. Vol. 1: Core Methods). Las Cruces, New Mexico: USDA-ARS Jornada Experimental Range.

Hubert L.J., Levin J.R. (1976). A General Statistical Framework for Assessing Categorical Clustering in Free Recall. Psychological Bulletin, 83(6), 1072–1080.

Kaufman, L., and P. J. Rousseeuw. 1990. Finding groups in data: an introduction to cluster analysis. Probability and Mathematical Statistics. Applied Probability and Statistics.

Maechler, M., Rousseeuw, P., Struyf, A., Hubert, M., Hornik, K.(2021). cluster: Cluster Analysis Basics and Extensions. R package version 2.1.2.

McCord, S. E., Brehm, J. R., Burnett, S. H., Dietrich, C., Edwards, B., Metz, L. J., Hernandez Narvaez, M., Pierson, F., Ramirez, K. S., Stauffer, N. G., Webb, N. P., and Tweedie, C. E. (2022). A framework and toolset for standardizing agroecosystem indicators. Ecological Indicators, 144. doi:10.1016/j.ecolind.2022.109511

McCord, S. E., Webb, N. P., Bestelmeyer, B. T., Bonefont, K., Brehm, J. R., Brown, J., Courtright, E. M., Dietrich, C., Duniway, M. C., Edwards, B., Fraser, C. Herrick, J. E., Knight, A. C., Metz, L., Van Zee J. W., and Tweedie, C. (2023). "The Landscape Data Commons: A system for standardizing, accessing, and applying large environmental datasets for agroecosystem research and management." Agricultural & Environmental Letters 8(2).

Milligan G.W. (1980). An Examination of the effect of six types of error perturbation on fifteen clustering algorithms. Psychometrika, 45(3), 325–342.

Milligan G.W. (1981). A Monte Carlo study of thirty internal criterion measures for cluster analysis. Psychometrika, 46(2), 187–199.

Nauman, T. W., and M. C. Duniway. 2020. A hybrid approach for predictive soil property mapping using conventional soil survey data. Soil Science Society of America Journal 84:1170-1194.

Nauman, T. W., Burch, S. S., Humphries, J. T., Knight, A. C., & Duniway, M. C. (2022). A Quantitative Soil-Geomorphic Framework for Developing and Mapping Ecological Site Groups. Rangeland Ecology & Management, 81, 9-33. doi:10.1016/j.rama.2021.11.003

Nusser, S. M., and J. J. Goebel. (1997). The National Resources Inventory: A long-term multi-resource monitoring programme. Environmental and Ecological Statistics 4:181-204.

Oksanen, J., Simpson, G., Blanchet, F., Kindt, R., Legendre, P., Minchin, P., O'Hara, R., Solymos, P., Stevens, M., Szoecs, E., Wagner, H., Barbour, M., Bedward, M., Bolker, B., Borcard, D., Carvalho, G., Chirico, M., De Caceres, M., Durand, S., Evangelista, H., FitzJohn, R., Friendly, M., Furneaux, B., Hannigan, G., Hill, M., Lahti, L., McGlinn, D., Ouellette, M., Ribeiro Cunha, E., Smith, T., Stier, A., Ter Braak, C., Weedon, J. (2022). vegan: Community Ecology Package. R package version 2.6-4, https://CRAN.R-project.org/package=vegan.

PRISM Climate Group. (2020). PRISM climate data. Oregon State University.

R Core Team (2022). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/.

Rousseeuw, P. (1987). Silhouettes: A graphical aid to the interpretation and validation of cluster analysis. Journal of Computational and Applied Mathematics, 20, 53–65.

Sarle W.S. (1983). SAS Technical Report A-108, Cubic Clustering Criterion. SAS Institute Inc. Cary, NC.

Soil Survey Staff, Natural Resources Conservation Service, United States Department of Agriculture. Soil Survey Geographic Database (SSURGO). Available online at https://www.nrcs.usda.gov/resources/data-and-reports/soil-survey-geographic-database-ssurgo. Accessed 2022.

Toevs, G. R., Karl, J. W., Taylor, J. J., Spurrier, C. S., Karl, M. S., Bobo, M. R., and Herrick, J. E. (2011). Consistent indicators and methods and a scalable sample design to meet assessment, inventory, and monitoring information needs across scales. Rangelands 33:14-20.

Trabucco, A., and R. J. Zomer. (2018). Global aridity index and potential evapotranspiration (ET0) climate database v2. Available at: the CGIAR-CSI GeoPortal at https://cgiarcsi.community Accessed 9/28/2020.

USDA, NRCS. (2024). The PLANTS Database (http://plants.usda.gov, 03/28/2024). National Plant Data Team, Greensboro, NC USA.

Witwicki, D., H. Thomas, R. Weissinger, A. Wight, S. Topp, S. L. Garman, and M. Miller. (2017). Upland vegetation and soils monitoring protocol for park units in the Northern Colorado Plateau Network.

# # Reference sheet
# 
# [Interpreting Indicators of Rangeland Health](https://wiki.landscapetoolbox.org/doku.php/field_methods:rangeland_health_assessment_i.e._indicators_of_rangeland_health) is a qualitative assessment protocol used to determine ecosystem condition based on benchmark characteristics described in the Reference Sheet. A suite of 17 (or more) indicators are typically considered in an assessment. The ecological site(s) representative of an assessment location must be known prior to applying the protocol and must be verified based on soils and climate. Current plant community cannot be used to identify the ecological site.
# 
# ```r
# # TODO create table with author, author contact info, date, approver, approved date, and "Composition (Indicators 10 and 12) based on" info for the IIRH reference sheet
# 
# # knitr::kable(x = dom_plant_spp_tab)
# 
# 
# ## Indicators
# 
# TODO fill in the 17 IIRH indicators for the reference state if available
# 
# TODO link to PDF of the reference sheet
# 
# # Print options
# 
# TODO look into options for printing from the RMarkdown HTML format
# # save data versions
# ard_folder <- "C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/ESG/STM/Data/Plot_data_used_Duniway_et_al_2024/Analysis_ready_data"
# 
# write.csv(x = plot_data_first,
#           file = file.path(ard_folder, "contains_NRI_do_not_sync", paste0("plot_data_first_", Sys.Date(), ".csv")),
#           row.names = F)
# 
# write.csv(x = plot_data_first_tall,
#           file = file.path(ard_folder, "contains_NRI_do_not_sync", paste0("plot_data_first_tall_", Sys.Date(), ".csv")),
#           row.names = F)
# 
# write.csv(x = plot_data_descriptive,
#           file = file.path(ard_folder, "contains_NRI_do_not_sync", paste0("plot_data_descriptive_", Sys.Date(), ".csv")),
#           row.names = F)
# 
# # save state species lists
# write.csv(x = dom_spp_grp2,
#           file = file.path(output_figure_folder, "Tables",
#                            paste0(target_ESG, "_Shrubland_species.csv")),
#           row.names = F)
# 
# write.csv(x = dom_spp_grp1,
#           file = file.path(output_figure_folder, "Tables",
#                            paste0(target_ESG, "_Woodland_species.csv")),
#           row.names = F)
# 
# write.csv(x = dom_spp_grp4,
#           file = file.path(output_figure_folder, "Tables",
#                            paste0(target_ESG, "_Grassland_species.csv")),
#           row.names = F)
# 
# write.csv(x = dom_spp_grp3,
#           file = file.path(output_figure_folder, "Tables",
#                            paste0(target_ESG, "_Invaded_species.csv")),
#           row.names = F)
# 1. plant functional group join plot with ordination
# 2. lattice of abiotic gradients that distinguish the states
# aero
# rhem if available
# soil stability
# climate?
# 3. NPP
# 4. biodiversity
# 5 remote sensing variance
# 6. Functional type cover grouped by functional type and colored by state
# 7. Disturbance and land use
# MTBS
# Grazing?
# PJ treatments
# SWreGAP protection classes

# 0. Set up labels and palettes. Optionally, consider excluding plots with lower group membership values, too.

#group_labels_short <- c("Grassland", "Perennial grass loss", "Shrubland", "Mesic shrubland", "Invaded")
plot_data_descriptive$PlantCommunity_fuzzy_reordered <- factor(plot_data_descriptive$PlantCommunity_fuzzy,
                                                               levels = c("1", "2", "3", "4"),
                                                               labels = c("Open woodland", "Shrubland", "Invaded",
                                                                          "Grassland"),
                                                               ordered = T)
pal_veg2_reordered <- c(pal_veg2[1], pal_veg2[2], pal_veg2[3], pal_veg2[4])

plot_data_descriptive <- left_join(plot_data_descriptive, select(fuzzy_cluster_membership,
                                                                 PlotCode,
                                                                 Group_1_membership,
                                                                 Group_2_membership,
                                                                 Group_3_membership,
                                                                 Group_4_membership)) %>%
  mutate(Best_group_membership = ifelse(PlantCommunity_fuzzy==1,
                                        yes = Group_1_membership,
                                        no = ifelse(PlantCommunity_fuzzy==2,
                                                    yes = Group_2_membership,
                                                    no = ifelse(PlantCommunity_fuzzy==3,
                                                                yes = Group_3_membership,
                                                                no = ifelse(PlantCommunity_fuzzy==4,
                                                                            yes = Group_4_membership,
                                                                            no = NA)))))

hist(filter(plot_data_descriptive, PlantCommunity_fuzzy==1)$Group_1_membership)
hist(filter(plot_data_descriptive, PlantCommunity_fuzzy==2)$Group_2_membership)
hist(filter(plot_data_descriptive, PlantCommunity_fuzzy==3)$Group_3_membership)
hist(filter(plot_data_descriptive, PlantCommunity_fuzzy==4)$Group_4_membership)
hist(plot_data_descriptive$Best_group_membership)
quantile(x=filter(plot_data_descriptive, PlantCommunity_fuzzy==1)$Group_1_membership, probs = c(0.1, 0.25, 0.5))
quantile(x=filter(plot_data_descriptive, PlantCommunity_fuzzy==2)$Group_2_membership, probs = c(0.1, 0.25, 0.5))
quantile(x=filter(plot_data_descriptive, PlantCommunity_fuzzy==3)$Group_3_membership, probs = c(0.1, 0.25, 0.5))
quantile(x=filter(plot_data_descriptive, PlantCommunity_fuzzy==4)$Group_4_membership, probs = c(0.1, 0.25, 0.5))
quantile(x=plot_data_descriptive$Best_group_membership, probs = c(0.1, 0.25, 0.5))

# Trying out excluding plots with less than 0.6 membership value in their best group.
# Note that this excludes a greater proportion of plots from the woodland
# than from the other clusters

# plot_data_descriptive_memb <- filter(plot_data_descriptive, Best_group_membership >= 0.6)
# 
# nrow(plot_data_descriptive) - nrow(plot_data_descriptive_memb)
# nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==1)) - nrow(filter(plot_data_descriptive_memb, PlantCommunity_fuzzy==1))
# nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==2)) - nrow(filter(plot_data_descriptive_memb, PlantCommunity_fuzzy==2))
# nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==3)) - nrow(filter(plot_data_descriptive_memb, PlantCommunity_fuzzy==3))
# nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==4)) - nrow(filter(plot_data_descriptive_memb, PlantCommunity_fuzzy==4))

# 1. plant functional group join plot with ordination
species_scores <- as.data.frame(scores(ord, choices = 1:3, display = "species")) # get axis scores for each response type
plot_scores <- as.data.frame(scores(ord, choices = 1:3, display = "sites")) # get the axis scores for each site
plot_scores$PlotCode <- rownames(plot_scores)
plot_scores <- left_join(plot_scores, select(plot_data_descriptive, PlotCode, PlantCommunity_fuzzy, PlantCommunity_fuzzy_reordered))

# this bit gets the arrow length and direction info
ord.fit <- envfit(ord ~ AH_PerenForbGrassCover + 
                    AH_AnnForbGrassCover +
                    AH_ShrubCover +
                    AH_TreeCover +
                    TotalFoliarCover,
                  data = plot_data_descriptive,
                  na.rm=T,
                  choices=1:3)

vect_functypes <- as.data.frame(ord.fit$vectors$arrows*sqrt(ord.fit$vectors$r))
vect_functypes$var <- rownames(vect_functypes)
vect_functypes$r <- ord.fit$vectors$r

vect_functypes$varnames <- c("Perennial herbaceous",
                             "Annual herbaceous",
                             "Shrub",
                             "Tree",
                             "Total foliar")

# # since I have 3 ordination axes, I'm filtering just to axis 1 and 2 for this plot
vect_functypes_12 <- filter(vect_functypes, abs(NMDS3)<abs(NMDS1)|abs(NMDS3)<abs(NMDS2))
vect_functypes_32 <- filter(vect_functypes, abs(NMDS1)<abs(NMDS3)|abs(NMDS1)<abs(NMDS2))
vect_functypes_13 <- filter(vect_functypes, abs(NMDS2)<abs(NMDS1)|abs(NMDS2)<abs(NMDS3))

# make pretty plot ggplot!
ord_pretty_plot <- function(points = plot_scores,
                            vectors = vect_functypes,
                            x_axis = "NMDS1",
                            y_axis = "NMDS2",
                            palette = pal_veg2_reordered,
                            #group_labels = group_labels_short,
                            labels = T){
  ord_plot <- ggplot(data = points, aes_string(x=x_axis, y=y_axis)) +
    geom_point(data = points, aes_string(x=x_axis, y=y_axis, color="PlantCommunity_fuzzy_reordered"), size=.5) +
    stat_ellipse(data=points, aes_string(x=x_axis, y=y_axis, color="PlantCommunity_fuzzy_reordered"),
                 level = 0.5, lwd = 1) +
    scale_color_manual(values =palette, name="Cluster state"#, # set custom colors for ellipses
                       #labels=group_labels
                       ) +
    coord_fixed() +
    lims(x=c(-1.25,1.25), y=c(-1.25,1.25)) +
    theme_bw() +
    theme(axis.text = element_text(size = 7, color = "black"),
          axis.title = element_text(size = 8),
          legend.text = element_text(size = 7),
          legend.title = element_text(size = 8),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())
  if(!is.null(vectors)){
    ord_plot <- ord_plot + geom_segment(data = vectors, # make the arrows with environmental vars
                 aes_string(x="0", xend=x_axis, y="0", yend=y_axis),
                 arrow = arrow(length = unit(0.25, "cm")), lwd=1, colour="black", inherit.aes = F)

     if(labels){
    ord_plot <- ord_plot + ggrepel::geom_label_repel(data = vectors, # put labels on the arrows
                              aes_string(x=x_axis, y=y_axis, label="varnames"), size=3, colour="black")
     } 
  }

  return(ord_plot)
}

ord_plot12 <- ord_pretty_plot(x_axis = "NMDS1", y_axis = "NMDS2", vectors = vect_functypes_12)
ord_plot32 <- ord_pretty_plot(x_axis = "NMDS3", y_axis = "NMDS2", vectors = vect_functypes_32)
ord_plot13 <- ord_pretty_plot(x_axis = "NMDS1", y_axis = "NMDS3", vectors = vect_functypes_13)
ord_plot_legend <- cowplot::get_legend(ord_plot12)

ord_plot_all <- cowplot::plot_grid(ord_plot12 + theme(legend.position = "none"), ord_plot32 + theme(legend.position = "none"),
                   ord_plot13 + theme(legend.position = "none"), ord_plot_legend,
                   nrow = 2, labels = c("a", "b", "c"))

today <- Sys.Date()
highrestiff(plot_obj = ord_plot_all,
            file = paste0(output_figure_folder, "/Ordination/SW_SandyLoamyUplands_NMDS_ellipses_functypes_", today, ".tif"),
            width_in = 6,
            height_in = 6,
            resolution_dpi = 300)

ord_plot12_nolabels <- ord_pretty_plot(x_axis = "NMDS1", y_axis = "NMDS2", vectors = vect_functypes_12, labels = F)
ord_plot32_nolabels <- ord_pretty_plot(x_axis = "NMDS3", y_axis = "NMDS2", vectors = vect_functypes_32, labels = F)
ord_plot13_nolabels <- ord_pretty_plot(x_axis = "NMDS1", y_axis = "NMDS3", vectors = vect_functypes_13, labels = F)
ord_plot_legend_nolabels <- cowplot::get_legend(ord_plot12_nolabels)

ord_plot_all_nolabels <- cowplot::plot_grid(ord_plot12_nolabels + theme(legend.position = "none"), ord_plot32_nolabels + theme(legend.position = "none"),
                   ord_plot13_nolabels + theme(legend.position = "none"), ord_plot_legend_nolabels,
                   nrow = 2, labels = c("a", "b", "c"))

today <- Sys.Date()
highrestiff(plot_obj = ord_plot_all_nolabels,
            file = paste0(output_figure_folder, "/Ordination/SW_SandyLoamyUplands_NMDS_ellipses_functypes_nolabels_", today, ".tif"),
            width_in = 6,
            height_in = 6,
            resolution_dpi = 300)

# Duniway et al. 2023 figure 2
ord_plot12_noarrows <- ord_pretty_plot(x_axis = "NMDS1", y_axis = "NMDS2", vectors = NULL, labels = F)
ord_plot32_noarrows <- ord_pretty_plot(x_axis = "NMDS3", y_axis = "NMDS2", vectors = NULL, labels = F)
ord_plot13_noarrows <- ord_pretty_plot(x_axis = "NMDS1", y_axis = "NMDS3", vectors = NULL, labels = F)
ord_plot_legend_noarrows <- cowplot::get_legend(ord_plot12_noarrows)

ord_plot_all_noarrows <- cowplot::plot_grid(ord_plot12_noarrows + theme(legend.position = "none"), ord_plot32_noarrows + theme(legend.position = "none"),
                   ord_plot13_noarrows + theme(legend.position = "none"), ord_plot_legend_noarrows,
                   nrow = 2, labels = c("a", "b", "c"))
ord_plot_all_noarrows

today <- Sys.Date()
highrestiff(plot_obj = ord_plot_all_noarrows,
            file = paste0(output_figure_folder, "/Ordination/SW_SandyLoamyUplands_NMDS_ellipses_functypes_noarrows_", today, ".tif"),
            width_in = 7.48,
            height_in = 7.48,
            resolution_dpi = 500)

# NMDS axis gradients, Duniway et al 2023 figure 3 
NMDS_gradient_data_tall <- left_join(plot_data_descriptive, plot_scores) %>% 
  select(PlotCode, 
         NMDS1, NMDS2, NMDS3,
         BareSoilCover, AH_ShrubCover, AH_SubShrubCover, AH_PerenForbGrassCover,
         AH_AnnForbGrassCover, AH_TreeCover, TotalFoliarCover
         ) %>% 
  tidyr::pivot_longer(cols = c(BareSoilCover, AH_ShrubCover, AH_SubShrubCover, 
                               AH_PerenForbGrassCover, AH_AnnForbGrassCover, 
                               AH_TreeCover, TotalFoliarCover),
                      names_to = "CoverType",
                      values_to = "PctCover"
         )

ord.fit.test <- envfit(ord ~ AH_PerenForbGrassCover + 
                    AH_AnnForbGrassCover +
                      AH_SubShrubCover +
                    AH_ShrubCover +
                    AH_TreeCover +
                    TotalFoliarCover +
                      BareSoilCover,
                  data = plot_data_descriptive,
                  na.rm=T,
                  choices=1:3)

vect_functypes_test <- as.data.frame(ord.fit.test$vectors$arrows*sqrt(ord.fit.test$vectors$r))
vect_functypes_test$var <- rownames(vect_functypes_test)
vect_functypes_test$r <- ord.fit.test$vectors$r
#View(vect_functypes_test)

NMDS1_gradients_plot <- ggplot(data = filter(NMDS_gradient_data_tall,
                                             CoverType %in% c("TotalFoliarCover", "AH_ShrubCover", "AH_PerenForbGrassCover")),
                               mapping = aes(x=NMDS1, y=PctCover, color=CoverType, fill = CoverType)) +
    geom_point(alpha = 0.5, size=.5) +
    geom_smooth(method = "gam", alpha = 0.2, lwd = 1.2) + # linear model, lm, might work fine here
    ylab("Cover (%)") +
    ylim(c(0, 100)) +
    scale_color_manual(name = "Veg.", 
                         labels = c("Peren. herb.", "Shrub", "Total foliar"),
                         guide = "legend",
                       values = c("#88cdee", "#127733", "#cc6576")) +
    scale_fill_manual(name = "Veg.", 
                        labels = c("Peren. herb.", "Shrub", "Total foliar"),
                        guide = "legend",
                       values = c("#88cdee", "#127733", "#cc6576")) +
    #xlim(c(-1, 1)) +
  guides(color = guide_legend(nrow=3, byrow=T)) +
    theme_bw() +
    theme(axis.text = element_text(size = 7, color = "black"),
          axis.title = element_text(size = 8, color = "black"),
          legend.position = "top",
          legend.title = element_text(size = 8, color = "black"),
          legend.text = element_text(size = 8, color = "black"),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())
NMDS1_gradients_plot

NMDS2_gradients_plot <- ggplot(data = filter(NMDS_gradient_data_tall,
                                             CoverType %in% c("AH_AnnForbGrassCover", "AH_TreeCover")),
                               mapping = aes(x=NMDS2, y=PctCover, color=CoverType, fill = CoverType)) +
    geom_point(alpha = 0.5, size=.5) +
    geom_smooth(method = "gam", alpha = 0.2, lwd = 1.2) +
    ylab("Cover (%)") +
    ylim(c(0, 100)) +
    scale_color_manual(name = "Veg.", 
                         labels = c("Ann. herb.", "Tree"),
                         guide = "legend",
                       values = c("#88cdee", "#cc6576")) +
    scale_fill_manual(name = "Veg.", 
                        labels = c("Ann. herb.", "Tree"),
                        guide = "legend",
                       values = c("#88cdee", "#cc6576")) +
    #xlim(c(-1, 1)) +
  guides(color = guide_legend(nrow=2, byrow=T)) +
    theme_bw() +
    theme(axis.text = element_text(size = 7, color = "black"),
          axis.title = element_text(size = 8, color = "black"),
          legend.position = "top",
          legend.title = element_text(size = 8, color = "black"),
          legend.text = element_text(size = 8, color = "black"),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())
NMDS2_gradients_plot

NMDS3_gradients_plot <- ggplot(data = filter(NMDS_gradient_data_tall,
                                             CoverType %in% c("AH_AnnForbGrassCover", "AH_PerenForbGrassCover")),
                               mapping = aes(x=NMDS3, y=PctCover, color=CoverType, fill = CoverType)) +
    geom_point(alpha = 0.5, size=.5) +
    geom_smooth(method = "gam", alpha = 0.2, lwd = 1.2) +
    ylab("Cover (%)") +
    ylim(c(0, 100)) +
    scale_color_manual(name = "Veg.", 
                         labels = c("Ann. herb.", "Peren. herb."),
                         guide = "legend",
                       values = c("#88cdee", "#cc6576")) +
    scale_fill_manual(name = "Veg.", 
                        labels = c("Ann. herb.", "Peren. herb."),
                        guide = "legend",
                       values = c("#88cdee", "#cc6576")) +
    #xlim(c(-1, 1)) +
  guides(color = guide_legend(nrow=2, byrow=T)) +
    theme_bw() +
    theme(axis.text = element_text(size = 7, color = "black"),
          axis.title = element_text(size = 8, color = "black"),
          legend.position = "top",
          legend.title = element_text(size = 8, color = "black"),
          legend.text = element_text(size = 8, color = "black"),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())
NMDS3_gradients_plot

NMDS_gradients_plots <- cowplot::plot_grid(
  cowplot::get_legend(NMDS1_gradients_plot), cowplot::get_legend(NMDS2_gradients_plot), cowplot::get_legend(NMDS3_gradients_plot),
  NMDS1_gradients_plot + theme(legend.position = "none"), NMDS2_gradients_plot + theme(legend.position = "none"), NMDS3_gradients_plot + theme(legend.position = "none"),
  nrow = 2, labels = c("a", "b", "c", "", "", ""), rel_heights = c(0.3, 1))
NMDS_gradients_plots

today <- Sys.Date()
highrestiff(plot_obj = NMDS_gradients_plots,
            file = paste0(output_figure_folder, "/Ordination/SW_SandyLoamyUplands_NMDSgradients_GAM_", today, ".tif"),
            #width_in = 3,
            width_in = 7.48,
            #height_in = 6.5,
            height_in = 3.5,
            resolution_dpi = 500)


# 2. lattice of abiotic gradients that distinguish the states
violin_pretty_plot <- function(dat = plot_data_descriptive,
                               y_var = "horizontal_flux_total_MN",
                               y_lab = expression(Sediment~mass~flux~(Q)~g~m^{-1}~day^{-1}),
                               fill_var = NULL,
                               fill_name = "Plant community",
                               scaling = "area"){
    if(!is.null(fill_var)){
    violin_plot <- ggplot(data = dat,
       aes_string(x="PlantCommunity_fuzzy_reordered", y=y_var, fill = fill_var)) +
      scale_fill_manual(values =pal_veg2_reordered, name= fill_name, # set custom colors for ellipses
                       #labels=group_labels_short
                      ) 
    }else{
   violin_plot <- ggplot(data = dat,
       aes_string(x="PlantCommunity_fuzzy_reordered", y=y_var))
    }

  violin_plot <- violin_plot +
  geom_violin(scale = scaling, trim = F, na.rm = T) +
  geom_boxplot(width=0.1) +
  #scale_x_discrete(labels=group_labels_short) +
  xlab(NULL) +
  ylab(y_lab) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"),
        axis.text.y = element_text(size = 10, color = "black"),
          axis.title = element_text(size = 12),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
        legend.position = "none") 

  return(violin_plot)
}

box_pretty_plot <- function(dat = plot_data_descriptive,
                               y_var = "horizontal_flux_total_MN",
                               y_lab = expression(Sediment~mass~flux~(Q)~g~m^{-1}~day^{-1}),
                            fill_var = "PlantCommunity_fuzzy_reordered",
                            palette = pal_veg2_reordered,
                            include_plot_count = T){
  if(include_plot_count){
   group_labels <- paste0(as.character(levels(dat$PlantCommunity_fuzzy_reordered)),
                               "
(n = ",
                               c(nrow(dat[as.integer(dat$PlantCommunity_fuzzy_reordered)==1 & !is.na(dat[[y_var]]), ]),
                                 nrow(dat[as.integer(dat$PlantCommunity_fuzzy_reordered)==2 & !is.na(dat[[y_var]]), ]),
                                 nrow(dat[as.integer(dat$PlantCommunity_fuzzy_reordered)==3 & !is.na(dat[[y_var]]), ]),
                                 nrow(dat[as.integer(dat$PlantCommunity_fuzzy_reordered)==4 & !is.na(dat[[y_var]]), ])
                                  ),
                               ")") 
  }else{
    group_labels <- as.character(levels(dat$PlantCommunity_fuzzy_reordered))
  }
  box_plot <- ggplot(data = dat,
       aes_string(x="PlantCommunity_fuzzy_reordered", y=y_var, fill = fill_var)) +
    geom_boxplot() +
    scale_x_discrete(labels=group_labels) +
    scale_fill_manual(values =palette, name="Cluster state", # set custom colors for ellipses
                       #labels=group_labels_short
                      ) +
  xlab(NULL) +
  ylab(y_lab) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7, color = "black"),
        axis.text.y = element_text(size = 7, color = "black"),
          axis.title = element_text(size = 8, color = "black"),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
        legend.position = "none")

  return(box_plot)
}

rain_pretty_plot <- function(dat = plot_data_descriptive,
                               y_var = "pdsi1y",
                               y_lab = "% of PDSI observations <-4
for 1 yr. prior to field samling",
                            fill_var = "PlantCommunity_fuzzy_reordered",
                            palette = pal_veg2_reordered){
  group_labels <- paste0(as.character(levels(dat$PlantCommunity_fuzzy_reordered)),
                               "
(n = ",
                               c(nrow(dat[as.integer(dat$PlantCommunity_fuzzy_reordered)==1 & !is.na(dat[[y_var]]), ]),
                                 nrow(dat[as.integer(dat$PlantCommunity_fuzzy_reordered)==2 & !is.na(dat[[y_var]]), ]),
                                 nrow(dat[as.integer(dat$PlantCommunity_fuzzy_reordered)==3 & !is.na(dat[[y_var]]), ]),
                                 nrow(dat[as.integer(dat$PlantCommunity_fuzzy_reordered)==4 & !is.na(dat[[y_var]]), ])
                                  ),
                               ")")

  rain_plot <- ggplot(data = dat,
       aes_string(x="PlantCommunity_fuzzy_reordered", y=y_var, fill = fill_var, color = fill_var)) +
  ggdist::stat_halfeye(#aes(fill= fill_var),
    adjust = .5, 
    width = .6, 
    .width = 0, 
    justification = -.2, 
    point_colour = NA
  ) + 
  geom_point(color="black",
    ## draw horizontal lines instead of points
    shape = 95,
    size = 5,
    alpha = .2
  ) +
  geom_boxplot(#aes(color = fill_var),
    width = .15, 
    outlier.shape = NA,
    fill = "transparent",
    lwd = 1,
    fatten = 2
  ) +
  scale_color_manual(values =palette, name="Plant community",
                     aesthetics = c("color", "fill")
                     ) +
  coord_cartesian(xlim = c(1.2, NA), clip = "off") +
  scale_x_discrete(labels=group_labels) +
  xlab(NULL) +
  ylab(y_lab) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"),
        axis.text.y = element_text(size = 10, color = "black"),
          axis.title = element_text(size = 12),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
        legend.position = "none")

  return(rain_plot)
}

# some numbers on which plots and states have AERO and RHEM data available - not even across states!
erosion_data_availability <- plot_data_descriptive %>% 
  group_by(PlantCommunity_fuzzy_reordered) %>% 
  summarise(n = n(),
            n_NCPN = length(which(SourceKey=="IM_NCPN")),
            prp_NCPN = length(which(SourceKey=="IM_NCPN"))/n(),
            n_noAERO = length(which(is.na(horizontal_flux_total_MN))),
            prp_noAERO = length(which(is.na(horizontal_flux_total_MN)))/n(),
            n_noRHEM = length(which(is.na(Soil_Loss_Long_Term_MEAN))),
            prp_noRHEM = length(which(is.na(Soil_Loss_Long_Term_MEAN)))/n())

# today <- Sys.Date()
# write.csv(x = erosion_data_availability,
#           file = paste0(output_figure_folder, "/SW_SandyLoamyUplands_erosion_data_availability_", today, ".csv"),
#           row.names = F)

# aero, Duniway et al 2023 figure 7b
aero_plot <- box_pretty_plot(dat = filter(plot_data_descriptive, !is.na(horizontal_flux_total_MN)),
                             y_var = "horizontal_flux_total_MN",
                             #y_lab = expression(Sediment~mass~flux~(Q)~g~m^{-1}~day^{-1}),
                             include_plot_count = F,
                            y_lab = "Sediment mass
flux (Q) g/m/day"#,
                             #scaling = "width"
) + 
  scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
                labels = scales::trans_format("log10", scales::label_math(10^.x))) #+ 
#annotation_logticks(sides = "l")
aero_plot

# soil stability, Duniway et al 2023 figure 7c
soilstab_plot <- box_pretty_plot(dat = plot_data_descriptive,
                                 y_var = "SoilStab_all",
                                 y_lab = "Soil stability
index",
                                 include_plot_count = F)
soilstab_plot

# climate?
AI_plot <- box_pretty_plot(dat = plot_data_descriptive,
                              y_var = "AI",
                              y_lab = "Aridity index",
                           include_plot_count = F)
AI_plot

ppt_plot <- box_pretty_plot(dat = plot_data_descriptive,
                               y_var = "Ann_ppt_mm",
                               y_lab = "Mean ann. ppt. (mm)",
                            include_plot_count = F)
ppt_plot

tmean_plot <- box_pretty_plot(dat = plot_data_descriptive,
                               y_var = "Ann_tmean",
                               y_lab = "Mean ann. temp. (C)",
                              include_plot_count = F)
tmean_plot

# rhem
# Duniway et al 2023 figure 7d
rhem_plot_runoff <- box_pretty_plot(dat = plot_data_descriptive,
                                    y_var = "Runoff_Long_Term_MEAN",
                                    #y_lab = expression(Mean~runoff~mm~yr^{-1}),
                                    y_lab = "Mean runoff
(mm/yr)",
                                    include_plot_count = F) + 
  scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
                labels = scales::trans_format("log10", scales::label_math(10^.x)))
rhem_plot_runoff

# Duniway et al 2023 figure 7e
rhem_plot_soilloss <- box_pretty_plot(dat = plot_data_descriptive,
                                    y_var = "Soil_Loss_Long_Term_MEAN",
                                    #y_lab = expression(Mean~soil~loss~tons~ha^{-1}~yr^{-1}),
                                    y_lab = "Mean soil loss
(tons/ha/yr)",
                                    include_plot_count = F) + 
  scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
                labels = scales::trans_format("log10", scales::label_math(10^.x)))
rhem_plot_soilloss

# put them all together
abiotic_plots <- cowplot::plot_grid(aero_plot,
                                    soilstab_plot,
                                    rhem_plot_runoff,
                                    rhem_plot_soilloss,
                                    ppt_plot, tmean_plot,
                                    nrow = 3, #rel_heights = c(0.6, 1),
                                    align = "v",
                                    labels = "auto")
abiotic_plots

today <- Sys.Date()
highrestiff(plot_obj = abiotic_plots,
            file = paste0(output_figure_folder, "/Boxplot/SW_SandyLoamyUplands_abiotic_boxplot_2_", today, ".tif"),
            width_in = 6,
            height_in = 9,
            resolution_dpi = 300)

# put them in a different layout because we love drafts
climate_plots <- cowplot::plot_grid(ppt_plot, tmean_plot, AI_plot,
                                    nrow = 1, 
                                    labels = "auto")
climate_plots

highrestiff(plot_obj = climate_plots,
            file = paste0(output_figure_folder, "/Boxplot/SW_SandyLoamyUplands_climate_boxplot_", today, ".tif"),
            width_in = 9,
            height_in = 3.5,
            resolution_dpi = 300)

soils_plots <- cowplot::plot_grid(aero_plot, soilstab_plot,
                                  rhem_plot_runoff, rhem_plot_soilloss,
                                    nrow = 2, 
                                    labels = "auto")
soils_plots

highrestiff(plot_obj = soils_plots,
            file = paste0(output_figure_folder, "/Boxplot/SW_SandyLoamyUplands_soils_boxplot_", today, ".tif"),
            width_in = 6,
            height_in = 6,
            resolution_dpi = 300)

# some extra RHEM plots for different storm event sizes
rhem_plot_runoff_2 <- box_pretty_plot(dat = plot_data_descriptive,
                                    y_var = "Runoff_Yearly_Max_Daily_2_Yr",
                                    y_lab = "2 yr. storm max.
daily runoff (mm)")
rhem_plot_runoff_2

rhem_plot_runoff_10 <- box_pretty_plot(dat = plot_data_descriptive,
                                    y_var = "Runoff_Yearly_Max_Daily_10_Yr",
                                    y_lab = "10 yr. storm max.
daily runoff (mm)")
rhem_plot_runoff_10

# Duniway et al 2023 figure 7f
rhem_plot_runoff_50 <- box_pretty_plot(dat = plot_data_descriptive,
                                       include_plot_count = F,
                                    y_var = "Runoff_Yearly_Max_Daily_50_Yr",
                                    y_lab = "50 yr storm max
daily runoff (mm)") + 
  scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
                labels = scales::trans_format("log10", scales::label_math(10^.x)))
rhem_plot_runoff_50

rhem_plot_runoff_100 <- box_pretty_plot(dat = plot_data_descriptive,
                                    y_var = "Runoff_Yearly_Max_Daily_100_Yr",
                                    y_lab = "100 yr. storm max.
daily runoff (mm)")
rhem_plot_runoff_100

rhem_plot_soilloss_2 <- box_pretty_plot(dat = plot_data_descriptive,
                                    y_var = "Soil_Loss_Yearly_Max_Daily_2_Yr",
                                    y_lab = "2 yr. storm max.
daily soil loss (tons/ha)")
rhem_plot_soilloss_2

rhem_plot_soilloss_10 <- box_pretty_plot(dat = plot_data_descriptive,
                                    y_var = "Soil_Loss_Yearly_Max_Daily_10_Yr",
                                    y_lab = "10 yr. storm max.
daily soil loss (tons/ha)")
rhem_plot_soilloss_10

# Duniway et al 2023 figure 7g
rhem_plot_soilloss_50 <- box_pretty_plot(dat = plot_data_descriptive,
                                         include_plot_count = F,
                                    y_var = "Soil_Loss_Yearly_Max_Daily_50_Yr",
                                    #y_lab = expression(50~yr.~storm~max.~daily~soil~loss~tons~ha^{-1}),
                                    y_lab = "50 yr storm
max daily soil
loss (tons/ha)"
                                    ) + 
  scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
                labels = scales::trans_format("log10", scales::label_math(10^.x)))
rhem_plot_soilloss_50

rhem_plot_soilloss_100 <- box_pretty_plot(dat = plot_data_descriptive,
                                    y_var = "Soil_Loss_Yearly_Max_Daily_100_Yr",
                                    y_lab = "100 yr. storm max.
daily soil loss (tons/ha)")
rhem_plot_soilloss_100

rhem_event_plots <- cowplot::plot_grid(rhem_plot_runoff_2, rhem_plot_soilloss_2,
                                       rhem_plot_runoff_10, rhem_plot_soilloss_10,
                                       rhem_plot_runoff_50, rhem_plot_soilloss_50,
                                       rhem_plot_runoff_100, rhem_plot_soilloss_100,
                                    ncol = 2, 
                                    labels = "auto")
rhem_event_plots

today <- Sys.Date()
highrestiff(plot_obj = rhem_event_plots,
            file = paste0(output_figure_folder, "/Boxplot/SW_SandyLoamyUplands_rhem_events_", today, ".tif"),
            width_in = 6,
            height_in = 9,
            resolution_dpi = 300)

# ground cover, Duniway et al. 2023 figure 7a
#group_labels_short <- c("Grassland", "Perennial grass loss", "Shrubland", "Mesic shrubland", "Invaded")
plot_data_first_tall$PlantCommunity_fuzzy_reordered <- factor(plot_data_first_tall$PlantCommunity_fuzzy,
                                                               levels = c("1", "2", "3", "4"),
                                                               labels = c("Open woodland", "Shrubland", "Invaded",
                                                                          "Grassland"),
                                                               ordered = T)
ground_grps <- c("BareSoilCover", "FH_TotalLitterCover", "CA_percent_200plus",
                 "FH_LichenMossCover", "CA_percent_100plus"
                 )
ground_cover_states_pub <- ggplot(data = filter(plot_data_first_tall, CoverType %in% ground_grps),
                              aes(x=CoverType, y=PctCover, fill=PlantCommunity_fuzzy_reordered)) +
  geom_boxplot() +
  scale_fill_manual(values = pal_veg2_reordered,
                    name = "Cluster state"
                    ) +
  scale_x_discrete(labels=c("Bare soil",
                            "Canopy gaps
> 100 cm",
                            "Canopy gaps
> 200 cm",
                            "Lichens
& mosses",                
                            "Litter")) +
  xlab(NULL) +
  ylab("Cover (%)") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7, color = "black"),
        axis.text.y = element_text(size = 7, color = "black"),
          axis.title = element_text(size = 8, color = "black"),
        legend.text = element_text(size = 7, color = "black"),
        legend.title = element_text(size = 8, color = "black"),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())
ground_cover_states_pub

# Duniway et al. figure 7 erosion compiled
erosion_rows2_4_plots <- cowplot::plot_grid(aero_plot, soilstab_plot,
                                            rhem_plot_runoff, rhem_plot_soilloss,
                                            rhem_plot_runoff_50, rhem_plot_soilloss_50,
                                            ncol=2, labels = c("b", "c",
                                                               "d", "e",
                                                               "f", "g"))
erosion_rows2_4_plots

erosion_all_plots <- cowplot::plot_grid(ground_cover_states_pub,
                                        erosion_rows2_4_plots,
                                        nrow = 2, rel_heights = c(.4, 1),
                                        labels = c("a", ""))
erosion_all_plots

today <- Sys.Date()
highrestiff(plot_obj = erosion_all_plots,
            file = paste0(output_figure_folder, "/Boxplot/SW_SandyLoamyUplands_erosion_panels_", today, ".tif"),
            width_in = 5.51,
            height_in = 8,
            resolution_dpi = 500)

# 3. NPP
# NPP data is from the RAP 3.0 partitioned NPP data set (Robinson et al., 2019). I downloaded the predicted NPP values within a 35-m radius of the AIM, LMF, NRI, and NCPN plot centers using a Google Earth Engine script. If we continue using NPP for other ESGs, we will probably need to continue downloading point values for RAP products through GEE - the raster file sizes are enormous!

# Robinson, N.P., M.O. Jones, A. Moreno, T.A. Erickson, D.E. Naugle, and B.W. Allred. 2019. Rangeland productivity partitioned to sub-pixel plant functional types. Remote Sensing 11:1427. https://dx.doi.org/10.3390/rs11121427 

NPP_files <- list.files(paste0("C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/ESG/STM/RAP_do_not_sync/", target_ESG, "/NPP"),
                        full.names = T)
npp <- read.csv(NPP_files[1])
npp$Year <- as.numeric(substr(NPP_files[1], 186, 189))

for(file in NPP_files[-1]){
  npp_temp <- read.csv(file)
  npp_temp$Year <- as.numeric(substr(file, 186, 189))
  npp <- bind_rows(npp, npp_temp)
  rm(npp_temp)
}

npp <- select(npp, PlotCod, Year, afgNPP, pfgNPP, shrNPP, treNPP) %>%
  setNames(c("PlotCode", "Year", "afgNPP", "pfgNPP", "shrNPP", "treNPP"))

plot_data_descriptive <- left_join(plot_data_descriptive, npp)

afg_npp_plot <- box_pretty_plot(dat = plot_data_descriptive, 
                               y_var = "afgNPP",
                               y_lab = "Ann. herb. NPP (g C/m2)")
afg_npp_plot

pfg_npp_plot <- box_pretty_plot(dat = plot_data_descriptive, 
                               y_var = "pfgNPP",
                               y_lab = "Per. herb. NPP (g C/m2)")
pfg_npp_plot

shr_npp_plot <- box_pretty_plot(dat = plot_data_descriptive, 
                               y_var = "shrNPP",
                               y_lab = "Shrub NPP (g C/m2)")
shr_npp_plot

tre_npp_plot <- box_pretty_plot(dat = plot_data_descriptive, 
                               y_var = "treNPP",
                               y_lab = "Tree NPP (g C/m2)")
tre_npp_plot

pft_npp_plots <- cowplot::plot_grid(afg_npp_plot + xlab(NULL) + scale_x_discrete(labels=NULL), pfg_npp_plot + xlab(NULL) + scale_x_discrete(labels=NULL),
                                    shr_npp_plot, tre_npp_plot,
                                    nrow = 2, rel_heights = c(0.6, 1), align = "v",
                                    labels = "auto")
pft_npp_plots

today <- Sys.Date()
highrestiff(plot_obj = pft_npp_plots,
            file = paste0(output_figure_folder, "/Boxplot/SW_SandyLoamyUplands_pftnpp_boxplot_", today, ".tif"),
            width_in = 6,
            height_in = 6.5,
            resolution_dpi = 300)

# combining npp to herbaceous, woody, and total
plot_data_descriptive$herbNPP <- plot_data_descriptive$afgNPP + plot_data_descriptive$pfgNPP
plot_data_descriptive$woodyNPP <- plot_data_descriptive$shrNPP + plot_data_descriptive$treNPP
plot_data_descriptive$totalNPP <- plot_data_descriptive$herbNPP + plot_data_descriptive$woodyNPP

herb_npp_plot <- box_pretty_plot(dat = plot_data_descriptive, 
                               y_var = "herbNPP",
                               y_lab = "Herb. NPP (g C/m2)")
herb_npp_plot

woody_npp_plot <- box_pretty_plot(dat = plot_data_descriptive, 
                               y_var = "woodyNPP",
                               y_lab = "Woody NPP (g C/m2)")
woody_npp_plot

total_npp_plot <- box_pretty_plot(dat = plot_data_descriptive, 
                               y_var = "totalNPP",
                               y_lab = expression(Total~NPP~(g~C~m^{-2})),
                               include_plot_count = F)
total_npp_plot

# median total NPP in open woodland, for inclusion in the text of Duniway et al 2024
plot_data_descriptive %>% group_by(PlantCommunity_fuzzy_reordered) %>% summarise(NPP_summary = median(totalNPP))

gh_npp_plots <- cowplot::plot_grid(herb_npp_plot + xlab(NULL) + scale_x_discrete(labels=NULL),
                                    woody_npp_plot + xlab(NULL) + scale_x_discrete(labels=NULL),
                                   total_npp_plot,
                                    nrow = 3, rel_heights = c(0.6, 0.6, 1), align = "v",
                                   labels = "auto")
gh_npp_plots

today <- Sys.Date()
highrestiff(plot_obj = gh_npp_plots,
            file = paste0(output_figure_folder, "/Boxplot/SW_SandyLoamyUplands_ghnpp_boxplot_", today, ".tif"),
            width_in = 3,
            height_in = 9,
            resolution_dpi = 300)

highrestiff(plot_obj = total_npp_plot,
            file = paste0(output_figure_folder, "/Boxplot/SW_SandyLoamyUplands_totalnpp_boxplot_2_", today, ".tif"),
            width_in = 3,
            height_in = 3.5,
            resolution_dpi = 300)

rm(npp)

# 4. biodiversity
diversity_plot <- box_pretty_plot(dat = plot_data_descriptive,
                                     y_var = "shannon_diversity",
                                     y_lab = "Shannon diversity index")
diversity_plot

richness_plot <- box_pretty_plot(dat = plot_data_descriptive,
                                     y_var = "sp_richness",
                                     y_lab = "Number of species",
                                 include_plot_count = F)
richness_plot

# median richness for Shrubland for inclusion in text of Duniway et al. 2025
plot_data_descriptive %>% group_by(PlantCommunity_fuzzy_reordered) %>% summarise(richness = median(sp_richness))

biodiversity_plots <- cowplot::plot_grid(richness_plot, diversity_plot, total_npp_plot,
                                         #nrow = 2, rel_heights = c(0.6, 1),
                                         nrow = 1,
                                         labels = "auto")
biodiversity_plots

today <- Sys.Date()
highrestiff(plot_obj = biodiversity_plots,
            file = paste0(output_figure_folder, "/Boxplot/SW_SandyLoamyUplands_biodiversity_boxplot_", today, ".tif"),
            #width_in = 3,
            width_in = 9,
            #height_in = 6.5,
            height_in = 3.5,
            resolution_dpi = 300)

native_relative_plot <- box_pretty_plot(dat = plot_data_descriptive,
                                        include_plot_count = F,
                                     y_var = "NativeRelativeCover",
                                     y_lab = "Relative native
plant cover (%)")
native_relative_plot

# median relative native cover for Invaded and Shrubland for inclusion in text of Duniway et al. 2024
plot_data_descriptive %>% group_by(PlantCommunity_fuzzy_reordered) %>% summarise(rel_nat = median(NativeRelativeCover))

biodiversity_plots_2 <- cowplot::plot_grid(richness_plot, native_relative_plot, total_npp_plot,
                                         #nrow = 2, rel_heights = c(0.6, 1),
                                         nrow = 1,
                                         labels = "auto")
biodiversity_plots_2

today <- Sys.Date()
highrestiff(plot_obj = biodiversity_plots_2,
            file = paste0(output_figure_folder, "/Boxplot/SW_SandyLoamyUplands_biodiversity_boxplot_2_", today, ".tif"),
            #width_in = 3,
            width_in = 9,
            #height_in = 6.5,
            height_in = 3.5,
            resolution_dpi = 300)

# 5. remote sensing variance
# Ideally, we should probably do this from the LandCART total foliar cover predictions, but obtaining those rasters for the whole UCRB is very slow. For now, we'll use the sum of the RAP 3.0 plant functional type cover values for each plot to represent total foliar cover instead. The RAP values were downloaded from within a 35-m radius of each point in this ESG using a Google Earth Engine script. Consider switching to LandCART, since it actually predicts a total foliar cover value, when raster generation and downloading for the UCRB 1995-2019 (April 1 - Nov 1) is complete.
library(broom) # for easy handling of many linear model parts
library(purrr) # for functional programming, eep!
library(tidyr)

VegCover_files <- list.files(paste0("C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/ESG/STM/RAP_do_not_sync/", target_ESG, "/VegCover"),
                        full.names = T)
VegCover <- read.csv(VegCover_files[1])
VegCover$Year <- as.numeric(substr(VegCover_files[1], 176, 179))

for(file in VegCover_files[-1]){
  VegCover_temp <- read.csv(file)
  VegCover_temp$Year <- as.numeric(substr(file, 176, 179))
  VegCover <- bind_rows(VegCover, VegCover_temp)
  rm(VegCover_temp)
}

VegCover <- select(VegCover, PlotCod, Bst_grp, Bst_gr_, Year, AFG, BGR, LTR, PFG, SHR, TRE) %>%
  setNames(c("PlotCode", "PlantCommunity_fuzzy", "PlantCommunity_name", "Year", "AFG", "BGR", "LTR", "PFG", "SHR", "TRE")) %>%
  rowwise() %>%
  mutate(TotalFoliar = sum(AFG, PFG, SHR, TRE, na.rm = T))

sampleyears <- select(plot_data_descriptive, PlotCode, Year) %>%
  setNames(c("PlotCode", "Year_sampled"))

VegCover <- left_join(VegCover, sampleyears)

VegCover_10yrs <- VegCover %>% # select the 10 years up to and including the field sampling year
  mutate(Years_to_sampling = Year_sampled - Year) %>%
  filter(Years_to_sampling >=0 & Years_to_sampling <10) 

VegCover_variance <- VegCover_10yrs %>%
  select(PlotCode, PlantCommunity_fuzzy, Year, Years_to_sampling, TotalFoliar) %>%
  as_tibble() %>%
  nest(data = c(Year, Years_to_sampling, TotalFoliar)) %>% # create an internal data frame for each plot
  mutate(model = map(data, ~lm(TotalFoliar ~ Years_to_sampling, data = .))) %>% # create a linear model from each plot's data frame
  mutate(data = map(model, augment), # get residuals for each model and add to internal plot data frames
         model_glance = map(model, glance)) %>% # get model summary statistics
  mutate(resid_variance = map_dbl(data, ~var(.$.resid)), # get the variance of the detrended data (aka residuals)
         resid_AC1 = map_dbl(data,
                             ~acf(x = .$.resid, type = "correlation", lag.max = 1, plot = F)$acf[2]) # get the lag-1 autocorrelation of the detrended data
         ) %>%
  select(-data, -model) %>%
  unnest(model_glance)


# make figures
plot_data_descriptive <- left_join(plot_data_descriptive, select(VegCover_variance, PlotCode, resid_variance, resid_AC1))

variance_plot <- box_pretty_plot(dat = plot_data_descriptive,
                                     y_var = "resid_variance",
                                     y_lab = "Variance")
variance_plot

AC1_plot <- box_pretty_plot(dat = plot_data_descriptive,
                                     y_var = "resid_AC1",
                                     y_lab = "Lag-1 autocorrelation") 
AC1_plot

RSvariability_plots <- cowplot::plot_grid(variance_plot + xlab(NULL) + scale_x_discrete(labels=NULL),
                                         AC1_plot,
                                         nrow = 2, rel_heights = c(0.6, 1),
                                         labels = "auto")
RSvariability_plots

today <- Sys.Date()
highrestiff(plot_obj = RSvariability_plots,
            file = paste0(output_figure_folder, "/Boxplot/SW_SandyLoamyUplands_RSvariability_boxplot_", today, ".tif"),
            width_in = 3,
            height_in = 6,
            resolution_dpi = 300)

variance_violin <- violin_pretty_plot(dat = plot_data_descriptive,
                                      y_var = "resid_variance",
                                      y_lab = "Variance",
                                      fill_var = "PlantCommunity_fuzzy_reordered")
variance_violin

today <- Sys.Date()
highrestiff(plot_obj = variance_violin,
            file = paste0(output_figure_folder, "/Violin/SW_SandyLoamyUplands_variance_violin_", today, ".tif"),
            width_in = 3,
            height_in = 3.5,
            resolution_dpi = 300)

rm(VegCover, VegCover_10yrs, VegCover_variance)

# 6. Functional type cover grouped by functional type and colored by state
functypes_grps <- c("TotalFoliarCover", #"AH_WoodyCover", #AH_ForbGrassCover, 
                    "AH_ShrubCover", "AH_TreeCover",
                    "AH_PerenForbGrassCover", "AH_AnnForbGrassCover"#,
                    #"BareSoilCover", "CA_percent_100plus"
                )
functypes_cover_plot <- ggplot(data = select(plot_data_descriptive, 
                                             any_of(c("PlantCommunity_fuzzy_reordered", functypes_grps))) %>%
                                   tidyr::pivot_longer(cols = all_of(functypes_grps),
                      names_to = "CoverType",
                      values_to = "PctCover"),
                             aes(fill=PlantCommunity_fuzzy_reordered, y=PctCover, x=CoverType)) +
  geom_boxplot() +
  scale_fill_manual(values = pal_veg2_reordered,
                    name = "Cluster state"#,
                    #labels = group_labels_short
                    ) +
  scale_x_discrete(labels=c("Annual
herbaceous", "Perennial
herbaceous",
                               "Shrub", "Tree",
                               #"Bare soil",
                               #"Canopy gaps
#> 100 cm",
                               "Total foliar")) +
  #xlab("Functional type") +
  xlab(NULL) +
  ylab("Cover (%)") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7, color = "black"),
        axis.text.y = element_text(size = 7, color = "black"),
          axis.title = element_text(size = 8),
        legend.text = element_text(size = 7),
        legend.title = element_text(size = 8),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())

functypes_cover_plot

today <- Sys.Date()
highrestiff(plot_obj = functypes_cover_plot,
            file = paste0(output_figure_folder, "/Boxplot/SW_SandyLoamyUplands_pftcover_boxplot_", today, ".tif"),
            width_in = 6,
            height_in = 3.5,
            resolution_dpi = 300)

# median covers needed for text of Duniway et al. 2024:
# Shrubland - perennial herb, shrub
# Grassland - total foliar cover
# Invaded - annual herbaceous
plot_data_descriptive %>% group_by(PlantCommunity_fuzzy_reordered) %>% summarise(peren_herb = median(AH_PerenForbGrassCover))
plot_data_descriptive %>% group_by(PlantCommunity_fuzzy_reordered) %>% summarise(shrub = median(AH_ShrubCover))
plot_data_descriptive %>% group_by(PlantCommunity_fuzzy_reordered) %>% summarise(tree = median(AH_TreeCover))
plot_data_descriptive %>% group_by(PlantCommunity_fuzzy_reordered) %>% summarise(ann_herb = median(AH_AnnForbGrassCover))
plot_data_descriptive %>% group_by(PlantCommunity_fuzzy_reordered) %>% summarise(totalfoliar = median(TotalFoliarCover))

# number of plots with tree cover >50% needed for text of Duniway et al. 2024:
plot_data_descriptive %>% filter(PlantCommunity_fuzzy_reordered=="Open woodland" & AH_TreeCover>50) %>% nrow()

# Duniway et al 2023 figure 5
pfts_biodiversity_row2_plots <- cowplot::plot_grid(richness_plot, native_relative_plot, total_npp_plot,
                                                   #nrow = 2, rel_heights = c(0.6, 1),
                                                   nrow = 1,
                                                   labels = c("b", "c", "d"))
pfts_biodiversity_plots <- cowplot::plot_grid(functypes_cover_plot,
                                              pfts_biodiversity_row2_plots,
                                              nrow = 2,
                                              labels = c("a", ""))
pfts_biodiversity_plots

today <- Sys.Date()
highrestiff(plot_obj = pfts_biodiversity_plots,
            file = paste0(output_figure_folder, "/Boxplot/SW_SandyLoamyUplands_pftcover_biodiversity_panel_", today, ".tif"),
            width_in = 5.51,
            height_in = 4.75,
            resolution_dpi = 300)

# 7. Disturbance and land use
# MTBS
# Grazing?
# PJ treatments
# SWreGAP protection classes

# MTBS
library(sf)
library(lubridate)

mtbs <- st_read(dsn = "C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/GIS/Disturbance_LandUse/mtbs_permis_DD_2020.shp")

fuzzy_cluster_memb_sf <- st_as_sf(fuzzy_cluster_memb_sf, crs = st_crs(plot_locations)) %>%
  st_transform(., crs=st_crs(mtbs))

mtbs_plots <- st_join(x=fuzzy_cluster_memb_sf, y=mtbs) %>%
  select(PlotCode, Best_group, Best_group_name, DateSampled, Ig_Date, geometry) %>%
  mutate(Burned_presampling = ifelse(test = Ig_Date < DateSampled,
                              yes = TRUE,
                              no = FALSE)) %>%
  group_by(PlotCode) %>%
  summarise(Best_group = first(Best_group),
            Best_group_name = first(Best_group_name),
            DateSampled = first(DateSampled),
            Times_burned_presampling = length(which(Burned_presampling)),
            Times_burned_postsampling = length(which(!Burned_presampling)),
            Date_burned_1 = sort(Ig_Date)[1],
            Date_burned_2 = sort(Ig_Date)[2]) %>%
  st_drop_geometry()

# write.csv(mtbs_plots,
#           "C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/ESG/STM/Analyses/SW_SandyLoamyUplands_burnedplots.csv",
#           row.names = F)

rm(mtbs)

mtbs_plots$PlantCommunity_fuzzy_reordered <- factor(mtbs_plots$Best_group,
                                                               levels = c("1", "2", "3", "4"),
                                                               labels = c("Open woodland", "Shrubland", "Invaded",
                                                                          "Grassland"),
                                                               ordered = T) 
mtbs_summary <- mtbs_plots %>%
  filter(PlotCode %in% plot_data_descriptive$PlotCode) %>% 
  group_by(PlantCommunity_fuzzy_reordered) %>%
  summarise(pct_Burned = (length(which(Times_burned_presampling > 0))/n())*100,
            pct_Burned_postsampling = (length(which(Times_burned_postsampling > 0))/n())*100)

burn_plot_mtbs <- ggplot(data = mtbs_summary,
                    aes(x=PlantCommunity_fuzzy_reordered, y=pct_Burned, fill = PlantCommunity_fuzzy_reordered)) +
  geom_col() +
  scale_fill_manual(values = pal_veg2_reordered) +
  xlab(NULL) +
  ylab("% of plots burned pre-sampling") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"),
        axis.text.y = element_text(size = 10, color = "black"),
          axis.title = element_text(size = 12),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
        legend.position = "none") 

burn_plot_mtbs

burn_plot_post_mtbs <- ggplot(data = mtbs_summary,
                    aes(x=PlantCommunity_fuzzy_reordered, y=pct_Burned_postsampling, fill = PlantCommunity_fuzzy_reordered)) +
  geom_col() +
  scale_fill_manual(values = pal_veg2_reordered) +
  xlab(NULL) +
  ylab("% of plots burned post-sampling") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"),
        axis.text.y = element_text(size = 10, color = "black"),
          axis.title = element_text(size = 12),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
        legend.position = "none") 

burn_plot_post_mtbs

today <- Sys.Date()
highrestiff(plot_obj = burn_plot_mtbs,
            file = paste0(output_figure_folder, "/Bar/SW_SandyLoamyUplands_MTBSpctburnedpresample_boxplot_", today, ".tif"),
            width_in = 3,
            height_in = 3.5,
            resolution_dpi = 300)

today <- Sys.Date()
highrestiff(plot_obj = burn_plot_post_mtbs,
            file = paste0(output_figure_folder, "/Bar/SW_SandyLoamyUplands_MTBSpctburnedpostsample_boxplot_", today, ".tif"),
            width_in = 3,
            height_in = 3.5,
            resolution_dpi = 300)

# Tara's expanded fire data set
FireFeatureData <- st_read(dsn = "G:/Base Layers/Disturbance/Fire/Fire_Feature_Data_Pro2_8_Geodatabase/USGS_Wildland_Fire_Combined_ShapeFile",
                           layer = "USGS_Wildland_Fire_Combined_ShapeFile")

fuzzy_cluster_memb_sf <- st_as_sf(fuzzy_cluster_memb_sf, crs = st_crs(plot_locations)) %>%
  st_transform(., crs=st_crs(FireFeatureData))

FireFeatureData_plots <- st_join(x=fuzzy_cluster_memb_sf, y=FireFeatureData) %>%
  select(PlotCode, Best_group, Best_group_name, DateSampled, Fire_Year, geometry) %>%
  mutate(Sample_Year = year(DateSampled)) %>%
  # I hand-checked all of the fires that happened the same year the plot was sampled.
  # Only one plot, AIM_Colorado NWDO Little Snake FO 2018_INTS-WyoSage-2023, was
  # sampled before the fire that burned it in the sampling year, so using <= in
  # the ifelse functions below works for all the other plots
  mutate(Burned_presampling = ifelse(test = Fire_Year <= Sample_Year,
                              yes = TRUE,
                              no = FALSE),
         Years_burn_to_sampling = ifelse(test = Fire_Year <= Sample_Year,
                              yes = Sample_Year - Fire_Year,
                              no = NA)) %>%
  arrange(Years_burn_to_sampling) %>% 
  group_by(PlotCode) %>%
  summarise(Best_group = first(Best_group),
            Best_group_name = first(Best_group_name),
            DateSampled = first(DateSampled),
            Times_burned_presampling = length(which(Burned_presampling)),
            Times_burned_postsampling = length(which(!Burned_presampling)),
            Date_burned_1 = sort(Fire_Year)[1],
            Date_burned_2 = sort(Fire_Year)[2],
            Date_burned_3 = sort(Fire_Year)[3],
            Years_burn_to_sampling = first(Years_burn_to_sampling) # years from most recent burn before sampling to sampling
            ) %>%
  st_drop_geometry()

# Edit the years from burn to sampling for the one plot that was sampled before it burned in the same year
FireFeatureData_plots[FireFeatureData_plots$PlotCode=="AIM_Colorado NWDO Little Snake FO 2018_INTS-WyoSage-2023", "Years_burn_to_sampling"] <-
  year(FireFeatureData_plots[FireFeatureData_plots$PlotCode=="AIM_Colorado NWDO Little Snake FO 2018_INTS-WyoSage-2023", ]$DateSampled) - 
  FireFeatureData_plots[FireFeatureData_plots$PlotCode=="AIM_Colorado NWDO Little Snake FO 2018_INTS-WyoSage-2023", ]$Date_burned_1

FireFeatureData_plots[FireFeatureData_plots$PlotCode=="AIM_Colorado NWDO Little Snake FO 2018_INTS-WyoSage-2023", "Times_burned_presampling"] <- 1
FireFeatureData_plots[FireFeatureData_plots$PlotCode=="AIM_Colorado NWDO Little Snake FO 2018_INTS-WyoSage-2023", "Times_burned_postsampling"] <- 1

# write.csv(FireFeatureData_plots,
#           "C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/ESG/STM/Analyses/SW_SandyLoamyUplands_burnedplots_allfiresources.csv",
#           row.names = F)

rm(FireFeatureData)

FireFeatureData_plots$PlantCommunity_fuzzy_reordered <- factor(FireFeatureData_plots$Best_group,
                                                               levels = c("1", "2", "3", "4"),
                                                               labels = c("Open woodland", "Shrubland", "Invaded",
                                                                          "Grassland"),
                                                               ordered = T)
plot_data_descriptive <- left_join(plot_data_descriptive, FireFeatureData_plots)

FireFeatureData_summary <- FireFeatureData_plots %>%
  # remove this line if NOT removing the fuzziest plots!
  filter(PlotCode %in% plot_data_descriptive$PlotCode) %>%
  group_by(PlantCommunity_fuzzy_reordered) %>%
  summarise(pct_Burned = (length(which(Times_burned_presampling > 0))/n())*100,
            pct_Burned_postsampling = (length(which(Times_burned_postsampling > 0))/n())*100)

# Duniway et al 2023 figure 9a
burn_plot_all <- ggplot(data = FireFeatureData_summary,
                    aes(x=PlantCommunity_fuzzy_reordered, y=pct_Burned, fill = PlantCommunity_fuzzy_reordered)) +
  geom_col() +
  scale_fill_manual(values = pal_veg2_reordered) +
  xlab(NULL) +
  ylab("% of plots burned
pre-sampling") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7, color = "black"),
        axis.text.y = element_text(size = 7, color = "black"),
          axis.title = element_text(size = 8),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
        legend.position = "none")

burn_plot_all

burn_plot_post_all <- ggplot(data = FireFeatureData_summary,
                    aes(x=PlantCommunity_fuzzy_reordered, y=pct_Burned_postsampling, fill = PlantCommunity_fuzzy_reordered)) +
  geom_col() +
  scale_fill_manual(values = pal_veg2_reordered) +
  xlab(NULL) +
  ylab("% of plots burned post-sampling") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"),
        axis.text.y = element_text(size = 10, color = "black"),
          axis.title = element_text(size = 12),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
        legend.position = "none")

burn_plot_post_all

today <- Sys.Date()
highrestiff(plot_obj = burn_plot_all,
            file = paste0(output_figure_folder, "/Bar/SW_SandyLoamyUplands_FireFeatureDatapctburnedpresample_boxplot_", today, ".tif"),
            width_in = 3,
            height_in = 3.5,
            resolution_dpi = 300)

today <- Sys.Date()
highrestiff(plot_obj = burn_plot_post_all,
            file = paste0(output_figure_folder, "/Bar/SW_SandyLoamyUplands_FireFeatureDatapctburnedpostsample_boxplot_", today, ".tif"),
            width_in = 3,
            height_in = 3.5,
            resolution_dpi = 300)

# PJ Treatments
WRI_polygons <- st_read(dsn = "C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/ESG/Maps/FickPJ",
                        layer = "PJ") %>%
  st_transform(crs = st_crs(fuzzy_cluster_memb_sf))

sf_use_s2(FALSE)
pj_plots <- fuzzy_cluster_memb_sf %>% 
  # remove this filter line if NOT excluding the fuzziest plots!
  filter(., PlotCode %in% plot_data_descriptive$PlotCode) %>% 
  st_join(., WRI_polygons, sparse = T) %>%
  mutate(YearSampled = year(DateSampled),
         TreatedPreSampling = ifelse(test = YearSampled>=yearStart,
                                     yes = TRUE,
                                     no = FALSE)) %>%
  mutate(TreatedPreSampling = ifelse(test = is.na(TreatedPreSampling),
                                     yes = FALSE,
                                     no = TreatedPreSampling))

# Look at the types of treatments present, then categorize into broader types for visual summaries
sort(unique(paste(pj_plots$method, pj_plots$treat, sep = " - ")))
# additional documentation available in G:\Base Layers\Disturbance\TREATMENTS.
# For WRI treatments, see also the database at https://wri.utah.gov/wri/project/search.html

pj_plots_cats <- pj_plots %>%
  filter(!is.na(src)) %>%
  mutate(GeneralTreatmenType = ifelse(test = treat %in% c(# my categories: prescribed fire, brush removal, herbaceous removal, seeding, soil improvements. CHECK aerator, mowing, and weeding for each ESG to make sure they still fit as assigned here!!!
                                                          "aerator",
                                                          "brush removal",
                                                          "mowing",
                                                          "weeding"),
                                      yes = "Woody removal",
                                      no = NA), 
         TreatmentSubType = ifelse(test = GeneralTreatmenType=="Woody removal" & method %in% c("anchor chain",
                                                                                               "bullhogfull size",
                                                                                               "bullhogskid steer",
                                                                                               "double drum (1-way)",
                                                                                               "lop and scatter",
                                                                                               "mastication",
                                                                                               "mechanical",
                                                                                               "mowing",
                                                                                               "rollerchop"),
                                   yes = "Mechanical",
                                   no = ifelse(test = GeneralTreatmenType=="Woody removal" & method %in% c("herbicide",
                                                                                                           "herbicide aerial"),
                                               yes = "Chemical",
                                               no = NA
                                               )
         ))

n_plots_treated <- length(unique(filter(pj_plots_cats, TreatedPreSampling==T)$PlotCode))
n_plots_tx_twice_or_more <- pj_plots_cats %>%
  filter(TreatedPreSampling==T) %>%
  group_by(PlotCode) %>%
  summarise(n_diff_treatments = n_distinct(yearStart)) %>%
  filter(n_diff_treatments >= 2) %>%
  nrow()

plots_per_group <- data.frame(Best_group = c(1:4),
                              plots_per_group = c(nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==1)),
                                                  nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==2)),
                                                  nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==3)),
                                                  nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==4))
                                                  ))

pj_plot_pct <- pj_plots_cats %>%
  left_join(., plots_per_group) %>%
  filter(TreatedPreSampling==T) %>%
  st_drop_geometry() %>%
  group_by(PlotCode) %>% # next 3 lines select only the treatment closest to the sampling year
  arrange(desc(yearStart), .by_group = T) %>%
  mutate(Tx_flag = row_number()) %>%
  filter(Tx_flag==1) %>%
  ungroup() %>%
  group_by(Best_group) %>%
  summarise(MechanicalUnseeded = 100*length(which(TreatmentSubType=="Mechanical" & seeded==0))/first(plots_per_group),
            MechanicalSeeded = 100*length(which(TreatmentSubType=="Mechanical" & seeded==1))/first(plots_per_group),
            ChemicalUnseeded = 100*length(which(TreatmentSubType=="Chemical" & seeded==0))/first(plots_per_group),
            ChemicalSeeded = 100*length(which(TreatmentSubType=="Chemical" & seeded==1)/first(plots_per_group))
  ) %>%
  tidyr::pivot_longer(cols = c(MechanicalUnseeded, MechanicalSeeded, ChemicalUnseeded, ChemicalSeeded),
                      names_to = "TreatmentPair",
                      values_to = "pct") %>%
  mutate(Best_group = as.factor(Best_group))

treatments_plot_pcts <- ggplot(data = pj_plot_pct, aes(x = Best_group, y = pct, fill = TreatmentPair)) +
  geom_col() +
  scale_fill_manual(values = c("#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A"),
                    name = "Treatment",
                    labels = c("Chemical woody treatment (seeded)",
                               "Chemical woody treatment (not seeded)",
                               "Mechanical woody treatment (seeded)",
                               "Mechanical woody treatment (not seeded)")) +
  scale_x_discrete(name = NULL,
                   labels = c("Open woodland", "Shrubland", "Invaded", "Grassland")) +
  ylab("% of plots treated in each
plant community") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"),
        axis.text.y = element_text(size = 10, color = "black"),
        axis.title = element_text(size = 12),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.position = "right")
treatments_plot_pcts

today <- Sys.Date()
highrestiff(plot_obj = treatments_plot_pcts,
            file = paste0(output_figure_folder, "/Bar/SW_SandyLoamyUplands_PJtreatments_pct_barplot_", today, ".tif"),
            width_in = 6,
            height_in = 3.5,
            resolution_dpi = 300)

rm(WRI_polygons)

# GAP protection classes

GAP_raster <- raster::raster("C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/GIS/Disturbance_LandUse/PADUS_GAP_Status_Code1.tif")
plot_GAP <- sf::st_as_sf(raster::extract(x = GAP_raster,
                               y = fuzzy_cluster_memb_sf,
                               sp = T)) %>%
  sf::st_drop_geometry()

plot_data_descriptive <- left_join(plot_data_descriptive, select(plot_GAP, PlotCode, GAP_Sts))

GAP_proportions <- plot_data_descriptive %>%
  group_by(PlantCommunity_fuzzy_reordered) %>%
  # NOTE the PADUS codes are stored in the "GAP_Sts" attribute, but extraction pulls the raster value,
  # which is different! Checked attribute table in Arc to sort it out!
  summarise(pct_BiodivProt_NatDistPermitted = (length(which(GAP_Sts == 4))/n())*100,
            pct_BiodivProt_NatDistSupressed = (length(which(GAP_Sts == 3))/n())*100,
            pct_MultiUse_ExtractPermitted = (length(which(GAP_Sts == 1))/n())*100,
            pct_NoBiodivProctect = (length(which(GAP_Sts == 2))/n())*100,
            pct_PrivateLand = (length(which(is.na(GAP_Sts)))/n())*100) %>%
  tidyr::pivot_longer(data=.,
              cols = c(pct_BiodivProt_NatDistPermitted, 
               pct_BiodivProt_NatDistSupressed,
               pct_MultiUse_ExtractPermitted, 
               pct_NoBiodivProctect, 
               pct_PrivateLand),
               names_to = "ProtectionStatus",
               values_to = "Percent")

protection_plot <- ggplot(data = GAP_proportions,
                          aes(x=ProtectionStatus, y=Percent, fill=PlantCommunity_fuzzy_reordered)) +
  geom_col() +
  scale_fill_manual(values = pal_veg2_reordered,
                    name = "Plant community"
                    ) +
  scale_x_discrete(labels=c("1 - Biodiversity protected,
natural disturbance permitted",
"2 - Biodiversity protected,
natural disturbance suppressed",
"3 - Multi-use, extraction
permitted",
"4 - No biodiversity protection",
"Private or tribal land")) +
  xlab("PADUS protection status") +
  ylab("% plots in
plant community") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"),
        axis.text.y = element_text(size = 10, color = "black"),
          axis.title = element_text(size = 12),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
        plot.margin = unit(c(2,2,2,5), "mm")
        )

protection_plot

today <- Sys.Date()
highrestiff(plot_obj = protection_plot,
            file = paste0(output_figure_folder, "/Bar/SW_SandyLoamyUplands_PADUSprotection_flipped_barplot_", today, ".tif"),
            width_in = 6,
            height_in = 3.5,
            resolution_dpi = 300)

group_labels_fuzzy <- paste0(group_labels_base,
                               " (n = ",
                               c(nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==1)),
                                 nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==2)),
                                 nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==3)),
                                 nrow(filter(plot_data_descriptive, PlantCommunity_fuzzy==4))
                                  ),
                               ")")

protection_plot2 <- ggplot(data = GAP_proportions,
                          aes(fill=ProtectionStatus, y=Percent, x=PlantCommunity_fuzzy_reordered)) +
  geom_col() +
  scale_fill_manual(values = c("seagreen", "yellowgreen", "gray58", "dodgerblue", "palegoldenrod"),
                    name = "PADUS protection status",
                    labels = c("1 - Biodiversity protected,
natural disturbance permitted",
"2 - Biodiversity protected,
natural disturbance suppressed",
"3 - Multi-use, extraction
permitted",
"4 - No biodiversity protection",
"Private or tribal land")) +
  scale_x_discrete(labels= group_labels_fuzzy) +
  xlab(NULL) +
  ylab("% plots in plant community") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"),
        axis.text.y = element_text(size = 10, color = "black"),
          axis.title = element_text(size = 12),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
        legend.key.size = unit(1.5, 'lines')) 

protection_plot2

today <- Sys.Date()
highrestiff(plot_obj = protection_plot2,
            file = paste0(output_figure_folder, "/Bar/SW_SandyLoamyUplands_PADUSprotection_dodge_barplot_", today, ".tif"),
            width_in = 6,
            height_in = 3.5,
            resolution_dpi = 300)

GAP_proportions_byclass <- plot_data_descriptive %>%
  # NOTE the PADUS codes are stored in the "GAP_Sts" attribute, but extraction pulls the raster value,
  # which is different! Checked attribute table in Arc to sort it out!
  mutate(PADUS_GAP_Status_lumped = ifelse(test = GAP_Sts %in% c(3,4),
                                          yes = "Protected public",
                                          no = NA)) %>%
  mutate(PADUS_GAP_Status_lumped =  ifelse(test = GAP_Sts %in% c(1,2),
                                           yes = "Unprotected public",
                                           no = PADUS_GAP_Status_lumped)) %>%
  mutate(PADUS_GAP_Status_lumped = ifelse(test = is.na(GAP_Sts),
                                          yes = "Private or tribal",
                                          no = PADUS_GAP_Status_lumped)) %>%
  group_by(PADUS_GAP_Status_lumped) %>%
  summarise(pct_OpenWoodland = (length(which(PlantCommunity_fuzzy_reordered == "Open woodland"))/n())*100,
            pct_Shrubland = (length(which(PlantCommunity_fuzzy_reordered == "Shrubland"))/n())*100,
            pct_Invaded = (length(which(PlantCommunity_fuzzy_reordered == "Invaded"))/n())*100,
            pct_Grassland = (length(which(PlantCommunity_fuzzy_reordered == "Grassland"))/n())*100) %>%
  tidyr::pivot_longer(data=.,
               cols = c(pct_OpenWoodland,
                        pct_Shrubland,
                        pct_Invaded,
                        pct_Grassland),
               names_to = "PlantCommunity",
               values_to = "Percent") %>%
  mutate(PlantCommunity_fuzzy_reordered = factor(PlantCommunity,
                                                 levels = c("pct_OpenWoodland", "pct_Shrubland", "pct_Invaded",
                                                            "pct_Grassland"),
         labels = c("Open woodland", "Shrubland",
                    "Invaded", "Grassland"),
         ordered = T))

# Duniway et al figure 9b
protection_lumped_plot <- ggplot(data = GAP_proportions_byclass,
                          aes(x=PADUS_GAP_Status_lumped, y=Percent, fill=PlantCommunity_fuzzy_reordered)) +
  geom_col() +
  scale_fill_manual(values = pal_veg2_reordered,
                    name = "Cluster state",
                    labels = c("Open
woodland",
                               "Shrubland",
                               "Invaded",
                               "Grassland")
                    ) +
  #scale_x_discrete(labels=c("Protected - public", "Unprotected - public" "Private or tribal land")) +
  xlab(NULL) +
  ylab("% of plots in
protection group") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7, color = "black"),
        axis.text.y = element_text(size = 7, color = "black"),
          axis.title = element_text(size = 8),
        legend.text = element_text(size = 7),
        legend.title = element_text(size = 8),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()#,
        #plot.margin = unit(c(2,2,2,5), "mm")
        )
protection_lumped_plot

disturbance_plots <- cowplot::plot_grid(burn_plot_all, protection_lumped_plot,
                                        nrow = 1, rel_widths = c(0.7, 1),
                                        labels = "auto")
disturbance_plots

today <- Sys.Date()
highrestiff(plot_obj = disturbance_plots,
            file = paste0(output_figure_folder, "/Bar/SW_SandyLoamyUplands_disturbanceproctection_barplot_", today, ".tif"),
            width_in = 7,
            height_in = 3.5,
            resolution_dpi = 300)


rm(GAP_raster, plot_GAP)

# 8. Drought summaries
# data from a GEE script and an R script created by the wonderful Gayle Tyree
# see C:\Users\aknight\Documents\Telework_Backups\V_drive\ANNA_KNIGHT\ESG\STM\Scripts\Drought_point_downloads
# for details.
drought <- read.csv("C:/Users/aknight/Documents/Telework_Backups/V_drive/ANNA_KNIGHT/ESG/Maps/contains_NRI_do_not_sync/SW_SandyLoamyUplands_spei_pdsi_reformatted.csv")

plot_data_descriptive <- left_join(plot_data_descriptive,
                                   select(drought, PlotCod, any_of(contains("spei")), any_of(contains("pdsi"))),
                                   by = c("PlotCode" = "PlotCod")) %>% 
  # We're going to assume that NAs in the PDSI data mean 0% in severe to extreme drought.
  # The way the GEE script is written, I don't think it can count instances with less than
  # one severe to extreme drought observation so it replaces with NAs. There might be a few
  # points where the data were masked out for other reasons than the drought threshold,
  # but SPEI (from the same GridMet rasters) only have a handful of cases like this
  mutate(pdsi1y = replace_na(pdsi1y, 0),
         pdsi2y = replace_na(pdsi2y, 0),
         pdsi5y = replace_na(pdsi5y, 0))

spei1y_plot <- box_pretty_plot(dat = plot_data_descriptive,
                               y_var = "spei1y",
                               y_lab = "SPEI (1 yr.) at time
of field sampling")
spei1y_plot

spei2y_plot <- box_pretty_plot(dat = plot_data_descriptive,
                               y_var = "spei2y",
                               y_lab = "SPEI (2 yr.) at time
of field sampling")
spei2y_plot

# Duniway et al 2023 figure 9c
spei5y_plot <- box_pretty_plot(dat = plot_data_descriptive,
                               include_plot_count = F,
                               y_var = "spei5y",
                               y_lab = "SPEI (5 yr.) at time
of field sampling")
spei5y_plot

pdsi_1y_plot <- rain_pretty_plot(dat = plot_data_descriptive,
                                fill_var = "PlantCommunity_fuzzy_reordered",
                                y_var = "pdsi1y",
                                y_lab = "% of PDSI observations <-4
for 1 yr. prior to field samling") + ylim(c(0, 100))
pdsi_1y_plot

pdsi_2y_plot <- rain_pretty_plot(dat = plot_data_descriptive,
                                   fill_var = "PlantCommunity_fuzzy_reordered",
                                y_var = "pdsi2y",
                                y_lab = "% of PDSI observations <-4
for 2 yrs. prior to field samling") + ylim(c(0, 100))
pdsi_2y_plot

pdsi_5y_plot <- rain_pretty_plot(dat = plot_data_descriptive,
                                   fill_var = "PlantCommunity_fuzzy_reordered",
                                y_var = "pdsi5y",
                                y_lab = "% of PDSI observations <-4
for 5 yrs. prior to field samling") + ylim(c(0, 100))
pdsi_5y_plot

drought_plots <- cowplot::plot_grid(spei1y_plot, spei2y_plot, spei5y_plot,
                                    pdsi_1y_plot, pdsi_2y_plot, pdsi_5y_plot,
                                    ncol = 3,
                                    labels = "auto")
drought_plots

today <- Sys.Date()
highrestiff(plot_obj = drought_plots,
            file = paste0(output_figure_folder, "/Boxplot/SW_SandyLoamyUplands_drought_boxplots_", today, ".tif"),
            width_in = 9,
            height_in = 6,
            resolution_dpi = 300)

# Duniway et al 2023 figure 9 all
# disturbance_drought_col1_plots <- cowplot::plot_grid(burn_plot_all, 
#                                                 spei5y_plot,
#                                         ncol = 1, 
#                                         labels = c("a", "c"))
# disturbance_drought_plots <- cowplot::plot_grid(disturbance_drought_col1_plots, 
#                                                 protection_lumped_plot + 
#                                                   theme(legend.position = "bottom",
#                                                         legend.direction = "vertical") ,
#                                         nrow = 1, 
#                                         labels = c("", "b"))
disturbance_drought_plots <- cowplot::plot_grid(burn_plot_all,
                                                protection_lumped_plot,
                                                spei5y_plot,
                                                ncol = 1, labels = "auto")
disturbance_drought_plots

today <- Sys.Date()
highrestiff(plot_obj = disturbance_drought_plots,
            file = paste0(output_figure_folder, "/Bar/SW_SandyLoamyUplands_disturbance_drought_boxplots_", today, ".tif"),
            width_in = 3.54, #5.51,
            height_in = 8,
            resolution_dpi = 500)

# 9. Sage grouse summaries
# sage grouse indicator plants
# Duniway et al 2023 figure 6b
plot_data_descriptive$GrassPctPerennial <- (plot_data_descriptive$AH_PerenGrassCover/plot_data_descriptive$AH_GrassCover)*100

grass_pct_perennial_plot <- box_pretty_plot(dat = plot_data_descriptive,
                                            include_plot_count = F,
                                            y_var = "GrassPctPerennial",
                                            y_lab = "Perennial grass to
total grass (%)")
grass_pct_perennial_plot

# Duniway et al 2023 figure 6a
sagegrouse_grps <- c(#"AH_GrassCover",
                     "ARTR2",
                     #"AH_TreeCover"
                     "AH_PerenGrassCover"
                )
sagegrouse_cover_plot <- ggplot(data = select(plot_data_descriptive, 
                                             any_of(c("PlantCommunity_fuzzy_reordered", sagegrouse_grps))) %>%
                                   tidyr::pivot_longer(cols = all_of(sagegrouse_grps),
                      names_to = "CoverType",
                      values_to = "PctCover"),
                             aes(fill=PlantCommunity_fuzzy_reordered, y=PctCover, x=CoverType)) +
  geom_boxplot() +
  scale_fill_manual(values = pal_veg2_reordered,
                    name = "Cluster state"
                    ) +
  scale_x_discrete(labels=c("Perennial grass", #"Grass", "Tree",
                               "Artemisia tridentata
(all subspecies)")) +
  xlab(NULL) +
  ylab("Cover (%)") +
  theme_bw() +
  theme(axis.text.x = element_text(#angle = 45, hjust = 1,
                                   size = 7, color = "black"),
        axis.text.y = element_text(size = 7, color = "black"),
          axis.title = element_text(size = 8),
        legend.text = element_text(size = 7),
        legend.title = element_text(size = 8),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())

sagegrouse_cover_plot

# sagebrush core habitat
sagebrush_habitat_raster <- raster::raster("G:/Base Layers/Vegetation/Sagebrush_core_habitats/SEI_2017_2020_30_Current.tif")
plot_sagebrush <- sf::st_as_sf(raster::extract(x = sagebrush_habitat_raster,
                               y = fuzzy_cluster_memb_sf,
                               sp = T)) %>%
  sf::st_drop_geometry() %>% 
  mutate(sagebrush_biome_2017_2020 = stringr::str_replace_all(as.character(Definition),
                                                                c("0" = "Non-sagebrush area",
                                                                  "1" = "Core habitat",
                                                                  "2" = "Growth opportunity area",
                                                                  "3" = "Other rangeland")) %>% 
           replace_na(., "Non-sagebrush area") %>% 
           factor(x = ., levels = c("Core habitat", "Growth opportunity area",
                                    "Other rangeland", "Non-sagebrush area"),
                  ordered = T)
         )

plot_data_descriptive <- left_join(plot_data_descriptive, select(plot_sagebrush, PlotCode, sagebrush_biome_2017_2020))

sagebrush_biome_pct_byclass <- plot_data_descriptive %>%
  group_by(sagebrush_biome_2017_2020) %>%
  summarise(pct_OpenWoodland = (length(which(PlantCommunity_fuzzy_reordered == "Open woodland"))/n())*100,
            pct_Shrubland = (length(which(PlantCommunity_fuzzy_reordered == "Shrubland"))/n())*100,
            pct_Invaded = (length(which(PlantCommunity_fuzzy_reordered == "Invaded"))/n())*100,
            pct_Grassland = (length(which(PlantCommunity_fuzzy_reordered == "Grassland"))/n())*100) %>%
  tidyr::pivot_longer(data=.,
               cols = c(pct_OpenWoodland,
                        pct_Shrubland,
                        pct_Invaded,
                        pct_Grassland),
               names_to = "PlantCommunity",
               values_to = "Percent") %>%
  mutate(PlantCommunity_fuzzy_reordered = factor(PlantCommunity,
                                                 levels = c("pct_OpenWoodland", "pct_Shrubland", "pct_Invaded",
                                                            "pct_Grassland"),
         labels = c("Open woodland", "Shrubland",
                    "Invaded", "Grassland"),
         ordered = T))

sagebrush_biome_plot <- ggplot(data = sagebrush_biome_pct_byclass,
                          aes(x=sagebrush_biome_2017_2020, y=Percent, fill=PlantCommunity_fuzzy_reordered)) +
  geom_col() +
  scale_fill_manual(values = pal_veg2_reordered,
                    name = "Cluster state"
                    ) +
  scale_x_discrete(breaks = levels(sagebrush_biome_pct_byclass$sagebrush_biome_2017_2020),
                   labels = c("Core
habitat",
                              "Growth
opportunity
area",
                              "Other
rangeland",
                              "Non-sagebrush
area")) +
  xlab(NULL) +
  ylab("% of plots in
habitat type") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7, color = "black"),
        axis.text.y = element_text(size = 7, color = "black"),
          axis.title = element_text(size = 8),
        legend.text = element_text(size = 7),
        legend.title = element_text(size = 8),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()#,
        #plot.margin = unit(c(2,2,2,5), "mm")
        )
sagebrush_biome_plot

# sagegrouse_panel2 <- cowplot::plot_grid(grass_pct_perennial_plot, cowplot::get_legend(sagegrouse_cover_plot),
#                                         nrow = 1, labels = c("b", NULL))
# sagegrouse_plots <- cowplot::plot_grid(sagegrouse_cover_plot + theme(legend.position = "none"),
#                                        sagegrouse_panel2,
#                                        nrow = 2,
#                                        labels = c("a", NULL))

#Duniway et al 2023 figure 6 all
sagegrouse_panel2 <- cowplot::plot_grid(grass_pct_perennial_plot, sagebrush_biome_plot + theme(legend.position = "none"),
                                        nrow = 1, labels = c("b", "c"))
sagegrouse_plots <- cowplot::plot_grid(sagegrouse_cover_plot + theme(legend.position = "right"),
                                       sagegrouse_panel2,
                                       nrow = 2, rel_heights = c(.85, 1),
                                       labels = c("a", NULL))

sagegrouse_plots

today <- Sys.Date()
highrestiff(plot_obj = sagegrouse_plots,
            file = paste0(output_figure_folder, "/Boxplot/SW_SandyLoamyUplands_sagegrouse_indicators_boxplot_", today, ".tif"),
            width_in = 5.51,
            height_in = 5,
            resolution_dpi = 500)

# 10. extract county data
counties <- st_read(dsn = "G:/Base Layers/Political and Infrastructure/Counties",
                    layer = "tl_rd22_us_county")

fuzzy_cluster_memb_sf <- st_transform(fuzzy_cluster_memb_sf, crs=st_crs(counties))

counties_plots <- st_join(x=fuzzy_cluster_memb_sf, y=counties) %>% 
  mutate(state_county = paste(STATEFP, NAMELSAD, sep = "_")) %>% 
  select(PlotCode, state_county)

plot_data_descriptive <- left_join(plot_data_descriptive, counties_plots, by = "PlotCode")

rm(counties)



# write out the data needed for modeling
plot_data_descriptive <- plot_data_descriptive %>% 
  # NOTE the PADUS codes are stored in the "GAP_Sts" attribute, but extraction pulls the raster value,
  # which is different! Checked attribute table in Arc to sort it out!
  mutate(PADUS_protection_lumped = ifelse(test = GAP_Sts %in% c(3,4),
                                          yes = "Protected public",
                                          no = NA)) %>%
  mutate(PADUS_protection_lumped =  ifelse(test = GAP_Sts %in% c(1,2),
                                           yes = "Unprotected public",
                                           no = PADUS_protection_lumped)) %>%
  mutate(PADUS_protection_lumped = ifelse(test = is.na(GAP_Sts),
                                          yes = "Private or tribal",
                                          no = PADUS_protection_lumped))

modeling_data <- select(plot_data_descriptive,
                        PlotCode, # unique plot ID
                        SourceKey, # data set the plot is from
                        PlantCommunity_fuzzy, PlantCommunity_fuzzy_reordered, Best_group_membership, # best class
                        Group_1_membership, Group_2_membership, Group_3_membership, Group_4_membership, # class probability
                        state_county,
                        Years_burn_to_sampling, Times_burned_presampling, # fire data
                        PADUS_protection_lumped, # PAD-US protection class
                        spei1y, spei2y, spei5y # SPEI
                        )

class(modeling_data) # check to make sure NO location data are included

today <- Sys.Date()
write.csv(modeling_data,
          file = paste0("../../Analyses/", target_ESG, "_MixedModelData_", today, ".csv"),
          row.names = F)

# write out data for possible data release
# for treatment data, we will just do the most recent treatement, and we will
# combine the seeding info with the treatment sub-type (mechanical vs chemical woody removal).
# We also want to keep the year of treatment - we'll go with the end year
plot_treatments <- pj_plots_cats %>% 
  filter(!is.na(TreatmentSubType)) %>% 
  mutate(land_treatment_type = ifelse(seeded == 1,
                                      yes = paste(TreatmentSubType, "woody removal - seeded"),
                                      no = paste(TreatmentSubType, "woody removal - unseeded"))
         ) %>% 
  arrange(yearEnd) %>% 
  group_by(PlotCode) %>% 
  summarise(land_treatment_year = last(yearEnd), # most recent year
            land_treatment_type = last(land_treatment_type)) %>%  # most recent treatment
  st_drop_geometry()

release_data <- left_join(plot_data_descriptive, plot_scores) %>% 
  left_join(., select(plot_data_first, PlotCode,
                      AH_C3IntroducedPerenGrassCover, AH_C3NativePerenGrassCover,    
                      AH_C4NativePerenGrassCover, AH_IntroducedAnnForbCover, 
                      AH_IntroducedAnnGrassCover, AH_IntroducedPerenForbCover,
                      AH_NativeAnnForbCover, AH_NativeAnnGrassCover,
                      AH_NativePerenForbCover, AH_C4IntroducedPerenGrassCover,
                      AH_ArtemisiaTridentataCover, AH_OpuntiaCover,
                      BareSoilCover, FH_TotalLitterCover,
                      FH_LichenMossCover)
            ) %>% 
  left_join(., plot_treatments) %>% 
  select(# Plot identifiers
    PlotCode, SourceKey,
    # clustering
    PlantCommunity_fuzzy, PlantCommunity_fuzzy_reordered,
    # ordination
    NMDS1, NMDS2, NMDS3,
    # Mike asked for "indicators, functional cover" with no other details
    AH_C3IntroducedPerenGrassCover, AH_C3NativePerenGrassCover,    
    AH_C4NativePerenGrassCover, AH_IntroducedAnnForbCover, 
    AH_IntroducedAnnGrassCover, AH_IntroducedPerenForbCover,
    AH_NativeAnnForbCover, AH_NativeAnnGrassCover,
    AH_NativePerenForbCover, AH_C4IntroducedPerenGrassCover,
    AH_ArtemisiaTridentataCover, AH_OpuntiaCover,
    AH_ShrubCover, AH_SubShrubCover, AH_TreeCover,
    TotalFoliarCover, BareSoilCover, FH_TotalLitterCover,
    FH_LichenMossCover, CA_percent_100plus, CA_percent_200plus,
    # cluster membership
    Group_1_membership, Group_2_membership, Group_3_membership, Group_4_membership,
    # state & county
    state_county,
    # fire data
    Years_burn_to_sampling, Times_burned_presampling, 
    # drought
    spei1y, spei2y, spei5y, 
    # PAD-US protection class
    PADUS_protection_lumped,
    # land treatment data
    land_treatment_year, land_treatment_type,
    # climate
    AI, Ann_ppt_mm, Summer_ppt_ratio, Ann_tmean, Ann_tmax, Ann_tmin,
    # gridded soils
    clay_pct_mean, sand_pct_mean, fragment_pct_mean, soil_depth_class,
    # sagebrush biome 
    sagebrush_biome_2017_2020
  )

class(release_data) # check to make sure NO location data are included

today <- Sys.Date()
write.csv(release_data,
          file = paste0("../../Analyses/", target_ESG, "_DataRelease_", today, ".csv"),
          row.names = F)

# write out complete species tables for each state for Mike to reference while
# writing the STM paper
today <- Sys.Date()
write.csv(dom_spp_grp1,
          file = paste0("../../Analyses/", target_ESG, "_SpeciesTable_OpenWoodland_", today, ".csv"),
          row.names = F)

write.csv(dom_spp_grp2,
          file = paste0("../../Analyses/", target_ESG, "_SpeciesTable_Shrubland_", today, ".csv"),
          row.names = F)

write.csv(dom_spp_grp3,
          file = paste0("../../Analyses/", target_ESG, "_SpeciesTable_Invaded_", today, ".csv"),
          row.names = F)

write.csv(dom_spp_grp4,
          file = paste0("../../Analyses/", target_ESG, "_SpeciesTable_Grassland_", today, ".csv"),
          row.names = F)

# ## make a master data frame for Mike to work with - NRI only allowed for Four Corners states (no WY)
# Mike_data <- filter(plot_data_descriptive, PlotCode %in% fuzzy_cluster_memb_sf_mike$PlotCode) %>%
#   left_join(., FireFeatureData_plots)
# 
# Mike_pj_plots <- filter(pj_plots_cats, PlotCode %in% fuzzy_cluster_memb_sf_mike$PlotCode)
# 
# write.csv(plot_data_descriptive,
#           file = file.path("../../RAP_do_not_sync", target_ESG, "SW_SandyLoamyUplands_plot_data_Mike.csv"),
#           row.names = F)
# write.csv(Mike_pj_plots,
#           file = file.path("../../RAP_do_not_sync", target_ESG, "SW_SandyLoamyUplands_pj_treatment_plots_Mike.csv"),
#           row.names = F)
# write.csv(GAP_proportions_byclass,
#           file = file.path("../../RAP_do_not_sync", target_ESG, "SW_SandyLoamyUplands_PADUS_GAP_proportions_Mike.csv"),
#           row.names = F)

# ## make a master data frame for Mike to work with - NRI only allowed for Four Corners states (no WY)
# Mike_data <- filter(plot_data_descriptive, PlotCode %in% fuzzy_cluster_memb_sf_mike$PlotCode) %>% 
#   left_join(., FireFeatureData_plots)
# 
# Mike_pj_plots <- filter(pj_plots_cats, PlotCode %in% fuzzy_cluster_memb_sf_mike$PlotCode)
# 
# write.csv(Mike_data, 
#           file = file.path("../../RAP_do_not_sync", target_ESG, "SW_SandyLoamyUplands_plot_data_Mike.csv"),
#           row.names = F)
# write.csv(Mike_pj_plots, 
#           file = file.path("../../RAP_do_not_sync", target_ESG, "SW_SandyLoamyUplands_pj_treatment_plots_Mike.csv"),
#           row.names = F)
# write.csv(GAP_proportions_byclass, 
#           file = file.path("../../RAP_do_not_sync", target_ESG, "SW_SandyLoamyUplands_PADUS_GAP_proportions_Mike.csv"),
#           row.names = F)

# 11. Correlation figure
# plyr::count(cor_data, "soil_depth_class")
cor_data <- select(release_data,
                   NMDS1, NMDS2, NMDS3,
                   Group_1_membership, Group_2_membership, Group_3_membership, Group_4_membership,
                   #climate
                   AI, Ann_ppt_mm, Summer_ppt_ratio, Ann_tmean, Ann_tmax, Ann_tmin,
                   # Soils
                   clay_pct_mean, sand_pct_mean, fragment_pct_mean, soil_depth_class
                   ) %>% 
  # combine bedrock and very shallow into one category due to Colby's mapping 
  # methods (and also there are no very shallows in the data)
  # combine moderately deep and deep into one category due to mapping errors and
  # low representation of mod deep in the data
  mutate(soil_depth_class = as.character(soil_depth_class) %>% 
           ifelse(test = . == "Bedrock",
                  yes = "Very shallow",
                  no = .) %>% 
           ifelse(test = . == "Moderately deep",
                  yes = "Deep",
                  no = .) %>% 
           ordered(., levels = c("Very shallow",
                                 "Shallow",
                                 "Deep",
                                 "Very deep"))
           )

cor_matrix_stat <- data.frame(NMDS1=rep(NA, ncol(cor_data)),
                              NMDS2=rep(NA, ncol(cor_data)),
                              NMDS3=rep(NA, ncol(cor_data)),
                              Group_1_membership=rep(NA, ncol(cor_data)),
                              Group_2_membership=rep(NA, ncol(cor_data)),
                              Group_3_membership=rep(NA, ncol(cor_data)),
                              Group_4_membership=rep(NA, ncol(cor_data)),
                              AI=rep(NA, ncol(cor_data)),
                              Ann_ppt_mm=rep(NA, ncol(cor_data)),
                              Summer_ppt_ratio=rep(NA, ncol(cor_data)),
                              Ann_tmean=rep(NA, ncol(cor_data)),
                              Ann_tmax=rep(NA, ncol(cor_data)),
                              Ann_tmin=rep(NA, ncol(cor_data)),
                              clay_pct_mean=rep(NA, ncol(cor_data)),
                              sand_pct_mean=rep(NA, ncol(cor_data)),
                              fragment_pct_mean=rep(NA, ncol(cor_data)),
                              soil_depth_class=rep(NA, ncol(cor_data))
                              )

row.names(cor_matrix_stat) <- colnames(cor_matrix_stat)

cor_matrix_p <- cor_matrix_stat

for(col1 in colnames(cor_data)){
  for(col2 in colnames(cor_data)){
    if(col1==col2){
      corstat <- 1
      corp <- 0
    }else{
      cor <- pspearman::spearman.test(x=cor_data[[col1]], y=cor_data[[col2]],
                                      alternative = "two.sided", approximation = "exact")
      corstat <- cor$estimate
      corp <- cor$p.value
    }
    cor_matrix_stat[col1, col2] <- corstat
    cor_matrix_p[col1, col2] <- corp
  }
}

cor_p_tall <- cor_matrix_p %>%
  mutate(Variable1=rownames(.)) %>%
  tidyr::pivot_longer(., NMDS1:soil_depth_class, names_to="Variable2", values_to="cor_p")

cor_tall <- cor_matrix_stat %>%
  mutate(Variable1=rownames(.)) %>%
  tidyr::pivot_longer(., NMDS1:soil_depth_class, names_to="Variable2", values_to="cor") %>%
  left_join(., cor_p_tall)

# # everything x everything draft
# ggplot(data = cor_tall, aes(x=Variable1, y = Variable2, fill=cor)) +
#   geom_tile() +
#   scale_fill_gradient2(low= "brown", mid = "white", high = "blue", midpoint = 0) +
#   xlab(NULL) +
#   ylab(NULL) +
#   geom_text(aes(label=round(cor, digits = 2)),
#             colour="gray25", size=3.5, show.legend = F) +
#   theme_bw() +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
#         axis.text.y = element_text(size = 12),
#         legend.position = "right")

`%notin%` <- Negate(`%in%`)

# Duniway et al 2023 figure 4
all_cor_tall_pub <- filter(cor_tall, Variable1 %in% c("NMDS1", # vars on x axis
                                                      "NMDS2",
                                                      "NMDS3",
                                                      "Group_1_membership",
                                                      "Group_2_membership",
                                                      "Group_3_membership",
                                                      "Group_4_membership"
                                                      ) 
                           & Variable2 %notin%  c("NMDS1", # vars on x axis
                                                  "NMDS2",
                                                  "NMDS3",
                                                  "Group_1_membership",
                                                  "Group_2_membership",
                                                  "Group_3_membership",
                                                  "Group_4_membership"
                           )) %>%
  mutate(sig_p = ifelse(test = cor_p <= 0.05,
                        yes = "sig",
                        no = "notsig"))

#clearcolor <- rgb(0, 0, 0, max = 255, alpha = 0, names = "transparent")

cor_pub_fig <- ggplot(data = all_cor_tall_pub, aes(x=Variable1, y = Variable2, fill=cor)) +
  geom_tile(color="white") +
  scale_fill_gradient2(name = "Spearman's
correlation",
                       low= "sienna", mid = "white", high = "turquoise4", midpoint = 0) +
  scale_x_discrete(limits=c("NMDS1", # vars on x axis
                            "NMDS2",
                            "NMDS3",
                            "Group_1_membership",
                            "Group_2_membership",
                            "Group_3_membership",
                            "Group_4_membership"
  ), 
  labels = c("NMDS axis 1",
             "NMDS axis 2",
             "NMDS axis 3",
             "Open woodland 
membership",
             "Shrubland
membership",
             "Invaded
membership",
             "Grassland
membership"
  ), position = "top") + 
  scale_y_discrete(limits=rev(c("clay_pct_mean", # soils
                                "fragment_pct_mean",
                                "sand_pct_mean",
                                "soil_depth_class",
                                "AI", # climate
                                "Ann_ppt_mm",
                                "Ann_tmax",
                                "Ann_tmean",
                                "Ann_tmin",
                                "Summer_ppt_ratio"
  )), # climate, then soil prof cont then cat
  labels = rev(c("Clay (%)", # soils
                                "Rock
fragments (%)",
                                "Sand (%)",
                                "Soil depth
class",
                                "Aridity index", # climate
                                "Ann. precip. 
(mm)",
                                "Ann. max.
temp. (C)",
                                "Ann. mean 
temp. (C)",
                                "Ann. min. 
temp. (C)",
                                "Summer 
precip. ratio"
  ))) +
  xlab(NULL) +
  ylab(NULL) +
  theme_bw() +
  geom_text(aes(label=round(cor, digits = 2), alpha=sig_p), 
            colour="gray20",
            size=2.6, fontface=2, show.legend = F) +
  scale_alpha_discrete(range = c(0, 1)) +
  # geom_text(aes(label=round(cor, digits = 2), alpha=NULL, color=sig_p), 
  #           #colour="gray25",
  #           size=4.25, fontface=2, show.legend = F) +
  # scale_color_manual(values = c(clearcolor, "gray25") ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 0, size = 7, color = "black"),
        axis.text.y = element_text(size = 7, color = "black"),
        legend.text = element_text(size = 7, color = "black"),
        legend.title = element_text(size=8, color = "black"),
        panel.grid=element_blank(),
        legend.position = "bottom", 
        plot.margin = unit(c(0,0.25,0,0.05), "in"))+
  coord_cartesian(expand=FALSE)
cor_pub_fig

today <- Sys.Date()
highrestiff(plot_obj = cor_pub_fig,
            file = paste0(output_figure_folder, "/Correlation/SW_SandyLoamyUplands_correlation_", today, ".tif"),
            width_in = 3.54,
            height_in = 4.75,
            resolution_dpi = 500)

# Duniway et al 2023 figure A1 (supplemental)
all_cor_tall_supp <- mutate(cor_tall,
                           sig_p = ifelse(test = cor_p <= 0.05,
                                          yes = "sig",
                                          no = "notsig"))

#clearcolor <- rgb(0, 0, 0, max = 255, alpha = 0, names = "transparent")

cor_supp_fig <- ggplot(data = all_cor_tall_supp, aes(x=Variable1, y = Variable2, fill=cor)) +
  geom_tile(color="white") +
  scale_fill_gradient2(name = "Spearman's
correlation",
                       low= "sienna", mid = "white", high = "turquoise4", midpoint = 0) +
  scale_x_discrete(limits=c("NMDS1", # ordination
                            "NMDS2",
                            "NMDS3",
                            "Group_1_membership", # cluster membership
                            "Group_2_membership",
                            "Group_3_membership",
                            "Group_4_membership",
                            "clay_pct_mean", # soils
                                "fragment_pct_mean",
                                "sand_pct_mean",
                                "soil_depth_class",
                                "AI", # climate
                                "Ann_ppt_mm",
                                "Ann_tmax",
                                "Ann_tmean",
                                "Ann_tmin",
                                "Summer_ppt_ratio"
  ), 
  labels = c("NMDS axis 1",
             "NMDS axis 2",
             "NMDS axis 3",
             "Open woodland membership",
             "Shrubland membership",
             "Invaded membership",
             "Grassland membership",
             "Clay (%)", # soils
             "Rock fragments (%)",
             "Sand (%)",
             "Soil depth class",
             "Aridity index", # climate
             "Ann. precip. (mm)",
             "Ann. max. temp. (C)",
             "Ann. mean temp. (C)",
             "Ann. max. temp. (C)",
             "Summer precip. ratio"
  ), position = "top") + 
  scale_y_discrete(limits=rev(c("NMDS1", # ordination
                            "NMDS2",
                            "NMDS3",
                            "Group_1_membership", # cluster membership
                            "Group_2_membership",
                            "Group_3_membership",
                            "Group_4_membership",
                            "clay_pct_mean", # soils
                                "fragment_pct_mean",
                                "sand_pct_mean",
                                "soil_depth_class",
                                "AI", # climate
                                "Ann_ppt_mm",
                                "Ann_tmax",
                                "Ann_tmean",
                                "Ann_tmin",
                                "Summer_ppt_ratio"
  )), # climate, then soil prof cont then cat
  labels = rev(c("NMDS axis 1",
             "NMDS axis 2",
             "NMDS axis 3",
             "Open woodland 
membership",
             "Shrubland
membership",
             "Invaded
membership",
             "Grassland
membership","Clay (%)", # soils
                                "Rock 
fragments (%)",
                                "Sand (%)",
                                "Soil depth class",
                                "Aridity index", # climate
                                "Ann. precip.
(mm)",
                                "Ann. max. 
temp. (C)",
                                "Ann. mean 
temp. (C)",
                                "Ann. min. 
temp. (C)",
                                "Summer 
precip. ratio"
  ))) +
  xlab(NULL) +
  ylab(NULL) +
  theme_bw() +
  geom_text(aes(label=round(cor, digits = 2), alpha=sig_p), 
            colour="gray20",
            size=2.6, fontface=2, show.legend = F) +
  scale_alpha_discrete(range = c(0, 1)) +
  # geom_text(aes(label=round(cor, digits = 2), alpha=NULL, color=sig_p), 
  #           #colour="gray25",
  #           size=4.25, fontface=2, show.legend = F) +
  # scale_color_manual(values = c(clearcolor, "gray25") ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 0, size = 8, color = "black"),
        axis.text.y = element_text(size = 8, color = "black"),
        legend.position = "right",
        legend.text = element_text(size = 7, color = "black"),
        legend.title = element_text(size=8, color = "black"),
        panel.grid=element_blank())+
  coord_cartesian(expand=FALSE)
cor_supp_fig

today <- Sys.Date()
highrestiff(plot_obj = cor_supp_fig,
            file = paste0(output_figure_folder, "/Correlation/SW_SandyLoamyUplands_correlation_fullmatrix_", today, ".tif"),
            width_in = 7.48,
            height_in = 7,
            resolution_dpi = 500)
# Make materials for spring 2023 workshop handouts
# 1. condensed community tables
# 2. bar charts of functional types and ground cover - separate copy for each state

### community tables
functype_table_palette <- c("#80B1D3", "#80B1D3", "#9ECAE1", "#FFFFB3", "#FFFFB3",  "#C6DBEF", "#FFFFB3", "#FFFFB3", "#C6DBEF", "#FB8072", "#BEBADA", "#8DD3C7")
names(functype_table_palette) <- sort(unique(dom_spp_grp1$Indicator))

dom_spp_grp2_workshop <- dom_spp_grp2_pub %>%
  filter(`Mean cover (%)` >= 1) %>%
  arrange(desc(`Mean cover (%)`))

dom_spp_grp1_workshop <- dom_spp_grp1_pub %>%
  filter(`Mean cover (%)` >= 1) %>%
  arrange(desc(`Mean cover (%)`))

dom_spp_grp4_workshop <- dom_spp_grp4_pub %>%
  filter(`Mean cover (%)` >= 1) %>%
  arrange(desc(`Mean cover (%)`))

dom_spp_grp3_workshop <- dom_spp_grp3_pub %>%
  filter(`Mean cover (%)` >= 1) %>%
  arrange(desc(`Mean cover (%)`))

workingdat <- dom_spp_grp3_workshop

kableExtra::kbl(x = workingdat,
                format = "html", row.names = F,
                  col.names = c("Functional group",
                                "Scientific name",
                                "Common name",
                                "USDA Plants
code",
                                "Mean
cover (%)",
                                "% of plots
where present"), align = "c",
      caption = #"Sagebrush and C3 perennial grass shrubland species cover" # grp2
        #"Pinyon-juniper and gambel oak woodland" # grp1
        #"C3 perennial grassland" # grp4
        "Non-native annual grass and sagebrush shrubland" # grp3
      ) %>%
    kable_styling(bootstrap_options = c("bordered", "condensed")) %>%
  column_spec(kable_input=., column=4, width = "2cm") %>%
  column_spec(kable_input=., column=5, width = "2cm") %>%
  column_spec(kable_input=., column=6, width = "2.5cm") %>% 
  row_spec(kable_input=., row=0, bold=T, background = "white") %>% 
  row_spec(kable_input=., row = which(workingdat$`Functional group`=="C3 introduced perennial grasses"), background = functype_table_palette["C3 introduced perennial grasses"]) %>% 
  row_spec(kable_input=., row = which(workingdat$`Functional group`=="C3 native perennial grasses"), background = functype_table_palette["C3 native perennial grasses"]) %>%
  row_spec(kable_input=., row = which(workingdat$`Functional group`=="C4 native perennial grasses"), background = functype_table_palette["C4 native perennial grasses"]) %>%
    row_spec(kable_input=., row = which(workingdat$`Functional group`=="Introduced annual forbs"), background = functype_table_palette["Introduced annual forbs"]) %>%
  row_spec(kable_input=., row = which(workingdat$`Functional group`=="Introduced annual grasses"), background = functype_table_palette["Introduced annual grasses"]) %>%
  row_spec(kable_input=., row = which(workingdat$`Functional group`=="Introduced perennial forbs"), background = functype_table_palette["Introduced perennial forbs"]) %>%
  row_spec(kable_input=., row = which(workingdat$`Functional group`=="Native annual forbs"), background = functype_table_palette["Native annual forbs"]) %>%
  row_spec(kable_input=., row = which(workingdat$`Functional group`=="Native annual grasses"), background = functype_table_palette["Native annual grasses"]) %>%
  row_spec(kable_input=., row = which(workingdat$`Functional group`=="Native perennial forbs"), background = functype_table_palette["Native perennial forbs"]) %>%
    row_spec(kable_input=., row = which(workingdat$`Functional group`=="Shrubs"), background = functype_table_palette["Shrubs"]) %>% 
      row_spec(kable_input=., row = which(workingdat$`Functional group`=="Succulents"), background = functype_table_palette["Succulents"]) %>% 
      row_spec(kable_input=., row = which(workingdat$`Functional group`=="Trees"), background = functype_table_palette["Trees"])

### bar charts
functypes_grps <- c("TotalFoliarCover", #"AH_WoodyCover", #AH_ForbGrassCover, 
                    "AH_ShrubCover", "AH_TreeCover",
                        "AH_PerenForbGrassCover", "AH_AnnForbGrassCover"
                )

plot_data_descriptive_grp <- filter(plot_data_descriptive, PlantCommunity_fuzzy==1)

functypes_cover_states <- ggplot(data = select(plot_data_descriptive_grp, 
                                             any_of(c("PlantCommunity_fuzzy", functypes_grps))) %>%
                                   tidyr::pivot_longer(cols = all_of(functypes_grps),
                      names_to = "CoverType",
                      values_to = "PctCover"),
                             aes(x=PlantCommunity_fuzzy, y=PctCover, fill=CoverType)) +
  geom_boxplot(outlier.shape = NA) + # make outliers invisible
  scale_fill_manual(values = c("#FFFFB3", "#80B1D3", "#FB8072", "#8DD3C7", "#666666"),
                    name = "Functional type",
                    labels = c("Annual herbaceous", "Perennial herbaceous",
                               #"Woody",
                               "Shrub", "Tree",
                               "Total foliar cover")
                    ) +
  scale_x_discrete(labels=group_labels_fuzzy[1]) +
  xlab(NULL) +
  ylab("Cover (%)") +
  ylim(c(0,100)) +
  ggtitle("Plant functional groups") +
  theme_bw() +
  theme(axis.text.x = element_text( color = "black"),
        axis.text.y = element_text(color = "black"))
functypes_cover_states

today <- Sys.Date()
highrestiff(plot_obj = functypes_cover_states,
            file = paste0(output_figure_folder, "/JFSP_Workshop_1/PlantFuncTypes_", "Woodland", "_", today, ".tif"),
            width_in = 6,
            height_in = 3.5,
            resolution_dpi = 300)

ground_grps <- c("BareSoilCover", "FH_TotalLitterCover", "CA_percent_200plus",
                 "FH_LichenMossCover", "CA_percent_100plus"
                 )

plot_data_first_tall_grp <- filter(plot_data_first_tall, PlantCommunity_fuzzy==4)

ground_cover_states <- ggplot(data = filter(plot_data_first_tall_grp, CoverType %in% ground_grps),
                              aes(x=PlantCommunity_fuzzy, y=PctCover, fill=CoverType)) +
  geom_boxplot(outlier.shape = NA) +
  scale_fill_manual(values = c("#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494"),
                    name = "Functional group",
                    labels = names(cover_labels[which(cover_labels %in% ground_grps)])
                    ) +
  scale_x_discrete(labels=group_labels_fuzzy[4]) +
  xlab(NULL) +
  ylab("Cover (%)") +
  ylim(c(0,100)) +
  ggtitle("Ground cover") +
  theme_bw() +
  theme(axis.text.x = element_text(color = "black"),
        axis.text.y = element_text(color = "black"))

ground_cover_states

today <- Sys.Date()
highrestiff(plot_obj = ground_cover_states,
            file = paste0(output_figure_folder, "/JFSP_Workshop_1/GroundCover_", "Grassland", "_", today, ".tif"),
            width_in = 6,
            height_in = 3.5,
            resolution_dpi = 300)
woodland_data_first <- filter(plot_data_first, PlantCommunity_fuzzy==1)

woodland_ord.df <- dplyr::select(woodland_data_first, -SourceKey, -PlotID, -SiteName, -PlotName,
                        -Year, -Longitude_NAD83, -Latitude_NAD83,
                        -Month, -Day,
                        -UCRB_SGUs_ProbMax, -geometry, -PlotCode_NoYear,
                        -PlantCommunity_fuzzy
                        ) %>%
  tibble::column_to_rownames("PlotCode") # keep an ID code for the plot as the row name to prevent data scrambling problems

# remove rare species
woodland_sp_pa <- mutate(rowwise(woodland_ord.df), across(everything(), function(x){if(x>0){1}else{0}})) # make a presence/absence data frame
woodland_sp_keep <- names(colSums(woodland_sp_pa)[which(colSums(woodland_sp_pa)>2)]) 
woodland_ord.df <- select(woodland_ord.df, all_of(woodland_sp_keep))

# remove unknown species if present
woodland_ord.df <- select(woodland_ord.df, -any_of(c("UNKS", "UNKPG", "UNKAF", "UNKPF", "PG1", "PG01", "AF1")))

#NMDS_scree(woodland_ord.df)

woodland_ord_dims <- 3 # change manually based on the plot results

#run NMS ordination 
set.seed(1)
woodland_ord <- metaMDS(woodland_ord.df,
               k=woodland_ord_dims, # number of dimensions
               trymax = 30) 

# visualize results
# 2D plotting
woodland_fit_data <- select(woodland_data_first, PlotCode,
                            PIED, JUOS, QUGA, AH_ArtemisiaTridentataCover) %>%
  left_join(., select(plot_data_descriptive, PlotCode, AH_TreeCover, AH_ShrubCover, AH_SubShrubCover, AH_WoodyCover))

hist(woodland_fit_data$PIED)
hist(woodland_fit_data$JUOS)
hist(woodland_fit_data$QUGA)
hist(woodland_fit_data$AH_ArtemisiaTridentataCover)
hist(woodland_fit_data$AH_TreeCover)
hist(woodland_fit_data$AH_ShrubCover)
hist(woodland_fit_data$AH_SubShrubCover)

woodland_ord.fit <- envfit(woodland_ord ~
                               PIED +
                               JUOS +
                               QUGA +
                               AH_ArtemisiaTridentataCover +
                               AH_TreeCover +
                               AH_ShrubCover
                             ,
                      data=woodland_fit_data, na.rm=T, choices=1:3)

par(mfrow=c(2,2))
# Axes 1x2
plot(woodland_ord, choices = c(1,2), type = "n", # plot the axes
     xlim = c(-1, 1),
     ylim = c(-1, 1))
points(woodland_ord, choices = c(1,2), display = "sites", # plot points - can choose "sites" or "species"
       #col=pal_veg2[plot_data_first$PlantCommunity_fuzzy],
       pch = 21, cex = .6, 
       #bg=pal_veg2[plot_data_first$PlantCommunity_fuzzy]
       )
# ordiellipse(ord, plot_data_first$PlantCommunity_fuzzy, col=pal_veg2, lwd = 2, label = F,
#             choices = c(1,2)) # plot your groups
# # can use oriellipse, orihull, or ordispider to plot groups, depending on your needs
plot(woodland_ord.fit, choices = c(1,2))
#ordisurf(woodland_ord~AH_ShrubCover, woodland_fit_data, choices=c(1,2))

# Axes 3x2
plot(woodland_ord, choices = c(3,2), type = "n", 
     xlim = c(-1, 1),
     ylim = c(-1, 1))
points(woodland_ord, choices = c(3,2), display = "sites", 
       #col=pal_veg2[plot_data_first$PlantCommunity_fuzzy],
       pch = 21, cex = .6, 
       #bg=pal_veg2[plot_data_first$PlantCommunity_fuzzy]
       )
# ordiellipse(woodland_ord, plot_data_first$PlantCommunity_fuzzy, col=pal_veg2, lwd = 2, label = F,
#             choices = c(3,2)) 
plot(woodland_ord.fit, choices = c(3,2))

# Axes 1x3
plot(woodland_ord, choices = c(1,3), type = "n",
     xlim = c(-1, 1),
     ylim = c(-1, 1))
points(woodland_ord, choices = c(1,3), display = "sites",
       #col=pal_veg2[plot_data_first$PlantCommunity_fuzzy],
       pch = 21, cex = .6, 
       #bg=pal_veg2[plot_data_first$PlantCommunity_fuzzy]
       )
# ordiellipse(woodland_ord, plot_data_first$PlantCommunity_fuzzy, col=pal_veg2, lwd = 2, label = F,
#             choices = c(1,3)) 
plot(woodland_ord.fit, choices = c(1,3))

# Legend
# plot(woodland_ord, type = "n", axes=FALSE,
#      display = "sites",
#      col=pal_veg2[plot_data_first$PlantCommunity_fuzzy],
#      xlab = "",
#      ylab = "")
# legend(x="center",
#        legend = group_labels_fuzzy,
#        fill = pal_veg2,
#        title = "Plant communities",
#        bty = "n",
#        y.intersp = 1.5)

par(mfrow=c(1,1))

# print out some data for Tara to play with
postworkshop_check_data <- left_join(select(sf::st_drop_geometry(plot_data_descriptive), SourceKey,
                                            PlotID, SiteName, PlotName,  PlotCode,
                                            Year, Month, Day, PlantCommunity_fuzzy,
                                            TotalFoliarCover, AH_WoodyCover,
                                            AH_ShrubCover, AH_SubShrubCover,
                                            AH_TreeCover, AH_ForbGrassCover,
                                            AH_PerenForbGrassCover, AH_AnnForbGrassCover,
                                            FH_CyanobacteriaCover, CA_percent_100plus,
                                            CA_percent_200plus),
                                     select(sf::st_drop_geometry(plot_data_first),
                                            -Longitude_NAD83, -Latitude_NAD83, -geometry)) %>% 
  mutate(PlantCommunity_fuzzy_name = ifelse(test = PlantCommunity_fuzzy==1,
                                            yes = "Open woodland",
                                            no = ifelse(test = PlantCommunity_fuzzy ==2,
                                                        yes = "Shrubland",
                                                        no = ifelse(test = PlantCommunity_fuzzy==3,
                                                                    yes = "Invaded",
                                                                    no = ifelse(test = PlantCommunity_fuzzy == 4,
                                                                                yes = "Grassland",
                                                                                no = NA)))))


write.csv(postworkshop_check_data,
          file = "C:/Users/aknight/DOI/GS-SBSC JFSP 2022 EcoSites - General/Analysis/Sample_Design_2023/SW_SandyLoamyUplands_plant_data.csv",
          row.names = F)


annack84/STMdevelopment documentation built on April 12, 2024, 6:46 p.m.