# Author: Kevin See
# Purpose: Look at correlations and patterns for GAAs
# Created: 12/17/2020
# Last Modified: 12/17/2021
# Notes: for master sample pts and 200m reaches
#-----------------------------------------------------------------
# load needed libraries
library(QRFcapacity)
library(tidyverse)
library(janitor)
library(magrittr)
library(sf)
library(quantregForest)
library(survey)
# set default theme for ggplot
theme_set(theme_bw())
#-----------------------------------------------------------------
# master sample points
#-----------------------------------------------------------------
data("gaa")
gaa_all = gaa %>%
# filter out a few areas
filter(!HUC6NmNRCS %in% c('Upper Sacramento', 'Southern Oregon Coastal', 'Puget Sound', 'Northern California Coastal', 'Oregon Closed Basins')) %>%
# don't use AEM sites in model
filter(!grepl('^AEM', Site)) %>%
# don't use non-GRTS sites
filter(SiteID_alt != 'NonGRTSSite' | is.na(SiteID_alt)) %>%
filter(!grepl('mega', Site, ignore.case = T)) %>%
rename(Channel_Type = ChanlType) %>%
rename(chnk_strmnet = chnk,
sthd_strmnet = steel) %>%
rename(sthd = sthd_range,
chnk = chnk_range) %>%
mutate_at(vars(Channel_Type),
list(as.factor))
# which GAAs to use
extrap_covars = c('TRange',
# 'GDD',
# 'Precip',
'Elev_M',
'CHaMPsheds',
'NatPrin1',
# 'NatPrin2',
'DistPrin1',
# 'BFW_M',
'SrtCumDrn',
'StrmPwr',
'Slp_NHD_v1',
'Channel_Type',
# 'MAVELV',
'WIDE_BF',
'S2_02_11')
# what type of covariate is each GAA?
extrap_class = gaa_all %>%
select(one_of(extrap_covars)) %>%
as.list() %>%
map_chr(.f = function(x) class(x)[1])
# which ones are numeric?
extrap_num = names(extrap_class)[extrap_class %in% c('integer', 'numeric')]
# which ones are categorical?
extrap_catg = names(extrap_class)[extrap_class %in% c('factor', 'character', 'ordered')]
# # correlation between numeric covariates
# gaa_all %>%
# select(one_of(extrap_num)) %>%
# cor(method = 'spearman')
# compare range of covariates from model dataset and prediction dataset
data('champ_site_2011_17_avg')
range_comp = gaa_all %>%
select(Site,
any_of(extrap_num)) %>%
mutate(Source = if_else(Site %in% unique(champ_site_2011_17_avg$Site),
"CHaMP Reaches",
"non-CHaMP Reaches")) %>%
pivot_longer(any_of(extrap_num),
names_to = "Metric",
values_to = "value") %>%
mutate(across(c(Source,
Metric),
as_factor))
range_max = range_comp %>%
group_by(Metric, Source) %>%
summarise_at(vars(value),
tibble::lst(min, max),
na.rm = T) %>%
filter(Source == 'CHaMP Reaches') %>%
ungroup() %>%
gather(type, value, -Metric, -Source)
range_max_all = range_max %>%
bind_rows(range_comp %>%
group_by(Metric, Source) %>%
summarize(across(value,
tibble::lst(quantile),
probs = c(0.25),
na.rm = T)) %>%
rename(lower_quant = value_quantile) %>%
left_join(range_comp %>%
group_by(Metric, Source) %>%
summarize(across(value,
tibble::lst(quantile),
probs = c(0.75),
na.rm = T)) %>%
rename(upper_quant = value_quantile)) %>%
filter(Source == 'CHaMP Reaches') %>%
ungroup() %>%
pivot_longer(cols = ends_with('quant'),
names_to = "type",
values_to = "value")) %>%
mutate(range_type = if_else(type %in% c('min', 'max'),
'Range',
'IQR'))
covar_range_p = range_comp %>%
mutate(Source = str_remove(Source, " Reaches$")) %>%
ggplot(aes(x = Source,
y = value,
fill = Source)) +
geom_boxplot() +
facet_wrap(~ Metric,
scales = 'free') +
geom_hline(data = range_max,
aes(yintercept = value),
lty = 2,
color = 'darkgray') +
theme_minimal() +
theme(legend.position = "bottom")
# covar_range_p
# ggsave('output/figures/GAA_mastPts_comparison.pdf',
# covar_range_p,
# height = 7,
# width = 10)
# another way to look at it
# break it down by HUC6
comp_df = gaa_all %>%
mutate(Source = if_else(Site %in% unique(pred_hab_sites$Site),
"CHaMP Reaches",
"non-CHaMP Reaches")) %>%
filter(SO_v1 <= 6,
!GNIS_NA %in% c("Columbia River",
"Snake River")) %>%
filter(sthd | chnk) %>%
pivot_longer(cols = any_of(extrap_num),
names_to = "Metric",
values_to = "value") %>%
left_join(range_max_all %>%
select(-range_type) %>%
pivot_wider(names_from = "type",
values_from = "value") %>%
select(-Source)) %>%
mutate(in_range = if_else(value >= min & value <= max,
T, F),
in_iqr = if_else(value >= lower_quant & value <= upper_quant,
T, F)) %>%
filter(!(Metric == "S2_02_11" & value < -9000)) %>%
filter(!is.na(value))
# log these covariates for plotting purposes
log_mets = c('SrtCumDrn',
'StrmPwr')
# 2 plots, one with logged covariates, one without
covar_comp_log = comp_df %>%
filter(Metric %in% log_mets) %>%
ggplot(aes(x = HUC6NmNRCS,
y = value,
fill = HUC6NmNRCS)) +
scale_fill_brewer(palette = "Spectral") +
geom_hline(data = range_max_all %>%
filter(Metric %in% log_mets),
aes(yintercept = value,
linetype = range_type),
color = 'darkblue') +
scale_linetype(guide = 'none') +
geom_boxplot(outlier.color = 'gray70') +
facet_wrap(~ Metric,
nrow = 2,
scales = 'free') +
scale_y_continuous(trans = "log",
breaks = scales::pretty_breaks()) +
theme_minimal() +
theme(legend.position = "bottom",
axis.title = element_blank(),
axis.text.x = element_text(angle = 45,
hjust = 1,
size = 6),
axis.text.y = element_text(size = 6))
covar_comp_nolog = comp_df %>%
filter(!Metric %in% log_mets) %>%
ggplot(aes(x = HUC6NmNRCS,
y = value,
fill = HUC6NmNRCS)) +
scale_fill_brewer(palette = "Spectral") +
geom_hline(data = range_max_all %>%
filter(!Metric %in% log_mets),
aes(yintercept = value,
linetype = range_type),
color = 'darkblue') +
scale_linetype(guide = 'none') +
geom_boxplot(outlier.color = 'gray70') +
facet_wrap(~ Metric,
nrow = 2,
scales = 'free') +
scale_y_continuous(trans = "identity",
breaks = scales::pretty_breaks()) +
theme_minimal() +
theme(legend.position = "bottom",
axis.title = element_blank(),
axis.text.x = element_text(angle = 45,
hjust = 1,
size = 6),
axis.text.y = element_text(size = 6))
covar_comp_p = ggpubr::ggarrange(covar_comp_log,
covar_comp_nolog,
nrow = 1,
widths = c(1, 3.5),
common.legend = T,
legend = "bottom")
# save the plot
# ggsave('output/figures/GAA_mastPts_comparison_huc6.pdf',
# covar_comp_p,
# height = 8,
# width = 11)
# another way to summarize these results
comp_summ = comp_df %>%
mutate(across(HUC6NmNRCS,
fct_drop)) %>%
group_by(Metric,
HUC6NmNRCS,
min,
lower_quant, upper_quant,
max) %>%
summarise(n_rch = n_distinct(Site),
n_champ = n_distinct(Site[Source == 'CHaMP Reaches']),
n_in_range = n_distinct(Site[in_range]),
n_out_range = n_distinct(Site[!in_range]),
# n_in_range = n_distinct(Site[in_iqr]),
# n_out_range = n_distinct(Site[!in_iqr]),
pct_in_range = n_in_range / n_rch,
max_huc = max(value, na.rm = T),
min_huc = min(value, na.rm = T),
.groups = "drop") %>%
mutate(pct_max_over = if_else(max_huc > max,
(max_huc - max) / max,
NA_real_),
pct_min_under = if_else(min_huc < min,
(min_huc - min) / min,
NA_real_))
comp_summ %>%
# filter(!is.na(pct_min_under))
# arrange(pct_in_range)
arrange(desc(pct_max_over))
comp_summ %>%
filter(HUC6NmNRCS == "Clearwater") %>%
pull(pct_in_range)
comp_df %>%
filter(HUC6NmNRCS == "Upper Columbia",
!in_range) %>%
group_by(GNIS_NA) %>%
summarise(n_rch = n_distinct(Site),
.groups = "drop") %>%
arrange(desc(n_rch))
#-----------------------------------------------------------------
# 200m reaches
#-----------------------------------------------------------------
data("rch_200")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.