# Author: Kevin See
# Purpose: Extrapolate QRF model to all 200 m reaches
# Created: 3/20/2020
# Last Modified: 12/15/2021
# Notes:
#-----------------------------------------------------------------
# load needed libraries
library(QRFcapacity)
library(tidyverse)
library(janitor)
library(magrittr)
library(sf)
library(quantregForest)
library(survey)
# set default theme for ggplot
theme_set(theme_bw())
#-----------------------------------------------------------------
# load model fit and habitat data associated with that model
#-----------------------------------------------------------------
mod_choice = c('juv_summer',
'juv_summer_dash',
'redds',
'juv_winter')[1]
load(paste0('output/modelFits/qrf_', mod_choice, '.rda'))
#-----------------------------------------------------------------
# predict capacity at all CHaMP sites
#-----------------------------------------------------------------
# what quantile is a proxy for capacity?
pred_quant = 0.9
set.seed(5)
# for overwintering juveniles, predictions done on channel unit scale, then summed up for each CHaMP site.
if(mod_choice == "juv_winter") {
# note that some Tier1 designations have to be imputed. Even if they're wrong, at least it will be some estimate of capacity, which seems better than 0, when summing at the CHaMP site scale
hab_impute = hab_avg %>%
filter(!is.na(Discharge)) %>%
mutate(across(c(Watershed,
Channel_Type,
Tier1),
fct_drop)) %>%
mutate(across(Tier1,
fct_recode,
NULL = "(Missing)")) %>%
impute_missing_data(data = .,
covars = unique(sel_hab_mets$Metric),
impute_vars = c('Watershed',
'Elev_M',
'Channel_Type',
'CUMDRAINAG'),
method = 'missForest') %>%
# method = "Hmisc") %>%
# method = "randomForestSRC") %>%
select(Site, Watershed, LON_DD, LAT_DD,
VisitID,
Lgth_Wet, Area_Wet,
ChUnitNumber, AreaTotal,
any_of(unique(sel_hab_mets$Metric)))
pred_hab_sites = hab_impute %>%
# filter(Tier1 %in% levels(qrf_mod_df$Tier1))
mutate(chnk_per_m2 = predict(qrf_mods[['Chinook']],
newdata = select(., one_of(unique(sel_hab_mets$Metric))),
what = pred_quant),
chnk_per_m2 = exp(chnk_per_m2) - dens_offset,
chnk_tot = chnk_per_m2 * AreaTotal) %>%
mutate(sthd_per_m2 = predict(qrf_mods[['Steelhead']],
newdata = select(., one_of(unique(sel_hab_mets$Metric))),
what = pred_quant),
sthd_per_m2 = exp(sthd_per_m2) - dens_offset,
sthd_tot = sthd_per_m2 * AreaTotal) %>%
group_by(Site,
Watershed,
LON_DD,
LAT_DD,
VisitID,
Lgth_Wet,
Area_Wet) %>%
summarise(across(c(chnk_tot, sthd_tot),
sum,
na.rm = T),
.groups = "drop") %>%
mutate(chnk_per_m = chnk_tot / Lgth_Wet,
chnk_per_m2 = chnk_tot / Area_Wet,
sthd_per_m = sthd_tot / Lgth_Wet,
sthd_per_m2 = sthd_tot / Area_Wet)
} else {
hab_impute = hab_avg %>%
mutate_at(vars(Watershed, Channel_Type),
list(fct_drop)) %>%
impute_missing_data(data = .,
covars = unique(sel_hab_mets$Metric),
impute_vars = c('Watershed',
'Elev_M',
'Channel_Type',
'CUMDRAINAG'),
method = 'missForest') %>%
select(Site, Watershed, LON_DD, LAT_DD, VisitYear, Lgth_Wet, Area_Wet, one_of(unique(sel_hab_mets$Metric)))
pred_hab_sites = hab_impute %>%
mutate(chnk_per_m = predict(qrf_mods[['Chinook']],
newdata = select(., one_of(unique(sel_hab_mets$Metric))),
what = pred_quant),
chnk_per_m = exp(chnk_per_m) - dens_offset,
chnk_per_m2 = chnk_per_m * Lgth_Wet / Area_Wet) %>%
mutate(sthd_per_m = predict(qrf_mods[['Steelhead']],
newdata = select(., one_of(unique(sel_hab_mets$Metric))),
what = pred_quant),
sthd_per_m = exp(sthd_per_m) - dens_offset,
sthd_per_m2 = sthd_per_m * Lgth_Wet / Area_Wet)
}
# only use sites that are in the 200 m reach dataset
data("champ_site_rch")
pred_hab_sites %<>%
inner_join(champ_site_rch) %>%
mutate_at(vars(Watershed),
list(fct_drop))
# split by species, and filter by each species domain
data("rch_200")
pred_hab_df = pred_hab_sites %>%
select(-starts_with('chnk')) %>%
rename(cap_per_m = sthd_per_m,
cap_per_m2 = sthd_per_m2) %>%
mutate(Species = 'Steelhead') %>%
bind_rows(pred_hab_sites %>%
select(-starts_with('sthd')) %>%
rename(cap_per_m = chnk_per_m,
cap_per_m2 = chnk_per_m2) %>%
mutate(Species = 'Chinook')) %>%
select(Species, UniqueID, Site:Area_Wet, starts_with("cap_")) %>%
left_join(rch_200 %>%
st_drop_geometry() %>%
select(UniqueID, chnk, sthd)) %>%
filter((Species == 'Steelhead' & sthd) |
(Species == 'Chinook' & chnk)) %>%
# filter(cap_per_m > 0,
# cap_per_m2 > 0) %>%
mutate_at(vars(Watershed),
list(fct_drop))
#----------------------------------------
# prep 200 m reaches for extrapolation
#----------------------------------------
rch_200_df = rch_200 %>%
st_drop_geometry() %>%
as_tibble() %>%
mutate_at(vars(regime),
list(~ as.factor(as.character(.))))
extrap_covars = names(rch_200_df)[c(18:20,
23:29,
37, 40:42)]
# what type of covariate is each GAA?
extrap_class = rch_200_df %>%
select(one_of(extrap_covars)) %>%
as.list() %>%
map_chr(.f = function(x) class(x)[1])
# which ones are numeric?
extrap_num = names(extrap_class)[extrap_class %in% c('integer', 'numeric')]
# which ones are categorical?
extrap_catg = names(extrap_class)[extrap_class %in% c('factor', 'character', 'ordered')]
# compare range of covariates from model dataset and prediction dataset
range_comp = bind_rows(rch_200_df %>%
filter(!UniqueID %in% unique(pred_hab_sites$UniqueID)) %>%
select(UniqueID,
one_of(extrap_num)) %>%
gather(Metric, value, -UniqueID) %>%
mutate(Source = 'non-CHaMP Reaches'),
rch_200_df %>%
filter(UniqueID %in% unique(pred_hab_sites$UniqueID)) %>%
select(UniqueID,
one_of(extrap_num)) %>%
distinct() %>%
gather(Metric, value, -UniqueID) %>%
mutate(Source = 'CHaMP Reaches')) %>%
mutate_at(vars(Source, Metric),
list(as.factor))
range_max = range_comp %>%
group_by(Metric, Source) %>%
summarise_at(vars(value),
tibble::lst(min, max),
na.rm = T) %>%
filter(Source == 'CHaMP Reaches') %>%
ungroup() %>%
gather(type, value, -Metric, -Source)
range_max_all = range_max %>%
bind_rows(range_comp %>%
group_by(Metric, Source) %>%
summarize(across(value,
tibble::lst(quantile),
probs = c(0.25),
na.rm = T)) %>%
rename(lower_quant = value_quantile) %>%
left_join(range_comp %>%
group_by(Metric, Source) %>%
summarize(across(value,
tibble::lst(quantile),
probs = c(0.75),
na.rm = T)) %>%
rename(upper_quant = value_quantile)) %>%
filter(Source == 'CHaMP Reaches') %>%
ungroup() %>%
pivot_longer(cols = ends_with('quant'),
names_to = "type",
values_to = "value")) %>%
mutate(range_type = if_else(type %in% c('min', 'max'),
'Range',
'IQR'))
covar_range_p = range_comp %>%
mutate(Source = str_remove(Source, " Reaches$")) %>%
ggplot(aes(x = Source,
y = value,
fill = Source)) +
geom_boxplot() +
facet_wrap(~ Metric,
scales = 'free') +
geom_hline(data = range_max,
aes(yintercept = value),
lty = 2,
color = 'darkgray') +
theme_minimal() +
theme(legend.position = "bottom")
# covar_range_p
# ggsave('output/figures/GAA_200rch_comparison.pdf',
# covar_range_p,
# height = 7,
# width = 10)
# another way to look at it
# break it down by HUC6
comp_df = rch_200_df %>%
mutate(Source = if_else(UniqueID %in% unique(pred_hab_sites$UniqueID),
"CHaMP Reaches",
"non-CHaMP Reaches")) %>%
filter(strm_order <= 6,
!GNIS_Name %in% c("Columbia River",
"Snake River")) %>%
filter(sthd | chnk) %>%
pivot_longer(cols = any_of(extrap_num),
names_to = "Metric",
values_to = "value") %>%
left_join(range_max_all %>%
select(-range_type) %>%
pivot_wider(names_from = "type",
values_from = "value") %>%
select(-Source)) %>%
mutate(in_range = if_else(value >= min & value <= max,
T, F),
in_iqr = if_else(value >= lower_quant & value <= upper_quant,
T, F)) %>%
filter(!(Metric == "S2_02_11" & value < -9000)) %>%
filter(!is.na(value))
# log these covariates for plotting purposes
log_mets = c('alp_accum',
'fines_accu',
'flow_accum',
'grav_accum',
'p_accum',
'fp_cur')
# 2 plots, one with logged covariates, one without
covar_comp_log = comp_df %>%
filter(Metric %in% log_mets) %>%
ggplot(aes(x = HUC6_name,
y = value,
fill = HUC6_name)) +
scale_fill_brewer(palette = "Spectral") +
geom_hline(data = range_max_all %>%
filter(Metric %in% log_mets),
aes(yintercept = value,
linetype = range_type),
color = 'darkblue') +
scale_linetype(guide = 'none') +
geom_boxplot(outlier.color = 'gray70') +
facet_wrap(~ Metric,
ncol = 3,
scales = 'free') +
scale_y_continuous(trans = "log",
breaks = scales::pretty_breaks()) +
theme_minimal() +
theme(legend.position = "bottom",
axis.title = element_blank(),
axis.text.x = element_text(angle = 45,
hjust = 1,
size = 6),
axis.text.y = element_text(size = 6))
covar_comp_nolog = comp_df %>%
filter(!Metric %in% log_mets) %>%
ggplot(aes(x = HUC6_name,
y = value,
fill = HUC6_name)) +
scale_fill_brewer(palette = "Spectral") +
geom_hline(data = range_max_all %>%
filter(!Metric %in% log_mets),
aes(yintercept = value,
linetype = range_type),
color = 'darkblue') +
scale_linetype(guide = 'none') +
geom_boxplot(outlier.color = 'gray70') +
facet_wrap(~ Metric,
nrow = 2,
scales = 'free') +
scale_y_continuous(trans = "identity",
breaks = scales::pretty_breaks()) +
theme_minimal() +
theme(legend.position = "bottom",
axis.title = element_blank(),
axis.text.x = element_text(angle = 45,
hjust = 1,
size = 6),
axis.text.y = element_text(size = 6))
covar_comp_p = ggpubr::ggarrange(plotlist = list(covar_comp_log,
covar_comp_nolog),
nrow = 1,
common.legend = T,
legend = "bottom")
# save the plot
# ggsave('output/figures/GAA_200rch_comparison_huc6.pdf',
# covar_comp_p,
# height = 8,
# width = 11)
# another way to summarize these results
comp_summ = comp_df %>%
mutate(across(HUC6_name,
fct_drop)) %>%
group_by(Metric,
HUC6_name,
min,
lower_quant, upper_quant,
max) %>%
summarise(n_rch = n_distinct(UniqueID),
n_champ = n_distinct(UniqueID[Source == 'CHaMP Reaches']),
n_in_range = n_distinct(UniqueID[in_range]),
n_out_range = n_distinct(UniqueID[!in_range]),
# n_in_range = n_distinct(UniqueID[in_iqr]),
# n_out_range = n_distinct(UniqueID[!in_iqr]),
pct_in_range = n_in_range / n_rch,
max_huc = max(value, na.rm = T),
min_huc = min(value, na.rm = T),
.groups = "drop") %>%
mutate(pct_max_over = if_else(max_huc > max,
(max_huc - max) / max,
NA_real_),
pct_min_under = if_else(min_huc < min,
(min_huc - min) / min,
NA_real_))
# tabyl(comp_summ,
# HUC6_name)
# comp_summ %>%
# filter(!is.na(pct_min_under))
# arrange(pct_in_range)
# arrange(desc(pct_max_over))
# comp_summ %>%
# filter(HUC6_name == "Clearwater") %>%
# pull(pct_in_range)
# comp_df %>%
# filter(HUC6_name == "Upper Columbia",
# !in_range) %>%
# group_by(GNIS_Name) %>%
# summarise(n_rch = n_distinct(UniqueID),
# .groups = "drop") %>%
# arrange(desc(n_rch))
# # correlation between numeric covariates
# rch_200_df %>%
# select(one_of(extrap_num)) %>%
# cor(method = 'spearman',
# use = "pairwise")
# Center the covariates
# filter out reaches with covariates outside range of covariates used to fit extrapolation model
out_range_rchs = rch_200_df %>%
select(one_of(extrap_num), UniqueID) %>%
gather(Metric, value, -UniqueID) %>%
left_join(select(range_max, -Source) %>%
spread(type, value)) %>%
group_by(Metric) %>%
filter(value > max |
value < min) %>%
ungroup() %>%
pull(UniqueID) %>%
unique()
#----------------------------------------
# for linear models
#----------------------------------------
# center covariates
extrap_summ = inner_join(pred_hab_df %>%
select(UniqueID) %>%
distinct(),
rch_200_df %>%
select(UniqueID, one_of(extrap_num))) %>%
gather(metric_nm, value, -UniqueID) %>%
group_by(metric_nm) %>%
summarise(metric_mean = mean(value, na.rm=T),
metric_sd = sd(value, na.rm=T)) %>%
ungroup()
# extrapolation model data set, with normalized covariates
mod_data = inner_join(pred_hab_df,
rch_200_df %>%
select(UniqueID, one_of(extrap_num))) %>%
gather(metric_nm, value, one_of(extrap_num)) %>%
left_join(extrap_summ) %>%
mutate(norm_value = (value - metric_mean) / metric_sd) %>%
select(-(value:metric_sd)) %>%
spread(metric_nm, norm_value) %>%
left_join(rch_200_df %>%
select(UniqueID, one_of(extrap_catg)))
sum(is.na(mod_data))
# mod_data %<>%
# bind_cols(mod_data %>%
# is.na() %>%
# as_tibble() %>%
# select(one_of(extrap_covars)) %>%
# transmute(n_NA = rowSums(.))) %>%
# filter(n_NA == 0) %>%
# mutate_at(vars(Watershed, one_of(extrap_catg)),
# list(fct_drop))
# where to make extrapolation predictions
rch_pred = rch_200_df %>%
select(UniqueID, one_of(extrap_num)) %>%
pivot_longer(cols = one_of(extrap_num),
names_to = "metric_nm",
values_to = "value") %>%
left_join(extrap_summ) %>%
mutate(norm_value = (value - metric_mean) / metric_sd) %>%
select(-(value:metric_sd)) %>%
pivot_wider(names_from = "metric_nm",
values_from = "norm_value") %>%
left_join(rch_200_df %>%
select(UniqueID, !any_of(extrap_num))) %>%
select(any_of(names(rch_200_df))) %>%
mutate(in_covar_range = if_else(UniqueID %in% out_range_rchs,
F, T))
#----------------------------------------
# pull in survey design related data
#----------------------------------------
# Calculate GRTS design weights.
data("gaa")
# pull in info about what strata each CHaMP site was assigned to (using 2014 as reference year)
site_strata = pred_hab_df %>%
select(Species, Site, Watershed) %>%
distinct() %>%
left_join(gaa %>%
select(Site,
strata = AStrat2014)) %>%
mutate(site_num = str_split(Site, '-', simplify = T)[,2]) %>%
mutate(strata = if_else(Watershed == 'Asotin',
site_num,
if_else(Watershed == 'Entiat' & grepl('ENT00001', Site),
paste('EntiatIMW', site_num, sep = '_'),
strata))) %>%
mutate(strata = if_else(grepl('EntiatIMW', strata),
str_remove(strata, '[[:digit:]]$'),
strata),
strata = if_else(grepl('EntiatIMW', strata),
str_remove(strata, '[[:digit:]]$'),
strata)) %>%
filter(!is.na(strata)) %>%
mutate(strata = paste(Watershed, strata, sep = '_')) %>%
select(-site_num)
# read in data from the CHaMP frame
champ_frame_df = read_csv('data/prepped/champ_frame_data.csv') %>%
mutate(Target2014 = ifelse(is.na(AStrat2014), 'Non-Target', Target2014)) %>%
mutate(AStrat2014 = ifelse(AStrat2014 == 'Entiat IMW', paste('EntiatIMW', GeoRchIMW, sep = '_'), AStrat2014)) %>%
mutate(UseTypCHSP = ifelse(CHaMPshed == 'Lemhi' & AStrat2014 == 'Little Springs',
"Spawning and rearing", UseTypCHSP),
UseTypSTSU = ifelse(CHaMPshed == 'Lemhi' & AStrat2014 %in% c('Big Springs', 'Little Springs'),
"Spawning and rearing", UseTypSTSU)) %>%
filter(Target2014 == 'Target') %>%
rename(Watershed = CHaMPshed)
# what strata do we have?
frame_strata = champ_frame_df %>%
mutate(strata = paste(Watershed, AStrat2014, sep='_')) %>%
select(Watershed,
strata) %>%
distinct()
# how long is each strata, by species?
chnk_strata_length = champ_frame_df %>%
filter(!is.na(UseTypCHSP)) %>%
mutate(strata = paste(Watershed, AStrat2014, sep='_')) %>%
select(Watershed, matches("Strat"), FrameLeng) %>%
group_by(Watershed, strata) %>%
summarise(tot_length_km = sum(FrameLeng) / 1000) %>%
ungroup() %>%
mutate_at(vars(Watershed, strata),
list(as.factor)) %>%
arrange(Watershed, strata)
sthd_strata_length = champ_frame_df %>%
filter(!is.na(UseTypSTSU)) %>%
mutate(strata = paste(Watershed, AStrat2014, sep='_')) %>%
select(Watershed, matches("Strat"), FrameLeng) %>%
group_by(Watershed, strata) %>%
summarise(tot_length_km = sum(FrameLeng) / 1000) %>%
bind_rows(tibble(Watershed = 'Asotin',
strata = paste('Asotin', c('CC', 'NF', 'SF'), sep = '_'),
tot_length_km = 12)) %>%
ungroup() %>%
mutate_at(vars(Watershed, strata),
list(as.factor)) %>%
arrange(Watershed, strata)
strata_length = chnk_strata_length %>%
mutate(Species = 'Chinook') %>%
bind_rows(sthd_strata_length %>%
mutate(Species = 'Steelhead')) %>%
select(Species, everything())
# how many sites in each strata? and what is the length of each strata?
strata_tab = pred_hab_df %>%
select(Species, Site, Watershed, matches('per_m')) %>%
left_join(site_strata) %>%
filter(strata != 'Entiat_Entiat IMW') %>%
mutate_at(vars(Watershed),
list(fct_drop)) %>%
group_by(Species, Watershed, strata) %>%
summarise(n_sites = n_distinct(Site)) %>%
ungroup() %>%
full_join(strata_length) %>%
mutate(n_sites = if_else(is.na(n_sites),
as.integer(0),
n_sites)) %>%
# calculate the weight of each site in each strata
mutate(site_weight = if_else(n_sites > 0,
tot_length_km / n_sites,
as.numeric(NA)))
# test to see if we've accounted for all strata and most of each watershed
strata_test = frame_strata %>%
full_join(strata_tab) %>%
mutate_at(vars(Watershed),
list(fct_drop)) %>%
mutate(n_sites = if_else(is.na(n_sites),
as.integer(0),
n_sites)) %>%
select(Species, everything()) %>%
arrange(Species, Watershed, strata)
# # what frame strata don't have any sites in them?
# strata_test %>%
# filter(n_sites == 0,
# !is.na(tot_length_km)) %>%
# arrange(Species, Watershed, strata) %>%
# as.data.frame()
#
# # what strata that we have sites for are not in the frame strata?
# strata_test %>%
# filter(n_sites > 0,
# (is.na(tot_length_km) |
# tot_length_km == 0)) %>%
# as.data.frame()
# champ_frame_df %>%
# filter(Watershed == 'Lemhi') %>%
# select(Watershed, AStrat2014, Target2014, UseTypSTSU, FrameLeng) %>%
# filter(!grepl('Mainstem', AStrat2014)) %>%
# group_by(AStrat2014) %>%
# summarise(use_length = sum(FrameLeng[!is.na(UseTypSTSU)]),
# nonuse_length = sum(FrameLeng[is.na(UseTypSTSU)]))
# # how much of each watershed is not accounted for with current sites / strata?
# strata_test %>%
# group_by(Species, Watershed) %>%
# summarise_at(vars(tot_length_km),
# list(sum),
# na.rm = T) %>%
# left_join(strata_test %>%
# filter(n_sites == 0) %>%
# group_by(Species, Watershed) %>%
# summarise_at(vars(missing_length = tot_length_km),
# list(sum),
# na.rm = T)) %>%
# mutate_at(vars(missing_length),
# list(~ if_else(is.na(.), 0, .))) %>%
# mutate(perc_missing = missing_length / tot_length_km) %>%
# mutate_at(vars(perc_missing),
# list(~ if_else(is.na(.), 0, .))) %>%
# arrange(desc(perc_missing))
# calculate adjusted weights for all predicted QRF capacity sites
mod_data_weights = mod_data %>%
left_join(site_strata) %>%
left_join(strata_tab) %>%
# if site not in a strata, it gets weight proportionate to it's length
mutate(site_weight = if_else(is.na(site_weight),
Lgth_Wet / 1000,
site_weight)) %>%
group_by(Species, Watershed) %>%
mutate(sum_weights = sum(site_weight)) %>%
ungroup() %>%
mutate(adj_weight = site_weight / sum_weights)
#-------------------------------------------------------------
# clean up some memory
#-------------------------------------------------------------
rm(champ_frame_df, champ_site_rch,
chnk_strata_length, sthd_strata_length, fish_hab,
frame_strata, strata_length, strata_tab, strata_test,
gaa,
hab_avg,
hab_data,
hab_impute,
rch_200)
#-------------------------------------------------------------
# Set up the survey design.
#-------------------------------------------------------------
# getOption('survey.lonely.psu')
# this will prevent strata with only 1 site from contributing to the variance
options(survey.lonely.psu = 'certainty')
# this centers strata with only 1 site to the sample grand mean; this is conservative
# options(survey.lonely.psu = 'adjust')
# extrapolation model formula
full_form = as.formula(paste('log_qrf_cap ~ -1 + (', paste(extrap_covars, collapse = ' + '), ')'))
# fit various models
model_svy_df = mod_data_weights %>%
gather(response, qrf_cap, matches('per_m')) %>%
select(-(n_sites:sum_weights)) %>%
mutate(log_qrf_cap = log(qrf_cap)) %>%
group_by(Species, response) %>%
nest() %>%
mutate(design = map(data,
.f = function(x) {
svydesign(id = ~ 1,
data = x,
strata = ~ Watershed,
# strata = ~ strata,
weights = ~ adj_weight)
})) %>%
mutate(mod_no_champ = map(design,
.f = function(x) {
svyglm(full_form,
design = x)
}),
mod_champ = map(design,
.f = function(x) {
svyglm(update(full_form, .~ . + Watershed),
design = x)
})) %>%
arrange(Species, response) %>%
ungroup()
# make predictions at all possible reaches, using both models
model_svy_df %<>%
mutate(pred_all_rchs = list(rch_pred %>%
select(UniqueID, one_of(extrap_covars)) %>%
na.omit() %>%
left_join(rch_pred)),
# which reaches are in CHaMP watersheds?
pred_champ_rchs = map2(pred_all_rchs,
mod_champ,
.f = function(x,y) {
x %>%
left_join(mod_data_weights %>%
select(UniqueID, Watershed) %>%
left_join(rch_200_df %>%
select(UniqueID, HUC8_code)) %>%
select(HUC8_code, Watershed) %>%
distinct()) %>%
filter(!is.na(Watershed)) %>%
filter(Watershed %in% y$xlevels$Watershed) %>%
mutate_at(vars(Watershed),
list(as.factor))
})) %>%
mutate(pred_no_champ = map2(mod_no_champ,
pred_all_rchs,
.f = function(x, y) {
y %>%
select(UniqueID) %>%
bind_cols(predict(x,
newdata = y,
se = T,
type = 'response') %>%
as_tibble()) %>%
rename(log_fit = response,
log_se = SE) %>%
mutate(pred_cap = exp(log_fit) * (1 + log_se^2 / 2),
pred_se = pred_cap * log_se)
}),
pred_champ = map2(mod_champ,
pred_champ_rchs,
.f = function(x, y) {
y %>%
select(UniqueID) %>%
bind_cols(predict(x,
newdata = y,
se = T,
type = 'response') %>%
as_tibble()) %>%
rename(log_fit = response,
log_se = SE) %>%
mutate(pred_cap = exp(log_fit) * (1 + log_se^2 / 2),
pred_se = pred_cap * log_se)
}))
# quick comparison of capacity predictons with both models
comp_pred_p = model_svy_df %>%
select(Species, type = response,
pred_no_champ) %>%
unnest(cols = pred_no_champ) %>%
rename(resp_no_champ = pred_cap,
se_no_champ = pred_se) %>%
select(-log_fit, -log_se) %>%
left_join(model_svy_df %>%
select(Species, type = response,
pred_champ) %>%
unnest(cols = pred_champ) %>%
rename(resp_champ = pred_cap,
se_champ = pred_se) %>%
select(-log_fit, -log_se)) %>%
left_join(rch_200_df %>%
select(UniqueID, HUC8_code) %>%
left_join(mod_data_weights %>%
select(UniqueID, Watershed) %>%
left_join(rch_200_df %>%
select(UniqueID, HUC8_code)) %>%
select(HUC8_code, Watershed) %>%
distinct())) %>%
filter(!is.na(resp_champ)) %>%
# if mod_choice == "redds", filter out crazy values
# filter(resp_champ < 0.2,
# resp_no_champ < 0.2) %>%
ggplot(aes(x = resp_champ,
y = resp_no_champ)) +
geom_point() +
geom_abline(linetype = 2,
color = 'red') +
facet_wrap(~ Watershed + Species + type,
scales = 'free') +
labs(x = 'CHaMP',
y = 'Non-CHaMP')
# comp_pred_p
# pull out all predictions, create data.frame with one row per reach
all_preds = model_svy_df %>%
select(Species, type = response,
pred_no_champ) %>%
unnest(cols = pred_no_champ) %>%
rename(resp_no_champ = pred_cap,
se_no_champ = pred_se) %>%
select(-log_fit, -log_se) %>%
left_join(model_svy_df %>%
select(Species, type = response,
pred_champ) %>%
unnest(cols = pred_champ) %>%
rename(resp_champ = pred_cap,
se_champ = pred_se) %>%
select(-log_fit, -log_se)) %>%
# add in direct QRF estimates
left_join(pred_hab_sites %>%
select(UniqueID, Watershed,
matches("per_m")) %>%
pivot_longer(cols = matches('per_m'),
names_to = "type",
values_to = "resp_qrf") %>%
mutate(Species = if_else(grepl('chnk', type),
'Chinook',
'Steelhead')) %>%
mutate(type = str_replace(type, 'chnk', 'cap'),
type = str_replace(type, 'sthd', 'cap')) %>%
group_by(UniqueID, Watershed, Species, type) %>%
# slice(which.max(resp_qrf)) %>%
summarise_at(vars(resp_qrf),
list(mean),
na.rm = T) %>%
ungroup()) %>%
# choose which response to use: if QRF is available, use that, then if CHaMP model is available use that, then go with non-CHaMP model
mutate(model = if_else(!is.na(resp_qrf),
"QRF",
if_else(!is.na(resp_champ),
"CHaMP",
"non-CHaMP")),
response = if_else(model == 'QRF',
resp_qrf,
if_else(model == 'CHaMP',
resp_champ,
resp_no_champ)),
SE = if_else(model == 'QRF',
0,
if_else(model == 'CHaMP',
se_champ,
se_no_champ))) %>%
select(Species, type, UniqueID, model, response, SE) %>%
# add watershed name (and HUC8 code)
left_join(rch_200_df %>%
select(UniqueID, HUC8_code, chnk, sthd) %>%
left_join(mod_data_weights %>%
select(UniqueID, Watershed) %>%
left_join(rch_200_df %>%
select(UniqueID, HUC8_code)) %>%
select(HUC8_code, Watershed) %>%
distinct())) %>%
mutate(model = if_else(!is.na(Watershed) & Watershed == 'Asotin' & model != 'QRF',
'CHaMP',
model)) %>%
pivot_longer(cols = c(response, SE),
names_to = "key",
values_to = "value") %>%
mutate(key = if_else(key == 'response',
if_else(Species == 'Chinook',
str_replace(type, 'cap', 'chnk'),
str_replace(type, 'cap', 'sthd')),
if_else(Species == 'Chinook',
paste0(str_replace(type, 'cap', 'chnk'), "_se"),
paste0(str_replace(type, 'cap', 'sthd'), "_se")))) %>%
select(UniqueID, Watershed, HUC8_code, model, chnk, sthd, key, value) %>%
pivot_wider(names_from = "key",
values_from = "value")
# save the results
save(extrap_covars,
full_form,
mod_data_weights,
model_svy_df,
all_preds,
file = paste0('output/modelFits/extrap_200rch_', mod_choice, '.rda'))
#---------------------------
# create a shapefile
load(paste0('output/modelFits/extrap_200rch_', mod_choice, '.rda'))
data("rch_200")
rch_200_cap = rch_200 %>%
select(UniqueID, GNIS_Name, reach_leng:HUC8_code,
chnk, chnk_use, chnk_ESU_DPS:chnk_NWR_NAME,
sthd, sthd_use, sthd_ESU_DPS:sthd_NWR_NAME) %>%
left_join(all_preds %>%
select(-HUC8_code)) %>%
filter(reach_leng < 500)
#
# rm(mod_data_weights, model_svy_df, extrap_covars)
# rm(rch_200, all_preds)
#
# save it
# as GPKG
# st_write(rch_200_cap,
# dsn = paste0('output/gpkg/Rch_Cap_', mod_choice, '.gpkg'),
# driver = 'GPKG')
#
#
# try splitting it up and appending each one subsequently, to help speed it up.
rch_200_cap %>%
mutate_at(vars(HUC6_name),
list(fct_explicit_na)) %>%
tabyl(HUC6_name) %>%
adorn_totals()
rch_200_cap_split = rch_200_cap %>%
group_split(HUC6_name)
for(i in 1:length(rch_200_cap_split)) {
cat(paste("Working on group", i, "of", length(rch_200_cap_split),
"with", nrow(rch_200_cap_split[[i]]), " rows\n"))
st_write(rch_200_cap_split[[i]],
dsn = paste0('output/gpkg/Rch_Cap_', mod_choice, '.gpkg'),
driver = 'GPKG',
append = if_else(i == 1, F, T))
}
#
# # as shapefile
# st_write(rch_200_cap,
# dsn = paste0('output/shapefiles/Rch_Cap_', mod_choice, '.shp'),
# driver = 'ESRI Shapefile')
#
#
# #-------------------------------------------------------------
# # build other extrapolation models
# # linear model without design weights
# # random forest
# #-------------------------------------------------------------
# # fit various models that don't account for survey design
# model_lm_df = mod_data %>%
# gather(response, qrf_cap, matches('per_m')) %>%
# mutate(log_qrf_cap = log(qrf_cap)) %>%
# group_by(Species, response) %>%
# nest() %>%
# mutate(mod_no_champ = map(data,
# .f = function(x) {
# lm(full_form,
# data = x)
# }),
# mod_champ = map(data,
# .f = function(x) {
# lm(update(full_form, .~ . + Watershed),
# data = x)
# })) %>%
# arrange(Species, response) %>%
# ungroup() %>%
# # make predictions at all possible reaches, using both models
# mutate(pred_all_rchs = list(rch_pred %>%
# select(UniqueID, one_of(extrap_covars)) %>%
# na.omit() %>%
# left_join(rch_pred)),
# # which reaches are in CHaMP watersheds?
# pred_champ_rchs = map2(pred_all_rchs,
# mod_champ,
# .f = function(x,y) {
# x %>%
# left_join(mod_data %>%
# select(UniqueID, Watershed) %>%
# left_join(rch_200_df %>%
# select(UniqueID, HUC8_code)) %>%
# select(HUC8_code, Watershed) %>%
# distinct()) %>%
# filter(!is.na(Watershed)) %>%
# filter(Watershed %in% y$xlevels$Watershed) %>%
# mutate_at(vars(Watershed),
# list(as.factor))
# })) %>%
# mutate(pred_no_champ = map2(mod_no_champ,
# pred_all_rchs,
# .f = function(x, y) {
# y %>%
# select(UniqueID) %>%
# bind_cols(predict(x,
# newdata = y,
# se = T,
# type = 'response') %>%
# as_tibble() %>%
# select(response = fit,
# SE = se.fit))
# }),
# pred_champ = map2(mod_champ,
# pred_champ_rchs,
# .f = function(x, y) {
# y %>%
# select(UniqueID) %>%
# bind_cols(predict(x,
# newdata = y,
# se = T,
# type = 'response') %>%
# as_tibble() %>%
# select(response = fit,
# SE = se.fit))
# }))
#
# # pull out all predictions, create data.frame with one row per reach
# all_preds = model_lm_df %>%
# select(Species, type = response,
# pred_no_champ) %>%
# unnest(cols = pred_no_champ) %>%
# rename(resp_no_champ = response,
# se_no_champ = SE) %>%
# left_join(model_lm_df %>%
# select(Species, type = response,
# pred_champ) %>%
# unnest(cols = pred_champ) %>%
# rename(resp_champ = response,
# se_champ = SE)) %>%
# mutate_at(vars(starts_with("resp"),
# starts_with("se")),
# list(exp)) %>%
# # add in direct QRF estimates
# left_join(pred_hab_sites %>%
# select(UniqueID, Watershed,
# matches("per_m")) %>%
# pivot_longer(cols = matches('per_m'),
# names_to = "type",
# values_to = "resp_qrf") %>%
# mutate(Species = if_else(grepl('chnk', type),
# 'Chinook',
# 'Steelhead')) %>%
# mutate(type = str_replace(type, 'chnk', 'cap'),
# type = str_replace(type, 'sthd', 'cap')) %>%
# group_by(UniqueID, Watershed, Species, type) %>%
# # slice(which.max(resp_qrf)) %>%
# summarise_at(vars(resp_qrf),
# list(mean),
# na.rm = T) %>%
# ungroup()) %>%
# # choose which response to use: if QRF is available, use that, then if CHaMP model is available use that, then go with non-CHaMP model
# mutate(model = if_else(!is.na(resp_qrf),
# "QRF",
# if_else(!is.na(resp_champ),
# "CHaMP",
# "non-CHaMP")),
# response = if_else(model == 'QRF',
# resp_qrf,
# if_else(model == 'CHaMP',
# resp_champ,
# resp_no_champ)),
# SE = if_else(model == 'QRF',
# 0,
# if_else(model == 'CHaMP',
# se_champ,
# se_no_champ))) %>%
# select(Species, type, UniqueID, model, response, SE) %>%
# # add watershed name (and HUC8 code)
# left_join(rch_200_df %>%
# select(UniqueID, HUC8_code, chnk, sthd) %>%
# left_join(mod_data_weights %>%
# select(UniqueID, Watershed) %>%
# left_join(rch_200_df %>%
# select(UniqueID, HUC8_code)) %>%
# select(HUC8_code, Watershed) %>%
# distinct())) %>%
# mutate(model = if_else(!is.na(Watershed) & Watershed == 'Asotin' & model != 'QRF',
# 'CHaMP',
# model)) %>%
# pivot_longer(cols = c(response, SE),
# names_to = "key",
# values_to = "value") %>%
# mutate(key = if_else(key == 'response',
# if_else(Species == 'Chinook',
# str_replace(type, 'cap', 'chnk'),
# str_replace(type, 'cap', 'sthd')),
# if_else(Species == 'Chinook',
# paste0(str_replace(type, 'cap', 'chnk'), "_se"),
# paste0(str_replace(type, 'cap', 'sthd'), "_se")))) %>%
# select(UniqueID, Watershed, HUC8_code, model, chnk, sthd, key, value) %>%
# pivot_wider(names_from = "key",
# values_from = "value")
# fit various random forest models
# to account for design weights, might need to create new dataset by resampling original data, with probabilities weighted by design weights - update: this may not be a good idea, I think it biases the out-of-bag error rates
# extrapolation model formula
full_form = as.formula(paste('log_qrf_cap ~ -1 + (', paste(extrap_covars, collapse = ' + '), ')'))
model_rf_df = inner_join(pred_hab_df,
rch_200_df %>%
select(UniqueID, one_of(extrap_covars))) %>%
gather(response, qrf_cap, matches('per_m')) %>%
# mutate(log_qrf_cap = log(qrf_cap)) %>%
group_by(Species, response) %>%
nest() %>%
ungroup() %>%
mutate(mod_no_champ = map(data,
.f = function(x) {
randomForest(update(full_form, qrf_cap ~ .),
data = x,
ntree = 2000)
}),
mod_champ = map(data,
.f = function(x) {
randomForest(update(full_form, qrf_cap ~. + Watershed),
data = x,
ntree = 2000)
})) %>%
# make predictions at all possible reaches, using both models
mutate(pred_all_rchs = list(rch_200_df %>%
mutate(in_covar_range = ifelse(UniqueID %in% out_range_rchs, F, T)) %>%
select(UniqueID, in_covar_range, everything()))) %>%
# drop reaches with missing covariates
mutate(pred_all_rchs = map(pred_all_rchs,
.f = function(x) {
x %>%
select(UniqueID, one_of(extrap_covars)) %>%
na.omit() %>%
left_join(x)
}),
# which reaches are in CHaMP watersheds?
pred_champ_rchs = map2(pred_all_rchs,
mod_champ,
.f = function(x,y) {
x %>%
left_join(pred_hab_df %>%
select(UniqueID, Watershed) %>%
left_join(rch_200_df %>%
select(UniqueID, HUC8_code)) %>%
select(HUC8_code, Watershed) %>%
distinct()) %>%
filter(!is.na(Watershed)) %>%
filter(Watershed %in% unique(pred_hab_df$Watershed)) %>%
mutate_at(vars(Watershed),
list(as.factor))
})) %>%
mutate(pred_no_champ = map2(mod_no_champ,
pred_all_rchs,
.f = function(x, y) {
# this doesn't work because I run out of memory. So I can't get a SE on non-CHaMP predictions
# preds = predict(x,
# newdata = y,
# predict.all = T)
#
# y %>%
# select(UniqueID) %>%
# bind_cols(tibble(pred_cap = preds$aggregate,
# pred_se = preds$individual %>%
# apply(1, sd)))
# split into smaller datasets by HUC
y %>%
group_by(HUC8_code) %>%
group_split() %>%
map_df(.f = function(z) {
preds = predict(x,
newdata = z,
predict.all = T)
z %>%
select(UniqueID) %>%
bind_cols(tibble(pred_cap = preds$aggregate,
pred_se = preds$individual %>%
apply(1, sd)))
})
# # this version doesn't provide any standard errors
# y %>%
# select(UniqueID) %>%
# bind_cols(tibble(pred_cap = predict(x,
# newdata = y)))
}),
pred_champ = map2(mod_champ,
pred_champ_rchs,
.f = function(x, y) {
# preds = predict(x,
# newdata = y,
# predict.all = T)
#
# y %>%
# select(UniqueID) %>%
# bind_cols(tibble(pred_cap = preds$aggregate,
# pred_se = preds$individual %>%
# apply(1, sd)))
# split into smaller datasets by HUC
y %>%
group_by(HUC8_code) %>%
group_split() %>%
map_df(.f = function(z) {
preds = predict(x,
newdata = z,
predict.all = T)
z %>%
select(UniqueID) %>%
bind_cols(tibble(pred_cap = preds$aggregate,
pred_se = preds$individual %>%
apply(1, sd)))
})
# # this version doesn't include any standard errors
# y %>%
# select(UniqueID) %>%
# bind_cols(tibble(pred_cap = predict(x,
# newdata = y)))
})) %>%
arrange(Species, response)
# Sys.unsetenv("R_MAX_VSIZE")
# pull out all predictions, create data.frame with one row per reach
all_preds = model_rf_df %>%
select(Species, type = response,
pred_no_champ) %>%
unnest(cols = pred_no_champ) %>%
# rename(resp_no_champ = pred_cap) %>%
rename(resp_no_champ = pred_cap,
se_no_champ = pred_se) %>%
left_join(model_rf_df %>%
select(Species, type = response,
pred_champ) %>%
unnest(cols = pred_champ) %>%
# rename(resp_champ = pred_cap)) %>%
rename(resp_champ = pred_cap,
se_champ = pred_se)) %>%
# mutate_at(vars(starts_with("resp"),
# starts_with("se")),
# list(exp)) %>%
# add in direct QRF estimates
left_join(pred_hab_sites %>%
select(UniqueID, Watershed,
matches("per_m")) %>%
pivot_longer(cols = matches('per_m'),
names_to = "type",
values_to = "resp_qrf") %>%
mutate(Species = if_else(grepl('chnk', type),
'Chinook',
'Steelhead')) %>%
mutate(type = str_replace(type, 'chnk', 'cap'),
type = str_replace(type, 'sthd', 'cap')) %>%
group_by(UniqueID, Watershed, Species, type) %>%
# slice(which.max(resp_qrf)) %>%
summarise_at(vars(resp_qrf),
list(mean),
na.rm = T) %>%
ungroup()) %>%
# choose which response to use: if QRF is available, use that, then if CHaMP model is available use that, then go with non-CHaMP model
mutate(model = if_else(!is.na(resp_qrf),
"QRF",
if_else(!is.na(resp_champ),
"CHaMP",
"non-CHaMP")),
response = if_else(model == 'QRF',
resp_qrf,
if_else(model == 'CHaMP',
resp_champ,
resp_no_champ))) %>%
mutate(SE = if_else(model == 'QRF',
0,
if_else(model == 'CHaMP',
se_champ,
# NA_real_))) %>%
se_no_champ))) %>%
select(Species, type, UniqueID, model, response, SE) %>%
# add watershed name (and HUC8 code)
left_join(rch_200_df %>%
select(UniqueID, HUC8_code, chnk, sthd) %>%
left_join(pred_hab_df %>%
select(UniqueID, Watershed) %>%
left_join(rch_200_df %>%
select(UniqueID, HUC8_code)) %>%
select(HUC8_code, Watershed) %>%
distinct())) %>%
mutate(model = if_else(!is.na(Watershed) & Watershed == 'Asotin' & model != 'QRF',
'CHaMP',
model)) %>%
filter(!is.na(response)) %>%
pivot_longer(cols = c(response, SE),
names_to = "key",
values_to = "value") %>%
mutate(key = if_else(key == 'response',
if_else(Species == 'Chinook',
str_replace(type, 'cap', 'chnk'),
str_replace(type, 'cap', 'sthd')),
if_else(Species == 'Chinook',
paste0(str_replace(type, 'cap', 'chnk'), "_se"),
paste0(str_replace(type, 'cap', 'sthd'), "_se")))) %>%
select(UniqueID, Watershed, HUC8_code, model, chnk, sthd, key, value) %>%
pivot_wider(names_from = "key",
values_from = "value",
values_fill = NA_real_)
# save the results
save(extrap_covars,
full_form,
pred_hab_df,
model_rf_df,
all_preds,
file = paste0('output/modelFits/extrap_200rch_RF_', mod_choice, '.rda'))
# # using the randomForestSRC package
# library(randomForestSRC)
# model_rfsrc_df = inner_join(pred_hab_df,
# rch_200_df %>%
# select(UniqueID, one_of(extrap_covars))) %>%
# gather(response, qrf_cap, matches('per_m')) %>%
# mutate(log_qrf_cap = log(qrf_cap)) %>%
# group_by(Species, response) %>%
# nest() %>%
# ungroup()%>%
# mutate(mod_no_champ = map(data,
# .f = function(x) {
# rfsrc(update(full_form, qrf_cap ~ .),
# data = as.data.frame(x),
# ntree = 5000)
# }),
# mod_champ = map(data,
# .f = function(x) {
# rfsrc(update(full_form, qrf_cap ~ . + Watershed),
# data = as.data.frame(x),
# ntree = 5000)
# })) %>%
# # make predictions at all possible reaches, using both models
# mutate(pred_all_rchs = list(rch_200_df %>%
# mutate(in_covar_range = ifelse(UniqueID %in% out_range_rchs, F, T)) %>%
# select(UniqueID, in_covar_range, everything()))) %>%
# # drop reaches with missing covariates
# mutate(pred_all_rchs = map(pred_all_rchs,
# .f = function(x) {
# x %>%
# select(UniqueID, one_of(extrap_covars)) %>%
# na.omit() %>%
# left_join(x)
# }),
# # which reaches are in CHaMP watersheds?
# pred_champ_rchs = map2(pred_all_rchs,
# mod_champ,
# .f = function(x,y) {
# x %>%
# left_join(pred_hab_df %>%
# select(UniqueID, Watershed) %>%
# left_join(rch_200_df %>%
# select(UniqueID, HUC8_code)) %>%
# select(HUC8_code, Watershed) %>%
# distinct()) %>%
# filter(!is.na(Watershed)) %>%
# filter(Watershed %in% unique(pred_hab_df$Watershed)) %>%
# mutate_at(vars(Watershed),
# list(as.factor))
# })) %>%
# mutate(pred_no_champ = map2(mod_no_champ,
# pred_all_rchs,
# .f = function(x, y) {
# preds = predict(x,
# newdata = y %>%
# select(UniqueID,
# one_of(x$forest$xvar.names)),
# na.action = "na.impute")
# y %>%
# select(UniqueID) %>%
# mutate(pred_cap = preds$predicted)
# }),
# pred_champ = map2(mod_champ,
# pred_champ_rchs,
# .f = function(x, y) {
# preds = predict(x,
# newdata = y %>%
# select(UniqueID,
# one_of(x$forest$xvar.names)),
# na.action = "na.impute")
#
# y %>%
# select(UniqueID) %>%
# mutate(pred_cap = preds$predicted)
# # bind_cols(tibble(pred_cap = predict(x,
# # newdata = y %>%
# # select(one_of(x$forest$xvar.names)) %>%
# # mutate_at(vars(ends_with('accu'),
# # ends_with('accum')),
# # list(as.numeric)),
# # na.action = "na.impute")$predicted))
# }))
#
# # pull out all predictions, create data.frame with one row per reach
# all_preds = model_rfsrc_df %>%
# select(Species, type = response,
# pred_no_champ) %>%
# unnest(cols = pred_no_champ) %>%
# rename(resp_no_champ = pred_cap) %>%
# # rename(resp_no_champ = pred_cap,
# # se_no_champ = pred_se) %>%
# left_join(model_rfsrc_df %>%
# select(Species, type = response,
# pred_champ) %>%
# unnest(cols = pred_champ) %>%
# rename(resp_champ = pred_cap)) %>%
# # rename(resp_champ = pred_cap,
# # se_champ = pred_se)) %>%
# # mutate_at(vars(starts_with("resp"),
# # starts_with("se")),
# # list(exp)) %>%
# # add in direct QRF estimates
# left_join(pred_hab_sites %>%
# select(UniqueID, Watershed,
# matches("per_m")) %>%
# pivot_longer(cols = matches('per_m'),
# names_to = "type",
# values_to = "resp_qrf") %>%
# mutate(Species = if_else(grepl('chnk', type),
# 'Chinook',
# 'Steelhead')) %>%
# mutate(type = str_replace(type, 'chnk', 'cap'),
# type = str_replace(type, 'sthd', 'cap')) %>%
# group_by(UniqueID, Watershed, Species, type) %>%
# # slice(which.max(resp_qrf)) %>%
# summarise_at(vars(resp_qrf),
# list(mean),
# na.rm = T) %>%
# ungroup()) %>%
# # choose which response to use: if QRF is available, use that, then if CHaMP model is available use that, then go with non-CHaMP model
# mutate(model = if_else(!is.na(resp_qrf),
# "QRF",
# if_else(!is.na(resp_champ),
# "CHaMP",
# "non-CHaMP")),
# response = if_else(model == 'QRF',
# resp_qrf,
# if_else(model == 'CHaMP',
# resp_champ,
# resp_no_champ))) %>% #,
# # SE = if_else(model == 'QRF',
# # 0,
# # if_else(model == 'CHaMP',
# # se_champ,
# # se_no_champ))) %>%
# select(Species, type, UniqueID, model, response) %>% #, SE) %>%
# # add watershed name (and HUC8 code)
# left_join(rch_200_df %>%
# select(UniqueID, HUC8_code, chnk, sthd) %>%
# left_join(mod_data_weights %>%
# select(UniqueID, Watershed) %>%
# left_join(rch_200_df %>%
# select(UniqueID, HUC8_code)) %>%
# select(HUC8_code, Watershed) %>%
# distinct())) %>%
# mutate(model = if_else(!is.na(Watershed) & Watershed == 'Asotin' & model != 'QRF',
# 'CHaMP',
# model)) %>%
# filter(!is.na(response)) %>%
# pivot_longer(cols = c(response),
# names_to = "key",
# values_to = "value") %>%
# mutate(key = if_else(key == 'response',
# if_else(Species == 'Chinook',
# str_replace(type, 'cap', 'chnk'),
# str_replace(type, 'cap', 'sthd')),
# if_else(Species == 'Chinook',
# paste0(str_replace(type, 'cap', 'chnk'), "_se"),
# paste0(str_replace(type, 'cap', 'sthd'), "_se")))) %>%
# select(UniqueID, Watershed, HUC8_code, model, chnk, sthd, key, value) %>%
# pivot_wider(names_from = "key",
# values_from = "value")
#
# # save the results
# save(extrap_covars,
# full_form,
# mod_data_weights,
# model_rfsrc_df,
# all_preds,
# file = paste0('output/modelFits/extrap_200rch_RFSRC_', mod_choice, '.rda'))
#---------------------------
# create a geopackage from random forest extrapolation
data("rch_200")
for(mod_choice in c('juv_summer',
'juv_summer_dash',
'redds',
'juv_winter')) {
load(paste0('output/modelFits/extrap_200rch_RF_', mod_choice, '.rda'))
rch_200_cap = rch_200 %>%
select(UniqueID, GNIS_Name, reach_leng:HUC8_code,
chnk, chnk_use, chnk_ESU_DPS:chnk_NWR_NAME,
sthd, sthd_use, sthd_ESU_DPS:sthd_NWR_NAME) %>%
left_join(all_preds %>%
select(-chnk, -sthd) %>%
select(-HUC8_code)) %>%
filter(reach_leng < 500)
rm(pred_hab_df, model_rf_df, extrap_covars, all_preds,
full_form)
# rm(rch_200)
# # try splitting it up and appending each one subsequently, to help speed it up.
# rch_200_cap %>%
# mutate_at(vars(HUC6_name),
# list(fct_explicit_na)) %>%
# tabyl(HUC6_name) %>%
# adorn_totals()
rch_200_cap_split = rch_200_cap %>%
group_split(HUC6_name)
for(i in 1:length(rch_200_cap_split)) {
cat(paste("Working on group", i, "out of", length(rch_200_cap_split), "with", nrow(rch_200_cap_split[[i]]), " rows\n"))
st_write(rch_200_cap_split[[i]],
dsn = paste0('output/gpkg/Rch_Cap_RF_', mod_choice, '.gpkg'),
driver = 'GPKG',
append = if_else(i == 1, F, T))
}
}
#-----------------------------------------------
# compare all the predictions
#-----------------------------------------------
preds_comp = list('Survey' = model_svy_df,
"lm" = model_lm_df,
"SRC" = model_rfsrc_df,
"RF" = model_rf_df) %>%
map_df(.id = 'model',
.f = function(x) {
x %>%
select(Species, type = response, mod_pred = pred_no_champ) %>%
unnest(cols = mod_pred) %>%
select(Species, type, UniqueID, response) %>%
gather(key, value, response) %>%
mutate(key = if_else(key == 'response',
if_else(Species == 'Chinook',
str_replace(type, 'cap', 'chnk'),
str_replace(type, 'cap', 'sthd')),
if_else(Species == 'Chinook',
paste0(str_replace(type, 'cap', 'chnk'), "_se"),
paste0(str_replace(type, 'cap', 'sthd'), "_se")))) %>%
select(UniqueID, key, value) %>%
spread(key, value) %>%
mutate_at(vars(matches('per_m')),
list(exp))
})
test = preds_comp %>%
gather(key, value, matches('per_m')) %>%
spread(model, value) %>%
filter(key == 'chnk_per_m')
summary(test)
cor_pear = test %>%
select(lm:Survey) %>%
corrr::correlate(method = 'pearson')
cor_spear = test %>%
select(lm:Survey) %>%
corrr::correlate(method = 'spearman')
cor_ken = test %>%
select(lm:Survey) %>%
corrr::correlate(method = 'kendall')
cor_pear
cor_spear
cor_ken
test %>%
filter(lm < 10,
Survey < 10) %>%
GGally::ggpairs(columns = 3:6,
lower = list("continuous" =
function(data, mapping) {
ggplot(data,
mapping) +
geom_point() +
geom_abline(linetype = 2,
color = 'red') +
geom_smooth()
}))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.