#Possible palettes #1: #538797, #C2D9CD, #FF842A https://www.instagram.com/p/B9En6h2gbJ5/
#Possible palette #2: #238189, #3FA8B3, #E8F3EE, #FF674D https://www.instagram.com/p/BxZZu82gBm3/
#File structure
rootdir <- rprojroot::find_root(rprojroot::has_dir("src"))
srcdir <- file.path(rootdir, 'src', 'globalIRmap')
datdir <- file.path(rootdir, 'data')
resdir <- file.path(rootdir, 'results')

#Source packages functions
library(drake)
library(flextable)
library(grid)
source(file.path(srcdir, 'R/IRmapping_packages.R'))
source(file.path(srcdir, 'R/IRmapping_functions.R'))
source(file.path(srcdir, 'R/IRmapping_plan.R'))

loadd(gpredsdt)
loadd(rivpred)
loadd(tasks_u10)
loadd(tasks_featsel_u10)
loadd(tasks_featsel_o1)
loadd(IRpop)

Geographical distribution of reference streamgauging stations

Statistics for paper

remove(gpredsdt)
remove(rivpred)
remove(tasks_u10)
remove(tasks_featsel_u10)
remove(tasks_featsel_o1)
remove(IRpop)
gc()

Threshold sensitivity

loadd(threshold_sensitivity)

png(paste0(file.path(resdir, 'figures//thresholdsensitivty_'), 
           format(Sys.Date(), '%Y%m%d'), '.png'), 
    width=6, height=6, units='in', res=600)
print(threshold_sensitivity$gperf)
dev.off()

print(threshold_sensitivity$predbounds)

IRES extrapolation

loadd(IRESextra)
loadd(netlength_extra)
print("Number of climate-basins sub-units occurrences: ")
print(IRESextra$preds[,.N])


print(paste0(
    'For mdur >= 1, statistically extrapolating the prevalence of intermittence in rivers < 0.1 m3/s, we predict that ',
    round(100*IRESextra$preds[
      , sum(percinter_all_GAM*cumL_pred, na.rm=T)/sum(cumL_pred, na.rm=T)]),
    '% of rivers >= 0.01 m3/s are intermittent'
  ))

print(paste0('For a total river length of ',IRESextra$preds[, sum(cumL_pred)], ' km'
             ))


print(paste0(
  'For mdur >= 1, assuming that rivers < 0.1 m3/s are as intermittent as those [0.1, 0.2),  we predict that ',
  round(100*IRESextra$preds[
    , sum(percinter_all_conservative*cumL_pred, na.rm=T)/sum(cumL_pred, na.rm=T)]),
  '% of rivers >= 0.01 m3/s are intermittent'
))


print('Example plots')
png(paste0(file.path(resdir, 'figures//extraIRESplot_62_18_'), 
           format(Sys.Date(), '%Y%m%d'), '.png'), 
    width=5, height=5, units='in', res=600)
print(IRESextra$plot_62_18)
dev.off()
png(paste0(file.path(resdir, 'figures//extraIRESplot_14_17_'), 
           format(Sys.Date(), '%Y%m%d'), '.png'), 
    width=5, height=5, units='in', res=600)
print(IRESextra$plot_14_17)
dev.off()

png(paste0(file.path(resdir, 'figures//extraLENGTHplot_62_18_'), 
           format(Sys.Date(), '%Y%m%d'), '.png'), 
    width=5, height=5, units='in', res=600)
print(netlength_extra$plot_62_18)
dev.off()
png(paste0(file.path(resdir, 'figures//extraLENGTHplot_14_17_'), 
           format(Sys.Date(), '%Y%m%d'), '.png'), 
    width=5, height=5, units='in', res=600)
print(netlength_extra$plot_14_17)
dev.off()



loadd(IRESextra_mdur30)
print(paste0(
    'For mdur >= 30, statistically extrapolating the prevalence of intermittence in rivers < 0.1 m3/s, we predict that ',
    round(100*IRESextra_mdur30$preds[
      , sum(percinter_all_GAM*cumL_pred, na.rm=T)/sum(cumL_pred, na.rm=T)]),
    '% of rivers >= 0.01 m3/s are intermittent'
  ))

remove(IRESextra)
remove(IRESextra_mdur30)
gc()

Gauges map

loadd(gauges_plot)

png(paste0(file.path(resdir, 'figures//gauges_plot'), 
           format(Sys.Date(), '%Y%m%d'), '.png'), 
    width=178, height=178, units='mm', res=1000)
print(gauges_plot)
dev.off()


remove(gauges_plot)
gc()

WaterGAP stats

loadd(watergap_stats)
flextable(watergap_stats)

remove(watergap_stats)
gc()

Environmental distribution of reference streamgauging stations

loadd(envhist)
grid.newpage() 
grid.draw(envhist)


pdf(paste0(file.path(resdir, 'figures//envhist_'), 
           format(Sys.Date(), '%Y%m%d'), '.pdf'), 
    width=10, height=9)
grid.draw(envhist)
dev.off()

remove(envhist)
gc()

Methods - Table 2. Specification and benchmark comparison of models

loadd(tablebm_classif1_u10)
loadd(tablebm_regr1_u10)
loadd(tablebm_classif2_u10)
print('Setup table u10')
setup_table_u10 <- rbindlist(list(tablebm_classif1_u10$setup,
                              tablebm_regr1_u10$setup,
                              tablebm_classif2_u10$setup),
                         fill=T, use.names=T)

flextable(setup_table_u10)

print('Results table u10')
results_table_u10 <- rbindlist(list(tablebm_classif1_u10$results,
                                tablebm_regr1_u10$results,
                                tablebm_classif2_u10$results),
                           fill=T, use.names=T)
flextable(results_table_u10)

print('Setup table o1')
loadd(tablebm_classif1_o1)
loadd(tablebm_regr1_o1)
loadd(tablebm_classif2_o1)
setup_table_o1 <- rbindlist(list(tablebm_classif1_o1$setup,
                              tablebm_regr1_o1$setup,
                              tablebm_classif2_o1$setup),
                         fill=T, use.names=T)

flextable(setup_table_o1)

print('Results table o1')
results_table_o1 <- rbindlist(list(tablebm_classif1_o1$results,
                              tablebm_regr1_o1$results,
                              tablebm_classif2_o1$results),
                         fill=T, use.names=T)
flextable(results_table_o1)

remove(tablebm_classif1_u10)
remove(tablebm_regr1_u10)
remove(tablebm_classif2_u10)
gc()

Methods - Figure 2. Benchmark comparison of models through curves

loadd(misclass_plot_u10)
print(misclass_plot_u10)

loadd(misclass_plot_o1)
print(misclass_plot_o1)


gpredsdt[, mlr3measures::bacc(factor(as.character(intermittent_o1800),
                                      levels=c('0', '1')),
                               factor(IRpredcat_CVsp, levels=c('0','1')))]
gpredsdt[, mlr3measures::bacc(factor(as.character(intermittent_o1800),
                                      levels=c('0', '1')),
                               factor(IRpredcat_CVnosp, levels=c('0','1')))]

Main text - Figure 2. Variable importance for top 20 variables

loadd(vimp_plot_u10)
loadd(vimp_plot_o1)

fwrite(vimp_plot_u10$data[, .(varnames, imp_wmean, imp_wsd, Keyscale, Keystat, 
                              Category, Attribute, Source, Citation, varname)],
       paste0(file.path(resdir, 'figures//vimpdata_u10_'), 
           format(Sys.Date(), '%Y%m%d'), '.csv')
)

fwrite(vimp_plot_o1$data[, .(varnames, imp_wmean, imp_wsd, Keyscale, Keystat, 
                              Category, Attribute, Source, Citation, varname)],
       paste0(file.path(resdir, 'figures//vimpdata_o1_'), 
           format(Sys.Date(), '%Y%m%d'), '.csv')
)

p_u10 <- vimp_plot_u10 +
  theme(plot.margin = unit(c(10,260,0,0), "pt")) +
  labs(x=bquote('A. Gauges with mean annual discharge < 10'~m^3~s^-1)) + 
  theme(plot.margin = unit(c(0,0,0,0), 'cm'))

p_o1format <- vimp_plot_o1 +
  coord_flip(ylim=c(min(vimp_plot_o1$data[, max(imp_wmean+imp_wsd)+1], 100), 0),
             clip='off') +
  scale_x_discrete(name=expression(
    B.~Gauges~with~mean~annual~discharge >= 1~m^3~s^-1),
    labels = function(x) {
      stringr::str_wrap(tolower(x), width = 25)
    },
    limits = rev,
    position='top') +
  scale_y_reverse(position='right', expand=c(0,0)) +
  theme(legend.position = 'none',
        plot.background = element_blank(),
        panel.background = element_blank(),
        plot.margin = unit(c(0,0,0,0.1), 'cm'))

pdf(paste0(file.path(resdir, 'figures//vimp_'), 
           format(Sys.Date(), '%Y%m%d'), '.pdf'), 
    width=7.5, height=6)
p_u10 + p_o1format
dev.off()

remove(vimp_plot_u10)
remove(vimp_plot_o1)
gc()

Main text - Figure 3. Partial dependence plots

loadd(pd_plot_u10)
lapply(pd_plot_u10, function(p) plot(p))

loadd(pd_plot_o1)
lapply(pd_plot_o1, function(p) plot(p))

remove(pd_plot_u10)
remove(pd_plot_o1)
gc()

Methods - Figure 3 A. Predictions uncertainty by metavariable and environment

loadd(gaugeIPR_plot)
grid.newpage()
grid.draw(gaugeIPR_plot)

remove(gaugeIPR_plot)
gc()

Final binned summary statistics for split model approach(< 10 and >= 1)

loadd(bin_finalmisclass_IRpredcat_CVnosp)
loadd(bin_finalmisclass_IRpredcat_CVsp)
loadd(gpredsdt)

print('Split model approach: 3-fold non-spatial CV')
print(paste0(
  'Overal bacc:', 
  gpredsdt[, round(mlr3measures::bacc(intermittent_o1800, 
                                      as.factor(IRpredcat_CVnosp)),
                   2)]
)
)
flextable(bin_finalmisclass_IRpredcat_CVnosp)

print('Slit model approach: 40-fold spatial CV')
print(paste0(
  'Overal bacc:', 
  gpredsdt[, round(mlr3measures::bacc(intermittent_o1800, 
                                      as.factor(IRpredcat_CVsp)),
                   2)]
)
)
flextable(bin_finalmisclass_IRpredcat_CVsp)

remove(bin_finalmisclass_IRpredcat_CVnosp)
remove(bin_finalmisclass_IRpredcat_CVsp)

Binned summary statistics for single model approach

loadd(res_featsel_cv_all)
print('Single model approach: 3-fold non-♣spatial CV')
bin_misclass_all <- bin_misclass(in_resampleresult = res_featsel_cv_all,
                                 binvar = 'dis_m3_pyr',
                                 binfunc = 'manual',
                                 binarg = c(0.1, 1, 10, 100, 1000, 10000, 1000000),
                                 interthresh=0.5,
                                 spatial_rsp=FALSE
)
flextable(bin_misclass_all)

remove(res_featsel_cv_all)
gc()

Basin-level accuracy and bias plots

loadd(basinBACC)

png(paste0(file.path(resdir, 'figures//basaccbias_'), 
           format(Sys.Date(), '%Y%m%d'), '.png'), 
    width=6, height=3.5, units='in', res=1000)
basinBACC$plot_acc + 
  basinBACC$plot_bias + theme(legend.position = 'none') +
  plot_annotation(tag_levels = 'a') & 
  theme(plot.tag = element_text(size = 12))
dev.off()

png(paste0(file.path(resdir, 'figures//jcdeviate_'), 
           format(Sys.Date(), '%Y%m%d'), '.png'), 
    width=5, height=5, units='in', res=1000)
basinBACC$plot_jcdeviate
dev.off()

gc()

Comparison of results with regional-national estimates

getdisplay_totalest <- function(target, subtarget='data', minbin=2) {
  flextable(target[[subtarget]] %>%
              rbind(.[bin>=minbin, list(bin='world',
                                  perc = weighted.mean(perc, binsumlength)
              ), by=dat],fill=T)
  )
}

loadd(fr_plot)
loadd(us_plot)
loadd(au_plot)
au_plot <- au_plot$plot + theme(axis.title.y = element_blank(), 
                                legend.title=element_blank()) + 
  labs(x=bquote('Surface area'~km^2))

pdf(paste0(file.path(resdir, 'figures//comparisonhist_'), 
           format(Sys.Date(), '%Y%m%d'), '.pdf'), 
    width=5, height=11)
grid.arrange(fr_plot$plot, us_plot$plot, au_plot, left = 'Prevalence of intermittency (% river length)')
dev.off()

print("Data for comparison: US - all")
getdisplay_totalest(us_plot, subtarget='data_all')

print("Data for comparison: US - no artificial")
getdisplay_totalest(us_plot, subtarget='data_noartificial')

print("Data for comparison: France")
getdisplay_totalest(fr_plot)

print("Data for comparison: Australia")
getdisplay_totalest(au_plot, minbin=1)

remove(ft_plot)
remove(us_plot)
remove(au_plot)
gc()

loadd(fr_plot_mdur30)
loadd(us_plot_mdur30)
loadd(au_plot_mdur30) 
au_plot_mdur30 <- au_plot_mdur30$plot + theme(axis.title.y = element_blank(), 
                                              legend.title=element_blank()) + 
    labs(x=bquote('Surface area'~km^2))
pdf(paste0(file.path(resdir, 'figures//comparisonhist_mdur30_'), 
           format(Sys.Date(), '%Y%m%d'), '.pdf'), 
    width=5, height=11)
grid.arrange(fr_plot_mdur30$plot, us_plot_mdur30$plot, au_plot_mdur30, 
             left = 'Prevalence of intermittency (% river length)') 
dev.off()

print("Data for comparison, mdur >= 30: US - all")
getdisplay_totalest(us_plot_mdur30, subtarget='data_all')

print("Data for comparison, mdur >= 30: US - no artificial")
getdisplay_totalest(us_plot_mdur30, subtarget='data_noartificial')

print("Data for comparison, mdur >= 30: France")
getdisplay_totalest(fr_plot_mdur30)

print("Data for comparison, mdur >= 30: Australia")
getdisplay_totalest(au_plot_mdur30, minbin=1)

remove(ft_plot_mdur30)
remove(us_plot_mdur30)
remove(au_plot_mdur30)
gc()

Comparison of results with on-the-ground observations for PROSPER

loadd(pnw_plot)
print(pnw_plot$plot)

pnw_measures <- pnw_plot$stats
print(paste0('Balanced accuracy of predictions based on PROSPER: ',
             round(pnw_measures$bacc, 3)))
print(paste0('AUC based on PROSPER: ',
             round(pnw_measures$auc, 3)))
print(paste0('Misclassification rate based on PROSPER: ',
             round(pnw_measures$ce, 3)))
print(paste0('Sensitivity based on PROSPER: ',
             round(pnw_measures$sen, 3)))
print(paste0('Specificity rate based on PROSPER: ',
             round(pnw_measures$spe, 3)))


print(paste0('Total number of obs: ',
             pnw_measures$nobs_total
))
print(paste0('Total number of reaches: ',
             pnw_measures$nreaches
))
print(paste0('Total number of perennial reaches: ',
              pnw_measures$nreaches_perennial
))
print(paste0('Total number of non-perennial reaches: ',
              pnw_measures$nreaches_nonperennial
))

remove(pnw_plot)
gc()

Comparison of results with on-the-ground observations for ONDE

loadd(onde_plot)
print(onde_plot$plot)

onde_measures <- onde_plot$stats
print(paste0('Balanced accuracy of predictions based on ONDE: ',
             round(onde_measures$bacc, 3)))
print(paste0('AUC based on ONDE: ',
             round(onde_measures$auc, 3)))
print(paste0('Misclassification rate based on ONDE: ',
             round(onde_measures$ce, 3)))
print(paste0('Sensitivity based on ONDE: ',
             round(onde_measures$sen, 3)))
print(paste0('Specificity rate based on ONDE: ',
             round(onde_measures$spe, 3)))

print(paste0('Total number of obs: ',
             onde_measures$nobs_total
))
print(paste0('Total number of reaches: ',
             onde_measures$nreaches
))
print(paste0('Total number of perennial reaches: ',
              onde_measures$nreaches_perennial
))
print(paste0('Total number of non-perennial reaches: ',
              onde_measures$nreaches_nonperennial
))

remove(onde_plot)
gc()

Environmental variables used in model training + variable selection results

loadd(predvars)

#Write predvars to csv
fwrite(predvars, file.path(resdir, paste0('predictor_variables.csv')))

#Make table
flextable(predvars[,c('Category', 'Attribute', 'Spatial representation',
                      'Temporal/Statistical aggreg.', 'Source', 'Citation'),
                   with=F])

remove(predvars)
gc()

Tables of intermittence by categories for base model

loadd(gaugestats_format)
get_tabletarget <- function(in_gaugeformat) {
  lapply(names(in_gaugeformat), function(var) {
    targetname <- paste0('globaltables_', var)
    #print(targetname)
    tryCatch(return(readd(eval(targetname), character_only = T)$`0.5` %>%
                      setDT(tab) %>%
                      setorder(-`Total stream length (10^3 km)`)),
             error=function(e) NULL)
  })
}

tables <- get_tabletarget(gaugestats_format) %>%
  compact

ft <- list()
for (i in 1:length(tables)) {
  numcols <- names(tables[[i]])[tables[[i]][,sapply(.SD, class)=='numeric']]
  ft[[i]] <- flextable(tables[[i]][, (numcols) := lapply(.SD, function(x) as.integer(round(x))),
                              .SDcols = numcols]) %>%
    autofit
}
ft[[1]]
ft[[2]]
ft[[3]]
ft[[4]]

print('Extrapolation')
loadd(globaltable_clzextend)
flextable(globaltable_clzextend)

remove(gaugestats_format)
gc()

Tables of intermittence by categories for mdur 30 model

loadd(gaugestats_format)
get_tabletarget_mdur30 <- function(in_gaugeformat) {
  lapply(names(in_gaugeformat), function(var) {
    targetname <- paste0('globaltables_', var, '_mdur30')
    #print(targetname)
    tryCatch(return(readd(eval(targetname), character_only = T)$`0.5` %>%
                      setDT(tab) %>%
                      setorder(-`Total stream length (10^3 km)`)),
             error=function(e) NULL)
  })
}

tables <- get_tabletarget_mdur30(gaugestats_format) %>%
  compact

ft_mdur30 <- list()
for (i in 1:length(tables)) {
  numcols <- names(tables[[i]])[tables[[i]][,sapply(.SD, class)=='numeric']]
  ft_mdur30[[i]] <- flextable(tables[[i]][, (numcols) := lapply(.SD, function(x) as.integer(round(x))),
                              .SDcols = numcols]) %>%
    autofit
}

print(paste0('Table organized by: ', ft[[1]]$header$col_keys[1]))
ft_mdur30[[1]]
print(paste0('Table organized by: ', ft[[2]]$header$col_keys[1]))
ft_mdur30[[2]]
print(paste0('Table organized by: ', ft[[3]]$header$col_keys[1]))
ft_mdur30[[3]]
print(paste0('Table organized by: ', ft[[4]]$header$col_keys[1]))
ft_mdur30[[4]]

print('Extrapolation')
loadd(globaltable_clzextend_mdur30)
flextable(globaltable_clzextend_mdur30)

remove(gaugestats_format)
gc()

Statistics for press

loadd(IRESextra)

#% IRES for MAF >= 0.1 m3/s in NZ
IRESextra$preds[substr(PFAF_IDclz, 1, 2) == '57', list(cumLpred = sum(cumL_pred),
                                                       cumLref = sum(cumL_cutoffref),
                                                       cumIRES_conservative = weighted.mean(percinter_all_conservative, cumL_pred),
                                                       cumIRES_GAM = weighted.mean(percinter_all_GAM, cumL_pred, na.rm=T)
)]

#% IRES for MAF >= 0.1 m3/s in Amazon basin
IRESextra$preds[substr(PFAF_IDclz, 1, 2) == '62', list(cumLpred = sum(cumL_pred),
                                                       cumLref = sum(cumL_cutoffref),
                                                       cumIRES_conservative = weighted.mean(percinter_all_conservative, cumL_pred),
                                                       cumIRES_GAM = weighted.mean(percinter_all_GAM, cumL_pred, na.rm=T)
)]

#% IRES for MAF >= 0.1 m3/s in Europe
IRESextra$preds[(substr(PFAF_IDclz, 1, 2) %in% as.character(seq(21,25)))
                & !(substr(PFAF_IDclz, 4, 5) %in% c('1','2','14')), 
                list(cumLpred = sum(cumL_pred),
                     cumLref = sum(cumL_cutoffref),
                     cumIRES_conservative = weighted.mean(percinter_all_conservative, cumL_pred),
                     cumIRES_GAM = weighted.mean(percinter_all_GAM, cumL_pred, na.rm=T)
                )]


NaiaraLopezRojo/globalIRmap documentation built on Dec. 17, 2021, 5:19 a.m.