scripts/old_functions/old_function_collection.R

#' Calculate exceedence Brier score
#'
#' @param dt Data table containing the predictions.
#' @param fc column name of the prediction. Contains predicted probabilities of exceedence
#' @param threshold_col which column contains the exceedence threshold?
#' @param obs column name of the observations. Can either be logical (exceedence or not) or real valued, containing precipitation (or the variable for which exceedence should be checked).
#' @param by column names of grouping variables, all of which need to be columns in dt.
#' Default is to group by all instances of month, season, lon, lat, system and lead_time that are columns in dt.
#' @param pool column name(s) for the variable(s) over which is averaged. Typically just 'year'.
#'
#' @examples
#' dt = data.table(ex_pr = c(0.5,0.3,0),thr = rep(0.4,3),obs = c(1,0.3,0))
#' print(dt)
#' BS_ex_dt(dt,fc = 'ex_pr','thr')
#'
#' @export

BS_ex_dt = function(dt,fc,threshold_col,
                    obs = 'obs',
                    by = intersect(c('month','season','lon','lat','system','lead_time'),names(dt)),
                    pool = c('year'))
{
  if(range(dt[,get(fc)],na.rm = T)[2] > 1) stop('your predictions should be values between 0 and 1.')
  if(is.logical(dt[,get(obs)]))
  {
    Score_dt = dt[,.(BS_ex = (get(fc)-get(obs))^2),by = c(by,threshold_col)]# the by-arguments are for keeping these columns only
  }
  if(!is.logical(dt[,get(obs)]))
  {
    Score_dt = dt[,.(BS_ex = (get(fc) - (get(obs) > get(threshold_col)))^2),by = c(by,threshold_col)]
  }
  return(Score_dt)
}



#' Calculate exceedence Brier skill score
#'
#' @param dt Data table containing the predictions.
#' @param fc column name of the prediction. Contains predicted probabilities of exceedence
#' @param clim_col column name containing the out-of-sample climatology forecast for the exceedence probability.
#' Usually generated by \code{climatology_threshold_exceedence}.
#' @param threshold_col which column contains the exceedence threshold?
#' @param obs column name of the observations. Can either be logical (exceedence or not) or real valued, containing precipitation (or the variable for which exceedence should be checked).
#' @param by column names of grouping variables, all of which need to be columns in dt.
#' Default is to group by all instances of month, season, lon, lat, system and lead_time that are columns in dt.
#' @export

BSS_ex_dt = function(dt,
                     fc,
                     clim_col = 'clim',
                     threshold_col,
                     obs = 'obs',
                     by = intersect(c('month','season','lon','lat','system','lead_time'),names(dt)))
{
  # for devtools::check:
  BSS_ex = clim_BS_ex = BS_ex = NULL


  BS_dt = BS_ex_dt(dt = dt,
                   fc = fc,
                   threshold_col = threshold_col,
                   obs = obs,
                   by = by)

  clim_es_dt = BS_ex_dt(dt = dt,
                        fc = clim_col,
                        threshold_col = threshold_col,
                        obs = obs,
                        by = by)


  setnames(clim_es_dt,'BS_ex','clim_BS_ex')
  score_dt = merge(BS_dt,clim_es_dt,by = intersect(names(BS_dt),names(clim_es_dt)))

  score_dt[,BSS_ex := (clim_BS_ex - BS_ex)/clim_BS_ex ]

  #deal with divisions by 0:
  score_dt[clim_BS_ex == 0 & BS_ex == 0, BSS_ex := 0] # when both are perfect, skill score should be 0
  score_dt[clim_BS_ex == 0 & BS_ex > 0, BSS_ex := -1] # set to something negative (capped at -1), when climatology predicts perfectly and the forecast does not
  #(should actually -Inf, but that's bad for plotting).
  return(score_dt)
}
SeasonalForecastingEngine/SeaVal documentation built on June 23, 2024, 12:03 a.m.