# R/crps.numeric.R
# In scoringRules: Scoring Rules for Parametric and Simulated Distribution Forecasts

# Documented in crps.numeric

#' Continuous Ranked Probability Score for Parametric Forecast Distributions
#' 
#' Calculate the Continuous Ranked Probability Score (CRPS) given observations
#' and parameters of a family of distributions.
#' 
#' @param y vector of realized values.
#' @param family string which specifies the parametric family; current options:
#' \code{"2pexp", "2pnorm", "beta", "binom", "clogis", "cnorm", "ct", "exp", "expM",
#' "exponential", "gamma", "gev", "gpd", "gtclogis", "gtcnorm", "gtct", "hyper", "lapl",
#' "laplace", "llapl", "llogis", "lnorm", "log-laplace", "log-logistic",
#' "log-normal", "logis", "logistic", "mixnorm", "mixture-normal", "nbinom",
#' "negative-binomial", "norm", "normal", "pois", "poisson", "t", "tlogis",
#' "tnorm", "tt", "two-piece-exponential", "two-piece-normal", "unif", "uniform"}.
#' @param ... vectors of parameter values; expected input depends on the chosen
#' \code{family}. See details below.
#' 
#' @return Vector of score values.
#' \emph{A lower score indicates a better forecast.}
#' 
#' @references
#' 
#' \emph{Closed form expressions of the CRPS for specific distributions:}
#' 
#' Baran, S. and S. Lerch (2015):
#' `Log-normal distribution based Ensemble Model Output Statistics models for
#' probabilistic wind-speed forecasting',
#' Quarterly Journal of the Royal Meteorological Society 141, 2289-2299.
#' \doi{10.1002/qj.2521}
#' \emph{(Log-normal)}
#' 
#' Friederichs, P. and T.L. Thorarinsdottir (2012):
#' `Forecast verification for extreme value distributions with an application
#' to probabilistic peak wind prediction',
#' Environmetrics 23, 579-594.
#' \doi{10.1002/env.2176}
#' \emph{(Generalized Extreme Value, Generalized Pareto)}
#' 
#' Gneiting, T., Raftery, A.E., Westveld III, A.H. and T. Goldman (2005):
#' `Calibrated probabilistic forecasting using ensemble model output statistics
#' and minimum CRPS estimation',
#' Monthly Weather Review 133, 1098-1118.
#' \doi{10.1175/mwr2904.1}
#' \emph{(Normal)}
#' 
#' Gneiting, T., Larson, K., Westrick, K., Genton, M.G. and E. Aldrich (2006):
#' `Calibrated probabilistic forecasting at the stateline wind energy center:
#' The regime-switching space-time method',
#' Journal of the American Statistical Association 101, 968-979.
#' \doi{10.1198/016214506000000456}
#' \emph{(Censored normal)}
#' 
#' Gneiting, T. and T.L. Thorarinsdottir (2010):
#' `Predicting inflation: Professional experts versus no-change forecasts',
#' arXiv preprint arXiv:1010.2318.
#' \emph{(Two-piece normal)}
#' 
#' Grimit, E.P., Gneiting, T., Berrocal, V.J. and N.A. Johnson (2006):
#' `The continuous ranked probability score for circular variables and its
#' application to mesoscale forecast ensemble verification',
#' Quarterly Journal of the Royal Meteorological Society 132, 2925-2942.
#' \doi{10.1256/qj.05.235}
#' \emph{(Mixture of normals)}
#' 
#' Scheuerer, M. and D. Moeller (2015):
#' `Probabilistic wind speed forecasting on a grid based on ensemble model
#' output statistics', Annals of Applied Statistics 9, 1328-1349.
#' \doi{10.1214/15-aoas843}
#' \emph{(Gamma)}
#' 
#' Thorarinsdottir, T.L. and T. Gneiting (2010):
#' `Probabilistic forecasts of wind speed: ensemble model output statistics by
#' using heteroscedastic censored regression',
#' Journal of the Royal Statistical Society (Series A) 173, 371-388.
#' \doi{10.1111/j.1467-985x.2009.00616.x}
#' \emph{(Truncated normal)}
#' 
#' Wei, W. and L. Held (2014):
#' `Calibration tests for count data',
#' TEST 23, 787-805.
#' \doi{10.1007/s11749-014-0380-8}
#' \emph{(Poisson, Negative Binomial)}
#' 
#' \emph{Independent listing of closed-form solutions for the CRPS:}
#' 
#' Taillardat, M., Mestre, O., Zamo, M. and P. Naveau (2016):
#' `Calibrated ensemble forecasts using quantile regression forests and
#' ensemble model output statistics',
#' Monthly Weather Review 144, 2375-2393. 
#' \doi{10.1175/mwr-d-15-0260.1}
#' 
#' 
#' @author Alexander Jordan, Fabian Krueger, Sebastian Lerch
#' 
#' @details
#' Mathematical details are available in Appendix A of the vignette
#' \emph{Evaluating probabilistic forecasts with scoringRules} that
#' accompanies the package.
#' 
#' The parameters supplied to each of the functions are numeric vectors:
#' \enumerate{
#'  \item Distributions defined on the real line:
#'    \itemize{
#'      \item
#'        \code{"laplace"} or \code{"lapl"}:
#'        \code{location} (real-valued location parameter),
#'        \code{scale} (positive scale parameter);
#'        see \code{\link{crps_lapl}}
#'      \item
#'        \code{"logistic"} or \code{"logis"}:
#'        \code{location} (real-valued location parameter),
#'        \code{scale} (positive scale parameter);
#'        see \code{\link{crps_logis}}
#'      \item
#'        \code{"normal"} or \code{"norm"}:
#'        \code{mean}, \code{sd} (mean and standard deviation);
#'        see \code{\link{crps_norm}}
#'      \item
#'        \code{"normal-mixture"} or \code{"mixture-normal"} or \code{"mixnorm"}:
#'        \code{m} (mean parameters),
#'        \code{s} (standard deviations),
#'        \code{w} (weights);
#'        see \code{\link{crps_mixnorm}};
#'        note: matrix-input for parameters
#'      \item
#'        \code{"t"}:
#'        \code{df} (degrees of freedom),
#'        \code{location} (real-valued location parameter),
#'        \code{scale} (positive scale parameter);
#'        see \code{\link{crps_t}}
#'      \item
#'        \code{"two-piece-exponential"} or \code{"2pexp"}:
#'        \code{location} (real-valued location parameter),
#'        \code{scale1}, \code{scale2} (positive scale parameters);
#'        see \code{\link{crps_2pexp}}
#'      \item
#'        \code{"two-piece-normal"} or \code{"2pnorm"}:
#'        \code{location} (real-valued location parameter),
#'        \code{scale1}, \code{scale2} (positive scale parameters);
#'        see \code{\link{crps_2pnorm}}
#'    }
#'  \item Distributions for non-negative random variables:
#'    \itemize{
#'      \item
#'        \code{"exponential"} or \code{"exp"}:
#'        \code{rate} (positive rate parameter);
#'        see \code{\link{crps_exp}}
#'      \item
#'        \code{"gamma"}:
#'        \code{shape} (positive shape parameter),
#'        \code{rate} (positive rate parameter),
#'        \code{scale} (alternative to \code{rate});
#'        see \code{\link{crps_gamma}}
#'      \item
#'        \code{"log-laplace"} or \code{"llapl"}:
#'        \code{locationlog} (real-valued location parameter),
#'        \code{scalelog} (positive scale parameter);
#'        see \code{\link{crps_llapl}}
#'      \item
#'        \code{"log-logistic"} or \code{"llogis"}:
#'        \code{locationlog} (real-valued location parameter),
#'        \code{scalelog} (positive scale parameter);
#'        see \code{\link{crps_llogis}}
#'      \item
#'        \code{"log-normal"} or \code{"lnorm"}:
#'        \code{locationlog} (real-valued location parameter),
#'        \code{scalelog} (positive scale parameter);
#'        see \code{\link{crps_lnorm}}
#'    }
#'  \item Distributions with flexible support and/or point masses:
#'    \itemize{
#'      \item
#'        \code{"beta"}:
#'        \code{shape1}, \code{shape2} (positive shape parameters),
#'        \code{lower}, \code{upper} (lower and upper limits);
#'        see \code{\link{crps_beta}}
#'      \item
#'        \code{"uniform"} or \code{"unif"}:
#'        \code{min}, \code{max} (lower and upper limits),
#'        \code{lmass}, \code{umass} (point mass in lower or upper limit);
#'        see \code{\link{crps_unif}}
#'      \item
#'        \code{"expM"}:
#'        \code{location} (real-valued location parameter),
#'        \code{scale} (positive scale parameter),
#'        \code{mass} (point mass in \code{location});
#'        see \code{\link{crps_expM}}
#'      \item
#'        \code{"gev"}:
#'        \code{location} (real-valued location parameter),
#'        \code{scale} (positive scale parameter),
#'        \code{shape} (real-valued shape parameter);
#'        see \code{\link{crps_gev}}
#'      \item
#'        \code{"gpd"}:
#'        \code{location} (real-valued location parameter),
#'        \code{scale} (positive scale parameter),
#'        \code{shape} (real-valued shape parameter),
#'        \code{mass} (point mass in \code{location});
#'        see \code{\link{crps_gpd}}
#'      \item
#'        \code{"tlogis"}:
#'        \code{location} (location parameter),
#'        \code{scale} (scale parameter),
#'        \code{lower}, \code{upper} (lower and upper limits);
#'        see \code{\link{crps_tlogis}}
#'      \item
#'        \code{"clogis"}:
#'        \code{location} (location parameter),
#'        \code{scale} (scale parameter),
#'        \code{lower}, \code{upper} (lower and upper limits);
#'        see \code{\link{crps_clogis}}
#'      \item
#'        \code{"gtclogis"}:
#'        \code{location} (location parameter),
#'        \code{scale} (scale parameter),
#'        \code{lower}, \code{upper} (lower and upper limits),
#'        \code{lmass}, \code{umass} (point mass in lower or upper limit);
#'        see \code{\link{crps_gtclogis}}
#'      \item
#'        \code{"tnorm"}:
#'        \code{location} (location parameter),
#'        \code{scale} (scale parameter),
#'        \code{lower}, \code{upper} (lower and upper limits);
#'        see \code{\link{crps_tnorm}}
#'      \item
#'        \code{"cnorm"}:
#'        \code{location} (location parameter),
#'        \code{scale} (scale parameter),
#'        \code{lower}, \code{upper} (lower and upper limits);
#'        see \code{\link{crps_cnorm}}
#'      \item
#'        \code{"gtcnorm"}:
#'        \code{location} (location parameter),
#'        \code{scale} (scale parameter),
#'        \code{lower}, \code{upper} (lower and upper limits),
#'        \code{lmass}, \code{umass} (point mass in lower or upper limit);
#'        see \code{\link{crps_gtcnorm}}
#'      \item
#'        \code{"tt"}:
#'        \code{df} (degrees of freedom),
#'        \code{location} (location parameter),
#'        \code{scale} (scale parameter),
#'        \code{lower}, \code{upper} (lower and upper limits);
#'        see \code{\link{crps_tt}}
#'      \item
#'        \code{"ct"}:
#'        \code{df} (degrees of freedom),
#'        \code{location} (location parameter),
#'        \code{scale} (scale parameter),
#'        \code{lower}, \code{upper} (lower and upper limits);
#'        see \code{\link{crps_ct}}
#'      \item
#'        \code{"gtct"}:
#'        \code{df} (degrees of freedom),
#'        \code{location} (location parameter),
#'        \code{scale} (scale parameter),
#'        \code{lower}, \code{upper} (lower and upper limits),
#'        \code{lmass}, \code{umass} (point mass in lower or upper limit);
#'        see \code{\link{crps_gtct}}
#'    }
#'  \item Distributions of discrete variables:
#'    \itemize{
#'      \item 
#'        \code{"binom"}:
#'        \code{size} (number of trials (zero or more)),
#'        \code{prob} (probability of success on each trial);
#'        see \code{\link{crps_binom}}
#'      \item 
#'        \code{"hyper"}:
#'        \code{m} (the number of white balls in the urn),
#'        \code{n} (the number of black balls in the urn),
#'        \code{k} (the number of balls drawn from the urn);
#'        see \code{\link{crps_hyper}}
#'      \item
#'        \code{"negative-binomial"} or \code{"nbinom"}:
#'        \code{size} (positive dispersion parameter),
#'        \code{prob} (success probability),
#'        \code{mu} (mean, alternative to \code{prob});
#'        see \code{\link{crps_nbinom}}
#'      \item
#'        \code{"poisson"} or \code{"pois"}:
#'        \code{lambda} (positive mean);
#'        see \code{\link{crps_pois}}
#'    }
#' }
#' All numerical arguments should be of the same length.
#' The exception is arguments of length 1 (scalars), which will be recycled.
#' 
#' @examples 
#' crps(y = 1, family = "normal", mean = 0, sd = 2)
#' crps(y = rnorm(20), family = "normal", mean = 1:20, sd = sqrt(1:20))
#' 
#' ## Arguments can have different lengths:
#' crps(y = rnorm(20), family = "normal", mean = 0, sd = 2)
#' crps(y = 1, family = "normal", mean = 1:20, sd = sqrt(1:20))
#' 
#' ## Mixture of normal distributions requires matrix input for parameters:
#' mval <- matrix(rnorm(20*50), nrow = 20)
#' sdval <- matrix(runif(20*50, min = 0, max = 2), nrow = 20)
#' weights <- matrix(rep(1/50, 20*50), nrow = 20)
#' crps(y = rnorm(20), family = "mixnorm", m = mval, s = sdval, w = weights)
#' 
#' 
#' @seealso \code{\link{logs.numeric}}
#' 
#' @export crps.numeric
#' @export
crps.numeric <- function(y, family, ...) {
  # Map the user-supplied family string to the name suffix shared by the
  # family-specific helper functions.
  family <- getFamily(family, "crps")

  # Look up the helpers by name: "check_crps_<family>" receives the argument
  # list, "crps_<family>" is called with those arguments to produce the scores.
  validate <- get(paste0("check_crps_", family))
  score_fun <- get(paste0("crps_", family))

  # Bundle the observations with whatever parameters were passed via `...`,
  # hand the bundle to the validator, then compute the scores.
  call_args <- list(y = y, ...)
  validate(call_args)
  scores <- do.call(score_fun, call_args)

  # Missing values take precedence: when any score is NA, only that warning
  # is raised and the sign check is skipped.
  if (any(is.na(scores))) {
    warning("Missing CRPS values.")
    return(scores)
  }

  # Warn when any computed score is negative.
  if (any(scores < 0)) {
    negative_msg <- paste(
      "Negative CRPS values.",
      "Check parameter combinations and contact package maintainer(s)."
    )
    warning(negative_msg)
  }

  scores
}