R/ts_forecast.R

#' Generic ts_forecast constructor, dispatched on the class of x
#' @param x Input data: an array, a matrix, or a list of prob_forecast objects
#' @param ... Additional arguments passed to the class-specific constructor
#' @export
ts_forecast <- function(x, ...) {
  UseMethod("ts_forecast",x)
}

#' Calculate a timeseries forecast from an array
#'
#' Construct a time series of power forecasts, using input training data.
#' Assumes training data already captures differences in magnitude (i.e., power
#' rating) amongst sites. Forecast is NA for times when sun is down.
#'
#' @param x An array of training data of dimensions [time x ntrain x nsites] for
#'   n-dimensional forecast
#' @param start_time A lubridate time stamp
#' @param time_step Time step in hours
#' @param scale One of 'region', 'total'
#' @param location A string
#' @param method One of 'gaussian', 'empirical', 'vine'
#' @param sun_up_threshold An absolute [MW] threshold on the ensemble members to
#'   remove dubious sunrise/sunset values
#' @param MoreTSArgs An optional dictionary of time-series arguments to the
#'   forecast calculation
#' @param ... optional arguments to the prob_forecast object
#' @export
ts_forecast.array <- function(x, start_time, time_step, scale, location, method, sun_up_threshold=0.5, MoreTSArgs=NA, ...) {
  # Check inputs
  if (!((length(dim(x))==3 & tolower(scale) %in% c("region", "total")))){
    stop("Data and scale mis-match. x must be a 3-dimensional array for n-dimensional forecasts.")
  }
  if (any(names(MoreTSArgs) %in% c("location", "time"))) stop("MoreTSArgs may not include location or time, which are reserved names.")

  new_ts_forecast(x, start_time, time_step, scale, location, method, sun_up_threshold, MoreTSArgs, ...)
}

#' Calculate a timeseries forecast from a matrix
#'
#' Construct a time series of power forecasts, using input training data.
#' Assumes training data already captures differences in magnitude (i.e., power
#' rating) amongst sites. Forecast is NA for times when sun is down.
#'
#' @param x A matrix of training data of dimensions [time x ntrain] for a
#'   1-dimensional forecast
#' @param start_time A lubridate time stamp
#' @param time_step Time step in hours
#' @param scale Must be "site"
#' @param location A string
#' @param method One of 'binned', 'kde', 'bma', 'empirical', 'emos'
#' @param sun_up_threshold An absolute [MW] threshold on the ensemble members to
#'   remove dubious sunrise/sunset values
#' @param MoreTSArgs An optional dictionary of time-series arguments to the
#'   forecast calculation
#' @param ... optional arguments to the prob_forecast object
#' @export
ts_forecast.matrix <- function(x, start_time, time_step, scale, location, method, sun_up_threshold=0.5, MoreTSArgs=NA, ...) {
  # Check inputs
  if (tolower(scale) !="site"){
    stop("Data and scale mis-match. x must be a 2-dimensional array for 1-dimensional forecasts.")
  }

  new_ts_forecast(x, start_time, time_step, scale, location, method, sun_up_threshold, MoreTSArgs, ...)
}
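
# Illustrative sketch (not run): a hypothetical single-site forecast built from a
# [time x ntrain] ensemble matrix. The toy matrix `ens` and the choice of the
# 'empirical' method are assumptions for illustration, not package test data.
if (FALSE) {
  set.seed(1)
  ens <- matrix(pmax(stats::rnorm(24 * 50, mean = 5, sd = 2), 0), nrow = 24, ncol = 50)
  ts_fc <- ts_forecast(ens,
                       start_time = lubridate::ymd_hm("2020-01-01 00:00"),
                       time_step = 1,          # hourly
                       scale = "site",
                       location = "site_A",
                       method = "empirical")
  length(ts_fc)   # 24 time steps
  plot(ts_fc)     # fan plot of the forecast quantiles
}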


#' Calculate a timeseries forecast from a list
#'
#' An alternative ts_forecast constructor, which accepts an already calculated
#' list of forecasts. For future work, this could be integrated with copula
#' post-processing with site-level forecasts.
#'
#' @param x A list of prob_forecast objects
#' @param start_time A lubridate time stamp
#' @param time_step Time step in hours
#' @param scale One of 'site', 'region', 'total'
#' @param location A string
#' @param method A string
#' @export
ts_forecast.list <- function(x, start_time, time_step, scale, location, method) {
  # Check inputs
  if (!any(sapply(x, is.prob_forecast))) stop("Bad input. x must be a list of forecasts.")

  sun_up <- sapply(x, is.prob_forecast)

  dat <- list(start_time = start_time,
              scale = scale,
              location = location,
              time_step = time_step,
              forecasts = x,
              sun_up = sun_up
  )
  structure(dat, class = "ts_forecast")
}

# Helper constructor to calculate forecast from matrix or array data inputs
new_ts_forecast <- function(x, start_time, time_step, scale, location, method,
                                            sun_up_threshold, MoreTSArgs, ...) {
  if (any(names(MoreTSArgs) %in% c("location", "time"))) stop("MoreTSArgs may not include location or time, which are reserved names.")

  sun_up <- apply(x, MARGIN=1, FUN=check_sunup, sun_up_threshold=sun_up_threshold)
  forecasts <- calc_forecasts(x, sun_up, start_time=start_time, time_step=time_step, scale=scale, location=location,
                              method=method, MoreTSArgs=MoreTSArgs, ...)

  dat <- list(start_time = start_time,
              scale = scale,
              location = location,
              time_step = time_step,
              forecasts = forecasts,
              sun_up = sun_up
  )
  structure(dat, class = "ts_forecast")
}

#' Check whether the sun is up at a time point
#'
#' Checks a slice of GHI/power training data for any value at or above the
#' sun-up threshold.
#'
#' @param x A vector or matrix of training data for a single time point
#' @param sun_up_threshold Threshold value; defaults to 0.5
#' @return A boolean
#' @export
check_sunup <- function(x, sun_up_threshold=0.5){
  return(any(x>=sun_up_threshold, na.rm=TRUE))
}
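
# Illustrative sketch (not run): a time point counts as sun-up if any ensemble
# member reaches the threshold (0.5 MW by default), ignoring NA's. The toy
# values below are made up.
if (FALSE) {
  check_sunup(c(0, 0.2, 0.7, 1.3))                      # TRUE: one member reaches 0.5
  check_sunup(c(0, 0.1, 0.2, NA))                       # FALSE
  check_sunup(c(0, 0.1, 0.2), sun_up_threshold = 0.1)   # TRUE with a lower threshold
}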

#' Calculate a time series list of power forecasts.
#'
#' @param x A matrix of training data of dimensions [time x ntrain] (for
#'   scale=='site') or an array of dimensions [time x ntrain x nsites] (for
#'   scale=='region' or 'total')
#' @param sun_up Logical vector the length of the time-series
#' @param start_time A lubridate time stamp
#' @param time_step Time step in hours
#' @param scale One of 'site', 'region', 'total'
#' @param location A string
#' @param method One of 'gaussian', 'empirical', 'vine'
#' @param MoreTSArgs An optional dictionary of time-series arguments to the
#'   forecast calculation
#' @param ... optional arguments to the prob_forecast, including marginal
#'   estimator arguments
#' @return A list of forecasts. Forecast is NA for times when sun is down.
#' @export
calc_forecasts <- function(x, sun_up, start_time, time_step, scale, location, method, MoreTSArgs=NA, ...) {
  class_type <- get_forecast_class(scale, method)
  sub_func <- function(i, time, sun_up){
    if (sun_up) {
      forecast <- tryCatch({
        # If additional time-series arguments are provided, this time point's arguments are added to the list of optional arguments
        if (class_type$dim == '1') { # Can't use ifelse function or it will just return the first value
          args <- list(data.input=x[i,], location=location, time=time)
        } else {
          args <- list(data.input=x[i,,], location=location, time=time)
        }

        # Doing this manually to ensure list elements aren't coerced into a different type
        for (name in names(list(...))) {args[[name]] <- list(...)[[name]]}
        moreargs <- sapply(names(MoreTSArgs), FUN=function(name) return(name=MoreTSArgs[[name]][[i]]), simplify=F)
        for (name in names(moreargs)) {args[[name]] <- moreargs[[name]]}

        return(do.call(class_type$class, args))
      }, error = function(e) {
        e$message <- paste(e$message, "in forecasting time", time, "for location", location)
        stop(e)
      })
      return(forecast)
    } else {return(NA)}}
  return(mapply(sub_func, seq_len(dim(x)[1]), start_time + (seq_len(dim(x)[1])-1)*lubridate::dhours(time_step), sun_up, SIMPLIFY=FALSE))
}

#' Look-up function of the forecast class type
#'
#' @param scale One of 'site', 'region', 'total'
#' @param method One of 'binned', 'kde', 'bma', 'empirical', 'emos' if scale is
#'   'site'; else 'vine'
#' @return A function to initialize a forecast of the desired type
get_forecast_class <- function(scale, method){
  if (tolower(scale) %in% c("site")){
    cls <- switch(tolower(method),
                  "binned" = fc_binned,
                  "kde" = fc_kde,
                  "bma" = fc_bma,
                  "empirical" = fc_empirical,
                  "emos" = fc_emos,
                  stop(paste('Forecast type', method, 'not recognized for single-site forecasts.', sep=' ')))
    d <- '1'
  } else if (tolower(scale) %in% c("region", "total")){
    cls <- switch(tolower(method),
           "vine" = stop("Not implemented. ts_forecast doesn't have direct handling for vine copula's new list of inputs. "),
           stop(paste('Forecast type', method, 'not recognized for multi-dimensional forecasts.', sep=' ')))
    d <- 'n'
  } else stop(paste('Forecast scale', scale, 'not recognized.', sep=' '))
  return(list(class=cls, dim=d))
}

#' Calculate number of steps in time series forecast
#'
#' @param x A ts_forecast object
#' @export
length.ts_forecast <- function(x) {
  return(length(x$forecasts))
}

#' Plot fan plot of the time-series probabilistic forecast
#'
#' @param x A ts_forecast object
#' @param tel vector of telemetry (optional)
#' @param window (optional) A vector of (start index, end index) to plot certain
#'   time window
#' @param normalize.by (optional) Plant power to normalize results. Defaults to 1
#' @param xbreaks (optional) Determines width between x ticks, defaults to 4
#' @param ylim (optional) y-axis limits, passed to ggplot2::coord_cartesian
#' @param text.size (optional) Plot text size, defaults to 14
#' @param ylab (optional) y-axis label
#' @param xlab (optional) x-axis label
#' @param xticklab (optional) Boolean; whether to label x-axis ticks. Defaults to TRUE
#' @param yticklab (optional) Boolean; whether to label y-axis ticks. Defaults to TRUE
#' @export
plot.ts_forecast <- function(x, tel=NA, window=NA, normalize.by=1, xbreaks=4, ylim=NULL, text.size =14,
                             ylab=ggplot2::element_blank(), xlab=ggplot2::element_blank(), xticklab=T, yticklab=T) {
  indices <- get_index_window(x, window)

  probs <- x$forecasts[[min(which(sapply(x$forecasts, FUN=is.prob_forecast)))]]$quantiles$q

  plotdata <- matrix(ncol=length(probs), nrow=length(indices))
  for (i in seq_along(indices)) {
    if (is.prob_forecast(x$forecasts[[indices[i]]])) {
      plotdata[i,] <- x$forecasts[[indices[i]]]$quantiles$x/normalize.by
    } else plotdata[i,] <- 0
  }
  colnames(plotdata) <- probs

  df <- tibble::rowid_to_column(data.frame(plotdata, check.names=F), var="x")
  df <- tidyr::gather(df, key="q", value="y", -x, convert=T)
  g <- ggplot2::ggplot(df,  ggplot2::aes(x=x,y=y,quantile=q)) + ggfan::geom_fan() +
    ggplot2::theme_bw() + ggplot2::xlab(xlab) +
    ggplot2::ylab(ylab) +
    ggplot2::theme(text=ggplot2::element_text(size=text.size),
                   panel.background = ggplot2::element_rect(fill = "transparent",colour = NA),
                   plot.background = ggplot2::element_rect(fill = "transparent",colour = NA))

  if (xticklab) {
    g <- g + ggplot2::scale_x_continuous(expand=c(0,0), breaks=seq(from=0, to=length(indices), by=xbreaks*x$time_step), labels=seq(from=0, to=length(indices)/x$time_step, by=xbreaks))
  } else {
    g <- g + ggplot2::scale_x_continuous(expand=c(0,0), breaks=seq(from=0, to=length(indices), by=xbreaks*x$time_step), labels=rep("", times=length(seq(from=0, to=length(indices)/x$time_step, by=xbreaks))))
  }

  if (!is.null(ylim)) {g <- g + ggplot2::coord_cartesian(ylim=ylim)}
  if (!yticklab) {
    g <- g + ggplot2::scale_y_continuous(labels=rep("", times=length(ggplot2::ggplot_build(g)$layout$panel_params[[1]]$y.major_source)))
  }

  if (any(!is.na(tel))) {
    actuals_time_step <- x$time_step*length(x)/length(tel)
    g <- g + ggplot2::geom_line(mapping=ggplot2::aes(x=x, y=y), # size=1.2,
                                data=data.frame(x=seq(from=actuals_time_step, length.out=length(indices)/actuals_time_step, by=actuals_time_step),
                                                y=tel[((indices[1]-1)*actuals_time_step+1):(indices[length(indices)]*actuals_time_step)]/normalize.by),
                                col="chocolate3", inherit.aes = F)
  }
  return(g)
}

#' Write ts_forecast quantiles to .csv
#'
#' Export a csv file of [time x quantiles]. Note: this follows the naming
#' convention of write.csv and write.table, but write isn't an exported generic
#' function.
#'
#' @param x A ts_forecast object
#' @param file A file name
#' @param window (optional) A vector of (start index, end index) to plot certain
#'   time window
#' @param percentiles (optional) Which percentiles' quantiles to export; defaults
#'   to the 1st through 99th percentiles
#' @export
write.ts_forecast <- function(x, file, window=NA, percentiles=1:99) {
  indices <- get_index_window(x, window)

  data <- sapply(percentiles, get_quantile_time_series, x=x, simplify="array")

  colnames(data) <- percentiles
  utils::write.csv(data[indices, ], file=file)
}

# Helper function to select only a subset of the forecast length
get_index_window <- function(x, window) {
  if (all(!(is.na(window))) & length(window)!=2) stop("To use time window, must give a vector of starting and ending indices")

  start <- ifelse(all(is.na(window)), 1, window[1])
  end <- ifelse(all(is.na(window)), length(x), window[2])
  return(start:end)
}
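
# Illustrative sketch (not run): with window=NA all indices are returned; with a
# (start, end) vector only that inclusive sub-range is returned. `ts_fc` is
# assumed to be a ts_forecast of length 24.
if (FALSE) {
  get_index_window(ts_fc, NA)         # 1:24
  get_index_window(ts_fc, c(7, 18))   # 7:18, e.g. daylight hours only
}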

#' Get a time-series of the forecast value at a particular quantile
#'
#' @param x A ts_forecast object
#' @param alpha Percentile of interest, numeric in [0, 100]
#' @return Numeric vector
#' @export
get_quantile_time_series <- function(x, alpha) {
  eps <- 1e-6
  q <- alpha/100
  timeseries <- sapply(x$forecasts, FUN=function(forecast, q) {
    if (is.prob_forecast(forecast)) {
      v <- forecast$quantiles$x[which(abs(forecast$quantiles$q - q) < eps)]
      if (length(v) == 0) stop(paste(alpha, 'quantile not available')) else return(v)
    } else return(NA)},
    q=q)
  return(timeseries)
}

#' Plot a time-series of the upper and lower tail CVAR's
#' @param x A ts_forecast object
plot_cvar_over_time <- function(x) {
  y1 <- sapply(x$forecasts, FUN=function(fc) {if (is.prob_forecast(fc)) {return(fc$cvar$low)} else {return(0)}})
  y2 <- sapply(x$forecasts, FUN=function(fc) {if (is.prob_forecast(fc)) {return(fc$cvar$high)} else {return(0)}})
  colors <- c("darkseagreen4", "darkorange3")
  graphics::plot((1:length(x))*x$time_step, y2, type = 'b', col=colors[1], xlab='Time', ylab='CVaR [W]')
  graphics::lines((1:length(x))*x$time_step, y1, type = 'b', col=colors[2])
  graphics::legend(x="topleft", legend=c("Upper tail", "Lower tail"), bty='n', col=colors, lty=1, lwd=2, y.intersp=2)
}

# --------------------------------------------------------------------------------------------
# Forecast evaluation methods

#' Equalize lengths of telemetry and forecast vectors
#'
#' Preprocess for metrics evaluations, for when telemetry is at finer time
#' resolution than the forecast. The forecast and telemetry can be at different
#' time resolutions, so long as telemetry is a multiple of the forecast.
#'
#' @param tel A vector of the telemetry values, OR a single number giving the
#'   length of the telemetry vector
#' @param fc A vector of data from the time series forecast, OR a single number
#'   giving the length of the forecast vector
#' @param agg Boolean, TRUE to aggregate telemetry to forecast resolution, FALSE
#'   to expand forecast to telemetry resolution
#' @param align One of "end-of-hour" (default) or "half-hour". With "half-hour",
#'   the first aggregated telemetry value is NaN and the telemetry lags the
#'   forecast; currently the half-hour approach expands the forecast the same
#'   way as "end-of-hour".
#' @return list of the telemetry and forecast data vectors, of equal length
#' @export
equalize_telemetry_forecast_length <- function(tel, fc, agg=TRUE, align="end-of-hour") {
  if (!(align %in% c("end-of-hour", "half-hour"))) stop(paste("Unknown method for align. Given ", align, sep=''))
  if (length(tel) > 1 & length(fc)> 1){
    if ((length(tel) != length(fc)) & (length(tel) < length(fc) | length(tel) %% (2*length(fc)) > 0)) stop("Telemetry length must be equal to or an even multiple of forecast length.")
    fc_length <- length(fc)
    tel_length <- length(tel)
  } else if (length(tel) > 1 & length(fc) == 1) {
    if ((length(tel) != fc) & (length(tel) < fc | length(tel) %% (2*fc) > 0)) stop("Telemetry length must be equal to or an even multiple of forecast length.")
    fc_length <- fc
    tel_length <- length(tel)
  } else if (length(tel) == 1 & length(fc) > 1) {
    if ((tel != length(fc)) & (tel < length(fc) | tel %% (2*length(fc)) > 0)) stop("Telemetry length must be equal to or an even multiple of forecast length.")
    tel_length <- tel
    fc_length <- length(fc)
  } else { # Both tel and fc are singletons
    tel_length <- 1
    fc_length <- 1
  }

  tel_2_fc <- tel_length/fc_length

  # Default translation from telemetry index to forecast index
  index_translation <- function(i) {i}

  if (tel_2_fc > 1) {
    if (align=="end-of-hour") {
      if (agg) {
        tel <- sapply(seq_len(fc_length), function(i) {return(sum(tel[(tel_2_fc*(i-1)+1):(tel_2_fc*i)])/tel_2_fc)})
      } else {
        fc <- rep(fc, each=tel_2_fc)
        # Modified translation function to align more frequent telemetry with forecast:
        index_translation <- purrr::partial(function(i, tel_2_fc) {floor((i-1)/tel_2_fc)+1}, tel_2_fc=tel_2_fc)
      }
    } else { # align == "half-hour"
      if (agg) {
        tel <- c(NaN, sapply(seq_len(fc_length-1), function(i) {return(sum(tel[(tel_2_fc*(i-0.5)+1):(tel_2_fc*(i+0.5))])/tel_2_fc)}))
      } else {
        fc <- c(rep(fc[1], each=tel_2_fc/2), rep(fc[-1], each=tel_2_fc), rep(NA, each=tel_2_fc/2))
        # Modified translation function to align more frequent telemetry with forecast
        index_translation <- purrr::partial(function(i, tel_2_fc) {ceiling((i-0.5*tel_2_fc)/tel_2_fc)+1}, tel_2_fc=tel_2_fc)
      }
    }
  }

  return(list(tel=tel, fc=fc, tel_2_fc=tel_2_fc, translate_forecast_index=index_translation ))
}
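
# Illustrative sketch (not run): 15-minute telemetry against an hourly forecast.
# With agg=TRUE each block of four telemetry values is averaged down to the
# forecast resolution; with agg=FALSE the forecast is expanded instead and
# translate_forecast_index() maps telemetry indices back to forecast indices.
# The toy vectors are made up.
if (FALSE) {
  tel <- 1:12           # 12 quarter-hourly telemetry values (3 hours)
  fc <- c(10, 20, 30)   # 3 hourly forecast values

  res <- equalize_telemetry_forecast_length(tel, fc, agg = TRUE)
  res$tel   # 2.5 6.5 10.5 (hourly telemetry averages)
  res$fc    # 10 20 30 (unchanged)

  res <- equalize_telemetry_forecast_length(tel, fc, agg = FALSE)
  res$fc                            # each forecast value repeated 4 times
  res$translate_forecast_index(5)   # 2: the 5th telemetry value falls in hour 2
}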

#' Get sun-up, sun-down, and missing data statistics
#'
#' Get statistics on the prevalence of NaN's in telemetry and sun-up/sun-down
#' discrepancies
#'
#' @param ts A ts_forecast object
#' @param tel A vector of the telemetry values
#' @param ... optional arguments to equalize_telemetry_forecast_length
#' @return A list of summary statistics
#' @export
get_sundown_and_NaN_stats <- function(ts, tel, ...) {
  res <- equalize_telemetry_forecast_length(tel, ts$sun_up, ...)
  sun_up <- res$fc
  tel <- res$tel
  forecast_available <- equalize_telemetry_forecast_length(tel, !is.na(ts$forecasts), ...)$fc

  fc_sundown <- length(sun_up[sun_up==FALSE])
  fc_sunup <- length(sun_up[sun_up==TRUE])
  tel_sunup_NaN <- length(tel[is.nan(tel) & sun_up==TRUE])
  tel_sunup <- length(tel[!is.nan(tel) & sun_up==TRUE])
  tel_sundown_0 <- length(tel[!is.nan(tel) & tel==0 & sun_up==FALSE])
  tel_sundown <- length(tel[!is.nan(tel) & tel!=0 & sun_up==FALSE])
  tel_sundown_NaN <- length(tel[is.nan(tel) & sun_up==FALSE])
  return(list("Sun-up forecasts"=fc_sunup,
              "Sun-down forecasts"=fc_sundown,
              "Telemetry missing when sun up"=tel_sunup_NaN,
              "Telemetry available when sun up"=tel_sunup,
              "Telemetry is 0 when sun forecasted down"=tel_sundown_0,
              "Telemetry is non-zero when sun forecasted down"=tel_sundown,
              "Telemetry is NaN when sun forecasted down"=tel_sundown_NaN,
              "Sunup missing telemetry rate"=tel_sunup_NaN/(tel_sunup+tel_sunup_NaN),
              "Validatable forecasts"= sum(sun_up & !is.nan(tel) & forecast_available)
         ))
}

#' Get the average CRPS
#'
#' Get the average estimated CRPS (continuous ranked probability score) for the
#' forecast; the score at each time point is estimated through numerical
#' integration. CRPS characterizes calibration and sharpness together.
#'
#' @param ts A ts_forecast object
#' @param tel A vector of the telemetry values
#' @param normalize.by (optional) A normalization factor, either a scalar or
#'   vector
#' @param ... optional arguments to equalize_telemetry_forecast_length
#' @export
CRPS_avg <-function(ts, tel, normalize.by=1, ...){
  crps_list <- get_metric_time_series(CRPS, ts, tel, normalize.by, ...)
  return(list(mean=mean(crps_list, na.rm=T), min=min(crps_list, na.rm=T), max=max(crps_list, na.rm=T), sd=stats::sd(crps_list, na.rm=T),
              median=median(crps_list, na.rm=T)))
}

#' Get the quantile-weighted CRPS
#'
#' Calculate CRPS (continuous ranked probability score) for the forecast,
#' estimated by trapezoidal integration of its quantile decomposition. Quantile
#' weighting functions as suggested in Gneiting & Ranjan 2012.
#'
#' @param ts A ts_forecast object
#' @param tel A vector of the telemetry values
#' @param weighting One of "none" (default), "tails", "right", "left", "center"
#' @param quantiles (optional) Sequence of quantiles to integrate over
#' @param qs (optional) A list of quantile scores corresponding to the quantiles
#' @export
qwCRPS <-function(ts, tel, weighting="none", quantiles=seq(0.01, 0.99, by=0.01), qs=NA){
  if (all(is.na(qs))) {
    qs <- QS(ts, tel, quantiles)
  } else if (length(quantiles) != length(qs)) {
    stop("quantile and quantile score vectors must be same length.")
  }
  wqs <- weight_QS(qs, quantiles, weighting)
  return(pracma::trapz(quantiles, wqs))
}

#' Get the reliability index (RI) as per Delle Monache 2006
#'
#' @param ts A ts_forecast object
#' @param tel A vector of the telemetry values
#' @param nbins Number of bins (original ensemble members) to evaluate
#' @param ... optional arguments to equalize_telemetry_forecast_length
RI <-function(ts, tel, nbins=100, ...){
  counts <- calc_PIT_histogram(ts, tel, nbins, ...)$bin_hits
  return(sum(abs(counts/sum(counts) - 1/nbins)))
}

#' Get the percentile reliability index (PRI) of quantiles 1 to 99
#'
#' An extension of the Delle Monache 2006 reliability index that looks at the
#' distance from probabilistic calibration (P-P plot). Note that this looks at
#' the performance at the quantiles, not for each bin -- that is, it is assessed
#' over 99 rather than 100 values.
#'
#' @param ts A ts_forecast object
#' @param tel A vector of the telemetry values
#' @param ... optional arguments to equalize_telemetry_forecast_length
PRI <-function(ts, tel, ...){
  counts <- calc_PIT_histogram(ts, tel, 100, ...)$bin_hits
  obs_rate <- cumsum(counts/sum(counts))

  return(sum(abs(seq(0.01, 1, by=0.01) - obs_rate)))
}
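
# Illustrative sketch (not run): the RI and PRI formulas applied to a
# hypothetical set of 100 PIT bin counts. A perfectly calibrated forecast has
# counts/sum(counts) equal to 1/nbins in every bin, giving RI = PRI = 0.
if (FALSE) {
  nbins <- 100
  counts <- stats::rbinom(nbins, size = 50, prob = 1/nbins)   # hypothetical bin hits
  sum(abs(counts/sum(counts) - 1/nbins))                      # RI, as in RI()
  obs_rate <- cumsum(counts/sum(counts))
  sum(abs(seq(0.01, 1, by = 0.01) - obs_rate))                # PRI, as in PRI()
}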

#' Get a time-series of the forecast performance in terms of one metric
#' @param metric A function for prob_forecast objects
#' @param ts A ts_forecast object
#' @param tel Vector of telemetry
#' @param normalize.by A vector or scalar
#' @param metricArgs (optional) a list of additional arguments to the metrics
#'   function
#' @param ... optional arguments to equalize_telemetry_forecast_length
#' @return A vector
#' @export
get_metric_time_series <- function(metric, ts, tel, normalize.by, metricArgs=NULL,  ...) {
  x <- equalize_telemetry_forecast_length(tel, !is.na(ts$forecasts), ...)
  forecast_available <- x$fc
  normalize.by <- equalize_normalization_factor_length(normalize.by, x$tel, ...)

  metric_list <- sapply(seq_along(forecast_available), function(i) {
    if (forecast_available[i] & !is.na(x$tel[i])) {
      args <- c(list(x=ts$forecasts[[x$translate_forecast_index(i)]], tel=x$tel[i]), metricArgs)
      return(do.call(metric, args)/normalize.by[i])
    } else return(NA)})
  return(metric_list)
}

#' Get the quantile score at certain quantile(s)
#'
#' @param ts A ts_forecast object
#' @param tel A vector of the telemetry values
#' @param quantiles Numeric in [0,1]; can be a scalar or a vector
#' @return The quantile score(s)
#' @export
QS <- function(ts, tel, quantiles) {
  if (length(ts) != length(tel)) stop("Forecast and telemetry must be at same time resolution")
  if (any(quantiles < 0) | any(quantiles > 1)) stop("Quantiles must all be [0,1]")

  thresholds <- t(sapply(100*quantiles, FUN=function(q) {get_quantile_time_series(ts, q)}, simplify="array"))

  valid <- !is.na(ts$forecasts) & !is.na(tel)
  indicator <- sapply(seq_along(quantiles), FUN=function(i) {as.integer(tel[valid] <= thresholds[i, valid])}, simplify="array")

  qs <- sapply(seq_along(quantiles), FUN=function(q) mean(2*(indicator[,q]-quantiles[q])*(thresholds[q,valid]-tel[valid]), na.rm = TRUE))
  return(qs)
}
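
# Illustrative sketch (not run): the quantile (pinball) score at a single
# quantile, written out on plain vectors. The toy thresholds and observations
# are assumptions; QS() builds the same quantity from each forecast's stored
# quantiles.
if (FALSE) {
  q <- 0.9
  threshold <- c(80, 85, 90)   # forecast 90th-percentile values over 3 time steps
  obs <- c(70, 95, 88)         # observed power
  indicator <- as.integer(obs <= threshold)
  mean(2 * (indicator - q) * (threshold - obs))   # quantile score at q = 0.9
}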

#' Calculate a vector of weighted quantile scores
#'
#' Can emphasize one or both tails or center
#'
#' @param qs A vector of quantile scores
#' @param quantiles A vector of the quantiles (percentiles) in [0,1]
#' @param weighting One of "none" (default), "tails", "right", "left", "center"
#' @return A vector of the weighted scores
#' @export
weight_QS <- function(qs, quantiles, weighting="none") {
  if (length(qs) != length(quantiles)) stop("Quantiles and quantile score must be the same length")

  weights <- switch(tolower(weighting),
                    "none" = 1,
                    "tails" = (2*quantiles-1)^2,
                    "right" = quantiles^2,
                    "left" = (1-quantiles)^2,
                    "center" = quantiles*(1-quantiles),
                    stop("Weighting method not recognized"))
  return(weights*qs)
}
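
# Illustrative sketch (not run): tail-weighted quantile scores integrated with
# the trapezoidal rule, mirroring what qwCRPS() does. The quantile scores `qs`
# here are hypothetical rather than computed from a forecast.
if (FALSE) {
  quantiles <- seq(0.01, 0.99, by = 0.01)
  qs <- 0.05 * quantiles * (1 - quantiles)   # hypothetical quantile scores
  wqs <- weight_QS(qs, quantiles, weighting = "tails")
  pracma::trapz(quantiles, wqs)              # tail-weighted qwCRPS estimate
}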

#' Get Brier score at a certain probability of exceedance
#'
#' This is calculated with a constant probability threshold, rather than a
#' constant value threshold.
#'
#' @param ts A ts_forecast object
#' @param tel A list of the telemetry values
#' @param PoE Threshold probability of exceedance, numeric [0,1]
#' @param ... optional arguments to equalize_telemetry_forecast_length
#' @return the Brier score
Brier_quantile <- function(ts, tel, PoE, ...) {
  if (PoE < 0 | PoE > 1) stop(paste("Probability of exceedance must be [0,1], given ", PoE, '.', sep=''))
  thresholds <- get_quantile_time_series(ts, 100*(1-PoE))

  thresholds <- equalize_telemetry_forecast_length(tel, thresholds, ...)$fc
  res <- equalize_telemetry_forecast_length(tel, !is.na(ts$forecasts), ...)
  forecast_available <- res$fc
  tel <- res$tel

  # Find incidence of telemetry exceeding the threshold of exceedance
  indicator <- as.integer(tel[forecast_available & !is.na(tel)] >= thresholds[forecast_available & !is.na(tel)])
  # Compare Probability of exceedance to its actual incidence
  return(mean((PoE-indicator)^2, na.rm = TRUE))
}

#' Get Brier score at power thresholds
#'
#' @param ts A ts_forecast object
#' @param tel A list of the telemetry values
#' @param thresholds Thresholds in units of the forecast. Can be a scalar or a
#'   vector
#' @return the Brier score
#' @export
Brier <- function(ts, tel, thresholds) {
  if (length(ts) != length(tel)) stop("Forecast and telemetry must be at same time resolution")
  pit <- array(sapply(ts$forecasts, FUN=function(forecast, thresholds) {
    if (is.prob_forecast(forecast)) return(stats::approx(forecast$quantiles$x, forecast$quantiles$q, thresholds, yleft=0, yright=1)$y)
    else return(rep(NA, times=length(thresholds)) )},
    thresholds=thresholds, simplify="array"), dim=c(length(thresholds), length(tel)))

  valid <- !is.na(ts$forecasts) & !is.na(tel)
  indicator <- sapply(seq_along(thresholds), FUN=function(i) as.integer(tel[valid] <= thresholds[i]), simplify="array")

  bs <- sapply(seq_along(thresholds), FUN=function(i) mean((pit[i, valid]-indicator[,i])^2, na.rm = TRUE))

  return(bs)
}
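
# Illustrative sketch (not run): the Brier score at a single power threshold,
# written out on plain vectors. `p` is the forecast probability that power stays
# at or below the threshold and the indicator records whether it actually did;
# Brier() evaluates the same quantity from each forecast's quantile function.
# The toy numbers are made up.
if (FALSE) {
  threshold <- 50              # MW
  p <- c(0.2, 0.7, 0.95)       # forecast CDF at the threshold, 3 time steps
  obs <- c(65, 40, 48)         # observed power
  indicator <- as.integer(obs <= threshold)
  mean((p - indicator)^2)      # Brier score at this threshold
}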

#' Plot quantile score vs. quantiles
#'
#' @family plots
#' @param ts A ts_forecast object
#' @param tel A list of the telemetry values
#' @param quantiles A vector in (0,1)
#' @param weighting One of "none" (default), "tails", "right", "left", "center"
#' @param fname (optional) file name to save plot
#' @export
plot_quantile_score <- function(ts, tel, quantiles=seq(0.01, 0.99, by=0.01),
                                weighting="none", fname=NULL) {
  qs <- QS(ts, tel, quantiles)
  wqs <- weight_QS(qs, quantiles, weighting)

  if (weighting=="none") label<- "" else label <- paste(weighting,"-weighted", sep="")

  g <- ggplot2::ggplot(data=data.frame(x=quantiles, y=wqs), mapping=ggplot2::aes(x=x, y=y)) +
    ggplot2::geom_point() +
    ggplot2::xlab("Quantile") +
    ggplot2::ylab(paste(label, "Quantile Score"))
  if (!is.null(fname)) {ggplot2::ggsave(fname, width = 5, height = 3)}
  invisible(g)
}

#' Plot Brier score along the quantiles from 1 to 99
#'
#' @param ts A ts_forecast object
#' @param tel A vector of the telemetry values
#' @param nmem Number of ensemble members, to illustrate the n+1 bins
plot_brier_by_quantile <- function(ts, tel, nmem = NA) {
  q <- 1:99
  b <- sapply(q, FUN = function(qi) Brier_quantile(ts, tel, (100-qi)/100))

  g <- ggplot2::ggplot(data=data.frame(x=q, y=b), mapping=ggplot2::aes(x=x, y=y)) +
      ggplot2::xlab("Percentile") +
      ggplot2::ylab("Brier score") +
      ggplot2::theme_light()

  if (!is.na(nmem)) {
    xstart <-  seq(0, 100, by=100/(nmem+1)*2)
    xend <- seq(100/(nmem+1), 100, by=100/(nmem+1)*2)
    rects <- data.frame(xstart=xstart[1:length(xend)], xend = xend)
    g <- g + ggplot2::geom_rect(data = rects, mapping=ggplot2::aes(xmin = xstart, xmax = xend, ymin = -Inf, ymax = Inf), fill = "gray", inherit.aes = FALSE, alpha=0.4)
  }
  g <- g + ggplot2::geom_point() +
        ggplot2::geom_line(data=data.frame(x=q, y=q/100*(100-q)/100))

  plot(g)
}

#' Plot Brier score in terms of power
#'
#' @param ts A ts_forecast object
#' @param tel A vector of the telemetry values
#' @param xseq A vector of the power thresholds to use
#' @export
plot_brier_by_power <- function(ts, tel, xseq) {

  b <- Brier(ts, tel, xseq)

  g <- ggplot2::ggplot(data=data.frame(x=xseq, y=b), mapping=ggplot2::aes(x=x, y=y)) +
    ggplot2::geom_line() +
    ggplot2::xlab("Power [MW]") +
    ggplot2::ylab("Brier score") +
    ggplot2::ggtitle(paste("CRPS:", signif(pracma::trapz(xseq, b), digits = 6), "[MW]"))

  plot(g)
}

equalize_normalization_factor_length <- function(normalize.by, tel, ...) {
  if (length(normalize.by)>1) {
    return(equalize_telemetry_forecast_length(tel, normalize.by, ...)$fc)
  } else {
    return(rep(normalize.by, times=length(tel)))
  }
}

#' Get mean absolute error between the forecast median and the actual value
#'
#' @param ts A ts_forecast object
#' @param tel A list of the telemetry values
#' @param normalize.by (optional) A normalization factor, either a scalar or
#'   vector
#' @param ... optional arguments to equalize_telemetry_forecast_length
#' @return the MAE value
#' @export
MAE <-function(ts, tel, normalize.by=1, ...) {
  medians <- get_quantile_time_series(ts, 50)
  forecast_available <- equalize_telemetry_forecast_length(tel, !is.na(ts$forecasts), ...)$fc
  x <- equalize_telemetry_forecast_length(tel, medians, ...)
  medians <- x$fc
  normalize.by <- equalize_normalization_factor_length(normalize.by, x$tel, ...)
  indices <- forecast_available & !is.na(x$tel)
  return(mean(abs(medians[indices]-x$tel[indices])/normalize.by[indices], na.rm = TRUE))
}

#' Calculate average interval score
#'
#' Get average interval score, for an interval from alpha/2 to 1-alpha/2.
#' Negatively oriented (smaller is better). Characterizes sharpness, with a
#' penalty for reliability.
#'
#' @param ts A ts_forecast object
#' @param tel A list of the telemetry values
#' @param alpha Numeric, to identify the (1-alpha)*100\% quantile of interest
#' @param normalize.by (optional) A normalization factor, either a scalar or
#'   vector
#' @param ... additional optional arguments to
#'   equalize_telemetry_forecast_length
#' @return the average IS value
#' @export
IS_avg <-function(ts, tel, alpha, normalize.by=1, ...) {
  is_list <- get_metric_time_series(IS, ts, tel, normalize.by, metricArgs = list(alpha=alpha), ...)
  return(list(mean=mean(is_list, na.rm=T), min=min(is_list, na.rm=T), max=max(is_list, na.rm=T), sd=stats::sd(is_list, na.rm=T)))
}

#' Average interval width
#'
#' Get average sharpness (interval width), for an interval from alpha/2 to
#' 1-alpha/2. Negatively oriented (smaller is better). Telemetry isn't used in
#' sharpness, but it is only averaged over times when telemetry is available,
#' for direct comparability with other metrics.
#'
#' @param ts A ts_forecast object
#' @param tel A vector of telemetry values.
#' @param alpha Numeric, to identify the (1-alpha)*100\% quantile of interest
#' @param normalize.by (optional) A normalization factor, either a scalar or
#'   vector
#' @param ... additional optional arguments to
#'   equalize_telemetry_forecast_length
#' @return the average sharpness value (interval width)
#' @export
sharpness_avg <-function(ts, tel, alpha, normalize.by=1, ...) {
  sharpness_list <- get_metric_time_series(sharpness, ts, tel, normalize.by,
                                           metricArgs = list(alpha=alpha), ...)
  return(list(mean=mean(sharpness_list, na.rm=T), min=min(sharpness_list, na.rm=T),
              max=max(sharpness_list, na.rm=T), sd=stats::sd(sharpness_list, na.rm=T)))
}


#' Plot average interval width at different nominal intervals
#'
#' @family plots
#' @param ts A ts_forecast object
#' @param tel A list of the telemetry values
#' @param alphas (optional) Vector of central (1-alpha)*100\% intervals to calculate
#' @param normalize.by (optional) Value or vector to normalize by
#' @param fname (optional) file name to save plot
#' @export
plot_interval_width <- function(ts, tel, alphas = seq(0.9, 0.1, by=-0.1), normalize.by=1,
                                fname=NULL) {
  widths <- sapply(alphas, FUN=function(a) sharpness_avg(ts, tel, alpha=a, normalize.by=normalize.by, agg=TRUE)$mean)

  g <- ggplot2::ggplot(data.frame(x=(1-alphas)*100, y=widths), ggplot2::aes(x=x, y=y)) +
    ggplot2::geom_point(mapping=ggplot2::aes(shape="A", color="A")) +
    ggplot2::geom_line(mapping=ggplot2::aes(linetype="A", color="A")) +
    ggplot2::xlab("Central Interval [%]") +
    ggplot2::ylab(latex2exp::TeX('$\\bar{\\delta}$ $\\lbrack$% AC Rating$\\rbrack$')) +
    ggplot2::scale_x_continuous(breaks=(1-alphas)*100) +
    ggplot2::ylim(0, NA)
  if (!is.null(fname)) {ggplot2::ggsave(fname, width = 5, height = 3)}
  invisible(g)
}

#' Plot reliability
#'
#' Plot diagonal line diagram of quantiles + observations (i.e., a P-P plot)
#' @family plots
#' @param ts A ts_forecast object
#' @param tel A list of the telemetry values
#' @param fname (optional) file name to save plot
#' @param ... optional arguments to equalize_telemetry_forecast_length
#' @export
plot_reliability <- function(ts, tel, fname=NULL, ...) {
  x <- get_quantile_reliability(ts, tel, ...)

  g <- ggplot2::ggplot(data.frame(x=x$quantiles, y=x$quantiles), ggplot2::aes(x=x, y=y)) +
    ggplot2::geom_line() +
    ggplot2::geom_point(data=data.frame(x=x$quantiles[1:(length(x$quantiles)-1)], y=cumsum(x$hit_rate)[1:(length(x$quantiles)-1)]), shape=1) +
    ggplot2::xlab("Nominal Proportion") +
    ggplot2::ylab("Observed Proportion")
  if (!is.null(fname)) {ggplot2::ggsave(fname, width = 5, height = 3)}
  invisible(g)
}

#' Plot probability integral transform histogram
#'
#' @family plots
#' @param ts A ts_forecast object
#' @param tel A list of the telemetry values
#' @param nbins To specify number of bars in the histogram
#' @param fname (optional) file name to save plot
#' @param ... optional arguments to equalize_telemetry_forecast_length
#' @export
plot_PIT_histogram <- function(ts, tel, nbins=10, fname=NULL, ...) {
  x <- calc_PIT_histogram(ts, tel, nbins, ...)

  g <- ggplot2::ggplot(data.frame(x=x$bin_means, y=x$bin_hits/sum(x$bin_hits)), ggplot2::aes(x=x, y=y)) +
    ggplot2::geom_col(width=x$bin_width) +
    ggplot2::geom_line(data.frame(x=c(0, 1), y=c(1/nbins, 1/nbins)), mapping=ggplot2::aes(x=x, y=y), linetype="dashed") +
    ggplot2::xlab("Probability Integral Transform") +
    ggplot2::ylab("Relative Frequency")
  if (!is.null(fname)) {ggplot2::ggsave(fname, width = 5, height = 3)}
  invisible(g)
}

#' Calculate data for probability integral transform histogram
#' @param ts A ts_forecast object
#' @param tel A list of the telemetry values
#' @param nbins To specify number of bars in the histogram
#' @param ... optional arguments to equalize_telemetry_forecast_length
#' @return List of the bin centers, their hit counts, and the bin width
#' @export
calc_PIT_histogram <- function(ts, tel, nbins, ...) {
  breaks <- seq(0, 1, length.out =(nbins+1))
  bin_width <- diff(breaks)[1]
  bin_means <- breaks[-1] - bin_width/2
  qr <- get_quantile_reliability(ts, tel, ...)

  # Bin as desired
  bin_rate <- sapply(seq_len(length(breaks)-1), FUN = function(i) sum(qr$hits[qr$quantiles>breaks[i] & qr$quantiles<=breaks[i+1]]))
  return(list(bin_means=bin_means, bin_hits=bin_rate, bin_width=bin_width))
}
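
# Illustrative sketch (not run): re-binning per-quantile hit counts into a
# coarser PIT histogram, mirroring the binning step above. The 100 per-quantile
# hit counts in `hits` are hypothetical.
if (FALSE) {
  quantiles <- c(seq(0.01, 0.99, by = 0.01), 1)
  hits <- rep(5, times = 100)   # hypothetical hits at each evaluated quantile
  nbins <- 10
  breaks <- seq(0, 1, length.out = nbins + 1)
  sapply(seq_len(nbins), function(i) {
    sum(hits[quantiles > breaks[i] & quantiles <= breaks[i + 1]])
  })                            # 10 bins of 50 hits each
}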

#' Evaluate forecast reliability via the observed hit rate of the quantile bins
#' @param ts A ts_forecast object
#' @param tel A list of the telemetry values
#' @param quantiles (optional) A vector of the quantiles to evaluate; defaults
#'   to the quantiles stored in the forecasts (plus 1)
#' @param ... optional arguments to equalize_telemetry_forecast_length
#' @return list of the quantiles and their hit rates
#' @export
get_quantile_reliability <- function(ts, tel, quantiles=NA, ...) {
  x <- equalize_telemetry_forecast_length(tel, !is.na(ts$forecasts), ...)
  forecast_available <- x$fc

  # Get the list of quantiles that have been evaluated, and add top limit at 1
  quants <- c(ts$forecasts[[min(which(sapply(ts$forecasts, FUN=is.prob_forecast)))]]$quantiles$q, 1)
  if (all(is.na(quantiles))) quantiles <- quants

  # Find time-points where telemetry and forecast data is available
  indices <- which(forecast_available & !is.na(x$tel))
  q_idx_subfunc <- function(i) {
    list_idx <- which(ts$forecasts[[x$translate_forecast_index(i)]]$quantiles$x >= x$tel[i]) # List of quantile indices above telemetry value
    if (length(list_idx) > 0) {idx <- min(which(quantiles >= quants[min(list_idx)]))} else{idx <- length(quantiles)} # Pick lowest quantile, or 100th percentile if it falls outside distribution
    return(idx)
  }
  q_indices <- sapply(indices, q_idx_subfunc) # Get the index of the corresponding quantile for each time point
  counts <- sapply(seq_along(quantiles), function(i) return(sum(q_indices == i)), USE.NAMES = FALSE)

  hit_rate <- counts/length(indices)
  return(list(quantiles=quantiles, hit_rate=hit_rate, hits=counts))
}