# Author: Kate Doubleday
# Last updated: 5/13/2020
# -----------------------------------------------------------------
# Execute this script to generate a Results/ folder containing 200+ plots comparing the
# benchmarks at each of the 7 SURFRAD sites, .csv files of the forecast quantiles at each
# time step through the year 2018, and .csv files comparing the CRPS summary statistics
# across the methods at each temporal scale for every site. If the ECMWF data are not
# available, the two ECMWF-based forecasts are skipped.
# -----------------------------------------------------------------
# Load dependencies
library(solarbenchmarks)
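# solarbenchmarks supplies the forecast_*() benchmark generators and the qwCRPS(),
# reliability(), interval_width(), and plot_*() evaluation helpers called below.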
# -----------------------------------------------------------------
print("Benchmark beginning, and will run for a while. See Results directory and its sub-directories.")
# -----------------------------------------------------------------
# Define constants
telemetry_directory <- here::here("GHI_files")
forecast_directory <- here::here("ECMWF_files", "SURFRAD_sites")
output_directory <- here::here("Results")
dir.create(output_directory, showWarnings = FALSE)
pit_directory <- here::here("Results", "PIT_histograms")
dir.create(pit_directory, showWarnings = FALSE)
ts_directory <- here::here("Results", "Quantile_time_series")
dir.create(ts_directory, showWarnings = FALSE)
qs_directory <- here::here("Results", "Quantile_score_plots")
dir.create(qs_directory, showWarnings = FALSE)
site_names <- c("Bondville",
"Boulder",
"Desert_Rock",
"Fort_Peck",
"Goodwin_Creek",
"Penn_State",
"Sioux_Falls")
percentiles <- seq(0.01, 0.99, by=0.01)
intervals <- seq(0.1, 0.9, by=0.1) # Central intervals for sharpness evaluation
num_peen_days <- 20 # Number of members (preceding days) in the hourly persistence ensemble
intrahour_training_hours <- 2 # Number of hours of preceding data used to train the intra-hour PeEn and intra-hour Gaussian methods
mcm_training_days <- 20 # Number of days of preceding data used to train the intra-hour MCM method
histogram_bins <- 20 # Number of bins to use in PIT histogram
resolution <- c("Hourly", "Intrahour")
# Choose the appropriate subset of forecast methods, based on temporal resolution and whether the ECMWF data are available.
if (all(paste0(site_names, ".nc") %in% list.files(forecast_directory))) {
forecast_names <- list(Hourly=c("Climatology", "Ch-PeEn", "PeEn", "ECMWF Ensemble", "ECMWF Gaussian"))
} else forecast_names <- list(Hourly=c("Climatology", "Ch-PeEn", "PeEn"))
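# The ECMWF-based methods require one NetCDF file per site in forecast_directory,
# e.g. Bondville.nc, Boulder.nc, ..., Sioux_Falls.nc.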
forecast_names$Intrahour <- c("Climatology", "Ch-PeEn", "PeEn", "Smart persistence Gaussian", "MCM")
metric_names <- c("CRPS", "Left-tail weighted CRPS", "Center weighted CRPS", "Right-tail weighted CRPS")
# -----------------------------------------------------------------
# Data export options
# Option to export a .csv of the forecast quantiles
csv_export <- TRUE
# Option to export .R graph objects
R_graph_export <- FALSE
# -----------------------------------------------------------------
get_site_data <- function(res, site, metrics_df, reliability_df, interval_width_df) {
# Load GHI ECMWF forecasts
if (file.exists(file.path(forecast_directory, paste(site, ".nc", sep="")))) {
nc <- ncdf4::nc_open(file.path(forecast_directory, paste(site, ".nc", sep="")))
nwp <- ncdf4::ncvar_get(nc, varid="irradiance")
ncdf4::nc_close(nc)
ndays <- nrow(nwp) # For loading in the same number of days of SURFRAD data, starting Jan. 1st. Useful during testing
} else { # If ECMWF data are unavailable, default to a full year of telemetry
ndays <- 365
}
# Load GHI telemetry and sun_up indicator
nc <- ncdf4::nc_open(file.path(telemetry_directory, res,
  list.files(file.path(telemetry_directory, res),
             pattern=glob2rx(paste("*", site, "*2018*", sep="")))))
GHI <- ncdf4::ncvar_get(nc, varid="irradiance", count=c(ndays, -1))
sun_up <- ncdf4::ncvar_get(nc, varid="sun_up", count=c(ndays, -1))
clearsky_GHI <- ncdf4::ncvar_get(nc, varid="clearsky_irradiance", count=c(ndays, -1))
ncdf4::nc_close(nc)
sun_up <- apply(sun_up, MARGIN = c(1,2), FUN = as.logical)
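# apply() over both margins converts the numeric 0/1 flags to logical while
# preserving the day-by-time-step matrix shape.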
# Load GHI telemetry and sun_up indicator from end of 2017 for PeEn forecast
nc <- ncdf4::nc_open(file.path(telemetry_directory, res,
  list.files(file.path(telemetry_directory, res),
             pattern=glob2rx(paste("*", site, "*2017*", sep="")))))
GHI_2017 <- ncdf4::ncvar_get(nc, varid="irradiance")
clearsky_GHI_2017 <- ncdf4::ncvar_get(nc, varid="clearsky_irradiance")
sun_up_2017 <- ncdf4::ncvar_get(nc, varid="sun_up")
ncdf4::nc_close(nc)
sun_up_2017 <- apply(sun_up_2017, MARGIN = c(1,2), FUN = as.logical)
# -----------------------------------------------------------------
# Conduct forecasts and get metrics
ts_per_hour <- ncol(GHI)/24
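# ncol(GHI) is the number of time steps per day, so e.g. 24 columns -> 1 step
# per hour (hourly data) and 288 columns -> 12 steps per hour (5-minute data).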
for (method in forecast_names[[res]]) {
if (method=="Climatology") {
fc <- forecast_climatology(GHI, percentiles, sun_up)
} else if (method=="ECMWF Ensemble") {
fc <- forecast_NWP(nwp, percentiles, sun_up)
} else if (method=="Ch-PeEn") {
fc <- forecast_CH_PeEn(GHI, percentiles, sun_up, clearsky_GHI, ts_per_hour)
} else if (method=="PeEn") {
if (res == "Hourly") {
fc <- forecast_PeEn_hourly(GHI, percentiles, sun_up, num_peen_days, GHI_2017)
} else fc <- forecast_PeEn_intrahour(as.vector(t(GHI)), percentiles, as.vector(t(sun_up)),
as.vector(t(clearsky_GHI)), ts_per_hour=ts_per_hour, nhours=intrahour_training_hours)
} else if (method=="ECMWF Gaussian"){
fc <- forecast_Gaussian_hourly(nwp, GHI, percentiles, sun_up, clearsky_GHI)
} else if (method=="Smart persistence Gaussian") {
fc <- forecast_Gaussian_intrahour(as.vector(t(GHI)), percentiles, as.vector(t(sun_up)),
as.vector(t(clearsky_GHI)), ts_per_hour=ts_per_hour, nhours=intrahour_training_hours)
} else if (method=="MCM") {
fc <- forecast_mcm(as.vector(t(GHI)), as.vector(t(GHI_2017)),
percentiles, as.vector(t(sun_up)), as.vector(t(sun_up_2017)),
as.vector(t(clearsky_GHI)), as.vector(t(clearsky_GHI_2017)),
ts_per_hour, mcm_training_days)
} else stop(paste("Forecast method", method, "not recognized"))
metrics_df[site, method, "CRPS"] <- qwCRPS(fc, as.vector(t(GHI)), as.vector(t(sun_up)), weighting = "none")
metrics_df[site, method, "Left-tail weighted CRPS"] <- qwCRPS(fc, as.vector(t(GHI)), as.vector(t(sun_up)), weighting = "left")
metrics_df[site, method, "Center weighted CRPS"] <- qwCRPS(fc, as.vector(t(GHI)), as.vector(t(sun_up)), weighting = "center")
metrics_df[site, method, "Right-tail weighted CRPS"] <- qwCRPS(fc, as.vector(t(GHI)), as.vector(t(sun_up)), weighting = "right")
reliability_df[site, method,] <- reliability(fc, as.vector(t(GHI)), as.vector(t(sun_up)))
interval_width_df[site, method,] <- interval_width(fc, as.vector(t(sun_up)), intervals = intervals)$widths
# If desired, export this forecast matrix of quantiles to a .csv file for future use.
if (csv_export) {
write.table(fc, file=file.path(ts_directory, paste(site, res, method, "quantiles.csv", sep=" ")), sep=",", row.names=FALSE)
}
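# Illustrative read-back sketch (not executed): a quantile file written above can
# be reloaded later with, e.g.,
#   fc <- as.matrix(read.table(file.path(ts_directory,
#     "Bondville Hourly Climatology quantiles.csv"), sep=",", header=TRUE))
# assuming the space-separated "<site> <res> <method> quantiles.csv" name used above.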
# Plot PIT histogram
plot_PIT_histogram(fc, as.vector(t(GHI)), as.vector(t(sun_up)), histogram_bins, site, res, method, pit_directory, R_graph_export)
# Plot unweighted and weighted quantile score comparisons
plot_quantile_score(fc, as.vector(t(GHI)), as.vector(t(sun_up)), percentiles, site, res, method, qs_directory, R_graph_export)
# Export sample forecasts: Days 133-135, May 13-15th
window <- c(132*24*ts_per_hour+1 + 6*ts_per_hour, 135*24*ts_per_hour + 6*ts_per_hour)
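# Index arithmetic: day 133 begins at element 132*24*ts_per_hour + 1; the extra
# 6*ts_per_hour shifts the three-day window forward by 6 hours (presumably to
# align the UTC-indexed data with local daylight).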
plot_fanplot(fc, as.vector(t(GHI)), window, ts_per_hour, output_directory, site, res, method, R_graph_export)
}
return(list(metrics_df=metrics_df, reliability_df=reliability_df, interval_width_df=interval_width_df))
}
# Repeat the analysis for each temporal resolution: Hourly forecast or intra-hourly forecast
for (res in resolution) {
# Prepare results data frames
metrics_df <- array(dim=c(length(site_names), length(forecast_names[[res]]), length(metric_names)),
                    dimnames=list(site_names, forecast_names[[res]], metric_names))
reliability_df <- array(dim=c(length(site_names), length(forecast_names[[res]]), length(percentiles)),
                        dimnames=list(site_names, forecast_names[[res]], percentiles))
interval_width_df <- array(dim=c(length(site_names), length(forecast_names[[res]]), length(intervals)),
                           dimnames=list(site_names, forecast_names[[res]], intervals))
# Repeat the analysis for each of the 7 SURFRAD sites
for (site in site_names) {
# Conduct the forecast validation for this site, saving the results for each method to the three data frames
results <- get_site_data(res, site, metrics_df, reliability_df, interval_width_df)
metrics_df <- results$metrics_df
reliability_df <- results$reliability_df
interval_width_df <- results$interval_width_df
# Export reliability and interval width graphs comparing the methods for each site
# ------------------------------------------------------------------
shapes <- c(17, 0, 16, 5, 4) # Select point shapes for graphs
plot_reliability(reliability_df, site, res, shapes, output_directory, R_graph_export)
plot_interval_width(interval_width_df, site, res, shapes, output_directory, R_graph_export)
}
# -----------------------------------------------------------------
# Export CRPS metrics table, comparing the methods across the sites
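# Each weighted-CRPS variant is written as its own labeled block, stacked in a single .csv per resolution.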
sink(file.path(output_directory, paste('CRPS', res, 'results.csv', sep="_")))
for (metric in dimnames(metrics_df)[[3]]) {
cat(paste(metric, "\n"))
write.csv(metrics_df[,,metric])
cat("\n")
}
sink()
}