R/mid_term_future.R
In oRaklE: Multi-Horizon Electricity Demand Forecasting in High Resolution

Documented in mid_term_future

#' Generate future mid-term demand predictions
#'
#' This function extends the mid-term demand predictions generated by \code{\link{mid_term_lm}} until a specified future year.
#' The unknown temperature-based covariates for future days are obtained by averaging over the past 3 years of the dataset.
#' The function also produces and saves visualizations of the actual and the predicted demand over the training, test, and future periods.
#'
#' @param midterm_predictions Dataframe or list. Generated by \code{\link{mid_term_lm}}. Either the prediction dataframe or the complete output list can be used.
#' If the full list is supplied the function will extract the necessary models automatically.
#' @param end_year Integer. Specifies the final year for which future predictions will be generated.
#' @param Tref Numeric. Reference temperature as basis for the calculation of cooling and heating days.
#' @param data_directory The path to the directory where the data will be saved and where the function will look for
#' the mid-term model from \code{\link{mid_term_lm}}. The default is set to a temporary directory.
#' @param midterm_model The mid-term seasonality model from \code{\link{mid_term_lm}}. Only needs to be specified if the model
#' is not in the data directory.
#' @param verbose A boolean value indicating if you want the generated plot to be shown (set to TRUE if yes).
#' @return A list with the extended initial dataframe with the future predictions for the mid term model. And the plot with the midterm seasonality future forecast.
#' The dataset and the plot are saved in the respective folder for the country.
#' \describe{
#'   \item{midterm_future_predictions}{A dataframe with the input and prediction data for the future mid-term seasonality.}
#'   \item{midterm_future_plot}{A plot with the prediction results.}
#' }
#' @export
#' @seealso See also function \code{\link{long_term_future}} and \code{\link{short_term_future}} for the other prediction models.
#' @examples
#' example_midterm_future_predictions <- mid_term_future(example_midterm_predictions,
#'   end_year = 2028, Tref = 18
#' )
mid_term_future <- function(midterm_predictions, end_year, Tref = 18, data_directory = tempdir(), midterm_model = NULL, verbose = FALSE) {
  if (inherits(midterm_predictions, "list") && names(midterm_predictions)[1] == "midterm_predictions") {
    midterm_model <- midterm_predictions$midterm_model
    midterm_predictions <- midterm_predictions$midterm_predictions
  }

  if ("example" %in% colnames(midterm_predictions)) {
    if (unique(midterm_predictions$example) == TRUE) {
      message("Averaging temperature values for future predictions.")
      message("Extending the mid-term seasonality model predictions until the year 2028.")
      test_set_steps <- 2 * 365
      training_set <- nrow(midterm_predictions) - test_set_steps
      training_data <- midterm_predictions[1:training_set, ]
      test_data <- midterm_predictions[(training_set + 1):nrow(midterm_predictions), ]

      variables <- colnames(midterm_predictions)[c(9:43)]
      f <- stats::as.formula(paste("seasonal_avg_hourly_demand", paste(variables, collapse = " + "),
        sep = " ~ "
      ))

      y <- training_data$seasonal_avg_hourly_demand
      y_all <- midterm_predictions$seasonal_avg_hourly_demand
      y_test <- test_data$seasonal_avg_hourly_demand
      x <- data.matrix(training_data[, c(9:43)])
      x_all <- data.matrix(midterm_predictions[, 9:43])
      x_test <- data.matrix(test_data[, 9:43])

      cv_model <- glmnet::cv.glmnet(x, y, alpha = 1)

      best_lambda <- cv_model$lambda.min
      best_model <- glmnet::glmnet(x, y, alpha = 1, lambda = best_lambda)
      new_data <- oRaklE::example_midterm_future_predictions
      future_predictions <- stats::predict(best_model, newx = as.matrix(new_data[, 9:43]))

      length(unique(round(future_predictions,2) - round(new_data$midterm_model_fit,2)))

      if (length(unique(round(future_predictions,2) - round(new_data$midterm_model_fit,2))) < 21) {
        return(oRaklE::example_longterm_predictions)
      } else {
        stop("The example in mid_term_future() failed. Please contact the package maintainer at schwenzer@europa-uni.de")
      }
    }
  }

  if (grepl("Rtmp", data_directory)) {
    message(paste(
      "\nThis function will try to save the results, models and plots to a folder called", unique(midterm_predictions$country),
      "\nin the current data directory:", data_directory
    ))
    message("\nIt is recommended to save the data in a directory other than a tempdir, so that it is available after you finish the R Session.")

    message("\nPlease choose an option:")
    message("\n1: Keep it as a tempdir")
    message(paste("2: Save data in the current working directory (", getwd(), ")", sep = ""))
    message("3: Set the directory manually\n")

    choice <- readline(prompt = "Enter the option number (1, 2, or 3): ")

    if (choice == "1") {
      message("\nData will be saved in a temporary directory and cleaned up when R is shut down.")
      # data_directory remains unchanged.
    } else if (choice == "2") {
      data_directory <- getwd()
      message(paste0("\nResults, models, and plots will be saved in the current working directory in ", data_directory, "/", unique(midterm_predictions$country)))
      message("\nYou can specify the *data_directory* parameter in the following functions as '", data_directory, "'")
    } else if (choice == "3") {
      new_dir <- readline(prompt = "Enter the full path of the directory where you want to save the data: ")
      data_directory <- new_dir
      if (!dir.exists(data_directory)) {
        stop("The specified data_directory does not exist: ", data_directory, "\nPlease run the function again.")
      }
      message("\nResults, models, and plots will be saved in the specified directory: ", data_directory, "/", unique(midterm_predictions$country))
    } else {
      message("Invalid input. Keeping the temporary directory.\nData will be cleaned up when R is shut down.")
    }
  } else {
    if (!dir.exists(data_directory)) {
      stop("The specified data_directory does not exist: ", data_directory, "\nPlease run the function again.")
    }
    message("\nData, models, and plots will be saved in the specified working directory in ", data_directory, "/", unique(midterm_predictions$country))
  }


  last_years <- midterm_predictions[(nrow(midterm_predictions) - (3 * 365)):(nrow(midterm_predictions)), ]

  mean_values <- sapply(1:365, function(i) {
    day_values <- last_years$weighted_temperature[seq(i, nrow(last_years), by = 365)]
    mean(day_values, na.rm = TRUE)
  })

  last_date <- max(midterm_predictions$date)
  new_dates <- seq(from = last_date + 1, to = as.Date(paste0(end_year, "-12-31")), by = "day")

  new_data <- data.frame(country = unique(midterm_predictions$country), date = new_dates)
  new_data$year <- lubridate::year(new_data$date)
  new_data$month <- lubridate::month(new_data$date)
  new_data$day <- lubridate::day(new_data$date)
  new_data$wday <- lubridate::wday(new_data$date, label = T, locale = "en_US.UTF-8")
  new_data$avg_hourly_demand <- 0
  new_data$seasonal_avg_hourly_demand <- NA

  holiday_list <- list()
  years <- unique(new_data$year)
  country <- (unique(new_data$country))
  for (i in 1:length(years)) {
    year <- years[i]
    tryCatch(
      {
        Sys.sleep(1.5)
        response <- jsonlite::fromJSON(paste0(
          "https://date.nager.at/api/v3/publicholidays/",
          year, "/", country
        ))
        holiday_list[[i]] <- response$date
      },
      error = function(e) {
        i=i-1
        Sys.sleep(5)
        #stop("Error during JSON request to date.nager.at for getting holidays : ", e$message, "\nPlease run the function again sometimes date.nager is unstable since of 2025.", call. = FALSE)
      }
    )
  }


  holidays <- unlist(holiday_list)
  holidays <- as.Date(holidays)
  new_data$holiday <- 0
  new_data$holiday[new_data$date %in% holidays] <- 1

  new_data <- new_data[!(new_data$month == 2 & new_data$day == 29), ]

  new_data$weighted_temperature <- mean_values


  month_list <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Nov", "Dec")

  for (i in 1:length(month_list)) {
    new_data[month_list[i]] <- 0
    new_data[new_data$month == i, month_list[i]] <- 1
  }

  weekday_list <- as.character(unique(midterm_predictions$wday))
  for (i in 1:length(weekday_list)) {
    new_data[weekday_list[i]] <- 0
    new_data[new_data$wday == weekday_list[i], weekday_list[i]] <- 1
  }

  ### Temperature transformations
  if ("HD" %in% colnames(midterm_predictions)) {
    new_data$HD <- 0
    new_data$CD <- 0

    for (i in 1:nrow(new_data)) {
      if (new_data$weighted_temperature[i] < Tref) {
        new_data$HD[i] <- Tref - new_data$weighted_temperature[i]
      } else {
        new_data$CD[i] <- new_data$weighted_temperature[i] - Tref
      }
    }

    new_data$HD2 <- new_data$HD^2
    new_data$HD3 <- new_data$HD^3
    new_data$CD2 <- new_data$CD^2
    new_data$CD3 <- new_data$CD^3
    new_data$weighted_temperature2 <- new_data$weighted_temperature^2
    new_data$weighted_temperature3 <- new_data$weighted_temperature^3

    new_data$HDlag1 <- dplyr::lag(new_data$HD, n = 1)
    new_data$HDlag1[1] <- new_data$HD[1]
    new_data$HDlag2 <- dplyr::lag(new_data$HD, n = 2)
    new_data$HDlag2[1:2] <- new_data$HDlag1[1:2]

    new_data$CDlag1 <- dplyr::lag(new_data$CD, n = 1)
    new_data$CDlag1[1] <- new_data$CD[1]
    new_data$CDlag2 <- dplyr::lag(new_data$CD, n = 2)
    new_data$CDlag2[1:2] <- new_data$CDlag1[1:2]
  }

  new_data$weighted_temperaturelag1 <- dplyr::lag(new_data$weighted_temperature, n = 1)
  new_data$weighted_temperaturelag1[1] <- new_data$weighted_temperature[1]
  new_data$weighted_temperaturelag2 <- dplyr::lag(new_data$weighted_temperature, n = 2)
  new_data$weighted_temperaturelag2[1:2] <- new_data$weighted_temperaturelag1[1:2]

  new_data$end_of_year <- 0
  new_data$end_of_year[new_data$month == 12 & new_data$day > 22] <- 1


  ## LOAD MODEL
  country <- unique(new_data$country)

  globalmodel <- NULL
  best_model <- NULL
  if (!is.null(midterm_model)) {
    message("Taking the midterm_model from the input list.")
    if (inherits(midterm_model, "elnet") | inherits(midterm_model, "glmnet")) {
      suppressWarnings(
        future_predictions <- stats::predict(midterm_model, newx = as.matrix(new_data[, 9:43]))
      )
    } else if (inherits(midterm_model, "lm")) {
      suppressWarnings(
        future_predictions <- stats::predict(midterm_model, newdata = new_data)
      )
    }
  } else if (inherits(midterm_model, "elnet") | inherits(midterm_model, "glmnet")) {
    message("Taking the model specified in midterm_model.")
    suppressWarnings(
      future_predictions <- stats::predict(midterm_model, newx = as.matrix(new_data[, 9:43]))
    )
  } else if (inherits(midterm_model, "lm")) {
    message("Taking the model specified in midterm_model.")
    suppressWarnings(
      future_predictions <- stats::predict(midterm_model, newdata = new_data)
    )
  } else {
    model_path <- paste0(data_directory, "/", country, "/models/midterm/best_model.Rdata")

    if (file.exists(model_path)) {
      load(paste0(data_directory, "/", country, "/models/midterm/best_model.Rdata"))

      if ("HD" %in% colnames(midterm_predictions)) {
        if (!is.null(globalmodel)) {
          suppressWarnings(
            future_predictions <- stats::predict(globalmodel, newdata = new_data)
          )
        } else {
          suppressWarnings(
            future_predictions <- stats::predict(best_model, newx = as.matrix(new_data[, 9:43]))
          )
        }
      } else {
        suppressWarnings(
          future_predictions <- stats::predict(globalmodel, newdata = new_data)
        )
      }
    } else {
      stop("\nPlease either specify the base path where the country data is saved (e.g. the current working directory or supply the model in the *midterm_model* variable.")
    }
  }



  new_data$midterm_model_fit <- future_predictions
  new_data$test_set_steps <- unique(midterm_predictions$test_set_steps)

  for (col_name in setdiff(names(midterm_predictions), names(new_data))) {
    new_data[[col_name]] <- NA
  }


  all_data <- dplyr::bind_rows(midterm_predictions, new_data)


  years <- unique(all_data$year)
  index <- 1:length(years)
  for (i in 1:length(years)) {
    index[i] <- min(as.numeric(rownames(all_data[all_data$year == years[i], ])))
  }

  training_set_end <- nrow(midterm_predictions) - unique(midterm_predictions$test_set_steps)
  test_set_end <- training_set_end + unique(midterm_predictions$test_set_steps)
  future_set <- nrow(all_data) - test_set_end
  max_value <- max(c(max(all_data$midterm_model_fit), max(all_data$seasonal_avg_hourly_demand, na.rm = T)))

  suppressWarnings(
    mt_plot <- ggplot(all_data) +
      geom_line(aes(1:nrow(all_data), all_data$seasonal_avg_hourly_demand, color = "actual")) +
      geom_line(aes(1:nrow(all_data), all_data$midterm_model_fit, color = "fitted")) +
      geom_vline(xintercept = training_set_end, linetype = 2) +
      geom_vline(xintercept = test_set_end, linetype = 3) +
      ggthemes::theme_foundation(base_size = 14, base_family = "sans") +
      xlab("\nDay") +
      ylab("Change in avg. Hourly Demand\np. Day [MW]\n") +
      ggtitle(paste("Mid Term Model Results -", country, "\n")) +
      theme(
        plot.title = element_text(
          face = "bold",
          size = rel(1.2), hjust = 0.5
        ),
        text = element_text(),
        panel.background = element_rect(colour = NA),
        plot.background = element_rect(colour = NA),
        panel.border = element_rect(colour = NA),
        axis.title = element_text(face = "bold", size = rel(1)),
        axis.title.y = element_text(angle = 90, vjust = 2),
        axis.title.x = element_text(vjust = -0.2),
        axis.text = element_text(),
        axis.line.x = element_line(colour = "black"),
        axis.line.y = element_line(colour = "black"),
        axis.ticks = element_line(),
        panel.grid.major = element_line(colour = "#f0f0f0"),
        panel.grid.minor = element_blank(),
        legend.key = element_rect(colour = NA),
        legend.position = "bottom",
        legend.direction = "horizontal",
        legend.key.size = unit(0.2, "cm"),
        plot.margin = unit(c(10, 5, 5, 5), "mm"),
        strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
        strip.text = element_text(face = "bold")
      ) +
      theme(legend.title = element_blank()) +
      scale_x_continuous(breaks = index, labels = years) +
      guides(color = guide_legend(override.aes = list(linewidth = 2))) +
      annotate("text", x = (training_set_end / 2), y = (max_value + max_value * 0.1), label = "Training", size = 4, hjust = 0.5, vjust = 0) +
      annotate("text", x = (training_set_end + unique(midterm_predictions$test_set_steps) / 2), y = (max_value + max_value * 0.1), label = "Test", size = 4, hjust = 0.5, vjust = 0) +
      annotate("text", x = (nrow(midterm_predictions) + future_set / 2), y = (max_value + max_value * 0.1), label = "Unknown", size = 4, hjust = 0.5, vjust = 0)
  )
  country <- unique(all_data$country)

  if (!file.exists(paste0(data_directory, "/", country))) {
    dir.create(paste0(data_directory, "/", country))
  }
  if (!file.exists(paste0(data_directory, "/", country, "/data"))) {
    dir.create(paste0(data_directory, "/", country, "/data"))
  }
  if (!file.exists(paste0(data_directory, "/", country, "/plots"))) {
    dir.create(paste0(data_directory, "/", country, "/plots"))
  }

  suppressWarnings(
    ggsave(filename = paste0(data_directory, "/", unique(all_data$country), "/plots/mid_term_results_future.png"), plot = mt_plot, width = 12, height = 8)
  )
  if (verbose == FALSE) {
    message("\nVerbose is set to FALSE. Set to TRUE if you want to see the generated plot automatically. The plot is saved in the output under *midterm_future_plot* and in the plots folder in ", data_directory)
  } else {
    suppressWarnings(
      print(mt_plot)
    )
  }
  utils::write.csv(all_data, file = paste0(data_directory, "/", unique(all_data$country), "/data/mid_term_results_future.csv"), row.names = F)

  return(list("midterm_future_predictions" = all_data, "midterm_future_plot" = mt_plot))
}