R/get_historic_load_data.R

Defines functions get_historic_load_data

Documented in get_historic_load_data

#' Load historic yearly average load data
#'
#' Downloads two historical hourly load datasets (originally obtained from the
#' archive of https://www.entsoe.eu/data/power-stats/), calculates the hourly
#' average load for each sufficiently complete year, and prepends the results
#' to the long-term data frame generated by \code{\link{decompose_load_data}}.
#'
#' @details
#' Years with fewer than 8000 hourly observations in a historic dataset are
#' discarded. When a year is available in both historic datasets, the second
#' dataset takes precedence. Each historic dataset provides a raw load series
#' and a series scaled by the reported coverage ratio. If the year 2017 is
#' present in both \code{longterm} and the historic data, the series whose 2017
#' value is closest to \code{longterm} is used; otherwise the raw series is
#' used. Historic years that are also present in \code{longterm} are dropped so
#' that every year appears only once in the result.
#'
#' If \code{longterm} has an \code{example} column that is \code{TRUE}
#' throughout, the packaged \code{example_longterm_data} is returned and
#' nothing is downloaded.
#'
#' @param longterm A data frame object with "country", "year", and
#'   "avg_hourly_demand" columns resulting from the function
#'   \code{\link{decompose_load_data}}. It must describe exactly one country.
#'
#' @return Data frame with "country", "year", and "avg_hourly_demand" (yearly
#'   average of hourly demand). Historic years come first, followed by the rows
#'   of \code{longterm}. The country code "UK" is returned as "GB".
#' @export
#'
#' @examples
#'
#' print("Input dataframe should be in the following form:")
#' example_decomposed_data$longterm
#' example_longterm_data <- get_historic_load_data(example_decomposed_data$longterm)
#' print("Historic load data is added starting from 2006.")
#' example_longterm_data
#'
get_historic_load_data <- function(longterm) {
  if (!"country" %in% colnames(longterm)) {
    stop("No column named \"country\"")
  }
  if (!"avg_hourly_demand" %in% colnames(longterm)) {
    stop("No column named \"avg_hourly_demand\"")
  }
  if (!"year" %in% colnames(longterm)) {
    stop("No column named \"year\"")
  }

  # Offline shortcut used by the package examples. identical() keeps the
  # condition a single TRUE/FALSE even for empty, mixed or NA flags.
  if ("example" %in% colnames(longterm) &&
    identical(unique(longterm$example == TRUE), TRUE)) {
    return(oRaklE::example_longterm_data)
  }

  country <- unique(longterm$country)
  if (length(country) != 1 || is.na(country)) {
    stop("Column \"country\" must contain exactly one country code")
  }
  # The historic datasets are filtered with the code "GB" for the UK.
  if (country == "UK") {
    country <- "GB"
  }

  base_url <- "https://github.com/JohannesSchwenzer/oRaklE_data/releases/download/v1.0/"

  # ---- First historic dataset: one row per day, hourly values in cols 6:29 ----
  entsodata <- .load_historic_rda(
    paste0(base_url, "historic_entsodata1.rda"),
    "historic_entsodata1"
  )
  colnames(entsodata)[1:5] <- c("country", "year", "month", "day", "coverage_ratio")
  daily <- entsodata[which(entsodata$country == country), ]

  # Flatten the 24 hourly columns row by row (day 1 hours, day 2 hours, ...)
  # and repeat the per-day year / coverage ratio for each of the 24 values.
  data1 <- data.frame(
    load = as.numeric(t(as.matrix(daily[, 6:29, drop = FALSE]))),
    year = rep(as.numeric(daily[["year"]]), each = 24),
    coverage_ratio = rep(as.numeric(daily[["coverage_ratio"]]), each = 24)
  )
  # coverage_ratio is divided by 100 before scaling, i.e. treated as a percentage.
  data1$load_scaled_with_coverage_ratio <- data1$load / (data1$coverage_ratio / 100)
  data1 <- .keep_complete_years(data1)
  year_list <- unique(data1$year)

  # ---- Second historic dataset: one row per hourly observation ----
  entsodata2 <- .load_historic_rda(
    paste0(base_url, "historic_entsodata2.rda"),
    "historic_entsodata2"
  )
  data2 <- entsodata2[which(entsodata2$CountryCode == country), ]
  data2$year <- lubridate::year(data2$DateUTC)
  data2 <- .keep_complete_years(data2)
  year_list2 <- unique(data2$year)

  # Years covered by both datasets are taken from the second one only.
  year_list <- setdiff(year_list, year_list2)

  yearly_load_df <- rbind(
    .yearly_means(
      year_list, data1$year,
      data1$load, data1$load_scaled_with_coverage_ratio
    ),
    .yearly_means(
      year_list2, data2$year,
      data2$Value, data2$Value_ScaleTo100
    )
  )

  # Use 2017 as calibration year: pick the historic series (raw vs. scaled by
  # coverage ratio) that is closer to the long-term value. Falls back to the
  # raw series when 2017 is missing on either side or the comparison is NA.
  historic_demand <- yearly_load_df$load
  if (2017 %in% longterm$year && 2017 %in% yearly_load_df$year) {
    reference <- longterm$avg_hourly_demand[which(longterm$year == 2017)[1]]
    historic_2017 <- yearly_load_df[yearly_load_df$year == 2017, ]
    distance_raw <- abs(reference - historic_2017$load)
    distance_scaled <- abs(reference - historic_2017$load_scaled_with_coverage_ratio)
    if (isTRUE(distance_scaled < distance_raw)) {
      historic_demand <- yearly_load_df$load_scaled_with_coverage_ratio
    }
  }

  # Long-term values take precedence for every year they already cover, so the
  # combined series never contains a year twice.
  keep <- !yearly_load_df$year %in% longterm$year

  longterm_full <- data.frame(
    country = country,
    year = c(yearly_load_df$year[keep], longterm$year),
    avg_hourly_demand = c(historic_demand[keep], longterm$avg_hourly_demand)
  )
  return(longterm_full)
}

# Download an .rda file to a temporary location and return the object called
# `object_name` stored inside it. The temporary file is removed on exit and the
# file is loaded into a private environment so nothing else is overwritten.
.load_historic_rda <- function(url, object_name) {
  path <- tempfile(fileext = ".rda")
  on.exit(unlink(path), add = TRUE)

  # download.file() signals an error when the host is unreachable; convert
  # that into the status check below so the user gets one clear message.
  status <- tryCatch(
    utils::download.file(url, path, mode = "wb", quiet = TRUE),
    error = function(e) NULL
  )
  if (is.null(status) || status != 0 ||
    !file.exists(path) || file.size(path) == 0) {
    stop("Error: Download of historic power data failed. Are you connected to the internet?")
  }

  loaded <- new.env(parent = emptyenv())
  load(path, envir = loaded)
  if (!exists(object_name, envir = loaded, inherits = FALSE)) {
    stop("Downloaded file does not contain an object named \"", object_name, "\"")
  }
  get(object_name, envir = loaded, inherits = FALSE)
}

# Keep only the rows of `df` whose `year` has at least `min_hours` observations.
.keep_complete_years <- function(df, min_hours = 8000) {
  hours_per_year <- table(df$year)
  complete_years <- names(hours_per_year)[hours_per_year >= min_hours]
  df[as.character(df$year) %in% complete_years, ]
}

# Yearly means of the raw and the coverage-scaled load for each year in
# `years`, in the order given. `obs_year` gives the year of each observation.
.yearly_means <- function(years, obs_year, load, load_scaled) {
  mean_by_year <- function(values) {
    vapply(
      years,
      function(y) mean(values[which(obs_year == y)], na.rm = TRUE),
      numeric(1)
    )
  }
  data.frame(
    year = years,
    load = mean_by_year(load),
    load_scaled_with_coverage_ratio = mean_by_year(load_scaled)
  )
}

Try the oRaklE package in your browser

Any scripts or data that you put into this service are public.

oRaklE documentation built on June 8, 2025, 12:41 p.m.