R/load_geodata.R

Defines functions load_geodata

Documented in load_geodata

#' Load Processed Geodata
#'
#' Loads shapefiles of states (Länder: LAN), districts (Kreise, kreisfreie Städte: KRS), and municipalities (Gemeinde: GEM) for a given year. The shapefiles are processed and assigned to the global environment as \code{vg250_lan}, \code{vg250_krs}, and \code{vg250_gem}. This function is useful if you want to use the shapefiles with other R packages to plot your data.
#'
#' @param year Integer. The year of geodata to load (must be between 1998 and 2022).
#'
#' @return No return value. This function assigns the spatial datasets \code{vg250_lan}, \code{vg250_krs}, and \code{vg250_gem} to the global environment using \code{<<-}.
#' @export
#' @importFrom sf st_read
#' @importFrom magrittr %>%
#' @importFrom dplyr select rename mutate filter distinct all_of
#' @importFrom rlang sym
#' @examples
#' \donttest{
#' # Load the geodata from year 2015 into the R environment
#' load_geodata(year = 2015)
#' }
load_geodata <- function(year) {
  
  #! NOTE: Code structure may need to be maintained annually.
  ##  attribute name based on the year
  attribute <- if (year >= 1998 & year <= 2002) {
    "SHN"
  } else if (year >= 2003 & year <= 2012) {
    "AGS"
  } else if (year >= 2013 & year <= 2022) {
    "AGS_0"
  } else {
    stop("Data is available only from 1998 to 2022.")
  }
  
  # Check if shapefiles for the given year exist
  geo_dir <- file.path(tools::R_user_dir("DEplotting", "data"), paste0("vg250_", year))
  if (!dir.exists(geo_dir) || length(list.files(geo_dir, recursive = TRUE)) == 0) {
    stop(paste0(
      "Geodata for year ", year, " not found. ",
      "Please run download_geo(", year, ", ", year, ") to download it, ",
      "or run download_geo() to check if there are other missing years in your Geodata folder."
    ))
  }
  
  # Load GEODATA Shapefiles
  get_shapefile_path <- function(year, layer) {
    # Base directory inside the package's extdata
    base_dir <- file.path(tools::R_user_dir("DEplotting", "data"), paste0("vg250_", year))
    
    #! NOTE: Code structure may need to be maintained annually.
    # Determine directory structure based on year
    dir_part <- if (year >= 2019) {
      file.path("vg250_12-31.gk3.shape.ebenen", "vg250_ebenen_1231")
    } else if (year >= 2015 && year <= 2018) {
      file.path(paste0("vg250_", year, "-12-31.gk3.shape.ebenen"), "vg250_ebenen")
    } else if (year >= 2012 && year <= 2014) {
      file.path("vg250_3112.gk3.shape.ebenen", "vg250_ebenen")
    } else if (year >= 1998 && year <= 2011) {
      file.path(paste0("vg250_", year, "-12-31.gk3.shape.ebenen"),
                paste0("vg250_ebenen-historisch/de", substr(as.character(year), 3, 4), "12"))
    } else {
      stop("Unsupported year: ", year)
    }
    
    #! NOTE: Code structure may need to be maintained annually.
    # Determine filename based on year and layer
    if (year >= 2013) {
      prefix <- "VG250_"
      suffix <- layer
    } else if (year == 2012) {
      prefix <- "vg250_"
      suffix <- switch(layer,
                       "LAN" = "bld",
                       "KRS" = "krs",
                       "GEM" = "gem",
                       stop("Invalid layer for 2012")
      )
    } else if (year >= 2003 && year <= 2011) {
      prefix <- "vg250_"
      suffix <- switch(layer,
                       "LAN" = "bld",
                       tolower(layer)
      )
    } else if (year >= 1998 && year <= 2002) {
      prefix <- "vg250"
      suffix <- switch(layer,
                       "LAN" = "lnd",
                       tolower(layer)
      )
    }
    
    filename <- paste0(prefix, suffix, ".shp")
    
    # Return the full path
    return(file.path(base_dir, dir_part, filename))
  }
  
  # Load shapefiles using the generalized function
  load_shapefile <- function(year, layer) {
    path <- get_shapefile_path(year, layer)
    st_read(path, quiet = TRUE)
  }
  
  # Usage
  vg250_lan <- load_shapefile(year, "LAN")
  vg250_krs <- load_shapefile(year, "KRS")
  vg250_gem <- load_shapefile(year, "GEM")
  
  
  ### Process shapefiles based on the year
  process_vg_data <- function(data, attribute, year) {
    
    selected_columns <- c("GEN", attribute)
    #### Add GF and BSG if they exist in the data
    if ("GF" %in% names(data)) {
      selected_columns <- c(selected_columns, "GF")
    }
    if ("BSG" %in% names(data)) {
      selected_columns <- c(selected_columns, "BSG")
    }
    
    processed_data <- data %>%
      dplyr::select(all_of(selected_columns)) %>%
      dplyr::rename(Name = GEN, AGS = !!rlang::sym(attribute))
    
    #! NOTE: Code structure may need to be maintained annually.
    #### Apply year-specific filters
    if (year >= 1998 & year <= 2002) {
      processed_data <- processed_data %>%
        dplyr::mutate(AGS = paste0(substr(AGS, 1, 5), substr(AGS, 8, 10)))
    } else {
      if ("GF" %in% names(processed_data)) {
        processed_data <- processed_data %>%
          dplyr::filter(GF == 4)
      }
      if ("BSG" %in% names(processed_data)) {
        processed_data <- processed_data %>%
          dplyr::filter(BSG == 1)
      }
    }
    
    #### Remove duplicates based on AGS only, keep first occurrence
    processed_data <- processed_data %>%
      dplyr::distinct(AGS, .keep_all = TRUE)
    
    return(processed_data)
  }
  
  vg250_krs <- process_vg_data(vg250_krs, attribute, year)
  vg250_lan <- process_vg_data(vg250_lan, attribute, year)
  vg250_gem <- process_vg_data(vg250_gem, attribute, year)
  
  ### Process and assign the data to the global environment
  vg250_krs <<- vg250_krs
  vg250_lan <<- vg250_lan
  vg250_gem <<- vg250_gem
  
  message("Processed datasets have been assigned to your environment.")
  invisible(NULL)
}

Try the DEplotting package in your browser

Any scripts or data that you put into this service are public.

DEplotting documentation built on June 8, 2025, 12:59 p.m.