R/map_plot.R

Defines functions map_plot

Documented in map_plot

#' Plot Regionalized Data on Maps of Germany
#'
#' Visualizes regional data by creating maps of Germany. Currently supports states (Länder), districts (Kreise, kreisfreie Städte), and municipalities (Gemeinde). Can be used to plot any lower level within a higher one, e.g. states within Germany or municipalities within a district.
#'
#' @param data Data frame. The dataset containing the variable to be plotted.
#' @param var Character. The name of the variable (column) in \code{data} to be visualized.
#' @param map_section Character vector of AGS codes or federal state shortcuts. Used to define the outer boundaries of the map. Shortcuts are available for Germany ("DE") and for each federal state (e.g., "BE" for Berlin). For other areas, providing an AGS code is necessary, which can be looked up using the \code{list_codes} function.
#' @param level Character "land", "kreis", or "gemeinde". Used to define the inner boundaries, i.e. the areas which will be plotted within \code{map_section}. For example, to plot districts in the state of Brandenburg specify \code{map_section = "BB"} and \code{level = "kreis"}.
#' @param add_labels Logical. Whether to show numeric labels on the map. If TRUE, will not only fill each area specified in \code{level} with a color but also print the number provided in \code{var} on the map.
#' @param year Integer. Year of the data to be plotted. Will detect the respective column in \code{data} if it is named "Year" or "Jahr" and filter the year specified. Will by default also determine the year from which the geospatial data (shapefiles) will be used (see \code{geo_year}). Useful if \code{data} contains information from multiple years. 
#' @param geo_year Integer or "". Year of the geodata to use; defaults to "" for which the year provided in \code{year} is used. Specify manually to plot data from a given year using geospatial data from a different year. Can be useful in case of matching issues. 
#' @param palette Character. Name of the color palette ("Red", "Blue", etc.).
#'
#' @return A ggplot2 object visualizing the provided regional data. Can be further customized.
#'
#' @examples
#' \donttest{
#' data <- nstudents2022 # provide the data you want to plot here
#'
#' # Example: Plotting the number of students in Germany by state
#' map_plot(
#'   data = data,               # the dataset containing the variable to plot
#'   var = "nStudents",         # the variable to plot
#'   map_section = "DE",        # plot entire Germany
#'   level = "land",            # plot by state
#'   add_labels = FALSE,        # do not show labels
#'   year = 2022,               # year of the data
#'   geo_year = "",             # use the same year for geodata
#'   palette = "red"            # use red color palette
#' )
#' }
#'
#' @export
#' @importFrom data.table :=
#' @importFrom dplyr filter mutate select rename distinct left_join all_of
#' @importFrom ggplot2 ggplot aes geom_sf geom_sf_text scale_fill_gradientn theme theme_minimal element_blank element_text
#' @importFrom grDevices col2rgb colorRampPalette rgb
#' @importFrom grid unit
#' @importFrom magrittr %>%
#' @importFrom patchwork wrap_plots
#' @importFrom rlang sym
#' @importFrom scales number
#' @importFrom sf st_read st_geometry st_sf st_drop_geometry
#' @importFrom stringr str_pad
#' @importFrom utils read.csv
#' @encoding UTF-8
#' @keywords mapping visualization germany shapefiles
map_plot <- function(data, var, map_section = "", level = "", add_labels, year, geo_year, palette = "") {
  
  # Ensure the target variable exists
  if (!(var %in% names(data))) {
    stop(paste("The specified target variable", var, "does not exist in the target dataset."))
  }
  
  # Normalize column names for spatial joins: unify AGS variants to "ARS"
  if (any(tolower(names(data)) %in% c("ags", "ars"))) {
    col_to_rename <- names(data)[tolower(names(data)) %in% c("ags", "ars")][1]
    names(data)[names(data) == col_to_rename] <- "ARS"
  }
  
  # Default to using the same year for geodata if geo_year is not specified
  if (geo_year == "") {
    geo_year <- year
  }
  
  # Check if shapefiles for the given year exist
  geo_dir <- file.path(tools::R_user_dir("DEplotting", "data"), paste0("vg250_", geo_year))
  if (!dir.exists(geo_dir) || length(list.files(geo_dir, recursive = TRUE)) == 0) {
    stop(paste0(
      "Geodata for year ", geo_year, " not found. ",
      "Please run download_geo(", geo_year, ", ", geo_year, ") to download it, ",
      "or run download_geo() to check if there are other missing years in your Geodata folder."
    ))
  }
  
  # Limit to at most 9 maps per call to avoid overload in visualization
  if (length(map_section) > 9) {
    stop("A maximum of 9 plots are allowed. You provided ", length(map_section), " MAP_AGS values.")
  }
  
  # Filter data for the selected year using either "Year" or "Jahr" column
  if ("Year" %in% colnames(data)) {
    if (!(year %in% data[["Year"]])) {
      stop(paste("The provided year", year, "does not exist in the 'Year' column of the data."))
    } else {
      data <- data[data[["Year"]] == year, ]
    }
  }
  if ("Jahr" %in% colnames(data)) {
    if (!(year %in% data[["Jahr"]])) {
      stop(paste("The provided year", year, "does not exist in the 'Jahr' column of the data."))
    } else {
      data <- data[data[["Jahr"]] == year, ]
    }
  }
  
  plot_list <- list()
  orig_data <- data
  
  # Loop through the MAP_AGS in map_section to prepare data
  for (MAP_AGS in map_section) {
    
    data <- orig_data
    
    map_ags_lookup <- c(
      "DE" = "DG000000",
      "BW" = "08000000",
      "BY" = "09000000",
      "BE" = "11000000",
      "BB" = "12000000",
      "HB" = "04000000",
      "HH" = "02000000",
      "HE" = "06000000",
      "MV" = "13000000",
      "NI" = "03000000",
      "NW" = "05000000",
      "RP" = "07000000",
      "SL" = "10000000",
      "SN" = "14000000",
      "ST" = "15000000",
      "SH" = "01000000",
      "TH" = "16000000"
    )
    
    # Convert MAP_AGS to uppercase for case-insensitive matching
    MAP_AGS <- toupper(MAP_AGS)
    
    if (MAP_AGS == "") {
      MAP_AGS <- "DG000000"
    } else if (MAP_AGS %in% names(map_ags_lookup)) {
      MAP_AGS <- map_ags_lookup[MAP_AGS]
    }
    
    # Determine the attribute name based on geo_year
    attribute <- if (geo_year >= 1998 & geo_year <= 2002) {
      "SHN"
    } else if (geo_year >= 2003 & geo_year <= 2012) {
      "AGS"
    } else if (geo_year >= 2013 & geo_year <= 2022) {
      "AGS_0"
    } else {
      stop("Data is available only from 1998 to 2022.")
    }
    
    # Prepare and clean data for joining
    data <- data %>%
      mutate(ARS = str_pad(ARS, width = 8, side = "right", pad = "0")) %>%
      mutate(!!sym(var) := as.numeric(ifelse(!!sym(var) %in% c("-", "x", "."), NA, !!sym(var))))
    
    
    get_shapefile_path <- function(year, layer) {
      # Base directory inside the package's extdata
      base_dir <- file.path(tools::R_user_dir("DEplotting", "data"), paste0("vg250_", year))
      
      # Determine directory structure based on year
      dir_part <- if (year >= 2019) {
        file.path("vg250_12-31.gk3.shape.ebenen", "vg250_ebenen_1231")
      } else if (year >= 2015 && year <= 2018) {
        file.path(paste0("vg250_", year, "-12-31.gk3.shape.ebenen"), "vg250_ebenen")
      } else if (year >= 2012 && year <= 2014) {
        file.path("vg250_3112.gk3.shape.ebenen", "vg250_ebenen")
      } else if (year >= 1998 && year <= 2011) {
        file.path(paste0("vg250_", year, "-12-31.gk3.shape.ebenen"),
                  paste0("vg250_ebenen-historisch/de", substr(as.character(year), 3, 4), "12"))
      } else {
        stop("Unsupported year: ", year)
      }
      
      # Determine filename based on year and layer
      if (year >= 2013) {
        prefix <- "VG250_"
        suffix <- layer
      } else if (year == 2012) {
        prefix <- "vg250_"
        suffix <- switch(layer,
                         "LAN" = "bld",
                         "KRS" = "krs",
                         "GEM" = "gem",
                         stop("Invalid layer for 2012")
        )
      } else if (year >= 2003 && year <= 2011) {
        prefix <- "vg250_"
        suffix <- switch(layer,
                         "LAN" = "bld",
                         tolower(layer)
        )
      } else if (year >= 1998 && year <= 2002) {
        prefix <- "vg250"
        suffix <- switch(layer,
                         "LAN" = "lnd",
                         tolower(layer)
        )
      }
      
      filename <- paste0(prefix, suffix, ".shp")
      
      # Return the full path
      return(file.path(base_dir, dir_part, filename))
    }
    
    # Load shapefiles using the generalized function
    load_shapefile <- function(year, layer) {
      path <- get_shapefile_path(year, layer)
      st_read(path, quiet = TRUE)
    }
    
    # Usage: Load shapefile layers
    vg250_lan <- load_shapefile(geo_year, "LAN")
    vg250_krs <- load_shapefile(geo_year, "KRS")
    vg250_gem <- load_shapefile(geo_year, "GEM")
    
    # Process shapefiles based on the year
    process_vg_data <- function(data, attribute, year) {
      
      selected_columns <- c("GEN", attribute)
      # Add GF and BSG if they exist in the data
      if ("GF" %in% names(data)) {
        selected_columns <- c(selected_columns, "GF")
      }
      if ("BSG" %in% names(data)) {
        selected_columns <- c(selected_columns, "BSG")
      }
      
      processed_data <- data %>%
        dplyr::select(all_of(selected_columns)) %>%
        dplyr::rename(Name = GEN, AGS = !!rlang::sym(attribute))
      
      # Apply year-specific filters
      if (year >= 1998 & year <= 2002) {
        processed_data <- processed_data %>%
          dplyr::mutate(AGS = paste0(substr(AGS, 1, 5), substr(AGS, 8, 10)))
      } else {
        if ("GF" %in% names(processed_data)) {
          processed_data <- processed_data %>%
            dplyr::filter(GF == 4)
        }
        if ("BSG" %in% names(processed_data)) {
          processed_data <- processed_data %>%
            dplyr::filter(BSG == 1)
        }
      }
      
      # Remove duplicates based on AGS, keeping the first occurrence
      processed_data <- processed_data %>%
        dplyr::distinct(AGS, .keep_all = TRUE)
      
      return(processed_data)
    }
    
    vg250_krs <- process_vg_data(vg250_krs, attribute, geo_year)
    vg250_lan <- process_vg_data(vg250_lan, attribute, geo_year)
    vg250_gem <- process_vg_data(vg250_gem, attribute, geo_year)
    
    # Merge user data with geospatial data
    data_Land <- vg250_lan %>%
      left_join(data %>% select(ARS, !!sym(var)), by = c("AGS" = "ARS")) %>%
      mutate(!!sym(var) := as.numeric(!!sym(var)))
    
    data_Kreis <- vg250_krs %>%
      left_join(data %>% select(ARS, !!sym(var)), by = c("AGS" = "ARS")) %>%
      mutate(!!sym(var) := as.numeric(!!sym(var)))
    
    data_Gem <- vg250_gem %>%
      left_join(data %>% select(ARS, !!sym(var)), by = c("AGS" = "ARS")) %>%
      mutate(!!sym(var) := as.numeric(!!sym(var)))
    
    
    # PREPARING THE DATASETS FOR MERGING
    # Prepare data_Gem
    data_Gem <- data_Gem %>%
      mutate(AGS_Kreis = paste0(substr(AGS, 1, nchar(AGS) - 3), "000")) %>%
      st_drop_geometry() %>%
      left_join(
        data_Kreis %>%
          mutate(AGS_Kreis = substr(AGS, 1, 8)) %>%
          st_drop_geometry() %>%
          select(AGS_Kreis, Name_Kreis = Name),
        by = "AGS_Kreis"
      ) %>%
      st_sf(geometry = st_geometry(data_Gem))
    
    data_Gem <- data_Gem %>%
      mutate(AGS_Land = paste0(substr(AGS, 1, nchar(AGS) - 6), "000000")) %>%
      st_drop_geometry() %>%
      left_join(
        data_Land %>%
          mutate(AGS_Land = substr(AGS, 1, 8)) %>%
          st_drop_geometry() %>%
          select(AGS_Land, Name_Land = Name),
        by = "AGS_Land"
      ) %>%
      st_sf(geometry = st_geometry(data_Gem))
    
    # Prepare data_Kreis
    data_Kreis <- data_Kreis %>%
      mutate(AGS_Land = paste0(substr(AGS, 1, nchar(AGS) - 6), "000000")) %>%
      st_drop_geometry() %>%
      left_join(
        data_Land %>%
          mutate(AGS_Land = substr(AGS, 1, 8)) %>%
          st_drop_geometry() %>%
          select(AGS_Land, Name_Land = Name),
        by = "AGS_Land"
      ) %>%
      st_sf(geometry = st_geometry(data_Kreis))
    
    level <- tolower(level)
    # Filtering data based on MAP_AGS and level
    if (MAP_AGS == "DG000000") {
      if (level == "land") {
        data <- data_Land
        console <- sub("\\s+\\.$", ".", paste("Visualization of Germany by State, in year", year, ",and in geodata year", geo_year, "."))
      } else if (level == "kreis") {
        data <- data_Kreis
        console <- sub("\\s+\\.$", ".", paste("Visualization of Germany by Districts, in year", year, ",and in geodata year", geo_year, "."))
      } else if (level == "gemeinde") {
        data <- data_Gem
        console <- sub("\\s+\\.$", ".", paste("Visualization of Germany by Municipalities, in year", year, ",and in geodata year", geo_year, "."))
      } else {
        stop("Warning: Invalid level specified for the whole country (Germany). Levels of 'land', 'kreis' or 'gemeinde' are expected.")
      }
    } else if (MAP_AGS %in% data_Land$AGS) {
      if (level == "kreis") {
        data <- data_Kreis %>% filter(AGS_Land == MAP_AGS)
        console <- sub("\\s+\\.$", ".", paste("Visualization of the", unique(data$Name_Land), "State by Districts, in year", year, ",and in geodata year", geo_year, "."))
      } else if (level == "gemeinde") {
        data <- data_Gem %>% filter(AGS_Land == MAP_AGS)
        console <- sub("\\s+\\.$", ".", paste("Visualization of the", unique(data$Name_Land), "State by Municipalities, in year", year, ",and in geodata year", geo_year, "."))
      } else {
        stop("Warning: Invalid level specified for a Land in MAP_AGS. Only 'kreis' or 'gemeinde' levels are allowed.")
      }
    } else if (MAP_AGS %in% data_Kreis$AGS) {
      if (level == "gemeinde") {
        data <- data_Gem %>% filter(AGS_Kreis == MAP_AGS)
        console <- sub("\\s+\\.$", ".", paste("Visualization of the", unique(data$Name_Kreis), "District by Municipalities, in year", year, ",and in geodata year", geo_year, "."))
      } else {
        stop("Warning: Invalid level specified for a Kreis in MAP_AGS. Only 'gemeinde' level is allowed.")
      }
    } else {
      stop("Invalid MAP_AGS code provided.")
    }
    print(paste(console))
    
    # Handle missing data: areas with no data are displayed in gray.
    if (any(is.na(data[[var]]))) {
      message("For some of the areas plotted, no data was found in the dataset you provided. These are displayed in gray.")
    }
    
    create_gradient_palette <- function(hex_color, n = 7, lighten = 0.8) {
      lighter <- col2rgb(hex_color) / 255
      lighter <- rgb(
        pmin(lighter[1, ] + lighten * (1 - lighter[1, ]), 1),
        pmin(lighter[2, ] + lighten * (1 - lighter[2, ]), 1),
        pmin(lighter[3, ] + lighten * (1 - lighter[3, ]), 1)
      )
      colorRampPalette(c(lighter, hex_color))(n)
    }
    
    base_colors <- list(
      "Red" = "#790F2D",
      "Pink" = "#CA1643",
      "Orange" = "#EF7D00",
      "Yellow" = "#FFCC00",
      "Purple"  = "#6B517F",
      "Green" = "#52AE32",
      "Teal" = "#00A183",
      "Blue"  = "#004876"
    )
    
    custom_palettes <- lapply(base_colors, create_gradient_palette)
    
    show_labels <- FALSE
    labels_land <- c(
      "DG000000" = "DE",
      "08000000" = "BW",
      "09000000" = "BY",
      "11000000" = "BE",
      "12000000" = "BB",
      "04000000" = "HB",
      "02000000" = "HH",
      "06000000" = "HE",
      "13000000" = "MV",
      "03000000" = "NI",
      "05000000" = "NW",
      "07000000" = "RP",
      "10000000" = "SL",
      "14000000" = "SN",
      "15000000" = "ST",
      "01000000" = "SH",
      "16000000" = "TH"
    )
    
    if (level == "land" && MAP_AGS == "DG000000") {
      show_labels <- TRUE
      data <- data %>%
        mutate(
          Shortcut_Land = labels_land[AGS],
          label = paste(Shortcut_Land, ": ", scales::number(!!sym(var), big.mark = ".", decimal.mark = ","))
        )
    } else if (level == "kreis" && MAP_AGS %in% data_Land$AGS) {
      show_labels <- TRUE
      data <- data %>%
        mutate(label = scales::number(!!sym(var), big.mark = ".", decimal.mark = ","))
    } else if (level == "gemeinde" && MAP_AGS %in% data_Kreis$AGS) {
      show_labels <- TRUE
      data <- data %>%
        mutate(label = scales::number(!!sym(var), big.mark = ".", decimal.mark = ","))
    } else if (level == "gemeinde" && MAP_AGS %in% c("04000000", "02000000", "11000000")) {
      show_labels <- TRUE
      data <- data %>%
        mutate(label = scales::number(!!sym(var), big.mark = ".", decimal.mark = ","))
    }
    
    if (add_labels == FALSE) {
      show_labels <- FALSE
    }
    
    p <- ggplot(data, aes(fill = !!sym(var), geometry = geometry)) +
      geom_sf(color = "grey65", lwd = 0.1, show.legend = TRUE)
    
    palette <- tools::toTitleCase(tolower(palette))
    if (palette %in% names(custom_palettes)) {
      
      min_val <- min(data[[var]], na.rm = TRUE)
      max_val <- max(data[[var]], na.rm = TRUE)
      
      if (min_val == max_val || is.infinite(min_val) || is.infinite(max_val)) {
        # Fallback to avoid errors with constant or invalid values
        breaks <- c(min_val)
        labels <- scales::label_number(big.mark = ".", decimal.mark = ",")(breaks)
      } else {
        breaks <- seq(min_val, max_val, length.out = 5)
        labels <- scales::label_number(big.mark = ".", decimal.mark = ",")(breaks)
      }
      
      p <- p + scale_fill_gradientn(
        colours = custom_palettes[[palette]],
        name = paste(var, ""),
        limits = c(min_val, max_val),
        breaks = breaks,
        labels = labels,
        oob = scales::squish
      )
    } else {
      stop(paste0("Palette '", palette, "' not recognized. Available options are: ", 
                  paste(names(custom_palettes), collapse = ", ")))
    }
    
    if (show_labels) {
      p <- p + geom_sf_text(
        aes(label = label),
        size = 3,
        color = "black",
        show.legend = FALSE
      )
    }
    
    p <- p + 
      theme_minimal() +
      theme(
        legend.position = "right",
        legend.key.height = unit(1, "cm"),
        legend.key.width = unit(0.5, "cm"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        plot.caption = element_text(size = 12, color = "gray40", face = "italic")
      )
    
    plot_list[[length(plot_list) + 1]] <- p
  }
  
  combined_plot <- Reduce(`+`, plot_list)
  return(combined_plot)
}

Try the DEplotting package in your browser

Any scripts or data that you put into this service are public.

DEplotting documentation built on June 8, 2025, 12:59 p.m.