Nothing
#' Plot Regionalized Data on Maps of Germany
#'
#' Visualizes regional data by creating maps of Germany. Currently supports states (Länder), districts (Kreise, kreisfreie Städte), and municipalities (Gemeinde). Can be used to plot any lower level within a higher one, e.g. states within Germany or municipalities within a district.
#'
#' @param data Data frame. The dataset containing the variable to be plotted.
#' @param var Character. The name of the variable (column) in \code{data} to be visualized.
#' @param map_section Character vector of AGS codes or federal state shortcuts. Used to define the outer boundaries of the map. Shortcuts are available for Germany ("DE") and for each federal state (e.g., "BE" for Berlin). For other areas, providing an AGS code is necessary, which can be looked up using the \code{list_codes} function.
#' @param level Character "land", "kreis", or "gemeinde". Used to define the inner boundaries, i.e. the areas which will be plotted within \code{map_section}. For example, to plot districts in the state of Brandenburg specify \code{map_section = "BB"} and \code{level = "kreis"}.
#' @param add_labels Logical. Whether to show numeric labels on the map. If TRUE, will not only fill each area specified in \code{level} with a color but also print the number provided in \code{var} on the map.
#' @param year Integer. Year of the data to be plotted. Will detect the respective column in \code{data} if it is named "Year" or "Jahr" and filter the year specified. Will by default also determine the year from which the geospatial data (shapefiles) will be used (see \code{geo_year}). Useful if \code{data} contains information from multiple years.
#' @param geo_year Integer or "". Year of the geodata to use; defaults to "" for which the year provided in \code{year} is used. Specify manually to plot data from a given year using geospatial data from a different year. Can be useful in case of matching issues.
#' @param palette Character. Name of the color palette ("Red", "Blue", etc.).
#'
#' @return A ggplot2 object visualizing the provided regional data. Can be further customized.
#'
#' @examples
#' \donttest{
#' data <- nstudents2022 # provide the data you want to plot here
#'
#' # Example: Plotting the number of students in Germany by state
#' map_plot(
#' data = data, # the dataset containing the variable to plot
#' var = "nStudents", # the variable to plot
#' map_section = "DE", # plot entire Germany
#' level = "land", # plot by state
#' add_labels = FALSE, # do not show labels
#' year = 2022, # year of the data
#' geo_year = "", # use the same year for geodata
#' palette = "red" # use red color palette
#' )
#' }
#'
#' @export
#' @importFrom data.table :=
#' @importFrom dplyr filter mutate select rename distinct left_join all_of
#' @importFrom ggplot2 ggplot aes geom_sf geom_sf_text scale_fill_gradientn theme theme_minimal element_blank element_text
#' @importFrom grDevices col2rgb colorRampPalette rgb
#' @importFrom grid unit
#' @importFrom magrittr %>%
#' @importFrom patchwork wrap_plots
#' @importFrom rlang sym
#' @importFrom scales number
#' @importFrom sf st_read st_geometry st_sf st_drop_geometry
#' @importFrom stringr str_pad
#' @importFrom utils read.csv
#' @encoding UTF-8
#' @keywords mapping visualization germany shapefiles
map_plot <- function(data, var, map_section = "", level = "", add_labels, year, geo_year, palette = "") {
# Ensure the target variable exists
if (!(var %in% names(data))) {
stop(paste("The specified target variable", var, "does not exist in the target dataset."))
}
# Normalize column names for spatial joins: unify AGS variants to "ARS"
if (any(tolower(names(data)) %in% c("ags", "ars"))) {
col_to_rename <- names(data)[tolower(names(data)) %in% c("ags", "ars")][1]
names(data)[names(data) == col_to_rename] <- "ARS"
}
# Default to using the same year for geodata if geo_year is not specified
if (geo_year == "") {
geo_year <- year
}
# Check if shapefiles for the given year exist
geo_dir <- file.path(tools::R_user_dir("DEplotting", "data"), paste0("vg250_", geo_year))
if (!dir.exists(geo_dir) || length(list.files(geo_dir, recursive = TRUE)) == 0) {
stop(paste0(
"Geodata for year ", geo_year, " not found. ",
"Please run download_geo(", geo_year, ", ", geo_year, ") to download it, ",
"or run download_geo() to check if there are other missing years in your Geodata folder."
))
}
# Limit to at most 9 maps per call to avoid overload in visualization
if (length(map_section) > 9) {
stop("A maximum of 9 plots are allowed. You provided ", length(map_section), " MAP_AGS values.")
}
# Filter data for the selected year using either "Year" or "Jahr" column
if ("Year" %in% colnames(data)) {
if (!(year %in% data[["Year"]])) {
stop(paste("The provided year", year, "does not exist in the 'Year' column of the data."))
} else {
data <- data[data[["Year"]] == year, ]
}
}
if ("Jahr" %in% colnames(data)) {
if (!(year %in% data[["Jahr"]])) {
stop(paste("The provided year", year, "does not exist in the 'Jahr' column of the data."))
} else {
data <- data[data[["Jahr"]] == year, ]
}
}
plot_list <- list()
orig_data <- data
# Loop through the MAP_AGS in map_section to prepare data
for (MAP_AGS in map_section) {
data <- orig_data
map_ags_lookup <- c(
"DE" = "DG000000",
"BW" = "08000000",
"BY" = "09000000",
"BE" = "11000000",
"BB" = "12000000",
"HB" = "04000000",
"HH" = "02000000",
"HE" = "06000000",
"MV" = "13000000",
"NI" = "03000000",
"NW" = "05000000",
"RP" = "07000000",
"SL" = "10000000",
"SN" = "14000000",
"ST" = "15000000",
"SH" = "01000000",
"TH" = "16000000"
)
# Convert MAP_AGS to uppercase for case-insensitive matching
MAP_AGS <- toupper(MAP_AGS)
if (MAP_AGS == "") {
MAP_AGS <- "DG000000"
} else if (MAP_AGS %in% names(map_ags_lookup)) {
MAP_AGS <- map_ags_lookup[MAP_AGS]
}
# Determine the attribute name based on geo_year
attribute <- if (geo_year >= 1998 & geo_year <= 2002) {
"SHN"
} else if (geo_year >= 2003 & geo_year <= 2012) {
"AGS"
} else if (geo_year >= 2013 & geo_year <= 2022) {
"AGS_0"
} else {
stop("Data is available only from 1998 to 2022.")
}
# Prepare and clean data for joining
data <- data %>%
mutate(ARS = str_pad(ARS, width = 8, side = "right", pad = "0")) %>%
mutate(!!sym(var) := as.numeric(ifelse(!!sym(var) %in% c("-", "x", "."), NA, !!sym(var))))
get_shapefile_path <- function(year, layer) {
# Base directory inside the package's extdata
base_dir <- file.path(tools::R_user_dir("DEplotting", "data"), paste0("vg250_", year))
# Determine directory structure based on year
dir_part <- if (year >= 2019) {
file.path("vg250_12-31.gk3.shape.ebenen", "vg250_ebenen_1231")
} else if (year >= 2015 && year <= 2018) {
file.path(paste0("vg250_", year, "-12-31.gk3.shape.ebenen"), "vg250_ebenen")
} else if (year >= 2012 && year <= 2014) {
file.path("vg250_3112.gk3.shape.ebenen", "vg250_ebenen")
} else if (year >= 1998 && year <= 2011) {
file.path(paste0("vg250_", year, "-12-31.gk3.shape.ebenen"),
paste0("vg250_ebenen-historisch/de", substr(as.character(year), 3, 4), "12"))
} else {
stop("Unsupported year: ", year)
}
# Determine filename based on year and layer
if (year >= 2013) {
prefix <- "VG250_"
suffix <- layer
} else if (year == 2012) {
prefix <- "vg250_"
suffix <- switch(layer,
"LAN" = "bld",
"KRS" = "krs",
"GEM" = "gem",
stop("Invalid layer for 2012")
)
} else if (year >= 2003 && year <= 2011) {
prefix <- "vg250_"
suffix <- switch(layer,
"LAN" = "bld",
tolower(layer)
)
} else if (year >= 1998 && year <= 2002) {
prefix <- "vg250"
suffix <- switch(layer,
"LAN" = "lnd",
tolower(layer)
)
}
filename <- paste0(prefix, suffix, ".shp")
# Return the full path
return(file.path(base_dir, dir_part, filename))
}
# Load shapefiles using the generalized function
load_shapefile <- function(year, layer) {
path <- get_shapefile_path(year, layer)
st_read(path, quiet = TRUE)
}
# Usage: Load shapefile layers
vg250_lan <- load_shapefile(geo_year, "LAN")
vg250_krs <- load_shapefile(geo_year, "KRS")
vg250_gem <- load_shapefile(geo_year, "GEM")
# Process shapefiles based on the year
process_vg_data <- function(data, attribute, year) {
selected_columns <- c("GEN", attribute)
# Add GF and BSG if they exist in the data
if ("GF" %in% names(data)) {
selected_columns <- c(selected_columns, "GF")
}
if ("BSG" %in% names(data)) {
selected_columns <- c(selected_columns, "BSG")
}
processed_data <- data %>%
dplyr::select(all_of(selected_columns)) %>%
dplyr::rename(Name = GEN, AGS = !!rlang::sym(attribute))
# Apply year-specific filters
if (year >= 1998 & year <= 2002) {
processed_data <- processed_data %>%
dplyr::mutate(AGS = paste0(substr(AGS, 1, 5), substr(AGS, 8, 10)))
} else {
if ("GF" %in% names(processed_data)) {
processed_data <- processed_data %>%
dplyr::filter(GF == 4)
}
if ("BSG" %in% names(processed_data)) {
processed_data <- processed_data %>%
dplyr::filter(BSG == 1)
}
}
# Remove duplicates based on AGS, keeping the first occurrence
processed_data <- processed_data %>%
dplyr::distinct(AGS, .keep_all = TRUE)
return(processed_data)
}
vg250_krs <- process_vg_data(vg250_krs, attribute, geo_year)
vg250_lan <- process_vg_data(vg250_lan, attribute, geo_year)
vg250_gem <- process_vg_data(vg250_gem, attribute, geo_year)
# Merge user data with geospatial data
data_Land <- vg250_lan %>%
left_join(data %>% select(ARS, !!sym(var)), by = c("AGS" = "ARS")) %>%
mutate(!!sym(var) := as.numeric(!!sym(var)))
data_Kreis <- vg250_krs %>%
left_join(data %>% select(ARS, !!sym(var)), by = c("AGS" = "ARS")) %>%
mutate(!!sym(var) := as.numeric(!!sym(var)))
data_Gem <- vg250_gem %>%
left_join(data %>% select(ARS, !!sym(var)), by = c("AGS" = "ARS")) %>%
mutate(!!sym(var) := as.numeric(!!sym(var)))
# PREPARING THE DATASETS FOR MERGING
# Prepare data_Gem
data_Gem <- data_Gem %>%
mutate(AGS_Kreis = paste0(substr(AGS, 1, nchar(AGS) - 3), "000")) %>%
st_drop_geometry() %>%
left_join(
data_Kreis %>%
mutate(AGS_Kreis = substr(AGS, 1, 8)) %>%
st_drop_geometry() %>%
select(AGS_Kreis, Name_Kreis = Name),
by = "AGS_Kreis"
) %>%
st_sf(geometry = st_geometry(data_Gem))
data_Gem <- data_Gem %>%
mutate(AGS_Land = paste0(substr(AGS, 1, nchar(AGS) - 6), "000000")) %>%
st_drop_geometry() %>%
left_join(
data_Land %>%
mutate(AGS_Land = substr(AGS, 1, 8)) %>%
st_drop_geometry() %>%
select(AGS_Land, Name_Land = Name),
by = "AGS_Land"
) %>%
st_sf(geometry = st_geometry(data_Gem))
# Prepare data_Kreis
data_Kreis <- data_Kreis %>%
mutate(AGS_Land = paste0(substr(AGS, 1, nchar(AGS) - 6), "000000")) %>%
st_drop_geometry() %>%
left_join(
data_Land %>%
mutate(AGS_Land = substr(AGS, 1, 8)) %>%
st_drop_geometry() %>%
select(AGS_Land, Name_Land = Name),
by = "AGS_Land"
) %>%
st_sf(geometry = st_geometry(data_Kreis))
level <- tolower(level)
# Filtering data based on MAP_AGS and level
if (MAP_AGS == "DG000000") {
if (level == "land") {
data <- data_Land
console <- sub("\\s+\\.$", ".", paste("Visualization of Germany by State, in year", year, ",and in geodata year", geo_year, "."))
} else if (level == "kreis") {
data <- data_Kreis
console <- sub("\\s+\\.$", ".", paste("Visualization of Germany by Districts, in year", year, ",and in geodata year", geo_year, "."))
} else if (level == "gemeinde") {
data <- data_Gem
console <- sub("\\s+\\.$", ".", paste("Visualization of Germany by Municipalities, in year", year, ",and in geodata year", geo_year, "."))
} else {
stop("Warning: Invalid level specified for the whole country (Germany). Levels of 'land', 'kreis' or 'gemeinde' are expected.")
}
} else if (MAP_AGS %in% data_Land$AGS) {
if (level == "kreis") {
data <- data_Kreis %>% filter(AGS_Land == MAP_AGS)
console <- sub("\\s+\\.$", ".", paste("Visualization of the", unique(data$Name_Land), "State by Districts, in year", year, ",and in geodata year", geo_year, "."))
} else if (level == "gemeinde") {
data <- data_Gem %>% filter(AGS_Land == MAP_AGS)
console <- sub("\\s+\\.$", ".", paste("Visualization of the", unique(data$Name_Land), "State by Municipalities, in year", year, ",and in geodata year", geo_year, "."))
} else {
stop("Warning: Invalid level specified for a Land in MAP_AGS. Only 'kreis' or 'gemeinde' levels are allowed.")
}
} else if (MAP_AGS %in% data_Kreis$AGS) {
if (level == "gemeinde") {
data <- data_Gem %>% filter(AGS_Kreis == MAP_AGS)
console <- sub("\\s+\\.$", ".", paste("Visualization of the", unique(data$Name_Kreis), "District by Municipalities, in year", year, ",and in geodata year", geo_year, "."))
} else {
stop("Warning: Invalid level specified for a Kreis in MAP_AGS. Only 'gemeinde' level is allowed.")
}
} else {
stop("Invalid MAP_AGS code provided.")
}
print(paste(console))
# Handle missing data: areas with no data are displayed in gray.
if (any(is.na(data[[var]]))) {
message("For some of the areas plotted, no data was found in the dataset you provided. These are displayed in gray.")
}
create_gradient_palette <- function(hex_color, n = 7, lighten = 0.8) {
lighter <- col2rgb(hex_color) / 255
lighter <- rgb(
pmin(lighter[1, ] + lighten * (1 - lighter[1, ]), 1),
pmin(lighter[2, ] + lighten * (1 - lighter[2, ]), 1),
pmin(lighter[3, ] + lighten * (1 - lighter[3, ]), 1)
)
colorRampPalette(c(lighter, hex_color))(n)
}
base_colors <- list(
"Red" = "#790F2D",
"Pink" = "#CA1643",
"Orange" = "#EF7D00",
"Yellow" = "#FFCC00",
"Purple" = "#6B517F",
"Green" = "#52AE32",
"Teal" = "#00A183",
"Blue" = "#004876"
)
custom_palettes <- lapply(base_colors, create_gradient_palette)
show_labels <- FALSE
labels_land <- c(
"DG000000" = "DE",
"08000000" = "BW",
"09000000" = "BY",
"11000000" = "BE",
"12000000" = "BB",
"04000000" = "HB",
"02000000" = "HH",
"06000000" = "HE",
"13000000" = "MV",
"03000000" = "NI",
"05000000" = "NW",
"07000000" = "RP",
"10000000" = "SL",
"14000000" = "SN",
"15000000" = "ST",
"01000000" = "SH",
"16000000" = "TH"
)
if (level == "land" && MAP_AGS == "DG000000") {
show_labels <- TRUE
data <- data %>%
mutate(
Shortcut_Land = labels_land[AGS],
label = paste(Shortcut_Land, ": ", scales::number(!!sym(var), big.mark = ".", decimal.mark = ","))
)
} else if (level == "kreis" && MAP_AGS %in% data_Land$AGS) {
show_labels <- TRUE
data <- data %>%
mutate(label = scales::number(!!sym(var), big.mark = ".", decimal.mark = ","))
} else if (level == "gemeinde" && MAP_AGS %in% data_Kreis$AGS) {
show_labels <- TRUE
data <- data %>%
mutate(label = scales::number(!!sym(var), big.mark = ".", decimal.mark = ","))
} else if (level == "gemeinde" && MAP_AGS %in% c("04000000", "02000000", "11000000")) {
show_labels <- TRUE
data <- data %>%
mutate(label = scales::number(!!sym(var), big.mark = ".", decimal.mark = ","))
}
if (add_labels == FALSE) {
show_labels <- FALSE
}
p <- ggplot(data, aes(fill = !!sym(var), geometry = geometry)) +
geom_sf(color = "grey65", lwd = 0.1, show.legend = TRUE)
palette <- tools::toTitleCase(tolower(palette))
if (palette %in% names(custom_palettes)) {
min_val <- min(data[[var]], na.rm = TRUE)
max_val <- max(data[[var]], na.rm = TRUE)
if (min_val == max_val || is.infinite(min_val) || is.infinite(max_val)) {
# Fallback to avoid errors with constant or invalid values
breaks <- c(min_val)
labels <- scales::label_number(big.mark = ".", decimal.mark = ",")(breaks)
} else {
breaks <- seq(min_val, max_val, length.out = 5)
labels <- scales::label_number(big.mark = ".", decimal.mark = ",")(breaks)
}
p <- p + scale_fill_gradientn(
colours = custom_palettes[[palette]],
name = paste(var, ""),
limits = c(min_val, max_val),
breaks = breaks,
labels = labels,
oob = scales::squish
)
} else {
stop(paste0("Palette '", palette, "' not recognized. Available options are: ",
paste(names(custom_palettes), collapse = ", ")))
}
if (show_labels) {
p <- p + geom_sf_text(
aes(label = label),
size = 3,
color = "black",
show.legend = FALSE
)
}
p <- p +
theme_minimal() +
theme(
legend.position = "right",
legend.key.height = unit(1, "cm"),
legend.key.width = unit(0.5, "cm"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank(),
plot.caption = element_text(size = 12, color = "gray40", face = "italic")
)
plot_list[[length(plot_list) + 1]] <- p
}
combined_plot <- Reduce(`+`, plot_list)
return(combined_plot)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.