Nothing
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
library(ggplot2)
library(sf)
library(dplyr)
library(tidyr)
library(tidycensuskr)
library(biscale)
library(cowplot)
library(grid)
library(ggspatial)
## ----load-data_1--------------------------------------------------------------
# Load 2020 boundaries
data(adm2_sf_2020)
## ----filter-merge_1-----------------------------------------------------------
# Load census population data for 2010 and 2020
df_2010_pop <- anycensus(year = 2010,
codes = c("Gyeongsangnam-do", "Busan", "Ulsan"),
type = "population")
df_2020_pop <- anycensus(year = 2020,
codes = c("Gyeongsangnam-do", "Busan", "Ulsan"),
type = "population")
# Merge with spatial data and compute population change
sf_target <- adm2_sf_2020 |>
inner_join(df_2010_pop, by = "adm2_code") |>
inner_join(df_2020_pop, by = "adm2_code") |>
mutate(change = `all households_total_prs.y` - `all households_total_prs.x`)
## ----choropleth_1, fig.width=7, fig.height=5----------------------------------
# Choropleth map for population change
map <- ggplot(sf_target) +
geom_sf(aes(fill = change), color = "gray80", size = 0.1) +
labs(title = "Population change between 2010 and 2020") +
scale_fill_gradient2(
low = "#2C7BB6", mid = "white", high = "#D7191C",
midpoint = 0,
name = "Change"
) +
theme_void() +
annotation_scale(
location = "tr", width_hint = 0.25, text_cex = 0.7, line_width = 0.7
)
# National boundary (union of all districts)
sf_korea_boundary <- adm2_sf_2020 |>
summarise(geometry = st_union(geometry))
# Target region boundary (union of selected provinces/cities)
sf_target_boundary <- sf_target |>
summarise(geometry = st_union(geometry))
# Inset map: whole Korea + highlighted target region
korea_inset <- ggplot() +
geom_sf(data = sf_korea_boundary, fill = "grey90", color = "grey90") +
geom_sf(data = sf_target_boundary, fill = "grey30", color = "grey30") +
theme_void()
# Combine main map and inset
cowplot::ggdraw() +
cowplot::draw_plot(map) +
cowplot::draw_plot(
korea_inset, x = 0.7, y = 0.05, width = 0.25, height = 0.25
) +
draw_grob(grid::rectGrob(gp = gpar(col = "black", fill = NA, lwd = 0.6)),
x = 0.7, y = 0.05, width = 0.25, height = 0.25)
## ----filter-merge_2-----------------------------------------------------------
# Load census data
df_2020_pop <- anycensus(year = 2020,
type = "population")
df_2020_tax <- anycensus(year = 2020,
type = "tax")
# Merge population with boundaries
adm2_sf_2020_pop <- adm2_sf_2020 |>
left_join(df_2020_pop, by = "adm2_code") |>
mutate(
adm2_code_chr = as.character(adm2_code),
adm2_prefix4 = substr(adm2_code_chr, 1, 4),
last_digit = substr(adm2_code_chr, 5, 5)
)
# Aggregate smaller units (adm2_code ending not with 0) into 4-digit groups
sf_union_needed <- adm2_sf_2020_pop |>
filter(last_digit != "0") |>
group_by(adm2_prefix4) |>
summarise(
across(where(is.numeric), ~ sum(.x, na.rm = TRUE)),
geometry = st_union(geometry),
.groups = "drop"
) |>
mutate(adm2_code = as.numeric(paste0(adm2_prefix4, "0")))
# Combine aggregated units with existing "0"-ending districts
adm2_sf_2020_unioned <- adm2_sf_2020_pop |>
filter(last_digit == "0") |>
bind_rows(sf_union_needed)
# Join with tax data
sf_final <- adm2_sf_2020_unioned |>
left_join(df_2020_tax, by = "adm2_code")
## ----bivariate_2, fig.width=7, fig.height=5-----------------------------------
# Create 3x3 bivariate classes (population vs tax)
bi_data <- bi_class(
sf_final,
x = `all households_total_prs`,
y = income_general_mkr,
style = "quantile",
dim = 3
)
# Bivariate legend
legend <- bi_legend(
pal = "DkCyan", dim = 3,
xlab = "Low to High population",
ylab = "Low to High tax",
size = 7
)
# Mapping
cowplot::ggdraw() +
cowplot::draw_plot(
ggplot() +
geom_sf(data = bi_data, aes(fill = bi_class), color = NA) +
bi_scale_fill(pal = "DkCyan", dim = 3, guide = "none") +
bi_theme() +
labs(title = "Population vs Tax (2020)") +
theme(plot.title = element_text(size = 10))
) +
cowplot::draw_plot(legend, x = 0.7, y = 0.1, width = 0.3, height = 0.3)
## ----data_load_3, include=TRUE------------------------------------------------
# Load population data for the Seoul Metropolitan Area (SMA)
df_sma <- anycensus(
year = 2020,
codes = c("Seoul", "Gyeonggi-do", "Incheon"),
type = "population"
)
# Calculate sex ratio (males per 100 females)
df_sma <- df_sma |>
mutate(
sex_ratio = `all households_male_prs` / `all households_female_prs` * 100
)
# Extract overall distribution (all SMA combined)
df_all <- df_sma |>
select(sex_ratio)
## ----histogram_3, include=TRUE, fig.width=7, fig.height=3---------------------
ggplot() +
# Background: overall distribution across all SMA
geom_histogram(
data = df_all,
aes(x = sex_ratio, y = after_stat(density)),
bins = 20, fill = "grey80", color = NA, alpha = 1
) +
# Regional distributions
geom_histogram(
data = df_sma,
aes(x = sex_ratio, y = after_stat(density), fill = adm1),
bins = 20, alpha = 0.7, color = NA, position = "identity"
) +
facet_wrap(~ adm1, ncol = 3, scales = "free_y") +
scale_fill_manual(values = c(
"Seoul" = "darkorange",
"Incheon" = "purple",
"Gyeonggi-do" = "cyan4"
)) +
labs(
title = "Sex ratio distribution by region (SMA, 2020)",
x = "Sex ratio (male per 100 females)",
y = "Density"
) +
theme_bw()
## ----choropleth_3, fig.width=7, fig.height=3.5--------------------------------
# Merge SMA population data with boundaries
adm2_sf_2020_sma <- adm2_sf_2020 |>
inner_join(df_sma, by = "adm2_code")
# Choropleth map for sex ratio
ggplot(adm2_sf_2020_sma) +
geom_sf(aes(fill = sex_ratio), color = "gray", size = 0.01) +
scale_fill_gradient2(
low = "#2C7BB6", mid = "white", high = "#D7191C",
midpoint = 100, # 100 = equal male/female
name = "Sex ratio\n(males per 100 females)"
) +
labs(title = "Sex ratio in the Seoul Metropolitan Area (2020)") +
theme_void()
## ----pca_demo_1, fig.width=7.5, fig.height=7----------------------------------
library(tidycensuskr)
library(dplyr)
library(ggplot2)
library(janitor)
# Load data
sf_2020 <- data(adm2_sf_2020)
# housing
df_hou <- anycensus(year = 2020, type = "housing", level = "adm2")
df_hou <- df_hou |>
dplyr::group_by(adm1_code, adm2_code, year, type) |>
dplyr::mutate(dplyr::across(
dplyr::everything(),
~ ifelse(is.na(.), .[which(!is.na(.))], .)
)) |>
dplyr::ungroup() |>
dplyr::distinct()
# population
df_pop <- anycensus(year = 2020, type = "population", level = "adm2")
# mortality
df_mort <- anycensus(year = 2020, type = "mortality", level = "adm2")
# social security
df_ss <- anycensus(year = 2020, type = "social security", level = "adm2")
# Combine data frames
df_wide <- Reduce(
function(x, y) {
left_join(
x, y,
by = c("adm1", "adm1_code", "adm2", "adm2_code", "year")
)
},
list(
df_hou,
df_pop,
df_mort,
df_ss
)
) |>
dplyr::select(-dplyr::starts_with("type"))
# reorganize the variables by basic local governments
df_wide_re <-
df_wide |>
dplyr::mutate(adm2_code_ = paste0(substr(adm2_code, 1, 4), "0")) |>
dplyr::group_by(adm2_code_) |>
dplyr::summarize(
dplyr::across(
dplyr::matches("households|income|housing|grdp|security"),
~ sum(.x, na.rm = TRUE)
),
dplyr::across(
dplyr::matches("fertility|causes"),
~ mean(.x, na.rm = TRUE)
),
adm2 = dplyr::first(adm2)
) |>
dplyr::ungroup() |>
dplyr::transmute(
adm2_code_ = adm2_code_,
adm2 = adm2,
persons_per_housing = `all households_total_prs` / `housing types_total_cnt`,
sex_ratio = 100 * `all households_male_prs` / `all households_female_prs`,
mortality_rate = `all causes_total_p1p`,
fertility_rate = fertility_total_brt,
security_rate = 100 * (`basic living security_female_prs` + `basic living security_male_prs`) /
`all households_total_prs`
)
## ----pca_demo_2, fig.width=7.5, fig.height=7, dpi=200-------------------------
# Run PCA
prc_df <-
df_wide_re |>
dplyr::select(3:7) |>
as.data.frame() |>
prcomp(scale = TRUE)
# Rotation by variables
prc_df$rotation |> as.data.frame() |> round(3)
## ----pca_demo_3, fig.width=7.5, fig.height=7----------------------------------
# Proper labeling for biplot for BLGs
adm2labels <- paste0(df_wide_re$adm2, " (", df_wide_re$adm2_code_, ")")
rownames(prc_df$x) <- adm2labels
# Biplot with PC1 and PC2
biplot(prc_df, choices = c(1, 2), cex = 0.5, arrow.len = 0.2)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.