#devtools::install_github("ropensci/rnaturalearth")
library(rnaturalearth)
library(sf)
library(lwgeom)
library(wbstats)
library(readxl)
library(dplyr)
library(s2)
## STEP 1 Download main shape
W <- ne_countries(returnclass = "sf")
W$a3 = W$adm0_iso
W$a3[W$a3 == "PR1"] = "PRT" # portugal
W$a3[W$a3 == "SDZ"] = "SDN" # Sudan
W$a3[W$a3 == "PN1"] = "PNG" # Papua New Guinea
W$a3[W$name == "Kosovo"] = "XKX" # Kosovo
W$a3[W$name == "W. Sahara"] = "ESH"
W$a3[W$name == "Palestine"] = "PSE" # Palestine: PSX?
W$a3[W$name == "Falkland Is."] = "FLK"
W$a3[W$name == "Somaliland"] = "SOM"
# 15 and 18
which(!sf::st_is_valid(W))
# fixed 18 with:
W = st_make_valid(W)
#manually fix 15: Sudan
if (FALSE) {
# inspect
tmap_mode("view")
qtm(W[15,1]) + tm_mouse_coordinates()
}
co = st_coordinates(W$geometry[[15]])
# remove 2 spikes
co2 = co[setdiff(2:80, 48), ]
co2 = rbind(co2, co2[1,])
W$geometry[[15]][[1]][[1]] = co2[,1:2]
# also problems with Fiji (1) and Russia (19) because they cross the 180 meridian
split_multipolyon_at_180 = function(mp) {
co = st_coordinates(mp)
colist = unname(split.data.frame(co[,1:2], co[,4]))
colist = lapply(colist, function(col) {
col[,1][col[,1] < 0] = col[,1][col[,1] < 0] + 360
col
})
sfc1 = do.call(st_sfc, c(lapply(colist, function(c) {
st_polygon(list(c))
}), list(crs = 4326)))
m180 = st_sfc(st_linestring(cbind(180, c(88,0,-88))), crs = 4326)
sfc1b = st_split(sfc1, m180)
colist2 = unlist(lapply(sfc1b, function(x) {
lapply(x, function(x2) {
col = st_coordinates(x2)[,1:2]
if (any(col[,1] > 180)) {
col[,1] = col[,1] - 360
}
col
})
}), recursive = FALSE)
colist2 = lapply(colist2, function(x) {
list(x)
})
st_multipolygon(colist2)
}
W$geometry[[1]] = split_multipolyon_at_180(W$geometry[[1]])
W$geometry[[19]] = split_multipolyon_at_180(W$geometry[[19]])
# Manually fix Antarctica
mp = W$geometry[[160]]
co = st_coordinates(mp)
colist = unname(split.data.frame(co[,1:2], co[,4]))
col = colist[[8]]
mn = which.min(col[,1])
x1 = unname(col[mn, 1])
x2 = unname(col[mn+1, 1])
col = rbind(cbind(X = 180, Y = -90), col[1:mn, ], cbind(X = c(-180, 180), Y = -90))
colist[[8]] = col
colist2 = lapply(colist, function(x) {
list(x)
})
res = st_multipolygon(colist2)
W$geometry[[160]] = st_multipolygon(colist2)
##################
# area
W_detail <- ne_countries(returnclass = "sf", scale = 10)
W_detail = st_make_valid(W_detail)
W_detail = W_detail[match(W$name_long, W_detail$name_long), ]
W$area <- units::set_units(units::set_units(s2_area(W_detail), m^2), km^2)
## STEP 2 Download detailed shape to extract area size (these areas will be used as fallback source. The main source of area sizes is the World Bank (see STEP 5 below)
## STEP 3 Subset variables, check/fix ASCII incompatibility
keepVars <- c("a3", "name", "sovereignt",
"continent", "pop_est", "gdp_md", "economy", "income_grp", "area")
setdiff(keepVars, names(W))
W2 <- W[, keepVars]
nonASCII <- lapply(as.data.frame(W2[, c("name", "sovereignt",
"continent")]), function(x){
grep("I_WAS_NOT_ASCII", iconv(x, "latin1", "ASCII", sub="I_WAS_NOT_ASCII"))
})
W2$name[61] <- "Cote d'Ivoire"
W2$pop_est_dens <- W2$pop_est / as.numeric(W2$area)
#qtm(W2, fill="pop_est_dens", fill.style="kmeans")
## Clean data, and add gdp_cap_est
W2 <- W2 %>%
mutate(area_diff = NULL,
area_from_shp = NULL,
area_from_WB = NULL,
gdp_cap_est = gdp_md / pop_est * 1e6,
gdp_md_est = NULL
)
##############################################
############ Add other data sources
##############################################
##################
## HPI data
##################
# interactive map: https://happyplanetindex.org/countries/
tmpfile <- tempfile(fileext = ".xlsx")
utils::download.file("https://happyplanetindex.org/HPI_2024_public_dataset.xlsx", destfile = tmpfile)
hpi <- read_excel(tmpfile, sheet="1. All countries", skip=8, n_max = 149)
hpi = hpi[,c(3,7:10)]
names(hpi) = c("a3", "life_exp", "well_being", "footprint", "HPI")
##################
## World Bank - income inequality
##################
# https://data.worldbank.org/indicator/SI.POV.GINI
# https://blogs.worldbank.org/en/opendata/inside-the-world-bank-s-new-inequality-indicator--the-number-of-
library(WDI)
library(tidyverse)
income_inequality = WDI(indicator = "SI.POV.GINI") |>
dplyr::filter(!is.na(SI.POV.GINI)) |>
select(iso3c, year, SI.POV.GINI) |>
group_by(iso3c) |>
dplyr::filter(year == max(year)) |>
ungroup() |>
transmute(a3 = iso3c,
inequality = SI.POV.GINI)
##################
## gender inequality
##################
library(jsonlite)
# Fetch the data
df <- read.csv("https://ourworldindata.org/grapher/gender-inequality-index-from-the-human-development-report.csv?v=1&csvType=full&useColumnShortNames=true")
# Fetch the metadata
metadata <- fromJSON("https://ourworldindata.org/grapher/gender-inequality-index-from-the-human-development-report.metadata.json?v=1&csvType=full&useColumnShortNames=true")
# UNDP, Human Development Report (2024) – with minor processing by Our World in Data
gender_inequality = df |>
dplyr::filter(Code != "") |>
group_by(Code) |>
dplyr::filter(Year == max(Year)) |>
ungroup() |>
transmute(a3 = Code,
gender = gii)
##################
## press freedom
##################
# https://rsf.org/en/index
# World Press Freedom Index
tmpfile <- tempfile(fileext = ".csv")
utils::download.file("https://rsf.org/sites/default/files/import_classement/2024.csv", destfile = tmpfile)
rsf_source <- read_csv2(tmpfile)
rsf = rsf_source |>
transmute(a3 = ISO,
press = Score)
W3 = W2 |>
left_join(hpi) |>
left_join(income_inequality) |>
left_join(gender_inequality) |>
left_join(rsf)
W3$press[W3$a3 == "GRL"] = W3$press[W3$a3 == "DNK"] # according to map, Danmark data is used
W3$press[W3$name == "N. Cyprus"] = rsf_source$Score[rsf_source$Country_EN == "Northern Cyprus"] # joint country in this dataset
W3$press[W3$name == "W. Sahara"] = W3$press[W3$name == "Morocco"] # joint country in this dataset
## Tidy up
W4 <- W3 %>%
rename(iso_a3 = a3) |>
dplyr::select(iso_a3, name, sovereignt, continent, area, pop_est, pop_est_dens, economy, income_grp, gdp_cap_est,
life_exp, well_being, footprint, HPI,
inequality, gender, press, geometry) %>%
mutate(continent = factor(continent),
economy = factor(economy),
income_grp = factor(income_grp)) |>
arrange(name)
# checking data join results:
if (FALSE) {
######## HPI ###############
tm_shape(W4) +
tm_polygons("HPI", fill.scale = tm_scale_intervals(values = "rd_yl_gn"))
tm_shape(W4) +
tm_polygons("HPI", fill.scale = tm_scale_intervals(values = "classic_orange_white_blue_light", value.na = "#000000"),id= "name", hover=FALSE)
# compare with interactive map: https://happyplanetindex.org/countries/
# diff: French Guiana is apart from France
# missings:
W4[is.na(W4$HPI), ]
######## gender ###############
tm_shape(W4) +
tm_polygons("gender", fill.scale = tm_scale_intervals(breaks = seq(0,1,by =.1), values = "yl_or_rd", value.na = "#000000"))
# compare with https://ourworldindata.org/grapher/gender-inequality-index-from-the-human-development-report
# diff: French Guiana is apart from France
# missings
W4[is.na(W4$gender), ]
######## inequality ###############
tm_shape(W4) +
tm_polygons("inequality", fill.scale = tm_scale_intervals(values = "-classic_orange_white_blue_light", value.na = "#000000"),id= "name", hover=FALSE)
# compare with https://blogs.worldbank.org/en/opendata/inside-the-world-bank-s-new-inequality-indicator--the-number-of-
# missings
W4[is.na(W4$inequality), ]
######## press ###############
tm_shape(W4) +
tm_polygons("press")
# missings
W4[is.na(W4$press), ]
}
W4$iso_a3[W4$name == "N. Cyprus"] = "XNC" # just to make unique iso a3 codes
attr(W4, "agr")[] = c("identity", "identity", "constant", "constant", "aggregate",
"aggregate", "aggregate", "constant", "constant", "constant",
"constant", "constant", "constant", "constant", "constant",
"constant", "constant")
World = W4
ccodes = geodata::country_codes()
setdiff(World$iso_a3, ccodes$ISO3)
World$geometry = World |> st_geometry() |> st_sfc(precision = 1000) %>% st_as_binary %>% st_as_sfc
World = st_set_crs(World, 4326)
World$geometry = st_cast(World$geometry, "MULTIPOLYGON")
save(World, file="data/World.rda", compress="xz")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.