In choisy/censusVN2009: Shape files of Vietnam after 2008 with census 2009

knitr::knit_hooks$set(margin = function(before, options, envir) {
  if (before) par(mgp = c(1.5, .5, 0), bty = "n", plt = c(.105, .97, .13, .97))
  else NULL
})

knitr::opts_chunk$set(margin = TRUE, prompt = TRUE, comment = "",
                      collapse = TRUE, cache = FALSE,
                      dev.args = list(pointsize = 11), fig.height = 3.5,
                      fig.width = 4.24725, fig.retina = 2, fig.align = "center")

This package contains the polygons of the communes (11,163), districts (703) and provinces (63) of Vietnam after the last major administrative borders update of 2008, January 1st (essentially the merging of the provinces of Ha Tay and Ha Noi), together with the populations sizes from the 2009 census as attributes for each of the three levels (commune, district and province).

Installation and loading

You can install censusVN2009 from GitHub with:

# install.packages("devtools")
devtools::install_github("choisy/censusVN2009", build_vignettes = TRUE)

Once installed, you can load the package:

library(censusVN2009)

Usage examples

The package contains 6 SpatialPolygonsDataFrame: communes, districts, and provinces for low polygons resolution and communes_r, districts_r, and provinces_r for high polygons resolution. These objects can be loaded with the base R data function:

data(communes)
data(districts)
data(provinces)

And can be plotted with the sp plot method:

library(sp)
plot(communes)
plot(districts)
plot(provinces)

Note that the Paracel islands are included in the province of Da Nang:

plot(subset(provinces_r, province == "Da Nang"))

Likewise, the Spratley islands are included in the province of Khanh Hoa (Nha Trang):

plot(subset(provinces_r, province == "Khanh Hoa"))

We can see the number of polygons for each province:

library(magrittr)
setNames(sapply(provinces@polygons, function(x) length(x@Polygons)),
         provinces@data$province) %>%
  sort(TRUE) %>%
  head(13)

Quang Ninh and Hai Phong correspond to Ha Long bay, Kien Giang correpons to Phu Quoc, Ba Ria - Vung Tau corresponds to Can Dao, Quang Nam corresponds to Cham islands. We can trim off these islands off by extracting the largest polygon of each of these provinces:

provinces2 <- sptools::largest_polygons(provinces)
plot(provinces2)

We can decide to trim off islands only for selected provinces:

provinces3 <- sptools::largest_polygons(provinces, province == "Da Nang")
plot(provinces3)

provinces4 <- sptools::largest_polygons(provinces, province == "Khanh Hoa")
plot(provinces4)

provinces5 <- sptools::largest_polygons(provinces, province %in% c("Da Nang", "Khanh Hoa"))
plot(provinces5)

We can also fancy extracting the Paracels islands:

paracels <- sptools::rm_largest_polygons(subset(provinces_r, province == "Da Nang"))
plot(paracels)

Or the Spratley islands:

spratleys <- sptools::rm_largest_polygons(subset(provinces_r, province == "Khanh Hoa"))
plot(spratleys)

The attributes of these spatial objects are:

head(communes@data)
head(districts@data)
head(provinces@data)

And we can verify the consistency between the 3 spatial objects:

length(communes_r)
length(unique(communes_r$district_id))
length(districts_r)
setdiff(communes_r$district_id, districts_r$district_id)
length(unique(districts_r$province_id))
length(provinces_r)
setdiff(provinces_r$province_id, districts_r$province_id)

Total population size and area:

sum(provinces$population)
sum(provinces$area) / 100  # km2

Distributions of administrative areas:

hist(communes$area / 100, n = 100, col = "grey", main = NA,
     xlab = "area", ylab = "frequency")
hist(districts$area / 100, col = "grey", main = NA,
     xlab = "area", ylab = "frequency")
hist(provinces$area / 100, col = "grey", main = NA,
     xlab = "area", ylab = "frequency")

Distributions of adminitrative units' populations sizes:

hist(communes$population, n = 100, col = "grey", main = NA,
     xlab = "population size", ylab = "frequency")
hist(districts$population, col = "grey", main = NA,
     xlab = "population size", ylab = "frequency")
hist(provinces$population, n = 15, col = "grey", main = NA,
     xlab = "population size", ylab = "frequency")

Distributions of the administrative units' populations densities:

with(communes@data, hist(log10(100 * population / area), n = 100, col = "grey",
                         main = NA, xlab = "population density", ylab = "frequency"))
with(districts@data, hist(log10(100 * population / area), n = 100, col = "grey",
                         main = NA, xlab = "population density", ylab = "frequency"))
with(provinces@data, hist(log10(100 * population / area), n = 15, col = "grey",
                         main = NA, xlab = "population density", ylab = "frequency"))

Relationship between communes populations and areas:

plot(population ~ area, communes@data)
plot(population ~ area, communes@data, log = "xy")

Let's now map the populations sizes. Let's first make a palette of colors form RColorBrewer:

n <- 9
pal <- RColorBrewer::brewer.pal(n, "Blues")

Let's find a classes intervals definition:

library(classInt)
tmp <- classIntervals(districts$population, n = n, style = "quantile")
plot(tmp, pal = pal, main = NA)

Once we're satisfied with the class interval definition we can plot the map:

plot(districts, col = findColours(tmp, pal))

Same thing for the human population densities:

tmp <- classIntervals(districts$population / districts$area, n = n, style = "quantile")
plot(tmp, pal = pal, main = NA)
plot(districts, col = findColours(tmp, pal))