# Global knitr chunk options applied to the chunks of this vignette.
knitr::opts_chunk$set(
  collapse = TRUE,
  warning = FALSE,
  message = FALSE,
  comment = "#>"
)
This vignette documents how to access and explore the 2010 Indonesian census data, using the latest census data update (2015), and shows a few interesting variables to visualize and explore.
# Packages used by the vignette. Each call is a separate statement, so each
# one sits on its own line.
library(tidyverse)
library(readxl)
library(knitr)
library(ggplot2)
library(indoelectdata)

# Load the IDNS2010 dataset into the workspace
# (presumably shipped with indoelectdata -- confirm).
data(IDNS2010)
This part answers the question: which provinces have the highest unemployment rate in Indonesia?
# Data dictionary: pairs every column name of IDNS2010 with a label.
# The labels are matched by position, so their order must follow
# names(IDNS2010).
data_dictionary <- data.frame(
  Variable = names(IDNS2010),
  details = c(
    "ID",
    "Provinces",
    "PopulationIDN",
    "TotalHousehold",
    "PopulationMen",
    "PopulationWomen",
    "Household",
    "Incomeff",
    "Incomemerchant",
    "Incomegov",
    "Incomepriv",
    "Incomeentrep",
    "Incomefreelancer",
    "Medianage",
    "RatioParticp",
    "Unemployment2015",
    "Unemployment2016",
    "Unemployment2017",
    "Unemployment2018",
    "PopulationProjection2010",
    "PopulationProjection2015",
    "PopulationProjection2020",
    "PopulationProjection2025",
    "PopulationProjection2030",
    "PopulationProjection2035",
    "No"
  )
)
# Render the data dictionary as a formatted table.
library(knitr)
kable(data_dictionary)
So let's just look at some nice and simple plots using ggplot2.
library(ggthemes)

# NOTE(review): the plot below reads the IDNS2010 object, not IDNS;
# confirm whether this download is still needed at this point.
IDNS <- IDNS2010_download()

# Boxplot of Unemployment2015 by Provinces for the first six rows of
# IDNS2010. Provinces are ordered by -Unemployment2015, and coord_flip()
# swaps the axes so the province names run along the vertical axis.
ggplot(
  data = IDNS2010[1:6, ],
  aes(
    x = reorder(Provinces, -Unemployment2015),
    y = Unemployment2015,
    colour = Provinces
  )
) +
  geom_boxplot() +
  labs(x = "Provinces", y = "% Unemployment") +
  theme_minimal() +
  # Must come after theme_minimal(), which would otherwise reset it.
  theme(legend.position = "none") +
  coord_flip()
The results above show that the province of Aceh has the highest unemployment rate, at almost 15% of the population.
# Incomegov by ID for the first six rows of IDNS2010: a boxplot with the
# individual observations jittered on top. IDs are ordered by -Incomegov,
# and the axes are flipped so the IDs run along the vertical axis.
ggplot(
  data = IDNS2010[1:6, ],
  mapping = aes(
    x = reorder(ID, -Incomegov),
    y = Incomegov,
    colour = ID
  )
) +
  geom_boxplot() +
  geom_jitter(alpha = 0.35, size = 2, width = 0.3) +
  theme_minimal() +
  # Must come after theme_minimal(), which would otherwise reset it.
  theme(legend.position = "none") +
  labs(x = "ID", y = "Incomegov (RP)") +
  coord_flip()
The result above shows that Banten province (area ID 16) has the highest government-employee income, averaging 4.3 million rupiah/month, and DKI Jakarta is second, averaging 4.2 million rupiah/month.
In this part we want to see which provinces have the largest male population in Indonesia.
# PopulationMen by Provinces for the first six rows of IDNS2010: a boxplot
# with the individual observations jittered on top. Provinces are ordered
# by -PopulationMen, and the axes are flipped so the province names run
# along the vertical axis.
ggplot(
  data = IDNS2010[1:6, ],
  mapping = aes(
    x = reorder(Provinces, -PopulationMen),
    y = PopulationMen,
    colour = Provinces
  )
) +
  geom_boxplot() +
  geom_jitter(alpha = 0.35, size = 2, width = 0.3) +
  labs(x = "Provinces", y = "Population Men") +
  theme_minimal() +
  # Must come after theme_minimal(), which would otherwise reset it.
  theme(legend.position = "none") +
  coord_flip()
The province of Sumatera Utara has the largest male population in Indonesia.
In this part we want to see which provinces have the largest female population in Indonesia.
# PopulationWomen by Provinces for the first six rows of IDNS2010: a boxplot
# with the individual observations jittered on top. Provinces are ordered
# by -PopulationWomen, and the axes are flipped so the province names run
# along the vertical axis.
ggplot(
  data = IDNS2010[1:6, ],
  mapping = aes(
    x = reorder(Provinces, -PopulationWomen),
    y = PopulationWomen,
    colour = Provinces
  )
) +
  geom_boxplot() +
  geom_jitter(alpha = 0.35, size = 2, width = 0.3) +
  labs(x = "Provinces", y = "Population women") +
  theme_minimal() +
  # Must come after theme_minimal(), which would otherwise reset it.
  theme(legend.position = "none") +
  coord_flip()
As with the male population, Sumatera Utara has the largest female population.
This part answers the question: what will the future size and structure of the population look like?
library(tidyr)
library(dplyr)

# Reshape three projection columns of IDNS2010 into long format: the source
# column name goes into `Projection` and its value into `total`.
# pivot_longer() replaces the superseded gather(); the set of
# (Projection, total) pairs is the same, only the row order differs, which
# does not affect the plot below.
IDNS <- IDNS2010 %>%
  select(
    PopulationProjection2010,
    PopulationProjection2020,
    PopulationProjection2025
  ) %>%
  pivot_longer(
    cols = everything(),
    names_to = "Projection",
    values_to = "total"
  )

# Jittered points of `total` for each projection column. Projections are
# ordered by -total, and the axes are flipped so the projection names run
# along the vertical axis.
ggplot(
  data = IDNS,
  aes(
    x = reorder(Projection, -total),
    y = total,
    colour = Projection
  )
) +
  geom_jitter(alpha = 0.35, size = 2, width = 0.3) +
  theme_minimal() +
  labs(x = "population projection", y = "total") +
  # Must come after theme_minimal(), which would otherwise reset it.
  theme(legend.position = "none") +
  coord_flip()
From the population projection above, we can see that the population is projected to grow by 2025.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.