library("httr")
library("tidyverse")
library("haven")
library("stringr")
# Tab separated
# URL <- "http://ws.ukdataservice.ac.uk/REST/Download/Download/DSO/7822tab_a9c0bf00b701b5c0dda6e9dbab19d913.zip"
# Stata
URL <- "http://ws.ukdataservice.ac.uk/REST/Download/Download/DSO/7822stata11_4bda01ce1faf9e42c2340e76bc12f386.zip"
dst_dir <- "data-raw/UKDA-7822-stata11"
dst_zip <- file.path("data-raw", basename(URL))
if (!dir.exists(dst_dir)) {
GET(URL, write_disk(dst_zip, overwrite = TRUE))
unzip(dst_zip, exdir = "data-raw")
unlink(dst_zip)
}
col_types <- cols(
id = col_integer(),
Union = col_character(),
Region = col_integer(),
County = col_integer(),
Type = col_integer(),
BoothGroup = col_integer(),
BoothDensity = col_double(),
Popn1871_2 = col_integer(),
MABI71 = col_integer(),
FABI71 = col_integer(),
ChABI71 = col_integer(),
MNotABI71 = col_integer(),
FNotABI71 = col_integer(),
ChNotABI71 = col_integer(),
MABO71 = col_integer(),
FABO71 = col_integer(),
ChABO71 = col_integer(),
MNotABO71 = col_integer(),
FNotABO71 = col_integer(),
ChNotABO71 = col_integer(),
MABI81 = col_integer(),
FABI81 = col_integer(),
ChABI81 = col_integer(),
MNotABI81 = col_integer(),
FNotABI81 = col_integer(),
ChNotABI81 = col_integer(),
MABO81 = col_integer(),
FABO81 = col_integer(),
ChABO81 = col_integer(),
MNotABO81 = col_integer(),
FNotABO81 = col_integer(),
ChNotABO81 = col_integer(),
Popn1881 = col_integer(),
popratio7181 = col_double(),
pauper71 = col_double(),
pauper81 = col_double(),
paupratio7181 = col_double(),
outratio71 = col_double(),
outratio81 = col_double(),
outratio7181 = col_double(),
MABI91 = col_integer(),
FABI91 = col_integer(),
ChABI91 = col_integer(),
MNotABI91 = col_integer(),
FNotABI91 = col_integer(),
ChNotABI91 = col_integer(),
MABO91 = col_integer(),
FABO91 = col_integer(),
ChABO91 = col_integer(),
MNotABO91 = col_integer(),
FNotABO91 = col_integer(),
ChNotABO91 = col_integer(),
Popn1881_2 = col_integer(),
Popn1891 = col_integer(),
popratio8191 = col_double(),
pauper91 = col_double(),
pauper81_2 = col_double(),
paupratio8191 = col_double(),
outratio91 = col_double(),
outratio8191 = col_double(),
mfab71I = col_double(),
mfab81I = col_double(),
mfab91I = col_double(),
mfab71O = col_double(),
mfab81O = col_double(),
mfab91O = col_double(),
mfnotab71I = col_double(),
mfnotab81I = col_double(),
mfnotab91I = col_double(),
mfnotab71O = col_double(),
mfnotab81O = col_double(),
mfnotab91O = col_double(),
adultchab71I = col_double(),
adultchab81I = col_double(),
adultchab91I = col_double(),
adultchnotab71I = col_double(),
adultchnotab81I = col_double(),
adultchnotab91I = col_double(),
adultchab71O = col_double(),
adultchab81O = col_double(),
adultchab91O = col_double(),
adultchnotab71O = col_double(),
adultchnotab81O = col_double(),
adultchnotab91O = col_double(),
Popn1871 = col_integer(),
PopnM1871 = col_integer(),
PopnF1871 = col_integer(),
M6571 = col_integer(),
PropM6571 = col_double(),
F6571 = col_integer(),
PropF6571 = col_double(),
PopnM1881 = col_integer(),
PopnF1881 = col_integer(),
M6581 = col_integer(),
PropM6581 = col_double(),
F6581 = col_integer(),
PropF6581 = col_double(),
PopnM1891 = col_integer(),
PopnF1891 = col_integer(),
M6591 = col_integer(),
PropM6591 = col_double(),
F6591 = col_integer(),
PropF6591 = col_double()
)
# yule <- read_tsv(file.path(dst_dir, "tab", "yule.tab"),
# col_types = col_types) %>%
yule_raw <- read_dta(file.path(dst_dir, "stata11", "yule_ip1.dta"))
name_replacements <-
names(yule_raw) %>%
str_replace("(18[789]1)$", "_\\1") %>%
str_replace("(7181|8191)$", "diff_\\1") %>%
str_replace("_7181", "_1881") %>%
str_replace("_8191", "_1891") %>%
str_replace("((?<!18)[789]1)$", "_18\\1") %>%
str_replace("(71|81|91)([IO])$", "\\2_18\\1") %>%
str_replace("(18[789]1)_2$", "2_\\1") %>%
str_replace("([789]1)_2$", "2_18\\1") %>%
{set_names(names(yule_raw), .)}
convert_labelled <- function(x) {
x[!is.finite(x)] <- NA
as.character(haven::as_factor(x))
}
yule <- yule_raw %>%
# many variables have NaN instead of NA
mutate_if(is.labelled, convert_labelled) %>%
# replace NaN with NA
mutate_if(is.numeric, funs(if_else(!is.finite(.), NA_real_, .))) %>%
# remove attributes
mutate_all(as.vector) %>%
rename_(.dots = name_replacements)
pctratiodiff <- function(x) {
z <- 100 * (x / lag(x))
# if lag(x) == 0 then z is missing
# z[!is.finite(z)] <- NA_real_
z
}
# Poor Law Union - Year data
yule %>%
gather(variable_year, value, matches("_18\\d{2}$")) %>%
select(ID, variable_year, value) %>%
separate(variable_year, c("variable", "year"), convert = TRUE) %>%
spread(variable, value) %>%
mutate(pauper2 = coalesce(pauper2, pauper),
Popn2 = coalesce(Popn2, Popn),
# proportion of total population 65 and above
Prop65 = (PopnF * PropF65 + PopnM * PropM65) / (PopnF + PopnM),
year = as.integer(year)) %>%
arrange(ID, year) %>%
group_by(ID) %>%
mutate(oldratiodiff = pctratiodiff(Prop65)) %>%
ungroup() %>%
arrange(ID, year) %>%
save_data("pauperism_year")
# Poor Law Union data
yule %>%
select(-matches("_18\\d\\d$")) %>%
save_data("pauperism_plu")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.