##' Load measles England & Wales data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
##' @import reshape2
loadBryanData <- function(path) {
load("ewm")
ms.ew.4494 <- ewM4494
times <- as.numeric(rownames(ms.ew.4494)) + 1900
years <- ceiling(times)
weeks <- sapply(unique(years), function(x) sum(times > (x - 1) & times <= x))
weeks <- unlist(sapply(weeks, function(x) rep(x, x)))
ms.ew.4494$time <- parse_date_time(paste((years - 1), round((times - years + 1) * weeks), 1, sep = "-"), "%Y %W %w")
setnames(ms.ew.4494, "rn", "time")
ms.ew.4494[, time := as.numeric(as.character(time)) + 1900]
births.ew.4494 <- data.table(ewB4494, keep.rownames = TRUE)
setnames(births.ew.4494, "rn", "time")
births.ew.4494[, time := as.numeric(as.character(time)) + 1900]
deaths.ew.4494 <- data.table(ewD4494, keep.rownames = TRUE)
setnames(deaths.ew.4494, "rn", "time")
deaths.ew.4494[, time := as.numeric(as.character(time)) + 1900]
pop.ew.4494 <- data.table(ewP4494, keep.rownames = TRUE)
setnames(pop.ew.4494, "rn", "time")
pop.ew.4494[, time := as.numeric(as.character(time)) + 1900]
coordinates.ew.4494 <- data.table(ewXY4494)
save(ms.ew.4494, births.ew.4494, deaths.ew.4494, pop.ew.4494,
coordinates.ew.4494, file = "ew_4494.rdata")
}
##' Load age-specific measles England & Wales data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
##' @import reshape2
loadMeaslesEWAge <- function(path) {
## bands into which we have the data split
yearbands <- c("4455", "5675", "7685", "8686", "8792", "9394", "9597")
bandnames <- sapply(yearbands, function(x) {
paste("19", substr(x, 1, 2), "-", substr(x, 3, 4), sep = "")
})
## read age-specific case data, 1944-1997 England & Wales
ms.ew.l <- list()
for (i in 1:length(yearbands)) {
ms.ew.l[[bandnames[i]]] <-
data.table(read.csv(paste(path, "/ages_", yearbands[i], ".csv",
sep = "")))
}
## time variables that we might be interested in
time.variables <- c("year", "quarter", "week")
## time variables that they all have in common (for merging later)
common.time.columns <- time.variables
## variable to hold a list of the different datasets
ms.ew.age.l <- list()
## extract year, quarter and age group data (ignoring ones of unknown age)
for (i in 1:length(ms.ew.l)) {
## extract age columns (they begin with an "x")
age.columns <- names(ms.ew.l[[i]])[grep("^X", names(ms.ew.l[[i]]))]
## extract time columns
time.columns <- intersect(time.variables, tolower(names(ms.ew.l[[i]])))
## convert colum names of time columns to lower case
time.column.numbers <- match(time.columns, tolower(names(ms.ew.l[[i]])))
setnames(ms.ew.l[[i]], time.column.numbers,
tolower(names(ms.ew.l[[i]])[time.column.numbers]))
## update common time columns
common.time.columns <- intersect(common.time.columns, time.columns)
ms.ew.l[[i]] <- ms.ew.l[[i]][, c(time.columns, age.columns), with = F]
ms.ew.age.l[[i]] <- data.table(melt(ms.ew.l[[i]], id.vars = time.columns))
setnames(ms.ew.age.l[[i]], "value", "abs.incidence")
## convert all columns to lower case
## only select variables that we are interested in
ms.ew.age.l[[i]] <- ms.ew.age.l[[i]][, lower.age.limit := NA_real_]
## extract number from original age data if there is a lower
## case m in the variable name, it's talking about month so we
## get the age by dividing by 12
ms.ew.age.l[[i]] <-
ms.ew.age.l[[i]][grep("X.*m", variable),
lower.age.limit := as.numeric(sub("\\..*", "",
sub("X", "", variable)))/12]
## extract number from original age data if there is no lower
## case m in the variable name, it's talking about years so we
## get the age directly
ms.ew.age.l[[i]] <-
ms.ew.age.l[[i]][grep("^X[^m]*$", variable),
lower.age.limit := as.numeric(sub("\\..*", "",
sub("X", "", variable)))]
## sum incidences for each age group
ms.ew.age.l[[i]] <-
ms.ew.age.l[[i]][, list(abs.incidence = sum(abs.incidence)),
by = c(time.columns, "lower.age.limit")]
## if year is given in [0, 100] add 1900
ms.ew.age.l[[i]] <-
ms.ew.age.l[[i]][year <= 100, year := as.integer(year + 1900)]
## update common age bands
if (i == 1) {
## first iteration, we start with the age bands in that group
common.lower.age.limits <- unique(ms.ew.age.l[[i]][, lower.age.limit])
} else {
common.lower.age.limits <-
intersect(common.lower.age.limits,
unique(ms.ew.age.l[[i]][, lower.age.limit]))
}
setkeyv(ms.ew.age.l[[i]], c(time.columns, "lower.age.limit"))
}
names(ms.ew.age.l) <- names(ms.ew.l)
ms.ew.list.pre1998 <- ms.ew.age.l
save(ms.ew.list.pre1998, file = "ms_ew_age_pre1998.RData")
## construct aggregate data frame
for (i in 1:length(ms.ew.age.l)) {
temp.ms.ew.age <-
ms.ew.age.l[[i]][, c(common.time.columns, "abs.incidence",
"lower.age.limit"), with = F]
## find in which interval of common lower age limits the lower
## age limits are
intervals <- findInterval(temp.ms.ew.age[, lower.age.limit],
common.lower.age.limits)
## convert them to limits
lower.limits <- common.lower.age.limits[intervals]
## assign them
temp.ms.ew.age <- temp.ms.ew.age[, lower.age.limit := lower.limits]
temp.ms.ew.age <-
temp.ms.ew.age[, list(abs.incidence = sum(abs.incidence)),
by = c(common.time.columns, "lower.age.limit")]
if (i == 1) {
ms.ew.age <- temp.ms.ew.age
} else {
ms.ew.age <- rbind(ms.ew.age, temp.ms.ew.age)
}
}
## get ECDC data (for recent E&W)
data(ms_europe_cases)
## reduce to England and Wales
ms.ew.cases.post2000 <-
ms.europe.cases[ReportingCountry == "UK" &
place != "UKM" & place != "UKN"]
## convert age to numeric
ms.ew.cases.post2000 <- ms.ew.cases.post2000[Age != "NULL"]
ms.ew.cases.post2000 <-
ms.ew.cases.post2000[, numeric.age := as.integer(as.character(Age))]
## find in which interval of common lower age limits the lower
## age limits are
intervals <- findInterval(ms.ew.cases.post2000[, numeric.age],
common.lower.age.limits)
## convert them to limits
lower.limits <- common.lower.age.limits[intervals]
## assign them
ms.ew.cases.post2000 <-
ms.ew.cases.post2000[, lower.age.limit := lower.limits]
ms.ew.post2000 <-
ms.ew.cases.post2000[, list(abs.incidence = length(numeric.age)),
by = c(common.time.columns, "lower.age.limit")]
ms.ew.age <- ms.ew.age[, source := "Registrar"]
ms.ew.post2000 <- ms.ew.post2000[, source := "ECDC"]
ms.ew.age <- rbind(ms.ew.age, ms.ew.post2000)
## get population data for relative incidence
data(pop_ew_age)
## find common lower age limits
common.lower.age.limits <- intersect(unique(pop.ew.age[, lower.age.limit]),
unique(ms.ew.age[, lower.age.limit]))
## find in which interval of common lower age limits the population
## lower age limits are
pop.intervals <- findInterval(pop.ew.age[, lower.age.limit],
common.lower.age.limits)
## convert them to limits
lower.limits <- common.lower.age.limits[pop.intervals]
## assign them
pop.ew.age <- pop.ew.age[, lower.age.limit := lower.limits]
## synthesise them
pop.ew.age <- pop.ew.age[, list(population = sum(population)),
by = list(year, lower.age.limit)]
## find in which interval of common lower age limits the chickenpox
## lower age limits are
ms.intervals <- findInterval(ms.ew.age[, lower.age.limit],
common.lower.age.limits)
## convert them to limits
lower.limits <- common.lower.age.limits[ms.intervals]
## assign them
ms.ew.age <- ms.ew.age[, lower.age.limit := lower.limits]
## synthesise them
ms.ew.age <- ms.ew.age[, list(abs.incidence = sum(abs.incidence)),
by = list(year, lower.age.limit)]
## fill missing population years with later/earlier reported data
missing.years <- setdiff(unique(ms.ew.age[, year]),
unique(pop.ew.age[, year]))
## forward loop (fill in future missing data from past years)
early.missing.years <-
missing.years[missing.years < min(unique(pop.ew.age[, year]))]
earliest.present <- pop.ew.age[year == min(year)]
for (early.year in early.missing.years) {
pop.ew.age <-
rbind(earliest.present[, year := early.year], pop.ew.age)
}
missing.years <- setdiff(missing.years, early.missing.years)
for (missing.year in missing.years) {
closest.year <- max(pop.ew.age[year < missing.year, year])
closest.present <- pop.ew.age[year == closest.year]
pop.ew.age <-
rbind( pop.ew.age, closest.present[, year := missing.year])
}
setkey(pop.ew.age, year, lower.age.limit)
setkey(ms.ew.age, year, lower.age.limit)
## now, merge them
ms.ew.age <- merge(ms.ew.age, pop.ew.age)
ms.ew.age <- ms.ew.age[, rel.incidence := abs.incidence / population]
setkeyv(ms.ew.age, c(common.time.columns, "lower.age.limit"))
save(ms.ew.age, file = "ms_ew_age.RData")
}
##' Load measles Europe data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
##' @import ISOweek
loadMeaslesEurope <- function(path) {
## ECDC case data
ms.europe.cases <-
data.table(read.csv(paste(path, "/ecdc_measles.csv", sep = "")))
## get best possible spatial resolution
ms.europe.cases <- ms.europe.cases[, place := PlaceOfNotification]
ms.europe.cases <- ms.europe.cases[place == "UNK", place := PlaceOfResidence]
ms.europe.cases <- ms.europe.cases[place == "NULL", place := PlaceOfResidence]
ms.europe.cases <- ms.europe.cases[place == "UNK", place := ReportingCountry]
ms.europe.cases <- ms.europe.cases[place == "NULL", place := ReportingCountry]
## set date
ms.europe.cases <- ms.europe.cases[, date := as.Date(DateUsedForStatisticsISO)]
ms.europe.cases <-
ms.europe.cases[grep("W", DateUsedForStatisticsISO),
date := ISOweek2date(paste(DateUsedForStatisticsISO,
1, sep = "-"))]
## set date parts
ms.europe.cases <-
ms.europe.cases[, week := as.integer(substr(DateUsedForStatisticsWeek, 6, 7))]
ms.europe.cases <-
ms.europe.cases[, month := as.integer(DateUsedForStatisticsMonth)]
ms.europe.cases <-
ms.europe.cases[, quarter := as.integer(DateUsedForStatisticsQuarter)]
ms.europe.cases <-
ms.europe.cases[, year := as.integer(DateUsedForStatisticsYear)]
setkey(ms.europe.cases, date)
save(ms.europe.cases, file = "measles_europe_cases.RData")
## spatial data
}
##' Load measles WHO data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
##' @import lubridate
loadMeaslesWHO <- function(path) {
## denominator for the relative incidence data
denominator <- 1e+5
## load data and define useful variables
ms.who.cases <-
data.table(read.csv(paste(path, "/who_measles.csv", sep = "")))
ms.who.cases <-
ms.who.cases[, rash.date := as.Date(DRash, "%d/%m/%Y %H:%M")]
ms.who.cases <- ms.who.cases[, week := isoweek(rash.date)]
ms.who.cases <- ms.who.cases[, year := year(rash.date)]
ms.who.cases <-
ms.who.cases[, ISOweek := paste(year, "-W",
sprintf("%02i", week), "-1", sep="")]
ms.who.cases <- ms.who.cases[, week.date := ISOweek2date(ISOweek)]
setnames(ms.who.cases, c("ISO3", "Region", "AgeAtRashOnset", "NumOfVaccines"),
c("country", "region", "age", "nvaccines"))
save(ms.who.cases, file = "ms_who_cases.RData")
## store only those that have a sensible data (100 seems to
## indicate unknown)
ms.who.age.cases <- ms.who.cases[age >= 0 & age < 100]
## get population data (for relative incidences)
data(pop_world_age)
common.lower.age.limits <-
intersect(unique(pop.world.age[, lower.age.limit]),
unique(ms.who.age.cases[, lower.age.limit]))
## find the age groups that cases are in
ms.intervals <- findInterval(ms.who.age.cases[, age],
common.lower.age.limits)
## convert them to limits
ms.lower.limits <- common.lower.age.limits[intervals]
## assign
ms.who.age.cases <- ms.who.age.cases[, lower.age.limit := ms.lower.limits]
save(ms.who.age.cases, file = "ms_who_age_cases.RData")
## find the age groups that cases are in
pop.intervals <- findInterval(pop.world.age[, lower.age.limit],
common.lower.age.limits)
## convert them to limits
pop.lower.limits <- common.lower.age.limits[pop.intervals]
## assign
pop.world.age <- pop.world.age[, lower.age.limit := pop.lower.limits]
## reduce
pop.world.age <-
pop.world.age[, list(population = sum(both.sexes.population)),
by = list(country, lower.age.limit)]
ms.who.age <-
ms.who.age.cases[, list(abs.incidence = length(DRYEAR)),
by = list(country, week.date, year, week,
lower.age.limit)]
setkey(ms.who.age, country, lower.age.limit)
setkey(pop.world.age, country, lower.age.limit)
ms.who.age <- merge(ms.who.age, pop.world.age, allow.cartesian = T)
ms.who.age <- ms.who.age[, rel.incidence := abs.incidence * denominator /
population]
ms.who.age[, population := NULL]
setkey(ms.who.age, country, year, week)
save(ms.who.age, file = "ms_who_age.RData")
}
##' Load measles serology data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
loadMeaslesSerology <- function(path) {
## get 2000 serology data
ms.sero <-
data.table(read.csv(paste(path, "/measles_serology.csv", sep = "")))
setnames(ms.sero, names(ms.sero), tolower(names(ms.sero)))
ms.sero <- ms.sero[, exact.age := as.numeric(as.character(exact.age))]
ms.sero <- ms.sero[, non.negative := 1]
ms.sero <- ms.sero[stdres == "NEG", non.negative := 0]
setkey(ms.sero, country, age1)
save(ms.sero, file = "ms_sero.RData")
}
##' Load measles serology data from Fine(1950)
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
loadMeaslesSerologyFine1950 <- function(path) {
## get 2000 serology data
ms.sero.temp <-
data.table(read.csv(paste(path, "/measles_serology_1950_fine.csv", sep = "")))
ms.sero.temp <- rbind(data.table(age = 0, immunity = 1), ms.sero)
ms.approx <- approx(ms.sero.temp[, age], ms.sero.temp[, immunity], 1:20)
ms.sero.fine <- data.table(matrix(unlist(ms.approx), ncol = 2))
setnames(ms.sero.fine, 1:2, names(ms.sero.temp))
ms.sero.fine <- ms.sero.fine[, immunity := immunity / max(immunity)]
setkey(ms.sero.fine, age)
save(ms.sero.fine, file = "ms_sero_fine_1950.RData")
}
##' Load MMR world data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
##' @import reshape2
##' @importFrom readxl read_excel
loadMMRWorld <- function(path) {
clean_mcv_data <- function(workbook, sheet)
{
mcv.table <- data.table(read_excel(workbook, sheet))
setnames(mcv.table, names(mcv.table), tolower(names(mcv.table)))
setnames(mcv.table, "cname", "country")
mcv.empty.columns <- apply(mcv.table, 2, function(x) {all(is.na(x))})
if (any(mcv.empty.columns))
{
mcv.table <-
mcv.table[, !mcv.empty.columns, with = F]
}
mcv.region <-
grep("region", colnames(mcv.table), value = TRUE)
setnames(mcv.table, mcv.region, "who_region")
mcv.long <-
data.table(melt(mcv.table,
id.vars = c("who_region", "iso_code", "country", "vaccine")))
setnames(mcv.long, "variable", "year")
setnames(mcv.long, "value", "uptake")
mcv.long <- mcv.long[, year := as.integer(gsub("x", "", year))]
mcv.long <- mcv.long[!is.na(uptake)]
mcv.long <- clean_countries(mcv.long)
return(mcv.long)
}
mcv1.world <- clean_mcv_data(paste(path, "/coverage_series.xls", sep = ""), "MCV")
mcv2.world <- clean_mcv_data(paste(path, "/coverage_series.xls", sep = ""), "MCV2")
mcv.world <- rbind(mcv1.world, mcv2.world)
save(mcv.world, file = "mcv_world.RData")
mcv1.world.estimate <- clean_mcv_data(paste(path, "/coverage_estimates_series.xls", sep = ""), "MCV")
mcv2.world.estimate <- clean_mcv_data(paste(path, "/coverage_estimates_series.xls", sep = ""), "MCV2")
mcv.world.estimate <- rbind(mcv1.world.estimate, mcv2.world.estimate)
save(mcv.world.estimate, file = "mcv_world_estimate.RData")
}
##' Clean country names
##'
##' @param table data table which contains country names
##' @param column the column with country names
##' @return cleaned data table
##' @author Sebastian Funk
clean_countries <- function(table, column = "country")
{
dt <- data.table(table)
dt[get(column) == "Czech Republic (the)", paste(column) := "Czech Republic"]
dt[get(column) == "Netherlands (the)", paste(column) := "Netherlands"]
dt[get(column) == "Moldova", paste(column) := "Rep of Moldova"]
dt[get(column) == "Republic of Moldova (the)", paste(column) := "Rep of Moldova"]
dt[get(column) == "Russia", paste(column) := "Russian Federation"]
dt[get(column) == "Russian Federation (the)",
paste(column) := "Russian Federation"]
dt[get(column) == "Macedonia", paste(column) := "FYR Macedonia"]
dt[get(column) == "The former Yugoslav Republic of Macedonia",
paste(column) := "FYR Macedonia"]
dt[get(column) == "United Kingdom of Great Britain and Northern Ireland (the)",
paste(column) := "United Kingdom"]
return(dt)
}
##' Load MMR England & Wales data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
loadMMREW <- function(path){
## join vaccination data
vaccine.ew <-
data.table(read.csv(paste(path, "/vaccine_ew.csv", sep = "")))
setnames(vaccine.ew, "X", "year")
save(vaccine.ew, file = "vaccine_ew.RData")
}
##' Load Chickenpox England & Wales age data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
loadChickenpoxEWAge <- function(path) {
## denominator for the absolute incidence data
denominator <- 1e+5
## age groups
agegroups <- c("0_4", "5_14", "15_44", "45_64", "65_")
cpx.ew.age <- data.table(week = integer(0), year = integer(0),
agegroup = character(0),
rel.incidence = numeric(0))
## read in files
for (age in agegroups) {
temp.cpx.ew.age <-
data.table(read.csv(paste(path, "/cpx_data_", age, ".csv", sep = ""),
header = T, row.names = 1))
for (i in 1:ncol(temp.cpx.ew.age)) {
year <- as.integer(sub("X", "", names(temp.cpx.ew.age)[i]))
incidence <- unlist(unname(temp.cpx.ew.age[, i, with = F]))
cpx.ew.age <-
rbind(cpx.ew.age,
data.frame(week = as.integer(rownames(temp.cpx.ew.age)),
year = year,
agegroup = age,
rel.incidence = incidence / denominator))
}
}
## clean up data set and define useful variables
cpx.ew.age <- cpx.ew.age[!is.na(rel.incidence)]
cpx.ew.age <-
cpx.ew.age[, lower.age.limit := as.numeric(sub("_.*", "", agegroup))]
## get population data for absolute incidence
data(pop_ew_age)
## determine lower age limits of chickenpox data
lower.age.limits <- unique(cpx.ew.age[, lower.age.limit])
## find in which interval of lower age limits the population lower age limits
pop.intervals <- findInterval(pop.ew.age[, lower.age.limit],
lower.age.limits)
## convert them to limits
lower.limits <- lower.age.limits[pop.intervals]
## assign them
pop.ew.age <-
pop.ew.age[, lower.age.limit := lower.limits]
pop.ew.age <-
pop.ew.age[, list(population = sum(population)),
by = list(year, lower.age.limit)]
setkey(cpx.ew.age, year, week, lower.age.limit)
## fill missing population years with later/earlier reported data
missing.years <- setdiff(unique(cpx.ew.age[, year]),
unique(pop.ew.age[, year]))
## forward loop (fill in future missing data from past years)
early.missing.years <-
missing.years[missing.years < min(unique(pop.ew.age[, year]))]
earliest.present <- pop.ew.age[year == min(year)]
for (early.year in early.missing.years) {
pop.ew.age <-
rbind(earliest.present[, year := early.year], pop.ew.age)
}
missing.years <- setdiff(missing.years, early.missing.years)
for (missing.year in missing.years) {
closest.year <- max(pop.ew.age[year < missing.year, year])
closest.present <- pop.ew.age[year == closest.year]
pop.ew.age <-
rbind( pop.ew.age, closest.present[, year := missing.year])
}
setkey(pop.ew.age, year, lower.age.limit)
## merge chickenpox data with population data
cpx.ew.age <- merge(cpx.ew.age, pop.ew.age)
cpx.ew.age <- cpx.ew.age[, abs.incidence :=
round(rel.incidence * population)]
cpx.ew.age <- cpx.ew.age[, agegroup := NULL]
setkey(cpx.ew.age, year, week, lower.age.limit)
save(cpx.ew.age, file = "cpx_ew_age.RData")
## overall data
cpx.ew <- data.table(week = integer(0), year = integer(0),
rel.incidence = numeric(0))
## read in files
temp.cpx.ew <-
data.table(read.csv(paste(path, "/cpx_data_all.csv", sep = ""),
header = T, row.names = 1))
for (i in 1:ncol(temp.cpx.ew)) {
year <- as.integer(sub("X", "", names(temp.cpx.ew)[i]))
incidence <- unlist(unname(temp.cpx.ew[, i, with = F]))
cpx.ew <-
rbind(cpx.ew, data.frame(week = as.integer(rownames(temp.cpx.ew)),
year = year,
rel.incidence = incidence / denominator))
}
## clean up data set and define useful variables
cpx.ew <- cpx.ew[!is.na(rel.incidence)]
setkey(cpx.ew, year, week)
## get population data for absolute incidence
data(pop_ew)
## fill missing population years with later/earlier reported data
missing.years <- setdiff(unique(cpx.ew[, year]),
unique(pop.ew[, year]))
## forward loop (fill in future missing data from past years)
early.missing.years <-
missing.years[missing.years < min(unique(pop.ew[, year]))]
earliest.present <- pop.ew[year == min(year)]
for (early.year in early.missing.years) {
pop.ew <-
rbind(earliest.present[, year := early.year], pop.ew)
}
missing.years <- setdiff(missing.years, early.missing.years)
for (missing.year in missing.years) {
closest.year <- max(pop.ew[year < missing.year, year])
closest.present <- pop.ew[year == closest.year]
pop.ew <-
rbind(pop.ew, closest.present[, year := missing.year])
}
setkey(pop.ew, year)
## merge chickenpox data with population data
cpx.ew <- merge(cpx.ew, pop.ew)
cpx.ew <- cpx.ew[, abs.incidence :=
round(rel.incidence * population)]
setkey(cpx.ew, year, week)
save(cpx.ew, file = "cpx_ew.RData")
}
##' Load Chickenpox serology data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
loadChickenpoxSerology <- function(path) {
cpx.sero <-
data.table(read.csv(paste(path, "/cpx_serology.csv", sep = "")))
cpx.sero <- melt(cpx.sero, id.vars = "year")
cpx.sero <- cpx.sero[, lower.age.limit := as.numeric(gsub("X", "", variable))]
cpx.sero <- cpx.sero[, antibodies := as.integer(gsub("/.*$", "", value))]
cpx.sero <- cpx.sero[, samples := as.integer(gsub("^.*/", "", value))]
cpx.sero <- cpx.sero[, variable := NULL]
cpx.sero <- cpx.sero[, value := NULL]
setkey(cpx.sero, year, lower.age.limit)
save(cpx.sero, file = "cpx_sero.RData")
}
##' Load Copenhagen disease data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
##' @import reshape2
loadCopenhagenDisease <- function(path) {
## load csv file
cph <- data.table(read.csv(paste(path, "/4other_cph_diseases.csv",
sep = "")))
## get columns in csv file that correspond to diseases
disease.columns <- which(grepl("^[^(X|tot|date)]", names(cph)))
disease.names <- tolower(names(cph)[disease.columns])
## for the loop later
disease.columns <- c(disease.columns, ncol(cph) + 1)
date.column <- which(grepl("^date", names(cph)))
## get date
setnames(cph, names(cph), as.character(unlist(unname(cph[1]))))
setnames(cph, date.column, "date")
cph <- cph[-1]
cph.disease.total.l <- list()
for (i in 1:length(disease.names)) {
cph.disease.total.l[[disease.names[i]]] <-
cph[, c("date", "tot"), with = F]
cph.disease.total.l[[i]] <-
cph.disease.total.l[[i]][, cases := as.integer(as.character(tot))]
cph.disease.total.l[[i]] <-
cph.disease.total.l[[i]][!is.na(cases)]
cph.disease.total.l[[i]] <-
cph.disease.total.l[[i]][, name := disease.names[i]]
cph.disease.total.l[[i]] <-
cph.disease.total.l[[i]][, list(date, cases, name)]
}
if (length(cph.disease.total.l) > 0) {
cph.disease.total <- cph.disease.total.l[[1]]
for (disease in names(cph.disease.total.l)[-1]) {
cph.disease.total <-
rbind(cph.disease.total, cph.disease.total.l[[disease]])
}
}
setkey(cph.disease.total, date)
save(cph.disease.total, file = "cph_disease.RData")
cph.disease.total <- cph.disease.total.l[[1]]
for (i in names(cph.disease.total.l)[-1]) {
cph.disease.total <- rbind(cph.disease.total, cph.disease.total.l[[i]])
}
## get different diseases
cph.disease.age.l <- list()
for (i in seq(1, length(disease.columns) - 1)) {
cph.disease.age.l[[disease.names[[i]]]] <-
cph[, c(date.column,
seq(disease.columns[i], disease.columns[i + 1] - 1)),
with = F]
## get age groups
cph.disease.age.l[[i]] <-
cph.disease.age.l[[i]][, name := disease.names[i]]
cph.disease.age.l[[i]] <-
data.table(melt(cph.disease.age.l[[i]], id.vars=c("date", "name")))
cph.disease.age.l[[i]] <-
cph.disease.age.l[[i]][value != ""]
cph.disease.age.l[[i]] <-
cph.disease.age.l[[i]][, cases := as.integer(as.character(value))]
cph.disease.age.l[[i]] <-
cph.disease.age.l[[i]][, date := as.Date(date, format="%m/%d/%Y")]
setnames(cph.disease.age.l[[i]], "variable", "agegroup")
cph.disease.age.l[[i]] <-
cph.disease.age.l[[i]][grepl("[[:digit:]]", agegroup)]
cph.disease.age.l[[i]] <-
cph.disease.age.l[[i]][, lower.age.limit := 0]
agegroups <- as.character(unique(cph.disease.age.l[[i]][, agegroup]))
for (current.agegroup in agegroups) {
if (!grepl("^<", current.agegroup)) {
digits <- regexpr("[[:digit:]]+", current.agegroup)
if (digits > 0) {
limit <- regmatches(current.agegroup, digits)
cph.disease.age.l[[i]] <-
cph.disease.age.l[[i]][agegroup == current.agegroup,
lower.age.limit := as.numeric(limit)]
}
}
}
## sum within age groups
cph.disease.age.l[[i]] <-
cph.disease.age.l[[i]][, list(cases = sum(cases)),
by = list(date, name,
lower.age.limit)]
## clean
cph.disease.age.l[[i]] <- cph.disease.age.l[[i]][!is.na(cases)]
}
if (length(cph.disease.age.l) > 0) {
cph.disease.age <- cph.disease.age.l[[1]]
for (disease in names(cph.disease.age.l)[-1]) {
cph.disease.age <-
rbind(cph.disease.age, cph.disease.age.l[[disease]])
}
}
setkey(cph.disease.age, date, name, lower.age.limit)
save(cph.disease.age, file = "cph_disease_age.RData")
}
##' Load Measles England & Wales confirmation data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
loadMeaslesEWConfirmation <- function(path) {
ms.ew.conf <-
data.table(read.csv(paste(path, "/hpa_confirmations.csv", sep = "")))
setnames(ms.ew.conf, names(ms.ew.conf), tolower(names(ms.ew.conf)))
ms.ew.conf <-
ms.ew.conf[, percent.positive := number.confirmed / number.tested]
ms.ew.conf <- ms.ew.conf[, quarter := as.integer(substr(quarter, 1, 1))]
setkey(ms.ew.conf, year, quarter)
save(ms.ew.conf, file = "ms_ew_conf.RData")
}
##' Load population England & Wales age data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
##' @import reshape2
loadPopulationEWAge <- function(path, mult.factor = 1000) {
countries <- c("england", "wales")
## first dataset
for (i in 1:length(countries)) {
## read file
temp.pop.ew.age <-
data.table(read.csv(paste(path, "/pop_age_", countries[i],
".csv", sep = ""),
header = T), country = countries[i])
## melt
temp.pop.ew.age <- melt(temp.pop.ew.age, id.vars = c("year", "country"))
## start new data frame if first country, otherwise append
if (i == 1) {
pop.ew.age <- temp.pop.ew.age
} else {
pop.ew.age <- rbind(pop.ew.age, temp.pop.ew.age)
}
}
## convert age classses to lower age limits
pop.ew.age <-
pop.ew.age[grep("^X[^m]*$", variable),
lower.age.limit := as.numeric(sub("\\..*", "",
sub("X", "", variable)))]
## sum by age year and age group, multiply by factor according to
## how the data is stored
pop.ew.age <-
pop.ew.age[, list(population = sum(value) * mult.factor),
by = list(year, lower.age.limit)]
## clean up
pop.ew.age <- pop.ew.age[!is.na(lower.age.limit)]
pop.ew.age <- pop.ew.age[!is.na(population)]
## second dataset
pop.ew.age.2 <-
data.table(read.csv(paste(path, "/pop_old.csv", sep = "")))
pop.ew.age.2[, 2:nrow(pop.ew.age.2)] <-
as.data.table(lapply(pop.ew.age.2[, 2:nrow(pop.ew.age.2), with = F],
as.numeric))
setnames(pop.ew.age.2, "Year", "lower.age.limit")
pop.ew.age.2 <- melt(pop.ew.age.2[c(4:8, 10:nrow(pop.ew.age.2))],
id.vars = "lower.age.limit", variable.name = "year",
value.name = "population")
pop.ew.age.2 <- pop.ew.age.2[, year := as.integer(gsub("^X", "", year))]
pop.ew.age.2 <-
pop.ew.age.2[, lower.age.limit := as.integer(as.character(lower.age.limit))]
pop.ew.age.2 <- pop.ew.age.2[, population := as.integer(mult.factor * population)]
pop.ew.age.2[, lower.age.limit :=
reduce.agegroups(lower.age.limit,
unique(pop.ew.age[, lower.age.limit]))]
pop.ew.age.2 <- pop.ew.age.2[, list(population = sum(population)),
by = list(year, lower.age.limit)]
pop.ew.age <- rbind(pop.ew.age.2, pop.ew.age)
setkey(pop.ew.age, year, lower.age.limit)
save(pop.ew.age, file = "pop_ew_age.RData")
pop.ew <- pop.ew.age[, list(population = sum(population)), by = list(year)]
save(pop.ew, file = "pop_ew.RData")
}
##' Load population Europe data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
loadPopulationEurope <- function(path) {
## load European population CSV
pop.europe <-
data.table(read.csv(paste(path, "/nuts_populations.csv", sep = "")))
setnames(pop.europe, "TIME", "year")
## clean data
pop.europe <- pop.europe[, place := as.character(NUTS)]
pop.europe <- pop.europe[!(place == "")]
## fill missing data in both directions
## mark filled data in the data
pop.europe <- pop.europe[, missing := F]
years <- unique(pop.europe[, year])
## forward loop (fill in future missing data from past years)
for (i in 2:(length(years))) {
missing.places <- pop.europe[Value == ":" & year == years[i], place]
pop.europe[place %in% missing.places, missing := T]
## if places have missing data, look up the previous year in data
lookup.previous.year <-
pop.europe[place %in% missing.places & year == years[i - 1], Value]
pop.europe <- pop.europe[place %in% missing.places & year == years[i],
Value := lookup.previous.year]
}
## reverse loop (fill in past missing data from future years)
for (i in rev(1:(length(years) - 1))) {
missing.places <- pop.europe[Value == ":" & year == years[i], place]
## if places have missing data, look up the next year in data
pop.europe[place %in% missing.places, missing := T]
lookup.next.year <-
pop.europe[place %in% missing.places & year == years[i + 1], Value]
pop.europe <- pop.europe[place %in% missing.places & year == years[i],
Value := lookup.next.year]
}
pop.europe <- pop.europe[, population := as.numeric(gsub(" ", "", Value))]
pop.europe <- pop.europe[, list(year, place, population)]
setkey(pop.europe, year, place)
save(pop.europe, file = "pop_europe.RData")
}
##' Load population world data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
loadPopulationWorldAge <- function(path) {
## load European population CSV
pop.world.age <-
data.table(read.csv(paste(path, "/country_age_structure.csv",
sep = "")))
setnames(pop.world.age, names(pop.world.age), tolower(names(pop.world.age)))
## get lower age limits
lower.limits <- sub("[-+].*$", "", pop.world.age[, age])
pop.world.age <- pop.world.age[, lower.age.limit := as.numeric(lower.limits)]
pop.world.age <- pop.world.age[!is.na(lower.age.limit)]
pop.world.age <- clean_countries(pop.world.age)
save(pop.world.age, file = "pop_world_age.RData")
}
##' Load demographic England & Wales data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
##' @importFrom readxl read_excel
loadDemographicsEW <- function(path) {
demo_sheets <- list(births = "Live births", deaths = "Deaths")
for (sheet in names(demo_sheets))
{
demo.ew <- read_excel(paste(path, "annualreferencetablesummer2015_tcm77-416007.xlsx", sep = "/"), sheet = demo_sheets[[sheet]])
header_found <- FALSE
n <- 0
while (!header_found )
{
n <- n + 1
col <- grep("England and Wales", demo.ew[n, ])
if (length(col) > 0)
{
header_found <- TRUE
}
}
demo.ew <- demo.ew[seq(n + 1, nrow(demo.ew)), c(1, min(col))]
colnames(demo.ew) <- c("year", sheet)
demo.ew$year <- as.integer(demo.ew$year)
demo.ew[[sheet]] <- as.integer(demo.ew[[sheet]])
demo.ew <- demo.ew[!is.na(demo.ew$year), ]
var_name <- paste(sheet, "ew", sep = ".")
assign(var_name, data.table(demo.ew))
save(list = var_name, file = paste0(sheet, "_ew.RData"))
}
}
##' Load POLYMOD data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
loadPolymod <- function(path) {
## read in participant data
participants <-
fread(paste(path, "/participants_final_v3.csv", sep = ""))
## read in contact data
contacts <-
fread(paste(path, "/contacts_final_v2_with_weights.csv", sep = ""))
participants[country == "BE", country := "Belgium"]
participants[country == "DE", country := "Germany"]
participants[country == "FI", country := "Finland"]
participants[country == "GB", country := "United Kingdom"]
participants[country == "IT", country := "Italy"]
participants[country == "LU", country := "Luxembourg"]
participants[country == "NL", country := "Netherlands"]
participants[country == "PL", country := "Poland"]
participants[, country := factor(country)]
contacts[country == "BE", country := "Belgium"]
contacts[country == "DE", country := "Germany"]
contacts[country == "FI", country := "Finland"]
contacts[country == "GB", country := "United Kingdom"]
contacts[country == "IT", country := "Italy"]
contacts[country == "LU", country := "Luxembourg"]
contacts[country == "NL", country := "Netherlands"]
contacts[country == "PL", country := "Poland"]
contacts[, country := factor(country)]
polymod <- list(participants = participants, contacts = contacts)
save(polymod, file = "polymod.RData")
}
##' Load European maps
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
##' @import maptools ggplot2
loadEuropeanMaps <- function(shapePath) {
## prepare map of Europe
europe.shape <-
readShapePoly(paste(shapePath, "/NUTS_RG_60M_2010.shp", sep = ""))
europe.data <- data.table(europe.shape@data)
setnames(europe.data, "NUTS_ID", "place")
setkey(europe.data, "place")
gpclibPermit()
europe <- data.table(fortify(europe.shape, region = "NUTS_ID"))
setnames(europe, "id", "place")
setkey(europe, "place")
europe <- merge(europe, europe.data, by="place")
save(europe, file = "europe.RData")
}
##' Load MMR European timing data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
loadMMREuropeTimings <- function(path) {
## read in vaccination timings
mmr.timings <-
data.table(read.csv(paste(path, "/europe_mmr_timings.csv", sep = "")))
for (coverage in c("mcv1", "mcv2")) {
temp.mmr.timings <-
unlist(strsplit(as.character(mmr.timings[, get(coverage)]), " "))
dt.mmr.timings <- data.table(matrix(temp.mmr.timings, ncol = 2, byrow = T))
setnames(dt.mmr.timings, "V1", "number")
setnames(dt.mmr.timings, "V2", "unit")
dt.mmr.timings <-
dt.mmr.timings[, min := as.numeric(gsub("-.*$", "",
as.character(dt.mmr.timings[, number])))]
dt.mmr.timings <-
dt.mmr.timings[, max := as.numeric(gsub("^.*-", "",
as.character(dt.mmr.timings[, number])))]
dt.mmr.timings[grep("month", unit), min := min / 12]
dt.mmr.timings[grep("month", unit), max := max / 12]
setnames(dt.mmr.timings, "min", paste(coverage, "min", sep = "."))
setnames(dt.mmr.timings, "max", paste(coverage, "max", sep = "."))
dt.mmr.timings <- dt.mmr.timings[, number := NULL]
dt.mmr.timings <- dt.mmr.timings[, unit := NULL]
mmr.timings <- cbind(mmr.timings, dt.mmr.timings)
}
mmr.timings <- clean_countries(mmr.timings)
save(mmr.timings, file = "europe_mmr_timings.RData")
}
##' Load measles world data
##'
##' @param path Path in which to find the raw data
##' @importFrom readxl read_excel
##' @author Sebastian Funk
loadWorldMeaslesData <- function(path) {
## read in world measles data
incidence.workbook <- paste0(path, "/who_incidence.xlsx")
ms.world <- data.table(read_excel(incidence.workbook, "Measles"))
setnames(ms.world, names(ms.world), tolower(names(ms.world)))
ms.world[, disease := NULL]
ms.world <-
data.table(melt(ms.world, id.vars = c("who_region", "iso_code", "cname")))
ms.world[, variable := as.integer(sub("^x", "", variable))]
setnames(ms.world, "cname", "country")
setnames(ms.world, "variable", "year")
setnames(ms.world, "value", "cases")
ms.world <- clean_countries(ms.world)
save(ms.world, file = "ms_world.RData")
}
##' Load SIA data
##'
##' @param path Path in which to find the raw data
##' @importFrom readxl read_excel
##' @author Sebastian Funk
loadSIAData <- function(path)
{
sia.workbook <- paste0(path, "/RVC-vaccination-info-from-JRF-WHO-HQ.xlsx")
sia <- data.table(read_excel(sia.workbook, "SIA 2000-2014"))
setnames(sia, colnames(sia), tolower(colnames(sia)))
sia <- clean_countries(sia)
for (col in colnames(sia))
{
if ("character" %in% class(sia[, get(col)]))
{
sia[, paste(col) := factor(get(col))]
} else if ("POSIXct" %in% class(sia[, get(col)]))
{
sia[, paste(col) := as.Date(get(col))]
}
}
sia[, age.lower := sub("-.*$", "", age.group)]
sia[, age.upper := sub("^.*-", "", age.group)]
sia <- sia[grep("^<", age.lower), age.lower := "0"]
sia <- sia[, age.lower := gsub("\\+", "", age.lower)]
sia <- sia[grep(" M", age.lower), months := TRUE]
sia <- sia[, age.lower := gsub(" M", "", age.lower)]
sia <- sia[, age.lower := gsub(" Y", "", age.lower)]
sia <- sia[, age.lower := as.integer(age.lower)]
sia <- sia[months == TRUE, age.lower := as.integer(age.lower / 12)]
sia <- sia[, age.upper := gsub("<", "", age.upper)]
sia <- sia[grep("\\+$", age.upper), age.upper := "123"]
sia <- sia[grep(" Y", age.upper), months := FALSE]
sia <- sia[grep(" M", age.upper), months := TRUE]
sia <- sia[, age.upper := gsub(" M", "", age.upper)]
sia <- sia[, age.upper := gsub(" Y", "", age.upper)]
sia <- sia[, age.upper := as.integer(age.upper)]
sia <- sia[months == TRUE, age.upper := as.integer(age.upper / 12)]
sia[, months := NULL]
sia[is.na(age.lower) & is.na(age.upper) & age.group == "School-age",
c("age.lower", "age.upper") := list(6, 18)]
sia <- clean_countries(sia)
save(sia, file = "sia.RData")
}
##' Load SIA data
##'
##' @param path Path in which to find the raw data
##' @import reshape2
##' @importFrom readxl read_excel
##' @author Sebastian Funk
loadRVCVaccinationData <- function(path)
{
rvc.workbook <- paste0(path, "/Historical-coverages-reported-to-RVC_28_10.xlsx")
rvc.measles <- data.table(read_excel(rvc.workbook, "Measles coverage to RVC"))
cols1.nb <- grep("^Col[0-9]+", colnames(rvc.measles), invert = TRUE)
cols1.names <- colnames(rvc.measles)[cols1.nb]
if (cols1.nb[length(cols1.nb)] < ncol(rvc.measles))
{
cols1.nb <- c(cols1.nb, ncol(rvc.measles))
}
cols1 <- c()
if (cols1.nb[1] > 1)
{
cols1 <- rep("", cols1.nb[1] - 1)
}
for (i in seq_len(length(cols1.nb) - 1))
{
cols1 <- c(cols1, rep(cols1.names[i], diff(cols1.nb)[i]))
}
cols2 <- which(!is.na(rvc.measles[1, ]))
setnames(rvc.measles, cols2,
paste(cols1[cols2],
unname(unlist(rvc.measles[1, cols2, with = FALSE])), sep = "."))
setnames(rvc.measles, colnames(rvc.measles), tolower(colnames(rvc.measles)))
setnames(rvc.measles, colnames(rvc.measles), gsub(" ", ".", colnames(rvc.measles)))
rvc.measles <- rvc.measles[!is.na(country)]
rvc.measles[, year := sub("\\/[0-9]+$", "", year)]
rvc.measles[, year := gsub("(\\*|\\.)", "", year)]
rvc.measles[, year := gsub("…", "", year)]
rvc.measles[, year := gsub("г", "", year)]
rvc.measles[, year := as.integer(year)]
rvc.measles <- melt(rvc.measles, id.vars = c("country", "year"))
rvc.measles[grep("1st", variable), dose := 1]
rvc.measles[grep("2nd", variable), dose := 2]
rvc.measles <- rvc.measles[!is.na(dose)]
rvc.measles[, variable := sub("^measles.vaccine..*dose\\.", "", variable)]
rvc.measles <- data.table(dcast(rvc.measles, country + year + dose ~ variable,
value.var = "value"))
rvc.mmr <- data.table(readWorksheet(rvc.workbook, "MMR to RVC"))
save(rvc.measles, file = "rvc.RData")
}
##' Load worldwide serology data
##'
##' @param path Path in which to find the raw data
##' @import reshape2
##' @importFrom readxl excel_sheets read_excel
##' @author Sebastian Funk
loadWorldPopulationProspectData <- function(path)
{
wpp.workbook <- paste0(path, "/WPP2012_POP_F07_1_POPULATION_BY_AGE_BOTH_SEXES.XLS")
all_sheets <- excel_sheets(wpp.workbook)
sheets <- setdiff(all_sheets, "NOTES")
wpp.table <- NULL
for (sheet in sheets)
{
temp.table <- data.table(read_excel(wpp.workbook, sheet))
temp.table <- temp.table[!is.na(Col1)]
setnames(temp.table, colnames(temp.table), unlist(temp.table[1]))
temp.table <- temp.table[-1]
temp.table[, Index := NULL]
if (is.null(wpp.table))
{
wpp.table <- temp.table
} else
{
common.columns <- intersect(colnames(wpp.table),
colnames(temp.table))
wpp.table <- rbind(wpp.table[, common.columns, with = F],
temp.table[, common.columns, with = F])
}
}
setnames(wpp.table, "Major area, region, country or area *", "area")
setnames(wpp.table, "Reference date (as of 1 July)", "year")
wpp.table[, Notes := NULL]
for (col in 3:ncol(wpp.table))
{
colname <- colnames(wpp.table)[col]
wpp.table[, paste(colname) := as.integer(get(colname))]
}
wpp <- melt(wpp.table, id.vars = c("Variant", "area", "Country code", "year"),
variable.name = "age_group", value.name = "population")
setnames(wpp, colnames(wpp), gsub(" ", "_", tolower(colnames(wpp))))
save(wpp, file = "wpp.RData")
}
##' Load Childcare data
##'
##' @param path Path in which to find the raw data
##' @author Sebastian Funk
loadChickenpoxSerology <- function(path) {
childcare_raw <-
data.table(read.csv(paste(path, "/childcare.csv", sep = "")))
years <- seq(ceiling(min(childcare$year)), floor(max(childcare$year)))
approx_childcare <- approx(childcare$year, childcare$childcare, years)
childcare <- data.table(year = approx_childcare$x,
childcare = approx_childcare$y)
setkey(childcare, year)
save(childcare, file = "childcare.RData")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.