#' Prepare an Italy covariate for modeling
#'
#' @description Generic function to prepare a covariate for modeling. This
#' function is a wrapper for sub-functions called for specific covariates and
#' will fail if a non-initialized covariate has been passed. It returns both
#' the prepared covariate and a vector of expected indices that can be used
#' to merge back onto the original dataset.
#'
#' @param covar_fp Filepath to the raw covariate
#' @param covar_name Short name for the covariate. This will be used to send the
#' covariate to a particular prep function.
#' @param model_years Vector of years to consider for modeling
#' @param location_table Location code merge table for Italy
#' @param pop_raster [optional] Population rasterBrick object, with one layer
#' per modeling year. Only used to prepare raster covariates, default NULL.
#' @param polys_sf [optional] Polygon boundaries
#' @param projection [optional] Character vector giving the proj4 CRS
#' definition. Example: "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs"
#' @param final_obs_week [optional] ID for the final observed week in 2020,
#' formatted as an integer between 1 and 52 inclusive.
#'
#' @return A list of two items:
#' - "prepped_covar": data.table containing the prepped covariate
#' - "covar_indices": Vector of identifiers that should be used to merge onto
#' the modeling dataset. The prepared covariate dataset should only include
#' identifier columns and the covariate value, specified by the covariate
#' name
#'
#' @import data.table
#' @export
ita_prepare_covariate <- function(
covar_name, covar_fp, model_years, location_table, pop_raster = NULL,
polys_sf = NULL, projection = NULL, final_obs_week = NULL
){
# Check that an appropriate covariate function exists
covar_func <- get_covar_prep_function(covar_name)
# Apply the function to the data
message(" - Preparing ",covar_name)
covar_out_list <- do.call(
covar_func,
args = list(
covar_fp = covar_fp,
model_years = model_years,
location_table = location_table,
pop_raster = pop_raster,
polys_sf = polys_sf,
projection = projection,
final_obs_week = final_obs_week
)[names(formals(covar_func))]
)
# Check that output is valid
check_covar_validity(
prepped_covar = covar_out_list$prepped_covar,
covar_name = covar_name,
covar_indices = covar_out_list$covar_indices,
model_years = model_years,
location_table = location_table
)
# Return output
return(covar_out_list)
}
#' Pull a specific prep function for an Italy covariate
#'
#' @description Helper function for \code{\link{prepare_covariate}} that checks
#' whether a covariate-specific prep function exists yet by searching the
#' package namespace. All covariate-specific prep functions should take the
#' name format "ita_prep_covar_<COVAR NAME>". Errors if the function does not
#' exist; returns the function name if the function does exist.
#'
#' @param covar_name Short name for the covariate.
#'
#' @return Name of the function to prepare this covariate
#'
get_covar_prep_function <- function(covar_name){
# Search for prep functions
f_pattern <- 'ita_prepare_covar_'
potential_f_name <- paste0(f_pattern, covar_name)
all_prep_functions <- ls('package:covidemr', pattern=sprintf("^%s",f_pattern))
if(potential_f_name %in% all_prep_functions){
return(potential_f_name)
} else {
stop(sprintf(
"The function %s does not exist! Valid options include:\n - %s",
potential_f_name,
paste(all_prep_functions, collapse='\n - ')
))
}
}
#' Prepare year covariate
#'
#' @description Covariate-specific prep function for the year covariate. This
#' one is pretty simple - the (unnormalized) covariate is the same as the year
#'
#' @param model_years Vector of years to consider for modeling
#'
#' @return data.table containing prepared covariate data and vector of indices
#'
#' @import data.table
#' @export
ita_prepare_covar_year_cov <- function(model_years){
# The prepped covariate is the same as the year
out_list <- list(
prepped_covar = data.table(year = model_years, year_cov = model_years),
covar_indices = 'year'
)
# Return
return(out_list)
}
#' Prepare TFR covariate
#'
#' @description Covariate-specific prep function for TFR (total fertility rate).
#' This is a convenience function for specific use with datasets exported from
#' IStat.
#'
#' @param covar_fp Filepath to the raw covariate
#' @param model_years Vector of years to consider for modeling
#' @param location_table Location code merge table for Italy
#'
#' @return data.table containing prepared covariate data and vector of indices
#'
#' @import data.table
#' @export
ita_prepare_covar_tfr <- function(covar_fp, model_years, location_table){
covar_data <- ita_read_istat_csv(covar_fp)
data.table::setnames(
covar_data, c('ITTER107','TIME','Value'), c('icode', 'year', 'tfr')
)
# Merge on location codes
covar_data_merged <- data.table::merge.data.table(
covar_data,
location_table[, .(icode, location_code)],
by='icode'
)
# Apply Sud Sardegna fix
covar_data_merged <- backfill_input_data(
input_data = covar_data_merged,
index_field = 'icode',
check_vals = 'IT111'
)
# Drop unnecessary columns
covar_indices <- c('location_code', 'year')
all_cols <- c(covar_indices, 'tfr')
covar_data_merged <- covar_data_merged[, ..all_cols ]
# Extend years
prepped_covar <- extend_covar_time_series(
covar_data = covar_data_merged,
model_years = model_years
)
# Return
return(list(prepped_covar = prepped_covar, covar_indices = covar_indices))
}
#' Prepare unemployment covariate
#'
#' @description Covariate-specific prep function for unemployment by sex and by
#' quarter. This is a convenience function for specific use with datasets
#' exported from IStat.
#'
#' @param covar_fp Filepath to the raw covariate
#' @param model_years Vector of years to consider for modeling
#' @param location_table Location code merge table for Italy
#'
#' @return data.table containing prepared covariate data and vector of indices
#'
#' @import data.table
#' @export
ita_prepare_covar_unemp <- function(covar_fp, model_years, location_table){
covar_data <- ita_read_istat_csv(covar_fp)
covar_indices <- c('location_code','sex','year')
# Update names
setnames(covar_data, c('ITTER107','TIME','Value'), c('icode', 'year', 'unemp'))
# Keep only annual values and convert to integer
suppressWarnings(covar_data[, year := as.integer(year) ])
covar_data <- covar_data[ !is.na(year), ]
# Format sex identifiers
covar_data[, sex := gsub('s', '', Gender) ]
# Fix Bolzano and Trento location codes, then merge on standard code table
covar_data[ icode == 'ITD1', icode := 'ITD10' ] # Bolzano
covar_data[ icode == 'ITD2', icode := 'ITD20' ] # Trento
covar_data_merged <- data.table::merge.data.table(
covar_data,
location_table[, .(location_code, icode)],
by='icode'
)
# Apply Sud Sardegna fix
covar_data_merged <- backfill_input_data(
input_data = covar_data_merged,
index_field = 'icode',
check_vals = 'IT111'
)
covar_data_merged <- covar_data_merged[, c(covar_indices, 'unemp'), with=FALSE]
# Subset columns and extend to 2020 and return
prepped_covar <- extend_covar_time_series(
covar_data = covar_data_merged,
model_years = model_years
)
# Return
return(list(prepped_covar = prepped_covar, covar_indices = covar_indices))
}
#' Prepare social services covariate
#'
#' @description Covariate-specific prep function for social services coverage
#' for families. This is a convenience function for specific use with datasets
#' exported from IStat.
#'
#' @param covar_fp Filepath to the raw covariate
#' @param model_years Vector of years to consider for modeling
#' @param location_table Location code merge table for Italy
#'
#' @return data.table containing prepared covariate data and vector of indices
#'
#' @import data.table
#' @export
ita_prepare_covar_socserv <- function(covar_fp, model_years, location_table){
covar_data <- ita_read_istat_csv(covar_fp)
covar_indices <- c('year', 'location_code')
# Update names
setnames(covar_data, c('ITTER107', 'TIME', 'Value'), c('icode', 'year', 'socserv'))
# Subset to just proportion of families/children receiving at home social services
covar_data <- covar_data[(TIPUTENZA1=='FAM') & (TIPSERVSOC=='HOMECARE'), ]
# Fix for Sud Sardegna, which is represented by its old defunct provinces.
# Note that this fix differs from others because Sud Sardegna is not
# represented in any years -- instead, the average is taken between Medio
# Campidano and Carbonia-Iglesias
covar_ssd <- covar_data[ icode %in% c('ITG2B', 'ITG2C'), .(icode, year, socserv)]
covar_ssd[, icode := 'IT111' ]
covar_ssd <- covar_ssd[, .(socserv=mean(socserv)), by=.(icode, year)]
covar_data <- rbindlist(list(covar_data, covar_ssd), use.names=TRUE, fill=TRUE)
# Merge on location codes
covar_data_merged <- data.table::merge.data.table(
covar_data,
location_table[, .(icode, location_code)],
by='icode'
)
covar_data_merged <- covar_data_merged[, c(covar_indices, 'socserv'), with=FALSE]
prepped_covar <- extend_covar_time_series(
covar_data = covar_data_merged,
model_years = model_years
)
# Return
return(list(prepped_covar = prepped_covar, covar_indices = covar_indices))
}
#' Prepare covariate: proportion of households with income under E10k
#'
#' @description Covariate-specific prep function for proportion of households
#' with taxable income under 10,000 Euros per year. A related presentation of
#' this data is mean taxable income per household, prepared in another
#' function. This function is meant specifically for use with IStat data.
#'
#' @param covar_fp Filepath to the raw covariate
#' @param model_years Vector of years to consider for modeling
#' @param location_table Location code merge table for Italy
#'
#' @return data.table containing prepared covariate data and vector of indices
#'
#' @import data.table
#' @export
ita_prepare_covar_tax_brackets <- function(
covar_fp, model_years, location_table
){
covar_indices <- c('year', 'location_code')
covar_data <- ita_read_istat_csv(covar_fp)
setnames(covar_data, 'TIME', 'year')
# Set the province code based on the comuna code
covar_data[, ITTER107 := as.character(ITTER107)]
covar_data[, loc_nc := nchar(ITTER107) ]
covar_data[, location_code := as.integer(substr(ITTER107, 1, loc_nc - 3))]
# Some provinces in Sardinia were reorganized in 2018: try to reassign the
# provinces for those municipalities based on 2018 data
sard_provinces <- location_table[region_code == 20, location_code ]
sard_merge_table <- unique(covar_data[
(year == 2018) & (location_code %in% sard_provinces),
.(Territory, location_code)
])
old_provs <- 104:107
existing_provinces <- covar_data[ !(location_code %in% old_provs), ]
sard_pre_2018 <- covar_data[location_code %in% old_provs,]
sard_pre_2018[
sard_merge_table, on='Territory', location_code := i.location_code
]
# Reconstitute the full dataset with updated province codes
covar_data <- rbindlist(list(existing_provinces, sard_pre_2018), use.names=TRUE)
# Get the number of households with income less than 15k Euros
covar_data[, low_bk := IMPORTOEURO %in% c('E_UN0', 'E0-10000')]
prepped_covar <- covar_data[,
.(tax_brackets = .SD[low_bk==1, sum(Value, na.rm=T)] / sum(Value, na.rm=T)),
by = covar_indices
]
# Extend for 2019-2020 and return
prepped_covar <- extend_covar_time_series(
covar_data = prepped_covar,
model_years = model_years
)
return(list(prepped_covar = prepped_covar, covar_indices = covar_indices))
}
#' Prepare covariate: mean taxable income across all households
#'
#' @description Covariate-specific prep function for average taxable income
#' across all households in a province. This covariate is derived from two
#' IStat datasets, one listing the number of households per tax bracket and
#' another listing total taxed income by bracket. A related presentation of
#' this data is proportion of households with less than 10k Euros in taxable
#' income, prepared in the function `ita_prepare_covar_tax_brackets()`.
#'
#' @param covar_fp Filepath to the raw covariate
#' @param model_years Vector of years to consider for modeling
#' @param location_table Location code merge table for Italy
#'
#' @return data.table containing prepared covariate data and vector of indices
#'
#' @import data.table
#' @export
ita_prepare_covar_tax_income <- function(
covar_fp, model_years, location_table
){
# Load data
covar_indices <- c('year', 'location_code')
covar_data_long <- rbindlist(
lapply(covar_fp, ita_read_istat_csv)
)[, .(ITTER107, Territory, TIPO_DATO_MEF, TIME, Value)]
# Cast so that total income and number of households are in different fields
covar_data <- data.table::dcast(
covar_data_long,
ITTER107 + Territory + TIME ~ TIPO_DATO_MEF,
fun.aggregate = function(x) sum(x, na.rm=T),
value.var = 'Value'
)
setnames(covar_data, c('TIME','AGGINCF','AGGINCR'), c('year','hh','income'))
# Set the province code based on the comuna code
covar_data[, ITTER107 := as.character(ITTER107)]
covar_data[, loc_nc := nchar(ITTER107) ]
covar_data[, location_code := as.integer(substr(ITTER107, 1, loc_nc - 3))]
# Some provinces in Sardinia were reorganized in 2018: try to reassign the
# provinces for those municipalities based on 2018 data
sard_provinces <- location_table[region_code == 20, location_code ]
sard_merge_table <- unique(covar_data[
(year == 2018) & (location_code %in% sard_provinces),
.(Territory, location_code)
])
old_provs <- 104:107
existing_provinces <- covar_data[ !(location_code %in% old_provs), ]
sard_pre_2018 <- covar_data[location_code %in% old_provs,]
sard_pre_2018[
sard_merge_table, on='Territory', location_code := i.location_code
]
# Reconstitute the full dataset with updated province codes
covar_data <- rbindlist(list(existing_provinces, sard_pre_2018), use.names=TRUE)
# Get the number of households with income less than 15k Euros
prepped_covar <- covar_data[,
.(tax_income = sum(income) / sum(hh)),
by = covar_indices
]
# Extend for 2019-2020 and return
prepped_covar <- extend_covar_time_series(
covar_data = prepped_covar,
model_years = model_years
)
return(list(prepped_covar = prepped_covar, covar_indices = covar_indices))
}
#' Prepare covariate: population density
#'
#' @description Covariate-specific prep function for all-ages population density
#' by Italian province
#'
#' @param covar_fp Filepath to the raw covariate
#' @param model_years Vector of years to consider for modeling
#' @param location_table Location code merge table for Italy
#' @param polys_sf [optional] Polygon boundaries
#'
#' @return A list of two items:
#' - "prepped_covar": data.table containing the prepped covariate
#' - "covar_indices": Vector of identifiers that should be used to merge onto
#' the modeling dataset. The prepared covariate dataset should only include
#' identifier columns and the covariate value, specified by the covariate
#' name
#'
#' @import data.table sf
#' @export
ita_prepare_covar_pop_density <- function(
covar_fp, model_years, location_table, polys_sf
){
# Load annual population data
pop_by_age <- data.table::fread(covar_fp)
pop_agg <- pop_by_age[, .(pop = sum(pop)), by=.(location_code, year)]
pop_agg <- pop_agg[order(location_code, year)]
# Generate areas for each location based on the projected polygons
loc_areas <- data.table::data.table(
location_code = polys_sf$location_code,
area_sqkm = as.vector(sf::st_area(polys_sf) / 1E6) # Coming from units in m
)
pop_agg[ loc_areas, area_sqkm := as.numeric(i.area_sqkm), on = 'location_code'
][, pop_density := pop / area_sqkm # Units: people / km2
][, c('pop','area_sqkm') := NULL ]
return(list(
prepped_covar = pop_agg,
covar_indices = c('location_code','year')
))
}
#' Prepare covariate: Elevation
#'
#' @description Covariate-specific prep function for elevation by location.
#' Specifically, this aggregated covariate represents the population-weighted
#' mean elevation inhabited by residents of each province.
#'
#' @param covar_fp Filepath to the raw covariate
#' @param pop_raster [optional] Population rasterBrick object, with one layer
#' per modeling year. Only used to prepare raster covariates, default NULL.
#' @param polys_sf [optional] Polygon boundaries
#' @param projection [optional] Character vector giving the proj4 CRS
#' definition. Example: "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs"
#'
#' @return A list of two items:
#' - "prepped_covar": data.table containing the prepped covariate
#' - "covar_indices": Vector of identifiers that should be used to merge onto
#' the modeling dataset. The prepared covariate dataset should only include
#' identifier columns and the covariate value, specified by the covariate
#' name
#'
#' @import data.table raster fasterize
#' @export
ita_prepare_covar_elevation <- function(
covar_fp, pop_raster, polys_sf, projection
){
# Load raw data
elev_raw <- raster::raster(covar_fp)
# Get population in final year (mean elevation should be stable across years)
pop <- pop_raster[[dim(pop_raster)[3]]]
# Align with population raster and mask
elev_projected <- raster::projectRaster(from = elev_raw, crs = projection)
elev_sub <- raster::crop(x = elev_projected, y = pop)
elev_sub <- raster::resample(x = elev_sub, y = pop, method = 'bilinear')
elev_sub <- raster::mask(x = elev_sub, mask = pop)
locs_raster <- fasterize::fasterize(
sf = polys_sf, raster = pop, field = 'location_code', fun = 'last'
)
# If all dimensions align, run the population-weighted aggregation
if(any(dim(elev_sub) != dim(pop))) stop("Access raster not aligned")
if(any(dim(pop) != dim(locs_raster))) stop("Location raster not aligned")
prepped_covar <- na.omit(data.table::data.table(
elevation = as.vector(elev_sub),
population = as.vector(pop),
location_code = as.vector(locs_raster)
))[, .(elevation = weighted.mean(elevation, w=population)), by=location_code]
prepped_covar <- prepped_covar[order(location_code)]
return(list(prepped_covar = prepped_covar, covar_indices = 'location_code'))
}
#' Prepare covariate: access to healthcare facility
#'
#' @description Covariate-specific prep function for the average person's travel
#' time to the nearest healthcare facility (by motor vehicle).
#'
#' @param covar_fp Filepath to the raw covariate
#' @param model_years Vector of years to consider for modeling
#' @param location_table Location code merge table for Italy
#' @param pop_raster [optional] Population rasterBrick object, with one layer
#' per modeling year. Only used to prepare raster covariates, default NULL.
#' @param polys_sf [optional] Polygon boundaries
#' @param projection [optional] Character vector giving the proj4 CRS
#' definition. Example: "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs"
#'
#' @return A list of two items:
#' - "prepped_covar": data.table containing the prepped covariate
#' - "covar_indices": Vector of identifiers that should be used to merge onto
#' the modeling dataset. The prepared covariate dataset should only include
#' identifier columns and the covariate value, specified by the covariate
#' name
#'
#' @import data.table raster fasterize
#' @export
ita_prepare_covar_hc_access <- function(
covar_fp, model_years, location_table, pop_raster, polys_sf, projection
){
rast_raw <- raster::raster(covar_fp)
# Only use the final year of population data, which aligns with the access
# dataset
pop <- pop_raster[[length(model_years)]]
# Crop in unprojected space first - this will make the projection much faster
unproj_extent <- raster::extent(
raster::projectRaster(pop, crs = crs(rast_raw))
)
buffered_extent <- raster::extent(
unproj_extent@xmin - .1, unproj_extent@xmax + .1,
unproj_extent@ymin - .1, unproj_extent@ymax + .1
)
rast_sub <- raster::crop(rast_raw, y = buffered_extent)
# Project, crop, resample and mask to population raster
rast_sub <- raster::projectRaster(rast_sub, crs = projection)
rast_sub <- raster::crop(x = rast_sub, y = pop)
rast_resampled <- raster::resample(x = rast_sub, y = pop, method = 'ngb')
rast_resampled <- raster::mask(x = rast_resampled, mask = pop)
# Rasterize polygons to get location code for each pixel, and get "average
# access" using a population-weighted aggregation
locs_raster <- fasterize::fasterize(
sf = polys_sf, raster = pop, field = 'location_code', fun = 'last'
)
# If all dimensions align, run the population-weighted aggregation
if(any(dim(rast_resampled) != dim(pop))) stop("Access raster not aligned")
if(any(dim(pop) != dim(locs_raster))) stop("Location raster not aligned")
prepped_covar <- na.omit(data.table::data.table(
hc_access = as.vector(rast_resampled),
population = as.vector(pop),
location_code = as.vector(locs_raster)
))[, .(hc_access = weighted.mean(hc_access, w=population)), by=location_code]
prepped_covar <- prepped_covar[order(location_code)]
return(list(prepped_covar = prepped_covar, covar_indices = 'location_code'))
}
#' Load and cache daily temperature data from API
#'
#' @description Pull daily temperature data from the Meteostat API. This
#' function uses cacheing: if the `cache_file` already exists, it pulls the
#' results from file rather than re-querying the API
#'
#' @param model_years Years of data to pull
#' @param api_key API key for querying the Meteostat API
#' @param prov_latlong Table indexing locations to latitute and longitude. Must
#' contain at least the following three fields:
#' - 'location_code': Numeric code identifying location
#' - 'lat': Latitude
#' - 'lon': Longitude
#' @param cache_file File where the pulled data will be cached
#' @param refresh_cache [optional, default FALSE] Should the data be re-pulled
#' even if a cache file exists?
#'
#' @return Data.table containing full Meteostat daily API data as well as
#' location code for all available years
#'
#' @import data.table glue httr
#'
ita_temperature_query_api <- function(
model_years, api_key, prov_latlong, cache_file, refresh_cache = FALSE
){
# Load and return cached file if it exists
if(file.exists(cache_file) & !refresh_cache){
return(fread(cache_file))
}
# Prepare a list to store individual API query results
temp_data_list <- vector('list', length=length(model_years))
# Iterate through years and locations to save on memory
for(year_idx in 1:length(model_years)){
model_year <- model_years[year_idx]
message(" Querying all province data for year ", model_year)
# For each province, query the API for daily weather patterns across the
# year
temp_data_list[[year_idx]] <- vector('list', length=nrow(prov_latlong))
for(pnt_idx in 1:nrow(prov_latlong)){
cat('.'); flush.console();
this_prov_code <- prov_latlong[pnt_idx, location_code]
# Build query
meteo_query <- glue::glue(
"https://api.meteostat.net/v2/point/daily?lat={prov_latlong[pnt_idx, lat]}",
"&lon={prov_latlong[pnt_idx, lon]}&start={model_year}-01-01",
"&end={model_year}-12-31"
)
# Send query with header
meteo_response <- httr::GET(meteo_query, add_headers('x-api-key'=api_key))
# Wait between requests
Sys.sleep(0.55)
response_code <- httr::status_code(meteo_response)
if(response_code == 200){
# The query succeeded
response_dataset <- suppressWarnings(data.table::rbindlist(
httr::content(meteo_response)$data, use.names = TRUE
))
response_dataset[, location_code := this_prov_code ]
response_dataset[, point_id := pnt_idx ]
response_dataset$lat <- prov_latlong[pnt_idx, lat]
response_dataset$lon <- prov_latlong[pnt_idx, lon]
if(nrow(response_dataset)==1){
message(" W: No temperature data for province ",this_prov_code," in ",model_year)
} else {
temp_data_list[[year_idx]][[pnt_idx]] <- response_dataset
}
} else {
# The query failed
stop(
"API request failed with code ", response_code, " on year ",
model_year, " and location code ", this_prov_code
)
}
}
}
# Compile full dataset, save to file, and return
raw_data_full <- data.table::rbindlist(
lapply(temp_data_list, rbindlist, fill=TRUE)
)
fwrite(raw_data_full, file = cache_file)
return(raw_data_full)
}
#' Prepare covariate: weekly temperature
#'
#' @description Covariate-specific prep function for average experienced weekly
#' temperature by province. Note that this function uses cacheing, so if the
#' raw data has been pulled from the API already it will not be re-pulled
#'
#' @param covar_fp Filepath to an API key for Meteostat, which will be used to
#' pull weekly templerature data
#' @param model_years Vector of years to consider for modeling
#' @param location_table Location code merge table for Italy
#' @param pop_raster [optional] Population rasterBrick object, with one layer
#' per modeling year. Only used to prepare raster covariates, default NULL.
#' @param polys_sf [optional] Polygon boundaries
#' @param final_obs_week ID for the final observed week in 2020, formatted as an
#' integer between 1 and 52 inclusive.
#'
#' @return A list of two items:
#' - "prepped_covar": data.table containing the prepped covariate
#' - "covar_indices": Vector of identifiers that should be used to merge onto
#' the modeling dataset. The prepared covariate dataset should only include
#' identifier columns and the covariate value, specified by the covariate
#' name
#'
#' @import data.table raster
#' @export
ita_prepare_covar_temperature <- function(
covar_fp, model_years, location_table, pop_raster, polys_sf, final_obs_week
){
## Find the pixel with the highest population in each province
## The API will be queried at these points
prov_indexing <- index_populated_grid_cells(
pop = pop_raster[[ dim(pop_raster)[3] ]],
polys_sf = polys_sf,
loc_field = 'location_code'
)
prov_latlong <- prov_indexing[, .SD[frank(-pop) <= 3,], by=location_code
][, pop := NULL
][, `:=` (lat = round(lat, 3), lon = round(lon, 3))
][order(location_code)]
if(nrow(prov_latlong) != nrow(location_table) * 3) stop("Missing some provinces")
# Query the Meteostat API for daily temperatures in selected locations
api_key <- readLines(covar_fp)
meteostat_folder <- file.path(dirname(covar_fp), 'meteostat')
dir.create(meteostat_folder, showWarnings = FALSE)
raw_temp_data <- ita_temperature_query_api(
model_years = model_years,
api_key = api_key,
prov_latlong = prov_latlong,
cache_file = file.path(meteostat_folder, 'api_data_compiled.csv'),
refresh_cache = FALSE
)
# Aggregate temperature from days to weeks
temp_weekly <- raw_temp_data[, date := as.Date(date, format = '%Y-%m-%d')
][, year := as.integer(strftime(date, format='%Y'))
][, week := week_id_from_date(date)
][, .(tavg = mean(tavg, na.rm=T)), by=.(location_code, point_id, year, week)]
# Merge on a template to ensure that all points, weeks, and years are included
template <- data.table::merge.data.table(
x = data.table::data.table(
location_code = rep(location_table$location_code, each = 3),
point_id = 1:(3 * nrow(location_table)),
dummy = 1
),
y = data.table::CJ(year = model_years, week = 1:52, dummy = 1),
by = 'dummy',
allow.cartesian = TRUE
)[, dummy := NULL ]
temp_weekly <- data.table::merge.data.table(
x = template, y = temp_weekly, by = names(template), all.x=TRUE
)[order(location_code, point_id, year, week)]
# Subset to only included weeks
temp_weekly <- temp_weekly[(year < 2020) | (week <= final_obs_week), ]
# For small gaps (less than 4 weeks), linearly interpolate available data
num_na_start <- sum(is.na(temp_weekly$tavg))
temp_weekly[, allna := all(is.na(tavg)), by = point_id]
temp_weekly[
allna == FALSE,
temp_interp := stats::approx(x = .I, y=tavg, xout=.I, method='linear')$y,
by=point_id
]
temp_weekly[, na_grouping := data.table::rleid(is.na(tavg)), by = point_id
][, num_missing := .N, by = .(na_grouping, point_id)
][ is.na(tavg) & (num_missing < 4), tavg := temp_interp
][, c('num_missing', 'temp_interp', 'na_grouping') := NULL ]
num_na_end <- sum(is.na(temp_weekly$tavg))
# Aggregate to the province level
temp_by_province <- temp_weekly[,
.(tavg = mean(tavg, na.rm=T)),
by=.(location_code, year, week)
]
# When a weekly temperature is not available for a given province, pull it
# from a neighboring province
reassign_dt <- data.table::rbindlist(lapply(list(
c(1,2), c(4,9), c(5,18), c(6,9), c(10,9), c(11,45), c(14,13), c(21,25),
c(22,25), c(29,28), c(33,19), c(34,19), c(35,20), c(36,20), c(37,39),
c(38,39), c(42,41), c(43,41), c(44,41), c(48,50), c(51,54), c(52,54),
c(55,54), c(57,54), c(64,62), c(65,63), c(66,69), c(67,69), c(68,69),
c(71,77), c(72,74), c(76,77), c(78,77), c(79,77), c(82,83), c(84,81),
c(85,81), c(88,87), c(91,90), c(95,111), c(98,18), c(100,47), c(101,80),
c(102,77), c(109,41), c(110,74)
), as.list))
names(reassign_dt) <- c('target', 'source')
temp_by_province[ reassign_dt, source := i.source, on = c(location_code = 'target')
][
temp_by_province,
tavg_neighbor := i.tavg,
on = c(source = 'location_code', week = 'week', year = 'year')
][, from_neighbor := as.integer(is.na(tavg))
][ from_neighbor == 1, tavg := tavg_neighbor
][, c('source','tavg_neighbor','from_neighbor') := NULL ]
# Rename value column
data.table::setnames(temp_by_province, 'tavg', 'temperature')
# Return
return(list(
prepped_covar = temp_by_province,
covar_indices = c('location_code','year','week')
))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.