R/4_addins.r

Defines functions addin_make_skeleton_calyear, addin_make_skeleton_isoyear, addin_make_skeleton_isoweek, and addin_make_skeleton_day.

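These are RStudio addins: each function inserts a commented, step-by-step template for building a data skeleton at one time granularity (day, isoweek, isoyear, or calendar year) into the active document at the cursor. A minimal usage sketch, assuming an interactive RStudio session; the functions would normally be run from the RStudio Addins menu, and the fhidata::: access below is an assumption since the file does not show whether they are exported:

# run with a script open in RStudio; the template text is inserted at the cursor
fhidata:::addin_make_skeleton_day()
fhidata:::addin_make_skeleton_isoweek()

# RStudio addin: inserts a step-by-step template for building a daily (date-based)
# data skeleton at the cursor (requires the rstudioapi package).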
addin_make_skeleton_day <- function(){
  require_namespace("rstudioapi")
  rstudioapi::insertText(
    '
# the data is loaded in
data <- copy(d_agg)

# 1. Create a variable (possibly a list) to hold the data
d_agg <- list()
d_agg$day_municip <- copy(data$day_municip)
d_agg$day_county <- copy(data$day_county)
d_agg$day_nation <- copy(data$day_nation)

# 2. Clean the data

# fixing location codes
# 3. Replace NAs as appropriate for MSIS location numbers
d_agg$day_municip[is.na(location_msis_municip), location_msis_municip := 9999]
d_agg$day_county[is.na(location_msis_county), location_msis_county := 99]

# 4. Convert MSIS location numbers to location_codes using `fhidata::norway_locations_msis_to_fhidata`
# you may also do redistricting (kommunesammenslaaing) at this point (fhidata::norway_locations_redistricting())
d_agg$day_municip[, location_code_original := fhidata::norway_locations_msis_to_fhidata(location_msis_municip)]
d_agg$day_county[, location_code_original := fhidata::norway_locations_msis_to_fhidata(location_msis_county)]
d_agg$day_nation[, location_code_original := fhidata::norway_locations_msis_to_fhidata(location_msis_nation)]

# redistricting
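# each original location code is mapped to its current location code for the
# relevant calyear, with a weighting that redistributes counts across merged
# municipalities (the weighting is applied in step 5)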
for(i in seq_along(d_agg)){
  # d_agg[[i]][, calyear := lubridate::year(date)]
  d_agg[[i]] <- merge(
    d_agg[[i]],
    fhidata::norway_locations_redistricting()[,-"granularity_geo"],
    by = c("location_code_original", "calyear"),
    all.x = TRUE
  )
}

# 5. Re-aggregate your data to different geographical levels to ensure that duplicates have now been removed
# this will also fix the redistricting/kommunesammenslaaing issues
for (i in seq_along(d_agg)) {
  d_agg[[i]] <- d_agg[[i]][, .(
    cases_n = round(sum(cases_n * weighting))
  ), keyby = .(
    location_code = location_code_current,
    date
  )]
}

d_agg[]

# 6. Pull out important dates
date_min <- min(d_agg$day_nation$date)
date_max <- max(d_agg$day_nation$date)

# 7. Create `multiskeleton`
# granularity_geo should have the following groups:
# - nodata (when no data is available, and there is no "finer" data available to aggregate up)
# - all levels of granularity_geo where you have data available
# If you do not have data for a specific granularity_geo, but there is "finer" data available
# then you should not include this granularity_geo in the multiskeleton, because you will create
# it later when you aggregate up your data (baregion)
multiskeleton_day <- fhidata::make_skeleton(
  date_min = date_min,
  date_max = date_max,
  granularity_geo = list(
    "nodata" = c(
      "wardoslo",
      "extrawardoslo",
      "missingwardoslo",
      "wardbergen",
      "missingwardbergen",
      "wardstavanger",
      "missingwardstavanger"
    ),
    "municip" = c(
      "municip",
      "notmainlandmunicip",
      "missingmunicip"
    ),
    "county" = c(
      "county",
      "notmainlandcounty",
      "missingcounty"
    ),
    "nation" = c(
      "nation"
    )
  )
)

# 8. Merge in the information you have at different geographical granularities
# one level at a time
# municip
multiskeleton_day$municip[
  d_agg$day_municip,
  on = c("location_code", "date"),
  cases_n := cases_n
]
multiskeleton_day$municip[is.na(cases_n), cases_n := 0]

multiskeleton_day$municip[]

# county
multiskeleton_day$county[
  d_agg$day_county,
  on = c("location_code", "date"),
  cases_n := cases_n
]
multiskeleton_day$county[is.na(cases_n), cases_n := 0]

multiskeleton_day$county[]

# nation
multiskeleton_day$nation[
  d_agg$day_nation,
  on = c("location_code", "date"),
  cases_n := cases_n
]
multiskeleton_day$nation[is.na(cases_n), cases_n := 0]

multiskeleton_day$nation[]

# 9. Aggregate up to higher geographical granularities
multiskeleton_day$baregion <- multiskeleton_day$municip[
  fhidata::norway_locations_hierarchy(
    from = "municip",
    to = "baregion"
  ),
  on = c(
    "location_code==from_code"
  )
][,
  .(
    cases_n = sum(cases_n),
    granularity_geo = "baregion"
  ),
  by = .(
    granularity_time,
    date,
    location_code = to_code
  )
]

multiskeleton_day$baregion[]

# combine all the different granularity_geos
skeleton_day <- rbindlist(multiskeleton_day, fill = TRUE, use.names = TRUE)

skeleton_day[]

# 10. (If desirable) aggregate up to higher time granularities
# if necessary, it is now easy to aggregate up to weekly data from here
skeleton_isoweek <- copy(skeleton_day)
skeleton_isoweek[, isoyearweek := fhiplot::isoyearweek_c(date)]
skeleton_isoweek <- skeleton_isoweek[
  ,
  .(
    cases_n = sum(cases_n),
    granularity_time = "isoweek"
  ),
  keyby = .(
    isoyearweek,
    granularity_geo,
    location_code
  )
]

skeleton_isoweek[]
'
  )
}

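# RStudio addin: inserts a step-by-step template for building an isoyearweek
# (weekly) data skeleton at the cursor (requires the rstudioapi package).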
addin_make_skeleton_isoweek <- function(){
  require_namespace("rstudioapi")
  rstudioapi::insertText(
    '
# the data is loaded in
data <- copy(d_agg)

# 1. Create a variable (possibly a list) to hold the data
d_agg <- list()
d_agg$isoyearweek_municip <- copy(data$isoyearweek_municip)
d_agg$isoyearweek_county <- copy(data$isoyearweek_county)
d_agg$isoyearweek_nation <- copy(data$isoyearweek_nation)

# 2. Clean the data

# fixing location codes
# 3. Replace NAs as appropriate for MSIS location numbers
d_agg$isoyearweek_municip[is.na(location_msis_municip), location_msis_municip := 9999]
d_agg$isoyearweek_county[is.na(location_msis_county), location_msis_county := 99]

# 4. Convert MSIS location numbers to location_codes using `fhidata::norway_locations_msis_to_fhidata`
# you may also do redistricting (kommunesammenslaaing) at this point (fhidata::norway_locations_redistricting())
d_agg$isoyearweek_municip[, location_code_original := fhidata::norway_locations_msis_to_fhidata(location_msis_municip)]
d_agg$isoyearweek_county[, location_code_original := fhidata::norway_locations_msis_to_fhidata(location_msis_county)]
d_agg$isoyearweek_nation[, location_code_original := fhidata::norway_locations_msis_to_fhidata(location_msis_nation)]

# redistricting
for(i in seq_along(d_agg)){
  # d_agg[[i]][, calyear := lubridate::year(date)]
  d_agg[[i]] <- merge(
    d_agg[[i]],
    fhidata::norway_locations_redistricting()[,-"granularity_geo"],
    by = c("location_code_original", "calyear"),
    all.x = TRUE
  )
}

# 5. Re-aggregate your data to different geographical levels to ensure that duplicates have now been removed
# this will also fix the redistricting/kommunesammenslaaing issues
for (i in seq_along(d_agg)) {
  d_agg[[i]] <- d_agg[[i]][, .(
    cases_n = round(sum(cases_n * weighting))
  ), keyby = .(
    location_code = location_code_current,
    isoyearweek
  )]
}

d_agg[]

# 6. Pull out the minimum and maximum isoyearweek
isoyearweek_min <- min(d_agg$isoyearweek_nation$isoyearweek)
isoyearweek_max <- max(d_agg$isoyearweek_nation$isoyearweek)

# 7. Create `multiskeleton`
# granularity_geo should have the following groups:
# - nodata (when no data is available, and there is no "finer" data available to aggregate up)
# - all levels of granularity_geo where you have data available
# If you do not have data for a specific granularity_geo, but there is "finer" data available
# then you should not include this granularity_geo in the multiskeleton, because you will create
# it later when you aggregate up your data (baregion)
multiskeleton_isoyearweek <- fhidata::make_skeleton(
  isoyearweek_min = isoyearweek_min,
  isoyearweek_max = isoyearweek_max,
  granularity_geo = list(
    "nodata" = c(
      "wardoslo",
      "extrawardoslo",
      "missingwardoslo",
      "wardbergen",
      "missingwardbergen",
      "wardstavanger",
      "missingwardstavanger"
    ),
    "municip" = c(
      "municip",
      "notmainlandmunicip",
      "missingmunicip"
    ),
    "county" = c(
      "county",
      "notmainlandcounty",
      "missingcounty"
    ),
    "nation" = c(
      "nation"
    )
  )
)

# 8. Merge in the information you have at different geographical granularities
# one level at a time
# municip
multiskeleton_isoyearweek$municip[
  d_agg$isoyearweek_municip,
  on = c("location_code", "isoyearweek"),
  cases_n := cases_n
]
multiskeleton_isoyearweek$municip[is.na(cases_n), cases_n := 0]

multiskeleton_isoyearweek$municip[]

# county
multiskeleton_isoyearweek$county[
  d_agg$isoyearweek_county,
  on = c("location_code", "isoyearweek"),
  cases_n := cases_n
]
multiskeleton_isoyearweek$county[is.na(cases_n), cases_n := 0]

multiskeleton_isoyearweek$county[]

# nation
multiskeleton_isoyearweek$nation[
  d_agg$isoyearweek_nation,
  on = c("location_code", "isoyearweek"),
  cases_n := cases_n
]
multiskeleton_isoyearweek$nation[is.na(cases_n), cases_n := 0]

multiskeleton_isoyearweek$nation[]

# 9. Aggregate up to higher geographical granularities
multiskeleton_isoyearweek$baregion <- multiskeleton_isoyearweek$municip[
  fhidata::norway_locations_hierarchy(
    from = "municip",
    to = "baregion"
  ),
  on = c(
    "location_code==from_code"
  )
][,
  .(
    cases_n = sum(cases_n),
    granularity_geo = "baregion"
  ),
  by = .(
    granularity_time,
    isoyearweek,
    location_code = to_code
  )
]

multiskeleton_isoyearweek$baregion[]

# combine all the different granularity_geos
skeleton_isoyearweek <- rbindlist(multiskeleton_isoyearweek, fill = TRUE, use.names = TRUE)

skeleton_isoyearweek[]

# 10. (If desirable) aggregate up to higher time granularities
# if necessary, it is now easy to aggregate up to isoyear data from here
skeleton_isoyear <- copy(skeleton_isoyearweek)
skeleton_isoyear[, isoyear := stringr::str_extract(isoyearweek, "^[0-9][0-9][0-9][0-9]")]
skeleton_isoyear <- skeleton_isoyear[
  ,
  .(
    cases_n = sum(cases_n),
    granularity_time = "isoyear"
  ),
  keyby = .(
    isoyear,
    granularity_geo,
    location_code
  )
]

skeleton_isoyear[]
'
  )
}


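# RStudio addin: inserts a step-by-step template for building an isoyear
# (ISO year) data skeleton at the cursor (requires the rstudioapi package).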
addin_make_skeleton_isoyear <- function(){
  require_namespace("rstudioapi")
  rstudioapi::insertText(
    '
# the data is loaded in
data <- copy(d_agg)

# 1. Create a variable (possibly a list) to hold the data
d_agg <- list()
d_agg$isoyear_municip <- copy(data$isoyear_municip)
d_agg$isoyear_county <- copy(data$isoyear_county)
d_agg$isoyear_nation <- copy(data$isoyear_nation)

# 2. Clean the data

# fixing location codes
# 3. Replace NAs as appropriate for MSIS location numbers
d_agg$isoyear_municip[is.na(location_msis_municip), location_msis_municip := 9999]
d_agg$isoyear_county[is.na(location_msis_county), location_msis_county := 99]

# 4. Convert MSIS location numbers to location_codes using `fhidata::norway_locations_msis_to_fhidata`
# you may also do redistricting (kommunesammenslaaing) at this point (fhidata::norway_locations_redistricting())
d_agg$isoyear_municip[, location_code_original := fhidata::norway_locations_msis_to_fhidata(location_msis_municip)]
d_agg$isoyear_county[, location_code_original := fhidata::norway_locations_msis_to_fhidata(location_msis_county)]
d_agg$isoyear_nation[, location_code_original := fhidata::norway_locations_msis_to_fhidata(location_msis_nation)]

# redistricting
for(i in seq_along(d_agg)){
  # d_agg[[i]][, calyear := lubridate::year(date)]
  d_agg[[i]] <- merge(
    d_agg[[i]],
    fhidata::norway_locations_redistricting()[,-"granularity_geo"],
    by = c("location_code_original", "calyear"),
    all.x = TRUE
  )
}

# 5. Re-aggregate your data to different geographical levels to ensure that duplicates have now been removed
# this will also fix the redistricting/kommunesammenslaaing issues
for (i in seq_along(d_agg)) {
  d_agg[[i]] <- d_agg[[i]][, .(
    cases_n = round(sum(cases_n * weighting))
  ), keyby = .(
    location_code = location_code_current,
    isoyear
  )]
}

d_agg[]

# 6. Pull out the minimum and maximum isoyear
isoyear_min <- min(d_agg$isoyear_nation$isoyear)
isoyear_max <- max(d_agg$isoyear_nation$isoyear)

# 7. Create `multiskeleton`
# granularity_geo should have the following groups:
# - nodata (when no data is available, and there is no "finer" data available to aggregate up)
# - all levels of granularity_geo where you have data available
# If you do not have data for a specific granularity_geo, but there is "finer" data available
# then you should not include this granularity_geo in the multiskeleton, because you will create
# it later when you aggregate up your data (baregion)
multiskeleton_isoyear <- fhidata::make_skeleton(
  isoyear_min = isoyear_min,
  isoyear_max = isoyear_max,
  granularity_geo = list(
    "nodata" = c(
      "wardoslo",
      "extrawardoslo",
      "missingwardoslo",
      "wardbergen",
      "missingwardbergen",
      "wardstavanger",
      "missingwardstavanger"
    ),
    "municip" = c(
      "municip",
      "notmainlandmunicip",
      "missingmunicip"
    ),
    "county" = c(
      "county",
      "notmainlandcounty",
      "missingcounty"
    ),
    "nation" = c(
      "nation"
    )
  )
)

# 8. Merge in the information you have at different geographical granularities
# one level at a time
# municip
multiskeleton_isoyear$municip[
  d_agg$isoyear_municip,
  on = c("location_code", "isoyear"),
  cases_n := cases_n
]
multiskeleton_isoyear$municip[is.na(cases_n), cases_n := 0]

multiskeleton_isoyear$municip[]

# county
multiskeleton_isoyear$county[
  d_agg$isoyear_county,
  on = c("location_code", "isoyear"),
  cases_n := cases_n
]
multiskeleton_isoyear$county[is.na(cases_n), cases_n := 0]

multiskeleton_isoyear$county[]

# nation
multiskeleton_isoyear$nation[
  d_agg$isoyear_nation,
  on = c("location_code", "isoyear"),
  cases_n := cases_n
]
multiskeleton_isoyear$nation[is.na(cases_n), cases_n := 0]

multiskeleton_isoyear$nation[]

# 9. Aggregate up to higher geographical granularities
multiskeleton_isoyear$baregion <- multiskeleton_isoyear$municip[
  fhidata::norway_locations_hierarchy(
    from = "municip",
    to = "baregion"
  ),
  on = c(
    "location_code==from_code"
  )
][,
  .(
    cases_n = sum(cases_n),
    granularity_geo = "baregion"
  ),
  by = .(
    granularity_time,
    isoyear,
    location_code = to_code
  )
]

multiskeleton_isoyear$baregion[]

# combine all the different granularity_geos
skeleton_isoyear <- rbindlist(multiskeleton_isoyear, fill = TRUE, use.names = TRUE)

skeleton_isoyear[]
'
  )
}

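# RStudio addin: inserts a step-by-step template for building a calyear
# (calendar year) data skeleton at the cursor (requires the rstudioapi package).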
addin_make_skeleton_calyear <- function(){
  require_namespace("rstudioapi")
  rstudioapi::insertText(
    '
# the data is loaded in
data <- copy(d_agg)

# 1. Create a variable (possibly a list) to hold the data
d_agg <- list()
d_agg$calyear_municip <- copy(data$calyear_municip)
d_agg$calyear_county <- copy(data$calyear_county)
d_agg$calyear_nation <- copy(data$calyear_nation)

# 2. Clean the data

# fixing location codes
# 3. Replace NAs as appropriate for MSIS location numbers
d_agg$calyear_municip[is.na(location_msis_municip), location_msis_municip := 9999]
d_agg$calyear_county[is.na(location_msis_county), location_msis_county := 99]

# 4. Convert MSIS location numbers to location_codes using `fhidata::norway_locations_msis_to_fhidata`
# you may also do redistricting (kommunesammenslaaing) at this point (fhidata::norway_locations_redistricting())
d_agg$calyear_municip[, location_code_original := fhidata::norway_locations_msis_to_fhidata(location_msis_municip)]
d_agg$calyear_county[, location_code_original := fhidata::norway_locations_msis_to_fhidata(location_msis_county)]
d_agg$calyear_nation[, location_code_original := fhidata::norway_locations_msis_to_fhidata(location_msis_nation)]

# redistricting
for(i in seq_along(d_agg)){
  # d_agg[[i]][, calyear := lubridate::year(date)]
  d_agg[[i]] <- merge(
    d_agg[[i]],
    fhidata::norway_locations_redistricting()[,-"granularity_geo"],
    by = c("location_code_original", "calyear"),
    all.x = TRUE
  )
}

# 5. Re-aggregate your data to different geographical levels to ensure that duplicates have now been removed
# this will also fix the redistricting/kommunesammenslaaing issues
for (i in seq_along(d_agg)) {
  d_agg[[i]] <- d_agg[[i]][, .(
    cases_n = round(sum(cases_n * weighting))
  ), keyby = .(
    location_code = location_code_current,
    calyear
  )]
}

d_agg[]

# 6. Pull out the minimum and maximum calyear
calyear_min <- min(d_agg$calyear_nation$calyear)
calyear_max <- max(d_agg$calyear_nation$calyear)

# 7. Create `multiskeleton`
# granularity_geo should have the following groups:
# - nodata (when no data is available, and there is no "finer" data available to aggregate up)
# - all levels of granularity_geo where you have data available
# If you do not have data for a specific granularity_geo, but there is "finer" data available
# then you should not include this granularity_geo in the multiskeleton, because you will create
# it later when you aggregate up your data (baregion)
multiskeleton_calyear <- fhidata::make_skeleton(
  calyear_min = calyear_min,
  calyear_max = calyear_max,
  granularity_geo = list(
    "nodata" = c(
      "wardoslo",
      "extrawardoslo",
      "missingwardoslo",
      "wardbergen",
      "missingwardbergen",
      "wardstavanger",
      "missingwardstavanger"
    ),
    "municip" = c(
      "municip",
      "notmainlandmunicip",
      "missingmunicip"
    ),
    "county" = c(
      "county",
      "notmainlandcounty",
      "missingcounty"
    ),
    "nation" = c(
      "nation"
    )
  )
)

# 8. Merge in the information you have at different geographical granularities
# one level at a time
# municip
multiskeleton_calyear$municip[
  d_agg$calyear_municip,
  on = c("location_code", "calyear"),
  cases_n := cases_n
]
multiskeleton_calyear$municip[is.na(cases_n), cases_n := 0]

multiskeleton_calyear$municip[]

# county
multiskeleton_calyear$county[
  d_agg$calyear_county,
  on = c("location_code", "calyear"),
  cases_n := cases_n
]
multiskeleton_calyear$county[is.na(cases_n), cases_n := 0]

multiskeleton_calyear$county[]

# nation
multiskeleton_calyear$nation[
  d_agg$calyear_nation,
  on = c("location_code", "calyear"),
  cases_n := cases_n
]
multiskeleton_calyear$nation[is.na(cases_n), cases_n := 0]

multiskeleton_calyear$nation[]

# 9. Aggregate up to higher geographical granularities
multiskeleton_calyear$baregion <- multiskeleton_calyear$municip[
  fhidata::norway_locations_hierarchy(
    from = "municip",
    to = "baregion"
  ),
  on = c(
    "location_code==from_code"
  )
][,
  .(
    cases_n = sum(cases_n),
    granularity_geo = "baregion"
  ),
  by = .(
    granularity_time,
    calyear,
    location_code = to_code
  )
]

multiskeleton_calyear$baregion[]

# combine all the different granularity_geos
skeleton_calyear <- rbindlist(multiskeleton_calyear, fill = TRUE, use.names = TRUE)

skeleton_calyear[]
'
  )
}