R/r6_amort.R

Defines functions amort_get_fits amort_upload_rrs amort_results

#' amort
#' xxx
#' @import R6
#' @export amort
amort <- R6::R6Class(
  "amort",
  portable = FALSE,
  cloneable = FALSE,
  list(
    #' @description
    #' Say hi.
    run_all = function() {
      # check to see if it can run
      if (!fd::exists_rundate("normomo")) {
        return()
      }
      if (!fd::exists_rundate("sykdomspuls")) {
        return()
      }
      if (!fd::exists_rundate("weather")) {
        return()
      }

      rundate <- fd::get_rundate()

      # determine if it should run
      run <- TRUE
      if (fd::exists_rundate("brain_amort")) {
        max_source_date <- max(rundate[package %in% c("normomo", "sykdomspuls")]$date_extraction)
        if (rundate[package == "brain_amort"]$date_extraction >= max_source_date) run <- FALSE
      }
      # need to be from the same week
      if (fhi::isoweek_n(rundate[package == "normomo"]$date_results) != fhi::isoweek_n(rundate[package == "sykdomspuls"]$date_results)) run <- FALSE

      if (!run & fd::config$is_production) {
        return()
      }

      fd::msg("Brain amort - creating/uploading RRs", slack = T)
      fd::drop_table("brain_amort_rr")
      amort_upload_rrs()

      fd::drop_table("brain_amort_results")
      amort_results(train_year_max = 2015)
      years <- seq(2016, fhi::isoyear_n(), 1)
      for (i in years) {
        fd::msg(glue::glue("Brain amort - estimating numbers {i}"))
        amort_results(
          train_year_max = i,
          pred_year_max = i,
          pred_year_min = i
        )
      }

      date_extraction <- max(
        rundate[package == "normomo"]$date_extraction,
        rundate[package == "sykdomspuls"]$date_extraction
      )

      date_results <- max(
        rundate[package == "normomo"]$date_results,
        rundate[package == "sykdomspuls"]$date_results
      )

      # update rundate
      fd::update_rundate(
        package = "brain_amort",
        date_extraction = date_extraction,
        date_results = date_results,
        date_run = lubridate::today()
      )

      fd::msg("Brain amort - done", slack = T)
    }
  )
)

amort_get_fits <- function(
                           year_max = fhi::isoyear_n(),
                           year_min = year_max - 4) {
  weather <- fd::get_weather(impute_missing = TRUE)

  virology <- readxl::read_excel(system.file("extdata", "influenza.xlsx", package = "brain"))
  setDT(virology)

  locs <- unique(c("norge", fd::norway_locations()$county_code))
  fits <- vector("list", length = length(locs))
  for (i in seq_along(locs)) {
    loc <- locs[i]
    mem <- fd::tbl("spuls_mem_results") %>%
      dplyr::filter(tag == "influensa") %>%
      dplyr::filter(location_code == !!loc) %>%
      dplyr::collect() %>%
      fd::latin1_to_utf8()

    ils <- data.table(date = seq.Date(min(mem$date), max(mem$date), 1))
    ils[mem, on = "date", ils := rate]
    ils[, ils := zoo::na.locf(ils, fromLast = T)]
    ils[, season := fhi::season(fhi::isoyearweek(date), start_week = 40)]

    # assign the new data
    ils[virology, on = "season", A_H1N1 := A_H1N1]
    ils[virology, on = "season", A_H3N2 := A_H3N2]
    ils[virology, on = "season", B_victoria := B_victoria]
    ils[virology, on = "season", B_yamagata := B_yamagata]
    ils[virology, on = "season", B := B_victoria + B_yamagata]

    # fill down missing (i.e. if new season starts) and multiply by ils
    ils[, A_H1N1 := ils * zoo::na.locf(A_H1N1, fromLast = T)]
    ils[, A_H3N2 := ils * zoo::na.locf(A_H3N2, fromLast = T)]
    ils[, B_victoria := ils * zoo::na.locf(B_victoria, fromLast = T)]
    ils[, B_yamagata := ils * zoo::na.locf(B_yamagata, fromLast = T)]
    ils[, B := ils * zoo::na.locf(B, fromLast = T)]

    d <- fd::tbl("normomo_daily_data") %>%
      dplyr::filter(location_code == !!loc) %>%
      dplyr::filter(age == "Total") %>%
      dplyr::collect() %>%
      fd::latin1_to_utf8()

    dates <- intersect(weather$date, d$date)
    dates <- intersect(dates, ils$date)
    dates <- dates[fhi::isoyear_n(as.Date(dates, origin = "1970-01-01")) %in% year_min:year_max]

    w <- weather[date %in% dates & location_code == loc]
    d <- d[date %in% dates]
    ils <- ils[date %in% dates]
    dates <- sort(dates)
    dates <- as.Date(dates, origin = "1970-01-01")

    outcome <- d$nbc
    temp <- w$tx
    A_H1N1 <- ils$A_H1N1 * 100
    A_H3N2 <- ils$A_H3N2 * 100
    B <- ils$B * 100

    fits[[i]] <- attrib::fit_attrib(
      dates = dates,
      outcome = outcome,
      exposure_values = list(
        "tx" = temp,
        "A_H1N1_per10000" = A_H1N1,
        "A_H3N2_per10000" = A_H3N2,
        "B_per10000" = B
      ),
      exposure_types = list(
        "tx" = "cubic",
        "A_H1N1_per10000" = "linear",
        "A_H3N2_per10000" = "linear",
        "B_per10000" = "linear"
      ),
      exposure_knots = list(
        "tx" = c(-5, 20)
      ),
      exposure_boundary_knots = list(
        "tx" = c(-25, 35)
      )
    )
  }

  x <- attrib::create_blup(
    fits[-1]
  )

  return(list(
    location_codes = locs,
    dates = dates,
    norge = fits[[1]],
    counties = x
  ))
}

amort_upload_rrs <- function(
                             year_max = fhi::isoyear_n(),
                             year_min = year_max - 4) {
  x <- amort_get_fits(
    year_max = year_max,
    year_min = year_min
  )

  brain_amort_rr_field_types <- c(
    "location_code" = "TEXT",
    "age" = "TEXT",
    "year_train_min" = "INTEGER",
    "year_train_max" = "INTEGER",
    "exposure" = "TEXT",
    "exposure_value" = "INTEGER",
    "rr_est" = "DOUBLE",
    "rr_l95" = "DOUBLE",
    "rr_u95" = "DOUBLE"
  )

  brain_amort_rr_keys <- c(
    "location_code",
    "age",
    "year_train_min",
    "year_train_max",
    "exposure",
    "exposure_value"
  )

  rr_x <- fd::schema$new(
    db_config = fd::config$db_config,
    db_table = glue::glue("brain_amort_rr"),
    db_field_types = brain_amort_rr_field_types,
    db_load_folder = "/xtmp/",
    keys = brain_amort_rr_keys,
    check_fields_match = TRUE
  )
  rr_x$db_connect()

  year_train_min <- fhi::isoyear_n(min(x$dates))
  year_train_max <- fhi::isoyear_n(max(x$dates))
  age <- "Totalt"

  for (i in 1:(length(x$counties) + 1)) {
    if (i == 1) {
      attrib_small <- x$norge$attrib_fixed
    } else {
      attrib_small <- x$counties[[i - 1]]$attrib_blup
    }
    for (ex in names(attrib_small$pred)) {
      exposure <- ex
      exposure_value <- as.numeric(names(attrib_small$pred[[ex]]$allRRfit))
      rr_est <- as.numeric(attrib_small$pred[[ex]]$allRRfit)
      rr_l95 <- as.numeric(attrib_small$pred[[ex]]$allRRlow)
      rr_u95 <- as.numeric(attrib_small$pred[[ex]]$allRRhigh)

      upload <- data.table(
        location_code = x$location_codes[i],
        age = age,
        year_train_min = year_train_min,
        year_train_max = year_train_max,
        exposure = exposure,
        exposure_value = exposure_value,
        rr_est = rr_est,
        rr_l95 = rr_l95,
        rr_u95 = rr_u95
      )

      rr_x$db_upsert_load_data_infile(upload)
    }
  }
}


amort_results <- function(
                          train_year_max = fhi::isoyear_n(),
                          train_year_min = train_year_max - 4,
                          pred_year_max = train_year_max,
                          pred_year_min = train_year_max - 4) {
  brain_amort_results_field_types <- c(
    "granularity_time" = "TEXT",
    "granularity_geo" = "TEXT",
    "location_code" = "TEXT",
    "age" = "TEXT",
    "season" = "TEXT",
    "yrwk" = "TEXT",
    "date" = "DATE",
    "exposure" = "TEXT",
    "exposure_value" = "TEXT",
    "attr_est" = "DOUBLE",
    "attr_low" = "DOUBLE",
    "attr_high" = "DOUBLE"
  )

  brain_amort_results_keys <- c(
    "granularity_time",
    "granularity_geo",
    "location_code",
    "age",
    "season",
    "yrwk",
    "date",
    "exposure",
    "exposure_value"
  )

  results_x <- fd::schema$new(
    db_config = fd::config$db_config,
    db_table = glue::glue("brain_amort_results"),
    db_field_types = brain_amort_results_field_types,
    db_load_folder = "/xtmp/",
    keys = brain_amort_results_keys,
    check_fields_match = TRUE
  )
  results_x$db_connect()

  x <- amort_get_fits(
    year_max = train_year_max,
    year_min = train_year_min
  )

  # hot summers
  index_summers <- fhi::isoweek_n(x$dates) %in% 21:39 & fhi::isoyear_n(x$dates) %in% pred_year_min:pred_year_max
  number_summers <- which(index_summers)
  seasons <- fhi::isoyear_c(x$dates)[index_summers]
  summers <- split(number_summers, seasons)

  a <- attrib::get_attrib(x$counties, use_blup = T, tag = "tx", range = c(25, 100), sub = summers)
  if (!is.null(a)) {
    a$granularity_time <- "seasonal"
    a$granularity_geo <- "national"
    a$location_code <- "norge"
    a$age <- "Totalt"
    a$season <- names(summers)
    a$yrwk <- glue::glue("{a$season}-21")
    a[fhidata::days, on = "yrwk", date := sun]
    a$exposure <- "tx"
    a$exposure_value <- "hot"

    results_x$db_upsert_load_data_infile(a)
  }

  # winter dates
  if (pred_year_min > train_year_min) {
    index_winters <- !fhi::isoweek_n(x$dates) %in% 21:39 & fhi::isoyear_n(x$dates) %in% (pred_year_min - 1):pred_year_max
  } else {
    index_winters <- !fhi::isoweek_n(x$dates) %in% 21:39 & fhi::isoyear_n(x$dates) %in% pred_year_min:pred_year_max
  }
  number_winters <- which(index_winters)
  seasons <- fhi::season(fhi::isoyearweek(x$dates), start_week = 40)[index_winters]
  winters <- split(number_winters, seasons)[-1]


  # winters - cold
  a <- attrib::get_attrib(x$counties, use_blup = T, tag = "tx", range = c(-100, -5), sub = winters)
  if (!is.null(a)) {
    a$granularity_time <- "seasonal"
    a$granularity_geo <- "national"
    a$location_code <- "norge"
    a$age <- "Totalt"
    a$season <- names(winters)
    a$yrwk <- glue::glue("{stringr::str_sub(a$season,1,4)}-40")
    a[fhidata::days, on = "yrwk", date := sun]
    a$exposure <- "tx"
    a$exposure_value <- "cold"

    results_x$db_upsert_load_data_infile(a)
  }

  # winters - A_H1N1_per1000
  a <- attrib::get_attrib(x$counties, use_blup = T, tag = "A_H1N1_per10000", range = c(1, 1000), sub = winters)
  if (!is.null(a)) {
    a$granularity_time <- "seasonal"
    a$granularity_geo <- "national"
    a$location_code <- "norge"
    a$age <- "Totalt"
    a$season <- names(winters)
    a$yrwk <- glue::glue("{stringr::str_sub(a$season,1,4)}-40")
    a[fhidata::days, on = "yrwk", date := sun]
    a$exposure <- "A_H1N1_per1000"
    a$exposure_value <- "any"

    results_x$db_upsert_load_data_infile(a)
  }

  # winters - A_H3N2_per1000
  a <- attrib::get_attrib(x$counties, use_blup = T, tag = "A_H3N2_per10000", range = c(1, 1000), sub = winters)
  if (!is.null(a)) {
    a$granularity_time <- "seasonal"
    a$granularity_geo <- "national"
    a$location_code <- "norge"
    a$age <- "Totalt"
    a$season <- names(winters)
    a$yrwk <- glue::glue("{stringr::str_sub(a$season,1,4)}-40")
    a[fhidata::days, on = "yrwk", date := sun]
    a$exposure <- "A_H3N2_per1000"
    a$exposure_value <- "any"

    results_x$db_upsert_load_data_infile(a)
  }

  # winters - B_per1000
  a <- attrib::get_attrib(x$counties, use_blup = T, tag = "B_per10000", range = c(1, 1000), sub = winters)
  if (!is.null(a)) {
    a$granularity_time <- "seasonal"
    a$granularity_geo <- "national"
    a$location_code <- "norge"
    a$age <- "Totalt"
    a$season <- names(winters)
    a$yrwk <- glue::glue("{stringr::str_sub(a$season,1,4)}-40")
    a[fhidata::days, on = "yrwk", date := sun]
    a$exposure <- "B_per1000"
    a$exposure_value <- "any"

    results_x$db_upsert_load_data_infile(a)
  }

  # winters - all ili
  a <- attrib::get_attrib(
    x$counties,
    use_blup = T,
    tag = c("A_H1N1_per10000", "A_H3N2_per10000", "B_per10000"),
    range = c(1, 1000),
    sub = winters
  )
  if (!is.null(a)) {
    a$granularity_time <- "seasonal"
    a$granularity_geo <- "national"
    a$location_code <- "norge"
    a$age <- "Totalt"
    a$season <- names(winters)
    a$yrwk <- glue::glue("{stringr::str_sub(a$season,1,4)}-40")
    a[fhidata::days, on = "yrwk", date := sun]
    a$exposure <- "ili_per10000"
    a$exposure_value <- "any"

    results_x$db_upsert_load_data_infile(a)
  }
}
folkehelseinstituttet/dashboards_brain documentation built on March 19, 2020, 4:46 a.m.