R/cdc_channel.R

Defines functions epi_observe_alert epi_plot_channel epi_join_channel epi_create_channel epi_adapt_timeserie

Documented in epi_adapt_timeserie epi_create_channel epi_join_channel epi_observe_alert epi_plot_channel

#' @title Create an Endemic Channel
#'
#' @description Exploratory Alarm Tool for Outbreak Detection.
#'
#' @describeIn epi_adapt_timeserie
#'
#' @param db_disease disease surveillance datasets
#' @param db_population estimated population for each adm area
#' @param var_admx administrative code r name as string
#' @param var_year year of agregated observations
#' @param var_week week of agregated observations
#' @param var_event_count number of events per week-year
#' @param var_population estimated population at that year
#'
#' @import dplyr
#' @import tidyr
#' @import broom
#' @import ggplot2
#'
#' @return canal endemico, union y grafico
#'
#' @export epi_adapt_timeserie
#' @export epi_create_channel
#' @export epi_join_channel
#' @export epi_plot_channel
#' @export epi_observe_alert
#'
#' @examples
#'
#' library(tidyverse)
#'
#' # import data -------------------------------------------------------------
#'
#' denv <-
#'   readr::read_csv("https://dengueforecasting.noaa.gov/Training/Iquitos_Training_Data.csv") %>%
#'   mutate(year = lubridate::year(week_start_date),
#'          epiweek = lubridate::epiweek(week_start_date)) %>%
#'   mutate(adm="iquitos") %>%
#'   # cases per season - replace wiht a dummy year
#'   mutate(year = str_replace(season,"(.+)/(.+)","\\1") %>% as.double())
#'
#' # denv %>% count(year,season,lag_year)
#'
#' denv %>% glimpse()
#'
#' # denv %>%
#' #   ggplot(aes(x = week_start_date,y = total_cases)) +
#' #   geom_col()
#'
#' popdb <-
#'   readr::read_csv("https://dengueforecasting.noaa.gov/PopulationData/Iquitos_Population_Data.csv") %>%
#'   janitor::clean_names() %>%
#'   mutate(adm="iquitos")
#'
#' popdb %>% glimpse()
#'
#' # popdb %>% count(year)
#' # denv %>% count(year)
#' # denv %>% left_join(popdb)
#'
#' # first, adapt ------------------------------------------------------------
#'
#' epi_adapted <-
#'   epi_adapt_timeserie(db_disease = denv,
#'                       db_population = popdb,
#'                       var_admx = adm,
#'                       # var_year = year, # must be a common variable between datasets
#'                       # var_week = epiweek,
#'                       var_year = year, # not working - need to create pseudo-years
#'                       var_week = season_week,
#'                       var_event_count = total_cases,
#'                       var_population = estimated_population)
#'
#' # second, filter ----------------------------------------------------------
#'
#' disease_now <- epi_adapted %>%
#'   filter(var_year==max(var_year))
#'
#' disease_pre <- epi_adapted %>%
#'   filter(var_year!=max(var_year))
#'
#' # third, create -----------------------------------------------------------
#'
#' disease_channel <-
#'   epi_create_channel(time_serie = disease_pre,
#'                      disease_name = "denv")
#'
#' disease_channel
#'
#' # fourth, ggplot it -------------------------------------------------------
#'
#' epi_join_channel(disease_channel = disease_channel,
#'                  disease_now = disease_now) %>%
#'   # ggplot
#'   epi_plot_channel() +
#'   labs(title = "Dengue virus Endemic Channel. Iquitos, Peru 2008/2009",
#'        caption = "Source: https://dengueforecasting.noaa.gov/",
#'        # x = "epiweeks",
#'        x = "Seasonal week",
#'        y = "Number of cases") +
#'   theme_bw()
#'
#'

epi_adapt_timeserie <- function(db_disease,
                                db_population,
                                var_admx,
                                var_year,
                                var_week,
                                var_event_count,
                                var_population) {
  db_disease_f <- db_disease %>%
    #adapt variables
    rename(var_admx={{var_admx}},
           var_year={{var_year}},
           var_week={{var_week}},
           var_event_count={{var_event_count}})
  db_population_f <- db_population %>%
    #adapt variables
    rename(var_population={{var_population}},
           var_admx={{var_admx}},
           var_year={{var_year}})

  parte_1_4 <-
    #Paso 1:  N° de casos  por var_week,  7 años
    db_disease_f %>%
    # complete missings
    complete(var_admx,
             var_week=full_seq(var_week,1),
             var_year=full_seq(var_year,1),
             fill = list(var_event_count=0)) %>%
    #Paso 1.2: unir con el tamaño poplacional por var_admx-anho
    left_join(db_population_f) %>%

    ##Paso 1.3: retirar ubigeos sin poplacion para ningun anho
    ##naniar::miss_var_summary()
    ##filter(is.na(var_population)) %>% count(var_admx,var_year) %>% count(n)
    #filter(!is.na(var_population)) %>%

    #Paso 2: Cálculo de tasas y suma 1 (facilitar transformación en 0 casos)
    mutate(tasa=var_event_count/var_population*100000+1,
           #Paso 3: Transformación logarítmica de las tasas
           log_tasa=log(tasa))

  return(parte_1_4)
}

#' @describeIn epi_adapt_timeserie create endemic channel
#' @inheritParams epi_adapt_timeserie
#' @param time_serie time serie
#' @param disease_name free code name of diseases
#' @param method specify the method. "gmean_1sd" is geometric mean w/ 1 standard deviation (default). "gmean_2sd" is gmean w/ 2 sd. "gmean_ci" is gmean w/ 95 percent confidence intervals.

epi_create_channel <- function(time_serie,
                               disease_name="disease_name",
                               method="gmean_1sd") {

  parte_1_4 <- time_serie %>%
    #Paso 3: agrupar
    group_by(var_admx, var_week)

  db_population_f <- time_serie %>%
    select(var_admx,var_year,var_population) %>%
    distinct()

  if(method=="gmean_1sd"){
    parte_final <-
      parte_1_4 %>%
      # Paso 4: Cálculo de medias, 1*DE y delimitacion del 68.26% de valores del log_tasas
      summarise(media_l=mean(log_tasa), sd_l=sd(log_tasa)) %>%
      mutate(lo_95_l=media_l-(1*sd_l), hi_95_l=media_l+(1*sd_l)) %>%
      ungroup()
  }

  if(method=="gmean_2sd"){
    parte_final <-
      parte_1_4 %>%
      # Paso 4: Cálculo de medias, 2*DE y delimitacion del 95.48% de valores del log_tasas
      summarise(media_l=mean(log_tasa), sd_l=sd(log_tasa)) %>%
      mutate(lo_95_l=media_l-(2*sd_l), hi_95_l=media_l+(2*sd_l)) %>%
      ungroup()
  }

  if(method=="gmean_ci"){
    # Paso 4: Cálculo de median e IC95% de log_tasas
    parte_final <-
      parte_1_4 %>%
      nest() %>%
      #ungroup() %>% unnest(cols = c(data))
      # #ISSUE tagged -> non-vairability stops t.test
      # count(log_tasa,sort = T) %>%
      # ungroup() %>% filter(n==7) %>%
      # count(var_admx,sort = T)
      # count(log_tasa)
      # filter(is.na(log_tasa)) %>% count(var_admx)
      # #SOLUTION: retirar var_week con puro cero o NA
      # #PLAN 01
      mutate(sum_tasa=map_dbl(.x = data,.f = ~sum(.$log_tasa),na.rm=TRUE)) %>% #arrange(sum_tasa)
      filter(sum_tasa>0) %>%
      # mutate(t_test=map(.x = data,.f = ~t.test(.$log_tasa))) %>%
      # mutate(tidied=map(t_test,broom::tidy)) %>%
      # unnest(cols = c(tidied))
      # NOT WORKED: non-vairability  maintained after filter of values higher than 0
      #PLAN 02
      mutate(t_test=map(.x = data,.f = ~lm(log_tasa ~ 1,data=.x))) %>%
      mutate(tidied=map(t_test,broom::tidy),
             tidy_ci=map(t_test,broom::confint_tidy)) %>%
      unnest(cols = c(tidied,tidy_ci)) %>%
      rename(media_l=estimate,
             lo_95_l=conf.low,
             hi_95_l=conf.high) %>%
      ungroup()
  }


  parte_final %>%
    # Paso 5: Transformación a unidades originales (de log_tasas a tasas), menos 1 (agregado arriba)
    mutate(media_t= exp(media_l)-1, #sd_t= exp(sd_l)-1,
           lo_95_t=exp(lo_95_l)-1, hi_95_t=exp(hi_95_l)-1) %>%
    # Paso 6: Transformación a de tasas a casos (esperados)
    left_join(db_population_f %>%
                group_by(var_admx) %>%
                summarise(media_pop=mean(var_population))%>%
                ungroup()) %>%
    mutate(median=media_t*media_pop/100000,
           low_95=lo_95_t*media_pop/100000,
           upp_95=hi_95_t*media_pop/100000) %>%
    # seleccionado solo las variables a usar
    select(var_admx, var_week, median, low_95, upp_95) %>%
    mutate(key={{disease_name}})%>%
    mutate(var_admx=as.factor(var_admx)) %>%
    return()

}

#' @describeIn epi_adapt_timeserie join preliminary steps
#' @inheritParams epi_adapt_timeserie
#' @param disease_channel salida de epi_*_mutate
#' @param disease_now nueva base de vigilancia
#' @param adm_columns dataframe with names of the administrative code or name strings

epi_join_channel <- function(disease_channel,
                             disease_now,
                             adm_columns=NULL) {
  #unir con canal
  out <- disease_channel %>%
    full_join(disease_now)
  if (!is.null(adm_columns)) {
    out <- out %>%
      left_join(
        adm_columns
      ) #%>%
    # mutate_at(.vars = vars(value,low_95,upp_95,median),replace_na,0)
  }
  return(out)
}

#' @describeIn epi_adapt_timeserie create ggplot
#' @inheritParams epi_adapt_timeserie
#' @param joined_channel base de datos unida
#' @param n_breaks number of breaks in x axis

epi_plot_channel <- function(joined_channel,n_breaks=10) {
  #unir con canal
  # plot_name <- joined_channel %>%
  #   select(dist,prov,dpto) %>%
  #   distinct() %>% as.character() %>% paste(collapse = ", ")
  joined_channel %>%
    group_by(var_admx) %>%
    mutate(new= max(c(var_event_count,upp_95),na.rm = T),
           #plot_name= str_c(dist,"\n",prov,"\n",dpto)
    ) %>%
    ungroup() %>%
    ggplot(aes(x = var_week, y = var_event_count#, fill=key
    )) +
    geom_area(aes(x=var_week, y=new), fill = "#981000", alpha=0.6, stat="identity")+
    geom_area(aes(x=var_week, y=upp_95), fill = "#fee76a", stat="identity")+
    geom_area(aes(x=var_week, y=median), fill = "#3e9e39",  stat="identity")+
    geom_area(aes(x=var_week, y=low_95), fill = "white", stat="identity") +
    geom_line(size = 0.8) +
    scale_x_continuous(breaks = scales::pretty_breaks(n = {{n_breaks}})) #+
  # xlab("semanas") + ylab("N° de casos") #+
  #labs(title = paste0("Distrito de ",plot_name))
}

#' @describeIn epi_adapt_timeserie create ggplot
#' @inheritParams epi_adapt_timeserie

epi_observe_alert <- function(joined_channel,threshold=upp_95,alert_distance=3) {
  joined_channel %>%
    filter(var_event_count>{{threshold}}) %>%
    group_by(var_admx) %>%
    mutate(difference_lag=var_week-lag(var_week),
           difference_lead=lead(var_week)-var_week) %>%
    ungroup() %>%
    rowwise() %>%
    mutate(difference_min=min(c_across(difference_lag:difference_lead))) %>%
    ungroup() %>%
    select(-difference_lag,-difference_lead) %>%
    filter(difference_min<alert_distance)
}
avallecam/epichannel documentation built on Dec. 26, 2020, 10 p.m.