R/linelist_tools.R

Defines functions linelist_from_case_counts generate_pseudo_linelist

Documented in generate_pseudo_linelist linelist_from_case_counts

#' Generate a sample linelist from the observed linelist and sampled linelists
#'
#' @param count_linelist Dataframe with two variables: date_report and daily_linelist. As generated by  `linelist_from_case_counts`.
#' @param observed_linelist Dataframe with two variables: date_report and daily_observed_linelist. As generated by `split_linelist_by_day``
#' @param merge_actual_onsets Logical, defaults to `TRUE`. Should linelist onset dates be used where available?
#' @return Dataframe with   two variables: date_report and date_onset
#' @export
#' @importFrom data.table setkey merge.data.table fifelse
#' @importFrom purrr map2
generate_pseudo_linelist <- function(count_linelist = NULL, observed_linelist = NULL,
                                     merge_actual_onsets = TRUE) {
  
  if (!is.null(observed_linelist)) {
    if (nrow(observed_linelist) > 0 & merge_actual_onsets) {
      data.table::setkey(count_linelist,date_report, n)
      data.table::setkey(observed_linelist,date_report, n)
      
      populated_linelist <- data.table::merge.data.table(count_linelist,observed_linelist, all.x = TRUE)
      populated_linelist <- populated_linelist[, date_onset := data.table::fifelse(is.na(date_onset.y),
                                                                                   date_onset.x,
                                                                                   date_onset.y)][
                                                                                     , list(date_onset,
                                                                                            date_report = date_report)]
      
      
    }else{
      populated_linelist <- NULL
    }
  }else{
    populated_linelist <- NULL
  }
  
  ## If observed data hasnt been used just return pseudo linelist alone
  if (is.null(populated_linelist)) {
    populated_linelist <- count_linelist[,list(date_onset, date_report)]
  }
  
  return(populated_linelist)
}



#' Sample a linelist from case counts and a reporting delay distribution
#'
#' @param cases Dataframe with two variables: confirm (numeric) and date_report (date).
#'
#' @return A linelist grouped by day as a `data.table` with two variables: date_report, and daily_observed_linelist
#' @export
#' @importFrom purrr map2
#' @importFrom lubridate days
#' @importFrom data.table copy .N
linelist_from_case_counts <- function(cases = NULL) {
  
  cases_linelist <- suppressWarnings(data.table::copy(cases)[confirm > 0,
                                                             .(date_report = rep(date,confirm),date_onset = as.Date(NA_character_)),
                                                             ][, n := 1:.N, date_report])
  return(cases_linelist)
}



#' Convert a linelist into a nested `data.table`` of linelists by day
#'
#' @param linelist Dataframe with the following variables date_onset_symptoms and date_confirmation
#'
#' @return A nested `data.table` with a linelist per day (daily_observed_linelist) variable containing date_onset and date_report and
#' a date_report variable
#' @export
#' @importFrom purrr map
#' @importFrom data.table .N
split_linelist_by_day <- function(linelist = NULL) {
  
  
  if (!all(is.na(linelist$date_onset_symptoms))) {
    linelist_by_day <- linelist[, list(date_onset = date_onset_symptoms, date_report = date_confirmation)
                                ][!is.na(date_onset)][, n := 1:.N, date_report]
  }else{
    linelist_by_day <- data.frame()
  }
  
  
  return(linelist_by_day)
}

#' Sample Onset Dates for Cases missing them
#'
#' @param linelist Dataframe with two variables: date_report and date_onset. As generated by  `generate_pseudo_linelist`.
#' @param earliest_allowed_onset A character string in the form of a date ("2020-01-01") indiciating the earliest
#' allowed onset.
#' @param delay_fn A sampling funtion that takes a single numeric argument and returns a vector of
#' numeric samples this long.
#' @return Dataframe with no missing data and two variables: date_report and date_onset
#' @export
#' @importFrom data.table fifelse
#' @importFrom purrr map2
#' @importFrom data.table copy
sample_delay <- function(linelist = NULL, delay_fn = NULL,
                         earliest_allowed_onset = NULL) {
  
  
  ## Sample onsets for all data at once
  ## If missing an actual onset then use the sampled version
  ## Select only onsets and report dates
  out <- data.table::copy(linelist)
  
  out <- out[, date_onset_sample := date_report - lubridate::days(delay_fn(.N)),][
    is.na(date_onset), date_onset := date_onset_sample][,list(date_onset, date_report)]
  
  if (!is.null(earliest_allowed_onset)) {
    out <- out[date_onset >= as.Date(earliest_allowed_onset)]
    
  }
  
  return(out)
}
epiforecasts/EpiNow documentation built on Oct. 26, 2020, 2:38 p.m.