R/llanalysis.R

Defines functions cmpll_bydaterange cmpll_bycounty scendiff ll_bydaterange cutdate ll_bydate ll_bycounty

Documented in cmpll_bycounty cmpll_bydaterange cutdate ll_bycounty ll_bydate ll_bydaterange

#' Functions for analyzing data contributions to the likelihood
#' 
#' These functions all take the table produced by the \code{waicmode} of 
#' \code{\link{gen_likelihood}} and summarize it by different groupings.
#' This allows us to see whether certain counties or certain time periods
#' are having an outsized contribution to the overall likelihood.
#' 
#' To use these functions, run the parameters through a likelihood function
#' generated by \code{\link{gen_likelihood}} with \code{waicmode = TRUE}.  Then
#' run the parameters you wish to investigate through that likelihood function
#' and pass the result as \code{lldetail}.
#' 
#' @importFrom dplyr %>%
#' @param lldetail Data frame of detailed log-likelihood contributions.
#' @param lldetail2 Data frame of detailed log-likelihood contributions for a 
#' comparison scenario.
#' @param n Number of ranges to divide dates into
#' @name llanalysis
NULL


#' @describeIn llanalysis Report log-likelihood contributions by county
#' 
#' @export
ll_bycounty <- function(lldetail)
{
  ## suppress warnings
  fips <- locality <- logl <- NULL
  dplyr::group_by(lldetail, fips, locality) %>%
    dplyr::summarise(logl = sum(logl)) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(logl)
}

#' @describeIn llanalysis Report log-likelihood contributions by date
#' @export
ll_bydate <- function(lldetail)
{
  ## suppress warnings
  date <- logl <- NULL
  dplyr::group_by(lldetail, date) %>%
    dplyr::summarise(logl = sum(logl)) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(logl)
}

#' Convert vector of dates into a factor of date ranges
#' 
#' Convert the date vector into a numeric vector, then apply \code{\link[base]{cut}},
#' and rename the levels of the factor to reflect the date ranges included.
#' 
#' @param dates Vector of dates
#' @param breaks Number of date ranges to split into
#' @export
cutdate <- function(dates, breaks)
{
  t <- as.numeric(dates - as.Date('2020-01-01'))
  tr <- cut(t, breaks=breaks)
  lvals <- 
    sapply(unique(tr),
    function(lvl) {
      lvldates <- dates[tr==lvl]
      d1 <- min(lvldates)
      d2 <- max(lvldates)
      paste(d1,'--',d2)  
    }
    )
  ordered(tr, labels=lvals)
}

#' @describeIn llanalysis Report log-likelihood contributions by date ranges
#' @export
ll_bydaterange <- function(lldetail, n = 5)
{
  ## silence warnings
  logl <- daterange <- NULL
  
  lldetail$daterange <- cutdate(lldetail$date, breaks=n)
  dplyr::group_by(lldetail, daterange) %>%
    dplyr::summarise(logl=sum(logl)) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(logl)
}

## helper function for comparison analyses
scendiff <- function(lldetail, lldetail2) {
  ## if the second model is the better fit, then multiply by -1 so the differences
  ## are (mostly) positive
  if(sum(lldetail$logl) > sum(lldetail2$logl)) {
    fac <- 1
  }
  else {
    fac <- -1
  }
  fac * (lldetail$logl - lldetail2$logl)
}

#' @describeIn llanalysis Compare log-likelihood contributions by county for two scenarios.
#' 
#' @export
cmpll_bycounty <- function(lldetail, lldetail2)
{
  ## suppress warnings
  fips <- locality <- logldiff <- absdiff <- NULL
  
  lldetail$logldiff <- scendiff(lldetail, lldetail2)
  dplyr::group_by(lldetail, fips, locality) %>%
    dplyr::summarise(logldiff = sum(logldiff)) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(absdiff = abs(logldiff)) %>%
    dplyr::arrange(dplyr::desc(absdiff)) %>%
    dplyr::select(fips, locality, logldiff)
}

#' @describeIn llanalysis Compare log-likelihood contributions by date range for two scenarios
#' @export
cmpll_bydaterange <- function(lldetail, lldetail2, n=5)
{
  ## silence warnings
  daterange <- logldiff <- absdiff <- NULL
  
  lldetail$logldiff <- scendiff(lldetail, lldetail2)
  lldetail$daterange <- cutdate(lldetail$date, n)
  dplyr::group_by(lldetail, daterange) %>%
    dplyr::summarise(logldiff = sum(logldiff)) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(absdiff = abs(logldiff)) %>%
    dplyr::arrange(dplyr::desc(absdiff)) %>%
    dplyr::select(daterange, logldiff)
}
rplzzz/CovMitigation documentation built on June 7, 2021, 8:48 a.m.