#' Functions for analyzing data contributions to the likelihood
#'
#' These functions all take the table produced by the \code{waicmode} of
#' \code{\link{gen_likelihood}} and summarize it by different groupings.
#' This allows us to see whether certain counties or certain time periods
#' are having an outsized contribution to the overall likelihood.
#'
#' To use these functions, run the parameters through a likelihood function
#' generated by \code{\link{gen_likelihood}} with \code{waicmode = TRUE}. Then
#' run the parameters you wish to investigate through that likelihood function
#' and pass the result as \code{lldetail}.
#'
#' @importFrom dplyr %>%
#' @param lldetail Data frame of detailed log-likelihood contributions.
#' @param lldetail2 Data frame of detailed log-likelihood contributions for a
#' comparison scenario.
#' @param n Number of ranges to divide dates into
#' @name llanalysis
NULL
#' @describeIn llanalysis Report log-likelihood contributions by county
#'
#' @export
ll_bycounty <- function(lldetail)
{
## suppress warnings
fips <- locality <- logl <- NULL
dplyr::group_by(lldetail, fips, locality) %>%
dplyr::summarise(logl = sum(logl)) %>%
dplyr::ungroup() %>%
dplyr::arrange(logl)
}
#' @describeIn llanalysis Report log-likelihood contributions by date
#' @export
ll_bydate <- function(lldetail)
{
## suppress warnings
date <- logl <- NULL
dplyr::group_by(lldetail, date) %>%
dplyr::summarise(logl = sum(logl)) %>%
dplyr::ungroup() %>%
dplyr::arrange(logl)
}
#' Convert vector of dates into a factor of date ranges
#'
#' Convert the date vector into a numeric vector, then apply \code{\link[base]{cut}},
#' and rename the levels of the factor to reflect the date ranges included.
#'
#' @param dates Vector of dates
#' @param breaks Number of date ranges to split into
#' @export
cutdate <- function(dates, breaks)
{
t <- as.numeric(dates - as.Date('2020-01-01'))
tr <- cut(t, breaks=breaks)
lvals <-
sapply(unique(tr),
function(lvl) {
lvldates <- dates[tr==lvl]
d1 <- min(lvldates)
d2 <- max(lvldates)
paste(d1,'--',d2)
}
)
ordered(tr, labels=lvals)
}
#' @describeIn llanalysis Report log-likelihood contributions by date ranges
#' @export
ll_bydaterange <- function(lldetail, n = 5)
{
## silence warnings
logl <- daterange <- NULL
lldetail$daterange <- cutdate(lldetail$date, breaks=n)
dplyr::group_by(lldetail, daterange) %>%
dplyr::summarise(logl=sum(logl)) %>%
dplyr::ungroup() %>%
dplyr::arrange(logl)
}
## helper function for comparison analyses
scendiff <- function(lldetail, lldetail2) {
## if the second model is the better fit, then multiply by -1 so the differences
## are (mostly) positive
if(sum(lldetail$logl) > sum(lldetail2$logl)) {
fac <- 1
}
else {
fac <- -1
}
fac * (lldetail$logl - lldetail2$logl)
}
#' @describeIn llanalysis Compare log-likelihood contributions by county for two scenarios.
#'
#' @export
cmpll_bycounty <- function(lldetail, lldetail2)
{
## suppress warnings
fips <- locality <- logldiff <- absdiff <- NULL
lldetail$logldiff <- scendiff(lldetail, lldetail2)
dplyr::group_by(lldetail, fips, locality) %>%
dplyr::summarise(logldiff = sum(logldiff)) %>%
dplyr::ungroup() %>%
dplyr::mutate(absdiff = abs(logldiff)) %>%
dplyr::arrange(dplyr::desc(absdiff)) %>%
dplyr::select(fips, locality, logldiff)
}
#' @describeIn llanalysis Compare log-likelihood contributions by date range for two scenarios
#' @export
cmpll_bydaterange <- function(lldetail, lldetail2, n=5)
{
## silence warnings
daterange <- logldiff <- absdiff <- NULL
lldetail$logldiff <- scendiff(lldetail, lldetail2)
lldetail$daterange <- cutdate(lldetail$date, n)
dplyr::group_by(lldetail, daterange) %>%
dplyr::summarise(logldiff = sum(logldiff)) %>%
dplyr::ungroup() %>%
dplyr::mutate(absdiff = abs(logldiff)) %>%
dplyr::arrange(dplyr::desc(absdiff)) %>%
dplyr::select(daterange, logldiff)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.