R/utils.r

Defines functions pit_test_sample assess_incidence_forecast calculate_quantiles

Documented in assess_incidence_forecast calculate_quantiles pit_test_sample

##' Performs an Anderson-Darling test for uniformity for a randomised PIT histogram using predictive Monte-Carlo samples
##'
##' See Eqs. 1 and 2 in Czado, Geniting and Held (2009), Predictive Model Assessment for Count Data, Biometrics Vol. 65, No. 4 (Dec., 2009), pp. 1254-1261
##' @param y vector of data (length n)
##' @param dat nxN matrix of predictive samples, n (number of rows) being the number of data points and N (number of columns) the number of Monte Carlo samples
##' @param J the number of bins in the PIT histogram. If not given, will use the square root of n
##' @param N the number of tests to perform, each time re-randomising the PIT
##' @importFrom goftest ad.test
##' @return the n p-values from the tests
##' @author Sebastian Funk \email{sebastian.funk@lshtm.ac.uk}
##' @export
pit_test_sample <- function(y, dat, J, N=10) {
    if (missing(J)) J <- as.integer(round(sqrt(length(y))))
    f <- lapply(seq_along(y), function(i) ecdf(dat[i, ]))
    P_x <- vapply(seq_along(y), function(i) f[[i]](y[i]), .0)
    P_xm1 <- vapply(seq_along(y), function(i) f[[i]](y[i]-1), .0)
    pvalues <-
        replicate(N, ad.test(P_xm1 + runif(length(P_x)) * (P_x - P_xm1))$p.value)
    return(pvalues)
}

##' Determines sharpness of an incidence forecast with an Anderson-Darling test of the randomised PIT histogram.
##'
##' @inheritParams pit_test_sample
##' @return data frame with mean and standard deviation of the resulting p values
##' @author Sebastian Funk \email{sebastian.funk@lshtm.ac.uk}
##' @seealso pit_test_sample
##' @param ... other arguments for \code{\link{pit_test_sample}}
calibration_sample <- function (y, dat, ...) {
    pvalues <- pit_test_sample(y, dat, ...)
    return(data.frame(mean=mean(pvalues), sd=sd(pvalues)))
}

##' Determines sharpness of an incidence forecast as the width of the prediction interval
##'
##' @return data frame with sharpness for each interval by date
##' @author Sebastian Funk \email{sebastian.funk@lshtm.ac.uk}
##' @param interval prediction interval(s) to use
sharpness_sample <- function (y, dat) {
    sharpness <- apply(dat, 1, mad)
    return(data.frame(date=as.Date(rownames(dat)), sharpness=sharpness))
}

##' Determines bias of an incidence forecast from predictive Monte-Carlo samples as the proportion of predictive samples greater than the data
##'
##' @return data frame with bias by date
##' @author Sebastian Funk \email{sebastian.funk@lshtm.ac.uk}
bias_sample <- function (y, dat) {
    f <- lapply(seq_along(y), function(i) ecdf(dat[i, ]))
    P_x <- vapply(seq_along(y), function(i) f[[i]](y[i]), .0)
    P_xm1 <- vapply(seq_along(y), function(i) f[[i]](y[i]-1), .0)
    res <- data.frame(date=as.Date(rownames(dat)),
                      bias=1-(P_x+P_xm1))
    return(res)
}

##' Determines the mean absolute error of an incidence forecast from predictive
##' Monte-Carlo samples as difference between the mean and the data
##'
##' @return data frame with mae by date
##' @author Sebastian Funk \email{sebastian.funk@lshtm.ac.uk}
ae_sample <- function (y, dat) {
    ae <- vapply(seq_along(y), function(i) {abs(median(dat[i, ])-y[i])}, .0)
    res <- data.frame(date=as.Date(rownames(dat)),
                      ae=ae)
    return(res)
}

##' Determines calibration of an incidence forecast with an Anderson-Darling test of the randomised PIT histogram.
##'
##' @param x a data frame with 'date', 'last_obs', (observed) 'incidence',  (predicted) 'cases'and 'sample_id' columns
##' @param func function for forecast assessment
##' @return mean and standard deviation of the resulting p values
##' @author Sebastian Funk \email{sebastian.funk@lshtm.ac.uk}
##' @seealso calibration sharpness bias
apply_forecast_metric <- function (x, func, ...) {
    ## remove any NAs
    y <- x %>%
        group_by(last_obs) %>%
        summarise(incidence=unique(incidence)) %>%
        .$incidence
    dat <- x %>%
        select(-incidence, -date) %>%
        spread(sample_id, cases) %>%
        select(-last_obs) %>%
        as.matrix
    colnames(dat) <- as.character(unique(x$sample_id))
    rownames(dat) <- as.character(unique(x$date))
    return(func(y, dat, ...))
}

##' Assesses incidence forecasts for the 2014--15 Ebola epidemic in Western Area in Sierra Leone from Monte-Carlo samples
##'
##' @param x a forecast data frame with columns 'date', 'last_obs', 'incidence', 'cases' and 'sample_id', and possibly more columns (which will be grouped by)
##' @inheritParams apply_forecast_metric
##' @importFrom dplyr %>% filter left_join group_by_at vars select mutate
##' @importFrom tidyr gather nest unnest spread
##' @importFrom tibble enframe
##' @importFrom purrr map
##' @return data frame with (random) PIT p-values
##' @author Sebastian Funk \email{sebastian.funk@lshtm.ac.uk}
assess_incidence_forecast <- function(x, func, ...) {
    return(x %>%
           filter(date > last_obs) %>%
           left_join(ebola_wa, by="date") %>%
           filter(!is.na(incidence)) %>%
           mutate(horizon=as.integer((date-last_obs)/7)) %>%
           group_by_at(vars(-cases, -date, -last_obs, -sample_id, -incidence)) %>%
           nest %>%
           mutate(score=map(data, apply_forecast_metric, func, ...)) %>%
           unnest(score) %>%
           ungroup())
}

##' Calculate quantiles for a data frame
##'
##' am x a data frame with a 'value' and a 'sample_id' column
##' @param quantiles the quantiles to compute
##' @return data frame with quantiles
##' @author Sebastian Funk \email{sebastian.funk@lshtm.ac.uk}
##' @importFrom dplyr %>% select group_by_at summarise
##' @importFrom tidyr unnest spread
##' @importFrom tibble enframe
##' @importFrom purrr map
calculate_quantiles <- function(x, quantiles)
{
    return (x %>%
            select(-sample_id) %>%
            group_by_at(vars(-value)) %>%
            summarise(quantiles=list(enframe(quantile(value, quantiles)))) %>%
            unnest(quantiles) %>%
            spread(name, value) %>%
            ungroup())
}
sbfnk/ebola.forecast.wa.sl documentation built on Feb. 18, 2020, 6:19 p.m.