R/epidataCS_aggregate.R

################################################################################
### Convert "epidataCS" to the (aggregated) classes "epidata" or "sts"
###
### Copyright (C) 2009-2016,2018 Sebastian Meyer
###
### This file is part of the R package "surveillance",
### free software under the terms of the GNU General Public License, version 2,
### a copy of which is available at https://www.R-project.org/Licenses/.
################################################################################


######################################
### Transform "epidataCS" to "epidata"
######################################

## CAVE: this only generates a SIS epidemic, i.e. atRiskY is set to 1
##       immediately after recovery
## length of infectious period is taken from epidataCS$events$eps.t
## fcols are not generated here. these must be generated by a second call to
## twinSIR's as.epidata with desired f. (for safety)
## tileCentroids is a coordinate matrix whose row names are the tile levels

as.epidata.epidataCS <- function (data, tileCentroids, eps = 0.001, ...)
{
    if (!requireNamespace("intervals"))
        stop("conversion from ", dQuote("epidataCS"), " to ", dQuote("epidata"),
             " requires the ", sQuote("intervals"), " package")

    ### generate twinSIR's epidata object from stgrid (no events)
    centroidIdx <- match(levels(data$stgrid$tile), rownames(tileCentroids), nomatch = NA_integer_)
    if (anyNA(centroidIdx)) {
        stop("some levels of 'data$stgrid$tile' are not available from 'tileCentroids'")
    }
    centroids <- tileCentroids[centroidIdx,]
    if (any(c("xCent", "yCent") %in% names(data$stgrid))) {
        stop("'data$stgrid' already has columns \"xCent\" and \"yCent\"")
    }
    stgrid <- cbind(data$stgrid,
        atRiskY = 1L, event = 0L, Revent = 0L,
        xCent = centroids[,1], yCent = centroids[,2]
        # relies on ordering of stgrid by first BLOCK, then tile
    )
    names(stgrid)[names(stgrid)=="tile"] <- "id"
    timeRange <- with(stgrid, c(start[1], stop[length(stop)]))

    ### now determine "events" with respect to the tiles
    # individual data
    indItimes <- data$events$time
    if (anyDuplicated(indItimes)) stop("'data$events' has concurrent event times")
    indRtimes <- indItimes + data$events$eps.t
    indInts <- intervals::Intervals(cbind(indItimes, indRtimes, deparse.level = 0L))
    indTiles <- data$events$tile

    # tile data
    tileRows <- tapply(seq_along(indTiles), indTiles, c, simplify = FALSE)
    tileInts <- lapply(tileRows, function (rows) {
        if (length(rows)==0L) { matrix(0,0,2) } else if (length(rows)==1L) {
            as.matrix(indInts[rows])
        } else as.matrix(intervals::reduce(indInts[rows]))
    })
    tileNames <- rep.int(names(tileInts), sapply(tileInts, nrow))
    tileItimes <- unlist(lapply(tileInts, function(ints) ints[,1]), use.names=FALSE)
    tileRtimes <- unlist(lapply(tileInts, function(ints) ints[,2]), use.names=FALSE)

    # there are possibly Rtimes which equal Itimes of other individuals
    # => break ties by considering Rtime shortly before Itime (arbitrary choice)
    while(length(dup <- which(tileRtimes %in% tileItimes)) > 0L) {
        tileRtimes[dup] <- tileRtimes[dup] - eps
    }
    # now there could be duplicated Rtimes... grml (choose another 'eps' in this case)
    if (anyDuplicated(tileRtimes)) {
        stop("breaking ties introduced duplicated Rtimes")
    }

    ### add additional stop times to stgrid for tile infections and recoveries
    requiredStopTimes <- sort(c(tileItimes,tileRtimes[tileRtimes<timeRange[2]]))
    requiredStopTimes <- requiredStopTimes[requiredStopTimes > timeRange[1]]  # omit prehistory
    class(stgrid) <- c("epidata", "data.frame")
    attr(stgrid, "timeRange") <- timeRange
    cat("Inserting extra stop times in 'stgrid' (this might take a while) ... ")
    evHist <- intersperse(stgrid, requiredStopTimes, verbose=interactive())
                                        # CAVE: this resets the BLOCK index
    class(evHist) <- "data.frame"
    ### <- THIS IS THE MOST TIME-CONSUMING PART OF THIS FUNCTION !!!
    cat("Done.\n")

    ### set event, Revent and atRiskY indicators
    tileNamesCodes <- match(tileNames, levels(evHist$id))
    # event indicator (currently in evHist event==0 everywhere)
    idxItimes <- match(tileItimes, evHist$stop) - 1L + tileNamesCodes
    evHist$event[idxItimes] <- 1L
    # Revent indicator (currently in evHist Revent==0 everywhere)
    idxRtimes <- match(tileRtimes, evHist$stop) - 1L + tileNamesCodes  # (may contain NA's if Revent after last stop)
    evHist$Revent[idxRtimes] <- 1L
    # atRiskY indicator
    .atRiskY <- rep.int(1L, nrow(evHist))
    nTiles <- nlevels(evHist$id)
    nBlocks <- tail(evHist$BLOCK, 1)
    stopTimes <- unique(evHist$stop)  # has length nBlocks
    for (i in seq_along(tileItimes)) {
        .Itime <- tileItimes[i]
        .Rtime <- tileRtimes[i]
        if (.Rtime <= timeRange[1L]) next  # prehistory infection and removal
        .tileCode <- tileNamesCodes[i]
        idxsTileInEpi <- seq(.tileCode, by=nTiles, length.out=nBlocks)
        first0block <- if (.Itime < stopTimes[1L]) 1L else match(.Itime, stopTimes) + 1L
        last0block <- if (.Rtime > stopTimes[nBlocks]) nBlocks else match(.Rtime, stopTimes)
        .atRiskY[idxsTileInEpi[first0block:last0block]] <- 0L
    }
    evHist$atRiskY <- .atRiskY

    ### Return final epidata object of twinSIR-type
    cat("Generating final \"epidata\" object for use with twinSIR ... ")
    epi <- as.epidata(evHist[-grep("BLOCK", names(evHist))],
        id.col="id", start.col="start", stop.col="stop", atRiskY.col="atRiskY",
        event.col="event", Revent.col="Revent", coords.cols=c("xCent","yCent")
    )
    cat("Done.\n")
    epi
}



####################################################################
### Transform "epidataCS" to "sts" by aggregation of cases on stgrid
####################################################################

epidataCS2sts <- function (object, freq, start,
                           neighbourhood, tiles = NULL,
                           popcol.stgrid = NULL, popdensity = TRUE)
{
    stopifnot(inherits(object, "epidataCS"))
    tileLevels <- levels(object$stgrid$tile)
    if (!is.null(tiles)) {
        stopifnot(inherits(tiles, "SpatialPolygons"),
                  tileLevels %in% row.names(tiles))
        tiles <- tiles[tileLevels,]
    }

    ## prepare sts components
    blocks <- unique(object$stgrid$BLOCK) # epidataCS is sorted
    eventsByCell <- with(object$events@data,
                         table(BLOCK=factor(BLOCK, levels=blocks), tile))
    if (missing(neighbourhood)) { # auto-detect neighbourhood from tiles
        if (is.null(tiles))
            stop("'tiles' is required for auto-generation of 'neighbourhood'")
        neighbourhood <- poly2adjmat(tiles, zero.policy=TRUE)
        if (nIslands <- sum(rowSums(neighbourhood) == 0))
            message("Note: auto-generated neighbourhood matrix contains ",
                    nIslands, ngettext(nIslands, " island", " islands"))
    }
    populationFrac <- if (is.null(popcol.stgrid)) NULL else {
        stopifnot(is.vector(popcol.stgrid), length(popcol.stgrid) == 1)
        popByCell <- object$stgrid[[popcol.stgrid]]
        if (popdensity) popByCell <- popByCell * object$stgrid[["area"]]
        totalpop <- sum(popByCell[seq_along(tileLevels)])
        matrix(popByCell/totalpop,
               nrow=length(blocks), ncol=length(tileLevels),
               byrow=TRUE, dimnames=dimnames(eventsByCell))
    }

    ## initialize sts object (sts() constructor discards NULL slots)
    sts(frequency=freq, start=start, # epoch=seq_along(blocks) [default]
        ##do not set epoch=blocks as blocks[1] could be >1 (from simulation)
        observed=unclass(eventsByCell), neighbourhood=neighbourhood,
        populationFrac=populationFrac, map=tiles)
}

Try the surveillance package in your browser

Any scripts or data that you put into this service are public.

surveillance documentation built on Nov. 28, 2023, 8:04 p.m.