Nothing
#' @title Computes the likelihood cross-validation of the raster partitions
#'
#' @description The \code{lemXv} function computes the likelihood cross-validation scores for the observed data with the input bandwidths used for the smoothing matrix. The cross-validation test and training datasets of the observed cases are generated by k-fold sampling without replacement.
#'
#' @param cases Spatial polygons with case data
#' @param population Spatial polygons with population data
#' @param cellsCoarse Minimum resolution for rasterization of case data for numerical accuracy of smoothing matrix
#' @param cellsFine Minimum resolution for rasterization of population data for numerical integration of smoothing matrix
#' @param bw Vector of bandwidths
#' @param fact Aggregation factor prior to 'final step' smoothing (set to zero to skip final step)
#' @param xv Number of cross-validation datasets
#' @param lemObjects List of arrays for the smoothing matrix, and raster stacks for the partition and smoothed offsets
#' @param ncores Number of cores/threads for parallel processing
#' @param iterations List of convergence tolerance, number of iterations, and use of gpuR package for running local-EM recursions
#' @param randomSeed Seed for random number generator
#' @param path Folder for storing rasters
#' @param filename Filename (must have .grd extension) of the risk estimation
#' @param verbose Verbose output
#'
#' @details After using the \code{lemXv} function, a raster stack containing the IDs for the partitions is created by overlaying the spatial polygons of the case and population data. The smoothed offsets and smoothing matrix are computed for the specified bandwidths of each cross-validation set.
#'
#' @return The \code{lemXv} function returns a data frame of specified bandwidths and their cross-validation scores, and if specified, a raster of the risk estimation of the bandwidth with the lowest cross-validation score.
#'
#' @import raster sp Matrix
#' @useDynLib localEM
#' @export
lemXv = function(
cases,
population,
cellsCoarse,
cellsFine,
bw,
fact = 1,
xv = 4,
lemObjects,
ncores = 1,
iterations = list(tol = 1e-5, maxIter = 1000, gpu = FALSE),
randomSeed = NULL,
path = getwd(),
filename = 'riskXv.grd',
verbose = FALSE
){
dir.create(path, showWarnings = FALSE, recursive = TRUE)
if(!(length(grep("\\.gr[id]$", filename)))){
warning("filename should have .grd extension")
}
if(!length(grep('/', filename))) {
riskFile = file.path(path, filename)
}
# warning messages
if(missing(lemObjects)) {
if(class(cases) != 'SpatialPolygonsDataFrame') {
stop("spatial polygons for case data must be provided if smoothing matrix not supplied")
}
if(class(cases) == 'SpatialPolygonsDataFrame') {
#names of interest for regions in the coarse shapefile
countcol = grep('^(count|cases)', names(cases), value=TRUE, ignore.case=TRUE)
if(!length(countcol)) {
stop("column names of case data must start with 'count' or 'cases'")
}
}
}
# initializing parallel features (if specified)
if(ncores > 1) {
theCluster = parallel::makeCluster(spec=ncores, type='PSOCK', methods=TRUE)
parallel::setDefaultCluster(theCluster)
parallel::clusterEvalQ(theCluster, library('raster'))
} else {
theCluster = NULL
}
# for cross-validation scores
if(!is.null(randomSeed)) {
set.seed(randomSeed)
}
# parameter specifications (if not supplied)
if(!length(iterations$tol)) iterations$tol = 1e-6
if(!length(iterations$maxIter)) iterations$maxIter = 2000
if(!length(iterations$gpu)) iterations$gpu = FALSE
if(missing(lemObjects)) {
if(verbose) {
cat(date(), "\n")
cat("computing smoothing matrix\n")
}
##raster partition
xvLemRaster = rasterPartition(
polyCoarse = cases,
polyFine = population,
cellsCoarse = cellsCoarse,
cellsFine = cellsFine,
xv = xv,
bw = bw,
ncores = theCluster,
path = path,
idFile = 'idXv.grd',
offsetFile = 'offsetXv.grd',
verbose = verbose)
# restart cluster
if(ncores > 1 & !is.null(theCluster)) {
parallel::stopCluster(theCluster)
theCluster = parallel::makeCluster(spec=ncores, type='PSOCK', methods=TRUE)
parallel::setDefaultCluster(theCluster)
parallel::clusterEvalQ(theCluster, library('raster'))
}
# smoothing matrix
xvSmoothMat = smoothingMatrix(
rasterObjects = xvLemRaster,
ncores = theCluster,
path = path,
filename = 'smoothMatXv.grd',
verbose = verbose)
# save smoothing matrix, useful in case of failure later on
saveRDS(xvSmoothMat, file = file.path(path, 'smoothingMatrix.rds'))
# xvSmoothMat = readRDS(file.path(path, 'smoothingMatrix.rds'))
xvMat = xvSmoothMat$xv
} else {
if(verbose) {
cat(date(), "\n")
cat("using supplied smoothing matrix\n")
}
xvSmoothMat = lemObjects
xvMat = lemObjects$xv
}
# checking structure of case data
if(is.vector(cases)) {
cases = data.frame(cases=cases)
}
if(is.matrix(cases)) {
cases = as.data.frame(cases)
}
if(class(cases) == 'SpatialPolygonsDataFrame') {
polyCoarse = cases
cases = cases@data
} else {
polyCoarse = NULL
}
#names of interest for regions in the coarse shapefile
countcol = grep('^(count|cases)', names(cases), value=TRUE, ignore.case=TRUE)
# if(!length(countcol)){
# countcol = grep("^(id|name)", names(cases), invert=TRUE, value=TRUE)[1]
# }
if(verbose) {
cat(date(), "\n")
cat("running local-EM for validation sets\n")
}
# estimate risk (by partition, not continuous) for each bw/cv combination
forMoreArgs = list(
x=cases[,countcol, drop=FALSE],
lemObjects = xvSmoothMat,
tol = iterations$tol,
maxIter = iterations$maxIter,
gpu = iterations$gpu,
verbose=verbose)
if(!is.null(theCluster)) {
estList = parallel::clusterMap(
theCluster,
finalLemIter,
bw = xvSmoothMat$bw,
MoreArgs = forMoreArgs,
SIMPLIFY=FALSE
)
} else {
estList = mapply(
finalLemIter,
bw = xvSmoothMat$bw,
MoreArgs = forMoreArgs,
SIMPLIFY=FALSE
)
}
if(any(unlist(lapply(estList, class)) == 'try-error') ) {
warning("errors in local-em estimation")
}
estListExp = try(lapply(estList, function(x) x$expected))
estListRisk = lapply(estList, function(x) x$risk)
estDf = as.matrix(do.call(cbind, estListExp))
riskDf = as.matrix(do.call(cbind, estListRisk))
colnames(estDf) = colnames(riskDf) = paste(
rep(names(estList), unlist(lapply(estListExp, function(xx) dim(xx)[2]))),
unlist(lapply(
estListExp, colnames
)), sep = '_')
rownames(riskDf) = colnames(xvSmoothMat$regionMat)
if(verbose) {
cat("computing CV scores\n")
}
# compute the CV scores
# expected counts in left out regions
xvEst = estDf[,grep("xv[[:digit:]]+", colnames(estDf))]
#Sxv = gsub("^bw[[:digit:]]+xv|_(count|cases|count[[:digit:]]|cases[[:digit:]])+?$|_$", "", colnames(xvEst))
Sxv = substr(colnames(xvEst) , regexpr("xv",colnames(xvEst))+2, regexpr("_",colnames(xvEst))-1)
Scount = gsub("bw[[:digit:]]+xv[[:digit:]]+_", "", colnames(xvEst))
if(all(nchar(Scount)==0)) Scount = rep_len(countcol, length(Scount))
Sbw = gsub('^bw|xv.*', "", colnames(xvEst))
suppressMessages(xvEstMask <- xvMat[,Sxv] * xvEst)
# observed counts in left out regions
suppressMessages(xvObs <- xvMat[,Sxv] * as.matrix(cases[,Scount]))
logProbCoarse = stats::dpois(as.matrix(xvObs), as.matrix(xvEstMask), log=TRUE)
badVec = which(apply(logProbCoarse, 1, function(xx) any(is.na(xx))))
if(length(badVec)) {
warning(paste("regions containing NA: ",
paste(badVec, collapse=', ')), "\ntreating all NA's as 0")
}
Sbad = c()
i = 0
for(i in 1:length(logProbCoarse)){
if(is.na(as.vector(logProbCoarse[i])) == "TRUE")
Sbad = append(Sbad, i)
}
logProbCoarse[Sbad] = 0
logProbCoarse[is.infinite(logProbCoarse)] = 0
logProb = apply(logProbCoarse, 2, sum)
xvFullMat = list(
logProb = logProbCoarse, obs = xvObs, exp = xvEstMask,
xvMat = xvMat, Sxv = Sxv, xvEst = xvEst, cases = cases, Scount = Scount
)
logProbFull = data.frame(
bw = as.numeric(Sbw),
cases = Scount,
fold = Sxv,
minusLogProb = -logProb
)
xvRes = stats::aggregate(
logProbFull[,'minusLogProb'],
as.list(logProbFull[,c('bw','cases')]),
sum,
na.rm = TRUE
)
xvRes = stats::reshape(
xvRes, direction = 'wide',
idvar = 'bw',
timevar = 'cases'
)
colnames(xvRes) = gsub("^x[.]", "", colnames(xvRes))
xvRes = xvRes[,c('bw',countcol)]
minXvScore = apply(xvRes[,countcol, drop=FALSE],2,min, na.rm=TRUE)
xvRes[,countcol] = xvRes[,countcol] -
matrix(minXvScore, nrow=nrow(xvRes), ncol=length(minXvScore), byrow=TRUE)
# stuff <<- list(xvSmoothMat$rasterFine, riskDf)
newDf <- riskDf[
as.character(raster::levels(xvSmoothMat$rasterFine)[[1]]$partition),]
riskRaster = xvSmoothMat$rasterFine
levels(riskRaster)[[1]] = as.data.frame(cbind(
ID = raster::levels(xvSmoothMat$rasterFine)[[1]]$ID,
newDf))
result = list(
xv = xvRes,
xvFull = logProbFull,
riskAll = riskRaster,
smoothingMatrix = xvSmoothMat,
expected = polyCoarse,
folds = xvMat,
extra = xvFullMat
)
# estimate continuous risk at high resolution (if specified)
if(fact > 0) {
if(verbose) {
cat(date(), "\n")
cat("final smoothing step for bw with lowest CV score\n")
}
bwMin = result$xv[apply(result$xv[,colnames(result$xv)[-1]],2,which.min),'bw']
Slayers = paste('bw', format(bwMin,scientific = FALSE), '_', countcol, sep = '')
result$riskEst = finalSmooth(
x = result,
Slayers = Slayers,
fact = fact,
filename = riskFile,
ncores = theCluster
)
# names(result$riskEst) = Slayers
result$bw = paste('bw', bwMin, sep = '')
}
# done with the cluster
if(!is.null(theCluster))
parallel::stopCluster(theCluster)
if(verbose) {
cat(date(), "\n")
cat("done\n")
}
return(result)
}
#' @export
lemXvMulti = function(
cases,
population,
cellsCoarse,
cellsFine,
bw,
fact = 1,
xv = 4,
lemObjects,
ncores = 1,
iterations = list(tol = 1e-5, maxIter = 1000, gpu = FALSE),
randomSeed = NULL,
path = getwd(),
filename = 'riskXv.grd',
verbose = FALSE
){
if(verbose) {
cat(date(), '\n')
cat('running local-EM for validation sets for all maps\n')
}
resList = list()
lambdaList = list()
for(inM in 1:length(cases)) {
## partition-level risk estimation
if(is.null(randomSeed)) {
randomSeed = inM
}
lemXvMap = lemXv(
cases = cases[[inM]],
fact = 0,
lemObjects = lemObjects[[inM]],
ncores = ncores,
iterations = iterations,
randomSeed = randomSeed,
path = path,
filename = paste0(gsub('.grd', '', filename), inM, '.grd'),
verbose = verbose)
resList[[inM]] = lemXvMap
lambdaRasterMap = raster::deratify(lemXvMap$riskAll)
names(lambdaRasterMap) = paste0(names(lambdaRasterMap), '_', inM)
lambdaList[[inM]] = lambdaRasterMap
}
lambdaRasterStack = stack(lambdaList)
lambdaRaster = raster::stackApply(lambdaRasterStack,
indices = gsub('_[[:digit:]]+', '', names(lambdaRasterStack)),
fun = 'sum', na.rm = TRUE)
names(lambdaRaster) = gsub('index_', '', names(lambdaRaster))
if(verbose) {
cat(date(), '\n')
cat('computing cross-validation scores\n')
}
xvList = list()
logProbFull = NULL
for(inM in 1:length(cases)) {
## risk estimation for cross-validation regions
lemXvMap = resList[[inM]]
xvMatMap = lemXvMap$smoothingMatrix$xv
xvList[[inM]] = xvMatMap
# Nxv = dim(xvMatMap)[2] / length(cases)
idMatMap = as.numeric(dimnames(xvMatMap)[[2]]) - 1
# idMatMap = (idMatMap %/% Nxv) + 1
idMatMap = (idMatMap %% length(cases)) + 1
idMatMap = which(idMatMap == inM)
regionMatMap = lemXvMap$smoothingMatrix$regionMat
offsetMatMap = lemXvMap$smoothingMatrix$offsetMat[['offset']]
offsetMatMap = offsetMatMap[colnames(regionMatMap), colnames(regionMatMap)]
partitionAreasMatMap = Matrix::Diagonal(
length(lemXvMap$smoothingMatrix$partitionAreas),
lemXvMap$smoothingMatrix$partitionAreas)
dimnames(partitionAreasMatMap) = list(
names(lemXvMap$smoothingMatrix$partitionAreas),
names(lemXvMap$smoothingMatrix$partitionAreas)
)
partitionAreasMatMap = partitionAreasMatMap[colnames(regionMatMap), colnames(regionMatMap)]
riskRasterMap = stack(
raster::deratify(lemXvMap$smoothingMatrix$rasterFine)[['partition']],
lambdaRaster[[grep('xv', names(lambdaRaster))]])
riskRasterMap = raster::mask(riskRasterMap, lemXvMap$smoothingMatrix$rasterFine)
littleLambdaMap = as.data.frame(stats::na.omit(raster::unique(riskRasterMap)))
duplLambdaMap = unique(littleLambdaMap$partition[duplicated(littleLambdaMap$partition)])
duplLambdaMap = littleLambdaMap$partition %in% duplLambdaMap
zeroLambdaMap = apply(littleLambdaMap[,grep('bw', names(littleLambdaMap))] == 0, 1, all)
littleLambdaMap = littleLambdaMap[!(duplLambdaMap & zeroLambdaMap),]
littleLambdaMap = aggregate(
littleLambdaMap[,grep('bw', names(littleLambdaMap))],
by = list(partition = littleLambdaMap$partition),
FUN = 'mean', na.rm = TRUE)
bigLambdaMap = partitionAreasMatMap %*%
as.matrix(littleLambdaMap[,grep('bw', names(littleLambdaMap))])
## cross-validation scores
countcol = grep('count', names(cases[[inM]]), value = TRUE)
expectedCoarseMap = as.matrix(regionMatMap %*% offsetMatMap %*% bigLambdaMap)
obsCountsMap = getObsCounts(
x = as.matrix(data.frame(cases[[inM]])[,countcol]),
polyCoarse = cases[[inM]],
regionMat = regionMatMap)
obsCountsMap = matrix(obsCountsMap,
nrow = nrow(expectedCoarseMap), ncol = ncol(expectedCoarseMap),
dimnames = dimnames(expectedCoarseMap))
Sxv = as.numeric(
substr(colnames(obsCountsMap),
regexpr('xv', colnames(obsCountsMap)) + 2, regexpr('_', colnames(obsCountsMap)) - 1))
expectedCoarseMap = expectedCoarseMap[,Sxv %in% idMatMap]
obsCountsMap = obsCountsMap[,Sxv %in% idMatMap]
for(inX in idMatMap) {
expectedCoarseMap[!xvMatMap[,inX], grep(paste0('xv', inX), colnames(expectedCoarseMap))] = 0
obsCountsMap[!xvMatMap[,inX], grep(paste0('xv', inX), colnames(obsCountsMap))] = 0
}
logProbCoarseMap = stats::dpois(as.matrix(obsCountsMap),
as.matrix(expectedCoarseMap),
log = TRUE)
logProbCoarseMap[is.infinite(logProbCoarseMap)] = 0
logProbMap = apply(logProbCoarseMap, 2, sum)
Sbw = as.numeric(gsub('^bw|(xv[[:digit:]]+_[[:alnum:]]+$)', '', colnames(obsCountsMap)))
Scount = gsub('bw[[:digit:]]+xv[[:digit:]]+_', '', colnames(obsCountsMap))
Sxv = paste0(
substr(colnames(obsCountsMap),
regexpr('xv', colnames(obsCountsMap)) + 2, regexpr('_', colnames(obsCountsMap)) - 1),
'_', inM)
logProbFull = rbind(logProbFull,
data.frame(bw = Sbw, cases = Scount, fold = Sxv, minusLogProb = -logProbMap))
}
rownames(logProbFull) = NULL
xvRes = stats::aggregate(
logProbFull[,'minusLogProb'],
as.list(logProbFull[,c('bw','cases')]),
sum,
na.rm = TRUE)
xvRes = stats::reshape(
xvRes, direction = 'wide',
idvar = 'bw',
timevar = 'cases')
colnames(xvRes) = gsub('^x[.]', '', colnames(xvRes))
xvRes = xvRes[,c('bw',countcol)]
minXvScore = apply(xvRes[,countcol, drop=FALSE], 2, min, na.rm = TRUE)
xvRes[,countcol] = xvRes[,countcol] -
matrix(minXvScore, nrow = nrow(xvRes), ncol = length(minXvScore), byrow = TRUE)
lambdaRasterFinal = lambdaRaster[[grep('xv', names(lambdaRaster), invert = TRUE)]]
for(inM in 1:length(cases)) {
lambdaRasterMask = lambdaList[[inM]][[1]]
lambdaRasterFinal = raster::mask(lambdaRasterFinal, lambdaRasterMask)
}
result = list(
xv = xvRes,
xvFull = logProbFull,
riskAll = lambdaRasterFinal,
smoothingMatrix = resList,
folds = xvList)
# estimate continuous risk at high resolution (if specified)
if(fact > 0) {
if(verbose) {
cat(date(), '\n')
cat('final smoothing step for bw with lowest CV score\n')
}
bwMin = result$xv[apply(result$xv[,colnames(result$xv)[-1]], 2, which.min),'bw']
Slayers = paste('bw', format(bwMin, scientific = FALSE), '_', countcol, sep = '')
riskFile = file.path(path, filename)
result$riskEst = finalSmoothMulti(
x = result$riskAll,
focalMat = result$smoothingMatrix[[1]]$smoothingMatrix$focal$focal,
Slayers = Slayers,
fact = fact,
filename = riskFile,
ncores = ncores)
result$bw = paste('bw', bwMin, sep = '')
}
if(verbose) {
cat(date(), '\n')
cat('done\n')
}
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.