#' Compute Spatial Residuals from CV Fits
#'
#' @param fit cv-fit
#' @param x original data pattern used to generate the design matrix
#'
#'@export
residuals_cv <- function(fit, x,
dum_min = 1000, dum_max = 5*dum_min, rho_factor = 10,
dum_grid_dimyx = NULL, # in case given use grid dummies
verb = FALSE, mc.cores = 1, weight_by_n = FALSE, ...) {
if(length(fit$quads) < 1|length(fit$cvfits) < 1)
stop("CV fit not detected (fit$quads or fit$cvfits of zero length).")
# the lambda
l <- fit$lambda
nl <- length(fit$lambda)
# cv fits
cvfits <- fit$cvfits
if(verb){
require(looptimer)
t0 <- looptimer(n = length(cvfits))
}
# cv quads
quads <- fit$quads
# setup
Qpars <- fit$Qpars
# we need data to compute the integrals
xpp <- x
# Parse data
x <- check_pp(xpp)
bbox <- get_bbox(xpp)
marks <- parse_marks(xpp)
pat <- get_coords(xpp)
types <- if(is.factor(marks)) levels(marks) else unique(marks)
pat <- pat[,1:2]
# compute risk over CV splits:
nq <- length(cvfits)
nblocks <- ceiling(nq/mc.cores)
bl <- rep(1:nblocks, each=mc.cores)[1:nq]
blocks <- split(1:nq, bl)
# for one quadrat
for_one <- function(k) {
fitk <- cvfits[[k]]
if(!is.list(fitk)){
warning(paste0("CV fit ", k, "is not of correct type."))
NULL
}
else{
W <- quads[[k]]
# test data pattern
xin <- inside.owin(pat[,1], pat[,2], W)
if(sum(xin) == 0){
warning(paste("quadrat", k, "is empty"))
res <- NULL
count <- 0
}
xm <- list(x=pat[xin,], bbox = cbind(W$xrange, W$yrange))
m <- marks[xin]
count <- length(m)
# check if all types present
z <- unique(m)
mis <- setdiff(types, z)
# add a point of the missing type to keep data structure intact for downstream computations.
# add it outisde the window so it will not influence anything.
for(mi in mis) {
#xm$x <- rbind(xm$x, as.matrix(coords(runifpoint(1, W))))
xnew <- c(0, W$xrange[2] + Qpars$border_r + 1)
xm$x <- rbind(xm$x, xnew)
m <- c(m, mi)
}
m <- factor(m)
# check if grid dummies for integration
if(!is.null(dum_grid_dimyx)){
dummies <- dummy_grid(types, nx = dum_grid_dimyx[2], ny = dum_grid_dimyx[1],
bbox = xm$bbox)
} else{
dummies <- NULL
}
#
# compute new Q in the quad window, idea is to estimate the integral by dummy mean.
xspp <- ppp(xm$x[,1], xm$x[,2], marks = m, window = W, check = FALSE)
# check type
if(Qpars$type == "splines")
Qk <- make_Q_splines_multi(xspp, knots1 = Qpars$knots1, knots2 = Qpars$knots2,
spline_order = Qpars$spline_order,
rho_factor = rho_factor,
auto_sat = FALSE, #Qpars$auto_sat, FALSE as not real intens.
sat1l = Qpars$sat1, sat2l = Qpars$sat2,
dum_min = dum_min, dum_max = dum_max,
dummies = dummies,
save_locations = TRUE,
...)
else if(Qpars$type == "genstepper")
Qk <- make_Q_stepper_multi_rmat(xspp, ranges1 = Qpars$ranges1, ranges2 = Qpars$ranges2,
rho_factor = rho_factor,
auto_sat = FALSE,
sat1l = Qpars$sat1, sat2l = Qpars$sat2,
dum_min = dum_min, dum_max = dum_max,
dummies = dummies,
save_locations = TRUE,
...)
else
Qk <- make_Q_stepper_multi(xspp, ranges1 = Qpars$ranges1, ranges2 = Qpars$ranges2,
rho_factor = rho_factor,
auto_sat = FALSE,
sat1l = Qpars$sat1, sat2l = Qpars$sat2,
dum_min = dum_min, dum_max = dum_max,
dummies = dummies,
save_locations = TRUE,
...)
#
# compute residuals
res <- resid_by_Q(fitk, Qk, W = erosion.owin(W, Qpars$border_r), ...)
res_ave <- average_residuals_over_types(res)
# add NA in case AIC stop rule was used:
ni <- nrow(res_ave)
if(ni < nl) res_ave[(ni+1):nl,] <- NA
list(res=res_ave, n=count, res_by_type = res)
}
}# eo for_one(...)
Rk <- list()
# run through the blocks
t0 <- looptimer(n = nblocks, prefix = "* resid CV")
counts <- NULL
res_by_type <- list()
for(b in blocks) {
resl <- mclapply(b, for_one, mc.cores = mc.cores)
for(r in resl) for(n in names(r$res)) Rk[[n]] <- rbind(Rk[[n]], r$res[[n]])
counts <- c(counts, sapply(resl, getElement, "n"))
res_by_type[b] <- lapply(resl, getElement, "res_by_type")
if(verb) print(t0 <- looptimer(t0))
}
# Estimate of risk per residual type as the mean of squared residuals
#
ww <- if(weight_by_n) counts/sum(counts) else 1/length(counts)
#browser()
Rest <- data.frame( sapply(Rk, function(Rkl) colSums(ww*(Rkl^2), na.rm=TRUE)) )
#
# Average Risk over residual types, weighted by standard deviation:
Rsd <- sapply(Rk, function(Rkl) apply(Rkl, 2, sd, na.rm=TRUE))
Rsd <- Rsd/rowSums(Rsd)
Rest[["mean"]] <- rowSums(Rest*Rsd)
#
# Average of inv and pearson residuals only:
Rsd <- Rsd[,-1]/rowSums(Rsd[,-1])
Rest[["mean_no_raw"]] <- rowSums(Rest[,-1]*Rsd)
#
# done
Rest
out <- list(ave=Rest, by_resid = Rk, counts = counts, res_by_type = res_by_type)
class(out) <- c("cvresiduals")
out
}
#' Compute various h-residuals from group lasso fits from the Q object.
#'
#' The integral is estimated using MC expectation over a set of dummies.
#' @param fit glbinc fit
#' @param Q corresponding design object
#' @param W window for border correction
#' @export
resid_by_Q <- function(fit, Q, W=NULL, ...) {
if(is.null(Q$locations)) stop("Q needs to have locations saved (save_locations = TRUE)")
# the subwindow
if(is.null(W))
W <- as.owin(c( Q$bbox + c(1,-1) * Q$parameters$border_r) ) # border correction!
V <- area(W)
# the prediction points, data and MC dummies
these <- inside.owin(Q$locations[,1], Q$locations[,2], W)
isdata <- Q$y[these] == 1
mark <- Q$locations[these, 3]
# the different prediction types.
types <- unique(mark)
# the estimated model coefficients
beta <- fit$beta[, !is.na(fit$aic), drop = FALSE]
# re-level intercepts to match indicators:
for(i in seq_along(types)[-1]) beta[i,] <- beta[i,] + beta[1,]
# compute papangelou
log_lam <- as.matrix( Q$X[these,] %*% beta )
# start computing residuals.
raw <- inv <- pearson <- NULL
# Residual per type
for(i in types) { # per type.
lda <- exp(log_lam[ isdata & mark == i , , drop = FALSE ] ) # at data
ldu <- exp(log_lam[!isdata & mark == i , , drop = FALSE ] ) # at MC integration points
# raw residuals
raw <- rbind(raw, nrow(lda) - apply(ldu, 2, mean) * V)
# inverse
inv <- rbind(inv, colSums(1/lda) - V)
# pearson
pearson <- rbind(pearson, colSums(1/sqrt(lda)) - apply(sqrt(ldu), 2, mean) * V)
}
#
n <- c( table(Q$locations[Q$y == 1, 3]) )
list(raw=raw, inv=inv, pearson=pearson, n = n)
}
#' Sum or average residuals over types
#' @export
average_residuals_over_types <- function(res, weight_by_count = FALSE,
sqrt_weights = FALSE, trim = 0){
if(trim == 0){
n <- res$n
w <- 1 # the usual is just the sum over types
if(sqrt_weights) n <- sqrt(n) # more weight on abundant types, sqrt
if(weight_by_count) w <- n/sum(n)
# total sums over types, weight by point count.
data.frame(raw = colSums(w * res$raw),
inverse = colSums(w * res$inv),
pearson = colSums(w * res$pearson))
}
else{
mf <- function(x) apply(x, 2, mean, trim = trim, na.rm=TRUE)
data.frame(raw = mf(res$raw),
inverse = mf(res$inv),
pearson = mf(res$pearson))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.