knitr::opts_chunk$set(cache = FALSE, fig.height=5, fig.width=7, fig.align = 'center') # knitr::opts_chunk$set(comment=NA, prompt=TRUE, dev='svg', out.width=1024, fig.height=6, fig.width=8)
This vignette (a working draft) tries to illustrate the use of the npsp
package...
library(npsp) library(sp)
As an example, the precipitation
data set will be considered
(a SpatialPointsDataFrame
object). It consists of total precipitations
(rainfall inches) during March 2016 recorded over 1053 locations on the
continental part of USA (available at https://www.ncdc.noaa.gov/cdo-web/datasets).
# Total Monthly Precipitation spoints(precipitation) n <- length(precipitation$y) coord <- coordinates(precipitation) attributes <- attributes(precipitation) labels <- attributes$labels border <- attributes$border interior <- attributes$interior slim <- range(precipitation$y) col <- jet.colors(256) col2 <- hot.colors(256) cpu.time(reset=TRUE)
y.summary <- summary(precipitation$y) # Resumen de datos y.summary scattersplot(precipitation) cpu.time(total = FALSE)
np.fitgeo
(automatically) fits an isotropic nonparametric geostatistical model by estimating the trend and the variogram (using a bias-corrected estimator) iteratively.
nbin <- c(30, 30) geomod <- np.fitgeo(coord, precipitation$y, nbin = nbin, svm.resid = TRUE) cpu.time(total = FALSE)
Data mask (filtering):
spp.grid <- SpatialPoints(coords(geomod)) proj4string(spp.grid) <- proj4string(border) # CRS("+init=epsg:28992 +units=km") mask.sp <- !is.na(over(spp.grid, as(border, 'SpatialPolygons'))) geomod <- mask(geomod, mask = mask.sp | (geomod$binw > 0)) cpu.time(total = FALSE)
Plot final trend and variogram estimates:
plot(geomod, asp = 1)
The algorithm is described below...
cpu.time(reset=TRUE) bin <- binning(coord, precipitation$y, nbin = nbin, set.NA = TRUE) cpu.time(total = FALSE) simage(bin, main = 'Binning averages and data points', slim = slim, col = col2, xlab = labels$x[1], ylab = labels$x[2], sub = paste0("(", paste(dim(bin), collapse = "x"), ")")) plot(border, border = "darkgray", lwd = 2, add = TRUE) plot(interior, border = "lightgray", lwd = 2, add = TRUE) points(bin$data$x, pch ='+') coordvs <- coordvalues(bin) abline(v = coordvs[[1]], lty = 3) abline(h = coordvs[[2]], lty = 3)
lp0.h <- h.cv(bin)$h lp0.h # <- diag(c(6.85061, 2.946348)) # Initial linear Local trend estimation lp0 <- locpol(bin, h = lp0.h, hat.bin = TRUE) cpu.time(total = FALSE) simage(lp0, main = "Initial trend estimates", slim = slim, col = col2, xlab = labels$x[1], ylab = labels$x[2]) plot(border, border = "darkgray", lwd = 2, add = TRUE) plot(interior, border = "lightgray", lwd = 1, add = TRUE) points(coord[,1], coord[,2], col="darkgray") # Residuals lp0.pred <- predict(lp0) lp0.resid <- lp0$data$y - lp0.pred lp0.r2 <- cor(lp0.pred, lp0$data$y)^2 # assuming independence... old.par <- par(mfrow=c(1,2)) hist(lp0.resid) plot(lp0.pred, lp0.resid, main = "Residuals vs Fitted", sub = paste("R^2 =", round(lp0.r2, 2)), xlab = "Fitted values", ylab = "Residuals") abline(h = 0, lty = 2, col = "darkgray") par(old.par)
# rule.svar(coord) nlags <- 60 maxlag <- 10 # Empirical variogram with linear binning: svar.bin <- svariso(coord, lp0.resid, nlags = nlags, maxlag = maxlag) sv.lags <- coords(svar.bin) svar.np.h <- h.cv(svar.bin)$h # Local linear variogram estimation svar.np <- np.svar(svar.bin, h = svar.np.h) # biased # Fitted Shapiro-Botha variogram model svm0 <- fitsvar.sb.iso(svar.np, dk = 0) # Bias-corrected variogram estimation svar.np2 <- np.svariso.corr(lp0, nlags = nlags, maxlag = maxlag, h=svar.np.h, plot = TRUE) # Fitted Shapiro-Botha variogram model svm02 <- fitsvar.sb.iso(svar.np2, dk = 0) cpu.time(total = FALSE) plot(svm02, main = "Nonparametric bias-corrected semivariogram\nand fitted models", legend = FALSE, xlim = c(0,max(coords(svar.np2))), ylim = c(0,max(svar.np2$biny, na.rm = TRUE))) plot(svm0, add = TRUE) plot(svar.np, type = "p", pch = 2, add = TRUE) abline(h = c(svm02$nugget, svm02$sill), lty = 3) abline(v = 0, lty = 3) legend("bottomright", legend = c("corrected", 'biased'), lty = c(1, 1), pch = c(NA, NA), lwd = c(2, 1))
# GCV criterion (Francisco-Fernandez and Opsomer, 2005) lp.h <- h.cv(bin, objective = "GCV", cov = svm02, DEalgorithm = FALSE)$h lp.h # <- diag(c(11.11463, 18.59536)) cpu.time(total = FALSE) # lp.h <- hcv.data(bin, objective = "GCV", cov = svm02, DEalgorithm = FALSE)$h # lp.h # <- diag(c(10.0871, 18.3956)) ## Time of last operation: hcv.data ## user system elapsed ## 10.39 1.39 11.87
Final trend (low resolution):
lp <- locpol(bin, h = lp.h, hat.bin = TRUE) cpu.time(total = FALSE) simage(lp, main = "Trend Estimation", slim = slim, xlab = labels$x[1], ylab = labels$x[2], col = col2) plot(border, border = "darkgray", lwd = 2, add = TRUE) plot(interior, border = "lightgray", lwd = 1, add = TRUE) # spoints(coord[,1], coord[,2], precipitation$y, add = TRUE) # New Residuals lp.pred <- predict(lp) lp.resid <- lp$data$y - lp.pred lp.r2 <- cor(lp.pred, lp$data$y)^2 # assuming independence... old.par <- par(mfrow=c(1,2)) hist(lp.resid) plot(lp.pred, lp.resid, main = "Residuals vs Fitted", sub = paste("R^2 =", round(lp0.r2, 2)), xlab = "Fitted values", ylab = "Residuals") abline(h = 0, lty = 2, col = "darkgray") par(old.par)
svar.bin <- svariso(coord, lp.resid, nlags = nlags, maxlag = maxlag) svar.np.h <- h.cv(svar.bin)$h # svar.np.h <- 2.257358 svar.np <- np.svar(svar.bin, h = svar.np.h) svm <- fitsvar.sb.iso(svar.np, dk = 0) svar.np2 <- np.svariso.corr(lp, nlags = nlags, maxlag = maxlag, h = svar.np.h, plot = TRUE, ylim = c(0, 0.3))
Final variogram:
svm2 <- fitsvar.sb.iso(svar.np2, dk = 0) # Shapiro-Botha model cpu.time(total = FALSE) plot(svm2, main = "Nonparametric bias-corrected semivariogram\nand fitted models", legend = FALSE) plot(svm, add = TRUE) plot(svar.np, type = "p", pch = 2, add = TRUE) abline(h = c(svm2$nugget, svm2$sill), lty = 3) abline(v = 0, lty = 3) plot(svm0, lty = 2, lwd = 1, add = TRUE) plot(svm02, lty = 2, lwd = 2, add = TRUE) legend("bottomright", legend = c("corrected", 'biased', "corrected initial", 'biased initial'), lty = c(1, 1, 2, 2), pch = c(1, 2, NA, NA), lwd = c(2, 1)) cpu.time(total = FALSE)
High resolution binning (120x120):
bin.hd <- binning(coord, precipitation$y, nbin = c(120,120), set.NA = TRUE) simage(bin.hd, main = 'Binning averages', xlab = labels$x[1], ylab = labels$x[2], sub = paste0("(", paste(dim(bin.hd), collapse = "x"), ")"), slim = slim) plot(border, border = "darkgray", lwd = 2, add = TRUE) plot(interior, border = "lightgray", lwd = 1, add = TRUE)
Data mask (filtering):
spp.grid <- SpatialPoints(coords(bin.hd)) proj4string(spp.grid) <- proj4string(border) # CRS("+init=epsg:28992 +units=km") mask.sp <- !is.na(over(spp.grid, as(border, 'SpatialPolygons'))) mask <- mask.sp | (bin.hd$binw > 0) # to avoid filtering data... bin.hd <- mask(bin.hd, mask = mask)
Final trend (high resolution):
lp.hd <- locpol(bin.hd, h = lp.h) simage(lp.hd, main = "Final trend estimates", slim = slim, xlab = labels$x[1], ylab = labels$x[2], col = col2) plot(border, border = "darkgray", lwd = 2, add = TRUE) plot(interior, border = "lightgray", lwd = 1, add = TRUE) cpu.time(total = FALSE)
krig.grid <- np.kriging(lp.hd, svm2) cpu.time(total = FALSE)
simage(krig.grid, 'kpred', main = 'Kriging predictions', slim = slim, xlab = labels$x[1], ylab = labels$x[2]) plot(border, border = "darkgray", lwd = 2, add = TRUE) plot(interior, border = "lightgray", lwd = 1, add = TRUE) simage(krig.grid, 'ksd', main = 'Kriging sd', xlab = labels$x[1], ylab = labels$x[2], col = col2) plot(border, border = "darkgray", lwd = 2, add = TRUE) plot(interior, border = "lightgray", lwd = 1, add = TRUE) cpu.time() # save.image(".RData")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.