docs/Chap3_Lab2.R

## Wikle, C. K., Zammit-Mangion, A., and Cressie, N. (2019), 
## Spatio-Temporal Statistics with R, Boca Raton, FL: Chapman & Hall/CRC
## Copyright (c) 2019 Wikle, Zammit-Mangion, Cressie
##
## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License
## as published by the Free Software Foundation; either version 2
## of the License, or (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

library("leaps")
library("lmtest")
library("nlme")
library("ape")
library("broom")
library("FRK")
library("purrr")
library("lattice")
library("ggplot2")
library("RColorBrewer")
library("dplyr")
library("gstat")
library("sp")
library("spacetime")
library("STRbook")
library("tidyr")

## ------------------------------------------------------------------------
data("NOAA_df_1990", package = "STRbook")
Tmax <- filter(NOAA_df_1990,       # subset the data
              proc == "Tmax" &     # only max temperature
              month == 7 &         # July
              year == 1993)        # year of 1993

## ------------------------------------------------------------------------
G <- auto_basis(data = Tmax[,c("lon","lat")] %>%  # Take Tmax
                       SpatialPoints(),           # To sp obj
                nres = 1,                         # One resolution
                type = "Gaussian")                # Gaussian BFs

## ------------------------------------------------------------------------
S <- eval_basis(basis = G,                     # basis functions
                s = Tmax[,c("lon","lat")] %>%  # spat locations
                     as.matrix()) %>%          # conv. to matrix
     as.matrix()                               # results as matrix
colnames(S) <- paste0("B", 1:ncol(S)) # assign column names

## ------------------------------------------------------------------------
Tmax2 <- cbind(Tmax, S) %>%             # append S to Tmax
         select(-year, -month, -proc,   # and remove vars we
                -julian, -date)         # will not be using in
                                        # the model

## ------------------------------------------------------------------------
Tmax_no_14 <- filter(Tmax2, !(day == 14))  # remove day 14

## ------------------------------------------------------------------------
Tmax_July_lm <- lm(z ~ (lon + lat + day)^2 + .,     # model
                   data = select(Tmax_no_14, -id))  # omit id

## ------------------------------------------------------------------------
options(width = 55)

## ------------------------------------------------------------------------
Tmax_July_lm %>% summary()

## ------------------------------------------------------------------------
Tmax_July_gls <- gls(z ~ (lon + lat + day)^2 + .,
                     data = select(Tmax_no_14, -id),
                     correlation = corGaus(value = 0.5,
                                        form = ~ lon + lat + day,
                                        fixed = TRUE))

## ------------------------------------------------------------------------
Tmax_July_lm4 <- list()   # initialize
for(i in 0:4) {           # for four steps (after intercept model)
   ## Carry out stepwise forward selection for i steps
   Tmax_July_lm4[[i+1]] <- step(lm(z ~ 1,
                               data = select(Tmax_no_14, -id)),
                               scope = z ~(lon + lat + day)^2 + .,
                               direction = 'forward',
                               steps = i)
}

## ------------------------------------------------------------------------
regfit.full = regsubsets(z ~ 1 + (lon + lat + day)^2 + .,  # model
                         data = select(Tmax_no_14, -id),
                         method = "forward",
                         nvmax = 4)                  # 4 steps

## ------------------------------------------------------------------------
regfit.summary <- summary(regfit.full)

## ------------------------------------------------------------------------
set.seed(1) # Fix seed for reproducibility
Tmax_no_14_2 <- Tmax_no_14 %>%
                mutate(B13 = B5 + 0.01*rnorm(nrow(Tmax_no_14)))

## ------------------------------------------------------------------------
Tmax_July_lm3 <- lm(z ~ (lon + lat + day)^2 + .,
                   data = Tmax_no_14_2 %>%
                          select(-id))

## ------------------------------------------------------------------------
summary(Tmax_July_lm3)

## ------------------------------------------------------------------------
vcov(Tmax_July_lm3)[c("B5", "B13"),c("B5", "B13")] %>%
    cov2cor()

## ------------------------------------------------------------------------
Tmax_no_14$residuals <- residuals(Tmax_July_lm)

## ------------------------------------------------------------------------
g <- ggplot(filter(Tmax_no_14, day %in% 24:31)) +
  geom_point(aes(lon, lat, colour = residuals)) +
  facet_wrap(~ day, ncol=4) +
  col_scale(name = "degF") +
  geom_point(data = filter(Tmax_no_14,day %in% 24:31 &
                                        id %in% c(3810, 3889)),
               aes(lon, lat), colour = "black",
               pch = 2, size = 2.5) +
  theme_bw()

## ------------------------------------------------------------------------
print(g)

## ------------------------------------------------------------------------
P <- list()                                 # init list
days <- c(1:13, 15:31)                      # set of days
for(i in seq_along(days)) {                 # for each day
  Tmax_day <- filter(Tmax_no_14,
                     day == days[i])        # filter by day
  station.dists <- Tmax_day %>%             # take the data
    select(lon, lat) %>%                    # extract coords.
    dist() %>%                              # comp. dists.
    as.matrix()                             # conv. to matrix
  station.dists.inv <- 1/station.dists      # weight matrix
  diag(station.dists.inv) <- 0              # 0 on diag
  P[[i]] <- Moran.I(Tmax_day$residuals,     # run Moran's I
                    station.dists.inv) %>%
            do.call("cbind", .)             # conv. to df
}

## ------------------------------------------------------------------------
do.call("rbind", P) %>% head()

## ------------------------------------------------------------------------
station.dists <- Tmax_no_14 %>%  # take the data
  select(lon, lat, day) %>%      # extract coordinates
  dist() %>%                     # compute distances
  as.matrix()                    # convert to matrix

## ------------------------------------------------------------------------
station.dists.inv <- 1/station.dists
diag(station.dists.inv) <- 0
Moran.I(Tmax_no_14$residuals, station.dists.inv)$p.value

## ------------------------------------------------------------------------
TS1 <- filter(Tmax_no_14, id == 3810)$residuals
TS2 <- filter(Tmax_no_14, id == 3889)$residuals

## ------------------------------------------------------------------------
par(mar=c(4, 4, 1, 1))
plot(TS1,                           # Station 3810 residuals
     xlab = "day of July 1993",
     ylab = "residuals (degF)",
     type = 'o', ylim = c(-8, 7))
lines(TS2,                          # Station 3889 residuals
      xlab = "day of July 1993",
      ylab = "residuals (degF)",
      type = 'o', col = "red")


## ------------------------------------------------------------------------
nested_Tmax_no_14 <- group_by(Tmax_no_14, lon, lat) %>% nest()
head(nested_Tmax_no_14, 3)

## ------------------------------------------------------------------------
dwtest_one_station <- function(data)
                        dwtest(residuals ~ 1, data = data)

## ------------------------------------------------------------------------
dwtest_one_station(nested_Tmax_no_14$data[[1]])

## ------------------------------------------------------------------------
map(nested_Tmax_no_14$data, dwtest_one_station) %>% head()

## ------------------------------------------------------------------------
dwtest_one_station_tidy <- nested_Tmax_no_14$data[[1]] %>%
                           dwtest_one_station() %>%
                           tidy()

## ------------------------------------------------------------------------
dwtest_one_station_tidy[, 1:3]

## ------------------------------------------------------------------------
Tmax_DW_no_14 <- nested_Tmax_no_14 %>%
    mutate(dwtest = map(data, dwtest_one_station)) %>%
    mutate(test_df = map(dwtest, tidy)) %>%
    unnest(test_df)

## ------------------------------------------------------------------------
options(digits = 3)

## ------------------------------------------------------------------------
Tmax_DW_no_14 %>% select(-method, -alternative) %>% head(3)

## ------------------------------------------------------------------------
mean(Tmax_DW_no_14$p.value < 0.05/nrow(Tmax_DW_no_14)) * 100

## ------------------------------------------------------------------------
options(digits=1)

## ------------------------------------------------------------------------
data("STObj3", package = "STRbook")
STObj4 <- STObj3[, "1993-07-01::1993-07-31"]

## ------------------------------------------------------------------------
STObj4@data <- left_join(STObj4@data, Tmax_no_14)

## ------------------------------------------------------------------------
vv <- variogram(object = residuals ~ 1, # fixed effect component
                data = STObj4,     # July data
                width = 80,        # spatial bin (80 km)
                cutoff = 1000,     # consider pts < 1000 km apart
                tlags = 0.01:6.01) # 0 days to 6 days


## ------------------------------------------------------------------------
pred_grid <- expand.grid(lon = seq(-100, -80, length = 20),
                         lat = seq(32, 46, length = 20),
                         day = seq(4, 29, length = 6))

## ------------------------------------------------------------------------
Spred <- eval_basis(basis = G,                      # basis functs
                s = pred_grid[,c("lon","lat")] %>%  # pred locs
                     as.matrix()) %>%         # conv. to matrix
     as.matrix()                              # results as matrix
colnames(Spred) <- paste0("B", 1:ncol(Spred)) # assign col names
pred_grid <- cbind(pred_grid, Spred)          # attach to grid

## ------------------------------------------------------------------------
linreg_pred <- predict(Tmax_July_lm,
                       newdata = pred_grid,
                       interval = "prediction")

## ------------------------------------------------------------------------
## Assign prediction and prediction s.e. to the prediction grid
pred_grid$z_pred <- linreg_pred[,1]
pred_grid$z_err <- (linreg_pred[,3] - linreg_pred[,2]) / (2*1.96)
andrewzm/STRbook documentation built on May 18, 2024, 5:17 a.m.