# load required libraries library(lattice) library(fda) library(reshape) library(mgcv) #opts_chunk $ set(echo=FALSE, fig.path='myproject/plot-', cache=TRUE) opts_chunk $ set(fig.width = 10, fig.height = 10, fig.path = "B:/Loch_Ard_Felling/fig/", cache = TRUE, cache.path = "B:/Loch_Ard_Felling/cache/page3/")
# get full data set load("B:/Loch_Ard_Felling/Package/rivertemp/data/fullwater.rda") # a quick fix stream $ year[stream $ Sensor.Model == "Squirrel"] <- stream $ year[stream $ Sensor.Model == "Squirrel"] + 1900
# set up a bspline basis of dimension 48 and order 6 hourbasis <- create.bspline.basis(c(0, 24), 48, 6) # and a harmonic accelerator penalty... penalises deviations from sinusiodal curves penalty <- vec2Lfd(c(0, (2*pi/24)^2, 0), c(0, 24))
# select a few days data and add in decimal hour dat <- with(subset(stream, site == 2 & year %in% 2003:2009), data.frame(temp = Original.Value, dhour = hour + min/60, month = factor(month.abb[mon + 1], levels = month.abb), yday = yday, year = year, site = site)) dat <- subset(dat, yday < 365) # remove last day of leap year getcoef <- function(lambda, .yday, .year = 2009, .site = 2) { sdat <- subset(dat, yday == .yday & year == .year & site == .site) #if (nrow(sdat) < 23) return (NULL) if (sum(unique(round(sdat $ dhour)) %in% 0:24) < 24) return(NULL) c(coef(smooth.basis(sdat $ dhour, sdat $ temp, fdPar(hourbasis, penalty, lambda)))) } yday <- rep(0:365, length(2003:2009)) year <- rep(2003:2009, each = 366) ck <- mapply(getcoef, .yday = yday, .year = year, MoreArgs = list(lambda = exp(-8))) whichNULL <- sapply(ck, is.null) ck <- simplify2array(ck[!whichNULL]) info2 <-data.frame(yday = yday[!whichNULL], year = year[!whichNULL]) tempfd2 <- fd(ck, hourbasis) pc2 <- pca.fd(tempfd2, nharm = 4) # is this sensitive to missing values?... info2[pc2 $ harmonics $ fdnames[[2]]] <- as.data.frame(pc2 $ scores) head(info2)
# select a few days data and add in decimal hour dat <- with(subset(stream, site == 10 & year %in% 2003:2009), data.frame(temp = Original.Value, dhour = hour + min/60, month = factor(month.abb[mon + 1], levels = month.abb), yday = yday, year = year, site = site)) dat <- subset(dat, yday < 365) # remove last day of leap year getcoef <- function(lambda, .yday, .year = 2009, .site = 10) { sdat <- subset(dat, yday == .yday & year == .year & site == .site) #if (nrow(sdat) < 23) return (NULL) if (sum(unique(round(sdat $ dhour)) %in% 0:24) < 24) return(NULL) c(coef(smooth.basis(sdat $ dhour, sdat $ temp, fdPar(hourbasis, penalty, lambda)))) } yday <- rep(0:365, length(2003:2009)) year <- rep(2003:2009, each = 366) ck <- mapply(getcoef, .yday = yday, .year = year, MoreArgs = list(lambda = exp(-8))) whichNULL <- sapply(ck, is.null) ck <- simplify2array(ck[!whichNULL]) info10 <-data.frame(yday = yday[!whichNULL], year = year[!whichNULL]) tempfd10 <- fd(ck, hourbasis) pc10 <- pca.fd(tempfd10, nharm = 4) # is this sensitive to missing values?... info10[pc10 $ harmonics $ fdnames[[2]]] <- as.data.frame(pc10 $ scores) head(info10)
All we want to use now is info2
and info 10
.
info <- merge(info2, info10, by = c("yday", "year"), suffixes = c(".2", ".10")) info $ day <- with(info, yday + (year - 2003) * 365) info $ month <- month.abb[strptime(paste(info $ yday + 1, info $ year), format = "%j %Y") $ mon + 1] info $ season <- with(info, ifelse(month %in% c("Nov", "Dec", "Jan", "Feb"), "Winter", ifelse(month %in% c("Mar", "Apr"), "Spring", ifelse(month %in% c("May", "Jun", "Jul", "Aug"), "Summer", "Autumn")))) info $ season <- factor(info $ season, levels = c("Winter", "Spring", "Summer", "Autumn")) head(info)
now to look at some regressions between stuff
plot.pcs <- function(infodat, main = NULL) { par(mfrow = c(4, 4), mar = c(0,0,0,0), oma = c(2, 2, 5, 4)) for (i in 1:4) { for (j in 1:4) { plot(infodat[[paste0("PC",i, ".2")]], infodat[[paste0("PC",j, ".10")]], ann = FALSE, axes = FALSE) box() } } mtext(paste("PC", 1:4), side = 3, line = .5, at = 1:4 / 4 - .125, outer = TRUE) mtext(paste("PC", 1:4), side = 4, line = .5, at = 4:1 / 4 - .125, outer = TRUE, las = 1) mtext("Burn 2", side = 1, line = .5, at = .5, outer = TRUE) mtext("Burn 10", side = 2, line = .5, at = .5, outer = TRUE) if (!is.null(main)) mtext(main, side = 3, line = 3, at = 0.5, outer = TRUE, font = 2, cex = 1.5) } plot.pcs(info, main = "All data")
plot.pcs(subset(info, yday > 0 & yday <= 90))
plot.pcs(subset(info, yday > 90 & yday <= 180))
plot.pcs(subset(info, yday > 180 & yday <= 270))
plot.pcs(subset(info, yday > 270 & yday <= 366))
# extracted on 16:41 19/05/2014 from: # https://climatedataguide.ucar.edu/sites/default/files/climate_index_files/nao_station_monthly.txt nao <- read.table("B:/Loch_Ard_Felling/data/nao_station_monthly.txt", skip = 1, header = TRUE) plot(c(t(nao[paste(2003:2009),])), type = "l")
# extracted on 16:41 19/05/2014 from: # http://www.esrl.noaa.gov/psd/data/correlation/amon.us.data amo <- read.table("B:/Loch_Ard_Felling/data/amon.us.data.txt", skip = 1, nrows = 66, row.names = 1, col.names = c("year", month.abb)) plot(c(t(amo[paste(2003:2009),])), type = "l")
These come in monthly values... so we might use a smoothed version of them...
naolong <- data.frame(year = rep(rownames(nao), each = ncol(nao)), month = rep(colnames(nao), nrow(nao)), nao = c(t(nao))) amolong <- data.frame(year = rep(rownames(amo), each = ncol(amo)), month = rep(colnames(amo), nrow(amo)), amo = c(t(amo))) info <- merge(info, amolong, all.x = TRUE) info <- merge(info, naolong, all.x = TRUE) info <- info[order(info $ day), ] #splom( ~ info[c("PC1.10", "PC1.2", "PC2.2", "PC3.2", "PC4.2", "nao", "amo")] | year) xyplot(PC1.10 ~ amo | season , groups = year, data = info)
g1 <- gamm(PC1.10 ~ s(yday, bs = "cc", k = 9) + s(day, k = 9) + s(yday, k = 9, by = PC1.2, bs = "cc") + s(yday, k = 9, by = PC2.2, bs = "cc") + s(yday, k = 9, by = PC3.2, bs = "cc") + s(yday, k = 9, by = PC4.2, bs = "cc"), data = info, correlation = corAR1(form = ~ yday | factor(year))) plot(g1 $ gam, scale = 0, pages = 1)
plot(g1 $ gam, select = 2, scale = 0) abline(v = 1:7 * 365, col = "darkblue")
So... Lets try fitting this model to each of the components and then we can see how good the predictions of daily temperature are!
g2 <- gamm(PC2.10 ~ s(yday, bs = "cc", k = 9) + s(day, k = 9) + s(yday, k = 9, by = PC1.2, bs = "cc") + s(yday, k = 9, by = PC2.2, bs = "cc") + s(yday, k = 9, by = PC3.2, bs = "cc") + s(yday, k = 9, by = PC4.2, bs = "cc"), data = info, correlation = corAR1(form = ~ yday | factor(year))) g3 <- gamm(PC3.10 ~ s(yday, bs = "cc", k = 9) + s(day, k = 9) + s(yday, k = 9, by = PC1.2, bs = "cc") + s(yday, k = 9, by = PC2.2, bs = "cc") + s(yday, k = 9, by = PC3.2, bs = "cc") + s(yday, k = 9, by = PC4.2, bs = "cc"), data = info, correlation = corAR1(form = ~ yday | factor(year))) g4 <- gamm(PC4.10 ~ s(yday, bs = "cc", k = 9) + s(day, k = 9) + s(yday, k = 9, by = PC1.2, bs = "cc") + s(yday, k = 9, by = PC2.2, bs = "cc") + s(yday, k = 9, by = PC3.2, bs = "cc") + s(yday, k = 9, by = PC4.2, bs = "cc"), data = info, correlation = corAR1(form = ~ yday | factor(year)))
plot(g2 $ gam, scale = 0, pages = 1)
plot(g3 $ gam, scale = 0, pages = 1)
plot(g4 $ gam, scale = 0, pages = 1)
OKAY so we have our component models, now lets try to predict daily temperatures for burn 10!
preddat <- expand.grid(yday = 0:365, year = 2003:2005) preddat $ day <- preddat $ yday + (preddat $ year - 2003) * 365 # choose some values for PC1 etc... # okay so this time we really just want the fitted values fits <- info[c("yday", "day", "month", "season", "year", "nao", "amo")] fits $ PC1 <- fitted(g1 $ gam) fits $ PC2 <- fitted(g2 $ gam) fits $ PC3 <- fitted(g3 $ gam) fits $ PC4 <- fitted(g4 $ gam)
So now we convert these into daily curves through the functional object for burn 10
hours <- 0:24 preds <- eval.fd(hours, pc10 $ harmonics) %*% t(as.matrix(fits[paste0("PC", 1:4)])) preds <- preds + c(eval.fd(hours, pc10 $ meanfd)) raws <- eval.fd(hours, pc10 $ harmonics) %*% t(as.matrix(info[order(info $ day),paste0("PC", 1:4, ".10")])) raws <- raws + c(eval.fd(hours, pc10 $ meanfd)) fits2 <- fits[rep(1:nrow(fits), each = length(hours)),] fits2 $ hour <- rep(hours, nrow(fits)) fits2 $ fit <- c(preds) fits2 $ raw <- c(raws) fits2 $ resid <- with(fits2, raw - fit) xyplot(fit ~ hour | factor(month, month.abb) + factor(year), groups = day, data = fits2, type = "l", as.table = TRUE)
But what to the residuals look like.... bearing in mind the analysis has removed AR1 noise from each component.
xyplot(resid ~ hour | factor(month, month.abb) + factor(year), groups = day, data = fits2, type = "l", as.table = TRUE)
Fine for now. So if the smooth term for day is indeed the felling effect, can we isolate that! Conditional on the observed temperature curves in burn 2??
# predict with felling in there... done # now predict with no felling effect and look at the difference X1 <- predict(g1 $ gam, type = "lpmatrix") c1 <- coef(g1 $ gam) X2 <- predict(g2 $ gam, type = "lpmatrix") c2 <- coef(g2 $ gam) X3 <- predict(g3 $ gam, type = "lpmatrix") c3 <- coef(g3 $ gam) X4 <- predict(g4 $ gam, type = "lpmatrix") c4 <- coef(g4 $ gam) cnames <- colnames(X1) feffect <- grepl("s[(]day[)]", cnames) X1val <- X1[1,feffect] X2val <- X2[1,feffect] X3val <- X3[1,feffect] X4val <- X4[1,feffect] X1f <- X1; X1f[,feffect] <- rep(X1val, each = nrow(X1)) X2f <- X2; X2f[,feffect] <- rep(X2val, each = nrow(X2)) X3f <- X3; X3f[,feffect] <- rep(X3val, each = nrow(X3)) X4f <- X4; X4f[,feffect] <- rep(X4val, each = nrow(X4)) fullPC <- cbind(X1 %*% c1, X2 %*% c2, X3 %*% c3, X4 %*% c4) nofellPC <- cbind(X1f %*% c1, X2f %*% c2, X3f %*% c3, X4f %*% c4) nofell <- eval.fd(hours, pc10 $ harmonics) %*% t(nofellPC) + c(eval.fd(hours, pc10 $ meanfd)) fell <- eval.fd(hours, pc10 $ harmonics) %*% t(fullPC) + c(eval.fd(hours, pc10 $ meanfd)) fits2 $ nofell <- c(nofell) fits2 $ wfell <- c(fell) fits2 $ feffect <- c(fell) - c(nofell) xyplot(feffect ~ hour | factor(month, month.abb) + factor(year), groups = day, data = fits2, type = "l", as.table = TRUE, grid = TRUE)
fits2 $ yhour <- fits2 $ hour + fits2 $ day * 24 with(fits2, { plot(yhour, feffect, pch = ".", col = rainbow(25)[hour + 1], ylab = "Felling effect", xlab = "Date", axes = FALSE) aty <- seq(-1, 1, by = 0.2) axis(2, at = aty) atx <- seq(0, 10, by = 0.5) axis(1, at = atx * 24 * 365, labels = 2003 + atx) box(bty = "l") }) points(subset(fits2, hour == 12) $ yhour, colMeans(fell - nofell), pch = ".", col = 1, cex = 3) points(subset(fits2, hour == 12) $ yhour, apply(fell - nofell, 2, max), pch = ".", col = 1, cex = 3) points(subset(fits2, hour == 12) $ yhour, apply(fell - nofell, 2, min), pch = ".", col = 1, cex = 3) abline(v = 365 * 24 * 0:7, h = 0, col = "darkblue")
x <- 1:(max(fits2 $ day) + 1) y <- 1:25 z <- matrix(NA, length(y), length(x)) z[,unique(fits2 $ day)+1] <- fell - nofell head(z) image(z) contour(z, add = TRUE)
To reload the data in the previous R chunks for analysis use
lazyLoad("B:/Loch_Ard_Felling/cache/page3/predict_2b403e383e33ffaaee92341d77492759")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.