# Gaussian process modelling
library(ggplot2)
library(bayesm)
library(zoo)
library(splines)
library(maptools)
library(plyr)
library(ggmap)
library(broom)
library(MASS)
data("WaterQuality")
ggplot(data = WQ_PPB, aes(x = Date, y = log(Measure))) + geom_point() + facet_wrap(~Species)
SOI = levels(as.factor(WQ_PPB$Species))[32]
names(WQ_PPB)
subWQ.df = subset(WQ_PPB, Species == SOI, select = c(Date,Measure,Site,Latitude, Longitude, Species))
subWQ.df = subWQ.df[!is.na(subWQ.df$Measure),]
range(subWQ.df$Date)
subWQ.df = subset(subWQ.df, Date < "2010-01-01", select = c(Date,Measure,Site,Latitude, Longitude, Species))
subWQ.df = subset(subWQ.df, Date > "1990-01-01", select = c(Date,Measure,Site,Latitude, Longitude, Species))
subWQ.df$Site <- tolower(sub("PORT PHILLIP BAY ","",subWQ.df$Site))
subWQ.df$Site = as.factor(subWQ.df$Site)
subWQ.df$Species = as.factor(subWQ.df$Species)
head(subWQ.df)
df = aggregate(Measure ~ format(Date, "%m") + format(Date, "%Y"), data = subWQ.df, mean)
y = df$Measure
x = as.numeric(as.character(df$`format(Date, "%m")`))/12 + as.numeric(as.character(df$`format(Date, "%Y")`))
plot(x,(y))
p0 = c(log(1),log(10),log(1),log(1),log(1),log(1),log(5))
kernelList = createKernel(list(k_longterm, k_seasonal),
kernelName = c("Longterm", "Seasonal"),
kernelParameters = list(c("q1","q2"), c("q3","q4","qs", "f")))
res = optim(p0, fn = wrap_ll, y=y, kernelList = kernelList, x=x,
control = list(fnscale = -1, maxit = 10000))
res = optifix(p0, fixed = c(FALSE,TRUE,FALSE,TRUE,TRUE,FALSE,FALSE),fn = wrap_ll, y=y, kernel = kernelList, x=x,
control = list(fnscale = -1, maxit = 10000), method = "Nelder-Mead")
wrap_ll(p0,y,kernelList,x)
param = parToList(res$fullpar, kernelList)
kernel_g = list(k_longterm, k_seasonal)
xd = seq(min(x),max(x),length.out = 500)
test = GPpred(xd,x, y, param = param, kernel = kernel_g)
xdd = seq(min(x),max(x),length.out = 200)
longterm = GPpred(xdd,x, y, param = list(list(q1 = exp(res$fullpar[1]), q2 = exp(res$fullpar[2]))), kernel = list(k_longterm))
xddd = xdd
lt = GPpred(x,x, y, param = list(list(q1 = exp(res$fullpar[1]), q2 = exp(res$fullpar[2]))), kernel = list(k_longterm))
season = GPpred(xddd,x, (y - lt$mp), param = list(list(q3=exp(res$fullpar[3]), q4=exp(res$fullpar[4]),qs=exp(res$fullpar[5]), f = exp(res$fullpar[6]))), kernel = list(k_seasonal))
par(mfrow = c(3,1), mar = c(3,3,3,1))
plot(x,(y))
points(xd, (test$mp), col = "red", type="l")
CII = cbind(test$mp - 1.96 * sqrt(diag(test$sp) + param$sigma_noise),
test$mp + 1.96 * sqrt(diag(test$sp) + param$sigma_noise))
polygon(c((xd), rev(xd)),(c(CII[,1], rev(CII[,2]))), col = "lightgrey", density = 30)
plot(x,(y))
points(xdd, (longterm$mp), col = "red", type="l")
CII = cbind(longterm$mp - 1.96 * sqrt(diag(longterm$sp) + param$sigma_noise),
longterm$mp + 1.96 * sqrt(diag(longterm$sp) + param$sigma_noise))
polygon(c((xdd), rev(xdd)),(c(CII[,1], rev(CII[,2]))), col = "lightgrey", density = 30)
plot(x,y - lt$mp)
points(xddd, (season$mp), col = "red", type="l")
CII = cbind(season$mp - 1.96 * sqrt(diag(season$sp) + param$sigma_noise),
season$mp + 1.96 * sqrt(diag(season$sp) + param$sigma_noise))
polygon(c((xddd), rev(xddd)),(c(CII[,1], rev(CII[,2]))), col = "lightgrey", density = 30)
### Plotting for each site ###
par(mfrow = c(3,3))
Sites = levels(subWQ.df$Site)
for(s in Sites){
df.temp = subset(subWQ.df, Site == s, select = c(Date,Measure,Site,Latitude, Longitude, Species))
df.temp$date = as.numeric(as.character(format(df.temp$Date, "%m")))/12 + as.numeric(as.character(format(df.temp$Date, "%Y")))
xd = seq(min(df.temp$date),max(df.temp$date),length.out = diff(range(df.temp$date))*12)
test = GPpred(xd,df.temp$date, (df.temp$Measure ), param = param, kernel = kernel_g)
plot(df.temp$date,(df.temp$Measure))
points(xd, test$mp, col = "red", type="l")
CII = cbind(test$mp - 1.96 * sqrt(diag(test$sp) + param$sigma_noise),
test$mp + 1.96 * sqrt(diag(test$sp) + param$sigma_noise))
polygon(c((xd), rev(xd)),(c(CII[,1], rev(CII[,2]))), col = "lightgrey", density = 30)
longterm = GPpred(xd,df.temp$date, (df.temp$Measure), param = list(list(q1 = exp(res$fullpar[1]), q2 = exp(res$fullpar[2]))), kernel = list(k_longterm))
points(xd, longterm$mp, col = "darkgreen", type="l")
text(x= max(x)-0.5, y = max(df.temp$Measure), round((longterm$mp)[length( longterm$mp)],3))
}
#### Functions ####
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.