tests/analysis3.R

# Gaussian process modelling

library(ggplot2)
library(bayesm)
library(zoo)
library(splines)
library(maptools)
library(plyr)
library(ggmap)
library(broom)
library(MASS)
data("WaterQuality")
ggplot(data = WQ_PPB, aes(x = Date, y = log(Measure))) + geom_point() + facet_wrap(~Species)
SOI = levels(as.factor(WQ_PPB$Species))[32]
names(WQ_PPB)
subWQ.df = subset(WQ_PPB, Species == SOI, select = c(Date,Measure,Site,Latitude, Longitude, Species))
subWQ.df = subWQ.df[!is.na(subWQ.df$Measure),]
range(subWQ.df$Date)
subWQ.df = subset(subWQ.df, Date < "2010-01-01", select = c(Date,Measure,Site,Latitude, Longitude, Species))
subWQ.df = subset(subWQ.df, Date > "1990-01-01", select = c(Date,Measure,Site,Latitude, Longitude, Species))

subWQ.df$Site <- tolower(sub("PORT PHILLIP BAY ","",subWQ.df$Site))
subWQ.df$Site = as.factor(subWQ.df$Site)
subWQ.df$Species = as.factor(subWQ.df$Species)
head(subWQ.df)

df = aggregate(Measure ~ format(Date, "%m") + format(Date, "%Y"), data = subWQ.df, mean)

y = df$Measure
x = as.numeric(as.character(df$`format(Date, "%m")`))/12 +  as.numeric(as.character(df$`format(Date, "%Y")`))

plot(x,(y))

p0 = c(log(1),log(10),log(1),log(1),log(1),log(1),log(5))
kernelList = createKernel(list(k_longterm, k_seasonal),
                          kernelName = c("Longterm", "Seasonal"),
                          kernelParameters =  list(c("q1","q2"), c("q3","q4","qs", "f")))
res = optim(p0, fn = wrap_ll, y=y, kernelList = kernelList, x=x,
            control = list(fnscale = -1, maxit = 10000))

res = optifix(p0, fixed = c(FALSE,TRUE,FALSE,TRUE,TRUE,FALSE,FALSE),fn = wrap_ll, y=y, kernel = kernelList, x=x,
            control = list(fnscale = -1, maxit = 10000), method = "Nelder-Mead")

wrap_ll(p0,y,kernelList,x)

param = parToList(res$fullpar, kernelList)

kernel_g = list(k_longterm, k_seasonal)

xd = seq(min(x),max(x),length.out = 500)
test = GPpred(xd,x, y, param = param, kernel = kernel_g)

xdd = seq(min(x),max(x),length.out = 200)
longterm = GPpred(xdd,x, y, param =  list(list(q1 = exp(res$fullpar[1]), q2 = exp(res$fullpar[2]))), kernel = list(k_longterm))

xddd = xdd
lt = GPpred(x,x, y, param =  list(list(q1 = exp(res$fullpar[1]), q2 = exp(res$fullpar[2]))), kernel = list(k_longterm))
season = GPpred(xddd,x, (y - lt$mp), param =  list(list(q3=exp(res$fullpar[3]), q4=exp(res$fullpar[4]),qs=exp(res$fullpar[5]), f = exp(res$fullpar[6]))), kernel = list(k_seasonal))


par(mfrow = c(3,1), mar = c(3,3,3,1))
plot(x,(y))
points(xd, (test$mp), col = "red", type="l")
CII = cbind(test$mp - 1.96 * sqrt(diag(test$sp) + param$sigma_noise),
        test$mp + 1.96 * sqrt(diag(test$sp) + param$sigma_noise))
polygon(c((xd), rev(xd)),(c(CII[,1], rev(CII[,2]))), col = "lightgrey", density = 30)

plot(x,(y))
points(xdd, (longterm$mp), col = "red", type="l")
CII = cbind(longterm$mp - 1.96 * sqrt(diag(longterm$sp) + param$sigma_noise),
            longterm$mp + 1.96 * sqrt(diag(longterm$sp) + param$sigma_noise))
polygon(c((xdd), rev(xdd)),(c(CII[,1], rev(CII[,2]))), col = "lightgrey", density = 30)

plot(x,y - lt$mp)
points(xddd, (season$mp), col = "red", type="l")
CII = cbind(season$mp - 1.96 * sqrt(diag(season$sp) + param$sigma_noise),
            season$mp + 1.96 * sqrt(diag(season$sp) + param$sigma_noise))
polygon(c((xddd), rev(xddd)),(c(CII[,1], rev(CII[,2]))), col = "lightgrey", density = 30)

### Plotting for each site ###
par(mfrow = c(3,3))
Sites = levels(subWQ.df$Site)
for(s in Sites){
  df.temp = subset(subWQ.df, Site == s, select = c(Date,Measure,Site,Latitude, Longitude, Species))
  df.temp$date = as.numeric(as.character(format(df.temp$Date, "%m")))/12 +  as.numeric(as.character(format(df.temp$Date, "%Y")))
  xd = seq(min(df.temp$date),max(df.temp$date),length.out = diff(range(df.temp$date))*12)
  test = GPpred(xd,df.temp$date, (df.temp$Measure ), param = param, kernel = kernel_g)
  plot(df.temp$date,(df.temp$Measure))
  points(xd, test$mp, col = "red", type="l")
  CII = cbind(test$mp - 1.96 * sqrt(diag(test$sp) + param$sigma_noise),
              test$mp + 1.96 * sqrt(diag(test$sp) + param$sigma_noise))
  polygon(c((xd), rev(xd)),(c(CII[,1], rev(CII[,2]))), col = "lightgrey", density = 30)
  longterm = GPpred(xd,df.temp$date, (df.temp$Measure), param =  list(list(q1 = exp(res$fullpar[1]), q2 = exp(res$fullpar[2]))), kernel = list(k_longterm))
  points(xd, longterm$mp, col = "darkgreen", type="l")
  text(x= max(x)-0.5, y = max(df.temp$Measure),  round((longterm$mp)[length( longterm$mp)],3))
}


#### Functions ####
ick003/SpTMixture documentation built on May 18, 2019, 2:32 a.m.