vignettes/SpTMixture_FirstUse.R

## ---- message=FALSE, warning=FALSE---------------------------------------
library(SpTMixture, quiet = TRUE)
require("rgdal", quiet = TRUE) # requires sp, will use proj.4 if installed
require("maptools", quiet = TRUE)
require("ggplot2", quiet = TRUE)
require("plyr", quiet = TRUE)
library(grid, quiet = TRUE)
library(gridExtra, quiet = TRUE)
library(mytoolbox, quiet = TRUE)
library(raster, quiet=TRUE)

## ------------------------------------------------------------------------
data(GoyderData)
subWaterQ.df = subset(WaterQ.df, param == levels(WaterQ.df$param)[6], 
                      select = c(site, date, value))
subWaterQ.df = subset(subWaterQ.df, date < "2013-01-01", select = c(site, date, value))
subWaterQ.df = subset(subWaterQ.df, date > "2008-01-01", select = c(site, date, value))
ID_keep = names(which(table(subWaterQ.df$site)>30))
subWaterQ.df = subset(subWaterQ.df, site %in% ID_keep )
names(subWaterQ.df) <- c("ID" ,"date", "obs")
subWaterQ.df$ID <- as.factor(as.character(subWaterQ.df$ID))
subWaterQ.df$obs[subWaterQ.df$obs==0] <- NA
subWaterQ.df$obs <- log(subWaterQ.df$obs)

## ------------------------------------------------------------------------
ggplot(data = subWaterQ.df, aes(x = date, y = obs, color = ID)) + 
  geom_point() + ylab("log(obs)") + geom_line() + facet_wrap(~ID)

## ------------------------------------------------------------------------
LU = area2comp(LandUse.df, compCol = "DES_SEC_V6", IDcol = "SiteID")
subLandUse.df = subset(LU, ID %in% unique(subWaterQ.df$ID))
rownames(subLandUse.df) <- subLandUse.df$ID
subLandUse.df$ID <- NULL
keep_lu = names(which(apply(subLandUse.df,2,max) > .4))

subLandUse.df = subset(subLandUse.df, select = keep_lu)
subLandUse.df$`Grazing modified pastures` = subLandUse.df$`Grazing modified pastures` + subLandUse.df$`Other minimal uses`
subLandUse.df$`Services` = subLandUse.df$`Services` + subLandUse.df$`Transport and communication`
subLandUse.df$`Other minimal uses` <- NULL
subLandUse.df$`Transport and communication` <- NULL

subWaterQ.df = subset(subWaterQ.df, ID %in% rownames(subLandUse.df))
subWaterQ.df$ID <- as.factor(as.character(subWaterQ.df$ID))
subCoords.df = unique(subset(Coords.df, site %in% rownames(subLandUse.df)))
names(subCoords.df)[1] <- c("ID")

## ------------------------------------------------------------------------
bbox = t(apply(subCoords.df[,2:3], 2,function(x) c(min(x), max(x))))
rownames(bbox) <- c("lon","lat")
colnames(bbox) <- c('min','max')
polybox = asSpatialPolygonsBbox(bbox, zoom = 0.85)
proj4string(polybox) <- proj4string(CatchmentMap)
CatchmentMapC <- crop(CatchmentMap,polybox, byid = TRUE, drop_lower_td = TRUE)
CatchmentMapC@data$id = CatchmentMapC@data$NewCatchId
Catchment.points = fortify(CatchmentMapC, region="id")
Catchment.df = join(Catchment.points,CatchmentMapC@data, by="id")
Catchment.df$SiteID = FALSE
Catchment.df$SiteID[Catchment.df$Site_Code %in% levels(subWaterQ.df$ID)] = TRUE
Catchment.df$SiteID = as.factor(as.character(Catchment.df$SiteID))

## ---- fig.height=8-------------------------------------------------------
ggplot() + geom_polygon(data = Catchment.df, aes(long,lat, group = group)) + 
  geom_path(data = Catchment.df, aes(long,lat, group = group),color="darkgrey") +
#  coord_equal(ratio = 1) + 
  xlab("") + ylab("")+
  geom_point(data = subCoords.df, aes(x = longitude, y = latitude), col = "white", size = 2)


## ------------------------------------------------------------------------
p.old = subLandUse.df[,which(colSums(subLandUse.df) > 0)]
p.old[is.na(p.old)] = 0
p.old= as.matrix((p.old)/rowSums(p.old))

## ------------------------------------------------------------------------
df = SPTMData(subWaterQ.df, agg = "month", seasonality = c("quarter"), debug=F,
               method = "splines", degFree = 8)
nrow(df$obs.data)
df$basis<-NULL
df = updateSPTMData(df, "all")
p.old = p.old[match(levels(df$obs.data$ID),rownames(p.old)),]
y = df$obs.data
lu.long = data.frame(lu = as.factor(apply(subLandUse.df[,1:ncol(subLandUse.df)],1, which.max)),
                      ID = as.character(rownames(subLandUse.df)))

## ---- fig.height=6-------------------------------------------------------
season.sites=df$season
season.lu = updateSeason(season.sites, p.sites = p.old)
plot(season.lu, select = c(1:ncol(p.old)), cex.axis = 0.7)


## ------------------------------------------------------------------------
theta.mean = rnorm(ncol(p.old), mean = rep(mean(y$obs, na.rm=T),ncol(p.old)),
                  sd = rep(mean(abs(y$obs), na.rm=T),ncol(p.old))/10)
theta.season = rnorm(max(ncol(df$basis),2)*ncol(p.old), mean = 1, sd = 0.5)
theta.sigma = rnorm(ncol(p.old), mean = -0.5, sd = 0.05)
theta.0 = c(theta.mean,theta.season, theta.sigma)
subCoords.df = subCoords.df[match(levels(df$obs.data$ID),subCoords.df$ID),]
proj.xy = project(cbind(subCoords.df$longitude,subCoords.df$latitude), "+proj=utm +zone=51 ellps=WGS84")/1000
subCoords.df$x = proj.xy[,1]
subCoords.df$y = proj.xy[,2]
sptmod = SPTModel(df, p.old, proj.xy)

## ------------------------------------------------------------------------
resEM = estimEM(sptmod, theta.init = NULL, print.res = F, debug=F, hessian = F)
theta.EM = resEM$Result$theta
p.EM = resEM$Result$latent

## ---- fig.height=8-------------------------------------------------------
plot(resEM, type = "residuals")
plot(resEM, type = "tempBasis")

## ------------------------------------------------------------------------
# colnames(p.EM)<-colnames(p.old)
# season.lu = updateSeason(season.sites, p.sites=p.EM)
# season.lu[is.na(season.lu)] <- NA
# dataS = data.frame(
#   Comp = as.factor(sapply(apply(p.EM, 1, which.max), function(x) which(sort(theta.EM[1:ncol(p.old)],index.return=T)$ix == x))),
#   Longitude = subCoords.df[,2], 
#   Latitude = subCoords.df[,3], 
#   CompName = as.factor(colnames(p.EM)[apply(p.EM, 1, which.max)]), ID = subCoords.df[,"ID"])
# 
# SpPt = SpatialPointsDataFrame(coords = as.matrix(subCoords.df[,2:3]), data = dataS, proj4string = CRS("+proj=longlat +ellps=GRS80 +no_defs"))

## ---- fig.height=8-------------------------------------------------------
# ggplot() + geom_polygon(data = Catchment.df, aes(long,lat, group = group)) + 
#   geom_path(data = Catchment.df, aes(long,lat, group = group),color="darkgrey", size = 1) +
#   coord_equal() + 
#   geom_point(data = SpPt@data, aes(x = Longitude, y = Latitude, col =Comp), size=3) + 
#   scale_color_manual(values = rev(brewer.pal(ncol(p.EM),"RdYlGn")), name = "LU Composition",
#                      labels = colnames(p.EM)[sort(theta.EM[1:ncol(p.old)],index.return=T)$ix])+ 
#   theme(legend.position="bottom")

## ---- fig.height=8-------------------------------------------------------
# Ypred = predict(resEM, transform = "none")
# Ypred$ClName = as.factor(colnames(p.EM)[as.numeric(as.character(Ypred$Cluster))])
# Ypred$ClName <- ordered(Ypred$ClName, levels = colnames(p.EM)[sort(theta.EM[1:ncol(p.old)],index.return=T)$ix])
# ggplot(data = Ypred) + geom_point(aes(x=date, y = obs), size = 0.4)+
#   geom_line(aes(x = date, y = pred, color = ClName), size=0.5)+
#   geom_ribbon(aes(x = date, ymin=CIlow,ymax=CIup, fill = ClName),alpha=0.3)+
#     facet_wrap(~ID, scales = "free") + 
#     scale_color_manual(values = rev(brewer.pal(ncol(p.EM),"RdYlGn")), name = "LU Composition",
#                      labels = colnames(p.EM)[sort(theta.EM[1:ncol(p.old)],index.return=T)$ix]) + 
#   scale_fill_manual(values = rev(brewer.pal(ncol(p.EM),"RdYlGn")), name = "LU Composition",
#                      labels = colnames(p.EM)[sort(theta.EM[1:ncol(p.old)],index.return=T)$ix]) + 
#   theme(legend.position="bottom")


## ------------------------------------------------------------------------
# theta.EM[(length(theta.EM) - ncol(p.EM) + 1):length(theta.EM)] = exp(theta.EM[(length(theta.EM) - ncol(p.EM) + 1):length(theta.EM)])
# 
# fisher_info<-solve(-resEM$Result$Hessian)
# prop_theta<-(diag(fisher_info))
# 
# idx.s = (ncol(season.lu)+1):length(theta.EM)
# priors = list(theta = list(m0 = theta.EM[1:(ncol(season.lu))], s0 = prop_theta[1:ncol(season.lu)]),
#               sigma2 = list(a0 = theta.EM[idx.s]^2 / ((prop_theta[idx.s])) + 2, b0 = (theta.EM[idx.s]^2 / ((prop_theta[idx.s])) + 1)*theta.EM[idx.s]),
#                z = list(a = p.EM)) 
# set.seed(1)
# resGibbs = estimGibbs(sptmod, method = 'simple', priors = priors, print.res = F,
#                      N.run = 2500, beta.range = c(0,3), meanFunc = meanFunc, debug=F)
# 
# p.Gibbs = resGibbs$Result$latent
# colnames(p.Gibbs)<-colnames(p.old)
# theta.Gibbs = resGibbs$Result$theta

## ---- fig.height=8-------------------------------------------------------
# plot(resGibbs, type = "residuals")
# plot(resGibbs, type = "tempBasis")

## ---- fig.height=8-------------------------------------------------------
# diagPlot(resGibbs, par = "theta",main = "", breaks = 50)
# diagPlot(resGibbs, par = "z",main = "")
# diagPlot(resGibbs, par = "paired-theta", plot.cluster = 1)

## ---- fig.height=8-------------------------------------------------------
# Ypred = predict(resGibbs)
# Ypred$ClName = as.factor(colnames(p.old)[as.numeric(as.character(Ypred$Cluster))])
# Ypred$ClName <- ordered(Ypred$ClName, levels = colnames(p.old)[sort(theta.Gibbs[1:ncol(p.old)],index.return=T)$ix])
# ggplot(data = Ypred) + geom_point(aes(x=date, y = obs), size = 0.4)+
#   geom_line(aes(x = date, y = pred, color = (ClName)))+
#   geom_ribbon(aes(x = date, ymin=CIlow,ymax=CIup, fill = ClName),alpha=0.3)+
#     facet_wrap(~ID, scales = "free_x") + 
#     scale_color_manual(values = rev(brewer.pal(ncol(p.Gibbs),"RdYlGn")), name = "LU Composition",
#                      labels = colnames(p.old)[sort(theta.Gibbs[1:ncol(p.old)],index.return=T)$ix]) + 
#    scale_fill_manual(values = rev(brewer.pal(ncol(p.Gibbs),"RdYlGn")), name = "LU Composition",
#                      labels = colnames(p.old)[sort(theta.Gibbs[1:ncol(p.old)],index.return=T)$ix]) + 
#   theme(legend.position="bottom")


## ------------------------------------------------------------------------
# season.lu = updateSeason(season.sites, p.sites=p.Gibbs)
# season.lu[is.na(season.lu)] <- NA
# dataS = data.frame(
#   Comp = as.factor(sapply(apply(p.Gibbs, 1, which.max), function(x) which(sort(theta.Gibbs[1:ncol(p.old)],index.return=T)$ix == x))),
#   Longitude = subCoords.df[,2], 
#   Latitude = subCoords.df[,3], 
#   CompName = as.factor(colnames(p.EM)[apply(p.Gibbs, 1, which.max)]), ID = subCoords.df[,"ID"])
# 
# SpPt = SpatialPointsDataFrame(coords = as.matrix(subCoords.df[,2:3]), data = dataS, proj4string = CRS("+proj=longlat +ellps=GRS80 +no_defs"))

## ---- fig.height=8-------------------------------------------------------
# ggplot() + geom_polygon(data = Catchment.df, aes(long,lat, group = group)) + 
#   geom_path(data = Catchment.df, aes(long,lat, group = group),color="darkgrey", size = 1) +
#   coord_equal() + 
#   geom_point(data = SpPt@data, aes(x = Longitude, y = Latitude, col =Comp), size=3) + 
#   scale_color_manual(values = rev(brewer.pal(ncol(p.Gibbs),"RdYlGn")), name = "LU Composition",
#                      labels = colnames(p.Gibbs)[sort(theta.Gibbs[1:ncol(p.old)],index.return=T)$ix])+ 
#   theme(legend.position="bottom")

## ------------------------------------------------------------------------
# resGibbsSpat = estimGibbs(sptmod, method = 'spat', priors = priors, print.res = F,
#                      N.run = 2500, beta.range = c(2,3), meanFunc = meanFunc, debug = F)
# 
# p.GibbsSpat = resGibbsSpat$Result$latent
# colnames(p.GibbsSpat)<-colnames(p.old)
# theta.GibbsSpat = resGibbsSpat$Result$theta


## ---- fig.height=8-------------------------------------------------------
# plot(resGibbsSpat, type = "residuals")
# plot(resGibbsSpat, type = "tempBasis")

## ---- fig.height=6-------------------------------------------------------
# diagPlot(resGibbsSpat, par = "theta", main = "", breaks = 50)
# diagPlot(resGibbsSpat, par = "z",         main = "")
# diagPlot(resGibbsSpat, par = "beta",main = "", breaks = 50)

## ---- fig.height=8-------------------------------------------------------
# Ypred = predict(resGibbsSpat)
# Ypred$ClName = as.factor(colnames(p.old)[as.numeric(as.character(Ypred$Cluster))])
# Ypred$ClName <- ordered(Ypred$ClName, levels = colnames(p.GibbsSpat)[sort(theta.GibbsSpat[1:ncol(p.old)],index.return=T)$ix])
# ggplot(data = Ypred) + geom_point(aes(x=date, y = obs), size = 0.4)+
#   geom_line(aes(x = date, y = pred, color = (ClName)))+
#     facet_wrap(~ID, scales = "free_x") + 
#     scale_color_manual(values = rev(brewer.pal(ncol(p.GibbsSpat),"RdYlGn")), name = "LU Composition",
#                      labels = colnames(p.GibbsSpat)[sort(theta.GibbsSpat[1:ncol(p.old)],index.return=T)$ix])+ 
#   theme(legend.position="bottom")


## ------------------------------------------------------------------------
# season.lu = updateSeason(season.sites, p.sites=p.GibbsSpat)
# season.lu[is.na(season.lu)] <- NA
# dataS = data.frame(
#   Comp = as.factor(sapply(apply(p.GibbsSpat, 1, which.max), function(x) which(sort(theta.GibbsSpat[1:ncol(p.old)],index.return=T)$ix == x))),
#   Longitude = subCoords.df[,2], 
#   Latitude = subCoords.df[,3], 
#   CompName = as.factor(colnames(p.EM)[apply(p.GibbsSpat, 1, which.max)]), ID = subCoords.df[,"ID"])
# 
# SpPt = SpatialPointsDataFrame(coords = as.matrix(subCoords.df[,2:3]), data = dataS, proj4string = CRS("+proj=longlat +ellps=GRS80 +no_defs"))

## ---- fig.height=7-------------------------------------------------------
# ggplot() + geom_polygon(data = Catchment.df, aes(long,lat, group = group)) + 
#   geom_path(data = Catchment.df, aes(long,lat, group = group),color="darkgrey", size = 1) +
#   coord_equal() + 
#   geom_point(data = SpPt@data, aes(x = Longitude, y = Latitude, col =Comp), size=3) + 
#   scale_color_manual(values = rev(brewer.pal(ncol(p.GibbsSpat),"RdYlGn")), name = "LU Composition",
#                      labels = colnames(p.GibbsSpat)[sort(theta.GibbsSpat[1:ncol(p.old)],index.return=T)$ix])+ 
#   theme(legend.position="bottom")
ick003/SpTMixture documentation built on May 18, 2019, 2:32 a.m.