Mixture models have been studied for a long time, and a lot of work can be found on advantages and weaknesses of mixture models [see @McLachlan2000]. In particular, in the case of potential spatial dependency, multiple approaches are considered: \begin{itemize} \item A mixture of expert with spatial random effects [see @Neelon2014]; \item A mixture with a discrete or continuous (Markov random field - Potts or Gaussian process model) prior on the latent variable [see @Woolrich2005]. \end{itemize}
Mixture models for spatio-temporal data
In this article, we present a new approach to work with spatio-temporal data. \begin{itemize} \item Produce robust estimation of land-use impact; \item Improve prediction of water quality measurements for non-modelled sites; \end{itemize} The major modification stems from the assumption that each land-use impacts the amplitude of the measurement but also the seasonality of it.
library(SpTMixture, quiet = TRUE) require("rgdal", quiet = TRUE) # requires sp, will use proj.4 if installed require("maptools", quiet = TRUE) require("ggplot2", quiet = TRUE) require("plyr", quiet = TRUE) library(grid, quiet = TRUE) library(gridExtra, quiet = TRUE) library(mytoolbox, quiet = TRUE) library(raster, quiet=TRUE)
We first load the water quality data, and select the parameter we are interested in.
data(GoyderData) subWaterQ.df = subset(WaterQ.df, param == levels(WaterQ.df$param)[6], select = c(site, date, value)) subWaterQ.df = subset(subWaterQ.df, date < "2013-01-01", select = c(site, date, value)) subWaterQ.df = subset(subWaterQ.df, date > "2008-01-01", select = c(site, date, value)) ID_keep = names(which(table(subWaterQ.df$site)>30)) subWaterQ.df = subset(subWaterQ.df, site %in% ID_keep ) names(subWaterQ.df) <- c("ID" ,"date", "obs") subWaterQ.df$ID <- as.factor(as.character(subWaterQ.df$ID)) subWaterQ.df$obs[subWaterQ.df$obs==0] <- NA subWaterQ.df$obs <- log(subWaterQ.df$obs)
Let's plot what we have.
ggplot(data = subWaterQ.df, aes(x = date, y = obs, color = ID)) + geom_point() + ylab("log(obs)") + geom_line() + facet_wrap(~ID)
Then we load and shape the land use data.
LU = area2comp(LandUse.df, compCol = "DES_SEC_V6", IDcol = "SiteID") subLandUse.df = subset(LU, ID %in% unique(subWaterQ.df$ID)) rownames(subLandUse.df) <- subLandUse.df$ID subLandUse.df$ID <- NULL keep_lu = names(which(apply(subLandUse.df,2,max) > .4)) subLandUse.df = subset(subLandUse.df, select = keep_lu) subLandUse.df$`Grazing modified pastures` = subLandUse.df$`Grazing modified pastures` + subLandUse.df$`Other minimal uses` subLandUse.df$`Services` = subLandUse.df$`Services` + subLandUse.df$`Transport and communication` subLandUse.df$`Other minimal uses` <- NULL subLandUse.df$`Transport and communication` <- NULL subWaterQ.df = subset(subWaterQ.df, ID %in% rownames(subLandUse.df)) subWaterQ.df$ID <- as.factor(as.character(subWaterQ.df$ID)) subCoords.df = unique(subset(Coords.df, site %in% rownames(subLandUse.df))) names(subCoords.df)[1] <- c("ID")
bbox = t(apply(subCoords.df[,2:3], 2,function(x) c(min(x), max(x)))) rownames(bbox) <- c("lon","lat") colnames(bbox) <- c('min','max') polybox = asSpatialPolygonsBbox(bbox, zoom = 0.85) proj4string(polybox) <- proj4string(CatchmentMap) CatchmentMapC <- crop(CatchmentMap,polybox, byid = TRUE, drop_lower_td = TRUE) CatchmentMapC@data$id = CatchmentMapC@data$NewCatchId Catchment.points = fortify(CatchmentMapC, region="id") Catchment.df = join(Catchment.points,CatchmentMapC@data, by="id") Catchment.df$SiteID = FALSE Catchment.df$SiteID[Catchment.df$Site_Code %in% levels(subWaterQ.df$ID)] = TRUE Catchment.df$SiteID = as.factor(as.character(Catchment.df$SiteID))
ggplot() + geom_polygon(data = Catchment.df, aes(long,lat, group = group)) + geom_path(data = Catchment.df, aes(long,lat, group = group),color="darkgrey") + # coord_equal(ratio = 1) + xlab("") + ylab("")+ geom_point(data = subCoords.df, aes(x = longitude, y = latitude), col = "white", size = 2)
From the Land-use data we create a probability matrix.
p.old = subLandUse.df[,which(colSums(subLandUse.df) > 0)] p.old[is.na(p.old)] = 0 p.old= as.matrix((p.old)/rowSums(p.old))
We calculate the temporal functions for each site, using SPTMData.
df = SPTMData(subWaterQ.df, agg = "month", seasonality = c("quarter"), debug=F, method = "splines", degFree = 8) nrow(df$obs.data) df$basis<-NULL df = updateSPTMData(df, "all") p.old = p.old[match(levels(df$obs.data$ID),rownames(p.old)),] y = df$obs.data lu.long = data.frame(lu = as.factor(apply(subLandUse.df[,1:ncol(subLandUse.df)],1, which.max)), ID = as.character(rownames(subLandUse.df)))
That gives us the following plot, for each landuse type.
season.sites=df$season season.lu = updateSeason(season.sites, p.sites = p.old) plot(season.lu, select = c(1:ncol(p.old)), cex.axis = 0.7)
We then need to set some initial values for the algorithm.
theta.mean = rnorm(ncol(p.old), mean = rep(mean(y$obs, na.rm=T),ncol(p.old)), sd = rep(mean(abs(y$obs), na.rm=T),ncol(p.old))/10) theta.season = rnorm(max(ncol(df$basis),2)*ncol(p.old), mean = 1, sd = 0.5) theta.sigma = rnorm(ncol(p.old), mean = -0.5, sd = 0.05) theta.0 = c(theta.mean,theta.season, theta.sigma) subCoords.df = subCoords.df[match(levels(df$obs.data$ID),subCoords.df$ID),] proj.xy = project(cbind(subCoords.df$longitude,subCoords.df$latitude), "+proj=utm +zone=51 ellps=WGS84")/1000 subCoords.df$x = proj.xy[,1] subCoords.df$y = proj.xy[,2] sptmod = SPTModel(df, p.old, proj.xy)
resEM = estimEM(sptmod, theta.init = NULL, print.res = F, debug=F, hessian = F) theta.EM = resEM$Result$theta p.EM = resEM$Result$latent
plot(resEM, type = "residuals") plot(resEM, type = "tempBasis")
# colnames(p.EM)<-colnames(p.old) # season.lu = updateSeason(season.sites, p.sites=p.EM) # season.lu[is.na(season.lu)] <- NA # dataS = data.frame( # Comp = as.factor(sapply(apply(p.EM, 1, which.max), function(x) which(sort(theta.EM[1:ncol(p.old)],index.return=T)$ix == x))), # Longitude = subCoords.df[,2], # Latitude = subCoords.df[,3], # CompName = as.factor(colnames(p.EM)[apply(p.EM, 1, which.max)]), ID = subCoords.df[,"ID"]) # # SpPt = SpatialPointsDataFrame(coords = as.matrix(subCoords.df[,2:3]), data = dataS, proj4string = CRS("+proj=longlat +ellps=GRS80 +no_defs"))
# ggplot() + geom_polygon(data = Catchment.df, aes(long,lat, group = group)) + # geom_path(data = Catchment.df, aes(long,lat, group = group),color="darkgrey", size = 1) + # coord_equal() + # geom_point(data = SpPt@data, aes(x = Longitude, y = Latitude, col =Comp), size=3) + # scale_color_manual(values = rev(brewer.pal(ncol(p.EM),"RdYlGn")), name = "LU Composition", # labels = colnames(p.EM)[sort(theta.EM[1:ncol(p.old)],index.return=T)$ix])+ # theme(legend.position="bottom")
# Ypred = predict(resEM, transform = "none") # Ypred$ClName = as.factor(colnames(p.EM)[as.numeric(as.character(Ypred$Cluster))]) # Ypred$ClName <- ordered(Ypred$ClName, levels = colnames(p.EM)[sort(theta.EM[1:ncol(p.old)],index.return=T)$ix]) # ggplot(data = Ypred) + geom_point(aes(x=date, y = obs), size = 0.4)+ # geom_line(aes(x = date, y = pred, color = ClName), size=0.5)+ # geom_ribbon(aes(x = date, ymin=CIlow,ymax=CIup, fill = ClName),alpha=0.3)+ # facet_wrap(~ID, scales = "free") + # scale_color_manual(values = rev(brewer.pal(ncol(p.EM),"RdYlGn")), name = "LU Composition", # labels = colnames(p.EM)[sort(theta.EM[1:ncol(p.old)],index.return=T)$ix]) + # scale_fill_manual(values = rev(brewer.pal(ncol(p.EM),"RdYlGn")), name = "LU Composition", # labels = colnames(p.EM)[sort(theta.EM[1:ncol(p.old)],index.return=T)$ix]) + # theme(legend.position="bottom")
# theta.EM[(length(theta.EM) - ncol(p.EM) + 1):length(theta.EM)] = exp(theta.EM[(length(theta.EM) - ncol(p.EM) + 1):length(theta.EM)]) # # fisher_info<-solve(-resEM$Result$Hessian) # prop_theta<-(diag(fisher_info)) # # idx.s = (ncol(season.lu)+1):length(theta.EM) # priors = list(theta = list(m0 = theta.EM[1:(ncol(season.lu))], s0 = prop_theta[1:ncol(season.lu)]), # sigma2 = list(a0 = theta.EM[idx.s]^2 / ((prop_theta[idx.s])) + 2, b0 = (theta.EM[idx.s]^2 / ((prop_theta[idx.s])) + 1)*theta.EM[idx.s]), # z = list(a = p.EM)) # set.seed(1) # resGibbs = estimGibbs(sptmod, method = 'simple', priors = priors, print.res = F, # N.run = 2500, beta.range = c(0,3), meanFunc = meanFunc, debug=F) # # p.Gibbs = resGibbs$Result$latent # colnames(p.Gibbs)<-colnames(p.old) # theta.Gibbs = resGibbs$Result$theta
# plot(resGibbs, type = "residuals") # plot(resGibbs, type = "tempBasis")
# diagPlot(resGibbs, par = "theta",main = "", breaks = 50) # diagPlot(resGibbs, par = "z",main = "") # diagPlot(resGibbs, par = "paired-theta", plot.cluster = 1)
# Ypred = predict(resGibbs) # Ypred$ClName = as.factor(colnames(p.old)[as.numeric(as.character(Ypred$Cluster))]) # Ypred$ClName <- ordered(Ypred$ClName, levels = colnames(p.old)[sort(theta.Gibbs[1:ncol(p.old)],index.return=T)$ix]) # ggplot(data = Ypred) + geom_point(aes(x=date, y = obs), size = 0.4)+ # geom_line(aes(x = date, y = pred, color = (ClName)))+ # geom_ribbon(aes(x = date, ymin=CIlow,ymax=CIup, fill = ClName),alpha=0.3)+ # facet_wrap(~ID, scales = "free_x") + # scale_color_manual(values = rev(brewer.pal(ncol(p.Gibbs),"RdYlGn")), name = "LU Composition", # labels = colnames(p.old)[sort(theta.Gibbs[1:ncol(p.old)],index.return=T)$ix]) + # scale_fill_manual(values = rev(brewer.pal(ncol(p.Gibbs),"RdYlGn")), name = "LU Composition", # labels = colnames(p.old)[sort(theta.Gibbs[1:ncol(p.old)],index.return=T)$ix]) + # theme(legend.position="bottom")
# season.lu = updateSeason(season.sites, p.sites=p.Gibbs) # season.lu[is.na(season.lu)] <- NA # dataS = data.frame( # Comp = as.factor(sapply(apply(p.Gibbs, 1, which.max), function(x) which(sort(theta.Gibbs[1:ncol(p.old)],index.return=T)$ix == x))), # Longitude = subCoords.df[,2], # Latitude = subCoords.df[,3], # CompName = as.factor(colnames(p.EM)[apply(p.Gibbs, 1, which.max)]), ID = subCoords.df[,"ID"]) # # SpPt = SpatialPointsDataFrame(coords = as.matrix(subCoords.df[,2:3]), data = dataS, proj4string = CRS("+proj=longlat +ellps=GRS80 +no_defs"))
# ggplot() + geom_polygon(data = Catchment.df, aes(long,lat, group = group)) + # geom_path(data = Catchment.df, aes(long,lat, group = group),color="darkgrey", size = 1) + # coord_equal() + # geom_point(data = SpPt@data, aes(x = Longitude, y = Latitude, col =Comp), size=3) + # scale_color_manual(values = rev(brewer.pal(ncol(p.Gibbs),"RdYlGn")), name = "LU Composition", # labels = colnames(p.Gibbs)[sort(theta.Gibbs[1:ncol(p.old)],index.return=T)$ix])+ # theme(legend.position="bottom")
# resGibbsSpat = estimGibbs(sptmod, method = 'spat', priors = priors, print.res = F, # N.run = 2500, beta.range = c(2,3), meanFunc = meanFunc, debug = F) # # p.GibbsSpat = resGibbsSpat$Result$latent # colnames(p.GibbsSpat)<-colnames(p.old) # theta.GibbsSpat = resGibbsSpat$Result$theta
# plot(resGibbsSpat, type = "residuals") # plot(resGibbsSpat, type = "tempBasis")
# diagPlot(resGibbsSpat, par = "theta", main = "", breaks = 50) # diagPlot(resGibbsSpat, par = "z", main = "") # diagPlot(resGibbsSpat, par = "beta",main = "", breaks = 50)
# Ypred = predict(resGibbsSpat) # Ypred$ClName = as.factor(colnames(p.old)[as.numeric(as.character(Ypred$Cluster))]) # Ypred$ClName <- ordered(Ypred$ClName, levels = colnames(p.GibbsSpat)[sort(theta.GibbsSpat[1:ncol(p.old)],index.return=T)$ix]) # ggplot(data = Ypred) + geom_point(aes(x=date, y = obs), size = 0.4)+ # geom_line(aes(x = date, y = pred, color = (ClName)))+ # facet_wrap(~ID, scales = "free_x") + # scale_color_manual(values = rev(brewer.pal(ncol(p.GibbsSpat),"RdYlGn")), name = "LU Composition", # labels = colnames(p.GibbsSpat)[sort(theta.GibbsSpat[1:ncol(p.old)],index.return=T)$ix])+ # theme(legend.position="bottom")
# season.lu = updateSeason(season.sites, p.sites=p.GibbsSpat) # season.lu[is.na(season.lu)] <- NA # dataS = data.frame( # Comp = as.factor(sapply(apply(p.GibbsSpat, 1, which.max), function(x) which(sort(theta.GibbsSpat[1:ncol(p.old)],index.return=T)$ix == x))), # Longitude = subCoords.df[,2], # Latitude = subCoords.df[,3], # CompName = as.factor(colnames(p.EM)[apply(p.GibbsSpat, 1, which.max)]), ID = subCoords.df[,"ID"]) # # SpPt = SpatialPointsDataFrame(coords = as.matrix(subCoords.df[,2:3]), data = dataS, proj4string = CRS("+proj=longlat +ellps=GRS80 +no_defs"))
# ggplot() + geom_polygon(data = Catchment.df, aes(long,lat, group = group)) + # geom_path(data = Catchment.df, aes(long,lat, group = group),color="darkgrey", size = 1) + # coord_equal() + # geom_point(data = SpPt@data, aes(x = Longitude, y = Latitude, col =Comp), size=3) + # scale_color_manual(values = rev(brewer.pal(ncol(p.GibbsSpat),"RdYlGn")), name = "LU Composition", # labels = colnames(p.GibbsSpat)[sort(theta.GibbsSpat[1:ncol(p.old)],index.return=T)$ix])+ # theme(legend.position="bottom")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.