tests/analysis.R

#load("~/Documents/Donnees/Publications/Journaux/2016 - Ecological Statistics - Compositional Data Modelling/R Code/GoyderData.Rdata")

library(ggplot2)
library(bayesm)
library(zoo)
library(splines)
library(SpTMixture)
#tools::checkRdaFiles("data/")
#tools::resaveRdaFiles("data/", compress="auto")
data(GoyderData)


subWaterQ.df = subset(WaterQ.df, param == levels(WaterQ.df$param)[6], select = c(site, date, value))

subWaterQ.df = subset(subWaterQ.df, date < "2013-01-01", select = c(site, date, value))
subWaterQ.df = subset(subWaterQ.df, date > "2000-01-01", select = c(site, date, value))

ID_keep = names(which(table(subWaterQ.df$site)>30))
subWaterQ.df = subset(subWaterQ.df, site %in% ID_keep )
names(subWaterQ.df) <- c("ID" ,"date", "obs")
subWaterQ.df$ID <- as.factor(as.character(subWaterQ.df$ID))
subWaterQ.df$obs[subWaterQ.df$obs==0] <- NA
subWaterQ.df$obs <- log(subWaterQ.df$obs)


LU = area2comp(LandUse.df, compCol = "DES_SEC_V6", IDcol = "SiteID")

subLandUse.df = subset(LU, ID %in% unique(subWaterQ.df$ID))
rownames(subLandUse.df) <- subLandUse.df$ID
subLandUse.df$ID <- NULL
keep_lu = names(which(apply(subLandUse.df,2,max) > .487))

# Merging GMP and OMU as they seem to behave identically

subLandUse.df = subset(subLandUse.df, select = keep_lu)
subWaterQ.df = subset(subWaterQ.df, ID %in% rownames(subLandUse.df))
subWaterQ.df$ID <- as.factor(as.character(subWaterQ.df$ID))


subCoords.df = unique(subset(Coords.df, site %in% rownames(subLandUse.df)))
names(subCoords.df)[1] <- c("ID")

p.old = subLandUse.df[,which(colSums(subLandUse.df) > 0)]
p.old[is.na(p.old)] = 0
p.old= as.matrix((p.old)/rowSums(p.old))



df = SPTMData(subWaterQ.df, agg = "month", seasonality = c("month"), debug=F,
              method = "splines", degFree = 3)

df= updateSPTMData(df, 2)

p.old = p.old[match(levels(df$obs.data$ID),rownames(p.old)),]

y = df$obs.data

lu.long = data.frame(lu = as.factor(apply(subLandUse.df[,1:ncol(subLandUse.df)],1, which.max)), ID = as.character(rownames(subLandUse.df)))

full = merge(y, lu.long, by="ID")
full = full[order(full$date),]
ggplot(data = full, aes(x = date, y = obs, col = lu)) + geom_point() + facet_wrap(~ID)

season = df$season
season.sites=season
season.lu = updateSeason(season.sites, p.old)
plot(season.lu, select = c(1:7))


theta.mean = rnorm(ncol(p.old), mean = rep(mean(y$obs, na.rm=T),ncol(p.old)),
                   sd = rep(mean(abs(y$obs), na.rm=T),ncol(p.old))/10)
theta.season = rnorm(ncol(season.lu), mean = 1, sd = 0.1)
theta.sigma = rnorm(ncol(p.old), mean = -0.5, sd = 0.05)

theta.0 = c(theta.mean,theta.season, theta.sigma)

y.p = meanFunc(theta.0, n.mixt=7, p.sites=p.old, season.sites = season.sites)


theta.mean = rnorm(ncol(p.old), mean = rep(mean(y$obs, na.rm=T),ncol(p.old)),
                   sd = rep(mean(abs(y$obs), na.rm=T),ncol(p.old))/10)
theta.season = rnorm(ncol(df$basis)*ncol(p.old), mean = 1, sd = 0.5)
theta.sigma = rnorm(ncol(p.old), mean = -0.5, sd = 0.05)
theta.0 = c(theta.mean,theta.season, theta.sigma)

y.p = meanFuncSplines(theta.0, n.mixt=5, basis = df$basis)


SiteID = unique(df$obs.data$ID)
par(mfrow=c(4,4), mar=c(2,2,2,1))
for(i in 1:length(SiteID)){
  obs = y$obs[y$ID %in% SiteID[i]]
  idx.k = which.max(p.old[i,])
  pred= y.p[,idx.k]
  dates = y$date[y$ID %in% SiteID[i]]
  ci = cbind(pred - 2*exp(theta.0[(length(theta.0) - ncol(p.old) + 1):length(theta.0)][idx.k]),pred + 2*exp(theta.0[(length(theta.0) - ncol(p.old) + 1):length(theta.0)][idx.k]) )
  plot(dates,obs,main = SiteID[i],
       type="l", ylim = range(ci, na.rm=T))
  polygon(x=c(rev(dates), (dates)), y = c(rev(ci[,1]), (ci[,2])),
          border=NA, col = "grey", density = 100)
  points(dates,pred, type="l", col="red")
  points(y$date[y$ID %in% SiteID[i]],y$obs[y$ID %in% SiteID[i]])
}


subCoords.df = subCoords.df[match(levels(df$obs.data$ID),subCoords.df$ID),]

proj.xy = project(cbind(subCoords.df$longitude,subCoords.df$latitude), "+proj=utm +zone=51 ellps=WGS84")/1000

subCoords.df$x = proj.xy[,1]
subCoords.df$y = proj.xy[,2]

sptmod = SPTModel(df, p.old, proj.xy)

resEM = estimEM(sptmod, theta.init = NULL, print.res = T, debug=F)

theta.EM = resEM$Result$theta
p.EM = resEM$Result$latent

plot(resEM, type = "residuals")
plot(resEM, type = "tempBasis")
plot(resEM, type = "prediction")

colnames(p.EM)<-colnames(p.old)
season.lu = updateSeason(season.sites, p.EM)

season.lu[is.na(season.lu)] <- NA

par(mar = c(5,4,4,1))
plot(season.lu, select = 1:5)

SpPt = SpatialPointsDataFrame(coords = as.matrix(subCoords.df[,2:3]), data = data.frame(Comp = apply(p.EM, 1, which.max)), proj4string = CRS("+proj=longlat +ellps=GRS80 +no_defs"))

library(leaflet)
m <- leaflet() %>%
  addTiles() %>%
  addPolygons(data = CatchmentMap[which(CatchmentMap@data$Site_Code %in% SiteID),],
              stroke = FALSE, fillOpacity = 0.2, smoothFactor = 0.5) %>%
  addCircles(data = SpPt, color = c("green", "red", "blue","orange","pink")[SpPt@data$Comp], radius=rep(400,nrow(subCoords.df)))
#  addMarkers(data = SpPt, popup = as.character(unlist(Coords[,1])))
m



theta.EM[(length(theta.EM) - ncol(p.EM) + 1):length(theta.EM)] = exp(theta.EM[(length(theta.EM) - ncol(p.EM) + 1):length(theta.EM)])

meanFunc(theta.EM, n.mixt = 5, p.EM, season.sites)


resGibbs = estimGibbs(sptmod, method = 'simple', theta.init = theta.EM, print.res = T,
                      N.run = 1000, debug=FALSE)

plot(resGibbs)
par(mar=c(0,0,0,0))
diagPlot(resGibbs, par = "theta",
         main = "", breaks = 50)
diagPlot(resGibbs, par = "paired-theta",
         main = "", breaks = 50)

diagPlot(resGibbs, par = "tempBasis")

theta.Gibbs = resGibbs$Result$theta
p.Gibbs = resGibbs$Result$latent

season.lu = updateSeason(season.sites, p.Gibbs)

season.lu[is.na(season.lu)] <- NA

plot(season.lu, select = 1:5)


SpPt = SpatialPointsDataFrame(coords = as.matrix(subCoords.df[,2:3]), data = data.frame(Comp = apply(p.Gibbs, 1, which.max)), proj4string = CRS("+proj=longlat +ellps=GRS80 +no_defs"))

library(leaflet)
m <- leaflet() %>%
  addTiles() %>%
  addPolygons(data = CatchmentMap[which(CatchmentMap@data$Site_Code %in% SiteID),],
              stroke = FALSE, fillOpacity = 0.2, smoothFactor = 0.5) %>%
  addCircles(data = SpPt, color = c("green", "red", "blue","orange","pink")[SpPt@data$Comp], radius=rep(400,nrow(subCoords.df)))
#  addMarkers(data = SpPt, popup = as.character(unlist(Coords[,1])))
m

resGibbsSpat = estimGibbs(sptmod, method = 'spat', theta.init = theta.EM, print.res = T,
                      N.run = 15000, beta.range = c(0,3))

plot(resGibbsSpat)

pred = predict(resGibbs)

diagPlot(resGibbsSpat, par = "theta",
         main = "", breaks = 50)
diagPlot(resGibbsSpat, par = "paired-theta",
         main = "", breaks = 50)
diagPlot(resGibbsSpat, par = "beta",
         main = "", breaks = 50)

p.GibbsSpat = resGibbsSpat$Result$latent
season.lu = updateSeason(season.sites, p.GibbsSpat)
plot(season.lu)

SpPt = SpatialPointsDataFrame(coords = as.matrix(subCoords.df[,2:3]), data = data.frame(Comp = apply(p.GibbsSpat, 1, which.max)), proj4string = CRS("+proj=longlat +ellps=GRS80 +no_defs"))



par(mfrow=c(1,2), mar = c(1,1,1,1))
plot(CatchmentMap[which(CatchmentMap@data$Site_Code %in% SiteID),])
points(subCoords.df$longitude, subCoords.df$latitude)
compCircle(subCoords.df$longitude, subCoords.df$latitude, p.old, color = rainbow(6), radius = 0.015)
text(subCoords.df$longitude, subCoords.df$latitude, subCoords.df$ID, cex=.7)
plot(CatchmentMap[which(CatchmentMap@data$Site_Code %in% SiteID),])
points(subCoords.df$longitude, subCoords.df$latitude)
compCircle(subCoords.df$longitude, subCoords.df$latitude, p.Gibbs, color = rainbow(6), radius = 0.015)
text(subCoords.df$longitude, subCoords.df$latitude, subCoords.df$ID, cex=.7)

# Voronoi Cells
cov = subCoords.df
library(deldir)
library(rgeos)
par(mfrow = c(1,3), mar = c(5,4,4,1))
plot(cov$longitude, cov$latitude, xlab = "Longitude", ylab = "Latitude")
dd = deldir(cov$longitude, cov$latitude)
tile.dd = tile.list(dd)
col.fill = rainbow(8, alpha = 0.2)[apply(p.old, 1, which.max)]
plot(tile.dd, add=T, fillcol = col.fill, border = NA, close = FALSE)
plot(CatchmentMap[which(CatchmentMap@data$Site_Code %in% SiteID),], add=T)
plot(CatchmentMap, add=T)
plot(cov$longitude, cov$latitude)
col.fill = rainbow(8, alpha = 0.2)[apply(p.Gibbs, 1, which.max)]
plot(tile.dd, add=T, fillcol = col.fill, border = NA, close = FALSE)
plot(CatchmentMap, add=T)
plot(cov$longitude, cov$latitude)
col.fill = rainbow(8, alpha = 0.2)[apply(p.GibbsSpat, 1, which.max)]
plot(tile.dd, add=T, fillcol = col.fill, border = NA, close = FALSE)
plot(CatchmentMap, add=T)


library(spatstat)
library(maptools)
df = data.frame(x = cov$longitude, y = cov$latitude)
W <- ripras(df, shape="rectangle")
W <- owin(range(cov$longitude)*c(0.999,1.001), range(cov$latitude)*c(1.001,0.999))

# create point pattern
X <- as.ppp(df, W=W)
plot(X)

# compute Dirichlet triangles
Y <- dirichlet(X)
plot(Y, add=T)

# convert
Z <- as(Y, "SpatialPolygons")
Xp <- as(X, "SpatialPoints")
par(mfrow=  c(1,1), mar = c(5,4,4,1))
plot(Z)
plot(subCatchmentMap, add=T)
CatMap = CatchmentMap[which(colSums(gIntersects(CatchmentMap, Xp, byid = TRUE)) > 0),]
CatMap2 = CatchmentMap[which(CatchmentMap@data$CATNAME == "Onkaparinga River" & CatchmentMap@data$Site_Code %in% SiteID),]
plot(CatMap)
ssCM = gIntersection(CatMap,Z, byid = TRUE,  drop_lower_td=TRUE)

IDs = NULL
for(i in 1:length(ssCM@polygons)){
  IDs = rbind(IDs, c(unlist(strsplit(ssCM@polygons[[i]]@ID, " "))))
}
palette <- colorRampPalette(c("green","red"))(7)
palette[sort(resGibbsSpat$Result$theta[1:7], index.return=T)$ix] = palette
col.fill=   palette[apply(p.GibbsSpat, 1, which.max)]

par(mfrow=c(1,1))
plot(CatMap2, ylim = c(-35.20,-34.87))
axis(1);axis(2)
plot(ssCM, col = col.fill[as.numeric(IDs[,2])], border=NA, add=T)
plot(CatMap, add=T)
plot(X, add=T, cex = .5)
legend(x = 138.43,y=-34.90,legend = names(subLandUse.df)[sort(resGibbsSpat$Result$theta[1:7], index.return=T)$ix], fill = palette[sort(resGibbsSpat$Result$theta[1:7], index.return=T)$ix])


pdf(file = "../MapNitrateLevels.pdf", width = 10, height = 8)
par(mfrow=c(1,1))
plot(CatchmentMap, xlim = c(138.4,139.0), ylim = c(-35.2,-34.93))
plot(CatMap, add=T)
axis(1);axis(2)
plot(ssCM, col = col.fill[as.numeric(IDs[,2])], border=NA, add=T)
plot(CatMap, add=T)
plot(X, add=T, cex = .5)
legend(x = 138.41,y=-34.96,cex = 0.85,
       legend = names(subLandUse.df)[sort(resGibbs$Result$theta[1:7], index.return=T)$ix],
       fill = palette[sort(resGibbs$Result$theta[1:7], index.return=T)$ix])
dev.off()



require(graphics)
library(splines)
library(MASS)
splineDesign(knots = 1:10, x = 4:7)
splineDesign(knots = 1:10, x = 4:7, deriv = 1)
## visualize band structure
Matrix::drop0(zapsmall(6*splineDesign(knots = 1:40, x = 4:37, sparse = TRUE)))

knots <- seq(1,10,.85)  # 10 => 11-4 = 7 Basis splines
x <- seq(min(knots)+1.5, max(knots)-1.5, length.out = 501)
bb <- splineDesign(knots, x = x, outer.ok = TRUE)

dataT = df$obs.data
y=dataT$obs[!is.na(dataT$obs)] - mean(dataT$obs[!is.na(dataT$obs)])
x=dataT$date[!is.na(dataT$obs)]


plot(x,y, type="p")
bbW = bs(as.numeric(format(x, "%W")),df = 51)
bbS = bs(as.numeric(format(x, "%m")),df = 11)
bbT = bs(as.numeric(format(x, "%Y")),df = diff(range(as.numeric(format(x, "%Y")))))
bbF = cbind(bbW,bbS,bbT)
w = ginv(t(bbF) %*% bbF) %*% t(bbF) %*% matrix(y, ncol=1)
var.w = ginv(t(bbF) %*% bbF) * mean((y-bbF %*% w)^2)
plot(x,(y), type="p")
points(x, bbF %*% w, type= "l", lwd=2, col="red")
points(x, bbF %*% w + 1.96*sqrt(diag((bbF) %*% var.w %*% t(bbF))), type="l", lwd = 0.5, col="red")
points(x, bbF %*% w - 1.96*sqrt(diag((bbF) %*% var.w %*% t(bbF))), type="l", lwd = 0.5, col="red")

idx.w = 1:ncol(bbW)
df.week = unique(cbind(as.numeric(format(x, "%W")),bbW %*% w[idx.w],
                       bbW %*% w[idx.w] - 1.96 * sqrt(diag((bbW) %*% var.w[idx.w,idx.w] %*% t(bbW))),
                       bbW %*% w[idx.w] + 1.96 * sqrt(diag((bbW) %*% var.w[idx.w,idx.w] %*% t(bbW)))))
plot(df.week[,1],df.week[,2], type="p", ylim = range(df.week[,2:4]))
arrows(df.week[,1], df.week[,3], df.week[,1], df.week[,4], code=0)
abline(h=0)

idx.m = (ncol(bbW)+1):(ncol(bbW)+ncol(bbS))
df.month = unique(cbind(as.numeric(format(x, "%m")),bbS %*% w[idx.m],
                        bbS %*% w[idx.m] - 1.96 * sqrt(diag((bbS) %*% var.w[idx.m,idx.m] %*% t(bbS))),
                        bbS %*% w[idx.m] + 1.96 * sqrt(diag((bbS) %*% var.w[idx.m,idx.m] %*% t(bbS)))))

plot(df.month[,1],df.month[,2], type="l", ylim = range(df.month[,2:4]))
arrows(df.month[,1], df.month[,3], df.month[,1], df.month[,4], code=0)
abline(h=0)

idx.y = ((ncol(bbW)+ncol(bbS))+1):length(w)
df.year = unique(cbind(as.numeric(format(x, "%Y")),bbT %*% w[idx.y],
                       bbT %*% w[idx.y] - 1.96 * sqrt(diag((bbT) %*% var.w[idx.y,idx.y] %*% t(bbT))),
                       bbT %*% w[idx.y] + 1.96 * sqrt(diag((bbT) %*% var.w[idx.y,idx.y] %*% t(bbT)))))

plot(df.year[,1],df.year[,2], type="l", ylim = range(df.year[,2:4]))
arrows(df.year[,1], df.year[,3], df.year[,1], df.year[,4], code=0)
abline(h=0)
ick003/SpTMixture documentation built on May 18, 2019, 2:32 a.m.