tests/analysis4.R

#### Simulated data ####
min.date = as.Date("01-01-2010", format = "%d-%m-%Y")
max.date = as.Date("01-01-2013", format = "%d-%m-%Y")
date = seq(min.date,max.date,by = "month")

seasonality = cos(as.numeric(format(date, "%m"))*2*3.146 / 12)

n_s = 25
n_param = 3
n_lu = 3
n_b = 2
n_t = length(date)

locations = matrix(20*runif(n_s*2),ncol=2)
LU.comp = dirmult::rdirichlet(n_s,alpha = c(0.5,0.5,0.5))
LU.true = t(apply(cbind(LU.comp, locations),1,function(x) rmultinom(1,1,x[1:3] * c(x[4]<5, abs(x[4]-11)<6, abs(x[4]-18)<2))))

plot(locations, col = rainbow(3)[apply(LU.true,1,which.max)])


sim.raw.cov = data.frame(ID = 1:n_s, Longitude = locations[,1], Latitude = locations[,2])
LU.df = as.data.frame(LU.comp)
names(LU.df) <- paste0("LU",1:n_lu)
LU.df$ID = 1:n_s

sim.raw.cov = merge(sim.raw.cov, LU.df, by = "ID")
sim.raw.obs = data.frame(date = rep(date, each= n_s), ID = rep(1:n_s, length(date)))

alpha.true = matrix(c(sample(c(-1,0,1),n_lu),5,c(0,1,0.5),2),ncol=1)
X.true = cbind(LU.true,1)
D = as.matrix(dist(sim.raw.cov[,c("Longitude","Latitude")]))
for(i in 1:n_s){
  sim.raw.obs$obs[sim.raw.obs$ID == i] = X.true[i,] %*% alpha.true[1:(n_lu+1),] + seasonality * X.true[i,] %*% alpha.true[(n_lu+2):(2*n_lu+2),]
}

for(j in date){
  sim.raw.obs$obs[sim.raw.obs$date == j] = sim.raw.obs$obs[sim.raw.obs$date == j] + rmvnorm(1, mean=rep(0,n_s), sigma = 1*exp(-0.05*D))
}

#ggplot(data = sim.raw.obs, aes(x = date, y = obs)) + geom_line() + facet_wrap(~ID)
# Create  Spat-Temp Covariates #
# library(SpatioTemporal)


sim.raw.obs$ID = as.character(sim.raw.obs$ID)
sim.raw.cov$ID = as.character(sim.raw.cov$ID)



ggplot(sim.raw.obs, aes(x=date, y = obs)) + geom_point() + facet_wrap(~ID)

save(sim.raw.cov, sim.raw.obs, alpha.true, LU.true,file = "data/sim_analysis.RData")

#### Run the analysis ####

load("data/sim_analysis.RData")

Xcov = sim.raw.cov[,4:6]
subCoords.df = sim.raw.cov[,1:3]
nLu = ncol(LU.true)
p.old = sim.raw.cov[,4:6]
p.old= as.matrix((p.old)/rowSums(p.old))
colnames(p.old) <- paste("Cl",1:nLu)
sim.raw.obs$date = as.Date(as.character(sim.raw.obs$date))
sim.raw.obs$ID = as.factor(sim.raw.obs$ID)
df = SPTMData(sim.raw.obs, agg = "month", seasonality = c("month"), debug=F,method = "splines", degFree = 6)
df$basis<-NULL
df = updateSPTMData(df, "all")
y = df$obs.data
theta.mean = rnorm(ncol(p.old), mean = rep(mean(y$obs, na.rm=T),ncol(p.old)),
                   sd = rep(mean(abs(y$obs), na.rm=T),ncol(p.old))/10)
theta.season = rnorm(max(ncol(df$basis),2)*ncol(p.old), mean = 1, sd = 0.5)
theta.sigma = rnorm(ncol(p.old), mean = -0.5, sd = 0.05)
theta.0 = c(theta.mean,theta.season, theta.sigma)
subCoords.df = subCoords.df[match(levels(df$obs.data$ID),subCoords.df$ID),]
proj.xy = cbind(subCoords.df[,2],subCoords.df[,3])
subCoords.df$x = proj.xy[,1]
subCoords.df$y = proj.xy[,2]
sptmod = SPTModel(df, p.old, proj.xy, cov = NULL)
season.lu = updateSeason(df$season, p.sites = p.old)
resEM = estimEM(sptmod, theta.init = NULL, print.res = T, debug=F, hessian = T)
theta.EM = resEM$Result$theta
p.EM = resEM$Result$latent
plot(resEM, type= "residuals")
plot(resEM)

theta.EM[(length(theta.EM) - ncol(p.EM) + 1):length(theta.EM)] = exp(theta.EM[(length(theta.EM) - ncol(p.EM) + 1):length(theta.EM)])
fisher_info<-solve(-resEM$Result$Hessian)
prop_theta<-sqrt((diag(fisher_info)))
idx.s = (ncol(season.lu)+1):length(theta.EM)
priors = list(theta = list(m0 = theta.EM[1:(ncol(season.lu))], s0 = prop_theta[1:ncol(season.lu)]),
              sigma2 = list(a0 = theta.EM[idx.s]^2 / ((prop_theta[idx.s])) + 2, b0 = (theta.EM[idx.s]^2 / ((prop_theta[idx.s])) + 1)*theta.EM[idx.s]),
              z = list(a = p.EM))
sptmod = SPTModel(df, p.old, proj.xy, cov = NULL)
set.seed(1)
resGibbs = estimGibbs(sptmod, method = 'spat', priors = priors, print.res = F,
                      N.run = 1500, beta.range = c(0,3), meanFunc = meanFunc, debug=F)
p.Gibbs = resGibbs$Result$latent
colnames(p.Gibbs)<-colnames(p.old)
theta.Gibbs = resGibbs$Result$theta

plot(resGibbs, type= "residuals")
plot(resGibbs, type = "tempBasis")
plot(resGibbs, type= "prediction")

diagPlot(resGibbs, par = "theta", breaks = 50, main = "")
diagPlot(resGibbs, par = "beta")
diagPlot(resGibbs, par = "z")
diagPlot(resGibbs, par = "spatial")


priors = list(theta = list(m0 = rep(1, ncol(season.lu)), s0 = prop_theta[1:ncol(season.lu)]),
              sigma2 = list(a0 = theta.EM[idx.s]^2 / ((prop_theta[idx.s])) + 2, b0 = (theta.EM[idx.s]^2 / ((prop_theta[idx.s])) + 1)*theta.EM[idx.s]),
              z = list(a = p.EM))
resGibbsM = estimGibbs(sptmod, method = 'mixedeffect', priors = priors, print.res = F,
                       N.run = 2500, beta.range = c(0,3), meanFunc = meanFunc, debug=F)
p.GibbsM = resGibbsM$Result$latent
colnames(p.GibbsM)<-colnames(p.old)
theta.GibbsM = resGibbsM$Result$theta

resGibbsM$data$data$basis = NULL

plot(resGibbsM, type= "residuals")
plot(resGibbsM, type = "tempBasis")
plot(resGibbsM, type= "prediction")

diagPlot(resGibbsM, par = "theta", breaks = 50, main = "")
diagPlot(resGibbsM, par = "random-effect", breaks = 50, main = "")
diagPlot(resGibbsM, par = "covariates", breaks = 20, main = "")
diagPlot(resGibbsM, par = "beta")
diagPlot(resGibbsM, par = "z")
diagPlot(resGibbsM, par = "spatial")

#### Run the analysis with missing data ####

load("data/sim_analysis.RData")

sim.raw.obs$obs[sample(925,200, replace = F)] <- NA

Xcov = sim.raw.cov[,4:6]
subCoords.df = sim.raw.cov[,1:3]
nLu = ncol(LU.true)
p.old = sim.raw.cov[,4:6]
p.old= as.matrix((p.old)/rowSums(p.old))
colnames(p.old) <- paste("Cl",1:nLu)
sim.raw.obs$date = as.Date(as.character(sim.raw.obs$date))
sim.raw.obs$ID = as.factor(sim.raw.obs$ID)
df = SPTMData(sim.raw.obs, agg = "month", seasonality = c("month"), debug=F,method = "splines", degFree = 6)
df$basis<-NULL
df = updateSPTMData(df, "all")
y = df$obs.data
theta.mean = rnorm(ncol(p.old), mean = rep(mean(y$obs, na.rm=T),ncol(p.old)),
                   sd = rep(mean(abs(y$obs), na.rm=T),ncol(p.old))/10)
theta.season = rnorm(max(ncol(df$basis),2)*ncol(p.old), mean = 1, sd = 0.5)
theta.sigma = rnorm(ncol(p.old), mean = -0.5, sd = 0.05)
theta.0 = c(theta.mean,theta.season, theta.sigma)
subCoords.df = subCoords.df[match(levels(df$obs.data$ID),subCoords.df$ID),]
proj.xy = cbind(subCoords.df[,2],subCoords.df[,3])
subCoords.df$x = proj.xy[,1]
subCoords.df$y = proj.xy[,2]
sptmod = SPTModel(df, p.old, proj.xy, cov = NULL)
season.lu = updateSeason(df$season, p.sites = p.old)
resEM = estimEM(sptmod, theta.init = NULL, print.res = T, debug=F, hessian = T)
theta.EM = resEM$Result$theta
p.EM = resEM$Result$latent
plot(resEM, type= "residuals")
plot(resEM)

theta.EM[(length(theta.EM) - ncol(p.EM) + 1):length(theta.EM)] = exp(theta.EM[(length(theta.EM) - ncol(p.EM) + 1):length(theta.EM)])
fisher_info<-solve(-resEM$Result$Hessian)
prop_theta<-sqrt((diag(fisher_info)))
idx.s = (ncol(season.lu)+1):length(theta.EM)

priors = list(theta = list(m0 = rep(1, ncol(season.lu)), s0 = prop_theta[1:ncol(season.lu)]),
              sigma2 = list(a0 = theta.EM[idx.s]^2 / ((prop_theta[idx.s])) + 2, b0 = (theta.EM[idx.s]^2 / ((prop_theta[idx.s])) + 1)*theta.EM[idx.s]),
              z = list(a = p.EM))
df = SPTMData(sim.raw.obs, agg = "month", seasonality = c("month"), debug=F,method = "splines", degFree = 6)
sptmod = SPTModel(df, p.old, proj.xy, cov = NULL)
resGibbsM = estimGibbs(sptmod, method = 'mixedeffect', priors = priors, print.res = F,
                       N.run = 25000, beta.range = c(0,3), meanFunc = meanFunc, debug=F)
p.GibbsM = resGibbsM$Result$latent
colnames(p.GibbsM)<-colnames(p.old)
theta.GibbsM = resGibbsM$Result$theta

plot(resGibbsM, type= "residuals")
plot(resGibbsM, type = "tempBasis")
plot(resGibbsM, type= "prediction")

diagPlot(resGibbsM, par = "theta", breaks = 100, main = "")
diagPlot(resGibbsM, par = "random-effect", breaks = 50, main = "")
diagPlot(resGibbsM, par = "covariates", breaks = 100, main = "")
diagPlot(resGibbsM, par = "beta")
diagPlot(resGibbsM, par = "z")
diagPlot(resGibbsM, par = "spatial")
ick003/SpTMixture documentation built on May 18, 2019, 2:32 a.m.