mcmc.CatSPIM: Fit the categorical spatial partial identity model (catSPIM)

View source: R/mcmc.CatSPIM.R

mcmc.CatSPIMR Documentation

Fit the categorical spatial partial identity model (catSPIM)

Description

Will document after I defend. Should be able to get the idea in the example below.

Usage

mcmc.CatSPIM(data, niter = 2400, nburn = 1200, nthin = 5, M = 200,
  inits = NA, obstype = "poisson", nswap = NA, proppars = list(lam0
  = 0.05, sigma = 0.1, sx = 0.2, sy = 0.2), keepACs = FALSE,
  keepGamma = FALSE, keepG = FALSE, IDup = "Gibbs", priors = NA)

Author(s)

Ben Augustine

Examples

## Not run: library(coda)
#Normal SCR stuff
N=50
lam0=0.25
sigma=0.50
K=10
buff=3 #state space buffer. Should be at least 3 sigma.
X<- expand.grid(3:11,3:11)
obstype="poisson"

#categorical identity covariate stuff
ncat=2  #number of ID covariates
gamma=vector("list",ncat) #population frequencies of each category level

#Do this if you want to use the number of alleles at a locus to generate all possible genotypes
nlevels=rep(NA,ncat) #Number of levels per ID covariate
nallele=rep(3,ncat)  #number of alleles at each loci
for(i in 1:ncat){
 nlevels[i]=nallele[i]*(nallele[i]+1)/2
 gamma[[i]]=rep(1/nlevels[i],nlevels[i]) #generating all equal genotype frequencies
}
IDcovs=vector("list",ncat)#Store unique genotypes
for(i in 1:length(IDcovs)){
 IDcovs[[i]]=expand.grid(1:nallele[i],1:nallele[i])
 IDcovs[[i]]=unique(t(apply(IDcovs[[i]],1,sort)))
}
#sequentially number the unique genotypes
for(i in 1:ncat){
 IDcovs[[i]]=1:nrow(IDcovs[[i]])
}

#Or this for generic ID covariates
gamma=vector("list",ncat)
nlevels=rep(3,ncat) #Number of levels per ID covariate
for(i in 1:ncat){
 gamma[[i]]=rep(1/nlevels[i],nlevels[i]) #generating all equal category level frequencies
}
IDcovs=vector("list",ncat)
for(i in 1:length(IDcovs)){
 IDcovs[[i]]=1:nlevels[i]
}
pID=rep(0.8,ncat)#sample by covariate level observation probability.  e.g. loci amplification probability

#ncat=1 with nlevel=1 will produce unmarked SCR data with no ID covariates. 
#Well, everyone has the same covariate value so they are effectively unmarked

#Simulate some data
data=simCatSPIM(N=N,lam0=lam0,sigma=sigma,K=K,X=X,buff=buff,obstype=obstype,
ncat=ncat,pID=pID,gamma=gamma,
IDcovs=IDcovs)
str(data$y)#True data 
str(data$y.obs)#observed data
rowSums(data$y.obs)#one observation per row because they cannot be deterministically linked
str(data$G.true) #True categorical identities. Same # of rows as y
str(data$G.obs) #Observed categorical identities. Same # of rows as y.obs
data$G.obs #zeros are missing values

#MCMC stuff
niter=2000 #how long to run the MCMC chain. 
nburn=0 #how much burnin to discard. I always do this afterwards so I can assess convergence better
nthin=1 #do we thin the chain. Only necessary if you need to reduce file size
nswap=nrow(data$y.obs)/2 #Number of latent IDs to swap on each iteration. Updating half seems to work fine.
#Theoretically a tradeoff between mixing and run time, but mixing is fine with not many swaps.
M=150 #Data augmentation level
inits=list(psi=0.3,lam0=lam0,sigma=sigma,gamma=gamma) #initial values. Using simulated values
proppars=list(lam0=0.05,sigma=0.075,sx=1,sy=1) #MCMC proposal parameters. Tuning these is the most difficult part.
#shoot for rejection rates between 0.2 and 0.4. If a parameter is not moving, the proposal parameter is too large.
IDup="MH" #Gibbs or metropolis-hastings latent ID update. Must use MH for binomial model.
#Both about equally as fast for Poisson
keepACs=TRUE #Do we store the activity center posteriors and other latent structure including the ID vector?
keepGamma=TRUE #Do we store the gamma posterior?

a=Sys.time()
out=mcmc.CatSPIM(data,niter=niter,nburn=nburn,nthin=nthin, nswap=nswap,
                M = M, inits=inits,proppars=proppars,obstype=obstype,
                IDup=IDup,keepACs=keepACs,keepGamma=keepGamma)
b=Sys.time()
b-a

#Let's see what happened!
plot(mcmc(out$out))
#The new, key parameter here is n, the number of individuals actually captured. 
#If it's not changing value, you have enough ID covariates to remove all uncertainty in n.
#Discard some of the burnin to see the posterior of n better
plot(mcmc(out$out[500:niter,]))
data$n #true value of n

#If you kept the gamma posteriors, you can plot those, too.
plot(mcmc(out$gammaOut[[1]]))
plot(mcmc(out$gammaOut[[2]]))
gamma #True values

#This will get you the acceptance probabilies. Can't change N, n, or psi.
1-rejectionRate(mcmc(out$out))
#This will get you the acceptance probabilities for the x dimension of the activity centers.
#Hard to tune because it will be lower when an activity center has samples allocated
#to it and higher when it does not. I think just make sure it's not under 0.1ish
1-rejectionRate(mcmc(out$sxout))
effectiveSize(out$out)#should shoot for at least 400 for N. 

#OK, now try it with more/fewer ID covariates, category levels, or more missing data

#Get posterior probability sample x and sample y came from same individual
niters=niter-nburn

check=1 #Which sample to check
storematch=matrix(0,nrow=ncol(out$IDout),ncol=niters)
for(i in 1:niters){
 storematch[,i]=out$IDout[i,check]==out$IDout[i,]
}
rowSums(storematch)/niters
#True IDs stored here
data$IDtrue

## End(Not run)

benaug/SPIM documentation built on April 28, 2024, 7:27 a.m.