Nothing
#' function to simulate distance sampling data from simple model with increasing abundance
#' intensity, no assumed spatial structure, and point independence. If no parameters are given, uses internally defined values.
#' @param S number of spatial strata (a single transect is placed in each strata and assumed to cover the whole strata)
#' @param Observers A (2 x S) matrix giving the observer identity for each transect
#' @param misID If TRUE (default), misidentification is assumed
#' @param X.site model.matrix for habitat covariates (defaults to a linear, quadratic effects of transect # on log scale)
#' @param n.species Number of species to simulate (current max is 2) (default is 2)
#' @param Beta.hab A (# of species X # parameters) matrix giving parameters for habitat-abundance relationship (default is linear increase for species 1, quadratic for species 2)
#' @param detect.model A formula for the detection model. Default is ~Observer+Distance+Group+Species (formula should consist of these key words)
#' @param Beta.det A vector giving parameters for the detection function; # of parameters must match model.matrix()!
#' @param dist.cont If TRUE, uses continuous distances on (0,1). If FALSE, uses discrete distance classes
#' @param n.bins If dist.cont=FALSE, how many bins to use for distances. Default is 5.
#' @param cor.par Correlation at maximum distance. Default is 0.5
#' @param Grp.par A vector with an entry for each species giving parameters for group size (assumed zero-truncated Poisson). Default is 3 and 1, corresponding to mean group sizes of 4 and 2 for each species
#' @param misID.mat With true state on rows and assigned state on column, each positive entry provides an index to misID.models (i.e. what model to assume on multinomial logit space); a 0 indicates an impossible assigment; a negative number designates which column is to be obtained via subtraction
#' @param misID.par A list, each element of which gives the parameters associated with each entry in misID.models
#' @param misID.models A formula vector providing linear model-type formulas for each positive value of misID.mat. If the same model is used in multiple columns it is assumed that all fixed effects (except the intercept) are shared
#' @param misID.symm If TRUE, the constraint pi^{i|j}=pi^{j|i} is implemented; in this case, entries for pi^{j|i} are all assumed to be = pi^{i|j} (default is TRUE)
#' @return a distance sampling dataset
#' @export
#' @keywords distance sampling, simulation
#' @author Paul B. Conn
simulate_data_overd<-function(S,Observers,misID=TRUE,X.site=NULL,n.species=2,Beta.hab=NULL,Beta.det=NULL,detect.model=~Observer+Distance+Group,dist.cont=FALSE,n.bins=5,cor.par=0.5,Grp.par=c(3,1),misID.models=NULL,misID.par=NULL,misID.mat=NULL,misID.symm=TRUE){
require(mvtnorm)
if(n.species>2)cat("\n Error: current max species is 2 \n")
if(n.species==1 & misID==TRUE){
cat("\n n.speces=1 so misID set to FALSE \n")
misID=FALSE
}
if((n.bins!=5 | dist.cont==TRUE) & (is.null(Beta.det)==TRUE))cat("\n Error: if using continuous distances or a non-default distance bin #, you must input Beta.det \n")
#process parameters
if(is.null(X.site)==TRUE)X.site=cbind(rep(1,S),log(c(1:S)/S),(log(c(1:S)/S))^2) #covariate on abundance intensity, sp 1
if(is.null(Beta.hab)==TRUE){
Beta.hab=matrix(0,n.species,3)
#Beta.hab[1,]=c(log(40),1,0)
Beta.hab[1,]=c(log(100),1,0)
if(n.species==2)Beta.hab[2,]=c(log(10),-2,-1)
Beta.hab[1,]=c(log(40),0,0)
if(n.species==2)Beta.hab[2,]=c(log(10),0,0)
}
#detection parameters
if(dist.cont==FALSE)Levels=list(Observer=sort(unique(c(Observers))),Distance=as.factor(c(1:n.bins)),Species=as.factor(1:n.species))
else Levels=list(Observer=unique(c(Observers)),Species=as.factor(1:n.species))
factor.ind=list(Observer=TRUE,Distance=(dist.cont==FALSE),Group=FALSE,Species=TRUE)
#if(is.null(Beta.det)==TRUE)Beta.det=c(10,-.2,-.4,-.6,-.9,-1.1,-1.3,.1,.3) #obs 1 (bin 1), obs 2, obs 3, offset for bin 2, ..., offset for bin n.bins, grp size,species
if(is.null(Beta.det)==TRUE)Beta.det=c(1.2,-.2,-.4,-.6,-.9,-1.1,-1.3,.1) #obs 1 (bin 1), obs 2, obs 3, offset for bin 2, ..., offset for bin n.bins, grp size
N1=c(0,0,0,0,0,0,1,0,0,0,2,0,0,14,4,3)*3
N2=c(0,5,0,7,0,0,0,25,3,1,12,3,0,0,0,0)*3
#N1=round(exp(X.site%*%Beta.hab[1,]))
#N2=N1*0
#if(n.species==2)N2=round(exp(X.site%*%Beta.hab[2,]))
cat(paste("\n True G, sp 1 = ",N1,'\n\n'))
cat(paste("\n True G.tot, sp 1= ",sum(N1),'\n'))
if(n.species==2){
cat(paste("\n True G, sp 2 = ",N2,'\n\n'))
cat(paste("\n True G.tot, sp 2= ",sum(N2),'\n'))
}
Bin.length=c(1,1.2,1.5,2,3)
Dat=matrix(0,sum(N1)+sum(N2),8) #rows are site, observer 1 ID, obs 2 ID, Y_1, Y_2, Distance, Group size
#initialize confusion matrix, etc.
if(misID==1){
if(is.null(misID.mat)){
misID.mat=matrix(0,2,3)
misID.mat[1,]=c(1,-1,2)
#Confusion[2,]=c(-1,1,2)
misID.mat[2,]=c(-1,3,-1)
}
if(is.null(misID.models)==TRUE)misID.models=c(~1,~1,~1)
#Conf.model=c(~Observer+Group+Distance+Species,~Observer)
if(is.null(misID.par)==TRUE){misID.par=list(2,1,3)
#Conf.par=list(c(2,.2,.4,.3,-.1,-.2,-.4,-.8,.5),c(-1,-.2,.2)) #parameters for getting an 'unknown'
}
}
pl=1
for(i in 1:S){
cur.Observers=Observers[,i]
n.observers=2-is.na(cur.Observers[2])
if(N1[i]>0){
for(j in 1:N1[i]){
if(dist.cont==TRUE)dist=runif(1)
else dist=sample(c(1:n.bins),1,prob=Bin.length)
grp.size=rpois(1,Grp.par[1])+1
Dat1=matrix(c(cur.Observers[1],dist,grp.size,1),1,4)
if(n.observers==2){
Dat2=Dat1
Dat2[1]=cur.Observers[2]
X2=get_mod_matrix(Cur.dat=Dat2,stacked.names=c("Observer","Distance","Group","Species"),factor.ind=factor.ind,Det.formula=detect.model,Levels=Levels)
}
X1=get_mod_matrix(Cur.dat=Dat1,stacked.names=c("Observer","Distance","Group","Species"),factor.ind=factor.ind,Det.formula=detect.model,Levels=Levels)
Dat[pl,1]=i
Dat[pl,2]=cur.Observers[1]
Dat[pl,3]=cur.Observers[2]
Dat[pl,6]=dist
Dat[pl,7]=grp.size
Dat[pl,8]=1
if(dist.cont==FALSE)cur.cor=(dist-1)/(n.bins-1)*cor.par
else cur.cor=dist*cor.par
mu1=X1%*%Beta.det
if(n.observers==2){
mu2=X2%*%Beta.det
Dat[pl,4:5]=rmvnorm(1,c(mu1,mu2),matrix(c(1,cur.cor,cur.cor,1),2,2))
}
else Dat[pl,4:5]=c(rnorm(1,mu1,1),NA)
Dat[pl,4:5]=(Dat[pl,4:5]>0)*1.0
pl=pl+1
}
}
if(N2[i]>0){
for(j in 1:N2[i]){
if(dist.cont==TRUE)dist=runif(1)
else dist=sample(c(1:n.bins),1,prob=Bin.length)
grp.size=rpois(1,Grp.par[2])+1
Dat1=matrix(c(cur.Observers[1],dist,grp.size,2),1,4)
X1=get_mod_matrix(Cur.dat=Dat1,stacked.names=c("Observer","Distance","Group","Species"),factor.ind=factor.ind,Det.formula=detect.model,Levels=Levels)
if(n.observers==2){
Dat2=Dat1
Dat2[1]=cur.Observers[2]
X2=get_mod_matrix(Cur.dat=Dat2,stacked.names=c("Observer","Distance","Group","Species"),factor.ind=factor.ind,Det.formula=detect.model,Levels=Levels)
}
Dat[pl,1]=i
Dat[pl,2]=cur.Observers[1]
Dat[pl,3]=cur.Observers[2]
Dat[pl,6]=dist
Dat[pl,7]=grp.size
Dat[pl,8]=2
mu1=X1%*%Beta.det
if(dist.cont==FALSE)cur.cor=(dist-1)/(n.bins-1)*cor.par
else cur.cor=dist*cor.par
if(n.observers==2){
mu2=X2%*%Beta.det
Dat[pl,4:5]=rmvnorm(1,c(mu1,mu2),matrix(c(1,cur.cor,cur.cor,1),2,2))
}
else Dat[pl,4:5]=c(rnorm(1,mu1,1),NA)
Dat[pl,4:5]=c(Dat[pl,4:5]>0)*1.0
pl=pl+1
}
}
}
cat(paste("Total N, sp 1 = ",sum(Dat[Dat[,8]==1,7])))
if(n.species==2)cat(paste("Total N, sp 2 = ",sum(Dat[Dat[,8]==2,7])))
Dat=Dat[which(Dat[,4]>0 | Dat[,5]>0),] #get rid of animals never observed
#put things in "Jay's" format
Dat2=matrix(0,2*nrow(Dat),7)
ipl=1
for(irecord in 1:nrow(Dat)){
Dat2[ipl,1]=Dat[irecord,1]
Dat2[ipl+1,1]=Dat[irecord,1]
Dat2[ipl,3]=Dat[irecord,2]
Dat2[ipl+1,3]=Dat[irecord,3]
Dat2[ipl,4]=Dat[irecord,4]
Dat2[ipl+1,4]=Dat[irecord,5]
Dat2[ipl,5]=Dat[irecord,6]
Dat2[ipl+1,5]=Dat[irecord,6]
Dat2[ipl,6]=Dat[irecord,7]
Dat2[ipl+1,6]=Dat[irecord,7]
Dat2[ipl,7]=Dat[irecord,8]
Dat2[ipl+1,7]=Dat[irecord,8]
Dat2[ipl,2]=irecord #match number
Dat2[ipl+1,2]=irecord
ipl=ipl+2
}
Dat2=as.data.frame(Dat2)
colnames(Dat2)=c("Transect","Match","Observer","Obs","Distance","Group","Species")
Dat2[,"Observer"]=as.factor(Dat2[,"Observer"])
Dat2[,"Distance"]=as.factor(Dat2[,"Distance"])
#get rid of NA records for transects where there was only one observer
if(sum(is.na(Dat2[,4]))>0){
which.NA=which(is.na(Dat2[,4]))
Dat2=Dat2[-which.NA,]
}
Dat=Dat2
True.species=Dat[,"Species"]
n.indiv=nrow(Dat)
subset_conf_array2<-function(Confuse,Species,n.indiv){
Cur=matrix(0,n.indiv,dim(Confuse)[3])
for(iind in 1:n.indiv)Cur[iind,]=Confuse[iind,Species[iind],]
Cur
}
if(misID==TRUE){
Confuse=array(0,dim=c(n.indiv,dim(misID.mat)))
Confuse=get_confusion_array(Confuse,Cov=NULL,Beta=as.matrix(misID.par),n.indiv=n.indiv,misID.mat=misID.mat,misID.formulas=misID.models,symm=misID.symm)
# Now, put in partial observation process
Probs=subset_conf_array2(Confuse=Confuse,Species=Dat[,"Species"],n.indiv=n.indiv)
get_samp<-function(prob)sample(c(1:length(prob)),1,prob=prob)
Dat[,"Species"]=apply(Probs,1,'get_samp')
}
Dat[,"Species"]=as.integer(Dat[,"Species"])
Dat=cbind(Dat[,1:4],Dat[,"Species"],Dat[,5:6])
colnames(Dat)[5]="Species"
G.true=N1
if(n.species==2)G.true=cbind(G.true,N2)
Out=list(Dat=Dat,G.true=G.true,True.species=True.species)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.