# Incomplete stratification code.
# These functions will not be visible to the user.
# At the moment, these functions only work for the full likelihood; they do not
# support individual covariates or other variables in the data frame
# I've copied over L's code with minor modifications to make it run here
#'
#' @importFrom reshape2 melt
#' @importFrom plyr llply
#' @importFrom stats optim as.formula
#' @importFrom MASS ginv
#' @importFrom rlang .data
#' @noRd
###############################################################################
# Function "IS.fit.model"
# Inputs: model id, row data, design matrices and offset vectors for capture
# probabilities, category proportions and sub-sample proportions
# Outputs: MLE , variance, SE of all the parameters( capture probabilities,
# category proportions, sub-sample proportions, and population size),
# and model information (id, number of parameters,
# negative log-likelihood and AICc values)
# table of capture histaries, observed and expected counts
# residual plot(standardized residual vs expected counts) for model assessment
IS.fit.model = function(model.id ,Data,
captureDM, thetaDM, lambdaDM,
captureOFFSET, thetaOFFSET, lambdaOFFSET,
control.optim=list(trace=1,maxit=1000)){
# deal with R package visible binding. See https://ggplot2.tidyverse.org/articles/ggplot2-in-packages.html
Result = NULL
ncats = length(Data$category) # number of categories
Result$rawdata = Data # raw data set
# Initial Estimates (in regular form)
initial.est = IS.initial.estimates(Data)
# initial.pack.parm.est are in logit/log form
initial.pack.parm.est = IS.pack.parm(initial.est$full,Data,
captureDM,thetaDM,lambdaDM)
Result$initial.unpack.parm = IS.unpack.parm(initial.pack.parm.est, Data,
captureDM, thetaDM, lambdaDM,
captureOFFSET,thetaOFFSET,lambdaOFFSET)
# initial value for the log(N)
log.N.init = initial.pack.parm.est[length(initial.pack.parm.est)]
# lower bound for optimization routine
l.b = c(rep(logit(0.000001),length(initial.pack.parm.est)-1),max(2,log.N.init-3))
# upper bound for optimization routine
u.b = c(rep(logit(.999999),length(initial.pack.parm.est)-1),log.N.init+3)
# optimize the negative log likelihood function (MLEs in logit/log form)
res = stats::optim(par=initial.pack.parm.est, fn=IS.neg.log.likelihood, method="L-BFGS-B",
lower=l.b, upper=u.b , control=control.optim,
Data=Data,
captureDM=captureDM, thetaDM=thetaDM,lambdaDM=lambdaDM,
captureOFFSET=captureOFFSET, thetaOFFSET=thetaOFFSET,
lambdaOFFSET=lambdaOFFSET)
Result$res = res
# extract the MLEs for the parameters (estimates in regular form)
Result$est = IS.unpack.parm(Result$res$par,Data,captureDM,thetaDM,lambdaDM,
captureOFFSET,thetaOFFSET,lambdaOFFSET)
# if all the DMs have zero columns
if((ncol(captureDM)== 0) && (ncol(thetaDM)== 0) && (ncol(lambdaDM)==0)){
hess = 1/Result$res$par
} else{
# Variance Covariance Matrix of logit/log parameters
hess = IS.inv.fisher.info(Result$res$par,Data,
captureDM, thetaDM,lambdaDM,
captureOFFSET,thetaOFFSET,lambdaOFFSET)
}
# reduced Variance Covariance Matrix of logit/log parameters
Result$vcv$logit.full.red = hess
###############################################
#find the variance covariance (vcv) of parameters in regular form
#Go from reduced estimates to expanded estimates
#browser()
X = as.matrix(Matrix::bdiag(captureDM, # capture-recapture parameters in logit form
lambdaDM, # lambda parameters in logit form
thetaDM, # theta parameters in logit form
diag(1))) # the log(N) parameter
vcv.logit.full = X %*% Result$vcv$logit.full.red %*% t(X)
Result$vcv$logit.full = vcv.logit.full
Result$se$logit.full = sqrt(diag(vcv.logit.full))
# Go from logit(continuation ratio) to continuation ratio, and then from
# continuation ratio to the original p, and log(N) -> N
# We use the deltamethod() function in the msm package
# and so need to create expressions to do the delta method
# First take anti-logits of all parameters except the last (which is anti-log)
antilogit = plyr::llply(c(paste("~1/(1+exp(-x",1:(nrow(Result$vcv$logit.full)-1),"))",sep=""),
paste("~exp(x",nrow(Result$vcv$logit.full),")",sep="")),as.formula)
vcv.cont = msm::deltamethod(antilogit, Result$est$logit.full,
Result$vcv$logit.full, ses=FALSE)
Result$est$logit.full.antilogit = expit(Result$est$logit.full)
Result$est$logit.full.antilogit[length(Result$est$logit.full.antilogit)] =
exp(Result$est$logit.full[length(Result$est$logit.full.antilogit)])
anti.cont.formula = IS.make.anticont.exp((2*ncats+1), ncats)
anti.cont.formula = unlist(anti.cont.formula)
anti.cont.formula = c(paste("~x",seq(1,(2*ncats)),sep=""),anti.cont.formula,
paste("~x",seq((3*ncats),(3*ncats+2)),sep="") )
anti.cont.formula = plyr::llply(anti.cont.formula,as.formula)
vcv.full = msm::deltamethod(anti.cont.formula, Result$est$logit.full.antilogit,
vcv.cont, ses=FALSE)
###############################################
# extract the individual VCV's and SE's of the parameters
Result$vcv$p = vcv.full[1:(2*ncats),1:(2*ncats)]
Result$se$p = matrix(sqrt(diag(Result$vcv$p)), nrow=ncats,byrow =FALSE)
Result$vcv$lambda = vcv.full[(2*ncats+1):(3*ncats),(2*ncats+1):(3*ncats)]
Result$se$lambda = sqrt(diag(Result$vcv$lambda))
Result$vcv$theta = vcv.full[(3*ncats+1):(3*ncats+2),(3*ncats+1):(3*ncats+2)]
Result$se$theta = sqrt(diag(Result$vcv$theta))
Result$vcv$N = vcv.full[nrow(vcv.full),nrow(vcv.full)]
Result$se$N = sqrt(Result$vcv$N)
Result$se$full = sqrt(diag(vcv.full))
###############################################
# Extract the variance and the SE's of the expected number of individuals
# in each category.
#estimated category proportions(lambda) and estimated population size(N)
lambda.N.hat = Result$est$full[c((2*ncats+1):(3*ncats),nrow(vcv.full))]
# vcv of estimated category proportions(lambda) and estimated population size(N)
vcv.lambda.N.hat = vcv.full[c((2*ncats+1):(3*ncats),nrow(vcv.full)),
c((2*ncats+1):(3*ncats),nrow(vcv.full))]
form = character(ncats)
for(k in 1:ncats){
form[k] = paste("~x",k, "*x",length(lambda.N.hat),sep="")
}
form = plyr::llply(form,as.formula)
# vcv of expected number of fish in each category
vcv.category.total = msm::deltamethod(form, lambda.N.hat, vcv.lambda.N.hat, ses=FALSE)
# variance of expected number of fish in each category
Result$variance$N_lambda = diag(vcv.category.total)
# SE of expected number of fish in each category
Result$se$N_lambda = sqrt(Result$variance$N_lambda)
########## Model information #########################
# model identification
Result$model.id = model.id
# number of parameters
Result$np = length(Result$res$par)
# Negative log-likelihood value
Result$NLL = IS.neg.log.likelihood(Result$res$par,Data,
captureDM,thetaDM,lambdaDM,
captureOFFSET,thetaOFFSET,lambdaOFFSET)
#corrected AIC information for the model
Result$AICc = IS.AICc(Result$res$par,Data,captureDM,thetaDM,lambdaDM,
captureOFFSET,thetaOFFSET,lambdaOFFSET)
# design matrices and offset vectors for capture probabilities, category
# proportions and sub-sample proportions
Result$captureDM = captureDM
Result$thetaDM = thetaDM
Result$lambdaDM = lambdaDM
Result$captureOFFSET = captureOFFSET
Result$thetaOFFSET = thetaOFFSET
Result$lambdaOFFSET = lambdaOFFSET
######### Model assessment using residual plot #########
# Find the expected counts for the fitted model:
MLE.results= NULL
MLE.results$param = Result$est
MLE.results$category = Data$category
# exp.data is a list with expected capture histories,
# corresponding counts and categories, and the variance for each expected count
exp.data = IS.expected.counts(MLE.results)
# Obs.counts
# This produces the observed counts in the same order as the expected capture
# histories. If some histories are not present in the
# observed data, then their counts will be zero.
# sum of the counts for each history in Data$History
obs.counts = outer(exp.data$History, Data$History, '==') %*% Data$counts
residual = obs.counts - exp.data$counts # calculate the residuals
# calculate the standardized residuals
standardized.residual = residual/sqrt(exp.data$variance)
# Output.df gives the Histories, expected and observed counts and residuals
Output.df =data.frame(History = exp.data$History,Observed.Counts =obs.counts,
Expected.counts =exp.data$counts,
Residual = residual,
Standardized.Residuals = standardized.residual)
#plot the standardized residuals
res.plot = ggplot( Output.df,aes(x = .data$Expected.counts, y =.data$Standardized.Residuals,
label=.data$History)) +
geom_point(size = 3,colour="#990000") +
xlab("Expected Counts") + ylab("Standardized Residuals")+
ggtitle(paste("Residual Plot \n model : ", model.id,sep=" " ))+
theme(axis.text=element_text(size=10,face="bold"),
axis.title=element_text(size=14),
plot.title = element_text(size = 12,face="bold")) +
geom_hline(yintercept=c(-1.96,0,1.96),linewidth=1,
linetype=c("dashed","solid","dashed"),
colour=c("#660000","black","#660000"))+
geom_text(aes(label=.data$History), hjust=-0.2, vjust=0.2)
# capture histories and observed and expected counts
Result$obs.exp.counts = Output.df
Result$res.plot = res.plot # residual plot
return(Result)
} # end of IS.fit.model
###############################################################################
###############################################################################
# "IS.inv.fisher.info" function create the variance covariance matrix using the
# negative log likelihood.
# Singular Value Decomposition is used to calculate the inverse of the hessian
#
# Input : vector of estimate in logit/log form and Data
#
# Output: Variance Covariance Matrix (vcv) logit/log form
IS.inv.fisher.info = function(logit.est,Data, captureDM , thetaDM,lambdaDM,
captureOFFSET,thetaOFFSET,lambdaOFFSET){
hess.matx = numDeriv::hessian(IS.neg.log.likelihood, x=logit.est, method="Richardson",
Data=Data,
captureDM=captureDM,thetaDM=thetaDM,lambdaDM=lambdaDM,
captureOFFSET=captureOFFSET,thetaOFFSET=thetaOFFSET,
lambdaOFFSET=lambdaOFFSET)
hess.svd = svd(hess.matx) # singular value decomposition
hess.svd$d = 1/hess.svd$d # inverse of the singular values
# variance covariance matrix (inverse of the hessian matrix)
vcv = hess.svd$v %*% diag(hess.svd$d) %*% t(hess.svd$u)
return(vcv)
} # end of IS.inv.fisher.info
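###############################################################################
# A minimal sketch (not part of the original tests) of why the SVD-based
# pseudo-inverse above is used: for a well-conditioned hessian it agrees with
# solve(), but it still returns an answer when the hessian is nearly singular.
# The 2x2 matrix below is a hypothetical stand-in for a real hessian.
#
# test.IS.inv.fisher.info = function(){
#   hess = matrix(c(4, 1, 1, 3), nrow=2) # small positive-definite "hessian"
#   hess.svd = svd(hess)
#   vcv = hess.svd$v %*% diag(1/hess.svd$d) %*% t(hess.svd$u)
#   max(abs(vcv - solve(hess))) # should be essentially zero
# }
# test.IS.inv.fisher.info()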
###############################################################################
IS.make.anticont.exp <- function(x,ncats){
# make expressions to convert continuation ratios back to the original
# category proportions (used to recover lambda)
# x = starting index of the continuation ratios in the parameter vector;
# there are ncr = ncats-1 of them
# It returns a character vector of ncats expressions
start = x
ncr = ncats-1
index = seq(start, by=1, length.out=ncr)
myform = c(paste("~x",index,sep=""),"~1")
for(i in 1:ncr){
myform = paste(myform,c(rep(" ",i),paste("*(1-x",start:(start+ncr-i),")",sep="")))
}
myform
} # end of IS.make.anticont.exp
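###############################################################################
# A minimal sketch (hypothetical starting index) of the expressions generated
# above: with x = 7 and ncats = 3, the two continuation ratios sit at
# positions 7 and 8 of the parameter vector, and the three returned strings
# rebuild the original category proportions
# lambda_1 = x7, lambda_2 = x8*(1-x7), lambda_3 = (1-x8)*(1-x7).
#
# IS.make.anticont.exp(7, 3)
# # returns (up to whitespace) the strings
# # "~x7"  "~x8 *(1-x7)"  "~1 *(1-x8) *(1-x7)"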
###############################################################################
############ Test Function ###################################################
###############################################################################
# # Following files are needed to test the function "IS.fit.model"
#
# source(file="get.data.R")
# source(file="create.DM.R")
# source(file="helper.functions.R")
#
# test.fit.model = function(){
# Data = NULL
# Data$History = c("U0", "M0", "MM", "F0" ,"FF", "0M", "0F", "0U", "UU" )
# Data$counts = c(41, 16, 4, 23, 7, 12, 25, 63,9)
# Data$category = c("M","F")
#
#
# #model identification"
# model.id ="No restrictions"
#
# # get the required design matrices
# captureDM = create.DM(c(1,2,3,4)) # Design matrix for cap prob
# thetaDM = create.DM(c(1,2)) # Design matrix for theta
# lambdaDM = create.DM(c(1)) # Design matrix for lambda
#
# # give the offset vectors (vectors of zeros since there are no restrictions)
# captureOFFSET = c(0,0,0,0)
# thetaOFFSET = c(0,0)
# lambdaOFFSET = c(0)
#
#
# test.fit = IS.fit.model(model.id ,Data,captureDM, thetaDM,lambdaDM,
# captureOFFSET,thetaOFFSET,lambdaOFFSET)
# return(test.fit)
# }
# test.fit.model()
#
# ########## End Test Function ###############################################
# #############################################################################
###############################################################################
# The following function creates initial estimates for the optimization routine.
# Function "IS.initial.estimates"
# Input : Data
# Output : initial values for capture probabilities, category proportions,
# sub-sample proportions and population size
IS.initial.estimates = function(Data){
cats = Data$category
# n1 is the total number of individuals caught in sample 1.
# This is calculated by counting the histories without a 0 in the first
# digit of the history (e.g. U0, UU, M0, MM, ...)
n1 = sum(Data$counts[ ! (substr(Data$History,1,1)=="0" )])
# m2 is the number of marked individuals caught in sample 2 ( e.g. UU, MM, FF,...)
m2 = n1 - sum(Data$counts[ (substr(Data$History,2,2)=="0" )])
# n2 is the total number of individuals caught in sample 2
# ( m2 + count the histories with a 0 in the first digit of the history)
n2 = m2 + sum(Data$counts[ (substr(Data$History,1,1)=="0")])
# n1.star is the number of individuals in the sub-sample of sample 1
# (subtract the total count of the histories with a U in the first digit of
# the history from n1)
n1.star = n1 - sum(Data$counts[substr(Data$History,1,1)=="U" ])
# n2.star is the number of individuals in the sub-sample of sample 2
# (subtract the count of the "0U" histories from the count of histories with
# a 0 in the first digit of the history)
n2.star = sum(Data$counts[ (substr(Data$History,1,1)=="0")]) -
sum(Data$counts[Data$History=="0U"])
############ Initial values ###################
# initial value for the population size; this is the simple Lincoln Petersen estimate
N.init = n1*n2/m2
theta_1 = n1.star/n1 # initial value for sub-sample proportion in sample 1
theta_2 = n2.star/(n2-m2) # initial value for sub-sample proportion in sample 2
# initial values for capture probabilities
sample.1.cap.prob = rep(n1/N.init , length(cats)) # at time 1
sample.2.cap.prob = rep(n2/N.init , length(cats)) # at time 2
# initial values for category proportions:
# use equal category proportions as initial values
lambda.init = rep(1/length(cats),length(cats))
# all initial capture probabilities as a vector
cap.prob.init = c(sample.1.cap.prob,sample.2.cap.prob)
theta.init = c(theta_1 ,theta_2) # initial sub-sample proportions
initial.estimates = NULL
initial.estimates$p = matrix(cap.prob.init,ncol=2, byrow = FALSE)
colnames(initial.estimates$p) = c("time1", "time2")
rownames(initial.estimates$p) = c(paste(cats))
initial.estimates$lambda = lambda.init
names( initial.estimates$lambda)= c(paste("lambda_",cats))
initial.estimates$theta = theta.init
names(initial.estimates$theta) = c("theta_1", "theta_2")
initial.estimates$N = N.init
names(initial.estimates$N) = c("N")
initial.estimates$full = c(cap.prob.init,lambda.init,theta.init,N.init)
return(initial.estimates)
} # end of function "IS.initial.estimates"
###############################################################################
########## Test Function #####################################################
###############################################################################
# # test the function IS.initial.estimates
#
# test.IS.initial.estimates = function(){
# Data = NULL
# Data$History = c("u0", "U0", "M0" ,"MM", "F0" ,"F0" ,"FF", "0M",
# "0F", "0U", "0U", "0U","UU", "UU" )
# Data$counts = c(20, 21, 16 , 4, 3, 20, 7, 12, 25, 43, 2, 18, 5, 4)
# Data$category = c("M","F")
# ## here U0 = 41, UU= 9, M0 = 16, MM = 4, F0 = 23, FF= 7, 0M = 12,
# ## 0F= 25, 0U = 63
#
# return(IS.initial.estimates(Data))
# }
# test.IS.initial.estimates()
#
###############################################################################
########## End Test Function #################################################
###############################################################################
###############################################################################
###############################################################################
#The function "pack.parm" creates initial estimates for the optimization routine.
# Input : initial estimates are in regular form(length is (3*length(cats)+3)),
# and raw data
# Output : vector in logit/log form(length is (3*length(cats)+3)-1)
IS.pack.parm = function(init.est,Data,captureDM,thetaDM,lambdaDM){
cats = Data$category
# capture rates for the sample 1
p1 = init.est[1:length(cats)]
# capture rates for the sample 2
p2 = init.est[(length(cats)+1): (2*length(cats))]
# category proportions. This should add to 1.
lambda = init.est[(2*length(cats)+1):(3*length(cats))]
# sampling fraction for sample 1 and sample 2
theta = init.est[(3*length(cats)+1):(3*length(cats)+2)]
N = init.est[length(init.est)]
if(ncol(captureDM)==0){
logit.capture.rate.hat = rep(NULL, nrow(captureDM))
} else {
# use least squares to estimate the capture probabilities based on captureDM
capture.rate = c(p1,p2)
logit.capture.rate = logit(capture.rate)
# transform the estimates using the logit function
logit.capture.rate.hat = IS.LS(captureDM, logit.capture.rate)
}
if(ncol(lambdaDM)==0){
logit.cats.proportion.hat = rep(NULL, nrow(lambdaDM))
} else {
# initial reduced estimates for lambda
logit.cats.proportion = lambda.to.cumulative.logit.lambda(lambda)
# use least squares to estimate based on lambdaDM
logit.cats.proportion.hat = IS.LS(lambdaDM, logit.cats.proportion)
}
if(ncol(thetaDM)==0){
logit.theta.hat = rep(NULL, nrow(thetaDM))
} else {
# use least squares to estimate the theta based on thetaDM
logit.theta = logit(theta)
# transform the estimates using the logit function
logit.theta.hat = IS.LS(thetaDM,logit.theta)
}
# transform the initial value for the population size to log scale
log.N = log(N)
return(c(logit.capture.rate.hat, logit.cats.proportion.hat, logit.theta.hat, log.N))
}
###############################################################################
###############################################################################
# The following code checks the above function
# testing.pack.parm = function(){
#   temp1 = IS.initial.estimates(Data)
#   temp2 = IS.pack.parm(temp1$full,Data,captureDM,thetaDM,lambdaDM)
# temp = IS.unpack.parm(temp2,Data,captureDM,thetaDM,lambdaDM,captureOFFSET,
# thetaOFFSET,lambdaOFFSET)
# temp
#
# }
#
# testing.pack.parm()
#
###############################################################################
###############################################################################
###############################################################################
###############################################################################
# function "IS.unpack.parm " extract the vector of parameters
# breaks the estimates into 4 groups
# 1. Capture rates ( this is a length(cats)x2 matrix) where
# length(cats) = no. of categories. 2 columns are sampling time 1 and 2
# 2. category proportions (this is vector of size length(cat))
# 3. sampling fractions (this is a vector of size 2) ; theta1 and theta 2 for
# sampling time 1 and 2
# 4. population size, N
# Input : A vector of reduced estimates in logit/log scale, Data,
# Design Matrices and OFFSET vectors
# Output: A vector of probabilities of length 3*length(cats)+3 ; in order as stated above 4 groups
IS.unpack.parm = function(est, Data,captureDM,thetaDM,lambdaDM,
captureOFFSET,thetaOFFSET,lambdaOFFSET){
# extract the parameter estimates in full form from the parameter vector
# parameters are in the order
# logit.capture.rates, cumulative.logit.category.proportion,
# logit.sampling.fraction, log.population.size.
# cumulative.logit.category.proportion has length(cats)-1 values since
# it preserves the "sum to one" constraint:
# category proportions add to 1.
# cats is the vector of categories.
# est is in logit or log form.
cats = Data$category
parm = NULL
parm$logit.full.red = est
# the if statements extract the reduced estimates, i.e. what is actually optimized
if(ncol(captureDM)== 0){ # If Capture design matrix has zero columns
#parm$logit.p.red = rep(NULL, nrow(captureDM))
parm$logit.p = captureOFFSET
} else {
# expand the reduced estimates to the full estimates using the appropriate design matrix
parm$logit.p.red = est[1:ncol(captureDM)]
parm$logit.p = (captureDM %*% parm$logit.p.red) + captureOFFSET
}
if(ncol(lambdaDM)==0){ # If lambda design matrix has zero columns
#parm$logit.lambda.red = rep(NULL, nrow(lambdaDM))
parm$logit.lambda = lambdaOFFSET # this is a vector of size "length(cats)-1"
} else {
# expand the reduced estimates to the full
# estimates using the appropriate design matrix
# this is a vector of size "length(cats)-1"
parm$logit.lambda.red = est[(ncol(captureDM)+1):(ncol(captureDM)+ ncol(lambdaDM))]
# this is a vector of size "length(cats)-1"
parm$logit.lambda = (lambdaDM %*% parm$logit.lambda.red ) + lambdaOFFSET
}
if(ncol(thetaDM)== 0){ # If theta design matrix has zero columns
#parm$logit.theta.red = rep(NULL, nrow(thetaDM))
parm$logit.theta = thetaOFFSET
} else {
# expand the reduced estimates to the full
# estimates using the appropriate design matrix
parm$logit.theta.red = est[(ncol(captureDM)+ ncol(lambdaDM)+1): ( ncol(captureDM)+
ncol(lambdaDM)+
ncol(thetaDM) )]
parm$logit.theta = (thetaDM %*% parm$logit.theta.red) + thetaOFFSET
}
parm$log.N = est[length(est)]
parm$logit.full = c(parm$logit.p, parm$logit.lambda,
parm$logit.theta, parm$log.N)
# convert from logit or log form to regular form.
capture.rates = as.vector(expit(parm$logit.p)) # in vector form
parm$p = matrix(capture.rates, nrow = length(cats),
ncol = 2,byrow=FALSE) # rows are categories
# parm$lambda is a vector of size length(cats)
parm$lambda = as.vector(IS.cumulative.logit.lambda.to.lambda(parm$logit.lambda))
parm$theta = as.vector(expit(parm$logit.theta))
parm$N = exp(parm$log.N)
# parm$full is a vector of length 3*length(cats)+3
parm$full = c(capture.rates, parm$lambda, parm$theta, parm$N)
# estimate expected number of fish in each category
parm$N_lambda = parm$N * parm$lambda
colnames(parm$p) = c("time1", "time2")
rownames(parm$p) = c(paste(cats))
names( parm$lambda)= c(paste("lambda_",cats))
names( parm$theta) = c("theta_1", "theta_2")
names( parm$N) = c("N")
return(parm)
} # end of IS.unpack.parm
###############################################################################
###############################################################################
###############################################################################
# The following code checks the above function
# testing.IS.unpack.parm = function(){
#
# data = NULL
# data$History = c("U0","UU","M0","MM","F0","FF","0M","0F","0U")
# data$counts = c(41,9,16,4,23,7,12,25,63)
# data$category = c("M","F") # for 2 categories
# #data$category = c("A","B","C")# for 3 categories
# str(data)
# #print(data)
#
# # Set up some values of the parameters for testing
#
# # testing for 2 categories
# p = seq(.1,.4,.1)
# lambda = c(0.4, 0.6)
# theta = c(0.3, 0.6)
# N = 500
#
# # testing for 3 categories
# p3 = c(0.2,0.1,0.3,0.2,0.4,0.1)
# l = c(0.2,0.5,0.3)
#
# # testparm2 is for 2 categories, and testparm3 is for 3 categories.
# # (probabilities in logit and log form)
# testparm2 = c(log(p/(1-p)),log((lambda[1]/sum(lambda))/(1-(lambda[1]/sum(lambda)))),
# log(theta/(1-theta)),log(N))
# #testparm3 = c(log(p3/(1-p3)),log((l[1]/sum(l))/(1-(l[1]/sum(l)))),
# log((l[2]/sum(l[2:3]))/(1-(l[2]/sum(l[2:3])))),
# log(theta/(1-theta)),log(N))
#
# capDM = create.DM(c(1,1,3,4))
# thetaDM =create.DM(c(1,2))
# #lambdaDM = create.DM(c(1))
# lambdaDM = matrix(, ncol=0,nrow=1) # Design matrix for lambda
#
# captureOFFSET = c(0,0,0,0)
# thetaOFFSET = c(0,0)
# #lambdaOFFSET = c(0)
# lambdaOFFSET = c(logit(.4))
#
# res2 = IS.unpack.parm(testparm2,data, capDM,thetaDM,lambdaDM,
# captureOFFSET,thetaOFFSET,lambdaOFFSET) # for 2 categories
# #res3 = IS.unpack.parm(testparm3,data) # for 3 categories
# res2 # for 2 categories
# #res3 # for 3 categories
# }
#
#
# testing.IS.unpack.parm()
#
###############################################################################
###############################################################################
###############################################################################
###############################################################################
# Function IS.neg.log.likelihood
# Input : parameters in logit or log form, raw Data, design matrices and
# OFFSET vectors
# (estimates are in the order: logit.capture.rates(p),
# logit.cumulative.category.proportion(lambda),
# logit.sampling.fraction(theta), log.population.size(N))
# (Data is a list with capture history, counts, category)
# Output: negative log likelihood value
IS.neg.log.likelihood = function(logit.est, Data,
captureDM,thetaDM,lambdaDM,
captureOFFSET,thetaOFFSET,lambdaOFFSET){
parm = IS.unpack.parm(logit.est,Data,
captureDM,thetaDM,lambdaDM,
captureOFFSET,thetaOFFSET,lambdaOFFSET)
# N hat
N = parm$N
hist = Data$History # capture history from raw data
ct = Data$counts # counts from raw data
cats = Data$category # categories
History = c(hist,"00") # add "00" capture history to end of the
# vector of capture history from raw data
counts = c(ct,(N-sum(ct))) # add total animals did not catch(relating to
# capture history "00") to the end of
# the vector of counts from the raw data
# call the function 'IS.log.prob.history'. These probabilities and the counts
# are in the exact order related to each capture history.
log.prob = IS.log.prob.history(parm,History,cats)
# The log-likelihood has 2 parts. The count! (factorial) terms for the
# observed counts are constant, so they are ignored.
# part1 = lgamma(N+1) - sum(lgamma(counts+1)) # full factorial part
part1 = lgamma(N+1) - lgamma(N-sum(ct)+1) # factorial part
part2 = sum(counts * log.prob)
# log-likelihood
LLH = part1 + part2
# negative log-likelihood
NLLH = - LLH # -(log-likelihood),convert to negative value for optimization purpose
return(NLLH)
} # end of "IS.neg.log.likelihood"
###############################################################################
###############################################################################
##### CJS functions to compute the probability of each history passed to it. ##
IS.log.prob.history <- function(parm, history, cats){
# This computes the log(probability) of each history based on the parms list
# of parameters and the vector of category labels.
#
# We assume that parms has been unpacked and has elements (at a minimum)
# p - length(cat) x 2 matrix
# lambda - proportions of categories in population (must add to 1)
# theta - probability that an unmarked fish captured is "categorized"
#
# cats is a vector of category codes. The value "U" CANNOT be used as a valid
# code as it indicates an unknown category. Upper and lower case categories
# are ok, but the user must use caution.
#
# history - a vector of histories. Each history is a character string of length 2
# e.g. M0 indicates a male captured at time 1 and never seen again
#
# This routine will compute the probability of history (00) if passed to it,
# even though this history is unobservable.
#
# Returns a vector of the log(prob) of each history. If a history has probability 0,
# a 0 is returned rather than a -Inf or NA. [This allows graceful handling
# of cases when certain parameters are fixed at 1 or 0.]
# We construct a (length(cats)+2) x (length(cats)+2) matrix representing all of the
# possible capture history log(probabilities) for (cats, "0", "U") combinations
prob <- matrix(0, nrow=length(cats)+2, ncol=length(cats)+2)
# Categories are captured at both occasions (e.g. MM)
prob[matrix(rep(1:length(cats),2),ncol=2)] <- parm$lambda*parm$p[,1]*parm$theta[1]*parm$p[,2]
# Categories captured at first occasion and then never seen (e.g M0)
prob[matrix(c(1:length(cats),
rep(length(cats)+1,
length(cats))),ncol=2)] <- parm$lambda*parm$p[,1]*parm$theta[1]*(1-parm$p[,2])
# Categories not captured at first occasion, but captured at time 2 (e.g. 0M)
prob[matrix(c(rep(length(cats)+1,
length(cats)),1:length(cats)),
ncol=2)] <- parm$lambda*(1-parm$p[,1])*parm$p[,2]*parm$theta[2]
# History (00)
prob[length(cats)+1,length(cats)+1] <- sum(parm$lambda*(1-parm$p[,1])*(1-parm$p[,2]))
# History (U0)
prob[length(cats)+2,
length(cats)+1] <- sum(parm$lambda*parm$p[,1]*(1-parm$theta[1])*(1-parm$p[,2]))
# History (0U)
prob[length(cats)+1,
length(cats)+2] <- sum(parm$lambda*(1-parm$p[,1])*parm$p[,2]*(1-parm$theta[2]))
# History (UU)
prob[length(cats)+2,
length(cats)+2] <- sum(parm$lambda*parm$p[,1]*(1-parm$theta[1])*parm$p[,2])
if(sum(prob)<.9999999){stop("Error in computing cell probabilities")}
# Now index each history into the matrix of probabilities and return the log(prob)
index <- matrix(c(match(substr(history,1,1),c(cats,"0","U")),
match(substr(history,2,2),c(cats,"0","U"))),ncol=2)
#print(index)
log_p_hist <- log(prob[index]+(prob[index]==0))
#print(log_p_hist)
return(log_p_hist)
} # end of IS.log.prob.history
###############################################################################
###############################################################################
# # The following code checks the above functions
#
# # data and parameter estimates for checking the functions
# testing.data <- function(){
#
# data <- NULL
# data$History = c("U0","UU","M0","MM","F0","FF","0M","0F","0U")
# data$counts = c(41,9,16,4,23,7,12,25,63)
# data$category = c("M","F")
# str(data)
# #print(data)
#
# # Set up some values of the parameters for testing
# parm = NULL
# parm$p = matrix(seq(.1,.4,.1),nrow=2)
# parm$lambda = c(0.4, 0.6)
# parm$theta = c(0.3, 0.6)
# parm$N = 500
# str(parm)
# #print(parm)
#
# return(list(parm,data))
# }
#
#
# # test both 'neg.log.like' and 'IS.log.prob.history' functions
# # 'IS.log.prob.history' should produce probabilities that sum to 1
#
# check.both.functions = function(){
# # Check the IS.log.prob.history function
# parm = testing.data()[[1]]
# data = testing.data()[[2]]
#
# res1 = neg.lg.ld(parm,data)
# print(paste("Negative log Likelihhod: " , res1 , sep = ""))
#
# res2 = IS.log.prob.history(parm,c(data$History,"00"),data$category)
# print(paste("sum of the probabilities: " , sum(exp(res2)), sep = "")) # to see add to 1
# }
#
# check.both.functions()
###############################################################################
###############################################################################
# ##### CJS function to check probability #####################################
#
# check.IS.log.prob.history <- function(){
# # Check the IS.log.prob.history function
#
# data <- NULL
# data$History <- c("U0","UU","M0","MM","F0","FF","0M","0F","0U")
# data$Count <- c(41,9,16,4,23,7,12,25,63)
# data$Category <- c("M","F")
# str(data)
# print(data)
#
# # Set up some values of the parameters for testing
# parm <- NULL
# parm$p <- matrix(seq(.1,.4,.1),nrow=2)
# parm$lambda <- c(0.4, 0.6)
# parm$theta <- c(0.3, 0.6)
# print(parm)
#
# # Test out on all possible capture histories (including the 00 history)
# res <- IS.log.prob.history(parm, c(data$History,"00"), data$Category)
# sum(exp(res)) # see if add to 1
# }
#
# check.IS.log.prob.history()
#
#
###############################################################################
###############################################################################
# Numerical version of "logit" and "expit" functions
# p scale to logit
logit = function(prob){
result =log(prob/(1-prob))
return(result)
}
# antilogit (i.e. from logit to p scale)
expit = function(logit.prob){
# if the logit value is very large or very small, the back-transformed
# probability collapses to exactly 1 or 0; to avoid that, clamp the logit
# value to the range [-10, 10]
logit.prob = pmax(-10, pmin(10, logit.prob)) # vectorized clamp
result = 1/(1+exp(-logit.prob))
return(result)
}
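##############################################################################
# A minimal sketch (hypothetical values) of the logit/expit round trip and of
# the clamping at +/-10 on the logit scale, which keeps the back-transformed
# probabilities strictly inside (0,1):
#
# p = c(0.2, 0.5, 0.9)
# expit(logit(p))   # recovers 0.2 0.5 0.9
# expit(c(-50, 50)) # clamped: returns expit(-10) and expit(10), not 0 and 1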
##############################################################################
# To pack lambda parameters:
# regular probabilities to cumulative logit probabilities
# Input : lambda vector of length(prob)
# Output: vector of logit cumulatives of lambda; length is length(prob)-1
lambda.to.cumulative.logit.lambda = function(prob){
result = regular.to.cumulative.logit.prob(prob)
return(result)
}
# The following transformation preserves the "sum to one" constraint.
# lambda.to.cumulative.logit.lambda:
# Input:  a vector of category proportions of length = length(prob)
# Output: a vector of length = length(prob)-1
# Takes a vector of length(prob) probabilities and returns a transformed vector
# with length(prob)-1 terms, where the terms are calculated as follows:
#
#   P_i = logit( P_i / (P_i + ... + P_k) ) ,  i = 1..k-1
#
# Assumptions: the vector prob sums to 1
regular.to.cumulative.logit.prob = function(prob){
return(logit(cum.prob(prob)))
}
# The following function converts the lambda vector of length(prob) to its
# cumulative form, a vector of length(prob)-1
cum.prob = function(prob){
cum.prob = rev(cumsum(rev(prob)))
cp = prob/cum.prob
cp[is.na(cp)]= 1
return(cp[-length(cp)])
}
##############################################################################
# To unpack lambda parameters:
# inverts cumulative logit probabilities back to regular probabilities
IS.cumulative.logit.lambda.to.lambda = function(logit.prob){
result = IS.inv.logit.lambda.to.lambda(logit.prob)
#result[result < .00001] = 0 # force all small entries to be zero
return(result)
}
# The following transformation preserves the "sum to one" constraint.
# IS.inv.logit.lambda.to.lambda:
# Input:  a vector of logit.prob probabilities with length(logit.prob) elements
# Output: a vector of probabilities "p" of length(logit.prob)+1; this vector sums to 1.
#
# The input vector elements are in the following form:
#   p_i = logit( P_i / (P_i + ... + P_k) ) , i = 1,...,(k-1)
IS.inv.logit.lambda.to.lambda = function(logit.prob){
prob = expit(logit.prob)
p = IS.inv.cum.prob(prob)
return(p)
}
IS.inv.cum.prob = function(prob){
temp = cumprod(1-prob)
temp = c(1,temp[-length(temp)])*prob
return(c(temp,1-sum(temp)))
}
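##############################################################################
# A minimal sketch (hypothetical proportions) checking that the pack/unpack
# pair above are inverses: transforming lambda to cumulative-logit form and
# back should reproduce the original vector, which sums to 1.
#
# lambda = c(0.2, 0.5, 0.3)
# logit.lambda = lambda.to.cumulative.logit.lambda(lambda) # length 2
# IS.inv.logit.lambda.to.lambda(logit.lambda)              # returns 0.2 0.5 0.3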
# Least squares solver
# Input : a matrix X (design matrix) and vector y
# Output: least squares solution
IS.LS = function(x,y){
b = solve(t(x) %*% x) %*% t(x) %*% y
return(b)
}
# Least squares via the generalized inverse
# Input : a matrix X (design matrix) and vector y
# Output: least squares solution (works even when t(X) %*% X is singular)
GI = function(x,y){
b = MASS::ginv(t(x) %*% x) %*% t(x) %*% y
return(b)
}
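##############################################################################
# A minimal sketch (hypothetical design matrix) contrasting the two solvers:
# for a full-rank X they agree (and match lm()), while GI also handles a
# rank-deficient X, where solve(t(x) %*% x) would fail.
#
# X = matrix(c(1,1,1,1, 0,0,1,1), ncol=2) # intercept + indicator column
# y = c(1.0, 1.2, 2.1, 1.9)
# IS.LS(X, y) # same coefficients as lm(y ~ X - 1)
# GI(X, y)    # identical here; differs only for singular t(X) %*% X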
###############################################################################
###############################################################################
# function "AICc" calculates the AIC with a correction for finite sample sizes
#
# AIC = 2*K - 2*log(likelihood) where k is the number of parameters in the model
#
# AICc = AIC + 2*K*(k+1)/(n-k-1) ; n denotes the sample size
#
# Input : vector of estimate in logit/log form and Data
#
# Output: AICc Value.
IS.AICc <- function(logit.est, Data,captureDM,thetaDM,lambdaDM,
captureOFFSET,thetaOFFSET,lambdaOFFSET){
NLL = IS.neg.log.likelihood(logit.est, Data,captureDM,thetaDM,lambdaDM,
captureOFFSET,thetaOFFSET,lambdaOFFSET)
np = length(logit.est) # number of parameters
AIC = 2*np + 2*NLL # Akaike information criterion (AIC)
n = sum(Data$counts) # total captured
AICc = AIC + 2*np*(np+1)/(n-np-1) # after the correction
return(AICc)
}
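###############################################################################
# A worked example (hypothetical numbers) of the finite-sample correction:
# with np = 7 parameters, NLL = 150 and n = 200 captured animals,
#
#   AIC  = 2*7 + 2*150               = 314
#   AICc = 314 + 2*7*(7+1)/(200-7-1) = 314 + 112/192 ~ 314.58
#
# The correction is small here but grows quickly as np approaches n.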
###############################################################################
# testing the above function
# test.IS.AICc = function(Data){
# temp1 = IS.initial.estimates(Data)
# temp2 = IS.pack.parm(temp1$full,Data,captureDM,thetaDM,lambdaDM)
# temp3 = IS.AICc(temp2,Data,captureDM,thetaDM,lambdaDM,
# captureOFFSET,thetaOFFSET,lambdaOFFSET)
# temp3
# }
#
# test.IS.AICc(Data)
###############################################################################
###############################################################################
###############################################################################
# "IS.expected.counts" generate the expected counts give the parameter set.
# Input : capture probabilities, category proportions,sub-sample proportions
# and categotires in the data set
# output : expected counts for the all possible observable capture histories
# (except the capture history "00"- animals not captured)
# : variance for each expected count
IS.expected.counts = function(result){
param = result$param
cats = result$category
expected.Data = NULL
# Create capture histories for Categories captured at first occasion and then
# never seen (e.g. M0)and Categories are captured at both occasions (e.g. MM) and
# Categories not captured at first occasion, but captured at time 2 (e.g. 0M)
cap.hist = c(paste(cats,"0",sep=""),paste(cats,cats,sep=""),paste("0",cats,sep=""))
# all possible observable capture histories("00" is unobservable)
expected.Data$History = c("U0", "UU", cap.hist,"0U")
expected.Data$category = cats
# add "00" capture history to end of the vector of capture history of expected.data
hist = c(expected.Data$History,"00")
# the function "log.prob.history" is in the file "neg.log.likelihood" which was
# used to find the log probabilities
exp.cats = expected.Data$category
expected.log.prob = IS.log.prob.history(param,hist,exp.cats)#call the function 'log.prob.history'.
# These log probabilities and the counts are in the exact
# order related to each capture history.
# expected probabilities
expected.prob = exp(expected.log.prob) - (expected.log.prob==0)
# have to use "-(expected.log.prob==0)" because of "prob[index]==0" used
# inside the neg.log.likelihood
## sum(expected.prob) # just a check: the probabilities should sum to 1
N = param$N
# expected counts for all the capture histories including history "00"
expected.counts.all = N * expected.prob
# variance for all the capture histories including history "00"
expected.variance.all = N*expected.prob*(1-expected.prob )
# variance for the capture histories without history "00"
expected.Data$variance = expected.variance.all[-length(expected.variance.all)]
#print.expected.counts = cat(paste("Expected count for the capture History", hist,
#                            sep = " ", ":", expected.counts.all, '\n'))
# expected counts for all the capture histories without history "00"
expected.count = expected.counts.all[-length(expected.counts.all)]
## data frame for expected histories and counts
## df.expected.counts = data.frame(expected.Data$History , expected.count)
## names(df.expected.counts) = c("expected.History","expected.count")
expected.Data$counts = expected.count
return(expected.Data)
}
###############################################################################
########### Test function #####################################################
###############################################################################
#
# test.IS.expected.counts= function(){
# data <- NULL
# data$History <- c("U0","UU","M0","MM","F0","FF","0M","0F","0U")
# data$count <- c(41,9,16,4,23,7,12,25,63)
# data$category <- c("M","F")
# str(data)
# print(data)
#
# # Set up some values of the parameters for testing
# param <- NULL
# param$p <- matrix(c( 0.1848103,0.1597991 ,0.1829310,0.2142439),nrow=2)
# param$lambda <- c(0.3666383,(1-0.3666383))
# param$theta <- c(0.5, 0.37)
# param$N = 591
#
# print(param)
#
# test=NULL
# test$param = param
# test$category = data$category
# return(IS.expected.counts(test))
# }
#
# test.IS.expected.counts()
###############################################################################
########### End Test function #################################################
###############################################################################
# print the results
IS.print.output = function(x){
cats = x$rawdata$category
########## model information ####################################
cat("\n")
cat("Model information: \n")
cat("\n")
cat("Model Name: ", x$model.id,"\n")
cat("Neg Log-Likelihood: ",x$NLL,"\n")
cat("Number of Parameters: ",x$np,"\n")
cat("AICc value:" ,x$AICc,"\n")
cat("\n \n")
########### Raw Data ############################################
cat("\n")
cat("Raw data: \n")
cat("\n")
print(x$rawdata)
cat("\n \n")
ncats = length(x$rawdata$category)
cat("\n \n")
########### initial values for optimization routine ##############
cat("Initial values used for optimization routine: \n")
cat("\n")
cat("Initial capture probabilities: \n")
print(x$initial.unpack.parm$p)
cat("\n")
cat("Initial category proportions: \n")
print(x$initial.unpack.parm$lambda)
cat("\n")
cat("Initial sub-sample proportions: \n")
print(x$initial.unpack.parm$theta)
cat("\n")
cat("Initial population size for optimization(simple Lincoln Petersen estimator is used) :" , prettyNum(round(x$initial.unpack.parm$N), big.mark = ",") , "\n")
cat("\n \n")
############################################################################################################
## Design matrices and offset vectors for capture probabilities, category proportions and sub-sample proportions
cats =x$rawdata$category
cat("Design matrix and OFFSET vector for capture probabilities: \n")
cap.DM = x$captureDM
rownames(cap.DM)= c((paste("p1", cats,sep="")),(paste("p2", cats,sep="")))
print(data.frame(Beta=cap.DM, OFFSET.vector=x$captureOFFSET))
cat("\n")
cat("Design matrix and OFFSET vector for sub-sample proportions (theta): \n")
rownames(x$thetaDM)= c("theta_1","theta_2")
print(data.frame(Beta=x$thetaDM, OFFSET.vector=x$thetaOFFSET))
cat("\n")
cat("Design matrix and OFFSET vector for category proportions (lambda):\n")
rownames(x$lambdaDM)= c(paste("lambda_", cats[1:(length(cats)-1)],sep=""))
print(data.frame(Beta=x$lambdaDM, OFFSET.vector=x$lambdaOFFSET))
cat("\n")
############ Find the MLEs ##########################################################
cat("\n")
cat("Find MLEs: \n")
cat("\n")
cat("MLEs for capture probabilities: \n")
print(x$est$p)
cat("\n")
cat("MLEs for category proportions: \n")
print(x$est$lambda)
cat("\n")
cat("MLEs for sub-sample proportions: \n")
print(x$est$theta)
cat("\n")
cat("MLE for Population size : ", prettyNum(round(x$est$N), big.mark = ",")) #print number with commas
category.total = prettyNum(round(x$est$N_lambda), big.mark = ",")
cat(paste("\nMLE for Population size of category", cats,sep = " ", ":",category.total))
cat("\n \n")
############# SE's of the above MLEs ##################################################
cat("SE's of the MLEs \n")
cat("\n")
cat("SE's of the MLEs of capture probabilities: \n")
se_p = x$se$p
colnames(se_p) = c("time1", "time2")
rownames(se_p) = c(paste(cats))
print(se_p)
cat("\n")
cat("SE's of the MLEs of the category proportions: \n")
se_lambad = x$se$lambda
names( se_lambad)= c(paste("lambda_",cats))
print(se_lambad)
cat("\n")
cat("SE's of the MLEs of the sub-sample proportions: \n")
se_theta = x$se$theta
names(se_theta) = c("theta_1", "theta_2")
print(se_theta)
cat("\n")
cat("SE of the MLE of the population size: ", prettyNum(round(x$se$N), big.mark = ","))
cat(paste("\nSE for Population size of category", cats, sep = " ", ":",prettyNum(round(x$se$N_lambda), big.mark = ",")))
cat("\n")
######### Table of Histories, observed and expected counts and residuals ################
cat("\n")
cat(paste("Observed and Expected counts for capture histories for the model ", x$model.id,sep=" " ,'\n'))
print(x$obs.exp.counts)
########### Residual plot ##############################################################
cat("\n")
cat(paste("Standardized residual plot for the model", x$model.id,sep=" " ,'\n'))
plot(x$res.plot)
} # end of "IS.print.output"