Nothing
## (c) Simon N. Wood & Natalya Pya (scat, beta),
## 2013-2023. Released under GPL2.
## See gam.fit4.r for testing functions fmud.test and fetad.test.
estimate.theta <- function(theta,family,y,mu,scale=1,wt=1,tol=1e-7,attachH=FALSE) {
## Simple Newton iteration to estimate theta for an extended family,
## given y and mu. To be iterated with estimation of mu given theta.
## If used within a PIRLS loop then divergence testing of coef update
## will have to re-compute pdev after theta update.
## Not clear best way to handle scale - could optimize here as well
if (!inherits(family,"extended.family")) stop("not an extended family")
nlogl <- function(theta,family,y,mu,scale=1,wt=1,deriv=2) {
## compute the negative log likelihood and grad + hess
nth <- length(theta) - if (scale<0) 1 else 0
if (scale < 0) {
scale <- exp(theta[nth+1])
theta <- theta[1:nth]
get.scale <- TRUE
} else get.scale <- FALSE
dev <- sum(family$dev.resids(y, mu, wt,theta))/scale
if (deriv>0) Dd <- family$Dd(y, mu, theta, wt=wt, level=deriv)
ls <- family$ls(y,w=wt,theta=theta,scale=scale)
nll <- dev/2 - ls$ls
if (deriv>0) {
g1 <- colSums(as.matrix(Dd$Dth))/(2*scale)
g <- if (get.scale) c(g1,-dev/2) else g1
ind <- 1:length(g)
g <- g - ls$lsth1[ind]
## g <- if (deriv>0) colSums(as.matrix(Dd$Dth))/(2*scale) - ls$lsth1[1:nth] else NULL
} else g <- NULL
if (deriv>1) {
x <- colSums(as.matrix(Dd$Dth2))/(2*scale)
Dth2 <- matrix(0,nth,nth)
k <- 0
for (i in 1:nth) for (j in i:nth) {
k <- k + 1
Dth2[i,j] <- Dth2[j,i] <- x[k]
}
if (get.scale) Dth2 <- rbind(cbind(Dth2,-g1),c(-g1,dev/2))
H <- Dth2 - as.matrix(ls$lsth2)[ind,ind]
# H <- Dth2 - as.matrix(ls$lsth2)[1:nth,1:nth]
} else H <- NULL
list(nll=nll,g=g,H=H)
} ## nlogl
if (scale>=0&&family$n.theta==0) stop("erroneous call to estimate.theta - no free parameters")
n.theta <- length(theta) ## dimension of theta vector (family$n.theta==0 => all fixed)
del.ind <- 1:n.theta
if (scale<0) theta <- c(theta,log(var(y)*.1))
nll <- nlogl(theta,family,y,mu,scale,wt,2)
g <- if (family$n.theta==0) nll$g[-del.ind] else nll$g
H <- if (family$n.theta==0) nll$H[-del.ind,-del.ind,drop=FALSE] else nll$H
step.failed <- FALSE
for (i in 1:100) { ## main Newton loop
#H <- if (family$n.theta==0) nll$H[-del.ind,-del.ind,drop=FALSE] else nll$H
#g <- if (family$n.theta==0) nll$g[-del.ind] else nll$g
eh <- eigen(H,symmetric=TRUE)
pdef <- sum(eh$values <= 0)==0
if (!pdef) { ## Make the Hessian pos def
eh$values <- abs(eh$values)
thresh <- max(eh$values) * 1e-7
eh$values[eh$values<thresh] <- thresh
}
## compute step = -solve(H,g) (via eigen decomp)...
step <- - drop(eh$vectors %*% ((t(eh$vectors) %*% g)/eh$values))
if (family$n.theta==0) step <- c(rep(0,n.theta),step)
## limit the step length...
ms <- max(abs(step))
if (ms>4) step <- step*4/ms
nll1 <- nlogl(theta+step,family,y,mu,scale,wt,2)
iter <- 0
while (nll1$nll > nll$nll) { ## step halving
step <- step/2; iter <- iter + 1
if (sum(theta!=theta+step)==0||iter>25) {
step.failed <- TRUE
break
}
nll1 <- nlogl(theta+step,family,y,mu,scale,wt,0)
} ## step halving
if (step.failed) break
theta <- theta + step ## accept updated theta
## accept log lik and derivs ...
nll <- if (iter>0) nlogl(theta,family,y,mu,scale,wt,2) else nll1
g <- if (family$n.theta==0) nll$g[-del.ind] else nll$g
H <- if (family$n.theta==0) nll$H[-del.ind,-del.ind,drop=FALSE] else nll$H
## convergence checking...
if (sum(abs(g) > tol*abs(nll$nll))==0) break
} ## main Newton loop
if (step.failed) warning("step failure in theta estimation")
if (attachH) attr(theta,"H") <- H#nll$H
theta
} ## estimate.theta
find.null.dev <- function(family,y,eta,offset,weights) {
## obtain the null deviance given y, best fit mu and
## prior weights
fnull <- function(gamma,family,y,wt,offset) {
## evaluate deviance for single parameter model
mu <- family$linkinv(gamma+offset)
sum(family$dev.resids(y,mu, wt))
}
mu <- family$linkinv(eta-offset)
mum <- mean(mu*weights)/mean(weights) ## initial value
eta <- family$linkfun(mum) ## work on l.p. scale
deta <- abs(eta)*.1 + 1 ## search interval half width
ok <- FALSE
while (!ok) {
search.int <- c(eta-deta,eta+deta)
op <- optimize(fnull,interval=search.int,family=family,y=y,wt = weights,offset=offset)
if (op$minimum > search.int[1] && op$minimum < search.int[2]) ok <- TRUE else deta <- deta*2
}
op$objective
} ## find.null.dev
## extended families for mgcv, standard components.
## family - name of family character string
## link - name of link character string
## linkfun - the link function
## linkinv - the inverse link function
## mu.eta - d mu/d eta function (derivative of inverse link wrt eta)
## note: for standard links this information is supplemented using
## function fix.family.link.extended.family with functions
## gkg where k is 2,3 or 4 giving the kth derivative of the
## link over the first derivative of the link to the power k.
## for non standard links these functions must be supplied.
## dev.resids - function computing deviance residuals.
## Dd - function returning derivatives of deviance residuals w.r.t. mu and theta.
## aic - function computing twice -ve log likelihood for 2df to be added to.
## initialize - expression to be evaluated in gam.fit4 and initial.spg
## to initialize mu or eta.
## preinitialize - optional function of y and family, returning a list with optional elements
## Theta - intitial Theta and y - modified y for use in fitting (see e.g. ocat and betar)
## postproc - function with arguments family, y, prior.weights, fitted, linear.predictors, offset, intercept
## to call after fit to compute (optionally) the label for the family, deviance and null deviance.
## See ocat for simple example and betar or ziP for complicated. Called in estimate.gam.
## ls - function to evaluated log saturated likelihood and derivatives w.r.t.
## phi and theta for use in RE/ML optimization. If deviance used is just -2 log
## lik. can just return zeroes.
## validmu, valideta - functions used to test whether mu/eta are valid.
## n.theta - number of theta parameters.
## no.r.sq - optional TRUE/FALSE indicating whether r^2 can be computed for family
## ini.theta - function for initializing theta.
## putTheta, getTheta - functions for storing and retriving theta values in function
## environment.
## rd - optional function for simulating response data from fitted model.
## residuals - optional function for computing residuals.
## predict - optional function for predicting from model, called by predict.gam.
## family$data - optional list storing any family specific data for use, e.g. in predict
## function. - deprecated (commented out below - appears to be used nowhere)
## scale - < 0 to estimate. ignored if NULL
#######################
## negative binomial...
#######################
nb <- function (theta = NULL, link = "log") {
## Extended family object for negative binomial, to allow direct estimation of theta
## as part of REML optimization. Currently the template for extended family objects.
linktemp <- substitute(link)
if (!is.character(linktemp)) linktemp <- deparse(linktemp)
if (linktemp %in% c("log", "identity", "sqrt")) stats <- make.link(linktemp)
else if (is.character(link)) {
stats <- make.link(link)
linktemp <- link
} else {
if (inherits(link, "link-glm")) {
stats <- link
if (!is.null(stats$name))
linktemp <- stats$name
}
else stop(linktemp, " link not available for negative binomial family; available links are \"identity\", \"log\" and \"sqrt\"")
}
## Theta <- NULL;
n.theta <- 1
if (!is.null(theta)&&theta!=0) {
if (theta>0) {
iniTheta <- log(theta) ## fixed theta supplied
n.theta <- 0 ## signal that there are no theta parameters to estimate
} else iniTheta <- log(-theta) ## initial theta supplied
} else iniTheta <- 0 ## inital log theta value
env <- new.env(parent = .GlobalEnv)
assign(".Theta", iniTheta, envir = env)
getTheta <- function(trans=FALSE) if (trans) exp(get(".Theta")) else get(".Theta") # get(".Theta")
putTheta <- function(theta) assign(".Theta", theta,envir=environment(sys.function()))
variance <- function(mu) mu + mu^2/exp(get(".Theta")) ## Not actually needed!
validmu <- function(mu) all(mu > 0)
dev.resids <- function(y, mu, wt,theta=NULL) {
if (is.null(theta)) theta <- get(".Theta")
theta <- exp(theta) ## note log theta supplied
mu[mu<=0] <- NA
2 * wt * (y * log(pmax(1, y)/mu) -
(y + theta) * log((y + theta)/(mu + theta)))
} ## nb residuals
Dd <- function(y, mu, theta, wt, level=0) {
## derivatives of the nb deviance...
##ltheta <- theta
theta <- exp(theta)
yth <- y + theta
muth <- mu + theta
r <- list()
## get the quantities needed for IRLS.
## Dmu2eta2 is deriv of D w.r.t mu twice and eta twice,
## Dmu is deriv w.r.t. mu once, etc...
r$Dmu <- 2 * wt * (yth/muth - y/mu)
r$Dmu2 <- -2 * wt * (yth/muth^2 - y/mu^2)
r$EDmu2 <- 2 * wt * (1/mu - 1/muth) ## exact (or estimated) expected weight
if (level>0) { ## quantities needed for first derivatives
r$Dth <- -2 * wt * theta * (log(yth/muth) + (1 - yth/muth) )
r$Dmuth <- 2 * wt * theta * (1 - yth/muth)/muth
r$Dmu3 <- 4 * wt * (yth/muth^3 - y/mu^3)
r$Dmu2th <- 2 * wt * theta * (2*yth/muth - 1)/muth^2
r$EDmu2th <- 2 * wt / muth^2
}
if (level>1) { ## whole damn lot
r$Dmu4 <- 2 * wt * (6*y/mu^4 - 6*yth/muth^4)
r$Dth2 <- -2 * wt * theta * (log(yth/muth) +
theta*yth/muth^2 - yth/muth - 2*theta/muth + 1 +
theta /yth)
r$Dmuth2 <- 2 * wt * theta * (2*theta*yth/muth^2 - yth/muth - 2*theta/muth + 1)/muth
r$Dmu2th2 <- 2 * wt * theta * (- 6*yth*theta/muth^2 + 2*yth/muth + 4*theta/muth - 1) /muth^2
r$Dmu3th <- 4 * wt * theta * (1 - 3*yth/muth)/muth^3
}
r
} ## nb Dd
aic <- function(y, mu, theta=NULL, wt, dev) {
if (is.null(theta)) theta <- get(".Theta")
Theta <- exp(theta)
term <- (y + Theta) * log(mu + Theta) - y * log(mu) +
lgamma(y + 1) - Theta * log(Theta) + lgamma(Theta) -
lgamma(Theta + y)
2 * sum(term * wt)
} ## aic nb
ls <- function(y,w,theta,scale) {
## the log saturated likelihood function for nb
Theta <- exp(theta)
#vec <- !is.null(attr(theta,"vec.grad")) ## lsth by component?
ylogy <- y;ind <- y>0;ylogy[ind] <- y[ind]*log(y[ind])
term <- (y + Theta) * log(y + Theta) - ylogy +
lgamma(y + 1) - Theta * log(Theta) + lgamma(Theta) -
lgamma(Theta + y)
ls <- -sum(term*w)
## first derivative wrt theta...
yth <- y+Theta
lyth <- log(yth)
psi0.yth <- digamma(yth)
psi0.th <- digamma(Theta)
term <- Theta * (lyth - psi0.yth + psi0.th-theta)
#lsth <- if (vec) -term*w else -sum(term*w)
LSTH <- matrix(-term*w,ncol=1)
lsth <- sum(LSTH)
## second deriv wrt theta...
psi1.yth <- trigamma(yth)
psi1.th <- trigamma(Theta)
term <- Theta * (lyth - Theta*psi1.yth - psi0.yth + Theta/yth + Theta * psi1.th + psi0.th - theta -1)
lsth2 <- -sum(term*w)
list(ls=ls, ## saturated log likelihood
lsth1=lsth, ## first deriv vector w.r.t theta - last element relates to scale, if free
LSTH1=LSTH, ## rows are above derivs by datum
lsth2=lsth2) ## Hessian w.r.t. theta, last row/col relates to scale, if free
} ## ls nb
initialize <- expression({
if (any(y < 0)) stop("negative values not allowed for the negative binomial family")
##n <- rep(1, nobs)
mustart <- y + (y == 0)/6
})
postproc <- function(family,y,prior.weights,fitted,linear.predictors,offset,intercept){
posr <- list()
posr$null.deviance <- find.null.dev(family,y,eta=linear.predictors,offset,prior.weights)
posr$family <-
paste("Negative Binomial(",round(family$getTheta(TRUE),3),")",sep="")
posr
} ## postproc nb
rd <- function(mu,wt,scale) {
Theta <- exp(get(".Theta"))
rnbinom(n=length(mu),size=Theta,mu=mu)
} ## rd nb
qf <- function(p,mu,wt,scale) {
Theta <- exp(get(".Theta"))
qnbinom(p,size=Theta,mu=mu)
} ## qf nb
environment(dev.resids) <- environment(aic) <- environment(getTheta) <-
environment(rd)<- environment(qf)<- environment(variance) <- environment(putTheta) <- env
structure(list(family = "negative binomial", link = linktemp, linkfun = stats$linkfun,
linkinv = stats$linkinv, dev.resids = dev.resids,Dd=Dd,variance=variance,
aic = aic, mu.eta = stats$mu.eta, initialize = initialize,postproc=postproc,ls=ls,
validmu = validmu, valideta = stats$valideta,n.theta=n.theta,
ini.theta = iniTheta,putTheta=putTheta,getTheta=getTheta,rd=rd,qf=qf),
class = c("extended.family","family"))
} ## nb
#########################
## Censored normal family
#########################
cnorm <- function (theta = NULL, link = "identity") {
## Extended family object for censored Gaussian, as required for Tobit regression or log-normal
## Accelarated Failure Time models.
linktemp <- substitute(link)
if (!is.character(linktemp)) linktemp <- deparse(linktemp)
if (linktemp %in% c("log", "identity", "sqrt")) stats <- make.link(linktemp)
else if (is.character(link)) {
stats <- make.link(link)
linktemp <- link
} else {
if (inherits(link, "link-glm")) {
stats <- link
if (!is.null(stats$name))
linktemp <- stats$name
}
else stop(linktemp, " link not available for negative binomial family; available links are \"identity\", \"log\" and \"sqrt\"")
}
## Theta <- NULL;
n.theta <- 1
if (!is.null(theta)&&theta!=0) {
if (theta>0) {
iniTheta <- log(theta) ## fixed theta supplied
n.theta <- 0 ## signal that there are no theta parameters to estimate
} else iniTheta <- log(-theta) ## initial theta supplied
} else iniTheta <- 0 ## inital log theta value
env <- new.env(parent = .GlobalEnv)
assign(".Theta", iniTheta, envir = env)
getTheta <- function(trans=FALSE) if (trans) exp(get(".Theta")) else get(".Theta") # get(".Theta")
putTheta <- function(theta) assign(".Theta", theta,envir=environment(sys.function()))
validmu <- if (link=="identity") function(mu) all(is.finite(mu)) else function(mu) all(mu>0)
dev.resids <- function(y, mu, wt,theta=NULL) { ## cnorm
if (is.null(theta)) theta <- get(".Theta")
th <- theta - log(wt)/2
yat <- attr(y,"censor")
if (is.null(yat)) yat <- rep(NA,length(y))
ii <- which(yat==y) ## uncensored observations
d <- rep(0,length(y))
if (length(ii)) d[ii] <- (y[ii]-mu[ii])^2*exp(-2*th[ii])
ii <- which(is.finite(yat)&yat!=y) ## interval censored
if (length(ii)) {
y1 <- pmax(yat[ii],y[ii]); y0 <- pmin(yat[ii],y[ii])
y10 <- (y1-y0)*exp(-th[ii])/2
d[ii] <- 2*log(dpnorm(-y10,y10)) - ## 2 * log saturated likelihood
2*log(dpnorm((y0-mu[ii])*exp(-th[ii]),(y1-mu[ii])*exp(-th[ii])))
}
ii <- which(yat == -Inf) ## left censored
if (length(ii)) d[ii] <- -2*pnorm((y[ii]-mu[ii])*exp(-th[ii]),log.p=TRUE)
ii <- which(yat == Inf) ## right censored
if (length(ii)) d[ii] <- -2*pnorm(-(y[ii]-mu[ii])*exp(-th[ii]),log.p=TRUE)
d
} ## dev.resids cnorm
Dd <- function(y, mu, theta, wt, level=0) {
## derivatives of the cnorm deviance...
th <- theta - log(wt)/2
eth <- exp(-th)
e2th <- eth*eth
e3th <- e2th*eth
yat <- attr(y,"censor")
if (is.null(yat)) yat <- y
## get case indices...
iu <- which(yat==y) ## uncensored observations
ii <- which(is.finite(yat*y)&yat!=y) ## interval censored
il <- which(yat == -Inf) ## left censored
ir <- which(yat == Inf) ## right censored
n <- length(mu)
Dmu <- Dmu2 <- rep(0,n)
if (level>0) Dth <- Dmuth <- Dmu3 <- Dmu2th <- Dmu
if (level>1) Dmu4 <- Dth2 <- Dmuth2 <- Dmu2th2 <- Dmu3th <- Dmu
if (length(iu)) { ## uncensored
ethi <- eth[iu]; e2thi <- e2th[iu]
ymeth <- (y[iu]-mu[iu])*ethi
Dmu[iu] <- Dmui <- -2*ymeth*ethi
Dmu2[iu] <- 2*e2thi
if (level>0) {
Dth[iu] <- -2*ymeth^2
Dmuth[iu] <- -2*Dmui
Dmu3[iu] <- 0
Dmu2th[iu] <- -4*e2thi
}
if (level>1) {
Dmu4[iu] <- 0
Dth2[iu] <- -2*Dth[iu]
Dmuth2[iu] <- 4*Dmui
Dmu2th2[iu] <- 8*e2thi
Dmu3th[iu] <- 0
}
} ## uncensored done
if (length(ii)) { ## interval censored
y0 <- pmin(y[ii],yat[ii]); y1 <- pmax(y[ii],yat[ii])
ethi <- eth[ii];e2thi <- e2th[ii];e3thi <- e3th[ii]
y10 <- (y1-y0)*ethi/2
ymeth0 <- (y0-mu[ii])*ethi;ymeth1 <- (y1-mu[ii])*ethi
D0 <- dpnorm(ymeth0,ymeth1)
dnorm0 <- dnorm(ymeth0);dnorm1 <- dnorm(ymeth1)
Dmui <- Dmu[ii] <- 2*ethi*(dnorm1-dnorm0)/D0
Dmu2[ii] <- Dmui^2/2 + 2*e2thi*(dnorm1*ymeth1-dnorm0*ymeth0)/D0
if (level>0) {
Dmu2i <- Dmu2[ii];
ymeth12 <- ymeth1^2; ymeth13 <- ymeth12*ymeth1
ymeth02 <- ymeth0^2; ymeth03 <- ymeth02*ymeth0
Dls <- dpnorm(-y10,y10)
Dth[ii] <- Dthi <- 2*(dnorm1*ymeth1-dnorm0*ymeth0)/D0
Dmuth[ii] <- Dmuthi <- Dmui*Dthi/2 + 2*ethi*(dnorm1*(ymeth12-1)-dnorm0*(ymeth02-1))/D0
Dmu3[ii] <- Dmui*(3*Dmu2i/2 - Dmui^2/4 - e2thi) +
2*e3thi*(dnorm1*ymeth12-dnorm0*ymeth02)/D0
Dmu2th[ii] <- (Dmu2i*Dthi+Dmui*Dmuthi)/2 +
ethi*(dnorm1*(2*ymeth13*ethi+ Dmui*ymeth12-6*ymeth1*ethi - Dmui)-
dnorm0*(2*ymeth03*ethi+ Dmui*ymeth02-6*ymeth0*ethi - Dmui))/D0
}
if (level>1) {
ymeth14 <- ymeth13*ymeth1; ymeth15 <- ymeth12*ymeth13
ymeth04 <- ymeth03*ymeth0; ymeth05 <- ymeth02*ymeth03
Dmu3i <- Dmu3[ii]; Dmu2thi <- Dmu2th[ii]
Dmu4[ii] <- Dmu2i*(3*Dmu2i/2-Dmui^2/4-e2thi) + Dmui*(3*Dmu3i/2-Dmui*Dmu2i/2) +
e3thi*(dnorm1*(2*ymeth13*ethi + Dmui*ymeth12-4*ymeth1*ethi) -
dnorm0*(2*ymeth03*ethi + Dmui*ymeth02-4*ymeth0*ethi))/D0
Dth2[ii] <- Dth2i <- Dthi^2/2 + 2*(dnorm1*(ymeth13 - ymeth1) - dnorm0*(ymeth03 - ymeth0))/D0
Dmuth2[ii] <- (Dmuthi*Dthi+Dmui*Dth2i)/2 +
ethi*(dnorm1*(2*ymeth14 + (Dthi-8)*ymeth12 + 2-Dthi) -
dnorm0*(2*ymeth04 + (Dthi-8)*ymeth02 + 2-Dthi))/D0
Dmu2th2[ii] <- (Dmu2thi*Dthi+Dmuthi^2 + Dmu2i*Dth2i + Dmui*Dmuth2[ii])/2 +
0.5*ethi*(dnorm1*(4*ymeth15*ethi+2*Dmui*ymeth14+2*(Dthi-16)*ymeth13*ethi+
2*(18-3*Dthi)*ymeth1*ethi+(2*Dmuthi+(Dthi-8)*Dmui)*ymeth12+(2-Dthi)*Dmui-2*Dmuthi)-
dnorm0*(4*ymeth05*ethi+2*Dmui*ymeth04+2*(Dthi-16)*ymeth03*ethi+
2*(18-3*Dthi)*ymeth0*ethi+(2*Dmuthi+(Dthi-8)*Dmui)*ymeth02+(2-Dthi)*Dmui-2*Dmuthi)
)/D0
Dmu3th[ii] <- Dmu3i*Dthi/2 + Dmu2i*Dmuthi + Dmui*Dmu2thi/2 +
0.5*ethi*(dnorm1*(4*ymeth14*e2thi + 4*Dmui*ymeth13*ethi - 12*Dmui*ymeth1*ethi +
(Dmui^2+2*Dmu2i-24*e2thi)*ymeth12+ 12*e2thi -(Dmui^2+2*Dmu2i)) -
dnorm0*(4*ymeth04*e2thi + 4*Dmui*ymeth03*ethi - 12*Dmui*ymeth0*ethi +
(Dmui^2+2*Dmu2i-24*e2thi)*ymeth02+ 12*e2thi -(Dmui^2+2*Dmu2i)))/D0
}
if (level>0) Dth[ii] <- Dthi -4*dnorm(y10)*y10/Dls
if (level>1) Dth2[ii] <- Dth2i - 8*(dnorm(y10)*y10/Dls)^2 - 4*dnorm(y10)*(y10^3 -y10)/Dls
} ## interval censored done
if (length(il)) { ## left censoring (y0 = -Inf, basically)
ethi <- eth[il];e2thi <- e2th[il];e3thi <- e3th[il]
ymeth1 <- (y[il]-mu[il])*ethi;dnorm1 <- dnorm(ymeth1)
D0 <- pnorm(ymeth1)
Dmui <- Dmu[il] <- 2*ethi*dnorm1/D0
Dmu2[il] <- Dmui^2/2 + 2*e2thi*dnorm1*ymeth1/D0
if (level>0) {
ymeth12 <- ymeth1^2; ymeth13 <- ymeth12*ymeth1
Dmu2i <- Dmu2[il]
Dth[il] <- Dthi <- 2*dnorm1*ymeth1/D0
Dmuth[il] <- Dmuthi <- Dmui*Dthi/2 + 2*ethi*dnorm1*(ymeth12-1)/D0
Dmu3[il] <- Dmui*(3*Dmu2i/2 - Dmui^2/4 - e2thi) + 2*e3thi*dnorm1*ymeth12/D0
Dmu2th[il] <- (Dmu2i*Dthi+Dmui*Dmuthi)/2 +
ethi*dnorm1*(2*ymeth13*ethi+ Dmui*ymeth12-6*ymeth1*ethi - Dmui)/D0
}
if (level>1) {
ymeth14 <- ymeth13*ymeth1; ymeth15 <- ymeth12*ymeth13
Dmu3i <- Dmu3[il]; Dmu2thi <- Dmu2th[il]
Dmu4[il] <- Dmu2i*(3*Dmu2i/2-Dmui^2/4-e2thi) + Dmui*(3*Dmu3i/2-Dmui*Dmu2i/2) +
e3thi*dnorm1*(2*ymeth13*ethi + Dmui*ymeth12-4*ymeth1*ethi)/D0
Dth2[il] <- Dth2i <- Dthi^2/2 + 2*dnorm1*(ymeth13 - ymeth1)/D0
Dmuth2[il] <- (Dmuthi*Dthi+Dmui*Dth2i)/2 +
ethi*dnorm1*(2*ymeth14 + (Dthi-8)*ymeth12 + 2-Dthi)/D0
Dmu2th2[il] <- (Dmu2thi*Dthi+Dmuthi^2 + Dmu2i*Dth2i + Dmui*Dmuth2[il])/2 +
0.5*ethi*dnorm1*(4*ymeth15*ethi+2*Dmui*ymeth14+2*(Dthi-16)*ymeth13*ethi+
2*(18-3*Dthi)*ymeth1*ethi+(2*Dmuthi+(Dthi-8)*Dmui)*ymeth12+(2-Dthi)*Dmui-2*Dmuthi)/D0
Dmu3th[il] <- Dmu3i*Dthi/2 + Dmu2i*Dmuthi + Dmui*Dmu2thi/2 +
0.5*ethi*dnorm1*(4*ymeth14*e2thi + 4*Dmui*ymeth13*ethi - 12*Dmui*ymeth1*ethi +
(Dmui^2+2*Dmu2i-24*e2thi)*ymeth12+ 12*e2thi -(Dmui^2+2*Dmu2i))/D0
}
} # left censoring done
if (length(ir)) { ## right censoring - basically y1 = Inf
ethi <- eth[ir];e2thi <- e2th[ir];e3thi <- e3th[ir]
ymeth0 <- (y[ir]-mu[ir])*ethi;
D0 <- pnorm(-ymeth0)
dnorm0 <- dnorm(ymeth0);
Dmu[ir] <- Dmui <- -2*ethi*dnorm0/D0
Dmu2[ir] <- Dmui^2/2 - 2*e2thi*dnorm0*ymeth0/D0
if (level>0) {
ymeth02 <- ymeth0^2; ymeth03 <- ymeth02*ymeth0
Dmu2i <- Dmu2[ir]
Dth[ir] <- Dthi <- -2*dnorm0*ymeth0/D0
Dmuth[ir] <- Dmuthi <- Dmui*Dthi/2 - 2*ethi*dnorm0*(ymeth02-1)/D0
Dmu3[ir] <- Dmui*(3*Dmu2i/2 - Dmui^2/4 - e2thi) - 2*e3thi*dnorm0*ymeth02/D0
Dmu2th[ir] <- (Dmu2i*Dthi+Dmui*Dmuthi)/2 -
ethi*dnorm0*(2*ymeth0^3*ethi+ Dmui*ymeth02-6*ymeth0*ethi - Dmui)/D0
}
if (level>1) {
ymeth04 <- ymeth03*ymeth0; ymeth05 <- ymeth02*ymeth03
Dmu3i <- Dmu3[ir]; Dmu2thi <- Dmu2th[ir]
Dmu4[ir] <- Dmu2i*(3*Dmu2i/2-Dmui^2/4-e2thi) + Dmui*(3*Dmu3i/2-Dmui*Dmu2i/2) -
e3thi*dnorm0*(2*ymeth0^3*ethi + Dmui*ymeth02-4*ymeth0*ethi)/D0
Dth2[ir] <- Dthi^2/2 - 2*dnorm0*(ymeth0^3 - ymeth0)/D0
Dmuth2[ir] <- (Dmuthi*Dthi+Dmui*Dth2[ir])/2 -
ethi*dnorm0*(2*ymeth04 + (Dthi-8)*ymeth02 + 2-Dthi)/D0
Dmu2th2[ir] <- (Dmu2thi*Dthi+Dmuthi^2 + Dmu2i*Dth2[ir] + Dmui*Dmuth2[ir])/2 -
0.5*ethi*dnorm0*(4*ymeth05*ethi+2*Dmui*ymeth04+2*(Dthi-16)*ymeth0^3*ethi+
2*(18-3*Dthi)*ymeth0*ethi+(2*Dmuthi+(Dthi-8)*Dmui)*ymeth02+(2-Dthi)*Dmui-2*Dmuthi)/D0
Dmu3th[ir] <- Dmu3i*Dthi/2 + Dmu2i*Dmuthi + Dmui*Dmu2thi/2 -
0.5*ethi*(dnorm0*(4*ymeth04*e2thi + 4*Dmui*ymeth0^3*ethi -
12*Dmui*ymeth0*ethi + (Dmui^2+2*Dmu2i-24*e2thi)*ymeth02+ 12*e2thi -(Dmui^2+2*Dmu2i)))/D0
}
} # right censoring done
r <- list(Dmu=Dmu,Dmu2=Dmu2,EDmu2=Dmu2)
if (level>0) {
r$Dth <- Dth;r$Dmuth <- Dmuth;r$Dmu3 <- Dmu3
r$EDmu2th <- r$Dmu2th <- Dmu2th;
}
if (level>1) {
r$Dmu4 <- Dmu4; r$Dth2 <- Dth2; r$Dmuth2 <- Dmuth2;
r$Dmu2th2 <- Dmu2th2; r$Dmu3th <- Dmu3th
}
r
} ## Dd cnorm
aic <- function(y, mu, theta=NULL, wt, dev) { ## cnorm AIC
if (is.null(theta)) theta <- get(".Theta")
th <- theta - log(wt)/2
yat <- attr(y,"censor")
if (is.null(yat)) yat <- y
ii <- which(is.na(yat)|yat==y) ## uncensored observations
d <- rep(0,length(y))
if (length(ii)) d[ii] <- (y[ii]-mu[ii])^2*exp(-2*th[ii]) + log(2*pi) + 2*th[ii]
ii <- which(is.finite(yat)&yat!=y) ## interval censored
if (length(ii)) {
y1 <- pmax(yat[ii],y[ii]); y0 <- pmin(yat[ii],y[ii])
d[ii] <- - 2*log(dpnorm((y0-mu[ii])*exp(-th[ii]),(y1-mu[ii])*exp(-th[ii])))
}
ii <- which(yat == -Inf) ## left censored
if (length(ii)) d[ii] <- -2*pnorm((y[ii]-mu[ii])*exp(-th[ii]),log.p=TRUE)
ii <- which(yat == Inf) ## right censored
if (length(ii)) d[ii] <- -2*pnorm(-(y[ii]-mu[ii])*exp(-th[ii]),log.p=TRUE)
sum(d) ## -2*log likelihood
} ## AIC cnorm
ls <- function(y,w,theta,scale) {
## the cnorm log saturated likelihood function.
th <- theta - log(w)/2
yat <- attr(y,"censor")
if (is.null(yat)) yat <- y
ii <- which(yat==y) ## uncensored observations
d2 <- d1 <- d <- rep(0,length(y))
if (length(ii)) {
d[ii] <- log(2*pi)/2 - th[ii]
d1[ii] <- -1
}
ii <- which(is.finite(yat)&yat!=y) ## interval censored
if (length(ii)) {
y1 <- pmax(yat[ii],y[ii]); y0 <- pmin(yat[ii],y[ii])
y10 <- (y1-y0)*exp(-th[ii])/2
d0 <- dpnorm(-y10,y10)
d[ii] <- log(d0) ## log saturated likelihood
d1[ii] <- -2*dnorm(y10)*y10/d0
d2[ii] <- -d1[ii]^2 - 2*dnorm(y10)*y10^2*(y1-y0)/2
}
## right or left censored saturated log likelihoods are zero.
list(ls=sum(d), ## saturated log likelihood
lsth1=sum(d1), ## first deriv vector w.r.t theta - last element relates to scale, if free
LSTH1=matrix(d1,ncol=1),
lsth2=sum(d2)) ## Hessian w.r.t. theta, last row/col relates to scale, if free
} ## ls cnorm
initialize <- expression({ ## cnorm
if (is.matrix(y)) {
.yat <- y[,2]
y <- y[,1]
attr(y,"censor") <- .yat
}
mustart <- if (family$link=="identity") y else pmax(y,min(y>0))
})
postproc <- function(family,y,prior.weights,fitted,linear.predictors,offset,intercept){
posr <- list()
if (is.matrix(y)) {
.yat <- y[,2]
y <- y[,1]
attr(y,"censor") <- .yat
}
posr$null.deviance <- find.null.dev(family,y,eta=linear.predictors,offset,prior.weights)
posr$family <-
paste("cnorm(",round(family$getTheta(TRUE),3),")",sep="")
posr
} ## postproc cnorm
rd <- function(mu,wt,scale) { ## NOTE - not done
Theta <- exp(get(".Theta"))
}
qf <- function(p,mu,wt,scale) { ## NOTE - not done
Theta <- exp(get(".Theta"))
}
subsety <- function(y,ind) { ## function to subset response
if (is.matrix(y)) return(y[ind,])
yat <- attr(y,"censor")
y <- y[ind]
if (!is.null(yat)) attr(y,"censor") <- yat[ind]
y
} ## subsety
environment(dev.resids) <- environment(aic) <- environment(getTheta) <-
environment(rd)<- environment(qf)<- environment(putTheta) <- env
structure(list(family = "cnorm", link = linktemp, linkfun = stats$linkfun,
linkinv = stats$linkinv, dev.resids = dev.resids,Dd=Dd,subsety=subsety,#variance=variance,
aic = aic, mu.eta = stats$mu.eta, initialize = initialize,postproc=postproc,ls=ls,
validmu = validmu, valideta = stats$valideta,n.theta=n.theta,
ini.theta = iniTheta,putTheta=putTheta,getTheta=getTheta),#,rd=rd,qf=qf),
class = c("extended.family","family"))
} ## cnorm
#################################################
## extended family object for ordered categorical
#################################################
ocat <- function(theta=NULL,link="identity",R=NULL) {
## extended family object for ordered categorical model.
## one of theta and R must be supplied. length(theta) == R-2.
## weights are ignored. #! is stuff removed under re-definition of ls as 0
linktemp <- substitute(link)
if (!is.character(linktemp)) linktemp <- deparse(linktemp)
if (linktemp %in% c("identity")) stats <- make.link(linktemp)
else if (is.character(link)) {
stats <- make.link(link)
linktemp <- link
} else {
if (inherits(link, "link-glm")) {
stats <- link
if (!is.null(stats$name))
linktemp <- stats$name
} else stop(linktemp, " link not available for ordered categorical family; available links are \"identity\"")
}
if (is.null(theta)&&is.null(R)) stop("Must supply theta or R to ocat")
if (!is.null(theta)) R <- length(theta) + 2 ## number of catergories
## Theta <- NULL;
n.theta <- R-2
## NOTE: data based initialization is in preinitialize...
if (!is.null(theta)&&sum(theta==0)==0) {
if (sum(theta<0)) iniTheta <- log(abs(theta)) ## initial theta supplied
else {
iniTheta <- log(theta) ## fixed theta supplied
n.theta <- 0
}
} else iniTheta <- rep(-1,length=R-2) ## inital log theta value
env <- new.env(parent = .GlobalEnv)
assign(".Theta", iniTheta, envir = env)
putTheta <-function(theta) assign(".Theta", theta,envir=environment(sys.function()))
getTheta <-function(trans=FALSE) {
theta <- get(".Theta")
if (trans) { ## transform to (finite) cut points...
R = length(theta)+2
alpha <- rep(0,R-1) ## the thresholds
alpha[1] <- -1
if (R > 2) {
ind <- 2:(R-1)
alpha[ind] <- alpha[1] + cumsum(exp(theta))
}
theta <- alpha
}
theta
} ## getTheta ocat
postproc <- function(family,y,prior.weights,fitted,linear.predictors,offset,intercept) {
posr <- list()
## null.deviance needs to be corrected...
posr$null.deviance <- find.null.dev(family,y,eta=linear.predictors,offset,prior.weights)
posr$family <-
paste("Ordered Categorical(",paste(round(family$getTheta(TRUE),2),collapse=","),")",sep="")
posr
}
validmu <- function(mu) all(is.finite(mu))
dev.resids <- function(y, mu, wt,theta=NULL) { ## ocat dev resids
Fdiff <- function(a,b) {
## cancellation resistent F(b)-F(a), b>a
h <- rep(1,length(b)); h[b>0] <- -1; eb <- exp(b*h)
h <- h*0+1; h[a>0] <- -1; ea <- exp(a*h)
ind <- b<0;bi <- eb[ind];ai <- ea[ind]
h[ind] <- bi/(1+bi) - ai/(1+ai)
ind1 <- a>0;bi <- eb[ind1];ai <- ea[ind1]
h[ind1] <- (ai-bi)/((ai+1)*(bi+1))
ind <- !ind & !ind1;bi <- eb[ind];ai <- ea[ind]
h[ind] <- (1-ai*bi)/((bi+1)*(ai+1))
h
}
if (is.null(theta)) theta <- get(".Theta")
R = length(theta)+2
alpha <- rep(0,R+1) ## the thresholds
alpha[1] <- -Inf;alpha[R+1] <- Inf
alpha[2] <- -1
if (R > 2) {
ind <- 3:R
alpha[ind] <- alpha[2] + cumsum(exp(theta))
}
al1 <- alpha[y+1];al0 = alpha[y] ## cut points above and below y
## Compute sign for deviance residuals, based on sign of interval
## midpoint for each datum minus the fitted value of the latent
## variable. This makes first and last categories like 0s and 1s in
## logistic regression....
s <- sign((al1 + al0)/2-mu) ## sign for deviance residuals
al1mu <- al1-mu;al0mu <- al0-mu
f <- Fdiff(al0mu,al1mu)
rsd <- -2*wt*log(f)
attr(rsd,"sign") <- s
rsd
} ## ocat dev.resids
Dd <- function(y, mu, theta, wt=NULL, level=0) {
## derivatives of the ocat deviance...
Fdiff <- function(a,b) {
## cancellation resistent F(b)-F(a), b>a
h <- rep(1,length(b)); h[b>0] <- -1; eb <- exp(b*h)
h <- h*0+1; h[a>0] <- -1; ea <- exp(a*h)
ind <- b<0;bi <- eb[ind];ai <- ea[ind]
h[ind] <- bi/(1+bi) - ai/(1+ai)
ind1 <- a>0;bi <- eb[ind1];ai <- ea[ind1]
h[ind1] <- (ai-bi)/((ai+1)*(bi+1))
ind <- !ind & !ind1;bi <- eb[ind];ai <- ea[ind]
h[ind] <- (1-ai*bi)/((bi+1)*(ai+1))
h
}
abcd <- function(x,level=-1) {
bj <- cj <- dj <- NULL
## compute f_j^2 - f_j without cancellation error
## x <- 10;F(x)^2-F(x);abcd(x)$aj
h <- rep(1,length(x)); h[x>0] <- -1; ex <- exp(x*h)
ex1 <- ex+1;ex1k <- ex1^2
aj <- -ex/ex1k
if (level>=0) {
## compute f_j - 3 f_j^2 + 2f_j^3 without cancellation error
ex1k <- ex1k*ex1;ex2 <- ex^2
bj <- h*(ex-ex^2)/ex1k
if (level>0) {
## compute -f_j + 7 f_j^2 - 12 f_j^3 + 6 f_j^4
ex1k <- ex1k*ex1;ex3 <- ex2*ex
cj <- (-ex3 + 4*ex2 - ex)/ex1k
if (level>1) {
## compute d_j
ex1k <- ex1k*ex1;ex4 <- ex3*ex
dj <- h * (-ex4 + 11*ex3 - 11*ex2 + ex)/ex1k
}
}
}
list(aj=aj,bj=bj,cj=cj,dj=dj)
}
if (is.null(wt)) wt <- 1
R = length(theta)+2
alpha <- rep(0,R+1) ## the thresholds
alpha[1] <- -Inf;alpha[R+1] <- Inf
alpha[2] <- -1
if (R > 2) {
ind <- 3:R
alpha[ind] <- alpha[2] + cumsum(exp(theta))
}
al1 <- alpha[y+1];al0 = alpha[y]
al1mu <- al1-mu;al0mu <- al0 - mu
f <- pmax(Fdiff(al0mu,al1mu),.Machine$double.xmin)
r1 <- abcd(al1mu,level); a1 <- r1$aj
r0 <- abcd(al0mu,level); a0 <- r0$aj
a <- a1 - a0
if (level>=0) {
b1 <- r1$bj;b0 <- r0$bj
b <- b1 - b0
}
if (level>0) {
c1 <- r1$cj;c0 <- r0$cj
c <- c1 - c0
}
if (level>1) {
d1 <- r1$dj;d0 <- r0$dj
d <- d1 - d0
}
oo <- list(D=NULL,Dmu=NULL,Dmu2=NULL,Dth=NULL,Dmuth=NULL,
Dmu2th=NULL)
n <- length(y)
## deviance...
oo$D <- -2 * wt * log(f)
if (level >= 0) { ## get derivatives used in coefficient estimation
oo$Dmu <- -2 * wt * a / f
a2 <- a^2
oo$EDmu2 <- oo$Dmu2 <- 2 * wt * (a2/f - b)/f
}
if (R<3) level <- 0 ## no free parameters
if (level > 0) { ## get first derivative related stuff
f2 <- f^2;a3 <- a2*a
oo$Dmu3 <- 2 * wt * (- c - 2 * a3/f2 + 3 * a * b/f)/f
Dmua0 <- 2 * (a0 * a/f - b0)/f
Dmua1 <- -2 * (a1 * a /f - b1)/f
Dmu2a0 <- -2 * (c0 + (a0*(2*a2/f - b)- 2*b0*a )/f)/f
Dmu2a1 <- 2 * (c1 + (2*(a1*a2/f - b1*a) - a1*b)/f)/f
Da0 <- -2*a0/f
Da1 <- 2 * a1/f
## now transform to derivatives w.r.t. theta...
oo$Dmu2th <- oo$Dmuth <- oo$Dth <- matrix(0,n,R-2)
for (k in 1:(R-2)) {
etk <- exp(theta[k])
ind <- y == k+1;wti <- wt[ind]
oo$Dth[ind,k] <- wti * Da1[ind]*etk
oo$Dmuth[ind,k] <- wti * Dmua1[ind]*etk
oo$Dmu2th[ind,k] <- wti * Dmu2a1[ind]*etk
if (R>k+2) {
ind <- y > k+1 & y < R;wti <- wt[ind]
oo$Dth[ind,k] <- wti * (Da1[ind]+Da0[ind])*etk
oo$Dmuth[ind,k] <- wti * (Dmua1[ind]+Dmua0[ind])*etk
oo$Dmu2th[ind,k] <- wti * (Dmu2a1[ind]+Dmu2a0[ind])*etk
}
ind <- y == R;wti <- wt[ind]
oo$Dth[ind,k] <- wti * Da0[ind]*etk
oo$Dmuth[ind,k] <- wti * Dmua0[ind]*etk
oo$Dmu2th[ind,k] <- wti * Dmu2a0[ind]*etk
}
oo$EDmu2th <- oo$Dmu2th
}
if (level >1) { ## and the second derivative components
oo$Dmu4 <- 2 * wt * ((3*b^2 + 4*a*c)/f + a2*(6*a2/f - 12*b)/f2 - d)/f
Dmu3a0 <- 2 * ((a0*c + 3*c0*a + 3*b0*b)/f - d0 +
6*a*(a0*a2/f - b0*a - a0*b)/f2 )/f
Dmu3a1 <- 2 * (d1 - (a1*c + 3*(c1*a + b1*b))/f
+ 6*a*(b1*a - a1*a2/f + a1*b)/f2)/f
Dmua0a0 <- 2*(c0 + (2*a0*(b0 - a0*a/f) - b0*a)/f )/f
Dmua1a1 <- 2*( (b1*a + 2*a1*(b1 - a1*a/f))/f - c1)/f
Dmua0a1 <- 2*(a0*(2*a1*a/f - b1) - b0*a1)/f2
Dmu2a0a0 <- 2*(d0 + (b0*(2*b0 - b) + 2*c0*(a0 - a))/f +
2*(b0*a2 + a0*(3*a0*a2/f - 4*b0*a - a0*b))/f2)/f
Dmu2a1a1 <- 2*( (2*c1*(a + a1) + b1*(2*b1 + b))/f
+ 2*(a1*(3*a1*a2/f - a1*b) - b1*a*(a + 4*a1))/f2 - d1)/f
Dmu2a0a1 <- 0
Da0a0 <- 2 * (b0 + a0^2/f)/f
Da1a1 <- -2* (b1 - a1^2/f)/f
Da0a1 <- -2*a0*a1/f2
## now transform to derivatives w.r.t. theta...
n2d <- (R-2)*(R-1)/2
oo$Dmu3th <- matrix(0,n,R-2)
oo$Dmu2th2 <- oo$Dmuth2 <- oo$Dth2 <- matrix(0,n,n2d)
i <- 0
for (j in 1:(R-2)) for (k in j:(R-2)) {
i <- i + 1 ## the second deriv col
ind <- y >= j ## rest are zero
wti <- wt[ind]
ar1.k <- ar.k <- rep(exp(theta[k]),n)
ar.k[y==R | y <= k] <- 0; ar1.k[y<k+2] <- 0
ar.j <- ar1.j <- rep(exp(theta[j]),n)
ar.j[y==R | y <= j] <- 0; ar1.j[y<j+2] <- 0
ar.kj <- ar1.kj <- rep(0,n)
if (k==j) {
ar.kj[y>k&y<R] <- exp(theta[k])
ar1.kj[y>k+1] <- exp(theta[k])
oo$Dmu3th[ind,k] <- wti * (Dmu3a1[ind]*ar.k[ind] + Dmu3a0[ind]*ar1.k[ind])
}
oo$Dth2[,i] <- wt * (Da1a1*ar.k*ar.j + Da0a1*ar.k*ar1.j + Da1 * ar.kj +
Da0a0*ar1.k*ar1.j + Da0a1*ar1.k*ar.j + Da0 * ar1.kj)
oo$Dmuth2[,i] <- wt * (Dmua1a1*ar.k*ar.j + Dmua0a1*ar.k*ar1.j + Dmua1 * ar.kj +
Dmua0a0*ar1.k*ar1.j + Dmua0a1*ar1.k*ar.j + Dmua0 * ar1.kj)
oo$Dmu2th2[,i] <- wt * (Dmu2a1a1*ar.k*ar.j + Dmu2a0a1*ar.k*ar1.j + Dmu2a1 * ar.kj +
Dmu2a0a0*ar1.k*ar1.j + Dmu2a0a1*ar1.k*ar.j + Dmu2a0 * ar1.kj)
}
}
oo
} ## Dd (ocat)
aic <- function(y, mu, theta=NULL, wt, dev) {
Fdiff <- function(a,b) {
## cancellation resistent F(b)-F(a), b>a
h <- rep(1,length(b)); h[b>0] <- -1; eb <- exp(b*h)
h <- h*0+1; h[a>0] <- -1; ea <- exp(a*h)
ind <- b<0;bi <- eb[ind];ai <- ea[ind]
h[ind] <- bi/(1+bi) - ai/(1+ai)
ind1 <- a>0;bi <- eb[ind1];ai <- ea[ind1]
h[ind1] <- (ai-bi)/((ai+1)*(bi+1))
ind <- !ind & !ind1;bi <- eb[ind];ai <- ea[ind]
h[ind] <- (1-ai*bi)/((bi+1)*(ai+1))
h
}
if (is.null(theta)) theta <- get(".Theta")
R = length(theta)+2
alpha <- rep(0,R+1) ## the thresholds
alpha[1] <- -Inf;alpha[R+1] <- Inf
alpha[2] <- -1
if (R > 2) {
ind <- 3:R
alpha[ind] <- alpha[2] + cumsum(exp(theta))
}
al1 <- alpha[y+1];al0 = alpha[y]
##f1 <- F(al1-mu);f0 <- F(al0-mu);f <- f1 - f0
f <- Fdiff(al0-mu,al1-mu)
-2*sum(log(f)*wt)
} ## ocat aic
ls <- function(y,w,theta,scale) {
## the log saturated likelihood function.
return(list(ls=0,lsth1=rep(0,R-2),LSTH1=matrix(0,length(y),R-2),lsth2=matrix(0,R-2,R-2)))
} ## ocat ls
## initialization is interesting -- needs to be with reference to initial cut-points
## so that mu puts each obs in correct category initially...
preinitialize <- function(y,family) {
ocat.ini <- function(R,y) {
## initialize theta from raw counts in each category
if (R<3) return()
y <- c(1:R,y) ## make sure there is *something* in each class
p <- cumsum(tabulate(y[is.finite(y)])/length(y[is.finite(y)]))
eta <- if (p[1]==0) 5 else -1 - log(p[1]/(1-p[1])) ## mean of latent
theta <- rep(-1,R-1)
for (i in 2:(R-1)) theta[i] <- log(p[i]/(1-p[i])) + eta
theta <- diff(theta)
theta[theta <= 0.01] <- 0.01
theta <- log(theta)
}
R3 <- length(family$getTheta())+2
if (!is.numeric(y)) stop("Response should be integer class labels")
if (R3>2&&family$n.theta>0) {
Theta <- ocat.ini(R3,y)
return(list(Theta=Theta))
}
} ## ocat preinitialize
initialize <- expression({
R <- length(family$getTheta())+2 ## don't use n.theta as it's used to signal fixed theta
if (any(y < 1)||any(y>R)) stop("values out of range")
## n <- rep(1, nobs)
## get the cut points...
alpha <- rep(0,R+1) ## the thresholds
alpha[1] <- -2;alpha[2] <- -1
if (R > 2) {
ind <- 3:R
alpha[ind] <- alpha[2] + cumsum(exp(family$getTheta()))
}
alpha[R+1] <- alpha[R] + 1
mustart <- (alpha[y+1] + alpha[y])/2
})
residuals <- function(object,type=c("deviance","working","response")) {
if (type == "working") {
res <- object$residuals
} else if (type == "response") {
theta <- object$family$getTheta()
mu <- object$linear.predictors
R = length(theta)+2
alpha <- rep(0,R+1) ## the thresholds
alpha[1] <- -Inf;alpha[R+1] <- Inf
alpha[2] <- -1
if (R > 2) {
ind <- 3:R
alpha[ind] <- alpha[2] + cumsum(exp(theta))
}
fv <- mu*NA
for (i in 1:(R+1)) {
ind <- mu>alpha[i] & mu<=alpha[i+1]
fv[ind] <- i
}
res <- object$y - fv
} else if (type == "deviance") {
y <- object$y
mu <- object$fitted.values
wts <- object$prior.weights
res <- object$family$dev.resids(y,mu,wts)
s <- attr(res,"sign")
if (is.null(s)) s <- sign(y-mu)
res <- as.numeric(sqrt(pmax(res,0)) * s)
}
res
} ## ocat residuals
predict <- function(family,se=FALSE,eta=NULL,y=NULL,X=NULL,
beta=NULL,off=NULL,Vb=NULL) {
## optional function to give predicted values - idea is that
## predict.gam(...,type="response") will use this, and that
## either eta will be provided, or {X, beta, off, Vb}. family$data
## contains any family specific extra information.
ocat.prob <- function(theta,lp,se=NULL) {
## compute probabilities for each class in ocat model
## theta is finite cut points, lp is linear predictor, se
## is standard error on lp...
R <- length(theta)
dp <- prob <- matrix(0,length(lp),R+2)
prob[,R+2] <- 1
for (i in 1:R) {
x <- theta[i] - lp
ind <- x > 0
prob[ind,i+1] <- 1/(1+exp(-x[ind]))
ex <- exp(x[!ind])
prob[!ind,i+1] <- ex/(1+ex)
dp[,i+1] <- prob[,i+1]*(prob[,i+1]-1)
}
prob <- t(diff(t(prob)))
dp <- t(diff(t(dp))) ## dprob/deta
if (!is.null(se)) se <- as.numeric(se)*abs(dp)
list(prob,se)
} ## ocat.prob
theta <- family$getTheta(TRUE)
if (is.null(eta)) { ## return probabilities
discrete <- is.list(X)
mu <- off + if (discrete) Xbd(X$Xd,beta,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop) else drop(X%*%beta)
if (se) {
se <- if (discrete) sqrt(pmax(0,diagXVXd(X$Xd,Vb,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,nthreads=1))) else
sqrt(pmax(0,rowSums((X%*%Vb)*X)))
} else se <- NULL
##theta <- cumsum(c(-1,exp(theta)))
p <- ocat.prob(theta,mu,se)
if (is.null(se)) return(p) else { ## approx se on prob also returned
names(p) <- c("fit","se.fit")
return(p)
}
} else { ## return category implied by eta (i.e mean of latent)
R = length(theta)+2
#alpha <- rep(0,R) ## the thresholds
#alpha[1] <- -Inf;alpha[R] <- Inf
alpha <- c(-Inf, theta, Inf)
fv <- eta*NA
for (i in 1:(R+1)) {
ind <- eta>alpha[i] & eta<=alpha[i+1]
fv[ind] <- i
}
return(list(fv))
}
} ## ocat predict
rd <- function(mu,wt,scale) {
## simulate data given fitted latent variable in mu
theta <- get(".Theta")
R = length(theta)+2
alpha <- rep(0,R+1) ## the thresholds
alpha[1] <- -Inf;alpha[R+1] <- Inf
alpha[2] <- -1
if (R > 2) {
ind <- 3:R
alpha[ind] <- alpha[2] + cumsum(exp(theta))
}
## ... cut points computed, now simulate latent variable, u
y <- u <- runif(length(mu))
u <- mu + log(u/(1-u))
## and allocate categories according to u and cut points...
for (i in 1:R) {
y[u > alpha[i]&u <= alpha[i+1]] <- i
}
y
} ## ocat rd
environment(dev.resids) <- environment(aic) <- environment(putTheta) <-
environment(getTheta) <- environment(rd) <- environment(predict) <- env
structure(list(family = "Ordered Categorical", link = linktemp, linkfun = stats$linkfun,
linkinv = stats$linkinv, dev.resids = dev.resids,Dd=Dd,
aic = aic, mu.eta = stats$mu.eta, initialize = initialize,postproc=postproc,
preinitialize = preinitialize, ls=ls,rd=rd,residuals=residuals,
validmu = validmu, valideta = stats$valideta,n.theta=n.theta,
ini.theta = iniTheta,putTheta=putTheta,predict=predict,step = 1,
getTheta=getTheta,no.r.sq=TRUE), class = c("extended.family","family"))
} ## end of ocat
## Tweedie....
tw <- function (theta = NULL, link = "log",a=1.01,b=1.99) {
## Extended family object for Tweedie, to allow direct estimation of p
## as part of REML optimization.
## p = (a+b*exp(theta))/(1+exp(theta)), i.e. a < p < b
## NOTE: The Tweedie density computation at low phi, low p is susceptible
## to cancellation error, which seems unavoidable. Furthermore
## there are known problems with spurious maxima in the likelihood
## w.r.t. p when the data are rounded.
linktemp <- substitute(link)
if (!is.character(linktemp)) linktemp <- deparse(linktemp)
if (linktemp %in% c("log", "identity", "sqrt","inverse")) stats <- make.link(linktemp)
else if (is.character(link)) {
stats <- make.link(link)
linktemp <- link
} else {
if (inherits(link, "link-glm")) {
stats <- link
if (!is.null(stats$name))
linktemp <- stats$name
}
else stop(gettextf("link \"%s\" not available for Tweedie family.",
linktemp, collapse = ""), domain = NA)
}
## Theta <- NULL;
n.theta <- 1
if (!is.null(theta)&&theta!=0) {
if (abs(theta)<=a||abs(theta)>=b) stop("Tweedie p must be in interval (a,b)")
if (theta>0) { ## fixed theta supplied
iniTheta <- log((theta-a)/(b-theta))
n.theta <- 0 ## so no theta to estimate
} else iniTheta <- log((-theta-a)/(b+theta)) ## initial theta supplied
} else iniTheta <- 0 ## inital log theta value
env <- new.env(parent = .GlobalEnv)
assign(".Theta", iniTheta, envir = env)
assign(".a",a, envir = env);assign(".b",b, envir = env)
getTheta <- function(trans=FALSE) {
## trans transforms to the original scale...
th <- get(".Theta")
a <- get(".a");b <- get(".b")
if (trans) th <- if (th>0) (b+a*exp(-th))/(1+exp(-th)) else (b*exp(th)+a)/(exp(th)+1)
th
}
putTheta <- function(theta) assign(".Theta", theta,envir=environment(sys.function()))
validmu <- function(mu) all(mu > 0)
variance <- function(mu) {
th <- get(".Theta");a <- get(".a");b <- get(".b")
p <- if (th>0) (b+a*exp(-th))/(1+exp(-th)) else (b*exp(th)+a)/(exp(th)+1)
mu^p
}
dev.resids <- function(y, mu, wt,theta=NULL) {
if (is.null(theta)) theta <- get(".Theta")
a <- get(".a");b <- get(".b")
p <- if (theta>0) (b+a*exp(-theta))/(1+exp(-theta)) else (b*exp(theta)+a)/(exp(theta)+1)
y1 <- y + (y == 0)
theta <- if (p == 1) log(y1/mu) else (y1^(1 - p) - mu^(1 - p))/(1 - p)
kappa <- if (p == 2) log(y1/mu) else (y^(2 - p) - mu^(2 - p))/(2 - p)
pmax(2 * (y * theta - kappa) * wt,0)
} ## tw dev.resids
Dd <- function(y, mu, theta, wt, level=0) {
## derivatives of the tw deviance...
a <- get(".a");b <- get(".b")
th <- theta
p <- if (th>0) (b+a*exp(-th))/(1+exp(-th)) else (b*exp(th)+a)/(exp(th)+1)
dpth1 <- if (th>0) exp(-th)*(b-a)/(1+exp(-th))^2 else exp(th)*(b-a)/(exp(th)+1)^2
dpth2 <- if (th>0) ((a-b)*exp(-th)+(b-a)*exp(-2*th))/(exp(-th)+1)^3 else
((a-b)*exp(2*th)+(b-a)*exp(th))/(exp(th)+1)^3
mu1p <- mu^(1-p)
mup <- mu^p
r <- list()
## get the quantities needed for IRLS.
## Dmu2eta2 is deriv of D w.r.t mu twice and eta twice,
## Dmu is deriv w.r.t. mu once, etc...
ymupi <- y/mup
r$Dmu <- 2*wt*(mu1p - ymupi)
r$Dmu2 <- 2*wt*(mu^(-1-p)*p*y + (1-p)/mup)
r$EDmu2 <- (2*wt)/mup ## expected Dmu2 (weight)
if (level>0) { ## quantities needed for first derivatives
i1p <- 1/(1-p)
y1 <- y + (y==0)
##ylogy <- y*log(y1)
logmu <- log(mu)
mu2p <- mu * mu1p
r$Dth <- 2 * wt * ( (y^(2-p)*log(y1) - mu2p*logmu)/(2-p) +
(y*mu1p*logmu - y^(2-p)*log(y1))/(1-p) -
(y^(2-p) - mu2p)/(2-p)^2 +
(y^(2-p) - y*mu1p)*i1p^2) *dpth1
r$Dmuth <- 2 * wt * logmu * (ymupi - mu1p)*dpth1
mup1 <- mu^(-p-1)
r$Dmu3 <- -2 * wt * mup1*p*(y/mu*(p+1) + 1-p)
r$Dmu2th <- 2 * wt * (mup1*y*(1-p*logmu)-(logmu*(1-p)+1)/mup )*dpth1
r$EDmu3 <- -2*wt*p*mup1
r$EDmu2th <- -2*wt*logmu/mup*dpth1
}
if (level>1) { ## whole damn lot
mup2 <- mup1/mu
r$Dmu4 <- 2 * wt * mup2*p*(p+1)*(y*(p+2)/mu + 1 - p)
y2plogy <- y^(2-p)*log(y1);y2plog2y <- y2plogy*log(y1)
r$Dth2 <- 2 * wt * (((mu2p*logmu^2-y2plog2y)/(2-p) + (y2plog2y - y*mu1p*logmu^2)/(1-p) +
2*(y2plogy-mu2p*logmu)/(2-p)^2 + 2*(y*mu1p*logmu-y2plogy)/(1-p)^2
+ 2 * (mu2p - y^(2-p))/(2-p)^3+2*(y^(2-p)-y*mu^(1-p))/(1-p)^3)*dpth1^2) +
r$Dth*dpth2/dpth1
r$Dmuth2 <- 2 * wt * ((mu1p * logmu^2 - logmu^2*ymupi)*dpth1^2) + r$Dmuth*dpth2/dpth1
r$Dmu2th2 <- 2 * wt * ( (mup1 * logmu*y*(logmu*p - 2) + logmu/mup*(logmu*(1-p) + 2))*dpth1^2) +
r$Dmu2th * dpth2/dpth1
r$Dmu3th <- 2 * wt * mup1*(y/mu*(logmu*(1+p)*p-p -p-1) +logmu*(1-p)*p + p - 1 + p)*dpth1
}
r
} ## Dd tw
aic <- function(y, mu, theta=NULL, wt, dev) {
if (is.null(theta)) theta <- get(".Theta")
a <- get(".a");b <- get(".b")
p <- if (theta>0) (b+a*exp(-theta))/(1+exp(-theta)) else (b*exp(theta)+a)/(exp(theta)+1)
scale <- dev/sum(wt)
-2 * sum(ldTweedie(y, mu, p = p, phi = scale)[, 1] *
wt) + 2
} ## aic tw
ls <- function(y, w, theta, scale) {
## evaluate saturated log likelihood + derivs w.r.t. working params and log(scale) Tweedie
a <- get(".a");b <- get(".b")
Ls <- w * ldTweedie(y, y, rho=log(scale), theta=theta,a=a,b=b)
LS <- colSums(Ls)
lsth1 <- c(LS[4],LS[2]) ## deriv w.r.t. p then log scale
lsth2 <- matrix(c(LS[5],LS[6],LS[6],LS[3]),2,2)
list(ls=LS[1],lsth1=lsth1,LSTH1=Ls[,c(4,2)],lsth2=lsth2)
} ## ls tw
initialize <- expression({
##n <- rep(1, nobs)
mustart <- y + (y == 0)*.1
})
postproc <- function(family,y,prior.weights,fitted,linear.predictors,offset,intercept) {
posr <- list()
posr$null.deviance <- find.null.dev(family,y,eta=linear.predictors,offset,prior.weights)
posr$family <-
paste("Tweedie(p=",round(family$getTheta(TRUE),3),")",sep="")
posr
} ## postproc tw
rd <- function(mu,wt,scale) {
th <- get(".Theta")
a <- get(".a");b <- get(".b")
p <- if (th>0) (b+a*exp(-th))/(1+exp(-th)) else (b*exp(th)+a)/(exp(th)+1)
if (p == 2)
rgamma(n=length(mu), shape = 1/scale, scale = mu * scale)
else
rTweedie(mu, p = p, phi = scale)
} ## rd tw
environment(Dd) <- environment(ls) <-
environment(dev.resids) <- environment(aic) <- environment(getTheta) <-
environment(rd) <- environment(variance) <- environment(putTheta) <- env
structure(list(family = "Tweedie", link = linktemp, linkfun = stats$linkfun,
linkinv = stats$linkinv, dev.resids = dev.resids,Dd=Dd,variance=variance,rd=rd,
aic = aic, mu.eta = stats$mu.eta, initialize = initialize,postproc=postproc,ls=ls,
validmu = validmu, valideta = stats$valideta,canonical="none",n.theta=n.theta,
ini.theta = iniTheta,putTheta=putTheta,getTheta=getTheta,scale = -1),
class = c("extended.family","family"))
} ## tw
## beta regression
betar <- function (theta = NULL, link = "logit",eps=.Machine$double.eps*100) {
## Extended family object for beta regression
## length(theta)=1; log theta supplied
## This serves as a prototype for working with -2logLik
## as deviance, and only dealing with saturated likelihood
## at the end.
## Written by Natalya Pya. 'saturated.ll' by Simon Wood
linktemp <- substitute(link)
if (!is.character(linktemp)) linktemp <- deparse(linktemp)
if (linktemp %in% c("logit", "probit", "cloglog", "cauchit")) stats <- make.link(linktemp)
else if (is.character(link)) {
stats <- make.link(link)
linktemp <- link
} else {
if (inherits(link, "link-glm")) {
stats <- link
if (!is.null(stats$name))
linktemp <- stats$name
}
else stop(linktemp, " link not available for beta regression; available links are \"logit\", \"probit\", \"cloglog\" and \"cauchit\"")
}
n.theta <- 1
if (!is.null(theta)&&theta!=0) {
if (theta>0) {
iniTheta <- log(theta) ## fixed theta supplied
n.theta <- 0 ## signal that there are no theta parameters to estimate
} else iniTheta <- log(-theta) ## initial theta supplied
} else iniTheta <- 0 ## inital log theta value
env <- new.env(parent = .GlobalEnv)
assign(".Theta", iniTheta, envir = env)
assign(".betarEps",eps, envir = env)
getTheta <- function(trans=FALSE) if (trans) exp(get(".Theta")) else get(".Theta")
putTheta <- function(theta) assign(".Theta", theta,envir=environment(sys.function()))
variance <- function(mu) {
th <- get(".Theta")
mu*(1 - mu)/(1+exp(th))
}
validmu <- function(mu) all(mu > 0 & mu < 1)
dev.resids <- function(y, mu, wt,theta=NULL) {
## '-2*loglik' instead of deviance in REML/ML expression
if (is.null(theta)) theta <- get(".Theta")
theta <- exp(theta) ## note log theta supplied
muth <- mu*theta
## yth <- y*theta
2* wt * (-lgamma(theta) +lgamma(muth) + lgamma(theta - muth) - muth*(log(y)-log1p(-y)) -
theta*log1p(-y) + log(y) + log1p(-y))
} ## dev.resids betar
Dd <- function(y, mu, theta, wt, level=0) {
## derivatives of the -2*loglik...
## ltheta <- theta
theta <- exp(theta)
onemu <- 1 - mu; ## oney <- 1 - y
muth <- mu*theta; ## yth <- y*theta
onemuth <- onemu*theta ## (1-mu)*theta
psi0.th <- digamma(theta)
psi1.th <- trigamma(theta)
psi0.muth <- digamma(muth)
psi0.onemuth <- digamma(onemuth)
psi1.muth <- trigamma(muth)
psi1.onemuth <- trigamma(onemuth)
psi2.muth <- psigamma(muth,2)
psi2.onemuth <- psigamma(onemuth,2)
psi3.muth <- psigamma(muth,3)
psi3.onemuth <- psigamma(onemuth,3)
log.yoney <- log(y)-log1p(-y)
r <- list()
## get the quantities needed for IRLS.
## Dmu2eta2 is deriv of D w.r.t mu twice and eta twice,
## Dmu is deriv w.r.t. mu once, etc...
r$Dmu <- 2 * wt * theta* (psi0.muth - psi0.onemuth - log.yoney)
r$Dmu2 <- 2 * wt * theta^2*(psi1.muth+psi1.onemuth)
r$EDmu2 <- r$Dmu2
if (level>0) { ## quantities needed for first derivatives
r$Dth <- 2 * wt *theta*(-mu*log.yoney - log1p(-y)+ mu*psi0.muth+onemu*psi0.onemuth -psi0.th)
r$Dmuth <- r$Dmu + 2 * wt * theta^2*(mu*psi1.muth -onemu*psi1.onemuth)
r$Dmu3 <- 2 * wt *theta^3 * (psi2.muth - psi2.onemuth)
r$EDmu2th <- r$Dmu2th <- 2* r$Dmu2 + 2 * wt * theta^3* (mu*psi2.muth + onemu*psi2.onemuth)
}
if (level>1) { ## whole lot
r$Dmu4 <- 2 * wt *theta^4 * (psi3.muth+psi3.onemuth)
r$Dth2 <- r$Dth +2 * wt *theta^2* (mu^2*psi1.muth+ onemu^2*psi1.onemuth-psi1.th)
r$Dmuth2 <- r$Dmuth + 2 * wt *theta^2* (mu^2*theta*psi2.muth+ 2*mu*psi1.muth -
theta*onemu^2*psi2.onemuth - 2*onemu*psi1.onemuth)
r$Dmu2th2 <- 2*r$Dmu2th + 2* wt * theta^3* (mu^2*theta*psi3.muth +3*mu*psi2.muth+
onemu^2*theta*psi3.onemuth + 3*onemu*psi2.onemuth )
r$Dmu3th <- 3*r$Dmu3 + 2 * wt *theta^4*(mu*psi3.muth-onemu*psi3.onemuth)
}
r
} ## Dd betar
aic <- function(y, mu, theta=NULL, wt, dev) {
if (is.null(theta)) theta <- get(".Theta")
theta <- exp(theta)
muth <- mu*theta
term <- -lgamma(theta)+lgamma(muth)+lgamma(theta-muth)-(muth-1)*log(y)-
(theta-muth-1)*log1p(-y) ## `-' log likelihood for each observation
2 * sum(term * wt)
} ## aic betar
ls <- function(y,w,theta,scale) {
## the log saturated likelihood function for betar
## ls is defined as zero for REML/ML expression as deviance is defined as -2*log.lik
list(ls=0,## saturated log likelihood
lsth1=0, ## first deriv vector w.r.t theta - last element relates to scale
LSTH1 = matrix(0,length(y),1),
lsth2=0) ##Hessian w.r.t. theta
} ## ls betar
## preinitialization to reset G$y values of <=0 and >=1...
## code to evaluate in estimate.gam...
## reset G$y values of <=0 and >= 1 to eps and 1-eps...
#preinitialize <- NULL ## keep codetools happy
#eval(parse(text=paste("preinitialize <- expression({\n eps <- ",eps,
# "\n G$y[G$y >= 1-eps] <- 1 - eps\n G$y[G$y<= eps] <- eps })")))
preinitialize <- function(y,family) {
eps <- get(".betarEps")
y[y >= 1-eps] <- 1 - eps;y[y<= eps] <- eps
return(list(y=y))
}
# preinitialize <- expression({
# eps <- 1e-7
# G$y[G$y >= 1-eps] <- 1 - eps
# G$y[G$y<= eps] <- eps
# })
saturated.ll <- function(y,wt,theta=NULL){
## function to find the saturated loglik by Newton method,
## searching for the mu (on logit scale) that max loglik given theta and data...
gbh <- function(y,eta,phi,deriv=FALSE,a=1e-8,b=1-a) {
## local function evaluating log likelihood (l), gradient and second deriv
## vectors for beta... a and b are min and max mu values allowed.
## mu = (a + b*exp(eta))/(1+exp(eta))
ind <- eta>0
expeta <- mu <- eta;
expeta[ind] <- exp(-eta[ind]);expeta[!ind] <- exp(eta[!ind])
mu[ind] <- (a*expeta[ind] + b)/(1+expeta[ind])
mu[!ind] <- (a + b*expeta[!ind])/(1+expeta[!ind])
l <- dbeta(y,phi*mu,phi*(1-mu),log=TRUE)
if (deriv) {
g <- phi * log(y) - phi * log1p(-y) - phi * digamma(mu*phi) + phi * digamma((1-mu)*phi)
h <- -phi^2*(trigamma(mu*phi)+trigamma((1-mu)*phi))
dmueta2 <- dmueta1 <- eta
dmueta1 <- expeta*(b-a)/(1+expeta)^2
dmueta2 <- sign(eta)* ((a-b)*expeta+(b-a)*expeta^2)/(expeta+1)^3
h <- h * dmueta1^2 + g * dmueta2
g <- g * dmueta1
} else g=h=NULL
list(l=l,g=g,h=h,mu=mu)
} ## gbh
## now Newton loop...
eps <- get(".betarEps")
eta <- y
a <- eps;b <- 1 - eps
y[y<eps] <- eps;y[y>1-eps] <- 1-eps
eta[y<=eps*1.2] <- eps *1.2
eta[y>=1-eps*1.2] <- 1-eps*1.2
eta <- log((eta-a)/(b-eta))
mu <- LS <- ii <- 1:length(y)
for (i in 1:200) {
ls <- gbh(y,eta,theta,TRUE,a=eps/10)
conv <- abs(ls$g)<mean(abs(ls$l)+.1)*1e-8
if (sum(conv)>0) { ## some convergences occured
LS[ii[conv]] <- ls$l[conv] ## store converged
mu[ii[conv]] <- ls$mu[conv] ## store mu at converged
ii <- ii[!conv] ## drop indices
if (length(ii)>0) { ## drop the converged
y <- y[!conv];eta <- eta[!conv]
ls$l <- ls$l[!conv];ls$g <- ls$g[!conv];ls$h <- ls$h[!conv]
} else break ## nothing left to do
}
h <- -ls$h
hmin <- max(h)*1e-4
h[h<hmin] <- hmin ## make +ve def
delta <- ls$g/h ## step
ind <- abs(delta)>2
delta[ind] <- sign(delta[ind])*2 ## step length limit
ls1 <- gbh(y,eta+delta,theta,FALSE,a=eps/10); ## did it work?
ind <- ls1$l<ls$l ## failure index
k <- 0
while (sum(ind)>0&&k<20) { ## step halve only failed steps
k <- k + 1
delta[ind] <- delta[ind]/2
ls1$l[ind] <- gbh(y[ind],eta[ind]+delta[ind],theta,FALSE,a=eps/10)$l
ind <- ls1$l<ls$l
}
eta <- eta + delta
} ## end newton loop
if (length(ii)>0) {
LS[ii] <- ls$l
warning("saturated likelihood may be inaccurate")
}
list(f=sum(wt*LS),term=LS,mu=mu) ## fields f (sat lik) and term (individual datum sat lik) expected
} ## saturated.ll betar
## computes deviance, null deviance, family label
## requires prior weights, family, y, fitted values, offset, intercept indicator
postproc <- function(family,y,prior.weights,fitted,linear.predictors,offset,intercept) {
## code to evaluate in estimate.gam, to find the saturated
## loglik by Newton method
## searching for the mu (on logit scale) that max loglik given theta...
# wts <- object$prior.weights
theta <- family$getTheta(trans=TRUE) ## exp theta
lf <- family$saturated.ll(y, prior.weights,theta)
## storing the saturated loglik for each datum...
##object$family$data <- list(ls = lf$term,mu.ls = lf$mu)
l2 <- family$dev.resids(y,fitted,prior.weights)
posr <- list()
posr$deviance <- 2*lf$f + sum(l2)
wtdmu <- if (intercept) sum(prior.weights * y)/sum(prior.weights)
else family$linkinv(offset)
posr$null.deviance <- 2*lf$f + sum(family$dev.resids(y, wtdmu, prior.weights))
posr$family <-
paste("Beta regression(",round(theta,3),")",sep="")
posr
} ## postproc betar
initialize <- expression({
##n <- rep(1, nobs)
mustart <- y
})
residuals <- function(object,type=c("deviance","working","response","pearson")) {
if (type == "working") {
res <- object$residuals
} else if (type == "response") {
res <- object$y - object$fitted.values
} else if (type == "deviance") {
y <- object$y
mu <- object$fitted.values
wts <- object$prior.weights
lf <- object$family$saturated.ll(y, wts,object$family$getTheta(TRUE))
#object$family$data$ls <- lf$term
res <- 2*lf$term + object$family$dev.resids(y,mu,wts)
res[res<0] <- 0
s <- sign(y-mu)
res <- sqrt(res) * s
} else if (type == "pearson") {
mu <- object$fitted.values
res <- (object$y - mu)/object$family$variance(mu)^.5
}
res
} ## residuals betar
rd <- function(mu,wt,scale) {
## simulate data given fitted latent variable in mu
Theta <- exp(get(".Theta"))
r <- rbeta(n=length(mu),shape1=Theta*mu,shape2=Theta*(1-mu))
eps <- get(".betarEps")
r[r>=1-eps] <- 1 - eps
r[r<eps] <- eps
r
}
qf <- function(p,mu,wt,scale) {
Theta <- exp(get(".Theta"))
q <- qbeta(p,shape1=Theta*mu,shape2=Theta*(1-mu))
eps <- get(".betarEps")
q[q>=1-eps] <- 1 - eps
q[q<eps] <- eps
q
} ## rs betar
environment(dev.resids) <- environment(aic) <- environment(getTheta) <-
environment(rd)<- environment(qf) <- environment(variance) <- environment(putTheta) <-
environment(saturated.ll) <- environment(preinitialize) <- env
structure(list(family = "Beta regression", link = linktemp, linkfun = stats$linkfun,
linkinv = stats$linkinv, dev.resids = dev.resids,Dd=Dd, variance=variance,
aic = aic, mu.eta = stats$mu.eta, initialize = initialize,ls=ls,
preinitialize=preinitialize,postproc=postproc, residuals=residuals, saturated.ll=saturated.ll,
validmu = validmu, valideta = stats$valideta, n.theta=n.theta,
ini.theta = iniTheta,putTheta=putTheta,getTheta=getTheta,rd=rd,qf=qf),
class = c("extended.family","family"))
} ## betar
## scaled t (Natalya Pya) ...
scat <- function (theta = NULL, link = "identity",min.df = 3) {
## Extended family object for scaled t distribution
## length(theta)=2; log theta supplied.
## Written by Natalya Pya.
linktemp <- substitute(link)
if (!is.character(linktemp)) linktemp <- deparse(linktemp)
if (linktemp %in% c("identity", "log", "inverse")) stats <- make.link(linktemp)
else if (is.character(link)) {
stats <- make.link(link)
linktemp <- link
} else {
if (inherits(link, "link-glm")) {
stats <- link
if (!is.null(stats$name))
linktemp <- stats$name
}
else stop(linktemp, " link not available for scaled t distribution; available links are \"identity\", \"log\", and \"inverse\"")
}
## Theta <- NULL;
n.theta <- 2
if (!is.null(theta)&&sum(theta==0)==0) {
if (abs(theta[1])<=min.df) {
min.df <- 0.9*abs(theta[1])
warning("Supplied df below min.df. min.df reset")
}
if (sum(theta<0)) {
iniTheta <- c(log(abs(theta[1])-min.df),log(abs(theta[2]))) ## initial theta supplied
} else { ## fixed theta supplied
iniTheta <- c(log(theta[1]-min.df),log(theta[2]))
n.theta <- 0 ## no thetas to estimate
}
} else iniTheta <- c(-2,-1) ## inital log theta value
env <- new.env(parent = .GlobalEnv)
assign(".Theta", iniTheta, envir = env)
getTheta <- function(trans=FALSE) {
## trans transforms to the original scale...
th <- get(".Theta");min.df <- get(".min.df")
if (trans) { th <- exp(th); th[1] <- th[1] + min.df }
th
}
putTheta <- function(theta) assign(".Theta", theta,envir=environment(sys.function()))
assign(".min.df", min.df, envir = env)
variance <- function(mu) {
th <- get(".Theta")
min.df <- get(".min.df")
nu <- exp(th[1])+min.df; sig <- exp(th[2])
sig^2*nu/(nu-2)
}
validmu <- function(mu) all(is.finite(mu))
dev.resids <- function(y, mu, wt,theta=NULL) {
if (is.null(theta)) theta <- get(".Theta")
min.df <- get(".min.df")
nu <- exp(theta[1])+min.df; sig <- exp(theta[2])
wt * (nu + 1)*log1p((1/nu)*((y-mu)/sig)^2)
}
Dd <- function(y, mu, theta, wt, level=0) {
## derivatives of the scat deviance...
## ltheta <- theta
min.df <- get(".min.df")
nu <- exp(theta[1])+min.df; sig <- exp(theta[2])
nu1 <- nu + 1; ym <- y - mu; nu2 <- nu - min.df;
a <- 1 + (ym/sig)^2/nu
oo <- list()
## get the quantities needed for IRLS.
## Dmu2eta2 is deriv of D w.r.t mu twice and eta twice,
## Dmu is deriv w.r.t. mu once, etc...
nu1ym <- nu1*ym
sig2a <- sig^2*a
nusig2a <- nu*sig2a
f <- nu1ym/nusig2a
f1 <- ym/nusig2a
oo$Dmu <- -2 * wt * f
oo$Dmu2 <- 2 * wt * nu1*(1/nusig2a- 2*f1^2) # - 2*ym^2/(nu^2*sig^4*a^2)
term <- 2*nu1/sig^2/(nu+3)
n <- length(y)
oo$EDmu2 <- rep(term,n)
if (level>0) { ## quantities needed for first derivatives
nu1nusig2a <- nu1/nusig2a
nu2nu <- nu2/nu
fym <- f*ym; ff1 <- f*f1; f1ym <- f1*ym; fymf1 <- fym*f1
ymsig2a <- ym/sig2a
oo$EDmu2th <- oo$Dmu2th <- oo$Dmuth <- oo$Dth <- matrix(0,n,2)
oo$Dth[,1] <- 1 * wt * nu2 * (log(a) - fym/nu)
oo$Dth[,2] <- -2 * wt * fym
oo$Dmuth[,1] <- 2 * wt *(f - ymsig2a - fymf1)*nu2nu
oo$Dmuth[,2] <- 4* wt* f* (1- f1ym)
oo$Dmu3 <- 4 * wt * f * (3/nusig2a - 4*f1^2)
oo$Dmu2th[,1] <- 2* wt * (-nu1nusig2a + 1/sig2a + 5*ff1- 2*f1ym/sig2a - 4*fymf1*f1)*nu2nu
oo$Dmu2th[,2] <- 4*wt*(-nu1nusig2a + ff1*5 - 4*ff1*f1ym)
oo$EDmu3 <- rep(0,n)
oo$EDmu2th <- cbind(4/(sig^2*(nu+3)^2)*exp(theta[1]),-2*oo$EDmu2)
}
if (level>1) { ## whole lot
## nu1nu2 <- nu1*nu2;
nu1nu <- nu1/nu
fymf1ym <- fym*f1ym; f1ymf1 <- f1ym*f1
oo$Dmu4 <- 12 * wt * (-nu1nusig2a/nusig2a + 8*ff1/nusig2a - 8*ff1 *f1^2)
n2d <- 3 # number of the 2nd order derivatives
oo$Dmu3th <- matrix(0,n,2)
oo$Dmu2th2 <- oo$Dmuth2 <- oo$Dth2 <- matrix(0,n,n2d)
oo$Dmu3th[,1] <- 4*wt*(-6*f/nusig2a + 3*f1/sig2a + 18*ff1*f1 - 4*f1ymf1/sig2a - 12*nu1ym*f1^4)*nu2nu
oo$Dmu3th[,2] <- 48*wt* f* (- 1/nusig2a + 3*f1^2 - 2*f1ymf1*f1)
oo$Dth2[,1] <- 1*wt *(nu2*log(a) +nu2nu*ym^2*(-2*nu2-nu1+ 2*nu1*nu2nu - nu1*nu2nu*f1ym)/nusig2a) ## deriv of D w.r.t. theta1 theta1
oo$Dth2[,2] <- 2*wt*(fym - ym*ymsig2a - fymf1ym)*nu2nu ## deriv of D wrt theta1 theta2
oo$Dth2[,3] <- 4 * wt * fym *(1 - f1ym) ## deriv of D wrt theta2 theta2
term <- 2*nu2nu - 2*nu1nu*nu2nu -1 + nu1nu
oo$Dmuth2[,1] <- 2*wt*f1*nu2*(term - 2*nu2nu*f1ym + 4*fym*nu2nu/nu - fym/nu - 2*fymf1ym*nu2nu/nu)
oo$Dmuth2[,2] <- 4*wt* (-f + ymsig2a + 3*fymf1 - ymsig2a*f1ym - 2*fymf1*f1ym)*nu2nu
oo$Dmuth2[,3] <- 8*wt* f * (-1 + 3*f1ym - 2*f1ym^2)
oo$Dmu2th2[,1] <- 2*wt*nu2*(-term + 10*nu2nu*f1ym -
16*fym*nu2nu/nu - 2*f1ym + 5*nu1nu*f1ym - 8*nu2nu*f1ym^2 + 26*fymf1ym*nu2nu/nu -
4*nu1nu*f1ym^2 - 12*nu1nu*nu2nu*f1ym^3)/nusig2a
oo$Dmu2th2[,2] <- 4*wt*(nu1nusig2a - 1/sig2a - 11*nu1*f1^2 + 5*f1ym/sig2a + 22*nu1*f1ymf1*f1 -
4*f1ym^2/sig2a - 12*nu1*f1ymf1^2)*nu2nu
oo$Dmu2th2[,3] <- 8*wt * (nu1nusig2a - 11*nu1*f1^2 + 22*nu1*f1ymf1*f1 - 12*nu1*f1ymf1^2)
}
oo
} ## Dd scat
aic <- function(y, mu, theta=NULL, wt, dev) {
min.df <- get(".min.df")
if (is.null(theta)) theta <- get(".Theta")
nu <- exp(theta[1])+min.df; sig <- exp(theta[2])
term <- -lgamma((nu+1)/2)+ lgamma(nu/2) + log(sig*(pi*nu)^.5) +
(nu+1)*log1p(((y-mu)/sig)^2/nu)/2 ## `-'log likelihood for each observation
2 * sum(term * wt)
} ## aic scat
ls <- function(y,w,theta,scale) {
## the log saturated likelihood function for scat
## (Note these are correct but do not correspond to NP notes)
if (length(w)==1) w <- rep(w,length(y))
#vec <- !is.null(attr(theta,"vec.grad"))
min.df <- get(".min.df")
nu <- exp(theta[1])+min.df; sig <- exp(theta[2]); nu2 <- nu-min.df;
nu2nu <- nu2/nu; nu12 <- (nu+1)/2
term <- lgamma(nu12) - lgamma(nu/2) - log(sig*(pi*nu)^.5)
ls <- sum(term*w)
## first derivative wrt theta...
lsth2 <- matrix(0,2,2) ## rep(0, 3)
term <- nu2 * digamma(nu12)/2- nu2 * digamma(nu/2)/2 - 0.5*nu2nu
LSTH <- cbind(w*term,-1*w)
lsth <- colSums(LSTH)
## second deriv...
term <- nu2^2 * trigamma(nu12)/4 + nu2 * digamma(nu12)/2 -
nu2^2 * trigamma(nu/2)/4 - nu2 * digamma(nu/2)/2 + 0.5*(nu2nu)^2 - 0.5*nu2nu
lsth2[1,1] <- sum(term*w)
lsth2[1,2] <- lsth2[2,1] <- lsth2[2,2] <- 0
list(ls=ls,## saturated log likelihood
lsth1=lsth, ## first derivative vector wrt theta
LSTH1=LSTH,
lsth2=lsth2) ## Hessian wrt theta
} ## ls scat
preinitialize <- function(y,family) {
## initialize theta from raw observations..
if (family$n.theta>0) {
## low df and low variance promotes indefiniteness.
## Better to start with moderate df and fairly high
## variance...
Theta <- c(1.5, log(0.8*sd(y)))
return(list(Theta=Theta))
} ## otherwise fixed theta supplied
} ## preinitialize scat
initialize <- expression({
if (any(is.na(y))) stop("NA values not allowed for the scaled t family")
##n <- rep(1, nobs)
mustart <- y + (y == 0)*.1
})
postproc <- function(family,y,prior.weights,fitted,linear.predictors,offset,intercept) {
posr <- list()
posr$null.deviance <- find.null.dev(family,y,eta=linear.predictors,offset,prior.weights)
th <- round(family$getTheta(TRUE),3)
if (th[1]>999) th[1] <- Inf
posr$family <- paste("Scaled t(",paste(th,collapse=","),")",sep="")
posr
} ## postproc scat
rd <- function(mu,wt,scale) {
## simulate data given fitted latent variable in mu
theta <- get(".Theta");min.df <- get(".min.df")
nu <- exp(theta[1])+min.df; sig <- exp(theta[2])
n <- length(mu)
stats::rt(n=n,df=nu)*sig + mu
} ## rd scat
environment(dev.resids) <- environment(aic) <- environment(getTheta) <- environment(Dd) <-
environment(ls) <- environment(rd)<- environment(variance) <- environment(putTheta) <- env
structure(list(family = "scaled t", link = linktemp, linkfun = stats$linkfun,
linkinv = stats$linkinv, dev.resids = dev.resids,Dd=Dd,variance=variance,postproc=postproc,
aic = aic, mu.eta = stats$mu.eta, initialize = initialize,ls=ls, preinitialize=preinitialize,
validmu = validmu, valideta = stats$valideta,n.theta=n.theta,
ini.theta = iniTheta,putTheta=putTheta,getTheta=getTheta, rd=rd),
class = c("extended.family","family"))
} ## scat
## zero inflated Poisson (Simon Wood)...
lind <- function(l,th,deriv=0,k=0) {
## evaluate th[1] + exp(th[2])*l and some derivs
th[2] <- exp(th[2])
r <- list(p = th[1] + (k+th[2])*l)
r$p.l <- k + th[2] ## p_l
r$p.ll <- 0 ## p_ll
if (deriv) {
n <- length(l);
r$p.lllth <- r$p.llth <- r$p.lth <- r$p.th <- matrix(0,n,2)
r$p.th[,1] <- 1 ## dp/dth1
r$p.th[,2] <- th[2]*l ## dp/dth2
r$p.lth[,2] <- th[2] ## p_lth2
r$p.llll <- r$p.lll <- 0 ## p_lll,p_llll
r$p.llth2 <- r$p.lth2 <- r$p.th2 <- matrix(0,n,3) ## ordered l_th1th1,l_th1th2,l_th2th2
r$p.th2[,3] <- l*th[2] ## p_th2th2
r$p.lth2[,3] <- th[2] ## p_lth2th2
}
r
} ## lind
logid <- function(l,th,deriv=0,a=0,trans=TRUE) {
## evaluate exp(th[1]+th[2]*l)/(1+exp(th[1]+th[2]*l))
## and some of its derivatives
## if trans==TRUE then it is assumed that the
## transformation th[2] = exp(th[2]) is applied on input
b <- 1-2*a
## x is dth[2]/dth[2]' where th[2]' is input version, xx is second deriv over first
if (trans) { xx <- 1; x <- th[2] <- exp(th[2])} else { x <- 1;xx <- 0}
p <- f <- th[1] + th[2] * l
ind <- f > 0; ef <- exp(f[!ind])
p[!ind] <- ef/(1+ef); p[ind] <- 1/(1+exp(-f[ind]))
r <- list(p = a + b * p)
a1 <- p*(1-p); a2 <- p*(p*(2*p-3)+1)
r$p.l <- b * th[2]*a1; ## p_l
r$p.ll <- b * th[2]^2*a2 ## p_ll
if (deriv>0) {
n <- length(l); r$p.lth <- r$p.th <- matrix(0,n,2)
r$p.th[,1] <- b * a1 ## dp/dth1
r$p.th[,2] <- b * l*a1 * x ## dp/dth2
r$p.lth[,1] <- b * th[2]*a2 ## p_lth1
r$p.lth[,2] <- b * (l*th[2]*a2 + a1) * x ## p_lth2
a3 <- p*(p*(p*(-6*p + 12) -7)+1)
r$p.lll <- b * th[2]^3*a3 ## p_lll
r$p.llth <- matrix(0,n,2)
r$p.llth[,1] <- b * th[2]^2 * a3 ## p_llth1
r$p.llth[,2] <- b * (l*th[2]^2*a3 + 2*th[2]*a2) * x ## p_ppth2
a4 <- p*(p*(p*(p*(p*24-60)+50)-15)+1)
r$p.llll <- b * th[2]^4*a4 ## p_llll
r$p.lllth <- matrix(0,n,2)
r$p.lllth[,1] <- b * th[2]^3*a4 ## p_lllth1
r$p.lllth[,2] <- b * (th[2]^3*l*a4 + 3*th[2]^2*a3) * x ## p_lllth2
r$p.llth2 <- r$p.lth2 <- r$p.th2 <- matrix(0,n,3) ## ordered l_th1th1,l_th1th2,l_th2th2
r$p.th2[,1] <- b * a2 ## p_th1th1
r$p.th2[,2] <- b * l*a2 * x ## p_th1th2
r$p.th2[,3] <- b * l*l*a2 * x * x + xx* r$p.th[,2] ## p_th2th2
r$p.lth2[,1] <- b * th[2]*a3 ## p_lth1th1
r$p.lth2[,2] <- b * (th[2]*l*a3 + a2) * x ## p_lth1th2
r$p.lth2[,3] <- b * (l*l*a3*th[2] + 2*l*a2) *x * x + xx*r$p.lth[,2] ## p_lth2th2
r$p.llth2[,1] <- b * th[2]^2*a4 ## p_llth1th1
r$p.llth2[,2] <- b * (th[2]^2*l*a4 + 2*th[2]*a3) *x ## p_llth1th2
r$p.llth2[,3] <- b * (l*l*th[2]^2*a4 + 4*l*th[2]*a3 + 2*a2) *x*x + xx*r$p.llth[,2] ## p_llth2th2
}
r
} ## logid
ziP <- function (theta = NULL, link = "identity",b=0) {
## zero inflated Poisson parameterized in terms of the log Poisson parameter, gamma.
## eta = theta[1] + exp(theta[2])*gamma), and 1-p = exp(-exp(eta)) where p is
## probability of presence.
linktemp <- substitute(link)
if (!is.character(linktemp)) linktemp <- deparse(linktemp)
if (linktemp %in% c("identity")) {
stats <- make.link(linktemp)
} else stop(linktemp, " link not available for zero inflated; available link for `lambda' is only \"loga\"")
## Theta <- NULL;
n.theta <- 2
if (!is.null(theta)) {
## fixed theta supplied
iniTheta <- c(theta[1],theta[2])
n.theta <- 0 ## no thetas to estimate
} else iniTheta <- c(0,0) ## inital theta value - start at Poisson
env <- new.env(parent = environment(ziP))# new.env(parent = .GlobalEnv)
if (b<0) b <- 0; assign(".b", b, envir = env)
assign(".Theta", iniTheta, envir = env)
getTheta <- function(trans=FALSE) {
## trans transforms to the original scale...
th <- get(".Theta")
if (trans) {
th[2] <- get(".b") + exp(th[2])
}
th
}
putTheta <- function(theta) assign(".Theta", theta,envir=environment(sys.function()))
validmu <- function(mu) all(is.finite(mu))
dev.resids <- function(y, mu, wt,theta=NULL) {
## this version ignores saturated likelihood
if (is.null(theta)) theta <- get(".Theta")
b <- get(".b")
p <- theta[1] + (b + exp(theta[2])) * mu ## l.p. for prob present
-2*zipll(y,mu,p,deriv=0)$l
}
Dd <- function(y, mu, theta, wt=NULL, level=0) {
## here mu is lin pred for Poisson mean so E(y) = exp(mu)
## Deviance for log lik of zero inflated Poisson.
## code here is far more general than is needed - could deal
## with any 2 parameter mapping of lp of mean to lp of prob presence.
if (is.null(theta)) theta <- get(".Theta")
deriv <- 1; if (level==1) deriv <- 2 else if (level>1) deriv <- 4
b <- get(".b")
g <- lind(mu,theta,level,b) ## the derviatives of the transform mapping mu to p
z <- zipll(y,mu,g$p,deriv)
oo <- list();n <- length(y)
if (is.null(wt)) wt <- rep(1,n)
oo$Dmu <- -2*wt*(z$l1[,1] + z$l1[,2]*g$p.l)
oo$Dmu2 <- -2*wt*(z$l2[,1] + 2*z$l2[,2]*g$p.l + z$l2[,3]*g$p.l^2 + z$l1[,2]*g$p.ll)
## WARNING: following requires z$El1 term to be added if transform modified so
## that g$p.ll != 0....
oo$EDmu2 <- -2*wt*(z$El2[,1] + 2*z$El2[,2]*g$p.l + z$El2[,3]*g$p.l^2)
if (level>0) { ## l,p - ll,lp,pp - lll,llp,lpp,ppp - llll,lllp,llpp,lppp,pppp
oo$Dth <- -2*wt*z$l1[,2]*g$p.th ## l_p p_th
oo$Dmuth <- -2*wt*(z$l2[,2]*g$p.th + z$l2[,3]*g$p.l*g$p.th + z$l1[,2]*g$p.lth)
oo$Dmu2th <- -2*wt*(z$l3[,2]*g$p.th + 2*z$l3[,3]*g$p.l*g$p.th + 2* z$l2[,2]*g$p.lth +
z$l3[,4]*g$p.l^2*g$p.th + z$l2[,3]*(2*g$p.l*g$p.lth + g$p.th*g$p.ll) + z$l1[,2]*g$p.llth)
oo$Dmu3 <- -2*wt*(z$l3[,1] + 3*z$l3[,2]*g$p.l + 3*z$l3[,3]*g$p.l^2 + 3*z$l2[,2]*g$p.ll +
z$l3[,4]*g$p.l^3 +3*z$l2[,3]*g$p.l*g$p.ll + z$l1[,2]*g$p.lll)
}
if (level>1) {
p.thth <- matrix(0,n,3);p.thth[,1] <- g$p.th[,1]^2
p.thth[,2] <- g$p.th[,1]*g$p.th[,2];p.thth[,3] <- g$p.th[,2]^2
oo$Dth2 <- -2*wt*(z$l2[,3]*p.thth + z$l1[,2]*g$p.th2)
p.lthth <- matrix(0,n,3);p.lthth[,1] <- g$p.th[,1]*g$p.lth[,1]*2
p.lthth[,2] <- g$p.th[,1]*g$p.lth[,2] + g$p.th[,2]*g$p.lth[,1];
p.lthth[,3] <- g$p.th[,2]*g$p.lth[,2]*2
oo$Dmuth2 <- -2*wt*( z$l3[,3]*p.thth + z$l2[,2]*g$p.th2 + z$l3[,4]*g$p.l*p.thth +
z$l2[,3]*(g$p.th2*g$p.l + p.lthth) + z$l1[,2]*g$p.lth2)
p.lthlth <- matrix(0,n,3);p.lthlth[,1] <- g$p.lth[,1]*g$p.lth[,1]*2
p.lthlth[,2] <- g$p.lth[,1]*g$p.lth[,2] + g$p.lth[,2]*g$p.lth[,1];
p.lthlth[,3] <- g$p.lth[,2]*g$p.lth[,2]*2
p.llthth <- matrix(0,n,3);p.llthth[,1] <- g$p.th[,1]*g$p.llth[,1]*2
p.llthth[,2] <- g$p.th[,1]*g$p.llth[,2] + g$p.th[,2]*g$p.llth[,1];
p.llthth[,3] <- g$p.th[,2]*g$p.llth[,2]*2
oo$Dmu2th2 <- -2*wt*(z$l4[,3]*p.thth + z$l3[,2]*g$p.th2 + 2*z$l4[,4] * p.thth *g$p.l + 2*z$l3[,3]*(g$p.th2*g$p.l + p.lthth) +
2*z$l2[,2]*g$p.lth2 + z$l4[,5]*p.thth*g$p.l^2 + z$l3[,4]*(g$p.th2*g$p.l^2 + 2*p.lthth*g$p.l + p.thth*g$p.ll) +
z$l2[,3]*(p.lthlth + 2*g$p.l*g$p.lth2 + p.llthth + g$p.th2*g$p.ll) + z$l1[,2]*g$p.llth2)
oo$Dmu3th <- -2*wt*(z$l4[,2]*g$p.th + 3*z$l4[,3]*g$p.th*g$p.l + 3*z$l3[,2]*g$p.lth + 2*z$l4[,4]*g$p.th*g$p.l^2 +
z$l3[,3]*(6*g$p.lth*g$p.l + 3*g$p.th*g$p.ll) + 3*z$l2[,2]*g$p.llth + z$l4[,4]*g$p.th*g$p.l^2 +
z$l4[,5]*g$p.th*g$p.l^3 + 3*z$l3[,4]*(g$p.l^2*g$p.lth + g$p.th*g$p.l*g$p.ll) +
z$l2[,3]*(3*g$p.lth*g$p.ll + 3*g$p.l*g$p.llth + g$p.th*g$p.lll) + z$l1[,2]*g$p.lllth)
oo$Dmu4 <- -2*wt*(z$l4[,1] + 4*z$l4[,2]*g$p.l + 6*z$l4[,3]*g$p.l^2 + 6*z$l3[,2]*g$p.ll +
4*z$l4[,4]*g$p.l^3 + 12*z$l3[,3]*g$p.l*g$p.ll + 4*z$l2[,2]*g$p.lll + z$l4[,5] * g$p.l^4 +
6*z$l3[,4]*g$p.l^2*g$p.ll + z$l2[,3] *(4*g$p.l*g$p.lll + 3*g$p.ll^2) + z$l1[,2]*g$p.llll)
}
oo
} ## Dd ziP
aic <- function(y, mu, theta=NULL, wt, dev) {
if (is.null(theta)) theta <- get(".Theta")
b <- get(".b")
p <- theta[1] + (b+ exp(theta[2])) * mu ## l.p. for prob present
sum(-2*wt*zipll(y,mu,p,0)$l)
}
ls <- function(y,w,theta,scale) {
## the log saturated likelihood function for ziP
## ls is defined as zero for REML/ML expression as deviance is defined as -2*log.lik
#vec <- !is.null(attr(theta,"vec.grad"))
#lsth1 <- if (vec) matrix(0,length(y),2) else c(0,0)
list(ls=0,## saturated log likelihood
lsth1=c(0,0), ## first deriv vector w.r.t theta - last element relates to scale
LSTH1=matrix(0,length(y),2),
lsth2=matrix(0,2,2)) ##Hessian w.r.t. theta
} ## ls ZiP
initialize <- expression({
if (any(y < 0)) stop("negative values not allowed for the zero inflated Poisson family")
if (all.equal(y,round(y))!=TRUE) {
stop("Non-integer response variables are not allowed with ziP ")
}
if ((min(y)==0&&max(y)==1)) stop("Using ziP for binary data makes no sense")
##n <- rep(1, nobs)
mustart <- log(y + (y==0)/5)
})
## compute family label, deviance, null.deviance...
## requires prior weights, y, family, linear predictors
postproc <- function(family,y,prior.weights,fitted,linear.predictors,offset,intercept) {
posr <- list()
posr$family <-
paste("Zero inflated Poisson(",paste(round(family$getTheta(TRUE),3),collapse=","),")",sep="")
## need to fix deviance here!!
## wts <- object$prior.weights
lf <- family$saturated.ll(y,family,prior.weights)
## storing the saturated loglik for each datum...
##object$family$data <- list(ls = lf)
l2 <- family$dev.resids(y,linear.predictors,prior.weights)
posr$deviance <- sum(l2-lf)
fnull <- function(gamma,family,y,wt) {
## evaluate deviance for single parameter model
sum(family$dev.resids(y, rep(gamma,length(y)), wt))
}
meany <- mean(y)
posr$null.deviance <-
optimize(fnull,interval=c(meany/5,meany*3),family=family,y=y,wt = prior.weights)$objective - sum(lf)
## object$weights <- pmax(0,object$working.weights) ## Fisher can be too extreme
## E(y) = p * E(y) - but really can't mess with fitted.values if e.g. rd is to work.
posr
} ## postproc ZiP
rd <- function(mu,wt,scale) {
## simulate data given fitted latent variable in mu
rzip <- function(gamma,theta) { ## generate ziP deviates according to model and lp gamma
y <- gamma; n <- length(y)
lambda <- exp(gamma)
mlam <- max(c(lambda[is.finite(lambda)],.Machine$double.eps^.2))
lambda[!is.finite(lambda)] <- mlam
b <- get(".b")
eta <- theta[1] + (b+exp(theta[2]))*gamma
p <- 1- exp(-exp(eta))
ind <- p > runif(n)
y[!ind] <- 0
#np <- sum(ind)
## generate from zero truncated Poisson, given presence...
lami <- lambda[ind]
yi <- p0 <- dpois(0,lami)
nearly1 <- 1 - .Machine$double.eps*10
ii <- p0 > nearly1
yi[ii] <- 1 ## lambda so low that almost certainly y=1
yi[!ii] <- qpois(runif(sum(!ii),p0[!ii],nearly1),lami[!ii])
y[ind] <- yi
y
}
rzip(mu,get(".Theta"))
} ## rd ZiP
saturated.ll <- function(y,family,wt=rep(1,length(y))) {
## function to get saturated ll for ziP -
## actually computes -2 sat ll.
pind <- y>0 ## only these are interesting
wt <- wt[pind]
y <- y[pind];
mu <- log(y)
keep.on <- TRUE
theta <- family$getTheta()
r <- family$Dd(y,mu,theta,wt)
l <- family$dev.resids(y,mu,wt,theta)
lmax <- max(abs(l))
ucov <- abs(r$Dmu) > lmax*1e-7
k <- 0
while (keep.on) {
step <- -r$Dmu/r$Dmu2
step[!ucov] <- 0
mu1 <- mu + step
l1 <- family$dev.resids(y,mu1,wt,theta)
ind <- l1>l & ucov
kk <- 0
while (sum(ind)>0&&kk<50) {
step[ind] <- step[ind]/2
mu1 <- mu + step
l1 <- family$dev.resids(y,mu1,wt,theta)
ind <- l1>l & ucov
kk <- kk + 1
}
mu <- mu1;l <- l1
r <- family$Dd(y,mu,theta,wt)
ucov <- abs(r$Dmu) > lmax*1e-7
k <- k + 1
if (all(!ucov)||k==100) keep.on <- FALSE
}
l1 <- rep(0,length(pind));l1[pind] <- l
l1
} ## saturated.ll ZiP
residuals <- function(object,type=c("deviance","working","response")) {
if (type == "working") {
res <- object$residuals
} else if (type == "response") {
res <- object$y - predict.gam(object,type="response")
} else if (type == "deviance") {
y <- object$y
mu <- object$linear.predictors
wts <- object$prior.weights
res <- object$family$dev.resids(y,mu,wts)
## next line is correct as function returns -2*saturated.log.lik
res <- res - object$family$saturated.ll(y,object$family,wts)
fv <- predict.gam(object,type="response")
s <- attr(res,"sign")
if (is.null(s)) s <- sign(y-fv)
res <- as.numeric(sqrt(pmax(res,0)) * s)
}
res
} ## residuals (ziP)
predict <- function(family,se=FALSE,eta=NULL,y=NULL,X=NULL,
beta=NULL,off=NULL,Vb=NULL) {
## optional function to give predicted values - idea is that
## predict.gam(...,type="response") will use this, and that
## either eta will be provided, or {X, beta, off, Vb}. family$data
## contains any family specific extra information.
theta <- family$getTheta()
if (is.null(eta)) { ## return probabilities
discrete <- is.list(X)
## linear predictor for poisson parameter...
gamma <- off + if (discrete) Xbd(X$Xd,beta,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop) else drop(X%*%beta)
if (se) {
se <- if (discrete) sqrt(pmax(0,diagXVXd(X$Xd,Vb,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,nthreads=1))) else
sqrt(pmax(0,rowSums((X%*%Vb)*X))) ## se of lin pred
} else se <- NULL
#gamma <- drop(X%*%beta + off) ## linear predictor for poisson parameter
#se <- if (se) drop(sqrt(pmax(0,rowSums((X%*%Vb)*X)))) else NULL ## se of lin pred
} else { se <- NULL; gamma <- eta}
## now compute linear predictor for probability of presence...
b <- get(".b")
eta <- theta[1] + (b+exp(theta[2]))*gamma
et <- exp(eta)
mu <- p <- 1 - exp(-et)
fv <- lambda <- exp(gamma)
ind <- gamma < log(.Machine$double.eps)/2
mu[!ind] <- lambda[!ind]/(1-exp(-lambda[!ind]))
mu[ind] <- 1
fv <- list(p*mu) ## E(y)
if (is.null(se)) return(fv) else {
dp.dg <- p
# ind <- eta < log(.Machine$double.xmax)/2
# dp.dg[!ind] <- 0
dp.dg <- exp(-et)*et*(b + exp(theta[2]))
dmu.dg <- (lambda + 1)*mu - mu^2
fv[[2]] <- abs(dp.dg*mu+dmu.dg*p)*se
names(fv) <- c("fit","se.fit")
return(fv)
}
} ## predict ZiP
environment(saturated.ll) <- environment(dev.resids) <- environment(Dd) <-
environment(aic) <- environment(getTheta) <- environment(rd) <- environment(predict) <-
environment(putTheta) <- env
structure(list(family = "zero inflated Poisson", link = linktemp, linkfun = stats$linkfun,
linkinv = stats$linkinv, dev.resids = dev.resids,Dd=Dd, rd=rd,residuals=residuals,
aic = aic, mu.eta = stats$mu.eta, g2g = stats$g2g,g3g=stats$g3g, g4g=stats$g4g,
#preinitialize=preinitialize,
initialize = initialize,postproc=postproc,ls=ls,no.r.sq=TRUE,
validmu = validmu, valideta = stats$valideta,n.theta=n.theta,predict=predict,
ini.theta = iniTheta,putTheta=putTheta,getTheta=getTheta,saturated.ll = saturated.ll),
class = c("extended.family","family"))
} ## ziP
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.