# R/gamlss.r In mgcv: Mixed GAM Computation Vehicle with Automatic Smoothness Estimation

#### Documents: gamlss.etamu, gamlss.gH, gammals, gaulss, gevlss, gumbls, multinom, shash, trind.generator, twlss, zipll, ziplss

## (c) Simon N. Wood (2013-2022) distributed under GPL2
## Code for the gamlss families.
## idea is that there are standard functions converting
## derivatives w.r.t. mu to derivatives w.r.t. eta, given
## the links and derivatives.
## Then there are standard routines to take the family
## specific derivatives and the model matrices, and convert
## these to the required gradient, hessian, etc...

## Storage conventions:
## 1. A single model matrix is stored, along with a single param vector.
##    an index object associates columns with particular gamlss predictors.
## 2. Distribution specific derivatives are stored in d1l-d4l.

## Need to somehow record block starts...
## idea is that if n blocks are stored using loops with the
## given l >= k >= j >= i structure then the block for index
## i,j,k,l starts at i4[i,j,k,l]*n+1, given symmetry over the indices.

trind.generator <- function(K=2,ifunc=FALSE,reverse=!ifunc) {
## Generates index arrays (or functions) for 'upper triangular' storage up to order 4.
## Suppose you fill an array using code like...
##   m = 1
##   for (i in 1:K) for (j in i:K) for (k in j:K) for (l in k:K) {
##     a[,m] <- something; m <- m+1 }
## ... and do this because actually the same 'something' would
## be stored for any permutation of the indices i,j,k,l.
## Clearly in storage we have the restriction l>=k>=j>=i,
## but for access we want no restriction on the indices.
## i4[i,j,k,l] produces the appropriate m for unrestricted
## indices. i3 and i2 do the same for 3d and 2d arrays.
## If ifunc==TRUE then rather than index arrays, index functions
## are returned, so e.g. i4(i,j,k,l) is equivalent to the above.
## Index functions require less storage for high K.
## ixr will extract the unique elements from an x dimensional
## upper triangular (symmetric) array in the correct packing order.
## Note that there are K*(K+1)/2 unique entries in a 2d array,
## (K*(K+3)+2)*K/6 in a 3d array and (6+K*11+K^2*6+K^3)*K/24
## in a 4d array.
  m.start <- 1 ## packed storage is 1-based
  if (ifunc) { ## return index functions
    ## The closed-form index expressions only depend on K through fixed
    ## constants, so those constants are baked into the function text as
    ## literals (hence the eval(parse(...))) making the returned closures
    ## self contained and cheap to call.
    eval(parse(text= paste("i2 <- function(i,j) {\n",
      "  if (i>j) {ii <- i;i <- j;j <- ii}\n",
      "  (i-1)*(",2*K+2,"-i)/2 +j-i+1\n}",sep="")))

    eval(parse(text=paste("i3 <- function(i,j,k) {\n",
      "  if (i>j||j>k) { \n    ii <- sort(c(i,j,k))\n",
      "    i <- ii[1];j <- ii[2];k <- ii[3]\n  }\n",
      "  (i-1)*(",3*K*(K+1),"+(i-2)*(i-",3*(K+1),"))/6 + (j-i)*(",2*K+3,"-i-j)/2+k-j+1 \n}",
      sep="")))

    eval(parse(text=paste("i4 <- function(i,j,k,l) {\n",
      "  if (i>j||j>k||k>l) { \n    ii <- sort(c(i,j,k,l))\n",
      "    i <- ii[1];j <- ii[2];k <- ii[3];l <- ii[4]\n  }\n",
      "  i1 <- i-1;i2 <- i-2; i1i2 <- i1*i2/2\n",
      "  l-k+1 + (k-j)*(",2*K+3,"-j-k)/2 +",
      "  (j-i)*(3*(",K+1,"-i)^2+3*(",K+1,"-i) + (j-i-1)*(j+2*i-",3*K+5,"))/6 +\n",
      "  (i1*",K^3+3*K^2+2*K,"+i1i2*(",K+1,"*(2*i-3) - ",3*K^2+6*K+2,"-i1i2))/6\n}",
      sep="")))
  } else { ## return index arrays
    ## fill the packed index m into every permutation of each i<=j<=k<=l
    ## combination - all 24 permutations listed explicitly...
    i4 <- array(0,dim=c(K,K,K,K))
    m <- m.start
    for (i in 1:K) for (j in i:K) for (k in j:K) for (l in k:K) {
      i4[i,j,k,l] <- i4[i,j,l,k] <- i4[i,k,l,j] <- i4[i,k,j,l] <- i4[i,l,j,k] <-
      i4[i,l,k,j] <- i4[j,i,k,l] <- i4[j,i,l,k] <- i4[j,k,l,i] <- i4[j,k,i,l] <-
      i4[j,l,i,k] <- i4[j,l,k,i] <- i4[k,j,i,l] <- i4[k,j,l,i] <- i4[k,i,l,j] <-
      i4[k,i,j,l] <- i4[k,l,j,i] <- i4[k,l,i,j] <- i4[l,j,k,i] <- i4[l,j,i,k] <-
      i4[l,k,i,j] <- i4[l,k,j,i] <- i4[l,i,j,k] <- i4[l,i,k,j] <- m
      m <- m + 1
    }

    i3 <- array(0,dim=c(K,K,K)) ## all 6 permutations per combination
    m <- m.start
    for (j in 1:K) for (k in j:K) for (l in k:K) {
      i3[j,k,l] <- i3[j,l,k] <- i3[k,l,j] <- i3[k,j,l] <- i3[l,j,k] <-
      i3[l,k,j] <- m
      m <- m + 1
    }

    i2 <- array(0,dim=c(K,K)) ## both permutations per combination
    m <- m.start
    for (k in 1:K) for (l in k:K) {
      i2[k,l] <- i2[l,k] <- m
      m <- m + 1
    }
  }
  ## now create the reverse indices: ixr[m] is the linear index into the
  ## full K^x array of the mth uniquely stored element...
  if (reverse) {
    m <- m.start
    maxi4 <- if (ifunc) i4(K,K,K,K) else i4[K,K,K,K]
    i4r <- rep(0,maxi4) ## extracts the unique elements from a symmetric array in packing order.
    for (i in 1:K) for (j in i:K) for (k in j:K) for (l in k:K) {
      i4r[m] <- l + (k-1)*K + (j-1)*K^2 + (i-1)*K^3
      m <- m + 1
    }
    m <- m.start
    maxi3 <- if (ifunc) i3(K,K,K) else i3[K,K,K]
    i3r <- rep(0,maxi3) ## extracts the unique elements from a symmetric array in packing order.
    for (j in 1:K) for (k in j:K) for (l in k:K) {
      i3r[m] <- l + (k-1)*K + (j-1)*K^2
      m <- m + 1
    }
    m <- m.start
    maxi2 <- if (ifunc) i2(K,K) else i2[K,K]
    i2r <- rep(0,maxi2) ## extracts the unique elements from a symmetric array in packing order.
    for (k in 1:K) for (l in k:K) {
      i2r[m] <- l + (k-1)*K
      m <- m + 1
    }
  } else i2r <- i3r <- i4r <- NULL
  list(i2=i2,i3=i3,i4=i4,i2r=i2r,i3r=i3r,i4r=i4r)
} ## trind.generator

gamlss.ncv <- function(X,y,wt,nei,beta,family,llf,H=NULL,Hi=NULL,R=NULL,offset=NULL,dH=NULL,db=NULL,deriv=FALSE,nt=1) {
## computes the neighbourhood cross validation score and its derivative for a
## gamlss model. llf is what was returned by family$ll when ncv info requested.
## If derivs not required then ll must be called with deriv >=1, otherwise deriv >= 3.
## To enable NCV for a gamlss family:
## 1. the 'll' function must be modified to have an 'ncv' argument. When this is TRUE and
##    deriv!=0 then ll should return l1, l2 and l3 the derivatives of the log likelihood
##    w.r.t. the linear predictors (typically returned from gamlss.mueta).
## 2. the 'll' function must have an eta argument allowing the linear predictors to be
##    supplied directly, rather than being computed from X and beta.
## 3. The family must contain an 'ncv' wrapper function, which simply calls this function.
## ... gaulss provides an example.
## The cross-validated linear predictors (and their derivatives w.r.t. log
## smoothing parameters if deriv>0) are computed in compiled code (C_ncvls or,
## given a factor R, C_Rncvls), then assembled into the NCV criterion here.
jj <- attr(X,"lpi") ## extract linear predictor index
nlp <- length(jj); n <- nrow(X)
if (deriv>0) { ## need d eta / d rho, linear predictors stacked on top of each other
nsp <- ncol(db) ## number of smoothing parameters
deta <- matrix(0,n*nlp,nsp)
ind <- 1:n
for (i in 1:nlp) {
deta[ind,] <- X[,jj[[i]],drop=FALSE] %*% db[jj[[i]],,drop=FALSE]
ind <- ind + n
}
} else deta <- 0.0
## full data linear predictors (one column per lp) - needed below for the
## quadratic approximation and the deviance terms...
eta <- matrix(0,n,nlp)
for (i in 1:nlp) eta[,i] <- X[,jj[[i]],drop=FALSE] %*% beta[jj[[i]]]
nm <- length(nei$i) ## number of points requiring cross validated prediction
eta.cv <- matrix(0,nm,nlp) ## cross-validated linear predictors, filled by C code

## deta.cv: derivs of eta.cv w.r.t. smoothing params if deriv>0; coefficient
## perturbations if deriv<0; unused otherwise...
deta.cv <- if (deriv>0) matrix(0,nm*nlp,nsp) else if (deriv<0) matrix(0,length(nei$m),length(beta))  else 0.0
if (is.null(R)) { ## Cholesky/Hessian based update
cg.iter <- .Call(C_ncvls,X,jj,H,Hi,dH,llf$l1,llf$l2,llf$l3,nei$i-1,nei$mi,nei$m,nei$k-1,beta,eta.cv,deta.cv,
deta,db,deriv)
} else { ## R factor based update
cg.iter <- .Call(C_Rncvls,X,jj,R,dH,llf$l1,llf$l2,llf$l3,nei$i-1,nei$mi,nei$m,nei$k-1,beta,eta.cv,deta.cv,
deta,db,deriv,.Machine$double.eps,nt)
}
if (!is.null(offset)) { ## add any offsets to the cross-validated predictors
for (i in 1:ncol(eta.cv)) if (i <= length(offset)&&!is.null(offset[[i]])) eta.cv[,i] <- eta.cv[,i] + offset[[i]][nei$i]
}
## ll must be set up to return l1..l3 as derivs w.r.t. linear predictors if ncv=TRUE
ncv1 <- NULL
gamma <- llf$gamma;qapprox <- family$qapprox
## dev: full-fit log likelihood contribution, needed when gamma!=1 (robustified
## NCV) or for the quadratic approximation...
dev <- if (gamma!=1||qapprox) -sum(family$ll(y,X,beta,wt,family,offset,deriv=0,db,eta=eta,ncv=TRUE)$l0[nei$i]) else 0
if (qapprox) { ## quadratic approximate version
ncv <-  dev - gamma*sum(llf$l1[nei$i,]*(eta.cv-eta[nei$i,])) ## first order term
k <- 0
for (i in 1:nlp) for (j in i:nlp) { ## second order terms
k <- k  + 1
ncv <- ncv - 0.5*gamma*(1+(i!=j))*sum(llf$l2[nei$i,k]*(eta.cv[,i]-eta[nei$i,i])*(eta.cv[,j]-eta[nei$i,j])) ## symmetric term
}
if (deriv>0) { ## accumulate per-observation derivative contributions (rows), summed over at the end
#ncv1 <- -colSums(as.numeric(llf$l1[nei$i,])*(deta.cv*gamma+(1-gamma)*deta))
rowk <- 1:nm
ncv1 <- -as.numeric(llf$l1[nei$i,1])*(deta.cv[rowk,]*gamma+(1-gamma)*deta[rowk,])
if (nlp>1) for (j in 2:nlp) {
rowk <- rowk + nm
ncv1 <- ncv1 - as.numeric(llf$l1[nei$i,j])*(deta.cv[rowk,]*gamma+(1-gamma)*deta[rowk,])
}
kk <- 0;jj <- 0 ## counters into packed l2 and l3 columns
for (j in 1:nlp) for (k in j:nlp) {
kk <- kk  + 1
rowk <- 1:nm+(k-1)*nm ## rows of stacked deta/deta.cv for lp k
rowj <- 1:nm+(j-1)*nm ## rows for lp j
#ncv1 <- ncv1 - colSums(llf$l2[nei$i,kk]*((deta[1:nm+(k-1)*nm,] + deta.cv[1:nm+(k-1)*nm,])*(eta.cv[,j]-eta[nei$i,j]) +
#                   (eta.cv[,k]-eta[nei$i,k])*(deta.cv[1:nm+(j-1)*nm,] - deta[nei$i+(j-1)*n,])))*gamma*.5
ncv1 <- ncv1 - llf$l2[nei$i,kk]*((deta[rowk,] + deta.cv[rowk,])*(eta.cv[,j]-eta[nei$i,j]) +
(eta.cv[,k]-eta[nei$i,k])*(deta.cv[rowj,] - deta[nei$i+(j-1)*n,]))*gamma*.5
#if (j!=k) ncv1 <- ncv1 - colSums(llf$l2[nei$i,kk]*((deta[1:nm+(j-1)*nm,] + deta.cv[1:nm+(j-1)*nm,])*(eta.cv[,k]-eta[nei$i,k]) +
#                   (eta.cv[,j]-eta[nei$i,j])*(deta.cv[1:nm+(k-1)*nm,] - deta[nei$i+(k-1)*n,])))*gamma*.5
if (j!=k) ncv1 <- ncv1 - llf$l2[nei$i,kk]*((deta[rowj,] + deta.cv[rowj,])*(eta.cv[,k]-eta[nei$i,k]) +
(eta.cv[,j]-eta[nei$i,j])*(deta.cv[rowk,] - deta[nei$i+(k-1)*n,]))*gamma*.5
for (l in k:nlp) { ## third derivative terms from dependence of l2 on eta
jj <- jj + 1
#ncv1 <- ncv1 - (1+(j!=k)) * gamma*.5 * colSums(
#        llf$l3[nei$i,jj]*deta[nei$i+(l-1)*n,]*(eta.cv[,k]-eta[nei$i,k])*(eta.cv[,j]-eta[nei$i,j]))
ncv1 <- ncv1 - (1+(j!=k)) * gamma*.5 * llf$l3[nei$i,jj]*deta[nei$i+(l-1)*n,]*(eta.cv[,k]-eta[nei$i,k])*(eta.cv[,j]-eta[nei$i,j])
#if (l!=k) ncv1 <- ncv1 - (1+(l!=j&&j!=k)) * gamma * .5 * colSums(
#        llf$l3[nei$i,jj]*deta[nei$i+(k-1)*n,]*(eta.cv[,l]-eta[nei$i,l])*(eta.cv[,j]-eta[nei$i,j]))
if (l!=k) ncv1 <- ncv1 - (1+(l!=j&&j!=k)) * gamma * .5 *
llf$l3[nei$i,jj]*deta[nei$i+(k-1)*n,]*(eta.cv[,l]-eta[nei$i,l])*(eta.cv[,j]-eta[nei$i,j])
#if (l!=j) ncv1 <- ncv1 - (1+(l!=k&&j!=k)) * gamma * .5 * colSums(
#        llf$l3[nei$i,jj]*deta[nei$i+(j-1)*n,]*(eta.cv[,k]-eta[nei$i,k])*(eta.cv[,l]-eta[nei$i,l]))
if (l!=j) ncv1 <- ncv1 - (1+(l!=k&&j!=k)) * gamma * .5 *
llf$l3[nei$i,jj]*deta[nei$i+(j-1)*n,]*(eta.cv[,k]-eta[nei$i,k])*(eta.cv[,l]-eta[nei$i,l])
}
}
}
} else { ## exact
offi <- offset ## offsets restricted to the predicted points
if (!is.null(offset)) for (i in 1:length(offset)) if (!is.null(offset[[i]])) offi[[i]] <- offset[[i]][nei$i]
## evaluate the log likelihood at the cross-validated linear predictors...
ll <- family$ll(y[nei$i],X[nei$i,],beta,wt[nei$i],family,offi,deriv=1,db,eta=eta.cv,ncv=TRUE)
ncv <- -ll$l
ncv <- gamma*ncv - (gamma-1)*dev ## robustified NCV (reduces to -ll$l when gamma==1)
if (deriv>0) {
dev1 <- ncv1 <- matrix(0,nm,nsp) #rep(0,nsp) - per-observation rows, summed over at the end
ind <- 1:nm; iin <- 1:n
for (i in 1:nlp) {
#ncv1 <- ncv1 - colSums(ll$l1[,i]*deta.cv[ind,])
ncv1 <- ncv1 - ll$l1[,i]*deta.cv[ind,]
#if (gamma!=1) dev1 <- dev1 - colSums((llf$l1[,i]*deta[iin,])[nei$i,,drop=FALSE])
if (gamma!=1) dev1 <- dev1 - (llf$l1[,i]*deta[iin,])[nei$i,,drop=FALSE]
ind <- ind + nm; iin <- iin + n
}
ncv1 <- gamma*ncv1 - (gamma-1)*dev1
}
}
if (deriv>0) {
Vg <- crossprod(ncv1) ## empirical covariance of the gradient contributions
ncv1 <- colSums(ncv1) ## total derivative w.r.t. each log smoothing parameter
} else Vg <- NULL
attr(ncv,"eta.cv") <- eta.cv
if (deriv!=0) attr(ncv,"deta.cv") <- deta.cv ## actually the perturbations if deriv<0
return(list(NCV=ncv,NCV1=ncv1,error=cg.iter,Vg=Vg))
} ## gamlss.ncv

gamlss.etamu <- function(l1,l2,l3=NULL,l4=NULL,ig1,g2,g3=NULL,g4=NULL,i2,i3=NULL,i4=NULL,deriv=0) {
## Transforms derivatives of the log likelihood w.r.t. the distribution
## parameters mu_1..mu_K into derivatives w.r.t. the linear predictors
## eta_1..eta_K, given the link derivatives.
## lj is the array of jth order derivatives of l.
## gj[,k] contains the jth derivatives for the link of the kth lp;
## ig1 is one over the first deriv of the link (i.e. dmu/deta).
## i2, i3 and i4 are the upper triangular indexing arrays (or, if
## index functions were requested from trind.generator, functions),
## e.g. l4[,i4[i,j,l,m]] contains the partial w.r.t.
## params indexed by i,j,l,m with no restriction on
## the index values except that they are in 1..K.
## lj may have an attribute "remap". If so then columns containing
## only zeros are not actually stored. So, for example, if
## remap is the attribute for l4 and k = remap[i4[i,j,l,m]] then
## the derivative is zero if k==0 and l4[,k] otherwise. This
## allows for situations in which K is quite high, but there
## are many zero derivatives. Note, however that a zero col
## in l4 does not always imply a zero column in d4 (deriv w.r.t.
## eta), since the latter often involves lower order derivatives
## l3, l2 etc. The same goes for l3 and l2.
## deriv: >0 requires the 3rd order transform, >2 the 4th order.
## Returned arrays have remap attributes if input arrays do -
## they will generally be different to the input versions.

  ordf <- function(i,j,l=NULL,m=NULL) {
  ## helper function to work out derivative orders: returns the order of
  ## differentiation w.r.t. each distinct index among i,j(,l,m).
    if (is.null(l)) { ## 2d
      ord <- rep(1,2)
      if (i==j) {ord[1] <- ord[1] + 1; ord[2] <- 0 }
    } else if (is.null(m)) { ## 3d
      ord <- rep(1,3)
      if (i==j) {ord[1] <- ord[1] + 1; ord[2] <- 0 }
      if (i==l) {ord[1] <- ord[1] + 1; ord[3] <- 0 }
      if (ord[2]) {
        if (j==l) {ord[2] <- ord[2] + 1; ord[3] <- 0 }
      }
    } else { ## 4d
      ord <- rep(1,4)
      if (i==j) {ord[1] <- ord[1] + 1; ord[2] <- 0 }
      if (i==l) {ord[1] <- ord[1] + 1; ord[3] <- 0 }
      if (i==m) {ord[1] <- ord[1] + 1; ord[4] <- 0 }
      if (ord[2]) {
        if (j==l) {ord[2] <- ord[2] + 1; ord[3] <- 0 }
        if (j==m) {ord[2] <- ord[2] + 1; ord[4] <- 0 }
      }
      if (ord[3]&&l==m) { ord[3] <- ord[3] + 1; ord[4] <- 0 }
    }
    ord
  } ## ordf

  K <- ncol(l1) ## number of parameters of distribution

  ## first order transform: dl/deta_i = dl/dmu_i * dmu_i/deta_i...
  l1map <- attr(l1,"remap") ## l1map[i] is col of l1 storing ith deriv, or zero if deriv zero
  d1 <- l1 ## note: "remap" matches l1
  if (is.null(l1map)) { ## all derivs stored explicitly in l1
    for (i in 1:K) { ## first derivative loop
      d1[,i] <- l1[,i]*ig1[,i]
    }
  } else { ## some derivatives are zero and not stored in l1
    for (ii in 1:K) { ## first derivative loop
      i <- l1map[ii] ## actual column in which iith deriv stored, or 0 if deriv zero.
      ## FIX: the link derivative is that of parameter ii, not of storage
      ## column i (previously ig1[,i] - wrong for non-trivial remaps).
      if (i>0) d1[,i] <- l1[,i]*ig1[,ii]
    }
  }

  ifunc <- !is.array(i2) ## are index functions provided in place of index arrays?

  l2map <- attr(l2,"remap")
  g2zero <- colMeans(abs(g2))==0 ## links with identically zero second derivative
  if (!is.null(l2map)||!is.null(l1map)) { ## l1 and or l2 are supplied with missing zero cols
    if (is.null(l1map)) l1map <- 1:K
    K2 <- ((K+1)*K)/2 ## number of second derivatives in total
    d2map <- 1:K2
    if (is.null(l2map)) l2map <- 1:K2 else { ## need to do a dummy run to establish which elements of d2 are non-zero
      k <- 0
      for (i in 1:K) for (j in i:K) {
        ord <- ordf(i,j); k <- k+1
        mo <- max(ord)
        if (mo==2) { ## pure 2nd derivative transform
          if (l2map[k]==0 && (l1map[i]==0||g2zero[i])) d2map[k] <- 0 ## d2[,k] zero as components zero.
        } else { ## all first derivative
          if (l2map[k]==0) d2map[k] <- 0
        }
      }
      K2 <- sum(d2map!=0)
      d2map[d2map!=0] <- 1:K2
    }
    ## Now know d2map and l1map and l2map both exist. Do transforms...
    d2 <- matrix(0,nrow(l2),K2)
    ## FIX: k must be (re)initialized here - previously it was either unset
    ## (when l2map was NULL) or left at K2 by the dummy run above, so the
    ## fill loop indexed d2map out of range.
    k <- 0
    for (i in 1:K) for (j in i:K) {
      ord <- ordf(i,j);k <- k+1
      mo <- max(ord)
      if (d2map[k]>0) { ## non-zero term
        if (mo==2) { ## pure 2nd derivative transform
          a <- if (l2map[k]>0) l2[,l2map[k]] else 0
          b <- if (l1map[i]>0) l1[,l1map[i]]*g2[,i]*ig1[,i] else 0
          d2[,d2map[k]] <- (a - b)*ig1[,i]^2
        } else { ## all first derivative
          d2[,d2map[k]] <- l2[,l2map[k]]*ig1[,i]*ig1[,j]
        }
      }
    }
    attr(d2,"remap") <- d2map
  } else { ## l1 and l2 are supplied complete
    k <- 0; d2 <- l2
    for (i in 1:K) for (j in i:K) {
      ## obtain the order of differentiation associated
      ## with the i,j derivatives...
      ord <- ordf(i,j);k <- k+1
      ## l2[,k] is derivative to transform
      mo <- max(ord)
      if (mo==2) { ## pure 2nd derivative transform
        d2[,k] <- (l2[,k] - l1[,i]*g2[,i]*ig1[,i])*ig1[,i]^2
      } else { ## all first derivative
        d2[,k] <- l2[,k]*ig1[,i]*ig1[,j]
      }
    }
  } ## 2nd order transform done

  l3map <- attr(l3,"remap")
  if (deriv>0) g3zero <- colMeans(abs(g3))==0
  if (deriv>0&& (!is.null(l3map)||!is.null(l2map)||!is.null(l1map))) { ## 3rd order required, but some lj have dropped zero cols
    if (is.null(l1map)) l1map <- 1:K
    if (is.null(l2map)) { K2 <- ((K+1)*K)/2; l2map <- 1:K2 }
    K3 <- (K*(K+3)+2)*K/6
    d3map <- 1:K3
    if (is.null(l3map)) l3map <- 1:K3 else { ## dummy run to work out d3map
      k <- 0
      for (i in 1:K) for (j in i:K) for (l in j:K) {
        k <- k + 1
        ord <- ordf(i,j,l)
        ii <- c(i,j,l)
        ## l3[,k] is derivative to transform
        mo <- max(ord)
        if (mo==3) { ## pure 3rd derivative transform
          mind <- if (ifunc) i2(i,i) else i2[i,i]
          if (l3map[k]==0&&(l2map[mind]==0||g2zero[i])&&(l1map[i]==0||(g3zero[i]&&g2zero[i]))) d3map[k] <- 0
        } else if (mo==1) { ## all first derivative
          if (l3map[k]==0) d3map[k] <- 0
        } else { ## 2,1 deriv
          k1 <- ii[ord==1] ## index of order 1 deriv
          k2 <- ii[ord==2] ## index of order 2 part
          mind <- if (ifunc) i2(k2,k1) else i2[k2,k1]
          if (l3map[k]==0&&(l2map[mind]==0||g2zero[k2])) d3map[k] <- 0
        }
      }
      K3 <- sum(d3map!=0)
      d3map[d3map!=0] <- 1:K3
    }
    ## now create and fill in non-zero cols of d3...
    d3 <- matrix(0,nrow(l3),K3)
    k <- 0
    for (i in 1:K) for (j in i:K) for (l in j:K) {
      ## obtain the order of differentiation associated
      ## with the i,j,l derivatives...
      k <- k+1
      if (d3map[k]>0) {
        ord <- ordf(i,j,l)
        ii <- c(i,j,l)
        ## l3[,k] is derivative to transform
        mo <- max(ord)
        if (mo==3) { ## pure 3rd derivative transform
          mind <- if (ifunc) i2(i,i) else i2[i,i]
          aa <- if (l3map[k]>0) l3[,l3map[k]] else 0
          bb <- if (l2map[mind]>0) -3*l2[,l2map[mind]]*g2[,i]*ig1[,i] else 0
          cc <- if (l1map[i]>0) l1[,l1map[i]]*(3*g2[,i]^2*ig1[,i]^2 - g3[,i]*ig1[,i]) else 0
          d3[,d3map[k]] <- (aa + bb + cc)*ig1[,i]^3
        } else if (mo==1) { ## all first derivative
          d3[,d3map[k]] <- l3[,l3map[k]]*ig1[,i]*ig1[,j]*ig1[,l]
        } else { ## 2,1 deriv
          k1 <- ii[ord==1] ## index of order 1 deriv
          k2 <- ii[ord==2] ## index of order 2 part
          mind <- if (ifunc) i2(k2,k1) else i2[k2,k1]
          aa <- if (l3map[k]>0) l3[,l3map[k]] else 0
          bb <- if (l2map[mind]>0) -l2[,l2map[mind]]*g2[,k2]*ig1[,k2] else 0
          d3[,d3map[k]] <- (aa+bb)*ig1[,k1]*ig1[,k2]^2
        }
      } ## d3map[k]>0
    } ## loop
    attr(d3,"remap") <- d3map
  } else { ## l3, l2 and l1 all supplied complete without zero cols dropped
    k <- 0
    d3 <- l3
    if (deriv>0) for (i in 1:K) for (j in i:K) for (l in j:K) {
      ## obtain the order of differentiation associated
      ## with the i,j,l derivatives...
      ord <- ordf(i,j,l);k <- k+1
      ii <- c(i,j,l)
      ## l3[,k] is derivative to transform
      mo <- max(ord)
      if (mo==3) { ## pure 3rd derivative transform
        mind <- if (ifunc) i2(i,i) else i2[i,i]
        d3[,k] <- (l3[,k] - 3*l2[,mind]*g2[,i]*ig1[,i] +
                   l1[,i]*(3*g2[,i]^2*ig1[,i]^2 - g3[,i]*ig1[,i]))*ig1[,i]^3
      } else if (mo==1) { ## all first derivative
        d3[,k] <- l3[,k]*ig1[,i]*ig1[,j]*ig1[,l]
      } else { ## 2,1 deriv
        k1 <- ii[ord==1] ## index of order 1 deriv
        k2 <- ii[ord==2] ## index of order 2 part
        mind <- if (ifunc) i2(k2,k1) else i2[k2,k1]
        d3[,k] <- (l3[,k] - l2[,mind]*g2[,k2]*ig1[,k2])*
                  ig1[,k1]*ig1[,k2]^2
      }
    }
  }  ## 3rd order transform done

  l4map <- attr(l4,"remap")
  if (deriv>2&& (!is.null(l4map)||!is.null(l3map)||!is.null(l2map)||!is.null(l1map))) { ## 4th order required, but some lj have dropped zero cols
    g4zero <- colMeans(abs(g4))==0
    if (is.null(l1map)) l1map <- 1:K
    if (is.null(l2map)) { K2 <- ((K+1)*K)/2; l2map <- 1:K2 }
    if (is.null(l3map)) { K3 <- (K*(K+3)+2)*K/6;l3map <- 1:K3}
    K4 <- (6+K*11+K^2*6+K^3)*K/24
    d4map <- 1:K4
    if (is.null(l4map)) l4map <- 1:K4 else { ## dummy run to create d4map
      k <- 0
      for (i in 1:K) for (j in i:K) for (l in j:K) for (m in l:K) {
        ## obtain the order of differentiation associated
        ## with the i,j,l & m derivatives...
        ord <- ordf(i,j,l,m);k <- k+1
        ii <- c(i,j,l,m)
        ## l4[,k] is derivative to transform
        mo <- max(ord)
        if (mo==4) { ## pure 4th derivative transform
          mi2 <- if (ifunc) i2(i,i) else i2[i,i]
          mi3 <- if (ifunc) i3(i,i,i) else i3[i,i,i]
          if (l4map[k]==0&&(l3map[mi3]==0||g2zero[i])&&(l2map[mi2]==0||(g2zero[i]&&g3zero[i]))&&(l1map[i]==0||(g4zero[i]&&g2zero[i]))) d4map[k] <- 0
        } else if (mo==1) { ## all first derivative
          if (l4map[k]==0) d4map[k] <- 0
        } else if (mo==3) { ## 3,1 deriv
          k1 <- ii[ord==1] ## index of order 1 deriv
          k3 <- ii[ord==3] ## index of order 3 part
          mi2 <- if (ifunc) i2(k3,k1) else i2[k3,k1]
          mi3 <- if (ifunc) i3(k3,k3,k1) else i3[k3,k3,k1]
          if (l4map[k]==0&&(l3map[mi3]==0||g2zero[k3])&&(l2map[mi2]==0||(g2zero[k3]&&g3zero[k3]))) d4map[k] <- 0
        } else {
          if (sum(ord==2)==2) { ## 2,2
            k2a <- (ii[ord==2])[1];k2b <- (ii[ord==2])[2]
            mi2 <- if (ifunc) i2(k2a,k2b) else i2[k2a,k2b]
            mi3 <- if (ifunc) i3(k2a,k2b,k2b) else i3[k2a,k2b,k2b]
            mi3a <- if (ifunc) i3(k2a,k2a,k2b) else i3[k2a,k2a,k2b]
            if (l4map[k]==0&&(l3map[mi3]==0||g2zero[k2a])&&(l3map[mi3a]==0||g2zero[k2b])&&(l2map[mi2]==0||g2zero[k2a]||g2zero[k2b])) d4map[k] <- 0
          } else { ## 2,1,1
            k2 <- ii[ord==2] ## index of order 2 derivative
            k1a <- (ii[ord==1])[1];k1b <- (ii[ord==1])[2]
            mi3 <- if (ifunc) i3(k2,k1a,k1b) else i3[k2,k1a,k1b]
            if (l4map[k]==0&&(l3map[mi3]==0||g2zero[k2])) d4map[k] <- 0
          }
        }
      } ## loop
    }
    K4 <- sum(d4map!=0)
    d4map[d4map!=0] <- 1:K4
    d4 <- matrix(0,nrow(l4),K4)
    k <- 0
    for (i in 1:K) for (j in i:K) for (l in j:K) for (m in l:K) { ## fill in d4
      ## obtain the order of differentiation associated
      ## with the i,j,l & m derivatives...
      k <- k+1
      if (d4map[k]>0) {
        ord <- ordf(i,j,l,m);
        ii <- c(i,j,l,m)
        ## l4[,k] is derivative to transform
        mo <- max(ord)
        if (mo==4) { ## pure 4th derivative transform
          mi2 <- if (ifunc) i2(i,i) else i2[i,i]
          mi3 <- if (ifunc) i3(i,i,i) else i3[i,i,i]
          aa <- if (l4map[k]>0) l4[,l4map[k]] else 0
          bb <- if (l3map[mi3]>0) -6*l3[,l3map[mi3]]*g2[,i]*ig1[,i] else 0
          cc <- if (l2map[mi2]>0) l2[,l2map[mi2]]*(15*g2[,i]^2*ig1[,i]^2 - 4*g3[,i]*ig1[,i]) else 0
          dd <- if (l1map[i]>0) -l1[,l1map[i]]*(15*g2[,i]^3*ig1[,i]^3 - 10*g2[,i]*g3[,i]*ig1[,i]^2 + g4[,i]*ig1[,i]) else 0
          d4[,d4map[k]] <- (aa+bb+cc+dd)*ig1[,i]^4
        } else if (mo==1) { ## all first derivative
          d4[,d4map[k]] <- l4[,l4map[k]]*ig1[,i]*ig1[,j]*ig1[,l]*ig1[,m]
        } else if (mo==3) { ## 3,1 deriv
          k1 <- ii[ord==1] ## index of order 1 deriv
          k3 <- ii[ord==3] ## index of order 3 part
          mi2 <- if (ifunc) i2(k3,k1) else i2[k3,k1]
          mi3 <- if (ifunc) i3(k3,k3,k1) else i3[k3,k3,k1]
          aa <- if (l4map[k]>0) l4[,l4map[k]] else 0
          bb <- if (l3map[mi3]>0) -3*l3[,l3map[mi3]]*g2[,k3]*ig1[,k3] else 0
          cc <- if (l2map[mi2]>0) l2[,l2map[mi2]]*(3*g2[,k3]^2*ig1[,k3]^2 - g3[,k3]*ig1[,k3]) else 0
          d4[,d4map[k]] <- (aa+bb+cc)*ig1[,k1]*ig1[,k3]^3
        } else {
          if (sum(ord==2)==2) { ## 2,2
            k2a <- (ii[ord==2])[1];k2b <- (ii[ord==2])[2]
            mi2 <- if (ifunc) i2(k2a,k2b) else i2[k2a,k2b]
            mi3 <- if (ifunc) i3(k2a,k2b,k2b) else i3[k2a,k2b,k2b]
            mi3a <- if (ifunc) i3(k2a,k2a,k2b) else i3[k2a,k2a,k2b]
            aa <- if (l4map[k]>0) l4[,l4map[k]] else 0
            bb <- if (l3map[mi3]>0) -l3[,l3map[mi3]]*g2[,k2a]*ig1[,k2a] else 0
            cc <- if (l3map[mi3a]>0) -l3[,l3map[mi3a]]*g2[,k2b]*ig1[,k2b] else 0
            dd <- if (l2map[mi2]>0) l2[,l2map[mi2]]*g2[,k2a]*g2[,k2b]*ig1[,k2a]*ig1[,k2b] else 0
            d4[,d4map[k]] <- (aa+bb+cc+dd)*ig1[,k2a]^2*ig1[,k2b]^2
          } else { ## 2,1,1
            k2 <- ii[ord==2] ## index of order 2 derivative
            k1a <- (ii[ord==1])[1];k1b <- (ii[ord==1])[2]
            mi3 <- if (ifunc) i3(k2,k1a,k1b) else i3[k2,k1a,k1b]
            aa <- if (l4map[k]>0) l4[,l4map[k]] else 0
            bb <- if (l3map[mi3]>0) -l3[,l3map[mi3]]*g2[,k2]*ig1[,k2] else 0
            d4[,d4map[k]] <- (aa+bb)*ig1[,k1a]*ig1[,k1b]*ig1[,k2]^2
          }
        }
      } ## if d4map[k]>0
    } ## loop
    attr(d4,"remap") <- d4map
  } else { ## l1-l4 are all supplied complete with no dropping
    k <- 0
    d4 <- l4
    if (deriv>2) for (i in 1:K) for (j in i:K) for (l in j:K) for (m in l:K) {
      ## obtain the order of differentiation associated
      ## with the i,j,l & m derivatives...
      ord <- ordf(i,j,l,m);k <- k+1
      ii <- c(i,j,l,m)
      ## l4[,k] is derivative to transform
      mo <- max(ord)
      if (mo==4) { ## pure 4th derivative transform
        mi2 <- if (ifunc) i2(i,i) else i2[i,i]
        mi3 <- if (ifunc) i3(i,i,i) else i3[i,i,i]
        d4[,k] <-  (l4[,k] - 6*l3[,mi3]*g2[,i]*ig1[,i] +
          l2[,mi2]*(15*g2[,i]^2*ig1[,i]^2 - 4*g3[,i]*ig1[,i]) -
          l1[,i]*(15*g2[,i]^3*ig1[,i]^3 - 10*g2[,i]*g3[,i]*ig1[,i]^2
          + g4[,i]*ig1[,i]))*ig1[,i]^4
      } else if (mo==1) { ## all first derivative
        d4[,k] <- l4[,k]*ig1[,i]*ig1[,j]*ig1[,l]*ig1[,m]
      } else if (mo==3) { ## 3,1 deriv
        k1 <- ii[ord==1] ## index of order 1 deriv
        k3 <- ii[ord==3] ## index of order 3 part
        mi2 <- if (ifunc) i2(k3,k1) else i2[k3,k1]
        mi3 <- if (ifunc) i3(k3,k3,k1) else i3[k3,k3,k1]
        d4[,k] <- (l4[,k] - 3*l3[,mi3]*g2[,k3]*ig1[,k3] +
          l2[,mi2]*(3*g2[,k3]^2*ig1[,k3]^2 - g3[,k3]*ig1[,k3])
          )*ig1[,k1]*ig1[,k3]^3
      } else {
        if (sum(ord==2)==2) { ## 2,2
          k2a <- (ii[ord==2])[1];k2b <- (ii[ord==2])[2]
          mi2 <- if (ifunc) i2(k2a,k2b) else i2[k2a,k2b]
          mi3 <- if (ifunc) i3(k2a,k2b,k2b) else i3[k2a,k2b,k2b]
          mi3a <- if (ifunc) i3(k2a,k2a,k2b) else i3[k2a,k2a,k2b]
          d4[,k] <- (l4[,k] - l3[,mi3]*g2[,k2a]*ig1[,k2a]
            -l3[,mi3a]*g2[,k2b]*ig1[,k2b] +
            l2[,mi2]*g2[,k2a]*g2[,k2b]*ig1[,k2a]*ig1[,k2b]
            )*ig1[,k2a]^2*ig1[,k2b]^2
        } else { ## 2,1,1
          k2 <- ii[ord==2] ## index of order 2 derivative
          k1a <- (ii[ord==1])[1];k1b <- (ii[ord==1])[2]
          mi3 <- if (ifunc) i3(k2,k1a,k1b) else i3[k2,k1a,k1b]
          d4[,k] <- (l4[,k] - l3[,mi3]*g2[,k2]*ig1[,k2]
            )*ig1[,k1a]*ig1[,k1b]*ig1[,k2]^2
        }
      }
    }
  } ## 4th order transform done

  list(l1=d1,l2=d2,l3=d3,l4=d4)
} # gamlss.etamu

gamlss.gH <- function(X,jj,l1,l2,i2,l3=0,i3=0,l4=0,i4=0,d1b=0,d2b=0,deriv=0,fh=NULL,D=NULL,sandwich=FALSE) {
## Assembles gradient, Hessian and Hessian derivatives for a gamlss model
## from the per-datum linear predictor derivatives of the log likelihood.
## X[,jj[[i]]] is the ith model matrix (X may also be a discrete storage list).
## lj contains jth derivatives of the likelihood for each datum,
## columns are w.r.t. different combinations of parameters.
## ij is the symmetric array indexer for the jth order derivs...
## e.g. l4[,i4[i,j,l,m]] contains derivatives with
## respect to parameters indexed by i,j,l,m.
## d1b and d2b are first and second derivatives of beta w.r.t. sps.
## fh is a factorization of the penalized hessian, while D contains the corresponding
##    diagonal pre-conditioning weights.
## deriv: 0 - just grad and Hess
##        1 - tr(Hp^{-1} dH/drho_j) vector - Hp^{-1} must be supplied in fh
##        2 - first deriv of Hess
##        3 - everything.
## If sandwich is TRUE, l2 is overwritten with the outer products of the l1
## columns, so the returned 'Hessian' is the filling for a sandwich estimate.
  K <- length(jj)
  if (is.list(X)) { ## discrete (compressed) model matrix storage
    discrete <- TRUE
    p <- X$p;n <- nrow(X$kd)
    sparse <- !inherits(X$Xd[[1]],"matrix")
  } else {
    sparse <- discrete <- FALSE
    p <- ncol(X);n <- nrow(X)
  }
  trHid2H <- d1H <- d2H <- NULL ## defaults
  ifunc <- !is.array(i2) ## are index functions provided in place of index arrays?

  ## the gradient...
  l1map <- attr(l1,"remap")
  lb <- rep(0,p)
  if (is.null(l1map)) { ## all cols of l1 supplied
    for (i in 1:K) { ## first derivative loop
      lb[jj[[i]]] <- lb[jj[[i]]] + if (discrete) XWyd(X$Xd,rep(1,n),l1[,i],X$kd,X$ks,X$ts,X$dt,X$v,X$qc,X$drop,lt=X$lpid[[i]]) else
                     colSums(l1[,i]*X[,jj[[i]],drop=FALSE]) ## !
    }
  } else { ## only non-zero cols of l1 supplied (only supplied for completeness - unclear any sensible model could ever want this)
    for (i in 1:K) if (l1map[i]>0) { ## first derivative loop
      lb[jj[[i]]] <- lb[jj[[i]]] + if (discrete) XWyd(X$Xd,rep(1,n),l1[,l1map[i]],X$kd,X$ks,X$ts,X$dt,X$v,X$qc,X$drop,lt=X$lpid[[i]]) else
                     colSums(l1[,l1map[i]]*X[,jj[[i]],drop=FALSE]) ## !
    }
  }

  ## the Hessian...
  lbb <- if (sparse) Matrix(0,p,p) else matrix(0,p,p)
  if (sandwich) { ## reset l2 so that Hessian becomes 'filling' for sandwich estimate
    if (deriv>0) warning("sandwich requested with higher derivatives")
    if (!is.null(l1map)) stop("sandwich requested with structurally zero first derivatives - can't be sensible")
    k <- 0;
    for (i in 1:K) for (j in i:K) { k <- k + 1;l2[,k] <- l1[,i]*l1[,j] }
    attr(l2,"remap") <- NULL ## l2 has to be full now.
  }

  l2map <- attr(l2,"remap")
  if (is.null(l2map)) { ## l2 supplied with all columns
    for (i in 1:K) for (j in i:K) {
      ## A <- t(X[,jj[[i]],drop=FALSE])%*%(l2[,i2[i,j]]*X[,jj[[j]],drop=FALSE])
      mi2 <- if (ifunc) i2(i,j) else i2[i,j]
      A <- if (discrete) XWXd(X$Xd,w=l2[,mi2],k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,nthreads=1,drop=X$drop,lt=X$lpid[[i]],rt=X$lpid[[j]]) else
           crossprod(X[,jj[[i]],drop=FALSE],l2[,mi2]*X[,jj[[j]],drop=FALSE])
      lbb[jj[[i]],jj[[j]]] <- lbb[jj[[i]],jj[[j]]] + A
      if (j>i) lbb[jj[[j]],jj[[i]]] <- lbb[jj[[j]],jj[[i]]] + t(A) ## symmetric fill
    }
  } else { ## l2 supplied with zero columns dropped
    for (i in 1:K) for (j in i:K) {
      mi2 <- if (ifunc) i2(i,j) else i2[i,j]
      if (l2map[mi2]>0) {
        A <- if (discrete) XWXd(X$Xd,w=l2[,l2map[mi2]],k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,nthreads=1,drop=X$drop,lt=X$lpid[[i]],rt=X$lpid[[j]]) else
             crossprod(X[,jj[[i]],drop=FALSE],l2[,l2map[mi2]]*X[,jj[[j]],drop=FALSE])
        lbb[jj[[i]],jj[[j]]] <- lbb[jj[[i]],jj[[j]]] + A
        if (j>i) lbb[jj[[j]],jj[[i]]] <- lbb[jj[[j]],jj[[i]]] + t(A)
      }
    }
  }

  if (deriv>0) {
    ## the first derivative of the Hessian, using d1b
    ## the first derivatives of the coefficients wrt the sps
    m <- ncol(d1b) ## number of smoothing parameters
    ## stack the derivatives of the various linear predictors on top
    ## of each other...
    d1eta <- matrix(0,n*K,m)
    ind <- 1:n
    for (i in 1:K) {
      d1eta[ind,] <- if (discrete) Xbd(X$Xd,d1b,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[i]]) else
                     X[,jj[[i]],drop=FALSE]%*%d1b[jj[[i]],]
      ind <- ind + n
    }
    l3map <- attr(l3,"remap")
    null3map <- is.null(l3map)
  }

  if (deriv==1) {
    ## assuming fh contains the inverse penalized Hessian, Hp, forms tr(Hp^{-1}dH/drho_j) for each j
    g.index <- attr(d1b,"g.index") ## possible index indicating log parameterization
    if (!is.null(g.index)) { ## then several transform related quantities are required
      beta <- attr(d1b,"beta") ##  regression coefficients
      d1g <- d1b; d1g[g.index,] <- d1g[g.index,]/beta[g.index] ## derivative w.r.t. working parameters
      hess.diag <- attr(d1b,"hess.diag") ## should diagonal correction terms be included?
    }
    d1H <- rep(0,m)
    if (discrete) {
      ## lpi <- attr(X,"lpi") ## this line was in original code for this discrete section, and lpi replaced jj below - mistake, I think
      for (i in 1:K) for (j in i:K) { ## lp block loop
        for (l in 1:m) { ## sp loop
          v <- rep(0,n);ind <- 1:n
          for (q in 1:K) { ## diagonal accumulation loop
            mi3 <- if (ifunc) i3(i,j,q) else i3[i,j,q]
            if (!null3map) mi3 <- l3map[mi3]
            if (mi3>0) v <- v + l3[,mi3] * d1eta[ind,l]
            ind <- ind + n
          }
          ## FIX: form X_i' diag(v) X_j - this assignment was missing, leaving
          ## XVX undefined below (call mirrors the XWXd usage for lbb above).
          XVX <- XWXd(X$Xd,w=v,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,nthreads=1,drop=X$drop,lt=X$lpid[[i]],rt=X$lpid[[j]])
          if (!is.null(g.index)) { ## non-linear correction terms required
            gi <- g.index[jj[[i]]];gj <- g.index[jj[[j]]]
            if (any(gi)) XVX[gi,] <- beta[jj[[i]]][gi]*XVX[gi,]
            if (any(gj)) XVX[,gj] <- t(beta[jj[[j]]][gj]*t(XVX[,gj]))
            if (any(gi)) {
              XWX <- beta[jj[[i]]][gi]*d1g[jj[[i]],l][gi]*lbb[jj[[i]],jj[[j]]][gi,]
              if (any(gj)) XWX[,gj] <- t(beta[jj[[j]]][gj]*t(XWX[,gj]))
              XVX[gi,] <- XVX[gi,] + XWX
            }
            if (any(gj)) {
              XWX <- t(beta[jj[[j]]][gj]*d1g[jj[[j]],l][gj]*t(lbb[jj[[i]],jj[[j]]][,gj]))
              if (any(gi)) XWX[gi,] <- beta[jj[[i]]][gi]*XWX[gi,]
              XVX[,gj] <- XVX[,gj] + XWX
              if (i==j&&hess.diag) { ## add diagonal corrections
                dd <- beta[jj[[i]]][gi]*(lbb[jj[[i]][gi],] %*% d1b[,l] + lb[jj[[i]]][gi]*d1g[jj[[i]],l][gi])
                XVX[gi,gj] <- XVX[gi,gj] + diag(drop(dd),nrow=sum(gi))
              }
            }
          } ## end of non-linear corrections
          mult <- if (i==j) 1 else 2
          d1H[l] <- d1H[l] + mult * sum(XVX * fh[jj[[i]],jj[[j]]]) ## accumulate tr(Hp^{-1}dH/drho_l)
        }
      }
    } else for (i in 1:K) for (j in i:K) { ## lp block loop
      Hpi <- fh[jj[[i]],jj[[j]]] ## correct component of inverse Hessian
      d1hc <- rep(0,m)
      if (!is.null(g.index)) { ## correct for non-linearity
        gi <- g.index[jj[[i]]];gj <- g.index[jj[[j]]]
        for (l in 1:m) { ## s.p. loop
          dcor <- 0
          if (any(gi)) {
            XWX <- beta[jj[[i]]][gi]*d1g[jj[[i]],l][gi]*lbb[jj[[i]],jj[[j]]][gi,]
            if (any(gj)) XWX[,gj] <- t(beta[jj[[j]]][gj]*t(XWX[,gj]))
            dcor <- dcor + sum(XWX * Hpi[gi,])
          }
          if (any(gj)) {
            XWX <- t(beta[jj[[j]]][gj]*d1g[jj[[j]],l][gj]*t(lbb[jj[[i]],jj[[j]]][,gj]))
            if (any(gi)) XWX[gi,] <- beta[jj[[i]]][gi]*XWX[gi,]
            dcor <- dcor + sum(XWX * Hpi[,gj])
            if (i==j&&hess.diag) { ## diagonal correction
              dd <- beta[jj[[i]]][gi]*(lbb[jj[[i]][gi],] %*% d1b[,l] + lb[jj[[i]]][gi]*d1g[jj[[i]],l][gi])
              dcor <- dcor + sum(dd*diag(Hpi)[gi])
            }
          }
          d1hc[l] <- dcor
        } ## s.p. loop end
        if (any(gi)) Hpi[gi,] <- Hpi[gi,]*beta[jj[[i]]][gi]
        if (any(gj)) Hpi[,gj] <- t(t(Hpi[,gj])*beta[jj[[j]]][gj]) ## was jj[i] -- wrong
      } ## end of non-linearity correction
      a <- rowSums((X[,jj[[i]]] %*% Hpi) * X[,jj[[j]]])
      for (l in 1:m) { ## sp loop
        v <- rep(0,n);ind <- 1:n
        for (q in 1:K) { ## diagonal accumulation loop
          mi3 <- if (ifunc) i3(i,j,q) else i3[i,j,q]
          if (!null3map) mi3 <- l3map[mi3]
          if (mi3>0) v <- v + l3[,mi3] * d1eta[ind,l]
          ind <- ind + n
        }
        mult <- if (i==j) 1 else 2
        d1H[l] <- d1H[l] + mult * (sum(a*v) + d1hc[l]) ## accumulate tr(Hp^{-1}dH/drho_l)
      }
    }
  } ## if deriv==1

  if (deriv>1) { ## form the full first derivatives of the Hessian
    if (discrete) stop("er... no discrete methods for higher derivatives")
    d1H <- list()
    for (l in 1:m) {
      d1H[[l]] <- matrix(0,p,p)
      for (i in 1:K) for (j in i:K) {
        v <- rep(0,n);ind <- 1:n
        for (q in 1:K) {
          mi3 <- if (ifunc) i3(i,j,q) else i3[i,j,q]
          if (!null3map) mi3 <- l3map[mi3]
          if (mi3>0) v <- v + l3[,mi3] * d1eta[ind,l]
          ind <- ind + n
        }
        ## d1H[[l]][jj[[j]],jj[[i]]] <-
        ## A <- t(X[,jj[[i]],drop=FALSE])%*%(v*X[,jj[[j]],drop=FALSE])
        A <- crossprod(X[,jj[[i]],drop=FALSE],v*X[,jj[[j]],drop=FALSE])
        d1H[[l]][jj[[i]],jj[[j]]] <- d1H[[l]][jj[[i]],jj[[j]]] + A
        if (j>i) d1H[[l]][jj[[j]],jj[[i]]] <- d1H[[l]][jj[[j]],jj[[i]]] + t(A)
      }
    }
  } ## if deriv>1

  if (deriv>2) {
    l4map <- attr(l4,"remap")
    null4map <- is.null(l4map)
    ## need tr(Hp^{-1} d^2H/drho_k drho_j)
    ## First form the expanded model matrix...
    VX <- Xe <- matrix(0,K*n,ncol(X))
    ind <- 1:n
    for (i in 1:K) {
      Xe[ind,jj[[i]]] <- X[,jj[[i]]]
      ind <- ind + n
    }
    ## Now form Hp^{-1} Xe'...
    if (is.list(fh)) { ## then the supplied factor is an eigen-decomposition
      d <- fh$values;d[d>0] <- 1/d[d>0];d[d<=0] <- 0
      Xe <- t(D*((fh$vectors%*%(d*t(fh$vectors)))%*%(D*t(Xe))))
    } else { ## the supplied factor is a choleski factor
      ipiv <- piv <- attr(fh,"pivot");ipiv[piv] <- 1:p
      Xe <- t(D*(backsolve(fh,forwardsolve(t(fh),(D*t(Xe))[piv,]))[ipiv,]))
    }
    ## now compute the required trace terms
    d2eta <- matrix(0,n*K,ncol(d2b))
    ind <- 1:n
    for (i in 1:K) {
      d2eta[ind,] <- X[,jj[[i]],drop=FALSE]%*%d2b[jj[[i]],]
      ind <- ind + n
    }
    trHid2H <- rep(0,ncol(d2b))
    kk <- 0 ## counter for second derivatives
    for (k in 1:m) for (l in k:m) { ## looping over smoothing parameters...
      kk <- kk + 1
      for (i in 1:K) for (j in 1:K) {
        v <- rep(0,n);ind <- 1:n
        for (q in 1:K) { ## accumulate the diagonal matrix for X_i'diag(v)X_j
          mi3 <- if (ifunc) i3(i,j,q) else i3[i,j,q]
          if (!null3map) mi3 <- l3map[mi3]
          if (mi3>0) v <- v + d2eta[ind,kk]*l3[,mi3]
          ins <- 1:n
          for (s in 1:K) {
            mi4 <- if (ifunc) i4(i,j,q,s) else i4[i,j,q,s]
            if (!null4map) mi4 <- l4map[mi4]
            if (mi4>0) v <- v + d1eta[ind,k]*d1eta[ins,l]*l4[,mi4]
            ins <- ins + n
          }
          ind <- ind + n
        }
        if (i==j) {
          rind <- 1:n + (i-1)*n
          VX[rind,jj[[i]]] <- v * X[,jj[[i]]]
        } else {
          rind1 <- 1:n + (i-1)*n
          rind2 <- 1:n + (j-1)*n
          VX[rind2,jj[[i]]] <- v * X[,jj[[i]]]
          VX[rind1,jj[[j]]] <- v * X[,jj[[j]]]
        }
      }
      trHid2H[kk] <- sum(Xe*VX)
    }
  } ## if deriv>2

  list(lb=lb,lbb=lbb,d1H=d1H,d2H=d2H,trHid2H=trHid2H)
} ## end of gamlss.gH

## Extended family for Gaussian location scale model...
## so mu is mu1 and tau=1/sig is mu2
## tau = 1/(b + exp(eta)) eta = log(1/tau - b)
## 1. get derivatives wrt mu, tau
## 2. get required link derivatives and tri indices.
## 3. transform derivs to derivs wrt eta (gamlss.etamu).
## 4. get the grad and Hessian etc for the model
##    via a call to gamlss.gH
## the first derivatives of the log likelihood w.r.t
## the first and second parameters...

## first deal with links and their derivatives...
stats <- list()
mu.eta=stats[[1]]\$mu.eta),
class="family")
stats[[2]] <- list()
stats[[2]]\$valideta <- function(eta) TRUE
stats[[2]]\$mu.eta <- eval(parse(text=
paste("function(eta) { ee <- exp(eta); -ee/(ee +",b,")^2 }")))
paste("function(mu) { mub <- pmax(1 - mu *",b,",.Machine\$double.eps);(2*mub-1)/(mub*mu)^2}" )))
paste("function(mu) { mub <-  pmax(1 - mu *",b,",.Machine\$double.eps);((1-mub)*mub*6-2)/(mub*mu)^3}" )))
paste("function(mu) { mub <- pmax(1 - mu *",b,",.Machine\$double.eps);(((24*mub-36)*mub+24)*mub-6)/(mub*mu)^4}")))

residuals <- function(object,type=c("deviance","pearson","response")) {
## Residuals for the gaulss family. fitted[,1] is mu and fitted[,2] is
## tau = 1/sigma, so "response" gives y - mu while "deviance" and
## "pearson" both give the standardized residual (y-mu)/sigma.
  type <- match.arg(type)
  res <- object$y - object$fitted[,1] ## response scale residual
  if (type != "response") res <- res*object$fitted[,2] ## scale by 1/sigma
  res
} ## gaulss residuals

postproc <- expression({
## code to evaluate in estimate.gam, to evaluate null deviance
## Null deviance for gaulss: squared standardized residuals about the
## overall mean of y, using the fitted precision tau = object$fitted[,2].
## in principle the following seems reasonable, but because no
## price is paid for the high null variance, it leads to silly
## % deviance explained...
#er <- fitNull(G\$y,G\$family,G\$w,G\$offset,nlp=length(attr(G\$X,"lpi")),tol=1e-7)
#object\$null.deviance <- sum(((object\$y-er\$mu[,1])*er\$mu[,2])^2*G\$w)
object\$null.deviance <- sum(((object\$y-mean(object\$y))*object\$fitted[,2])^2)
})

ncv <- function(X,y,wt,nei,beta,family,llf,H=NULL,Hi=NULL,R=NULL,offset=NULL,dH=NULL,db=NULL,deriv=FALSE,nt=1) {
## Neighbourhood cross-validation: delegate to the shared gamlss routine,
## forwarding every argument by name.
  gamlss.ncv(X = X, y = y, wt = wt, nei = nei, beta = beta, family = family,
             llf = llf, H = H, Hi = Hi, R = R, offset = offset, dH = dH,
             db = db, deriv = deriv, nt = nt)
} ## ncv

ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL,eta=NULL,ncv=FALSE,sandwich=FALSE) {
## function defining the gamlss Gaussian model log lik.
## N(mu,sigma^2) parameterized in terms of mu and log(sigma)
## deriv: 0 - eval
##        1 - grad and Hess
##        2 - diagonal of first deriv of Hess
##        3 - first deriv of Hess
##        4 - everything.
## dummy third offset slot guarantees the list has length >= 2...
if (!is.null(offset)) offset[[3]] <- 0
discrete <- is.list(X)
jj <- attr(X,"lpi") ## extract linear predictor index
## form the two linear predictors (discrete storage or dense matrix),
## unless eta was supplied directly as a 2 column matrix...
if (is.null(eta)) {
eta <- if (discrete) Xbd(X\$Xd,coef,k=X\$kd,ks=X\$ks,ts=X\$ts,dt=X\$dt,v=X\$v,qc=X\$qc,drop=X\$drop,lt=X\$lpid[[1]]) else X[,jj[[1]],drop=FALSE]%*%coef[jj[[1]]]
if (!is.null(offset[[1]])) eta <- eta + offset[[1]]
eta1 <- if (discrete) Xbd(X\$Xd,coef,k=X\$kd,ks=X\$ks,ts=X\$ts,dt=X\$dt,v=X\$v,qc=X\$qc,drop=X\$drop,lt=X\$lpid[[2]]) else X[,jj[[2]],drop=FALSE]%*%coef[jj[[2]]]
if (!is.null(offset[[2]])) eta1 <- eta1 + offset[[2]]
} else { ## eta supplied directly
eta1 <- eta[,2]
eta <- eta[,1]
}

tau <-  family\$linfo[[2]]\$linkinv(eta1) ## tau = 1/sig here

n <- length(y)
l1 <- matrix(0,n,2)
## NOTE(review): 'mu' is used below but never assigned in this excerpt -
## a line like mu <- family$linfo[[1]]$linkinv(eta) appears to have been
## lost; confirm against the full source.
ymu <- y-mu;ymu2 <- ymu^2;tau2 <- tau^2

## log likelihood per observation and its sum...
l0 <- -.5 * ymu2 * tau2 - .5 * log(2*pi) + log(tau)
l <- sum(l0)

if (deriv>0) {

## first derivatives w.r.t. mu and tau...
l1[,1] <- tau2*ymu
l1[,2] <- 1/tau - tau*ymu2

## the second derivatives
## order mm,ms,ss
l2 <- cbind(-tau2,2*l1[,1]/tau,-ymu2 - 1/tau2)

## need some link derivatives for derivative transform
ig1 <- cbind(family\$linfo[[1]]\$mu.eta(eta),family\$linfo[[2]]\$mu.eta(eta1))
}

l3 <- l4 <- g3 <- g4 <- 0 ## defaults

if (deriv>1) {
## the third derivatives
## order mmm,mms,mss,sss
if (TRUE) {
l3 <- cbind(0,-2*tau,2*ymu,2/tau^3)
} else { ## test infrastructure for dropping zero columns
l3 <- cbind(-2*tau,2*ymu,2/tau^3)
attr(l3,"remap") <- c(0,1:3)
}
}

if (deriv>3) {
## the fourth derivatives
## order mmmm,mmms,mmss,msss,ssss
if (TRUE) {
l4 <- cbind(0,0,-2,0,-6/tau2^2)
} else { ## illustrates/tests 0 col dropping
l4 <- cbind(-2,-6/tau2^2)
attr(l4,"remap") <- c(0,0,1,0,2)
}
}
if (deriv) {
## symmetric-storage index arrays for the derivative transforms...
i2 <- family\$tri\$i2; i3 <- family\$tri\$i3
i4 <- family\$tri\$i4

## NOTE(review): 'g2' is passed below but never defined in this excerpt
## (only g3 and g4 default to 0 above) - the link second-derivative setup
## appears to be missing; confirm against the full source.
## transform derivates w.r.t. mu to derivatives w.r.t. eta...
de <- gamlss.etamu(l1,l2,l3,l4,ig1,g2,g3,g4,i2,i3,i4,deriv-1)

## get the gradient and Hessian...
ret <- gamlss.gH(X,jj,de\$l1,de\$l2,i2,l3=de\$l3,i3=i3,l4=de\$l4,i4=i4,
d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D,sandwich=sandwich)
if (ncv) {
## keep the per-observation derivatives for NCV computations...
ret\$l1 <- de\$l1; ret\$l2 = de\$l2; ret\$l3 = de\$l3
}
} else ret <- list()
ret\$l <- l; ret\$l0 <- l0; ret
} ## end ll gaulss

sandwich <- function(y,X,coef,wt,family,offset=NULL) {
## Filling for the sandwich covariance estimate: the cross-product form of
## the Hessian returned by the log likelihood at deriv = 1.
## NOTE(review): the supplied offset is not forwarded (NULL is passed) -
## confirm this is intentional.
  ll(y = y, X = X, coef = coef, wt = wt, family = family, offset = NULL,
     deriv = 1, sandwich = TRUE)$lbb
}

initialize <- expression({
## idea is to regress g(y) on model matrix for mean, and then
## to regress the corresponding log absolute residuals on
## the model matrix for log(sigma) - may be called in both
## gam.fit5 and initial.spg... note that appropriate E scaling
## for full calculation may be inappropriate for initialization
## which is basically penalizing something different here.
## best we can do here is to use E only as a regularizer.
n <- rep(1, nobs)
## should E be used unscaled or not?..
use.unscaled <- if (!is.null(attr(E,"use.unscaled"))) TRUE else FALSE
if (is.null(start)) {
jj <- attr(x,"lpi")
if (!is.null(offset)) offset[[3]] <- 0
## NOTE(review): the else branch of this if looks truncated in this
## excerpt - presumably a link transform of y was lost, leaving yt1
## undefined on the non-identity path; confirm against the full source.
yt1 <- if (family\$link[[1]]=="identity") y else
if (!is.null(offset[[1]])) yt1 <- yt1 - offset[[1]]
if (is.list(x)) { ## discrete case
start <- rep(0,max(unlist(jj)))
## NOTE(review): 'R' is used below but never assigned here - a pivoted
## Cholesky of the XWX cross-product (as in the multinom initialize)
## appears to have been lost from this excerpt.
Xty <- XWyd(x\$Xd,rep(1,length(y)),yt1,x\$kd,x\$ks,x\$ts,x\$dt,x\$v,x\$qc,x\$drop,lt=x\$lpid[[1]])
piv <- attr(R,"pivot")
rrank <- attr(R,"rank")
startji <- rep(0,ncol(R))
if (rrank<ncol(R)) {
## rank deficient: drop the trailing rows/cols of the Cholesky factor...
R <- R[1:rrank,1:rrank]
piv <- piv[1:rrank]
}
## solve the normal equations via the Cholesky factor...
startji[piv] <- backsolve(R,forwardsolve(t(R),Xty[piv]))
startji[!is.finite(startji)] <- 0
start[jj[[1]]] <- startji
eta1 <- Xbd(x\$Xd,start,k=x\$kd,ks=x\$ks,ts=x\$ts,dt=x\$dt,v=x\$v,qc=x\$qc,drop=x\$drop,lt=x\$lpid[[1]])
## NOTE(review): 'lres1' is used before being assigned - the log absolute
## residual construction (from y and eta1) appears to have been lost.
if (!is.null(offset[[2]])) lres1 <- lres1 - offset[[2]]
Xty <- XWyd(x\$Xd,rep(1,length(y)),lres1,x\$kd,x\$ks,x\$ts,x\$dt,x\$v,x\$qc,x\$drop,lt=x\$lpid[[2]])
piv <- attr(R,"pivot")
rrank <- attr(R,"rank")
startji <- rep(0,ncol(R))
if (rrank<ncol(R)) {
R <- R[1:rrank,1:rrank]
piv <- piv[1:rrank]
}
startji[piv] <- backsolve(R,forwardsolve(t(R),Xty[piv]))
start[jj[[2]]] <- startji
} else { ## regular case
start <- rep(0,ncol(x))
x1 <- x[,jj[[1]],drop=FALSE]
e1 <- E[,jj[[1]],drop=FALSE] ## square root of total penalty
#ne1 <- norm(e1); if (ne1==0) ne1 <- 1
if (use.unscaled) {
qrx <- qr(rbind(x1,e1))
x1 <- rbind(x1,e1)
startji <- qr.coef(qr(x1),c(yt1,rep(0,nrow(E))))
startji[!is.finite(startji)] <- 0
} else startji <- pen.reg(x1,e1,yt1)
start[jj[[1]]] <- startji
## NOTE(review): as above, 'lres1' is used before assignment - the line
## constructing the log absolute residuals appears to be missing.
if (!is.null(offset[[2]])) lres1 <- lres1 - offset[[2]]
x1 <-  x[,jj[[2]],drop=FALSE];e1 <- E[,jj[[2]],drop=FALSE]
#ne1 <- norm(e1); if (ne1==0) ne1 <- 1
if (use.unscaled) {
x1 <- rbind(x1,e1)
startji <- qr.coef(qr(x1),c(lres1,rep(0,nrow(E))))
startji[!is.finite(startji)] <- 0
} else startji <- pen.reg(x1,e1,lres1)
start[jj[[2]]] <- startji
}
}
}) ## initialize gaulss

rd <- function(mu,wt,scale) {
## Simulate gaulss responses: mu[,1] is the mean and mu[,2] the precision
## tau = 1/sigma, so the standard deviation is sqrt(scale/wt)/mu[,2].
  sdev <- sqrt(scale / wt) / mu[, 2]
  rnorm(nrow(mu), mean = mu[, 1], sd = sdev)
} ## gaulss rd

tri = trind.generator(2), ## symmetric indices for accessing derivative arrays
initialize=initialize,postproc=postproc,residuals=residuals,
linfo = stats,rd=rd, ## link information list
ls=1, ## signals that ls not needed here
available.derivs = 2, ## can use full Newton here
discrete.ok = TRUE
),class = c("general.family","extended.family","family"))
} ## end gaulss

multinom <- function(K=1) {
## general family for multinomial logistic regression model...
## accepts no links, since the model is parameterized directly in terms of
## the linear predictors.
## 1. get derivatives wrt mu, tau
## 2. get required link derivatives and tri indices.
## 3. transform derivs to derivs wrt eta (gamlss.etamu).
## 4. get the grad and Hessian etc for the model
##    via a call to gamlss.gH
## the first derivatives of the log likelihood w.r.t
## the first and second parameters...

if (K<1) stop("number of categories must be at least 2")
stats <- list()

for (i in 1:K) {
mu.eta=stats[[i]]\$mu.eta),
class="family")
}

residuals <- function(object, type = c("deviance")) {
## Deviance residuals for multinom: magnitude from the log probability of
## the observed class; sign is positive when the most probable class
## matches the observation and negative otherwise.
  type <- match.arg(type)
  ## category probability matrix (columns are classes 0..K)...
  prob <- object$family$predict(object$family, eta = object$fitted.values)[[1]] ## changed from linear predictor for consistency
  nobs <- nrow(prob)
  ## most probable class (0-based) for each observation...
  best <- apply(prob, 1, function(row) which(row == max(row))[1]) - 1
  ## +ve sign if classification correct, -ve otherwise...
  sgn <- ifelse(best == object$y, 1, -1)
  ## probability of the observed class, guarded away from zero...
  p.obs <- pmax(.Machine$double.eps, prob[1:nobs + object$y * nobs])
  sgn * sqrt(-2 * log(p.obs))
} ## multinom residuals

predict <- function(family,se=FALSE,eta=NULL,y=NULL,X=NULL,
beta=NULL,off=NULL,Vb=NULL) {
## optional function to give predicted values - idea is that
## predict.gam(...,type="response") will use this, and that
## either eta will be provided, or {X, beta, off, Vb}. family\$data
## contains any family specific extra information.
## if se = FALSE returns one item list containing matrix otherwise
## list of two matrices "fit" and "se.fit"...

if (is.null(eta)) {
discrete <- is.list(X)
lpi <- attr(X,"lpi")
if (is.null(lpi)) {
lpi <- list(1:ncol(X))
}
K <- length(lpi) ## number of linear predictors
nobs <- if (discrete) nrow(X\$kd) else nrow(X)
eta <- matrix(0,nobs,K)
if (se) {
ve <- matrix(0,nobs,K) ## variance of eta
ce <- matrix(0,nobs,K*(K-1)/2) ## covariance of eta_i eta_j
}
ii <- 0
for (i in 1:K) {
if (discrete) {
eta[,i] <- Xbd(X\$Xd,beta,k=X\$kd,ks=X\$ks,ts=X\$ts,dt=X\$dt,v=X\$v,qc=X\$qc,drop=X\$drop,lt=X\$lpid[[i]])
} else {
Xi <- X[,lpi[[i]],drop=FALSE]
eta[,i] <- Xi%*%beta[lpi[[i]]] ## ith linear predictor
}
if (!is.null(off[[i]])) eta[,i] <- eta[,i] + off[[i]]
if (se) { ## variance and covariances for kth l.p.

## NOTE(review): the next line appears truncated in this excerpt - its
## left hand side (presumably ve[,i] <- if (discrete) diagXVXd(...) has
## been lost; confirm against the full source.
lt=X\$lpid[[i]],rt=X\$lpid[[i]]) else drop(pmax(0,rowSums((Xi%*%Vb[lpi[[i]],lpi[[i]]])*Xi)))
## ii <- 0 BUGGY location!
if (i<K) for (j in (i+1):K) {
ii <- ii + 1
## NOTE(review): this line is similarly truncated - presumably
## ce[,ii] <- if (discrete) diagXVXd(...) was lost.
lt=X\$lpid[[i]],rt=X\$lpid[[j]]) else drop(pmax(0,rowSums((Xi%*%Vb[lpi[[i]],lpi[[j]]])*X[,lpi[[j]]])))
}
}
}
} else {
se <- FALSE
}
## softmax: category probabilities from cbind(1, exp(eta))...
gamma <- cbind(1,exp(eta))
beta <- rowSums(gamma)
gamma <- gamma/beta ## category probabilities
vp <- gamma*0
if (se) { ## need to loop to find se of probabilities...
for (j in 1:(K+1)) {
## get dp_j/deta_k...
if (j==1) dp <- -gamma[,-1,drop=FALSE]/beta else {
dp <- -gamma[,j]*gamma[,-1,drop=FALSE]
dp[,j-1] <- gamma[,j]*(1-gamma[,j])
}
## now compute variance...
vp[,j] <- rowSums(dp^2*ve)
ii <- 0
## add the covariance contributions from each eta_i, eta_k pair...
for (i in 1:K) if (i<K) for (k in (i+1):K) {
ii <- ii + 1
vp[,j] <- vp[,j] + 2 * dp[,i]*dp[,k]*ce[,ii]
}
vp[,j] <- sqrt(pmax(0,vp[,j])) ## transform to se
}
return(list(fit=gamma,se.fit=vp))
} ## if se
list(fit=gamma)
} ## multinom predict

postproc <- expression({
## code to evaluate in estimate.gam, to evaluate null deviance
## The intercept-only multinomial MLE assigns each class its observed
## frequency nj/n; the linear system below solves for the corresponding
## exp(eta_j) (with exp(eta_0) fixed at 1).
multinom <- list()
object\$y <- round(object\$y)
multinom\$nj <- tabulate(object\$y+1) ## count each class membership
multinom\$n <- sum(multinom\$nj) ## total number
multinom\$K <- length(multinom\$nj)-1 ## number of linear predictors
## compute exp(eta) for categories 1..K
multinom\$gamma <- c(1,solve(diag(multinom\$n/multinom\$nj[-1],multinom\$K)-
matrix(1,multinom\$K,multinom\$K),rep(1,multinom\$K)))
## convert to log class probabilities, then null deviance is
## -2 * sum over observations of the log probability of its class...
multinom\$gamma <- log(multinom\$gamma/sum(multinom\$gamma))
object\$null.deviance <- -2*sum(multinom\$gamma[object\$y+1])
})

ncv <- function(X,y,wt,nei,beta,family,llf,H=NULL,Hi=NULL,R=NULL,offset=NULL,dH=NULL,db=NULL,deriv=FALSE,nt=1) {
## Neighbourhood cross-validation: delegate to the shared gamlss routine,
## forwarding every argument by name.
  gamlss.ncv(X = X, y = y, wt = wt, nei = nei, beta = beta, family = family,
             llf = llf, H = H, Hi = Hi, R = R, offset = offset, dH = dH,
             db = db, deriv = deriv, nt = nt)
} ## ncv

ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL,eta=NULL,ncv=FALSE,sandwich=FALSE) {
## Function defining the logistic multinomial model log lik.
## Assumption is that coding runs from 0..K, with 0 class having no l.p.
## ... this matches binary log reg case...
## deriv: 0 - eval
##        1 - grad and Hess
##        2 - diagonal of first deriv of Hess
##        3 - first deriv of Hess
##        4 - everything.
n <- length(y)
jj <- attr(X,"lpi") ## extract linear predictor index
if (is.null(eta)) {
discrete <- is.list(X)
##return.l <- FALSE
K <- length(jj) ## number of linear predictors
eta <- matrix(1,n,K+1) ## linear predictor matrix (dummy 1's in first column)
if (is.null(offset)) offset <- list()
offset[[K+1]] <- 0
for (i in 1:K) if (is.null(offset[[i]])) offset[[i]] <- 0
for (i in 1:K) eta[,i+1] <- offset[[i]] + if (discrete) Xbd(X\$Xd,coef,k=X\$kd,ks=X\$ks,ts=X\$ts,dt=X\$dt,v=X\$v,qc=X\$qc,
drop=X\$drop,lt=X\$lpid[[i]]) else X[,jj[[i]],drop=FALSE]%*%coef[jj[[i]]]
} else { l2 <- 0;K <- ncol(eta);eta <- cbind(1,eta)} ##; return.l <- TRUE}

if (K!=family\$nlp) stop("number of linear predictors doesn't match")
y <- round(y) ## just in case
if (min(y)<0||max(y)>K) stop("response not in 0 to number of predictors + 1")

## beta = 1 + sum_j exp(eta_j) is the softmax normalizer; alpha its log...
ee <- exp(eta[,-1,drop=FALSE])
beta <- 1 + rowSums(ee); alpha <- log(beta)

## eta[1:n+y*n] picks, for each row, the l.p. column of its class...
l0 <- eta[1:n+y*n] - alpha ## log likelihood
l <- sum(l0)

l1 <- matrix(0,n,K) ## first deriv matrix

if (deriv>0) {
## start with the derivatives of alpha (finished into l1 below)...
for (i in 1:K) l1[,i] <- ee[,i]/beta ## alpha1

## the second derivatives...

l2 <- matrix(0,n,K*(K+1)/2)
ii <- 0; b2 <- beta^2
for (i in 1:K) for (j in i:K) {
ii <- ii + 1 ## column index
l2[,ii] <- if (i==j) -l1[,i] + ee[,i]^2/b2 else (ee[,i]*ee[,j])/b2
}

## finish first derivatives...
for (i in 1:K) l1[,i] <- as.numeric(y==i) - l1[,i]

} ## if (deriv>0)

l3 <- l4 <- 0 ## defaults

## NOTE(review): 'tri' is used below but never assigned in this excerpt -
## presumably a line tri <- family$tri was lost; confirm against source.
if (deriv>1) { ## the third derivatives...
l3 <- matrix(0,n,(K*(K+3)+2)*K/6)
ii <- 0; b3 <- b2 * beta
for (i in 1:K) for (j in i:K) for (k in j:K) {
ii <- ii + 1 ## column index
if (i==j&&j==k) { ## all same
l3[,ii] <- l2[,tri\$i2[i,i]] + 2*ee[,i]^2/b2 - 2*ee[,i]^3/b3
} else if (i!=j&&j!=k&i!=k) { ## all different
l3[,ii] <- -2*(ee[,i]*ee[,j]*ee[,k])/b3
} else { ## two same one different
kk <- if (i==j) k else j ## get indices for differing pair
l3[,ii] <- l2[,tri\$i2[i,kk]] - 2*(ee[,i]*ee[,j]*ee[,k])/b3
}
}
} ## if (deriv>1)

if (deriv>3) { ## the fourth derivatives...
l4 <- matrix(0,n,(6+K*11+K^2*6+K^3)*K/24)
ii <- 0; b4 <- b3 * beta
for (i in 1:K) for (j in i:K) for (k in j:K) for (l in k:K) {
ii <- ii + 1 ## column index
uni <- unique(c(i,j,k,l));
nun <- length(uni) ## number of unique indices
if (nun==1) { ## all equal
l4[,ii] <- l3[,tri\$i3[i,i,i]] + 4*ee[,i]^2/b2 - 10*ee[,i]^3/b3 + 6*ee[,i]^4/b4
} else if (nun==4) { ## all unequal
l4[,ii] <- 6*ee[,i]*ee[,j]*ee[,k]*ee[,l]/b4
} else if (nun==3) { ## 2 same 2 different
l4[,ii] <- l3[,tri\$i3[uni[1],uni[2],uni[3]]]  +6*ee[,i]*ee[,j]*ee[,k]*ee[,l]/b4
} else if (sum(uni[1]==c(i,j,k,l))==2) { ## 2 unique (2 of each)
l4[,ii] <- l3[,tri\$i3[uni[1],uni[2],uni[2]]] - 2 * ee[,uni[1]]^2*ee[,uni[2]]/b3 +
6*ee[,i]*ee[,j]*ee[,k]*ee[,l]/b4
} else { ## 3 of one 1 of the other
if (sum(uni[1]==c(i,j,k,l))==1) uni <- uni[2:1] ## first index is triple repeat index
l4[,ii] <- l3[,tri\$i3[uni[1],uni[1],uni[2]]] - 4 * ee[,uni[1]]^2*ee[,uni[2]]/b3 +
6*ee[,i]*ee[,j]*ee[,k]*ee[,l]/b4
}
}
} ## if deriv>3

##if (return.l) return(list(l=l0,l1=l1,l2=l2,l3=l3,l4=l4)) ## for testing...

if (deriv) {
## get the gradient and Hessian...
ret <- gamlss.gH(X,jj,l1,l2,tri\$i2,l3=l3,i3=tri\$i3,l4=l4,i4=tri\$i4,
d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D,sandwich=sandwich)
if (ncv) { ret\$l1=l1; ret\$l2=l2; ret\$l3=l3 }
} else ret <- list()
ret\$l <- l; ret
} ## end ll multinom

sandwich <- function(y,X,coef,wt,family,offset=NULL) {
## Filling for the sandwich covariance estimate: the cross-product form of
## the Hessian returned by the log likelihood at deriv = 1.
## NOTE(review): the supplied offset is not forwarded (NULL is passed) -
## confirm this is intentional.
  ll(y = y, X = X, coef = coef, wt = wt, family = family, offset = NULL,
     deriv = 1, sandwich = TRUE)$lbb
}

rd <- function(mu,wt,scale) {
## Simulate multinomial class labels 0..K given the n x K matrix of
## linear predictors in mu: softmax over cbind(0, mu) gives the class
## probabilities, then one uniform draw per row selects a class from the
## cumulative distribution.
  prob <- exp(cbind(0, mu))
  prob <- prob / rowSums(prob)
  cdf <- t(apply(prob, 1, cumsum))
  apply(cdf, 1, function(ci) min(which(ci > runif(1)))) - 1
} ## rd

initialize <- expression({
## Binarize each category and lm on 6*y-3 by category.

n <- rep(1, nobs)
## should E be used unscaled or not?..
use.unscaled <- if (!is.null(attr(E,"use.unscaled"))) TRUE else FALSE
if (is.null(start)) {
jj <- attr(x,"lpi")
if (is.list(x)) { ## discrete case
start <- rep(0,max(unlist(jj)))
for (k in 1:length(jj)) { ## loop over the linear predictors
## target: +3 for membership of class k, -3 otherwise...
yt1 <- 6*as.numeric(y==k)-3
## NOTE(review): the chol(XWXd(...)) call below appears truncated in this
## excerpt (argument list incomplete / parentheses unbalanced); confirm
## against the full source.
R <- suppressWarnings(chol(XWXd(x\$Xd,w=rep(1,length(y)),k=x\$kd,ks=x\$ks,ts=x\$ts,dt=x\$dt,
pivot=TRUE))
Xty <- XWyd(x\$Xd,rep(1,length(y)),yt1,x\$kd,x\$ks,x\$ts,x\$dt,x\$v,x\$qc,x\$drop,lt=x\$lpid[[k]])
piv <- attr(R,"pivot")
rrank <- attr(R,"rank")
startji <- rep(0,ncol(R))
if (rrank<ncol(R)) {
## rank deficient: truncate the pivoted Cholesky factor...
R <- R[1:rrank,1:rrank]
piv <- piv[1:rrank]
}
## solve the normal equations via the Cholesky factor...
startji[piv] <- backsolve(R,forwardsolve(t(R),Xty[piv]))
startji[!is.finite(startji)] <- 0
start[jj[[k]]] <- startji

} ## lp loop
} else { ## regular case
start <- rep(0,ncol(x))
for (k in 1:length(jj)) { ## loop over the linear predictors
yt1 <- 6*as.numeric(y==k)-3
x1 <- x[,jj[[k]],drop=FALSE]
e1 <- E[,jj[[k]],drop=FALSE] ## square root of total penalty
if (use.unscaled) {
qrx <- qr(rbind(x1,e1))
x1 <- rbind(x1,e1)
startji <- qr.coef(qr(x1),c(yt1,rep(0,nrow(E))))
startji[!is.finite(startji)] <- 0
} else startji <- pen.reg(x1,e1,yt1)
start[jj[[k]]] <- startji ## copy coefficients back into overall start coef vector
} ## lp loop
}
}
}) ## initialize multinom

nlp=round(K),rd=rd,ncv=ncv,sandwich=sandwich,
tri = trind.generator(K), ## symmetric indices for accessing derivative arrays
initialize=initialize,postproc=postproc,residuals=residuals,predict=predict,
linfo = stats, ## link information list
ls=1, ## signals that ls not needed here
available.derivs = 2, ## can use full Newton here
discrete.ok = TRUE
),class = c("general.family","extended.family","family"))
} ## end multinom

pen.reg <- function(x,e,y) {
## Penalized regression of y on x, with square root penalty matrix e used
## purely as a regularizer: the weight on e is tuned so the effective
## degrees of freedom stay close to (a fixed fraction of) the rank of x.
  if (sum(abs(e)) == 0) {
    ## no penalization - plain least squares...
    beta <- qr.coef(qr(x), y)
    beta[!is.finite(beta)] <- 0
    return(beta)
  }
  ## QR decompose x once up front, so the penalty-weight search only
  ## works with the small R factor...
  dx <- qr(x, LAPACK = TRUE)
  Rf <- qr.R(dx)
  nc <- ncol(Rf)
  rx <- Rrank(Rf) ## rank of R/X
  Rf[, dx$pivot] <- Rf ## undo the LAPACK column pivoting
  Qty <- qr.qty(dx, y)[1:ncol(Rf)]
  ## start from a small penalty weight; target EDF is k * rx with k
  ## somewhere in e.g. (.7,.9)...
  w <- .01 * norm(Rf) / norm(e)
  dr <- qr(rbind(Rf, e * w))
  edf <- sum(qr.Q(dr)[1:nc, ]^2)
  ## rank of e less the rank of the space penalized by e but outside the
  ## range space of x: how much the penalty can change the EDF by at all.
  ## Needed for corner cases, e.g. a penalty imposed for identifiability
  ## that only penalizes the null space of x...
  re <- min(sum(colSums(abs(e)) != 0), nrow(e)) - Rrank(qr.R(dr)) + rx
  while (edf > rx - .1 * re) { ## penalty too weak - increase it
    w <- w * 10
    dr <- qr(rbind(Rf, e * w))
    edf <- sum(qr.Q(dr)[1:nc, ]^2)
  }
  while (edf < .7 * rx) { ## penalty too strong - back it off
    w <- w / 20
    dr <- qr(rbind(Rf, e * w))
    edf <- sum(qr.Q(dr)[1:nc, ]^2)
  }
  beta <- qr.coef(dr, c(Qty, rep(0, nrow(e))))
  beta[!is.finite(beta)] <- 0
  beta
} ## pen.reg

## code for zero inflated Poisson models

#log1ex <- function(x) {
## evaluate log(1+exp(x)) accurately and avoiding overflow
#  y <- x
#  big <- -log(.Machine\$double.eps)+5 ## exp(big) overwhelms 1
#  ind <- x > big
#  y[ind] <- x[ind] ## log(1+exp(x)) = x to machine precision
#  ## compute number below which log(1+exp(x)) = exp(x) to
#  ## machine precision...
#  small <- log(sqrt(.Machine\$double.eps))
#  ind1 <- x < small
#  y[ind1] <- exp(x[ind1])
#  ind <- !ind&!ind1 ## the moderate size elements
#  y[ind] <- log(1+exp(x[ind]))
#  y
#}

#logist <- function(x) {
## overflow proof logistic
#  ind <- x > 0; y <- x
#  y[ind] <- 1/(exp(-x[ind])+1)
#  ex <- exp(x[!ind])
#  y[!ind] <- ex/(1+ex)
#  y
#}

l1ee <- function(x) {
## Evaluate log(1 - exp(-exp(x))) stably.
## For very negative x, 1 - exp(-e^x) is computed from the series
## e^x - e^(2x)/2 + e^(3x)/6; below -log(double.xmax) the answer is x
## itself to machine precision.
  ex <- exp(x)
  out <- log(1 - exp(-ex))
  low <- x < log(.Machine$double.eps) / 3
  exl <- ex[low]
  out[low] <- log(exl - exl^2 / 2 + exl^3 / 6)
  vlow <- x < -log(.Machine$double.xmax)
  out[vlow] <- x[vlow]
  out
}

lee1 <- function(x) {
## Evaluate log(exp(exp(x)) - 1) stably.
## Small x: use the series exp(e^x) - 1 ~ e^x + e^(2x)/2 + e^(3x)/6;
## very negative x: the result is x to machine precision; large x (where
## exp(e^x) would overflow): the result is e^x.
  ex <- exp(x)
  out <- log(exp(ex) - 1)
  low <- x < log(.Machine$double.eps) / 3
  exl <- ex[low]
  out[low] <- log(exl + exl^2 / 2 + exl^3 / 6)
  vlow <- x < -log(.Machine$double.xmax)
  out[vlow] <- x[vlow]
  high <- x > log(log(.Machine$double.xmax))
  out[high] <- ex[high]
  out
}

ldg <- function(g,deriv=4) {
## Derivatives w.r.t. gamma of the y>0 part of the ZIP log likelihood
## (used by zipll, which labels the result "derivs of ll wrt gamma").
## Built around alpha(g) = exp(g)/(1-exp(-exp(g))), with series
## approximations in the lower tail and overflow-proof caps above.
alpha <- function(g) {
## alpha(g) = e^g/(1-exp(-e^g)); for small g use the series
## 1 + e^g/2 + e^(2g)/12 to avoid 0/0...
ind <- g > log(.Machine\$double.eps)/3
eg <- exp(g)
g[ind] <- eg[ind]/(1-exp(-eg[ind]))
g[!ind] <- 1+eg[!ind]/2 + eg[!ind]^2/12
g
}
ind <- g < log(.Machine\$double.eps)/3 ## lower-tail (series) region
ghi <- log(log(.Machine\$double.xmax)) + 1
## ... above ghi alpha(g) is simply exp(g)
ii <- g>ghi
a <- alpha(g)
eg <- exp(g)
l2 <- a*(a-eg-1)
egi <- eg[ind]
## in the lower tail alpha = 1 + b, where b = eg/2 + eg^2/12
## so l'' = alpha*(b-eg)...
b <- egi*(1+egi/6)/2
l2[ind] <- a[ind]*(b-egi)
l2[ii] <- -exp(g[ii]) ## upper tail: l2 ~ -e^g
l3 <- l4 <- NULL
## in a similar vein l3 can be robustified...
if (deriv>1) {
l3 <- a*(a*(-2*a + 3*(eg+1)) - 3*eg - eg^2 - 1)
l3[ind] <- a[ind]*(-b-2*b^2+3*b*egi-egi^2)
l3[ii] <- -exp(g[ii])
}
## finally l4, which requires a similar approach...
if (deriv>2) {
l4 <- a*(6*a^3 - 12*(eg+1)*a^2+4*eg*a+7*(eg+1)^2*a-(4+3*eg)*eg -(eg+1)^3)
l4[ind] <- a[ind]*(6*b*(3+3*b+b^2) - 12*egi*(1+2*b+b^2) - 12*b*(2-b) + 4*egi*(1+b)+
7*(egi^2+2*egi+b*egi^2+2*b*egi+b)-(4+3*egi)*egi-egi*(3+3*egi+egi^2))

l4[ii] <- -exp(g[ii])
}
## first derivative is just -alpha (note: '=' used for assignment here)...
l1=-a
## cap all derivatives for very large g so nothing overflows...
ghi <- log(.Machine\$double.xmax)/5
ii <- g > ghi
if (sum(ii)) {
l1[ii] <- l2[ii] <- l3[ii] <- l4[ii] <- -exp(ghi)
}
list(l1=l1,l2=l2,l3=l3,l4=l4)
} ## ldg

lde <- function(eta,deriv=4) {
## log lik derivs w.r.t. eta
## Derivatives w.r.t. eta of the y>0 part of the ZIP log likelihood (used
## by zipll). Built around l1 = e^eta/(exp(e^eta)-1), with lower-tail
## series approximations and upper-tail zero limits.
ind <- eta < log(.Machine\$double.eps)/3 ## lower-tail (series) region
ii <- eta > log(.Machine\$double.xmax)   ## upper tail: exp(et) overflows
l1 <- et <- exp(eta);eti <- et[ind]
l1[!ind] <- et[!ind]/(exp(et[!ind])-1)
## lower tail: l1 = 1 + b with b = -et*(1+et/6)/2...
b <- -eti*(1+eti/6)/2
l1[ind] <- 1+b
l1[ii] <- 0
## l2 ...
l2 <- l1*((1-et)-l1)
l2[ind] <- -b*(1+eti+b) - eti
l2[ii] <- 0
l3 <- l4 <- NULL
## l3 ...
if (deriv>1) {
## wider upper-tail cutoff since l3 involves et^2 scale terms...
ii <- eta > log(.Machine\$double.xmax)/2
l3 <- l1*((1-et)^2-et - 3*(1-et)*l1 + 2*l1^2)
l3[ind] <- l1[ind]*(-3*eti+eti^2 -3*(-eti+b-eti*b) + 2*b*(2+b))
l3[ii] <- 0
}
## l4 ...
if (deriv>2) {
ii <- eta > log(.Machine\$double.xmax)/3
l4 <- l1*((3*et-4)*et + 4*et*l1 + (1-et)^3 - 7*(1-et)^2*l1 + 12*(1-et)*l1^2
- 6*l1^3)
l4[ii] <- 0
l4[ind] <- l1[ind]*(4*l1[ind]*eti - eti^3 - b -7*b*eti^2 - eti^2 - 5*eti -
10*b*eti - 12*eti*b^2 - 6*b^2 - 6*b^3)
}
list(l1=l1,l2=l2,l3=l3,l4=l4)
} ## lde

zipll <- function(y,g,eta,deriv=0) {
## Zero inflated Poisson log likelihood and its derivatives w.r.t.
## g (= gamma) and eta, where 1-p = exp(-exp(eta)) and lambda = exp(g),
## for each datum in y. p is the probability of potential presence and
## lambda the Poisson mean given potential presence.
## deriv: 0 - eval only
##        1 - grad (l,p) and Hess (ll,lp,pp)
##        2 - also third derivs lll,llp,lpp,ppp
##        4 - also 4th derivs llll,lllp,llpp,lppp,pppp
  l1 <- El2 <- l2 <- l3 <- l4 <- NULL
  is0 <- y == 0   ## index of the zeroes
  pos <- !is0
  ypos <- y[pos]
  et <- exp(eta)
  l <- et
  ## log lik: a zero contributes log(1-p) = -exp(eta); a positive count
  ## contributes log(p) plus the zero-truncated Poisson terms...
  l[is0] <- -et[is0]
  l[pos] <- l1ee(eta[pos]) + ypos*g[pos] - lee1(g[pos]) - lgamma(ypos+1)
  p <- 1-exp(-et) ## probability of non-zero

  if (deriv>0) { ## first and second derivatives...
    n <- length(y)
    le <- lde(eta,deriv) ## derivs of ll wrt eta
    lg <- ldg(g,deriv)   ## derivs of ll wrt gamma
    l1 <- matrix(0,n,2)
    l1[pos,1] <- ypos + lg$l1[pos] ## l_gamma, y>0
    l1[is0,2] <- l[is0]            ## l_eta, y==0
    l1[pos,2] <- le$l1[pos]        ## l_eta, y>0

    El2 <- l2 <- matrix(0,n,3)
    ## order gg, ge, ee...
    l2[pos,1] <- lg$l2[pos] ## l_gg, y>0
    l2[pos,3] <- le$l2[pos] ## l_ee, y>0
    l2[is0,3] <- l[is0]     ## l_ee, y=0
    El2[,1] <- p*lg$l2             ## E(l_gg)
    El2[,3] <- -(1-p)*et + p*le$l2 ## E(l_ee)
  }
  if (deriv>1) { ## third derivatives, order ggg,gge,gee,eee...
    l3 <- matrix(0,n,4)
    l3[pos,1] <- lg$l3[pos] ## l_ggg, y>0
    l3[pos,4] <- le$l3[pos] ## l_eee, y>0
    l3[is0,4] <- l[is0]     ## l_eee, y=0
  }
  if (deriv>3) { ## fourth derivatives, order gggg,ggge,ggee,geee,eeee...
    l4 <- matrix(0,n,5)
    l4[pos,1] <- lg$l4[pos] ## l_gggg, y>0
    l4[pos,5] <- le$l4[pos] ## l_eeee, y>0
    l4[is0,5] <- l[is0]     ## l_eeee, y=0
  }
  list(l=l,l1=l1,l2=l2,l3=l3,l4=l4,El2=El2)
} ## zipll

## Extended family for Zero Inflated Poisson fitted as gamlss
## type model.
## mu1 is Poisson mean, while mu2 is zero inflation parameter.
## first deal with links and their derivatives...
stats <- list()
param.names <- c("Poisson mean","binary probability")
for (i in 1:2) {
mu.eta=stats[[i]]\$mu.eta),
class="family")
}

residuals <- function(object,type=c("deviance","response")) {
## Residuals for ziplss: "response" gives y - E(y); "deviance" gives the
## signed square root of the deviance relative to the saturated ZIP
## likelihood.
  ls <- function(y) {
  ## compute saturated likelihood for ziplss model
    l <- y;l[y<2] <- 0
    ind <- y > 1 & y < 18
    ## lambda maximizing likelihood for y = 2 to 17 (essentially y above
    ## that)...
    glo <- c(1.593624,2.821439,3.920690,4.965114,5.984901,6.993576,
             7.997309,8.998888,9.999546,10.999816,11.999926,12.999971,
             13.999988,14.999995,15.999998,16.999999)
    g <- y ## maximizing lambda essentially y above this
    g[ind] <- glo[y[ind]-1]
    ind <- y > 1
    l[ind] <- zipll(y[ind],log(g[ind]),g[ind]*0+1e10,deriv=0)$l
    l
  } ## ls

  type <- match.arg(type)
  ## FIX: p is the probability of presence, 1 - exp(-exp(eta)); the
  ## leading "1 - " was missing, inverting the probability used in E(y).
  ## (Same quantity as p in predict(); the Ey comment below only makes
  ## sense for the presence probability.)
  p <- 1 - exp(-exp(object$fitted[,2]))
  lam <- exp(object$fitted[,1]) ## conditional Poisson mean
  ind <- lam > .Machine$double.eps^.5
  ## compute E(y) = p * (zero-truncated Poisson mean)...
  Ey <- p ## very small lambda causes conditional expectation to be 1
  Ey[ind] <- p[ind]*lam[ind]/(1-exp(-lam[ind]))
  rsd <- object$y - Ey ## raw residuals
  if (type=="response") return(rsd)
  else { ## compute deviance residuals
    sgn <- sign(rsd)
    ind <- object$y == 0
    rsd <- pmax(0,2*(ls(object$y) - zipll(object$y,object$fitted[,1],object$fitted[,2],deriv=0)$l))
    rsd <- sqrt(rsd)*sgn
  }
  rsd
} ## ziplss residuals

predict <- function(family,se=FALSE,eta=NULL,y=NULL,X=NULL,
                    beta=NULL,off=NULL,Vb=NULL) {
## optional function to give predicted values - idea is that
## predict.gam(...,type="response") will use this, and that
## either eta will be provided, or {X, beta, off, Vb}. family$data
## contains any family specific extra information.
## if se = FALSE returns one item list containing matrix otherwise
## list of two matrices "fit" and "se.fit"...

  if (is.null(eta)) {
    if (is.null(off)) off <- list(0,0)
    off[[3]] <- 0
    for (i in 1:2) if (is.null(off[[i]])) off[[i]] <- 0
    lpi <- attr(X,"lpi")
    X1 <- X[,lpi[[1]],drop=FALSE]
    X2 <- X[,lpi[[2]],drop=FALSE]
    gamma <- drop(X1%*%beta[lpi[[1]]] + off[[1]]) ## linear predictor for poisson parameter
    eta <- drop(X2%*%beta[lpi[[2]]] + off[[2]])  ## linear predictor for presence parameter
    if (se) {
      v.g <- drop(pmax(0,rowSums((X1%*%Vb[lpi[[1]],lpi[[1]]])*X1))) ## var of gamma
      ## FIX: the variance of eta must use the second linear predictor's
      ## model matrix and covariance block - the original line duplicated
      ## the v.g computation (X1, lpi[[1]])...
      v.e <- drop(pmax(0,rowSums((X2%*%Vb[lpi[[2]],lpi[[2]]])*X2))) ## var of eta
      ## NOTE(review): pmax(0,.) on a covariance clips negative values -
      ## kept as in the original; confirm it is intended.
      v.eg <- drop(pmax(0,rowSums((X1%*%Vb[lpi[[1]],lpi[[2]]])*X2))) ## cov of eta, gamma
    }
  } else { ## eta supplied directly as a 2 column matrix
    se <- FALSE
    gamma <- eta[,1]
    eta <- eta[,2]
  }
  et <- exp(eta)
  mu <- p <- 1 - exp(-et)     ## probability of presence
  fv <- lambda <- exp(gamma)  ## conditional Poisson mean
  ind <- gamma < log(.Machine$double.eps)/2
  ## zero-truncated Poisson mean; -> 1 as lambda -> 0...
  mu[!ind] <- lambda[!ind]/(1-exp(-lambda[!ind]))
  mu[ind] <- 1
  fv <- list(p*mu)    ## E(y)
  if (!se) return(fv) else {
    ## delta method s.e. of E(y): derivatives w.r.t. eta and gamma...
    df.de <- p
    ind <- eta < log(.Machine$double.xmax)/2
    df.de[!ind] <- 0
    df.de[ind] <- exp(-et[ind])*et[ind]
    df.de <- df.de * mu
    df.dg <- ((lambda + 1)*mu - mu^2)*p
    fv[[2]] <- sqrt(df.dg^2*v.g + df.de^2*v.e + 2 * df.de * df.dg * v.eg)
    names(fv) <- c("fit","se.fit")
    return(fv)
  }
} ## ziplss predict

rd <- function(mu,wt,scale) {
## Simulate ziplss responses from the fitted linear predictors: mu[,1] is
## the l.p. for the conditional Poisson mean, mu[,2] the l.p. for
## presence.
  rzip <- function(gamma,eta) { ## ziP deviates given the two l.p. vectors
    n <- length(gamma)
    y <- gamma
    lambda <- exp(gamma)
    p <- 1 - exp(-exp(eta)) ## probability of presence
    present <- p > runif(n)
    y[!present] <- 0
    np <- sum(present)
    ## zero truncated Poisson for the present cases: invert the cdf from
    ## a uniform restricted above P(y=0)...
    u <- runif(np,dpois(0,lambda[present]),1)
    ## qpois can produce infinite answers at low lambda if u gets too
    ## close to one - and it can get very close at low lambda! Cap u...
    one.eps <- 1 - .Machine$double.eps^.75
    u[u > one.eps] <- one.eps
    y[present] <- qpois(u,lambda[present])
    y
  }
  rzip(mu[,1],mu[,2])
} ## rd

postproc <- expression({
## code to evaluate in estimate.gam, to evaluate null deviance
## null model really has two parameters... probably need to newton iterate
## The two intercept-only parameters (p and lambda) are profiled out by
## two separate 1-d optimizations below.
ls <- function(y) {
## compute saturated likelihood for ziplss model
l <- y;l[y<2] <- 0
ind <- y > 1 & y < 18
## lambda maximizing likelihood for y = 2 to 17
glo <- c(1.593624,2.821439,3.920690,4.965114,5.984901,6.993576,
7.997309,8.998888,9.999546,10.999816,11.999926,12.999971,
13.999988,14.999995,15.999998,16.999999)
g <- y ## maximizing lambda essentially y above this
g[ind] <- glo[y[ind]-1]
ind <- y > 1
l[ind] <- zipll(y[ind],log(g[ind]),g[ind]*0+1e10,deriv=0)\$l
l
} ## ls

fp <- function(p,y) {
## compute zero related part of log likelihood
eps <- .Machine\$double.eps^.5
## stable log(1-p) via series for tiny p...
l1p <- if (p>eps) log(1-p) else -p - p^2/2
l1p*sum(y==0) + log(p)*sum(y>0)
} ## fp

flam <- function(lam,y) {
## compute >0 part of log likelihood
y <- y[y>0]
sum(y*log(lam) - log(exp(lam)-1) - lgamma(y+1))
} ## flam

## optimize zero related part of likelihood w.r.t. p...
lnull <- optimize(fp,interval=c(1e-60,1-1e-10),y=object\$y,maximum=TRUE)\$objective
## optimize >0 part for lambda...
my <- mean(object\$y[object\$y>0])
lnull <- lnull + optimize(flam,interval=c(my/2,my*2),y=object\$y,maximum=TRUE)\$objective
object\$null.deviance <- 2*(sum(ls(object\$y)) - lnull)

}) ## postproc

ncv <- function(X,y,wt,nei,beta,family,llf,H=NULL,Hi=NULL,R=NULL,offset=NULL,dH=NULL,db=NULL,deriv=FALSE,nt=1) {
## Neighbourhood cross-validation: delegate to the shared gamlss routine,
## forwarding every argument by name.
  gamlss.ncv(X = X, y = y, wt = wt, nei = nei, beta = beta, family = family,
             llf = llf, H = H, Hi = Hi, R = R, offset = offset, dH = dH,
             db = db, deriv = deriv, nt = nt)
} ## ncv

ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL,eta=NULL,ncv=FALSE,sandwich=FALSE) {
## function defining the gamlss ZIP model log lik.
## First l.p. defines Poisson mean, given presence (lambda)
## Second l.p. defines probability of presence (p)
## deriv: 0 - eval
##        1 - grad and Hess
##        2 - diagonal of first deriv of Hess
##        3 - first deriv of Hess
##        4 - everything.
if (is.null(offset)) offset <- list(0,0) else offset[[3]] <- 0
for (i in 1:2) if (is.null(offset[[i]])) offset[[i]] <- 0
jj <- attr(X,"lpi") ## extract linear predictor index
if (is.null(eta)) {
## form the two linear predictors from the model matrix...
eta <- X[,jj[[1]],drop=FALSE]%*%coef[jj[[1]]] + offset[[1]]
eta1 <- X[,jj[[2]],drop=FALSE]%*%coef[jj[[2]]] +offset[[2]]
} else { ## eta supplied
eta1 <- eta[,2]
eta <- eta[,1]
}

##n <- length(y)
## l1 <- matrix(0,n,2)
## NOTE(review): 'lambda' and 'p' are used here but never assigned in
## this excerpt - lines applying the inverse links to eta/eta1 appear to
## have been lost; confirm against the full source.
zl <- zipll(y,lambda,p,deriv)

if (deriv>0) {
## need some link derivatives for derivative transform
ig1 <- cbind(family\$linfo[[1]]\$mu.eta(eta),family\$linfo[[2]]\$mu.eta(eta1))
}

## l3 <- l4 <-
g3 <- g4 <- 0 ## defaults

## NOTE(review): the deriv>1 and deriv>3 blocks below contain only the
## ordering comments - the link third/fourth derivative computations they
## presumably held have been lost from this excerpt.
if (deriv>1) {
## the third derivatives
## order lll,llp,lpp,ppp
}

if (deriv>3) {
## the fourth derivatives
## order llll,lllp,llpp,lppp,pppp
}
if (deriv) {
i2 <- family\$tri\$i2; i3 <- family\$tri\$i3
i4 <- family\$tri\$i4

## NOTE(review): 'g2' is passed below but not defined in this excerpt -
## the link second derivative setup appears to be missing.
## transform derivates w.r.t. mu to derivatives w.r.t. eta...
de <- gamlss.etamu(zl\$l1,zl\$l2,zl\$l3,zl\$l4,ig1,g2,g3,g4,i2,i3,i4,deriv-1)

## get the gradient and Hessian...
ret <- gamlss.gH(X,jj,de\$l1,de\$l2,i2,l3=de\$l3,i3=i3,l4=de\$l4,i4=i4,
d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D,sandwich=sandwich)
if (ncv) {
## keep per-observation derivatives for NCV computations...
ret\$l1 <- de\$l1; ret\$l2 = de\$l2; ret\$l3 = de\$l3
}
} else ret <- list()
ret\$l <- sum(zl\$l); ret
} ## end ll for ZIP

sandwich <- function(y,X,coef,wt,family,offset=NULL) {
## Filling for the sandwich covariance estimate: the cross-product form of
## the Hessian returned by the log likelihood at deriv = 1.
## NOTE(review): the supplied offset is not forwarded (NULL is passed) -
## confirm this is intentional.
  ll(y = y, X = X, coef = coef, wt = wt, family = family, offset = NULL,
     deriv = 1, sandwich = TRUE)$lbb
}

initialize <- expression({ ## for ZIP
## Idea is to regress binarized y on model matrix for p.
## Then downweight any y=0 with p<0.5 and regress g(y) on
## the model matrix for lambda - don't drop as this may
## induce rank deficiency in model matrix!
## May be called in both gam.fit5 and initial.spg...
## note that appropriate E scaling
## for full calculation may be inappropriate for initialization
## which is basically penalizing something different here.
## best we can do here is to use E only as a regularizer.
n <- rep(1, nobs)
if (all.equal(y,round(y))!=TRUE) {
stop("Non-integer response variables are not allowed with ziplss ")
}
if ((min(y)==0&&max(y)==1)) stop("Using ziplss for binary data makes no sense")
## should E be used unscaled or not?..
use.unscaled <- if (!is.null(attr(E,"use.unscaled"))) TRUE else FALSE
if (is.null(start)) {
jj <- attr(x,"lpi")
start <- rep(0,ncol(x))
## first the presence l.p.: regress the binarized response...
x1 <- x[,jj[[2]],drop=FALSE]
e1 <- E[,jj[[2]],drop=FALSE] ## square root of total penalty
yt1 <- as.numeric(as.logical(y)) ## binarized response
if (use.unscaled) {
qrx <- qr(rbind(x1,e1))
x1 <- rbind(x1,e1)
startji <- qr.coef(qr(x1),c(yt1,rep(0,nrow(E))))
startji[!is.finite(startji)] <- 0
} else startji <- pen.reg(x1,e1,yt1)
start[jj[[2]]] <- startji
p <- drop(x1[1:nobs,,drop=FALSE] %*% startji) ## probability of presence
ind <- y==0 & p < 0.5 ## downweight these for estimating lambda
w <- rep(1,nobs); w[ind] <- .1

## note assumption that working scale is log...

## NOTE(review): yt1 is still the binarized response at this point; given
## the log working-scale note above, a line transforming y to the log
## scale may have been lost from this excerpt - confirm against source.
yt1 <- yt1*w

x1 <-  w*x[,jj[[1]],drop=FALSE];e1 <- E[,jj[[1]],drop=FALSE]
if (use.unscaled) {
x1 <- rbind(x1,e1)
startji <- qr.coef(qr(x1),c(yt1,rep(0,nrow(E))))
startji[!is.finite(startji)] <- 0
} else startji <- pen.reg(x1,e1,yt1)
start[jj[[1]]] <- startji
}
}) ## initialize ziplss

tri = trind.generator(2), ## symmetric indices for accessing derivative arrays
initialize=initialize,postproc=postproc,residuals=residuals,rd=rd,predict=predict,
linfo = stats, ## link information list
ls=1, ## signals that ls not needed here
available.derivs = 2 ## can use full Newton here
),class = c("general.family","extended.family","family"))
} ## ziplss

## General family for GEV location scale model...
## so mu is mu1, rho = log(sigma) is mu2 and xi is mu3
## 1. get derivatives wrt mu, rho and xi.
## 2. get required link derivatives and tri indices.
## 3. transform derivs to derivs wrt eta (gamlss.etamu).
## 4. get the grad and Hessian etc for the model
##    via a call to gamlss.gH

## first deal with links and their derivatives...
stats <- list()
for (i in 1:3) {
mu.eta=stats[[i]]\$mu.eta),
class="family")
}

if (link[[3]]=="logit") { ## shifted logit link to confine xi to (-1,.5)
## Smith '85 Biometrika shows that -1 limit needed for MLE consistency
## but would need -0.5 for normality...
## NOTE(review): only mu.eta and d2link-d4link are (re)defined here; the
## matching shifted linkfun/linkinv appear to have been lost from this
## excerpt - confirm against the full source.
stats[[3]]\$mu.eta <- function(eta) binomial()\$mu.eta(eta)*1.5
## link derivatives on the shifted scale: map xi in (-1,.5) back to (0,1)
## via (mu+1)/1.5, rescaling each derivative by the matching power of 1.5...
stats[[3]]\$d2link <- function(mu) { mu <- (mu+ 1)/1.5; (1/(1 - mu)^2 - 1/mu^2)/1.5^2}
stats[[3]]\$d3link <- function(mu) { mu <- (mu+ 1)/1.5; (2/(1 - mu)^3 + 2/mu^3)/1.5^3}
stats[[3]]\$d4link <- function(mu) {  mu <- (mu+ 1)/1.5; (6/(1-mu)^4 - 6/mu^4)/1.5^4}
}

residuals <- function(object,type=c("deviance","pearson","response")) {
## Residuals for the GEV location-scale family. Fitted columns are mu,
## rho = log(sigma) and the shape xi; the fitted mean is
## mu + sigma*(gamma(1-xi)-1)/xi.
  type <- match.arg(type)
  mu <- object$fitted[,1]
  rho <- object$fitted[,2]
  xi <- object$fitted[,3]
  y <- object$y
  fv <- mu + exp(rho)*(gamma(1-xi)-1)/xi
  ## keep xi away from 0 to avoid 0/0 in the formulae below...
  eps <- 1e-7
  xi[xi>=0&xi<eps] <- eps
  xi[xi<0&xi>-eps] <- -eps
  if (type=="deviance") {
    z <- 1 + (y-mu)*exp(-rho)*xi
    r <- (xi+1)/xi * log(z) + z^(-1/xi) +
         (1+xi)*log(1+xi) - (1 + xi) ## saturated part
    r <- sqrt(pmax(0,r))*sign(y-fv)
  } else if (type=="pearson") {
    ## standardize by the GEV standard deviation...
    sdev <- exp(rho)/xi*sqrt(pmax(0,gamma(1-2*xi)-gamma(1-xi)^2))
    r <- (y-fv)/sdev
  } else {
    r <- y-fv
  }
  r
} ## gevlss residuals

postproc <- expression({
  ## Evaluated within estimate.gam to supply a null deviance. For the GEV
  ## it is hard to define one sensibly: a null model guaranteed to keep
  ## the data inside its support need not be comparable to the full fit,
  ## so the null deviance is simply reported as NA.
  object$null.deviance <- NA
})

ncv <- function(X,y,wt,nei,beta,family,llf,H=NULL,Hi=NULL,R=NULL,offset=NULL,dH=NULL,db=NULL,deriv=FALSE,nt=1) {
## Neighbourhood cross validation for this family: forwards every
## argument unchanged to the generic gamlss implementation.
  gamlss.ncv(X, y, wt, nei, beta, family, llf,
             H = H, Hi = Hi, R = R, offset = offset,
             dH = dH, db = db, deriv = deriv, nt = nt)
} ## ncv

ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL,eta=NULL,ncv=FALSE,sandwich=FALSE) {
## function defining the gamlss GEV model log lik.
## deriv: 0 - eval
##        1 - grad and Hess
##        2 - diagonal of first deriv of Hess
##        3 - first deriv of Hess
##        4 - everything.
if (!is.null(offset)) offset[[4]] <- 0
discrete <- is.list(X)
jj <- attr(X,"lpi") ## extract linear predictor index
if (is.null(eta)) {
eta <- if (discrete) Xbd(X\$Xd,coef,k=X\$kd,ks=X\$ks,ts=X\$ts,dt=X\$dt,v=X\$v,qc=X\$qc,drop=X\$drop,lt=X\$lpid[[1]]) else X[,jj[[1]],drop=FALSE]%*%coef[jj[[1]]]
if (!is.null(offset[[1]])) eta <- eta + offset[[1]]
etar <- if (discrete) Xbd(X\$Xd,coef,k=X\$kd,ks=X\$ks,ts=X\$ts,dt=X\$dt,v=X\$v,qc=X\$qc,drop=X\$drop,lt=X\$lpid[[2]]) else X[,jj[[2]],drop=FALSE]%*%coef[jj[[2]]] ## log sigma
if (!is.null(offset[[2]])) etar <- etar + offset[[2]]
etax <- if (discrete) Xbd(X\$Xd,coef,k=X\$kd,ks=X\$ks,ts=X\$ts,dt=X\$dt,v=X\$v,qc=X\$qc,drop=X\$drop,lt=X\$lpid[[3]]) else X[,jj[[3]],drop=FALSE]%*%coef[jj[[3]]] ## shape parameter
if (!is.null(offset[[3]])) etax <- etax + offset[[3]]
} else { ## eta supplied
etar <- eta[,2]
etax <- eta[,3]
eta <- eta[,1]
}
rho <- family\$linfo[[2]]\$linkinv(etar) ## log sigma
xi <- family\$linfo[[3]]\$linkinv(etax) ## shape parameter

## Avoid xi == 0 - using a separate branch for xi==0 requires
## separate treatment of derivative w.r.t. xi, and statistically
## brings us nothing.
eps <- 1e-7

xi[xi>=0&xi<eps] <- eps
xi[xi<0&xi>-eps] <- -eps

n <- length(y)
l1 <- matrix(0,n,3)

## note that the derivative code is largely auto-generated, and
## auto-simplified. Optimized Maxima derivs exported as Maxima
## code, translated to R code in R, then processed in R to
## remove redundant auxiliary variables and their definitions.
## Modifications of auto code (but not the consequent substitutions) are
## flagged '## added'. Code post auto and non-auto modification has
## been tested against raw translated code.

exp1 <- exp(1); ## facilitates lazy auto-translation
ymu <- y - mu
ind <- which(aa0 <= -1) ## added
if (FALSE&&length(ind)>0) { ## all added
xii <- xi[ind] ## this idea is really not a good one - messes up derivatives when triggered
erho <- exp1^rho[ind]
eps1 <- 1-.Machine\$double.eps^.25
ymu[ind] <- -erho/xii*eps1
aa0[ind] <- -eps1
}
aa1 <- aa0 + 1 # (xi*(y-mu))/exp1^rho+1;
aa2 <- 1/xi;
l0  <- (-aa2*(1+xi)*log.aa1)-1/aa1^aa2-rho;
l <- sum(l0)

if (deriv>0) {
## first derivatives m, r, x...
bb1 <- 1/exp1^rho;
bb2 <- bb1*xi*ymu+1;

l1[,1]  <-  (bb1*(xi+1))/bb2-bb1*bb2^((-1/xi)-1);
cc2 <- ymu;

cc3 <- cc0 + 1 ##bb1*xi*cc2+1;
l1[,2]  <-  (-bb1*cc2*cc3^((-1/xi)-1))+(bb1*(xi+1)*cc2)/cc3-1;
dd3 <- xi+1;
dd6 <- 1/cc3;
dd7 <- log.cc3;
dd8 <- 1/xi^2;
l1[,3]  <-  (-(dd8*dd7-bb1*aa2*cc2*dd6)/cc3^aa2)+dd8*dd3*dd7-
aa2*dd7-bb1*aa2*dd3*cc2*dd6;

## the second derivatives mm mr mx rr rx xx

l2 <- matrix(0,n,6)
ee1 <- 1/exp1^(2*rho);
ee3 <- -1/xi;
l2[,1]  <-  ee1*(ee3-1)*xi*aa1^(ee3-2)+(ee1*xi*(xi+1))/aa1^2;
ff7 <- ee3-1;
l2[,2]  <-  bb1*cc3^ff7+ee1*ff7*xi*cc2*cc3^(ee3-2)-(bb1*dd3)/cc3+
(ee1*xi*dd3*cc2)/cc3^2;
gg7 <- -aa2;
l2[,3]  <-  (-bb1*cc3^(gg7-1)*(log.cc3/xi^2-bb1*aa2*cc2*dd6))+
ee1*cc2*cc3^(gg7-2)+bb1*dd6-(ee1*(xi+1)*cc2)/cc3^2;
hh4 <- cc2^2;
l2[,4]  <-  bb1*cc2*cc3^ff7+ee1*ff7*xi*hh4*cc3^(ee3-2)-
(bb1*dd3*cc2)/cc3+(ee1*xi*dd3*hh4)/cc3^2;
l2[,5]  <-  (-bb1*cc2*cc3^(gg7-1)*(log.cc3/xi^2-bb1*aa2*cc2*dd6))+
ee1*hh4*cc3^(gg7-2)+bb1*cc2*dd6-(ee1*(xi+1)*hh4)/cc3^2;
jj08 <- 1/cc3^2;
jj12 <- 1/xi^3;
jj13 <- 1/cc3^aa2;
l2[,6]  <-  (-jj13*(dd8*dd7-bb1*aa2*cc2*dd6)^2)-jj13*(ee1*aa2*hh4*jj08+
2*bb1*dd8*cc2*dd6-2*jj12*dd7)-2*jj12*dd3*dd7+2*dd8*dd7+
2*bb1*dd8*dd3*cc2*dd6-2*bb1*aa2*cc2*dd6+ee1*aa2*dd3*hh4*jj08;

## need some link derivatives for derivative transform
ig1 <- cbind(family\$linfo[[1]]\$mu.eta(eta),family\$linfo[[2]]\$mu.eta(etar),
family\$linfo[[3]]\$mu.eta(etax))
}

l3 <- l4 <- g3 <- g4 <- 0 ## defaults

if (deriv>1) {
## the third derivatives
## order mmm mmr mmx mrr mrx mxx rrr rrx rxx xxx
l3 <- matrix(0,n,10)
kk1 <- 1/exp1^(3*rho);
kk2 <- xi^2;
l3[,1]  <-  (2*kk1*kk2*(xi+1))/aa1^3-kk1*(ee3-2)*(ee3-1)*kk2*aa1^(ee3-3);
ll5 <- (xi*cc2)/exp1^rho+1;
ll8 <- ee3-2;
l3[,2]  <-  (-2*ee1*ff7*xi*ll5^ll8)-kk1*ll8*ff7*kk2*cc2*ll5^(ee3-3)-
(2*ee1*xi*dd3)/ll5^2+(2*kk1*kk2*dd3*cc2)/ll5^3;
mm10 <- cc3^(gg7-3);
mm11 <- gg7-2;
mm12 <- cc3^mm11;
l3[,3]  <-  ee1*(gg7-1)*xi*mm12*(log.cc3/xi^2-(bb1*aa2*cc2)/cc3)-ee1*mm12-
kk1*mm11*xi*cc2*mm10+kk1*cc2*mm10+ee1*dd3*jj08+ee1*xi*jj08-
(2*kk1*xi*dd3*cc2)/cc3^3;
l3[,4]  <-  (-bb1*cc3^ff7)-3*ee1*ff7*xi*cc2*cc3^ll8-kk1*ll8*ff7*kk2*hh4*
cc3^(ee3-3)+(bb1*dd3)/cc3-(3*ee1*xi*dd3*cc2)/cc3^2+
(2*kk1*kk2*dd3*hh4)/cc3^3;
oo10 <- gg7-1;
oo13 <- log.cc3/xi^2;
l3[,5]  <-  bb1*cc3^oo10*(bb1*oo10*cc2*dd6+oo13)+ee1*oo10*xi*cc2*mm12*
(bb1*mm11*cc2*dd6+oo13)+ee1*aa2*cc2*mm12+ee1*oo10*cc2*mm12-
bb1*dd6+2*ee1*dd3*cc2*jj08+ee1*xi*cc2*jj08-
(2*xi*dd3*cc2^2)/(exp1^(3*rho)*cc3^3);
pp07 <- (-1/xi)-1;
pp08 <- cc3^pp07;
l3[,6]  <-  (-bb1*pp08*(bb1*pp07*cc2*dd6+dd8*dd7)^2)-bb1*pp08*
((-ee1*pp07*hh4*jj08)+2*bb1*dd8*cc2*dd6-(2*dd7)/xi^3)-
2*ee1*cc2*jj08+(2*(xi+1)*hh4)/(exp1^(3*rho)*cc3^3);
qq05 <- cc2^3;
l3[,7]  <-  (-bb1*cc2*cc3^ff7)-3*ee1*ff7*xi*hh4*cc3^ll8-
kk1*ll8*ff7*kk2*qq05*cc3^(ee3-3)+(bb1*dd3*cc2)/cc3-
(3*ee1*xi*dd3*hh4)/cc3^2+(2*kk1*kk2*dd3*qq05)/cc3^3;
rr17 <- log.cc3/xi^2-bb1*aa2*cc2*dd6;
l3[,8]  <-  bb1*cc2*cc3^oo10*rr17+ee1*oo10*xi*hh4*mm12*rr17-2*ee1*hh4*mm12-
kk1*mm11*xi*qq05*mm10+kk1*qq05*mm10-bb1*cc2*dd6+
2*ee1*dd3*hh4*jj08+ee1*xi*hh4*jj08-(2*kk1*xi*dd3*qq05)/cc3^3;
l3[,9]  <-  (-bb1*cc2*pp08*(bb1*pp07*cc2*dd6+dd8*dd7)^2)-bb1*cc2*pp08*
((-ee1*pp07*hh4*jj08)+2*bb1*dd8*cc2*dd6-(2*dd7)/xi^3)-
2*ee1*hh4*jj08+(2*(xi+1)*cc2^3)/(exp1^(3*rho)*cc3^3);
tt08 <- 1/cc3^3;
tt16 <- 1/xi^4;
tt18 <- dd8*dd7-bb1*aa2*cc2*dd6;
l3[,10]  <-  (-jj13*tt18^3)-3*jj13*(ee1*aa2*hh4*jj08+2*bb1*dd8*cc2*dd6-2*jj12*dd7)*
tt18-jj13*((-2*kk1*aa2*qq05*tt08)-3*ee1*dd8*hh4*jj08-6*bb1*jj12*cc2*dd6+
6*tt16*dd7)+6*tt16*dd3*dd7-6*jj12*dd7-6*bb1*jj12*dd3*cc2*dd6+
6*bb1*dd8*cc2*dd6-3*ee1*dd8*dd3*hh4*jj08+3*ee1*aa2*hh4*jj08-
2*kk1*aa2*dd3*qq05*tt08;

}

if (deriv>3) {
## the fourth derivatives
## mmmm mmmr mmmx mmrr mmrx mmxx mrrr mrrx mrxx mxxx
## rrrr rrrx rrxx rxxx xxxx
l4 <- matrix(0,n,15)
uu1 <- 1/exp1^(4*rho);
uu2 <- xi^3;
l4[,1]  <-  uu1*(ee3-3)*(ee3-2)*(ee3-1)*uu2*aa1^(ee3-4)+(6*uu1*uu2*(xi+1))/aa1^4;
vv09 <- ee3-3;
l4[,2]  <-  3*kk1*ll8*ff7*kk2*ll5^vv09+uu1*vv09*ll8*ff7*uu2*cc2*ll5^(ee3-4)-
(6*kk1*kk2*dd3)/ll5^3+(6*uu1*uu2*dd3*cc2)/ll5^4;
ww11 <- gg7-3;
ww12 <- cc3^(gg7-4);
ww15 <- cc3^ww11;
l4[,3]  <- (-kk1*mm11*oo10*kk2*ww15*(log.cc3/kk2-(bb1*aa2*cc2)/cc3))+
2*kk1*mm11*xi*ww15-kk1*ww15+uu1*ww11*mm11*kk2*cc2*ww12-
uu1*oo10*xi*cc2*ww12-uu1*ww11*xi*cc2*ww12+2*kk1*kk2*tt08+
4*kk1*xi*dd3*tt08-(6*uu1*kk2*dd3*cc2)/cc3^4;
l4[,4]  <-  4*ee1*ff7*xi*ll5^ll8+5*kk1*ll8*ff7*kk2*cc2*ll5^vv09+
uu1*vv09*ll8*ff7*uu2*hh4*ll5^(ee3-4)+(4*ee1*xi*dd3)/ll5^2-
(10*kk1*kk2*dd3*cc2)/ll5^3+(6*uu1*uu2*dd3*hh4)/ll5^4;
yy18 <- log.cc3/kk2;
l4[,5]  <-  (-2*ee1*oo10*xi*mm12*(bb1*mm11*cc2*dd6+yy18))-
kk1*mm11*oo10*kk2*cc2*ww15*(bb1*ww11*cc2*dd6+yy18)-
2*ee1*aa2*mm12-2*ee1*oo10*mm12-2*kk1*mm11*oo10*xi*cc2*ww15-
kk1*oo10*cc2*ww15-kk1*mm11*cc2*ww15-2*ee1*dd3*jj08-
2*ee1*xi*jj08+2*kk1*kk2*cc2*tt08+8*kk1*xi*dd3*cc2*tt08-
(6*kk2*dd3*cc2^2)/(exp1^(4*rho)*cc3^4);
l4[,6]  <-  ee1*oo10*xi*mm12*tt18^2-2*ee1*mm12*tt18-2*kk1*mm11*xi*cc2*ww15*tt18+
2*kk1*cc2*ww15*tt18+ee1*oo10*xi*mm12*(ee1*aa2*hh4*jj08+2*bb1*
dd8*cc2*dd6-(2*dd7)/xi^3)+4*kk1*cc2*ww15+2*uu1*ww11*xi*hh4*ww12-
4*uu1*hh4*ww12+2*ee1*jj08-4*kk1*dd3*cc2*tt08-4*kk1*xi*cc2*tt08+
(6*uu1*xi*dd3*hh4)/cc3^4;
l4[,7]  <-  bb1*cc3^ff7+7*ee1*ff7*xi*cc2*cc3^ll8+6*kk1*ll8*ff7*kk2*hh4*cc3^vv09+
uu1*vv09*ll8*ff7*uu2*qq05*cc3^(ee3-4)-(bb1*dd3)/cc3+
(7*ee1*xi*dd3*cc2)/cc3^2-(12*kk1*kk2*dd3*hh4)/cc3^3+
(6*uu1*uu2*dd3*qq05)/cc3^4;
l4[,8]  <-  (-bb1*cc3^oo10*(bb1*oo10*cc2*dd6+yy18))-3*ee1*oo10*xi*cc2*mm12*
(bb1*mm11*cc2*dd6+yy18)-kk1*mm11*oo10*kk2*hh4*ww15*
(bb1*ww11*cc2*dd6+yy18)-3*ee1*aa2*cc2*mm12-3*ee1*oo10*cc2*mm12-
2*kk1*mm11*oo10*xi*hh4*ww15-kk1*oo10*hh4*ww15-kk1*mm11*hh4*ww15+
bb1*dd6-4*ee1*dd3*cc2*jj08-3*ee1*xi*cc2*jj08+2*kk1*kk2*hh4*tt08+
10*kk1*xi*dd3*hh4*tt08-(6*kk2*dd3*cc2^3)/(exp1^(4*rho)*cc3^4);
4*kk1*xi*hh4*tt08+(6*xi*dd3*cc2^3)/(exp1^(4*rho)*cc3^4);
ae16 <- dd8*dd7+bb1*pp07*cc2*dd6;
l4[,10]  <- (-bb1*pp08*ae16^3)-3*bb1*pp08*((-ee1*pp07*hh4*jj08)+
2*bb1*dd8*cc2*dd6-2*jj12*dd7)*ae16-bb1*pp08*(2*kk1*pp07*qq05*tt08-
3*ee1*dd8*hh4*jj08-6*bb1*jj12*cc2*dd6+(6*dd7)/xi^4)+6*kk1*hh4*tt08-
(6*(xi+1)*qq05)/(exp1^(4*rho)*cc3^4);
af05 <- cc2^4;
l4[,11]  <-  bb1*cc2*cc3^ff7+7*ee1*ff7*xi*hh4*cc3^ll8+6*kk1*ll8*ff7*kk2*qq05*
cc3^vv09+uu1*vv09*ll8*ff7*uu2*af05*cc3^(ee3-4)-(bb1*dd3*cc2)/cc3+
(7*ee1*xi*dd3*hh4)/cc3^2-(12*kk1*kk2*dd3*qq05)/cc3^3+
(6*uu1*uu2*dd3*af05)/cc3^4;
ag23 <- log.cc3/kk2-bb1*aa2*cc2*dd6;
l4[,12]  <- (-bb1*cc2*cc3^oo10*ag23)-3*ee1*oo10*xi*hh4*mm12*ag23-
kk1*mm11*oo10*kk2*qq05*ww15*ag23+4*ee1*hh4*mm12+
5*kk1*mm11*xi*qq05*ww15-4*kk1*qq05*ww15+uu1*ww11*mm11*kk2*af05*ww12-
uu1*oo10*xi*af05*ww12-uu1*ww11*xi*af05*ww12+bb1*cc2*dd6-
4*ee1*dd3*hh4*jj08-3*ee1*xi*hh4*jj08+2*kk1*kk2*qq05*tt08+
10*kk1*xi*dd3*qq05*tt08-(6*uu1*kk2*dd3*af05)/cc3^4;
ah24 <- (-(2*dd7)/xi^3)+2*bb1*dd8*cc2*dd6+ee1*aa2*hh4*jj08;
ah27 <- tt18^2;