Nothing
## ----echo=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------
library(knitr)
opts_chunk$set(comment="", message=FALSE, warning=FALSE,
tidy.opts=list(keep.blank.line=TRUE, width.cutoff=180),
fig.width=8, fig.height=6,out.width='1\\textwidth',
options(width=180),fig.show='hold',fig.align='center',cache=TRUE)
## ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
library(likelihoodAsy)
## ----Data for Example 1-----------------------------------------------------------------------------------------------------------------------------------------------------------
library(MASS)
data(leuk)
## ----Log likelihood for a Weibull regression model--------------------------------------------------------------------------------------------------------------------------------
loglik.Wbl <- function(theta, data)
{
logy <- log(data$y)
X <- data$X
loggam <- theta[1]
beta <- theta[-1]
gam <- exp(loggam)
H <- exp(gam * logy + X %*% beta)
out <- sum(X %*% beta + loggam + (gam - 1) * logy - H)
return(out)
}
## ----Data list for Example 1------------------------------------------------------------------------------------------------------------------------------------------------------
X <- model.matrix(~log(wbc, base=10), data=leuk[leuk$ag=="present",])
data.fz <-list(X = X, y = leuk$time[leuk$ag=="present"])
## ----Data generation for a Weibull regression model-------------------------------------------------------------------------------------------------------------------------------
gendat.Wbl <- function(theta, data)
{
X <- data$X
n <- nrow(X)
beta <- theta[-1]
gam <- exp(theta[1])
data$y <- (rexp(n) / exp(X %*% beta)) ^ (1 / gam)
return(data)
}
## ----Scalar function of interest--------------------------------------------------------------------------------------------------------------------------------------------------
psifcn.Wbl <- function(theta)
{
beta <- theta[-1]
gam <- exp(theta[1])
y0 <- 130
x0 <- 4
psi <- -(y0 ^ gam) * exp(beta[1] + x0 * beta[2])
return(psi)
}
## ----Testing a value for the log Survival function--------------------------------------------------------------------------------------------------------------------------------
rs <- rstar(data=data.fz, thetainit = c(0, 0, 0), floglik = loglik.Wbl,
fpsi = psifcn.Wbl, psival = log(0.03), datagen = gendat.Wbl,
trace=FALSE, seed=10, psidesc="Log survival function")
rs
## ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
summary(rs)
## ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
grad.Wbl <- function(theta, data)
{
logy <- log(data$y)
X <- data$X
loggam <- theta[1]
beta <- theta[-1]
gam <- exp(loggam)
H <- exp(gam * logy + X %*% beta)
score.beta <- t(X) %*% (1 - H)
score.nu <- sum(1 + gam * logy - gam * H * logy)
out <- c(score.nu, score.beta)
return(out)
}
## ----Checking the gradient--------------------------------------------------------------------------------------------------------------------------------------------------------
cbind(pracma::grad(loglik.Wbl, rs$theta.hyp, data=data.fz),
grad.Wbl(rs$theta.hyp, data.fz))
## ----Confidence intervals for the log Survival function---------------------------------------------------------------------------------------------------------------------------
rs.int <- rstar.ci(data=data.fz, thetainit = c(0, 0, 0), floglik = loglik.Wbl,
fpsi = psifcn.Wbl, fscore=grad.Wbl, datagen=gendat.Wbl,
trace=FALSE, seed=1223, psidesc="Log survival function")
rs.int
## ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
summary(rs.int)
## ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
plot(rs.int)
## ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
print(exp(rs.int$CIrs[2,]), digits=3)
## ----Log likelihood for a Weibull regression model with censoring-----------------------------------------------------------------------------------------------------------------
loglik.Wbl.cens <- function(theta, data)
{
logy <- log(data$y)
X <- data$X
f <- data$f ### binary censoring indicator: 0=censored, 1=observed
loggam <- theta[1]
beta <- theta[-1]
gam <- exp(loggam)
H <- exp(gam * logy + X %*% beta)
out <- sum(f * (X %*% beta + loggam + (gam - 1) * logy) - H)
return(out)
}
## ----Data generation for a Weibull regression model with Type II censoring--------------------------------------------------------------------------------------------------------
gendat.Wbl.cens <- function(theta, data)
{
X <- data$X
n <- nrow(X)
beta <- theta[-1]
gam <- exp(theta[1])
y <- (rexp(n) / exp(X %*% beta)) ^ (1 / gam)
maxv <- n - 5 ### the five largest observation are censored
ymaxv <- sort(y)[maxv]
data$y <- ifelse(y < ymaxv, y, ymaxv)
data$f <- ifelse(y < ymaxv, 1, 0)
return(data)
}
## ----Data list for Example 1 with censoring---------------------------------------------------------------------------------------------------------------------------------------
data.fz.cens <-list(X = X, y = leuk$time[leuk$ag=="present"], f=rep(1,nrow(X)))
data.fz.cens$y <- ifelse(data.fz$y < sort(data.fz$y)[12], data.fz$y,
sort(data.fz$y)[12])
data.fz.cens$f <- ifelse(data.fz$y < sort(data.fz$y)[12], 1, 0)
## ----Confidence intervals for the log Survival function with censoring------------------------------------------------------------------------------------------------------------
rs.int.cens <- rstar.ci(data=data.fz.cens, thetainit = c(0, 0, 0),
floglik = loglik.Wbl.cens, fpsi = psifcn.Wbl,
datagen=gendat.Wbl.cens, constr.opt="alabama",
trace=FALSE, seed=1223, psidesc="Log survival function")
summary(rs.int.cens)
## ----Log likelihood for the AR1 example-------------------------------------------------------------------------------------------------------------------------------------------
likAR1 <- function(theta, data)
{
y <- data$y
mu <- theta[1]
sigma2 <- exp(theta[2] * 2)
rho <- theta[3]
n <- length(y)
Gamma1 <- diag(1 + c(0, rep(rho^2, n-2), 0))
for(i in 2:n)
Gamma1[i,i-1]<- Gamma1[i-1,i] <- -rho
lik <- -n/2 * log(sigma2) + 0.5 * log(1 - rho^2) - 1 / (2 * sigma2) *
mahalanobis(y, rep(mu,n), Gamma1, inverted = TRUE)
return(lik)
}
## ----Gradient function for the AR1 example----------------------------------------------------------------------------------------------------------------------------------------
grAR1 <- function(theta, data)
{
y <- data$y
mu <- theta[1]
sigma2 <- exp(theta[2] * 2)
rho <- theta[3]
n <- length(y)
Gamma1 <- diag( 1 + c(0, rep(rho^2, n-2), 0))
DGamma1 <- diag(c(0, rep( 2 * rho, n-2), 0))
for(i in 2:n)
{
Gamma1[i,i-1]<- Gamma1[i-1,i] <- -rho
DGamma1[i,i-1] <- DGamma1[i-1,i] <- -1
}
out <- rep(0, length(theta))
out[1] <- 1 / sigma2 * t(rep(1,n)) %*% Gamma1 %*% (y-mu)
out[2] <- -n / (2 * sigma2) + 1 / (2 * sigma2^2) *
mahalanobis(y, rep(mu,n), Gamma1, inverted = TRUE)
out[2] <- out[2] * sigma2 * 2
out[3] <- -rho / (1 - rho^2) - 1 / (2 * sigma2) *
mahalanobis(y, rep(mu,n), DGamma1, inverted = TRUE)
return(out)
}
## ----Data generation for the AR1 example------------------------------------------------------------------------------------------------------------------------------------------
genDataAR1 <- function(theta, data)
{
out <- data
mu <- theta[1]
sigma <- exp(theta[2])
rho <- theta[3]
n <- length(data$y)
y <- rep(0,n)
y[1] <- rnorm(1, mu, s = sigma * sqrt(1 / (1 - rho^2)))
for(i in 2:n)
y[i] <- mu + rho * (y[i-1] - mu) + rnorm(1) * sigma
out$y <- y
return(out)
}
## ----Data for the AR1 example-----------------------------------------------------------------------------------------------------------------------------------------------------
data.AR1 <- list( y = as.numeric(lh) )
## ----Testing a value for the correlation------------------------------------------------------------------------------------------------------------------------------------------
rsAR1 <- rstar(data=data.AR1, thetainit = c(0, 0, 0), floglik = likAR1,
fpsi = function(theta) theta[3], fscore=grAR1,
psival = 0.765, datagen=genDataAR1, trace=FALSE, seed=10121,
psidesc="Autocorrelation parameter")
summary(rsAR1)
## ----Comparison with parametric bootstrap, eval=FALSE-----------------------------------------------------------------------------------------------------------------------------
# rvals <- rep(0, 50000)
# set.seed(107)
# for(i in 1:length(rvals))
# {
# data.boot <- genDataAR1(rsAR1$theta.hyp, data.AR1)
# if(i%%1000==0) cat("i=",i,"\n")
# r.boot <- rstar(data=data.boot, thetainit = rsAR1$theta.hyp, floglik = likAR1,
# fpsi = function(theta) theta[3], fscore=grAR1,
# psival = 0.765, datagen=genDataAR1, trace=FALSE, ronly=TRUE)
# rvals[i] <- r.boot$r
# }
## ----Comparison with parametric bootstrap: p-values, eval=FALSE-------------------------------------------------------------------------------------------------------------------
# c(mean(rvals < rsAR1$r), pnorm(rsAR1$rs) )
## ----Data definition for binomial overdispersion----------------------------------------------------------------------------------------------------------------------------------
data(finndat)
z <- scale(finndat$z * 10, scale=FALSE)
X <- cbind(rep(1,length(z)), z)
data.binOD <- list(X=X, den = finndat$den, y = finndat$y,
gq=gauss.quad(40,"hermite"))
## ----Log likelihood function for binomial overdispersion-------------------------------------------------------------------------------------------------------------------------
loglik.binOD <- function(theta, data)
{
p.range<- function(p, eps=2.22e-15)
{
out <- p
out[p<eps] <- eps
out[p>(1-eps)] <- (1-eps)
return(out)
}
y <- data$y
den <- data$den
X <- data$X
gq <- data$gq
n <- length(y)
p <- ncol(X)
beta <- theta[1:p]
sigma <- exp(theta[p+1])
linpred <- X %*% beta
L <- rep(0,n)
for (i in 1:n)
{
prob <- p.range(plogis(linpred[i] + gq$nodes * sqrt(2)*sigma))
likq <- y[i] * log(prob) + (den[i] - y[i]) * log(1-prob)
L[i] <- sum(gq$weights * exp(likq) ) / sqrt(2 * pi)
}
return(log(prod(L)))
}
## ----Gradient function for binomial overdispersion--------------------------------------------------------------------------------------------------------------------------------
grad.binOD <- function(theta,data)
{
p.range<- function(p, eps=2.22e-15)
{
out <- p
out[p<eps] <- eps
out[p>(1-eps)] <- (1-eps)
return(out)
}
y <- data$y
den <- data$den
X <- data$X
gq <- data$gq
n <- length(y)
p <- ncol(X)
beta <- theta[1:p]
sigma <- exp(theta[p+1])
linpred <- X %*% beta
L <- rep(0,n)
LB <- matrix(0, nrow=n, ncol=p+1)
out <- rep(0,p+1)
for (i in 1:n)
{
prob <- p.range(plogis(linpred[i]+gq$nodes*sqrt(2)*sigma))
likq <- y[i] * log(prob) + (den[i] - y[i]) * log(1-prob)
score <- (y[i] - den[i] * prob)
L[i] <- sum(gq$weights * exp(likq) ) / sqrt(2 * pi)
LB[i,1] <- sum(gq$weights * exp(likq) * score) / sqrt(2 * pi)
LB[i,2] <- sum(gq$weights * exp(likq) * score * X[i,2] ) / sqrt(2 * pi)
LB[i,3] <- sum(gq$weights * exp(likq) * score * gq$nodes *
sqrt(2)) / sqrt(2 * pi) * sigma
out <- out + LB[i,] / L[i]
}
return(out)
}
## ----Function that generates a data set for binomial overdispersion--------------------------------------------------------------------------------------------------------------
gendat.binOD <- function(theta, data)
{
out <- data
den <- data$den
X <- data$X
p <- ncol(X)
n <- length(data$y)
beta <- theta[1:p]
sigma <- exp(theta[p+1])
u <- rnorm(n) * sigma
linpred <- X %*% beta + u
out$y <- rbinom(n, size=den, prob=plogis(linpred))
return(out)
}
## ----Testing that the slope is one------------------------------------------------------------------------------------------------------------------------------------------------
rs <- rstar(data=data.binOD, thetainit=c(0, 0, 0), floglik=loglik.binOD,
fscore=grad.binOD, fpsi=function(theta) return(theta[2]), seed=110,
trace=FALSE, R=500, psival=1 ,datagen=gendat.binOD,
psidesc="Regression slope")
summary(rs)
## ----Log likelihood and data generation for 2x2 table-----------------------------------------------------------------------------------------------------------------------------
loglik.Pois <- function(theta, data)
{
y <- data$y
y <- y + 0.50 * c(-1,1,1,-1) ### continuity correction
mu <- exp(data$X %*% theta)
el <- sum(y * log(mu) - mu)
return(el)
}
gendat.Pois <- function(theta, data)
{
out <- data
mu <- exp(data$X %*% theta)
out$y <- rpois(n=4, lam=mu)
return(out)
}
## ----Data definition for 2x2 table------------------------------------------------------------------------------------------------------------------------------------------------
rowf <- c(1, 0, 1, 0)
colf <- c(1, 1, 0, 0)
intf <- c(0, 0, 0, 1)
X <- cbind( rep(1, 4), rowf, colf, intf)
data.2x2 <- list(y = c(15, 9, 7, 13), X=X)
## ----Testing independence in 2 x 2 table------------------------------------------------------------------------------------------------------------------------------------------
rs <- rstar(data=data.2x2, thetainit = c(0, 0, 0, 0), floglik = loglik.Pois,
fpsi = function(theta) theta[4], psival = 0, datagen=gendat.Pois,
trace=FALSE, R=50, psidesc="Independence test")
summary(rs)
## ----Accessing the crying babies data---------------------------------------------------------------------------------------------------------------------------------------------
library(cond)
data(babies)
## ----Standard logistic regression model-------------------------------------------------------------------------------------------------------------------------------------------
mod.glm <- glm(formula = cbind(r1, r2) ~ day + lull - 1, family = binomial,
data = babies)
data.obj <- list(y = babies$r1, den = babies$r1 + babies$r2,
X = model.matrix(mod.glm))
## ----Functions for logistic regression--------------------------------------------------------------------------------------------------------------------------------------------
loglik.logit<- function(theta, data)
{
y <- data$y
den <- data$den
X <- data$X
eta <- X %*% theta
p <- plogis(eta)
l <- sum(y * log(p) + (den - y) * log(1-p))
return(l)
}
grad.logit<- function(theta, data)
{
y <- data$y
den <- data$den
X <- data$X
eta <- X %*% theta
p <- plogis(eta)
out <- t(y - p * den) %*% X
return(drop(out))
}
gendat.logit<- function(theta, data)
{
X <- data$X
eta <- X %*% theta
p <- plogis(eta)
out <- data
out$y <- rbinom(length(data$y), size = data$den, prob = p)
return(out)
}
## ----Confidence intervals for the coefficient of lull-----------------------------------------------------------------------------------------------------------------------------
time.with <- system.time( rs.int <- rstar.ci(data=data.obj,
thetainit = coef(mod.glm),
floglik = loglik.logit, fpsi = function(theta) theta[19],
fscore=grad.logit, datagen=gendat.logit, trace=FALSE,
psidesc="Coefficient of lull") )
time.without <- system.time( rs.int.no <- rstar.ci(data=data.obj,
thetainit = coef(mod.glm),
floglik = loglik.logit, fpsi = function(theta) theta[19],
datagen=gendat.logit, trace=FALSE,
psidesc="Coefficient of lull") )
## ----Summary of confidence intervals for the coefficient of lull------------------------------------------------------------------------------------------------------------------
summary(rs.int)
## ----Comparison of computational times--------------------------------------------------------------------------------------------------------------------------------------------
time.with
time.without
## ----Results obtained with the cond package---------------------------------------------------------------------------------------------------------------------------------------
res.cond <- cond(object = mod.glm, offset = lullyes)
summary(res.cond)
## ----Numerical optimization of profile and modified profile log likelihoods-------------------------------------------------------------------------------------------------------
max.prof <- nlminb(0, logPL, data=data.obj, thetainit=coef(mod.glm),
floglik=loglik.logit, fscore=grad.logit, indpsi=19, trace=FALSE,
minus=TRUE)
max.mpl <- nlminb(0, logMPL, data=data.obj, mle=coef(mod.glm),
floglik=loglik.logit, fscore=grad.logit, datagen=gendat.logit,
indpsi=19, R=50, seed=2020, trace=FALSE, minus=TRUE)
c(max.prof$par, max.mpl$par)
## ----Plotting the two log likelhoods----------------------------------------------------------------------------------------------------------------------------------------------
psi.vals <- seq(-0.3, 3.7, l=30)
obj.prof <- sapply(psi.vals, logPL, data=data.obj, thetainit=coef(mod.glm),
floglik=loglik.logit, fscore=grad.logit, indpsi=19)
obj.mpl <- sapply(psi.vals, logMPL, data=data.obj, mle=coef(mod.glm),
floglik=loglik.logit, fscore=grad.logit, datagen=gendat.logit,
indpsi=19, R=50, seed=2020)
## ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
par(pch="s")
plot(psi.vals, obj.prof - max(obj.prof), type="l", xlab=expression(psi),
ylab="log likelihood", lwd=2, las=1)
lines(psi.vals, obj.mpl - max(obj.mpl), col="red", lwd=2)
legend("topright", col=c(1, 2), lty=1, lwd=2, legend=c("Profile","MPL"), bty="n")
## ----Functions for random intercept model-----------------------------------------------------------------------------------------------------------------------------------------
logLikLme<- function(theta, data)
{
X <- data$X
Z <- data$Z
y <- data$y
beta <- theta[1:ncol(X)]
sigma.b <- theta[ncol(X)+1]
sigma <- theta[ncol(X)+2]
n <- nrow(X)
V <- tcrossprod(Z) * sigma.b^2 + diag(n) * sigma^2
L <- chol(V)
XL <- backsolve(L, X, transpose=TRUE)
yL <- backsolve(L, y, transpose=TRUE)
out<- - sum(log(diag(L))) - sum( (yL-XL %*% beta)^2) / 2
return(out)
}
gradLikLme <- function(theta, data)
{
X <- data$X
Z <- data$Z
y <- data$y
beta <- theta[1:ncol(X)]
sigma.b <- theta[ncol(X)+1]
sigma <- theta[ncol(X)+2]
n <- nrow(X)
V <- tcrossprod(Z) * sigma.b^2 + diag(n) * sigma^2
L <- chol(V)
XL<- backsolve(L, X, transpose=TRUE)
yL<- backsolve(L, y, transpose=TRUE)
out <- rep(0, length(theta))
out[1:ncol(X)] <- t(yL-XL %*% beta) %*% XL
ni<- as.vector(t(Z) %*% rep(1,n))
Zv<- matvec(Z, sqrt(1/(sigma^2 + sigma.b^2 * ni)))
V1 <- diag(n) / sigma^2 - tcrossprod(Zv) * sigma.b^2 / sigma^2
Vb <- tcrossprod(Z) * 2 * sigma.b
Vs <- diag(n) * 2 * sigma
Mb <- V1 %*% Vb
Ms <- V1 %*% Vs
r <- as.vector(y - X %*% beta)
out[ncol(X)+1] <- -sum(diag(Mb)) / 2 +
as.numeric( t(r) %*% Mb %*% V1 %*% r) / 2
out[ncol(X)+2] <- -sum(diag(Ms)) / 2 +
as.numeric( t(r) %*% Ms %*% V1 %*% r) / 2
return(out)
}
genDataLme <- function(theta, data)
{
out <- data
X <- data$X
Z <- data$Z
y <- data$y
beta <- theta[1:ncol(X)]
sigma.b <- theta[ncol(X)+1]
sigma <- theta[ncol(X)+2]
n <- nrow(X)
mu <- X %*% beta
b <- rnorm(ncol(Z), s=sigma.b)
e <- rnorm(nrow(Z), s=sigma)
out$y <- mu + e + Z %*% b
return(out)
}
## ----Random intercept example-----------------------------------------------------------------------------------------------------------------------------------------------------
library(lme4)
fm1R <- lmer(Reaction ~ Days + (1|Subject), sleepstudy)
sleepdata <- list(X=model.matrix(Reaction ~ Days, sleepstudy),
Z=model.matrix(Reaction ~ factor(Subject)-1, sleepstudy),
y=sleepstudy$Reaction)
## ----MLE for example--------------------------------------------------------------------------------------------------------------------------------------------------------------
mleFull <- optim( c(250, 10, 30, 30), logLikLme, gr=gradLikLme,
data=sleepdata, method="BFGS",
control=list(fnscale=-1))
## ----Modified Profile likelihood maximization, eval=FALSE-------------------------------------------------------------------------------------------------------------------------
# mleM <- optim(mleFull$par[3:4], logMPL, data=sleepdata, mle=mleFull$par,
# floglik=logLikLme, fscore=gradLikLme, minus=TRUE,
# indpsi=3:4, datagen=genDataLme, trace=FALSE, seed=11, R=100)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.