inst/doc/mlt.R

## ----mlt-setup, echo = FALSE, results = "hide", message = FALSE, warning = FALSE----
set.seed(290875)

pkgs <- sapply(c("mlt", "survival", "eha", "prodlim", "truncreg", "lattice", "gridExtra",
         "MASS", "nnet", "HSAUR3", "sandwich", "flexsurv", "grid", "latticeExtra", 
         "colorspace", "multcomp", "eha"), require, char = TRUE)

dvc <- obs <- NULL
if (!file.exists("analysis/DVC.rda")) {
    op <- options(timeout = 120)
    dwnfl <- try(download.file("https://zenodo.org/record/17179/files/DVC.tgz", "DVC.tgz"))
    options(op)
    if (!inherits(dwnfl, "try-error")) {
        untar("DVC.tgz", file = "analysis/DVC.rda")
        load("analysis/DVC.rda")
    }
} else {
    load("analysis/DVC.rda")
}
if (!is.null(obs))
    dvc <- c(margin.table(obs[,,"wild"], 2))

logLik.phreg <- function(object) {
    ret <- object$loglik[2]
    attr(ret, "df") <- length(coef(object))
    class(ret) <- "logLik"
    ret
}
vcov.phreg <- function(object) object$var

trellis.par.set(list(plot.symbol = list(col=1,pch=20, cex=0.7),
                     box.rectangle = list(col=1),
                     box.umbrella = list(lty=1, col=1),
                     strip.background = list(col = "white")))
ltheme <- canonical.theme(color = FALSE)     ## in-built B&W theme
ltheme$strip.background$col <- "transparent" ## change strip bg
lattice.options(default.theme = ltheme)

knitr::opts_chunk$set(echo = TRUE, results = 'markup', error = FALSE,
                      warning = FALSE, message = FALSE,
                      tidy = FALSE, cache = FALSE, size = "small",
                      fig.width = 6, fig.height = 4, fig.align = "center",
                      out.width = NULL, ###'.6\\linewidth', 
                      out.height = NULL,
                      fig.scap = NA)
knitr::render_sweave()  # use Sweave environments
knitr::set_header(highlight = '')  # do not \usepackage{Sweave}
## R settings
options(prompt = "R> ", continue = "+  ", useFancyQuotes = FALSE)  # JSS style
options(width = 75)

RC <- function(...) {
    ret <- do.call("cbind", list(...))
    mlt <- which(colnames(ret) == "mlt")
    for (i in (1:ncol(ret))[-mlt]) {
        nm <- colnames(ret)[i]
        ret <- cbind(ret, (ret[,i] - ret[,mlt]) / ret[,mlt])
        colnames(ret)[ncol(ret)] <- paste("(", nm, " - mlt)/mlt", sep = "")
    }
    print(ret, digits = 5)
    return(invisible(ret))
}

## ----mlt-citation, echo = FALSE------------------------------------------
year <- substr(packageDescription("mlt.docreg")$Date, 1, 4)
version <- packageDescription("mlt.docreg")$Version

## ----fail, results = "asis", echo = FALSE--------------------------------
if (any(!pkgs)) {
    cat(paste("Package(s)", paste(names(pkgs)[!pkgs], collapse = ", "), 
        "not available, stop processing.",
        "\\end{document}\n"))
    knitr::knit_exit()
}
if (is.null(dvc)) {
    cat("Downloading data from zenodo failed, stop processing.", "\\end{document}\n")
    knitr::knit_exit()
}

## ----mlt-geyser-var, echo = TRUE-----------------------------------------
library("mlt")
var_d <- numeric_var("duration", support = c(1.0, 5.0), 
                     add = c(-1, 1), bounds = c(0, Inf))

## ----mlt-geyser-basis, echo = TRUE---------------------------------------
B_d <- Bernstein_basis(var = var_d, order = 8, ui = "increasing")

## ----mlt-geyser-ctm, echo = TRUE-----------------------------------------
ctm_d <- ctm(response = B_d, todistr = "Normal")

## ----mlt-geyser-grid, echo = TRUE----------------------------------------
str(nd_d <- mkgrid(ctm_d, 200))

## ----mlt-geyser-data, echo = TRUE----------------------------------------
data("geyser", package = "TH.data")
head(geyser)

## ----mlt-geyser-fit, echo = TRUE-----------------------------------------
mlt_d <- mlt(ctm_d, data = geyser)
logLik(mlt_d)

## ----mlt-geyser-density, echo = TRUE-------------------------------------
nd_d$d <- predict(mlt_d, newdata = nd_d, type = "density")

## ----mlt-geyser-plot, echo = FALSE---------------------------------------
plot(d ~ duration, data = nd_d, type = "l", ylab = "Density", xlab = "Duration time")

## ----mlt-CHFLS-1---------------------------------------------------------
data("CHFLS", package = "HSAUR3")
polr_CHFLS_1 <- polr(R_happy ~ 1, data = CHFLS)

## ----mlt-CHFLS-1-basefun-------------------------------------------------
nl <- nlevels(CHFLS$R_happy)
b_happy <- as.basis(~ R_happy, data = CHFLS, remove_intercept = TRUE,
                    contrasts.arg = list(R_happy = function(n) 
                        contr.treatment(n, base = nl)),
                    ui = diff(diag(nl - 1)), ci = rep(0, nl - 2))

## ----mlt-CHFLS-1-basefun-basis-------------------------------------------
b_happy <- as.basis(CHFLS$R_happy)

## ----mlt-CHFLS-1-ctm-----------------------------------------------------
ctm_CHFLS_1 <- ctm(response = b_happy, todist = "Logistic")

## ----mlt-CHFLS-1-mlt-----------------------------------------------------
mlt_CHFLS_1 <- mlt(model = ctm_CHFLS_1, data = CHFLS)

## ----mlt-CHFLS-1-cmpr----------------------------------------------------
logLik(polr_CHFLS_1)
logLik(mlt_CHFLS_1)
RC(polr = polr_CHFLS_1$zeta, mlt = coef(mlt_CHFLS_1))

## ----mlt-CHFLS-1-pred----------------------------------------------------
RC(polr = predict(polr_CHFLS_1, newdata = data.frame(1), type = "prob"),
   mlt = c(predict(mlt_CHFLS_1, newdata = data.frame(1), 
                   type = "density", q = mkgrid(b_happy)[[1]])),
   ML = xtabs(~ R_happy, data = CHFLS) / nrow(CHFLS))

## ----mlt-geyser-w--------------------------------------------------------
var_w <- numeric_var("waiting", support = c(40.0, 100), add = c(-5, 15), 
                     bounds = c(0, Inf))
c(sapply(nd_w <- mkgrid(var_w, 100), range))

## ----mlt-geyser-bernstein------------------------------------------------
B_w <- Bernstein_basis(var_w, order = 8, ui = "increasing")

## ----mlt-geyser-w-ctm, echo = TRUE---------------------------------------
ctm_w <- ctm(response = B_w, todistr = "Normal")

## ----mlt-geyser-w-fit, echo = TRUE---------------------------------------
mlt_w <- mlt(ctm_w, data = geyser)

## ----mlt-geyser-w-distribution, echo = TRUE------------------------------
nd_w$d <- predict(mlt_w, newdata = nd_w, type = "distribution")

## ----mlt-geyser-w-plot, echo = FALSE-------------------------------------
layout(matrix(1:2, ncol = 2))
plot(ecdf(geyser$waiting), col = "grey", xlab = "Waiting times", ylab = "Distribution", 
     main = "", cex = .75)
lines(nd_w$waiting, nd_w$d)
B_w_40 <- Bernstein_basis(order = 40, var = var_w, ui = "incre")
ctm_w_40 <- ctm(B_w_40, todistr = "Normal")
mlt_w_40 <- mlt(ctm_w_40, data = geyser)
nd_w$d2 <- predict(mlt_w_40, q = nd_w$waiting, type = "distribution")
lines(nd_w$waiting, nd_w$d2, lty = 2)
legend("bottomright", lty = 1:2, legend = c("M = 8", "M = 40"), bty = "n")
plot(nd_w$waiting, predict(mlt_w, q = nd_w$waiting, type = "density"), type = "l",
     ylim = c(0, .04), xlab = "Waiting times", ylab = "Density")
lines(nd_w$waiting, predict(mlt_w_40, q = nd_w$waiting, type = "density"), lty = 2)
rug(geyser$waiting, col = rgb(.1, .1, .1, .1))

## ----mlt-dvc-------------------------------------------------------------
var_dvc <- numeric_var("dvc", support = min(dvc):max(dvc))
B_dvc <- Bernstein_basis(var_dvc, order = 6, ui = "increasing")
dvc_mlt <- mlt(ctm(B_dvc), data = data.frame(dvc = dvc))

## ----mlt-dvc-predict-----------------------------------------------------
q <- support(var_dvc)[[1]]
p <- predict(dvc_mlt, newdata = data.frame(1), q = q,
             type = "distribution")

## ----mlt-dvc-plot, echo = FALSE------------------------------------------
plot(ecdf(dvc), col = "grey", xlab = "Number of Roe Deer-Vehicle Collisions",
     ylab = "Distribution", main = "", cex = .75)
lines(q, p, col = "blue")
lines(q, ppois(q, lambda = mean(dvc)), col = "darkred")
legend(400, .3, pch = c(20, NA, NA), lty = c(NA, 1, 1), 
       legend = c("ECDF", "Transformation Model", "Poisson"), bty = "n", cex = .8,
       col = c("grey", "blue", "darkred"))

## ----mlt-CHFLS-2, cache = TRUE-------------------------------------------
polr_CHFLS_2 <- polr(R_happy ~ R_age + R_income, data = CHFLS)

## ----mlt-CHFLS-2-base----------------------------------------------------
b_R <- as.basis(~ R_age + R_income, data = CHFLS, remove_intercept = TRUE, 
                negative = TRUE)

## ----mlt-CHFLS-2-ctm-----------------------------------------------------
ctm_CHFLS_2 <- ctm(response = b_happy, shifting = b_R, 
                   todistr = "Logistic")
mlt_CHFLS_2 <- mlt(ctm_CHFLS_2, data = CHFLS, scale = TRUE)

## ----mlt-CHFLS-2-cmpr----------------------------------------------------
logLik(polr_CHFLS_2)
logLik(mlt_CHFLS_2)
RC(polr = c(polr_CHFLS_2$zeta, coef(polr_CHFLS_2)), 
   mlt = coef(mlt_CHFLS_2))

## ----mlt-GBSG2-1, echo = TRUE--------------------------------------------
data("GBSG2", package = "TH.data")
GBSG2y <- numeric_var("y", support = c(100.0, max(GBSG2$time)), 
                      bounds = c(0, Inf))
GBSG2$y <- with(GBSG2, Surv(time, cens))

## ----mlt-GBSG2-1-Cox-----------------------------------------------------
B_GBSG2y <- Bernstein_basis(var = GBSG2y, order = 10, ui = "increasing")
fm_GBSG2 <- Surv(time, cens) ~ horTh + age + menostat + tsize + tgrade +
                               pnodes + progrec + estrec
ctm_GBSG2_1 <- ctm(B_GBSG2y, shifting = fm_GBSG2[-2L], data = GBSG2,
                   todistr = "MinExtrVal")

## ----mlt-GBSG2-1-xbasis, eval = FALSE------------------------------------
#  as.basis(fm_GBSG2[-2L], data = GBSG2, remove_intercept = TRUE)

## ----mlt-GBSG2-1-mlt-----------------------------------------------------
mlt_GBSG2_1 <- mlt(ctm_GBSG2_1, data = GBSG2, scale = TRUE)

## ----mlt-GBSG2-1-coxph---------------------------------------------------
coxph_GBSG2_1 <- coxph(fm_GBSG2, data = GBSG2, ties = "breslow")
cf <- coef(coxph_GBSG2_1)
RC(coxph = cf, mlt = coef(mlt_GBSG2_1)[names(cf)])

## ----mlt-GBSG2-coxph_mlt, echo = FALSE, results = "hide", cache = TRUE----
ndtmp <- as.data.frame(mkgrid(GBSG2y, 100))

ord <- c(1:30, 35, 40, 45, 50)
p <- vector(mode = "list", length = length(ord))
CF <- c()
ll <- numeric(length(ord))
tm <- numeric(length(ord))

for (i in 1:length(ord)) {
    print(i)
    B_GBSG2ytmp <- Bernstein_basis(var = GBSG2y, order = ord[i], ui = "increasing")
    ctmi <- ctm(B_GBSG2ytmp, shifting = fm_GBSG2[-2L], data = GBSG2,
                   todistr = "MinExtrVal")
    tm[i] <- system.time(mlti <- mlt(ctmi, data = GBSG2, scale = TRUE))[1]

    ll[i] <- logLik(mlti)
    p[[i]] <- predict(mlti, newdata = ndtmp, type = "trafo", terms = "bresponse")

    cf <- coef(mlti)
    cf <- cf[-grep("Bs", names(cf))]
    CF <- rbind(CF, cf)
}
colnames(CF) <- names(cf)

of <- model.matrix(coxph_GBSG2_1) %*% coef(coxph_GBSG2_1) 
coxphtmp <- coxph(Surv(time, cens) ~ 1  + offset(of), data = GBSG2)
b <- survfit(coxphtmp)
layout(matrix(1:2, nc = 2))
col <- rgb(.1, .1, .1, .1)
ylim <- range(unlist(p)[is.finite(unlist(p))])
plot(ndtmp$y, p[[1]], type = "l", ylim =  ylim, col = col, xlab = "Survival Time (days)",
     ylab = "Log-cumulative Hazard")
out <- sapply(p, function(y) lines(ndtmp$y, y, col = col))
lines(b$time, log(b$cumhaz), col = "red")

ylim <- range(CF)
plot(ord, CF[,1], ylim = ylim, col = col[1], xlab = "Order M",
     ylab = "Coefficients", type = "n")
for (i in 1:ncol(CF)) {
    points(ord, CF[,i], cex = .5, pch = 19)
    abline(h = coef(coxph_GBSG2_1)[i], col = "darkgrey")
}
# text(20, coef(coxph_GBSG2_1) + .1, names(coef(coxph_GBSG2_1)))

## ----mlt-GBSG2-1-fss, cache = TRUE---------------------------------------
kn <- log(support(GBSG2y)$y)
fss_GBSG2_1 <- flexsurvspline(fm_GBSG2, data = GBSG2, scale = "hazard", 
                              k = 9, bknots = kn)
logLik(fss_GBSG2_1)
logLik(mlt_GBSG2_1)
cf <- coef(coxph_GBSG2_1)
RC(coxph = cf, mlt = coef(mlt_GBSG2_1)[names(cf)],
   fss = coef(fss_GBSG2_1)[names(cf)])

## ----mlt-GBSG2-1-fss-plot, echo = FALSE----------------------------------
p1 <- summary(fss_GBSG2_1, newdata = GBSG2[1,], ci = FALSE)
p2 <- predict(mlt_GBSG2_1, newdata = GBSG2[1, all.vars(fm_GBSG2[-2L])], 
              q = p1[[1]]$time, type = "survivor")
plot(p1[[1]]$time, p1[[1]]$est, type = "l", lty = 1, xlab = "Survival Time (days)", 
     ylab = "Probability", ylim = c(0, 1))
lines(p1[[1]]$time, p2[,1], lty = 2)
p3 <- survfit(coxph_GBSG2_1, newdata = GBSG2[1,])
lines(p3$time, p3$surv, lty = 3)
legend("topright", lty = 1:3, legend = c("flexsurvspline", "mlt", "coxph"), bty = "n")

## ----mlt-GBSG2-2---------------------------------------------------------
ly <- log_basis(GBSG2y, ui = "increasing")
ctm_GBSG2_2 <- ctm(ly, shifting = fm_GBSG2[-2L], data = GBSG2, 
                   negative = TRUE, todistr = "MinExtrVal")
mlt_GBSG2_2 <- mlt(ctm_GBSG2_2, data = GBSG2, fixed = c("log(y)" = 1), 
                   scale = TRUE)

## ----mlt-GBSG2-2-exp-----------------------------------------------------
survreg_GBSG2_2 <- survreg(fm_GBSG2, data = GBSG2, dist = "exponential")
phreg_GBSG2_2 <- phreg(fm_GBSG2, data = GBSG2, dist = "weibull", 
                       shape = 1)
logLik(survreg_GBSG2_2)
logLik(phreg_GBSG2_2)
logLik(mlt_GBSG2_2)
RC(survreg = coef(survreg_GBSG2_2)[names(cf)], 
   phreg = -coef(phreg_GBSG2_2)[names(cf)], 
   mlt = coef(mlt_GBSG2_2)[names(cf)])

## ----mlt-GBSG2-3---------------------------------------------------------
mlt_GBSG2_3 <- mlt(ctm_GBSG2_2, data = GBSG2, scale = TRUE)
survreg_GBSG2_3 <- survreg(fm_GBSG2, data = GBSG2, dist = "weibull")
phreg_GBSG2_3 <- phreg(fm_GBSG2, data = GBSG2, dist = "weibull")
logLik(survreg_GBSG2_3)
logLik(phreg_GBSG2_3)
logLik(mlt_GBSG2_3)
RC(survreg = coef(survreg_GBSG2_3)[names(cf)] / survreg_GBSG2_3$scale, 
   phreg = - coef(phreg_GBSG2_3)[names(cf)], 
   mlt = coef(mlt_GBSG2_3)[names(cf)])

## ----mlt-GBSG2-3a--------------------------------------------------------
log_GBSG2y <- numeric_var("y", support = c(100.0, max(GBSG2$time)), 
                          bounds = c(0.1, Inf))
lBy <- Bernstein_basis(log_GBSG2y, order = 10, ui = "increasing", 
                       log_first = TRUE)
ctm_GBSG2_3a <- ctm(lBy, shifting = fm_GBSG2[-2L], data = GBSG2, 
                   negative = FALSE, todistr = "MinExtrVal")
mlt_GBSG2_3a <- mlt(ctm_GBSG2_3a, data = GBSG2, scale = TRUE)
logLik(mlt_GBSG2_3a)
RC(coxph = cf, mlt = coef(mlt_GBSG2_3a)[names(cf)])

## ----mlt-BostonHousing-lm------------------------------------------------
data("BostonHousing2", package = "mlbench")
lm_BH <- lm(cmedv ~ crim + zn + indus + chas + nox + rm + age + dis + 
            rad + tax + ptratio + b + lstat, data = BostonHousing2)

## ----mlt-BostonHousing-mlt-----------------------------------------------
BostonHousing2$medvc <- with(BostonHousing2, Surv(cmedv, cmedv < 50))
var_m <- numeric_var("medvc", support = c(10.0, 40.0), bounds = c(0, Inf))
fm_BH <- medvc ~ crim + zn + indus + chas + nox + rm + age + 
                 dis + rad + tax + ptratio + b + lstat

## ----mlt-BostonHousng-mlt-linear-----------------------------------------
B_m <- polynomial_basis(var_m, coef = c(TRUE, TRUE), 
                        ui = matrix(c(0, 1), nrow = 1), ci = 0)
ctm_BH <- ctm(B_m, shift = fm_BH[-2L], data = BostonHousing2, 
              todistr = "Normal")
lm_BH_2 <- mlt(ctm_BH, data = BostonHousing2, scale = TRUE)
logLik(lm_BH_2)

## ----mlt-BostonHousing-ctm-----------------------------------------------
B_m <- Bernstein_basis(var_m, order = 6, ui = "increasing")
ctm_BH <- ctm(B_m, shift = fm_BH[-2L], data = BostonHousing2, 
              todistr = "Normal")
mlt_BH <- mlt(ctm_BH, data = BostonHousing2, scale = TRUE)
logLik(mlt_BH)

## ----mlt-BostonHousing-plot, echo = FALSE, results = "hide", dev = "png"----
q <- 3:52
m <- predict(lm_BH, data = BostonHousing2)
s <- summary(lm_BH)$sigma
d <- sapply(m, function(m) pnorm(q, mean = m, sd = s))
nd <- expand.grid(q = q, lp = m)
nd$d <- c(d)

pfun <- function(x, y, z, subscripts, at, ...) {
    panel.contourplot(x, y, z, subscripts, at = 1:9/10, ...)
    panel.xyplot(x = m, y = BostonHousing2$cmedv, pch = 20,   
                 col = rgb(.1, .1, .1, .1),  ...)   
}
p1 <- contourplot(d ~ lp + q, data = nd, panel = pfun, 
                  xlab = "Linear predictor", ylab = "Observed", 
                  main = "Normal Linear Model")

d <- predict(mlt_BH, newdata = BostonHousing2, q = q, type = "distribution")
lp <- c(predict(mlt_BH, newdata = BostonHousing2, q = 1, terms = "bshift"))
nd <- expand.grid(q = q, lp = -lp)
nd$d <- c(d)
pfun <- function(x, y, z, subscripts, at, ...) {
    panel.contourplot(x, y, z, subscripts, at = 1:9/10, ...)
    panel.xyplot(x = -lp, y = BostonHousing2$cmedv, pch = 20, 
                 col = rgb(.1, .1, .1, .1), ...)   
}
p2 <- contourplot(d ~ lp + q, data = nd, panel = pfun, xlab = "Linear predictor", ylab = "Observed", main = "Linear Transformation Model")
grid.arrange(p1, p2, nrow = 1)

## ----mlt-PSID1976--------------------------------------------------------
data("PSID1976", package = "AER")
PSID1976$nwincome <- with(PSID1976, (fincome - hours * wage)/1000)
PSID1976$hours <- as.double(PSID1976$hours)
PSID1976_0 <- subset(PSID1976, participation == "yes")
fm_PSID1976 <- hours ~ nwincome + education + experience + 
                       I(experience^2) + age + youngkids + oldkids

## ----mlt-PSID1976-truncreg-----------------------------------------------
tr_PSID1976 <- truncreg(fm_PSID1976, data = PSID1976_0)

## ----mlt-PSID1976-mlt----------------------------------------------------
PSID1976_0$hours <- R(PSID1976_0$hours, tleft = 0)
b_hours <- as.basis(~ hours, data = PSID1976, 
                    ui = matrix(c(0, 1), nr  = 1), ci = 0)
ctm_PSID1976_1 <- ctm(b_hours, shift = fm_PSID1976[-2L], 
                      data = PSID1976_0, todistr = "Normal") 
mlt_PSID1976_1 <- mlt(ctm_PSID1976_1, data = PSID1976_0, scale = TRUE)

## ----mlt-PSID1976-cmpr---------------------------------------------------
logLik(tr_PSID1976)
logLik(mlt_PSID1976_1)
cf <- coef(mlt_PSID1976_1)
RC(truncreg = coef(tr_PSID1976),
   mlt = c(-cf[-grep("hours", names(cf))], 1) / cf["hours"])

## ----mlt-PSID1976-mlt-ctm------------------------------------------------
var_h <- numeric_var("hours", support = range(PSID1976_0$hours$exact),
                     bounds = c(0, Inf))
B_hours <- Bernstein_basis(var_h, order = 6, ui = "increasing")
ctm_PSID1976_2 <- ctm(B_hours, shift = fm_PSID1976[-2L], 
                      data = PSID1976_0, todistr = "Normal")
mlt_PSID1976_2 <- mlt(ctm_PSID1976_2, data = PSID1976_0, 
                      scale = TRUE)
logLik(mlt_PSID1976_2)

## ----mlt-CHFLS-3, cache = TRUE-------------------------------------------
b_health <- as.basis(~ R_health - 1, data = CHFLS)
ctm_CHFLS_3 <- ctm(b_happy, interacting = b_health, todist = "Logistic")
mlt_CHFLS_3 <- mlt(ctm_CHFLS_3, data = CHFLS, scale = TRUE)
logLik(mlt_CHFLS_3)
predict(mlt_CHFLS_3, newdata = mkgrid(mlt_CHFLS_3), type = "distribution")

## ----mlt-CHFLS-4, cache = TRUE-------------------------------------------
ctm_CHFLS_4 <- ctm(b_happy, interacting = b_health, shifting = b_R, 
                   todist = "Logistic")
mlt_CHFLS_4 <- mlt(ctm_CHFLS_4, data = CHFLS, scale = TRUE)
coef(mlt_CHFLS_4)[c("R_age", "R_income")]

## ----mlt-GBSG2-4---------------------------------------------------------
b_horTh <- as.basis(GBSG2$horTh)
ctm_GBSG2_4 <- ctm(B_GBSG2y, interacting = b_horTh, 
                   todistr = "MinExtrVal")
mlt_GBSG2_4 <- mlt(ctm_GBSG2_4, data = GBSG2)

## ----mlt-GBSG2-strata-plot, echo = FALSE, results = "hide"---------------
nd <- expand.grid(s <- mkgrid(mlt_GBSG2_4, 100))
nd$mlt_S <- c(predict(mlt_GBSG2_4, newdata = s, type = "survivor"))
nd$KM_S <- unlist(predict(prodlim(Surv(time, cens) ~ horTh, data = GBSG2), 
     	             newdata = data.frame(horTh = s$horTh), times = s$y))
plot(nd$y, nd$mlt_S, ylim = c(0, 1), xlab = "Survival time (days)",
     ylab = "Probability", type = "n", las = 1)
with(subset(nd, horTh == "no"), lines(y, mlt_S, col = "grey", lty = 2))
with(subset(nd, horTh == "yes"), lines(y, mlt_S, lty = 2))
with(subset(nd, horTh == "no"), lines(y, KM_S, type = "s", col = "grey"))
with(subset(nd, horTh == "yes"), lines(y, KM_S, type = "s"))
legend(250, 0.4, lty = c(1, 1, 2, 2), col = c("black", "grey", "black", "grey"),
       legend = c("hormonal therapy, KM", "no hormonal therapy, KM", 
                  "hormonal therapy, MLT", "no hormonal therapy, MLT"), bty = "n", cex = .75)

## ----mlt-GBSG2-5---------------------------------------------------------
ctm_GBSG2_5 <- ctm(B_GBSG2y, interacting = b_horTh, shifting = ~ age, 
                   data = GBSG2, todistr = "MinExtrVal")
mlt_GBSG2_5 <- mlt(ctm_GBSG2_5, data = GBSG2, scale = TRUE)

## ----mlt-GBSG2-5-coxph---------------------------------------------------
coxph_GBSG2_5 <- coxph(Surv(time, cens) ~ age + strata(horTh), 
                       data = GBSG2)
cf <- coef(coxph_GBSG2_5)
RC(coxph = cf, mlt = coef(mlt_GBSG2_5)[names(cf)])

## ----mlt-CHFLS-5, cache = TRUE-------------------------------------------
contrasts(CHFLS$R_health) <- "contr.treatment"
b_health <- as.basis(~ R_health, data = CHFLS)
ctm_CHFLS_5 <- ctm(b_happy, interacting = b_health, todist = "Logistic")
mlt_CHFLS_5 <- mlt(ctm_CHFLS_5, data = CHFLS, scale = TRUE)
predict(mlt_CHFLS_5, newdata = mkgrid(mlt_CHFLS_5), type = "distribution")
logLik(mlt_CHFLS_5)

## ----mlt-CHFLS-6, cache = TRUE-------------------------------------------
b_R <- as.basis(~ R_age + R_income, data = CHFLS, remove_intercept = TRUE, 
                scale = TRUE)
ctm_CHFLS_6 <- ctm(b_happy, interacting = b_R, todist = "Logistic")  
mlt_CHFLS_6 <- mlt(ctm_CHFLS_6, data = CHFLS, scale = TRUE)
logLik(mlt_CHFLS_6)

## ----mlt-CHFLS-7, cache = TRUE-------------------------------------------
ctm_CHFLS_7 <- ctm(b_happy, interacting = c(h = b_health, R = b_R), 
    todist = "Logistic")  
mlt_CHFLS_7 <- mlt(ctm_CHFLS_7, data = CHFLS, scale = TRUE)
logLik(mlt_CHFLS_7)

## ----mlt-iris-1----------------------------------------------------------
fm_iris <- Species ~ Sepal.Length + Sepal.Width + 
                     Petal.Length + Petal.Width
multinom_iris <- nnet::multinom(fm_iris, data = iris, trace = FALSE)
logLik(multinom_iris)
iris$oSpecies <- ordered(iris$Species)
b_Species <- as.basis(iris$oSpecies)
b_x <- as.basis(fm_iris[-2L], data = iris, scale = TRUE)
ctm_iris <- ctm(b_Species, interacting = b_x,
                todistr = "Logistic")
mlt_iris <- mlt(ctm_iris, data = iris, scale = TRUE)
logLik(mlt_iris)
p1 <- predict(mlt_iris, newdata = iris, q = sort(unique(iris$oSpecies)), 
              type = "density")
p2 <- predict(multinom_iris, newdata = iris, type = "prob")
max(abs(t(p1) - p2))

## ----mlt-GBSG2-6---------------------------------------------------------
ctm_GBSG2_6 <- ctm(B_GBSG2y, shifting = ~ horTh, data = GBSG2, 
                   todistr = "MinExtrVal")
mlt_GBSG2_6 <- mlt(ctm_GBSG2_6, data = GBSG2)
logLik(mlt_GBSG2_6)

## ----mlt-GBSG2-7---------------------------------------------------------
b_horTh <- as.basis(~ horTh, data = GBSG2)
ctm_GBSG2_7 <- ctm(B_GBSG2y, interacting = b_horTh, 
                   todistr = "MinExtrVal")
nd <- data.frame(y = GBSG2$time[1:2], horTh = unique(GBSG2$horTh))
attr(model.matrix(ctm_GBSG2_7, data = nd), "constraint")
mlt_GBSG2_7 <- mlt(ctm_GBSG2_7, data = GBSG2)
logLik(mlt_GBSG2_7)

## ----mlt-GBSG2-deviation-plot, echo = FALSE, results = "hide"------------

s <- mkgrid(mlt_GBSG2_7, 15)
s$y <- s$y[s$y > 100 & s$y < 2400]
nd <- expand.grid(s)
K <- model.matrix(ctm_GBSG2_7, data = nd)
Kyes <- K[nd$horTh == "yes",]
Kyes[,grep("Intercept", colnames(K))] <- 0  
gh <- glht(parm(coef(mlt_GBSG2_7), vcov(mlt_GBSG2_7)), Kyes)
ci <- confint(gh)
coxy <- s$y

K <- matrix(0, nrow = 1, ncol = length(coef(mlt_GBSG2_6)))
K[,length(coef(mlt_GBSG2_6))] <- 1
ci2 <- confint(glht(mlt_GBSG2_6, K))

plot(coxy, ci$confint[, "Estimate"], ylim = range(ci$confint), type = "n",
     xlab = "Survival time (days)", ylab = "Log-hazard ratio", las = 1)
polygon(c(coxy, rev(coxy)), c(ci$confint[,"lwr"], rev(ci$confint[, "upr"])),
        border = NA, col = rgb(.1, .1, .1, .1))
lines(coxy, ci$confint[, "Estimate"], lty = 1, lwd = 1)
lines(coxy, rep(ci2$confint[,"Estimate"], length(coxy)), lty = 2, lwd = 1) 
lines(coxy, rep(0, length(coxy)), lty = 3)
polygon(c(coxy[c(1, length(coxy))], rev(coxy[c(1, length(coxy))])),
        rep(ci2$confint[,c("lwr", "upr")], c(2, 2)),
        border = NA, col = rgb(.1, .1, .1, .1))
legend("bottomright", lty = 1:2, lwd = 1, legend = c("time-varying log-hazard ratio",
       "time-constant log-hazard ratio"), bty = "n", cex = .75)

## ----mlt-GBSG2-8, echo = TRUE, cache = TRUE------------------------------
var_a <- numeric_var("age", support = range(GBSG2$age))
B_age <- Bernstein_basis(var_a, order = 3)
b_horTh <- as.basis(GBSG2$horTh)
ctm_GBSG2_8 <- ctm(B_GBSG2y, 
                   interacting = b(horTh = b_horTh, age = B_age), 
                   todistr = "MinExtrVal")
mlt_GBSG2_8  <- mlt(ctm_GBSG2_8, data = GBSG2)
logLik(mlt_GBSG2_8)

## ----mlt-GBSG2-8-plot, echo = FALSE--------------------------------------
nlev <- c(no = "without hormonal therapy", yes = "with hormonal therapy")
levels(nd$horTh) <- nlev[match(levels(nd$horTh), names(nlev))]
s <- mkgrid(mlt_GBSG2_8, 100)
nd <- expand.grid(s)
nd$s <- c(predict(mlt_GBSG2_8, newdata = s, type = "survivor"))
contourplot(s ~ age + y | horTh, data = nd, at = 1:9 / 10,
            ylab = "Survival time (days)", xlab = "Age (years)",
            scales = list(x = list(alternating = c(1, 1))))

## ----mlt-head, echo = TRUE, cache = TRUE---------------------------------
data("db", package = "gamlss.data")
db$lage <- with(db, age^(1/3))
var_head <- numeric_var("head", support = quantile(db$head, c(.1, .9)),
                       bounds = range(db$head))
B_head <- Bernstein_basis(var_head, order = 3, ui = "increasing")
var_lage <- numeric_var("lage", support = quantile(db$lage, c(.1, .9)),
                        bounds = range(db$lage))
B_age <- Bernstein_basis(var_lage, order = 3, ui = "none")
ctm_head <- ctm(B_head, interacting = B_age)
mlt_head <- mlt(ctm_head, data = db, scale = TRUE)

## ----mlt-head-plot, echo = FALSE, dev = "png"----------------------------
pr <- expand.grid(s <- mkgrid(ctm_head, 100))
pr$p <- c(predict(mlt_head, newdata = s, type = "distribution"))
pr$lage <- pr$lage^3
pr$cut <- factor(pr$lage > 2.5)
levels(pr$cut) <- c("Age < 2.5 yrs", "Age > 2.5 yrs")
pfun <- function(x, y, z, subscripts, at, ...) {
    panel.contourplot(x, y, z, subscripts,
        at = c(0.4, 2, 10, 25, 50, 75, 90, 98, 99.6)/ 100, ...)
    panel.xyplot(x = db$age, y = db$head, pch = 20,
                 col = rgb(.1, .1, .1, .1), ...)
}
print(contourplot(p ~ lage + head | cut, data = pr, panel = pfun, region = FALSE,
            xlab = "Age (years)", ylab = "Head circumference (cm)",
            scales = list(x = list(relation = "free"))))

## ----mlt-BHi-start, echo = FALSE-----------------------------------------
start <- 
c(`Bs1(medvc):(Intercept)` = -11.58786739767557, `Bs2(medvc):(Intercept)` = -6.7547950426875296, 
`Bs3(medvc):(Intercept)` = -4.1366028640187587, `Bs4(medvc):(Intercept)` = 1.70242739486858, 
`Bs5(medvc):(Intercept)` = 1.7024282405116158, `Bs6(medvc):(Intercept)` = 3.2595263987587746, 
`Bs7(medvc):(Intercept)` = 3.9025772292254435, `Bs1(medvc):crim` = 3.0699495656298534, 
`Bs2(medvc):crim` = 3.5955167824166181, `Bs3(medvc):crim` = 3.5955168278664127, 
`Bs4(medvc):crim` = 3.5955168750613429, `Bs5(medvc):crim` = 3.5955168904234664, 
`Bs6(medvc):crim` = 3.5955169392855586, `Bs7(medvc):crim` = 3.5955169730725052, 
`Bs1(medvc):zn` = -2.1151709334952202, `Bs2(medvc):zn` = -1.330004690968422, 
`Bs3(medvc):zn` = -1.3300045948503083, `Bs4(medvc):zn` = -1.3300033108698186, 
`Bs5(medvc):zn` = -1.3300033078086677, `Bs6(medvc):zn` = -0.96191066610824716, 
`Bs7(medvc):zn` = -0.96191065861104608, `Bs1(medvc):indus` = -0.35960908556525345, 
`Bs2(medvc):indus` = -0.35960905985490077, `Bs3(medvc):indus` = -0.35960793525271667, 
`Bs4(medvc):indus` = -0.35934309840163225, `Bs5(medvc):indus` = -0.35934309357921079, 
`Bs6(medvc):indus` = -0.35934403265085169, `Bs7(medvc):indus` = -0.35934405322662893, 
`Bs1(medvc):chas1` = -0.49786094816438858, `Bs2(medvc):chas1` = -0.49770555550324658, 
`Bs3(medvc):chas1` = -0.49085193409644223, `Bs4(medvc):chas1` = -0.28057150654979646, 
`Bs5(medvc):chas1` = -0.28057150745910958, `Bs6(medvc):chas1` = -0.28057159418145561, 
`Bs7(medvc):chas1` = -0.28057159957855826, `Bs1(medvc):nox` = 2.5376938304919792, 
`Bs2(medvc):nox` = 2.5376933697075534, `Bs3(medvc):nox` = 2.5375485200496413, 
`Bs4(medvc):nox` = 2.5375484910378336, `Bs5(medvc):nox` = 2.5375484949797813, 
`Bs6(medvc):nox` = 2.5375484970845217, `Bs7(medvc):nox` = 2.5375484892126163, 
`Bs1(medvc):rm` = -0.14558898695685421, `Bs2(medvc):rm` = -2.2333101594194145, 
`Bs3(medvc):rm` = -2.233310165514423, `Bs4(medvc):rm` = -6.913467986991809, 
`Bs5(medvc):rm` = -6.9134680513321216, `Bs6(medvc):rm` = -6.9134681648477878, 
`Bs7(medvc):rm` = -6.9134681532727758, `Bs1(medvc):age` = 1.7037598964290337, 
`Bs2(medvc):age` = 1.7037612199992875, `Bs3(medvc):age` = 1.5898930022989017, 
`Bs4(medvc):age` = 0.43102069612812693, `Bs5(medvc):age` = 0.43102067780471154, 
`Bs6(medvc):age` = -0.037041330032449186, `Bs7(medvc):age` = -0.037041406627502181, 
`Bs1(medvc):dis` = 2.9873219530242476, `Bs2(medvc):dis` = 4.4952372044989133, 
`Bs3(medvc):dis` = 4.4952373035891808, `Bs4(medvc):dis` = 4.4952386569329841, 
`Bs5(medvc):dis` = 4.4952391676337369, `Bs6(medvc):dis` = 4.4952392418127287, 
`Bs7(medvc):dis` = 4.7809968102137574, `Bs1(medvc):rad` = -0.97566895623645966, 
`Bs2(medvc):rad` = -0.97566909946874247, `Bs3(medvc):rad` = -3.4798480962672937, 
`Bs4(medvc):rad` = -3.4798481822134661, `Bs5(medvc):rad` = -3.4798481789260292, 
`Bs6(medvc):rad` = -4.3174402380431083, `Bs7(medvc):rad` = -4.3178484277713345, 
`Bs1(medvc):tax` = 3.9727833395481338, `Bs2(medvc):tax` = 2.3660629545899958, 
`Bs3(medvc):tax` = 2.3660628938371251, `Bs4(medvc):tax` = 2.3660629241250111, 
`Bs5(medvc):tax` = 2.3660629251840981, `Bs6(medvc):tax` = 2.1146216707381735, 
`Bs7(medvc):tax` = 1.4719791743622892, `Bs1(medvc):ptratio` = 2.2039034046410761, 
`Bs2(medvc):ptratio` = 2.2039025612746528, `Bs3(medvc):ptratio` = 2.203902513614862, 
`Bs4(medvc):ptratio` = 2.2039024990773788, `Bs5(medvc):ptratio` = 2.2039017916126986, 
`Bs6(medvc):ptratio` = 2.2039017060852997, `Bs7(medvc):ptratio` = 2.2039016974876349, 
`Bs1(medvc):b` = -0.50419574760654773, `Bs2(medvc):b` = -1.6428248797945801, 
`Bs3(medvc):b` = -0.71378127312071882, `Bs4(medvc):b` = -0.71378122029094859, 
`Bs5(medvc):b` = -0.71378126944149745, `Bs6(medvc):b` = -0.71378288163529846, 
`Bs7(medvc):b` = -0.71378290708848713, `Bs1(medvc):lstat` = 5.3505499532175946, 
`Bs2(medvc):lstat` = 5.7360315640224862, `Bs3(medvc):lstat` = 5.7360316296539118, 
`Bs4(medvc):lstat` = 5.7360355991353975, `Bs5(medvc):lstat` = 6.6638242380198784, 
`Bs6(medvc):lstat` = 7.0023018769629921, `Bs7(medvc):lstat` = 7.0023018815336835
)

## ----mlt-BostonHousing-dr-sumconstr, cache = TRUE------------------------
b_BH_s <- as.basis(fm_BH[-2L], data = BostonHousing2, scale = TRUE)
ctm_BHi <- ctm(B_m, interacting = b_BH_s, sumconstr = TRUE)
mlt_BHi <- mlt(ctm_BHi, data = BostonHousing2, dofit = FALSE)
coef(mlt_BHi) <- start
logLik(mlt_BHi)

## ----mlt-BostonHousing-dr, cache = TRUE----------------------------------
ctm_BHi2 <- ctm(B_m, interacting = b_BH_s, sumconstr = FALSE)
mlt_BHi2 <- mlt(ctm_BHi2, data = BostonHousing2)
logLik(mlt_BHi2)

## ----mlt-Boston-Housing-dr-plot, echo = FALSE, fig.height = 3, dev = "png"----
q <- mkgrid(var_m, 100)[[1]]
tr <- predict(mlt_BH, newdata = BostonHousing2[, all.vars(fm_BH[-2L])],
              q = q, type = "density")
tri <- predict(mlt_BHi, newdata = BostonHousing2[, all.vars(fm_BH[-2L])],
              q = q, type = "density")
tri2 <- predict(mlt_BHi2, newdata = BostonHousing2[, all.vars(fm_BH[-2L])],
              q = q, type = "density")
layout(matrix(1:3, ncol = 3))
Q <- matrix(q, nrow = length(q), ncol = ncol(tr))
ylim <- range(c(tr, tri))
matplot(Q, tr, ylim = ylim, xlab = "Median Value", ylab = "Density",
        type = "l", col = rgb(.1, .1, .1, .1), lty = 1, main = "mlt_BH") 
matplot(Q, tri, ylim = ylim, xlab = "Median Value", ylab = "Density",
        type = "l", col = rgb(.1, .1, .1, .1), lty = 1, main = "mlt_BHi")
matplot(Q, tri2, ylim = ylim, xlab = "Median Value", ylab = "Density",
        type = "l", col = rgb(.1, .1, .1, .1), lty = 1, main = "mlt_BHi2")

## ----mlt-treepipit, echo = TRUE, cache = TRUE----------------------------
data("treepipit", package = "coin")
treepipit$ocounts <- ordered(treepipit$counts)
B_cs <- Bernstein_basis(var = numeric_var("coverstorey", support = 1:110), 
                        order = 4)
B_c <- as.basis(treepipit$ocounts)
ctm_treepipit <- ctm(B_c, interacting = B_cs)
mlt_treepipit <- mlt(ctm_treepipit, data = treepipit, scale = TRUE,
                     optim = mltoptim()["spg"])

## ----mlt-mgcv, echo = FALSE, results = "hide"----------------------------
library("mgcv") ### masks nnet::multinom

## ----mlt-treepipit-gam---------------------------------------------------
gam_treepipit <- gam(counts ~ s(coverstorey), data = treepipit, 
                     family = "poisson")

## ----mlt-treepipit-plot, echo = FALSE, fig.height = 3--------------------
s <- mkgrid(ctm_treepipit, 100)
s$ocounts <- s$ocounts[1:5]
nd <- expand.grid(s)
nd$p <- c(predict(mlt_treepipit, newdata = s, type = "distribution"))

### produce a table
tpt <- xtabs(~ counts + coverstorey, data = treepipit)

### construct a data frame with frequencies
treepipit2 <- sapply(as.data.frame(tpt, stringsAsFactors = FALSE),
                     as.integer)

s <- mkgrid(ctm_treepipit, 10)
s$ocounts <- s$ocounts[1]
K <- model.matrix(ctm_treepipit, data = expand.grid(s))
#g <- glht(parm(coef(mod), vcov(mod)), linfct = K)
#confint(g)
nd$lambda <- predict(gam_treepipit, newdata = nd, type = "response")

layout(matrix(1:3, nr = 1))
par("mai" = par("mai") * c(1, .95, 1, .85))
xlim <- range(treepipit[, "coverstorey"]) * c(0.98, 1.05)
xlab <- "Cover storey"
ylab <- "Number of tree pipits (TP)"
### scatterplot again; plots are proportional to number of plots
plot(counts ~ coverstorey, data = treepipit2, cex = sqrt(Freq),
     ylim = c(-.5, 5), xlab = xlab, ylab = ylab, col = "darkgrey", 
     xlim = xlim, las = 1, main = "Observations")

plot(c(0, 110), c(0, 1), type = "n", xlab = xlab, ylab = "Conditional probability",
     xlim = xlim, las = 1, main = "MLT")
with(subset(nd, ocounts == "0"), lines(coverstorey, p, lty = 1))
with(subset(nd, ocounts == "1"), lines(coverstorey, p, lty = 2))
with(subset(nd, ocounts == "2"), lines(coverstorey, p, lty = 3))
with(subset(nd, ocounts == "3"), lines(coverstorey, p, lty = 4))
with(subset(nd, ocounts == "4"), lines(coverstorey, p, lty = 5))
abline(h = 1, lty = 6)
legend("bottomright", lty = 1:6, legend = c(expression(TP == 0),
                                            expression(TP <= 1),
                                            expression(TP <= 2),
                                            expression(TP <= 3),
                                            expression(TP <= 4),
                                            expression(TP <= 5)), bty = "n")

plot(c(0, 110), c(0, 1), type = "n", xlab = xlab, ylab = "Conditional probability",
     xlim = xlim, las = 1, main = "GAM")
with(subset(nd, ocounts == "0"), lines(coverstorey, ppois(0, lambda), lty = 1))
with(subset(nd, ocounts == "1"), lines(coverstorey, ppois(1, lambda), lty = 2))
with(subset(nd, ocounts == "2"), lines(coverstorey, ppois(2, lambda), lty = 3))
with(subset(nd, ocounts == "3"), lines(coverstorey, ppois(3, lambda), lty = 4))
with(subset(nd, ocounts == "4"), lines(coverstorey, ppois(4, lambda), lty = 5))
abline(h = 1, lty = 6)

## ----mlt-CHFLS-2-cmpr-estfun---------------------------------------------
sc_polr <- estfun(polr_CHFLS_2)
sc_mlt <- -estfun(mlt_CHFLS_2)[,c(4, 5, 1:3)]
summary((sc_polr - sc_mlt) / 
        pmax(sqrt(.Machine$double.eps), sc_mlt))

## ----mlt-CHFLS-2-cmpr-2--------------------------------------------------
RC(polr = sqrt(diag(vcov(polr_CHFLS_2))),
   mlt = sqrt(diag(vcov(mlt_CHFLS_2)))[c(4, 5, 1:3)])

## ----mlt-CHFLS-2-cmpr-3--------------------------------------------------
cftest(polr_CHFLS_2)
cftest(mlt_CHFLS_2, parm = names(coef(polr_CHFLS_2)))

## ----mlt-GBSG2-1-coxph-cmpr----------------------------------------------
cf <- coef(coxph_GBSG2_1)
RC(coxph = sqrt(diag(vcov(coxph_GBSG2_1))),
   mlt = sqrt(diag(vcov(mlt_GBSG2_1)))[names(cf)],
   fss = sqrt(diag(vcov(fss_GBSG2_1)))[names(cf)])

## ----mlt-GBSG2-1-coxph-cmpr-cftest---------------------------------------
cftest(coxph_GBSG2_1)
cftest(mlt_GBSG2_1, parm = names(cf))
cftest(fss_GBSG2_1, parm = names(cf))

## ----mlt-geyser-w-band---------------------------------------------------
cb_w <- confband(mlt_w, newdata = data.frame(1), K = 20, cheat = 100)

## ----mlt-geyser-w-cbplot, echo = FALSE-----------------------------------
layout(matrix(1:2, ncol = 2))
#i <- (cb_w[, "q"] > 45 & cb_w[, "q"] < 110)
#cb_w[-i, "lwr"] <- NA
#cb_w[-i, "upr"] <- NA
plot(cb_w[, "q"], cb_w[, "Estimate"], xlab = "Waiting times", ylab = "Transformation", 
     main = "", type = "l")

q <- cb_w[, "q"]
lwr <- cb_w[, "lwr"]
upr <-  cb_w[, "upr"]
polygon(c(q, rev(q)), c(lwr, rev(upr)),
        border = NA, col = "lightblue") ### rgb(.1, .1, .1, .1))

lines(cb_w[, "q"], cb_w[, "Estimate"])
rug(geyser$waiting, col = rgb(.1, .1, .1, .1))
plot(cb_w[, "q"], pnorm(cb_w[, "Estimate"]), xlab = "Waiting times", ylab = "Distribution", 
     main = "", cex = .5, type = "n")
polygon(c(q, rev(q)), c(pnorm(lwr), rev(pnorm(upr))),                                     
        border = NA, col = "lightblue") ### rgb(.1, .1, .1, .1))
lines(cb_w[, "q"], pnorm(cb_w[, "Estimate"]))

# lines(cb_w[, "q"], pnorm(cb_w[, "lwr"]))
# lines(cb_w[, "q"], pnorm(cb_w[, "upr"]))
rug(geyser$waiting, col = rgb(.1, .1, .1, .1))

## ----mlt-geyser-w-simulate, results = "hide", cache = TRUE---------------
new_w <- simulate(mlt_w, nsim = 100)
llr <- numeric(length(new_w))
pdist <- vector(mode = "list", length = length(new_w))
pdens <- vector(mode = "list", length = length(new_w))
ngeyser <- geyser
q <- mkgrid(var_w, 100)[[1]]
for (i in 1:length(new_w)) {
    ngeyser$waiting <- new_w[[i]]
    mlt_i <- mlt(ctm_w, data = ngeyser, scale = TRUE, 
                 theta = coef(mlt_w))
    llr[[i]] <- logLik(mlt_i) - logLik(mlt_i, parm = coef(mlt_w))
    pdist[[i]] <- predict(mlt_i, newdata = data.frame(1), 
                          type = "distribution", q = q)
    pdens[[i]] <- predict(mlt_i, newdata = data.frame(1), 
                          type = "density", q = q)
}

## ----mlt-geyser-w-simulate-plot, echo = FALSE----------------------------
i <- which(llr < quantile(llr, prob = .95))
tpdist <- pdist[i]
tpdens <- pdens[i]
layout(matrix(1:2, ncol = 2))
plot(q, tpdist[[1]], type = "n", xlab = "Waiting times", ylab = "Distribution")
polygon(c(q, rev(q)), c(pnorm(lwr), rev(pnorm(upr))),
        border = NA, col = "lightblue")
tmp <- sapply(tpdist, function(x) lines(q, x, col = rgb(.1, .1, .1, .1)))
plot(q, tpdens[[1]], type = "n", ylim = range(unlist(pdens)),
     xlab = "Waiting times", ylab = "Density")
tmp <- sapply(tpdens, function(x) lines(q, x, col = rgb(.1, .1, .1, .1)))

## ----mlt-variables-factor------------------------------------------------
f_eye <- factor_var("eye", desc = "eye color", 
                    levels = c("blue", "brown", "green", "grey", "mixed"))

## ----mlt-variables-factor-methods----------------------------------------
variable.names(f_eye)
desc(f_eye)
variables::unit(f_eye)
support(f_eye)
bounds(f_eye)
is.bounded(f_eye)

## ----mlt-variables-factor-mkgrid-----------------------------------------
mkgrid(f_eye)

## ----mlt-variables-ordered-----------------------------------------------
o_temp <- ordered_var("temp", desc = "temperature", 
                      levels = c("cold", "lukewarm", "warm", "hot"))

## ----mlt-variables-ordered-methods---------------------------------------
variable.names(o_temp)
desc(o_temp)
variables::unit(o_temp)
support(o_temp)
bounds(o_temp) 
is.bounded(o_temp)
mkgrid(o_temp)

## ----mlt-variables-fd----------------------------------------------------
v_age <- numeric_var("age", desc = "age of patient", 
                     unit = "years", support = 25:75)

## ----mlt-variables-fd-methods--------------------------------------------
variable.names(v_age)
desc(v_age)
variables::unit(v_age)
support(v_age)
bounds(v_age) 
is.bounded(v_age)

## ----mlt-variables-fd-mkgrid---------------------------------------------
mkgrid(v_age)

## ----mlt-variables-c-----------------------------------------------------
v_temp <- numeric_var("ztemp", desc = "Zurich daytime temperature", 
                      unit = "Celsius", support = c(-10.0, 35.0), 
                      add = c(-5, 5), bounds = c(-273.15, Inf))

## ----mlt-variables-c-methods---------------------------------------------
variable.names(v_temp)
desc(v_temp)
variables::unit(v_temp)
support(v_temp)
bounds(v_temp) 
is.bounded(v_temp)

## ----mlt-variables-c-mkgrid----------------------------------------------
mkgrid(v_temp, n = 20)

## ----mlt-variables-vars--------------------------------------------------
vars <- c(f_eye, o_temp, v_age, v_temp)

## ----mlt-variables-vars-methods------------------------------------------
variable.names(vars)
desc(vars) 
variables::unit(vars)
support(vars)
bounds(vars)
is.bounded(vars)
mkgrid(vars, n = 20)

## ----mlt-variables-vars-expand-------------------------------------------
nd <- expand.grid(mkgrid(vars))

## ----mlt-variables-check-------------------------------------------------
check(vars, data = nd)

## ----mlt-basefun-polynom-------------------------------------------------
xvar <- numeric_var("x", support = c(0.1, pi), bounds= c(0, Inf))
x <- as.data.frame(mkgrid(xvar, n = 20))
class(pb <- polynomial_basis(xvar, coef = c(TRUE, TRUE, FALSE, TRUE)))

## ----mlt-basefun-polynom-fun---------------------------------------------
head(pb(x))

## ----mlt-basefun-polynom-mm----------------------------------------------
head(model.matrix(pb, data = x))

## ----mlt-basefun-polynom-pred--------------------------------------------
predict(pb, newdata = x, coef = c(1, 2, 0, 1.75))

## ----mlt-basefun-polynom-pred-deriv--------------------------------------
predict(pb, newdata = x, coef = c(1, 2, 0, 1.75), deriv = c(x = 1L))

## ----mlt-basefun-log-----------------------------------------------------
class(lb <- log_basis(xvar, ui = "increasing"))
head(X <- model.matrix(lb, data = x))

## ----mlt-basefun-log-constr----------------------------------------------
attr(X, "constraint")

## ----mlt-basefun-log-pred------------------------------------------------
predict(lb, newdata = x, coef = c(1, 2))
predict(lb, newdata = x, coef = c(1, 2), deriv = c(x = 1L))

## ----mlt-basefun-Bernstein-----------------------------------------------
class(bb <- Bernstein_basis(xvar, order = 3, ui = "increasing"))
head(X <- model.matrix(bb, data = x))

## ----mlt-basefun-Bernstein-constr----------------------------------------
cf <- c(1, 2, 2.5, 2.6)
(cnstr <- attr(X, "constraint"))
all(cnstr$ui %*% cf > cnstr$ci)

## ----mlt-basefun-Bernstein-predict---------------------------------------
predict(bb, newdata = x, coef = cf)
predict(bb, newdata = x, coef = cf, deriv = c(x = 1))

## ----mlt-basefun-as.basis------------------------------------------------
iv <- as.vars(iris)
fb <- as.basis(~ Species + Sepal.Length + Sepal.Width,  data = iv,
               remove_intercept = TRUE, negative = TRUE, 
               contrasts.args =  list(Species = "contr.sum"))
class(fb)
head(model.matrix(fb, data = iris))

## ----mlt-basefun-c-------------------------------------------------------
class(blb <- c(bern = bb, 
               log = log_basis(xvar, ui = "increasing", 
                               remove_intercept = TRUE)))
head(X <- model.matrix(blb, data = x))
attr(X, "constraint")

## ----mlt-basefun-b-------------------------------------------------------
fb <- as.basis(~ g, data = factor_var("g", levels = LETTERS[1:2]))
class(bfb <- b(bern = bb, f = fb))
nd <- expand.grid(mkgrid(bfb, n = 10))
head(X <- model.matrix(bfb, data = nd))
attr(X, "constraint")

## ----mlt-basefun-b-sumconstr---------------------------------------------
bfb <- b(bern = bb, f = fb, sumconstr = TRUE)
head(X <- model.matrix(bfb, data = nd))
attr(X, "constraint")

## ----mlt-R-ordered-------------------------------------------------------
head(R(sort(unique(CHFLS$R_happy))))

## ----mlt-R-integer-------------------------------------------------------
R(1:5, bounds = c(1, 5))

## ----mlt-R-numeric-------------------------------------------------------
x <- rnorm(10)
xt <- round(x[x > -1 & x <= 2], 3)
xl <- xt - sample(c(Inf, (0:(length(xt) - 2)) / length(xt)), 
                  replace = FALSE)
xr <- xt + sample(c(Inf, (0:(length(xt) - 2)) / length(xt)), 
                  replace = FALSE)
R(c(1.2, rep(NA, length(xt))), cleft = c(NA, xl), cright = c(NA, xr), 
  tleft = -1, tright = 2)

## ----mlt-R-Surv----------------------------------------------------------
head(geyser$duration)
head(R(geyser$duration))

## ----mlt-mlt-chi-p-------------------------------------------------------
pY <- function(x) pchisq(x, df = 20)
dY <- function(x) dchisq(x, df = 20)
qY <- function(p) qchisq(p, df = 20)

## ----mlt-mlt-chi-B-------------------------------------------------------
yvar <- numeric_var("y", support = qY(c(.001, 1 - .001)), 
                    bounds = c(0, Inf))
By <- Bernstein_basis(yvar, order = ord <- 15, ui = "increasing")

## ----mlt-mlt-chi-mlt-----------------------------------------------------
mod <- ctm(By)

## ----mlt-mlt-chi-trafo---------------------------------------------------
h <- function(x) qnorm(pY(x))
x <- seq(from = support(yvar)[["y"]][1], to = support(yvar)[["y"]][2], 
         length.out = ord + 1)

## ----mlt-mlt-chi-coef----------------------------------------------------
mlt::coef(mod) <- h(x)

## ----mlt-mlt-chi-sim-----------------------------------------------------
d <- as.data.frame(mkgrid(yvar, n = 500))
d$grid <- d$y
d$y <- simulate(mod, newdata = d)

## ----mlt-mlt-chi-fit-----------------------------------------------------
fmod <- mlt(mod, data = d, scale = TRUE)

## ----mlt-mlt-chi-model---------------------------------------------------
coef(mod)
coef(fmod)
logLik(fmod)
logLik(fmod, parm = coef(mod))

## ----mlt-mlt-chi-plot----------------------------------------------------
## compute true density
d$dtrue <- dY(d$grid)
d$dest <- predict(fmod, q = sort(d$grid), type = "density")
plot(mod, newdata = d, type = "density", col = "black", 
     xlab = "y", ylab = "Density", ylim = c(0, max(d$dest)))
lines(d$grid, d$dtrue, lty = 2)
lines(sort(d$grid), d$dest[order(d$grid)], lty = 3)
legend("topright", lty = 1:3, bty = "n", 
       legend = c("True", "Approximated", "Estimated"))

## ----mlt-mlt-coef, echo = FALSE, results = "hide"------------------------
### print coefs for regression tests
objs <- ls()
mltobj <- objs[grep("^mlt_", objs)]
sapply(mltobj, function(m) eval(parse(text = paste("coef(", m, ")"))))

## ----mlt-sessionInfo, echo = FALSE, results = "hide"---------------------
sessionInfo()

Try the mlt.docreg package in your browser

Any scripts or data that you put into this service are public.

mlt.docreg documentation built on Aug. 28, 2023, 5:06 p.m.