R/methods.R

Defines functions downstream.test risk.mboost risk residuals.mboost extract.bl_tree extract.bl_lin extract.blg extract.blackboost extract.glmboost extract.mboost extract nuisance.mboost nuisance print.summary.mboost summary.mboost selected.mboost variable.names.glmboost variable.names.mboost print.glmboost print.mboost hatvalues.glmboost coef.glmboost predict.glmboost model.frame.mboost update.mboost mstop.mboost logLik.mboost resid.mboost fitted.mboost mstop.gbAIC mstop plot.gbAIC print.gbAIC AICboost AIC.mboost hatvalues.mboost hatvalues.gamboost coef.mboost predict.mboost .predictmboost

Documented in AIC.mboost coef.glmboost coef.mboost downstream.test extract extract.blackboost extract.blg extract.bl_lin extract.bl_tree extract.glmboost extract.mboost fitted.mboost hatvalues.gamboost hatvalues.glmboost logLik.mboost mstop mstop.gbAIC mstop.mboost nuisance nuisance.mboost predict.glmboost predict.mboost print.glmboost print.mboost resid.mboost residuals.mboost risk risk.mboost selected.mboost summary.mboost variable.names.glmboost variable.names.mboost

### <FIXME>:
###          add link argument (family needs to be touched)
### </FIXME>
.predictmboost <- function(y, pr, type, nm, family) {
    if (!isMATRIX(pr)) {
        pr <- as.vector(pr)
        names(pr) <- nm
    }
    if (is.list(pr)) {
        rownames(pr) <- nm
        if (type != "link")
            warning("argument", sQuote("type"), " is ignored")
        return(pr)
    }
    if (type == "link") ret <- pr
    if (type == "response") ret <- family@response(pr)
    if (type == "class") {
        if (!is.factor(y)) stop("response is not a factor")
            ret <- factor(levels(y)[family@rclass(pr)], levels = levels(y))
        }
    names(ret) <- names(pr)
    return(ret)
}

predict.mboost <- function(object, newdata = NULL,
    type = c("link", "response", "class"), which = NULL,
    aggregate = c("sum", "cumsum", "none"), ...) {

    type <- match.arg(type)
    aggregate <- match.arg(aggregate)
    if (aggregate != "sum")
        stopifnot(type == "link")
    pr <- object$predict(newdata = newdata,
                         which = which, aggregate = aggregate)
    nm <- rownames(newdata)
    if (is.null(newdata)) 
        nm <- object$rownames
    if (length(pr) == 1 && length(nm) != length(pr))
        nm <- NULL
    if (is.list(pr)){
        RET <- lapply(pr, .predictmboost, y = object$response,
                      type = type, nm = nm, family = object$family)
        if (type == "link") attr(RET, "offset") <- attr(pr, "offset")
        return(RET)
    }
    RET <- .predictmboost(object$response, pr, type = type, nm = nm,
                          family = object$family)
    if (type != "link") attr(RET, "offset") <- NULL
    return(RET)
}

### extract coefficients
coef.mboost <- function(object, which = NULL,
    aggregate = c("sum", "cumsum", "none"), ...) {

    args <- list(...)
    if (length(args) > 0)
        warning("Arguments ", paste(names(args), sep = ", "), " unknown")
    if (grepl("Negative Binomial Likelihood (logit link)", object$family@name))
        message("\nNOTE: Coefficients from a Binomial model are half the size of ",
                "coefficients\n from a model fitted via ",
                "glm(... , family = 'binomial').\n",
                "See Warning section in ?coef.mboost\n")
    ### NOTE: This is only the case for type = "adaboost" and link = "logit" ! 
    ###       see github #63 and #65
    object$coef(which = which, aggregate = aggregate)
}

### compute boosting hat matrix and its trace
hatvalues.gamboost <- function(model, ...) {
    H <- model$hatvalues(...)
    n <- length(model$response)

    ### <FIXME> better checks
    L2 <- FALSE
    if (!extends(class(model$family), "boost_family_glm")) {
        warning("AIC might not be reasonable for family ", model$family@name)
        L2 <- TRUE
    } else {
        fW <- model$family@fW
    }
    ### </FIXME>
    if (checkL2(model) || L2) {
        op <- .Call("R_trace_gamboost", as.integer(n), H,
                    as.integer(model$xselect()), PACKAGE = "mboost")
    } else {
        fitm <- predict(model, aggregate = "cumsum")
        op <- bhatmat(n, H, model$xselect(), fitm, model$family@fW)
    }
    RET <- diag(op[[1]])
    attr(RET, "hatmatrix") <- op[[1]]
    attr(RET, "trace") <- op[[2]]
    RET
}

hatvalues.mboost <- function(model, ...) {
    stop("hatvalues are not implemented for models fitted via function ",
         sQuote("mboost"), ".\n Please use functions ", sQuote("gamboost"),
         " or ", sQuote("glmboost"), " instead.")
}

AIC.mboost <- function(object, method = c("corrected", "classical", "gMDL"),
                       df = c("trace", "actset"), ..., k = 2) {

    df <- match.arg(df)
    if (df == "actset" && !inherits(object, "glmboost")) {
        df <- "trace"
        warning("df = ", dQuote("actset"), " can only be used with ",
                sQuote("glmboost"), " models. df = ", dQuote("trace"),
                " is used instead.")
    }
    if (df == "trace") {
        hatval <- hatvalues(object)
        RET <- AICboost(object, method = method,
                        df = attr(hatval, "trace"), k = k)
    }
    if (df == "actset") {
        ### compute active set: number of non-zero coefficients
        ### for each boosting iteration
        xs <- object$xselect()
        xu <- sort(sapply(unique(xs), function(i) which(xs == i)[1]))
        xu <- c(xu, mstop(object) + 1)
        df <- rep(1:(length(xu) - 1), diff(xu))
        ### <FIXME>: offset = 0 may mean hat(offset) = 0 or
        ### no offset computed at all!
        if (object$offset != 0) df <- df + 1
        ### </FIXME>
        RET <- AICboost(object, method = method,
                        df = df, k = k)
    }
    return(RET)
}

AICboost <- function(object, method = c("corrected", "classical", "gMDL"), df, k = 2) {

    if (object$control$risk != "inbag")
        return(NA)
    method <- match.arg(method)

    if (checkL2(object) && method == "classical")
        stop("classical AIC method not implemented for Gaussian family")
    if (!checkL2(object) && method == "corrected")
        stop("corrected AIC method not implemented for non-Gaussian family")

    sumw <- sum(model.weights(object)[!is.na(fitted(object))])
    if (method == "corrected")
        AIC <- log(object$risk()[-1] / sumw) +
               (1 + df/sumw) / (1 - (df + 2)/sumw)

    ### loss-function is to be MINIMIZED, take -2 * logLik == 2 * risk
    if (method == "classical")
        AIC <- 2 * object$risk()[-1] + k * df
    if (method == "gMDL"){
        s <- object$risk()[-1]/(sumw - df)
        AIC <- log(s) + df/sumw * log((sum(object$response^2) - object$risk()[-1])
                      /(df * s))
        }
    mstop <- which.min(AIC)
    RET <- AIC[mstop]

    attr(RET, "mstop") <- which.min(AIC)
    attr(RET, "df") <- df
    attr(RET, "AIC") <- AIC
    attr(RET, "corrected") <- method == "corrected"

    class(RET) <- "gbAIC"
    return(RET)
}

print.gbAIC <- function(x, ...) {
    mstop <- mstop(x)
    df <- attr(x, "df")[mstop]
    attributes(x) <- NULL
    print(x)
    cat("Optimal number of boosting iterations:", mstop, "\n")
    cat("Degrees of freedom", paste("(for mstop = ", mstop, "):", sep = "",
                                    collapse = ""), df, "\n")
    invisible(x)
}

plot.gbAIC <- function(x, y = NULL, ...) {
    mstop <- mstop(x)
    class(x) <- NULL
    plot(attr(x, "AIC"), xlab = "Number of boosting iterations",
         ylab = ifelse(attr(x, "corrected"), "Corrected AIC", "AIC"),
         type = "l", ...)
    points(mstop, x)
    ylim <- list(...)$ylim
    if (!is.null(ylim)) {
        ymin <- ylim[1] * ifelse(ylim[1] < 0, 2, 0.5)
    } else {
        ymin <- x - x/2
    }
    lines(c(mstop, mstop),
          c(ymin, x), lty = 2)
}

mstop <- function(object, ...) UseMethod("mstop")

mstop.gbAIC <- function(object, ...) attr(object, "mstop")


### compute fitted values
fitted.mboost <- function(object, ...) {
    args <- list(...)
    if (length(args) == 0) {
        ret <- object$fitted()
        if (length(ret) == length(object$rownames))
            names(ret) <- object$rownames
    } else {
        if ("newdata" %in% names(args)) {
            args$newdata <- NULL
            warning("Argument ", sQuote("newdata"), " was  ignored. Please use ", 
                    sQuote("predict()"), " to make predictions for new data.")
        }
        args$object <- object
        ret <- do.call(predict, args)
    }
    ret
}

### residuals (the current negative gradient)
resid.mboost <- function(object, ...)
    object$resid()

logLik.mboost <- function(object, ...)
    object$logLik()

### restrict or enhance models to less/more
### boosting iterations.
### ATTENTION: x gets CHANGED!
"[.mboost" <- function(x, i, return = TRUE, ...) {
    if (!(length(i) == 1 && i >= 0))
        stop("Please provide a single non-negative number")
    x$subset(i)
    if (return) return(x)
    invisible(NULL)
}

"mstop<-" <- function(x, value) {
    return(x[value, return = TRUE])
}

mstop.mboost <- function(object, ...) object$mstop()

update.mboost <- function(object, weights, oobweights = NULL,
                          risk = NULL, trace = NULL, ...)
    object$update(weights = weights, oobweights = oobweights,
                  risk = risk, trace = trace)

model.frame.mboost <- function(formula, ...)
    formula$model.frame(...)

predict.glmboost <- function(object, newdata = NULL,
    type = c("link", "response", "class"), which = NULL,
    aggregate = c("sum", "cumsum", "none"), ...) {

    args <- list(...)
    if (length(args) > 0)
        warning("Arguments ", paste(names(args), sep = ", "), " unknown")

    aggregate <- match.arg(aggregate)

    ### X <- object$baselearner[[1]]$get_data()
    if (!is.null(newdata))
        newdata <- object$newX(newdata)

    ### <FIXME> implement predictions here </FIXME>
    ### which <- object$which(which, usedonly = nw <- is.null(which))
    ### cf <- coef(object, which = which, aggregate = aggregate)
    ### pr <- X[, which, drop = FALSE] %*% cf

    pr <- object$predict(newdata = newdata, which = which,
                         aggregate = aggregate)
    type <- match.arg(type)
    nm <- rownames(newdata)
    if (is.null(newdata)) 
        nm <- object$rownames
    if (length(pr) == 1 && length(nm) != length(pr))
        nm <- NULL
    if (is.list(pr))
        return(lapply(pr, .predictmboost, y = object$response,
                      type = type, nm = nm, family = object$family))
    .predictmboost(object$response, pr, type = type, nm = nm,
                   family = object$family)
}

coef.glmboost <- function(object, which = NULL,
    aggregate = c("sum", "cumsum", "none"), off2int = FALSE, ...) {

    args <- list(...)
    if (length(args) > 0)
        warning("Arguments ", paste(names(args), sep = ", "), " unknown")

    if (grepl("Negative Binomial Likelihood", object$family@name))
        message("\nNOTE: Coefficients from a Binomial model are half the size of ",
                "coefficients\n from a model fitted via ",
                "glm(... , family = 'binomial').\n",
                "See Warning section in ?coef.mboost\n")

    aggregate <- match.arg(aggregate)
    cf <- object$coef(which = which, aggregate = aggregate)
    offset <- object$offset
    
    if (is.null(cf))
        return(cf)

    ### intercept = hat(beta[1]) - bar(x) %*% hat(beta[-1])
    assign <- object$assign
    cm <- object$center
    INTERCEPT <- sum(assign == 0) == 1
    if (!INTERCEPT && off2int)
        warning(sQuote("object"), " does not contain an intercept, ",
                sQuote("off2int = TRUE"), " ignored.")
    if (INTERCEPT && any(cm != 0)) {
        intercept <- which(assign == 0)
        if (is.null(which)) {
            which <- object$which(usedonly = TRUE)
            if (!intercept %in% which) {
                which <- c(intercept, which)
                cf <- object$coef(which = which, aggregate = aggregate)
            }
        }
        which <- object$which(which)
        if (intercept %in% which) {
            if (all(which %in% object$which(usedonly = TRUE))) {
                cm <- cm[which]
                scf <- sapply(1:length(cf), function(i) cf[[i]] * cm[i])
                if (!is.matrix(scf)) scf <- matrix(scf, nrow = 1)
                cf[[intercept]] <- cf[[intercept]] - rowSums(scf)
            } else {
                cm <- cm[object$which(usedonly = TRUE)]
                tmp <- object$coef(which = object$which(usedonly = TRUE),
                                   aggregate = aggregate)
                scf <- sapply(1:length(tmp), function(i) tmp[[i]] * cm[i])
                if (!is.matrix(scf)) scf <- matrix(scf, nrow = 1)
                cf[[intercept]] <- cf[[intercept]] - rowSums(scf)
            }
        }
    }

    if (aggregate == "sum") cf <- unlist(cf)
    if (aggregate == "none") {
        attr(cf, "offset") <- offset
        if (off2int)
            warning(sQuote("off2int = TRUE"), " ignored for ",
                    sQuote("aggregate = \"none\""))
    } else {
        if (off2int & length(offset) == 1) {
            cf[[1]] <- cf[[1]] + offset
        } else {
            if (off2int)
                warning(sQuote("off2int = TRUE"),
                        " ignored for non-scalar offset")
            attr(cf, "offset") <- offset
        }
    }
    cf
}

hatvalues.glmboost <- function(model, ...) {

    args <- list(...)
    if (length(args) > 0)
        warning("Arguments ", paste(names(args), sep = ", "), " unknown")

    if (!checkL2(model)) return(hatvalues.gamboost(model))
    Xf <- t(model$basemodel[[1]]$MPinv()) * model$control$nu
    X <- model$baselearner[[1]]$get_data()
    op <- .Call("R_trace_glmboost", as(X, "matrix"), as(Xf, "matrix"),
                as.integer(model$xselect()),
                PACKAGE = "mboost")
    RET <- diag(op[[1]])
    attr(RET, "hatmatrix") <- op[[1]]
    attr(RET, "trace") <- op[[2]]
    RET
}

### methods: print
print.mboost <- function(x, ...) {

    cat("\n")
    cat("\t Model-based Boosting\n")
    cat("\n")
    if (!is.null(x$call)) {
        if(length(deparse(x$call$data)) > 20)
            x$call$data <- deparse(x$call$data, nlines = 1)
        cat("Call:\n", deparse(x$call), "\n\n", sep = "")
    }
    show(x$family)
    cat("\n")
    cat("Number of boosting iterations: mstop =", mstop(x), "\n")
    cat("Step size: ", x$control$nu, "\n")
    cat("Offset: ", x$offset, "\n")
    cat("Number of baselearners: ", length(variable.names(x)), "\n")
    cat("\n")
    invisible(x)
}

### methods: print
print.glmboost <- function(x, ...) {

    cat("\n")
    cat("\t Generalized Linear Models Fitted via Gradient Boosting\n")
    cat("\n")
    if (!is.null(x$call)) {
        if(length(deparse(x$call$data)) > 20)
            x$call$data <- deparse(x$call$data, nlines = 1)
        cat("Call:\n", deparse(x$call), "\n\n", sep = "")
    }
    show(x$family)
    cat("\n")
    cat("Number of boosting iterations: mstop =", mstop(x), "\n")
    cat("Step size: ", x$control$nu, "\n")
    cat("Offset: ", x$offset, "\n")
    cat("\n")
    cat("Coefficients: \n")
    cf <- coef(x)
    attr(x, "offset") <- NULL
    print(cf)
    cat("\n")
    invisible(x)
}

variable.names.mboost <- function(object, which = NULL, usedonly = FALSE, ...) {

    which <- object$which(which, usedonly = usedonly)

    args <- list(...)
    if (length(args) > 0)
        warning("Arguments ", paste(names(args), sep = ", "), " unknown")

    ### <FIXME> is pasting what we want?
    ret <- sapply(object$baselearner, function(x)
                  paste(x$get_names(), collapse = ", "))
    ### </FIXME>
    if (is.matrix(ret)) ret <- ret[, , drop = TRUE]
    ret[which]
}

variable.names.glmboost <- function(object, which = NULL, usedonly = FALSE, ...) {

    if (usedonly) {
        which <- object$which(usedonly = TRUE)
        ## if center = TRUE for model fitting intercept is implicitly selected
        center <- get("center", envir = environment(object$newX))
        if (center){
            intercept <- which(object$assign == 0)
            INTERCEPT <- sum(object$assign == 0) == 1
            if (INTERCEPT && !intercept %in% which)
                which <- c(intercept, which)
        }
    } else {
        which <- object$which(which, usedonly = usedonly)
    }

    args <- list(...)
    if (length(args) > 0)
        warning("Arguments ", paste(names(args), sep = ", "), " unknown")

    ret <- object$baselearner[[1]]$get_names()
    names(ret) <- ret
    ret[which]
}

selected.mboost <- function(object, ...)
    object$xselect()

summary.mboost <- function(object, ...) {

    ret <- list(object = object, selprob = NULL)
    xs <- selected(object)
    nm <- variable.names(object)
    if (length(xs) > 0) {
        selprob <- tabulate(xs, nbins = length(nm)) / length(xs)
        names(selprob) <- names(nm)
        selprob <- sort(selprob, decreasing = TRUE)
        ret$selprob <- selprob[selprob > 0]
    }
    class(ret) <- "summary.mboost"
    return(ret)
}

print.summary.mboost <- function(x, ...) {
    print(x$object)
    if (!is.null(x$selprob)) {
        cat("Selection frequencies:\n")
        print(x$selprob)
        cat("\n")
    } 
}

nuisance <- function(object)
    UseMethod("nuisance")

nuisance.mboost <- function(object)
    object$nuisance()[[mstop(object)]]



extract <- function(object, ...)
    UseMethod("extract")

extract.mboost <- function(object, what = c("design", "penalty", "lambda", "df",
                                   "coefficients", "residuals",
                                   "variable.names", "bnames", "offset",
                                   "nuisance", "weights", "index", "control"),
                           which = NULL, ...){
    what <- match.arg(what)
    which <- object$which(which, usedonly = is.null(which))
    if (what %in% c("design", "penalty", "lambda", "df", "index")){
        fun <- function(which)
            extract(object$basemodel[[which]], what = what, ...)
        ret <- lapply(which, fun)
        names(ret) <- extract(object, what = "bnames", which = which)
        return(ret)
    }
    switch(what,
           "coefficients" = return(coef(object, which = which)),
           "residuals" = return(residuals(object)),
           "variable.names" = return(variable.names(object, which)),
           "bnames" = return(get("bnames", envir = environment(object$update))[which]),
           "offset" = return(object$offset),
           "nuisance" = return(nuisance(object)),
           "weights" = return(model.weights(object)),
           "control" = return(object$control))
}

extract.glmboost <- function(object, what = c("design", "coefficients", "residuals",
                                     "variable.names", "offset",
                                     "nuisance", "weights", "control"),
                             which = NULL, asmatrix = FALSE, ...){
    what <- match.arg(what)
    center <- get("center", envir = environment(object$newX))
    if (is.null(which)) {
        which <- object$which(usedonly = TRUE)
        ## if center = TRUE for model fitting intercept is implicitly selected
        if (center){
            intercept <- which(object$assign == 0)
            INTERCEPT <- sum(object$assign == 0) == 1
            if (INTERCEPT && !intercept %in% which)
                which <- c(intercept, which)
        }
    } else {
        which <- object$which(which)
    }

    if (what == "design"){
        mat <- object$baselearner[[1]]$get_data()[,which]
        if (asmatrix)
            mat <- as.matrix(mat)
        return(mat)
    }

    switch(what,
           "coefficients" = return(coef(object, which = which)),
           "residuals" = return(residuals(object)),
           "variable.names" = return(variable.names(object, which)),
           "offset" = return(object$offset),
           "nuisance" = return(nuisance(object)),
           "weights" = return(model.weights(object)),
           "control" = return(object$control))
}

extract.blackboost <- function(object, ...)
    stop("function not yet implemented")

extract.blg <- function(object, what = c("design", "penalty", "index"),
                        asmatrix = FALSE, expand = FALSE, ...){
    what <- match.arg(what)
    #object <- object$dpp(rep(1, NROW(object$model.frame())))
    # return(extract(object, what = what,
    #               asmatrix = asmatrix, expand = expand))
    if (what == "design")
        mat <- get("X", envir = environment(object$dpp))
    if (what == "penalty")
        mat <- get("K", envir = environment(object$dpp))
    if (what == "index")
        return(get("index", envir = environment(object$dpp)))
    ## only applicable for design and penalty matrix
    if (asmatrix){
        mat <- as.matrix(mat)
    }
    if (expand && !is.null(indx <- extract(object, what = "index"))){
        a <- attributes(mat)
        mat <- mat[indx,]
        a[c("dim", "dimnames")] <- attributes(mat)[c("dim", "dimnames")]
        attributes(mat) <- a
    }
    return(mat)
}

extract.bl_lin <- function(object, what = c("design", "penalty", "lambda", "df",
                                   "weights", "index"),
                           asmatrix = FALSE, expand = FALSE,  ...){
    what <- match.arg(what)
    if (what == "design")
        mat <- get("X", envir = environment(object$fit))
    if (what == "penalty")
        mat <- get("K", envir = environment(object$fit))
    if (what %in% c("lambda", "df"))
        return(object$df()[what])
    if (what %in% c("weights", "index"))
        return(get(what, envir = environment(object$fit)))
    ## only applicable for design and penalty matrix
    if (asmatrix){
        mat <- as.matrix(mat)
    }
    if (expand && !is.null(indx <- extract(object, what = "index"))){
        a <- attributes(mat)
        mat <- mat[indx,]
        a[c("dim", "dimnames")] <- attributes(mat)[c("dim", "dimnames")]
        attributes(mat) <- a
    }
    return(mat)
}

extract.bl_tree <- function(object, what = c("design", "penalty", "lambda", "df",
                                    "weights", "index"),
                            ...){
    what <- match.arg(what)
    if (what == "weights"){
        return(get("weights", envir = environment(object$fit)))
    } else {
        warning(paste("model matrix, penalty matrix, lambda and index",
                      "do not exist for tree base-learners"))
        invisible(NULL)
    }
}

residuals.mboost <- function(object, ...){
    if(object$family@name == "Squared Error (Regression)")
        return(object$resid())
    stop(sQuote("residuals()"), " only implemented for ",
         sQuote("family = Gaussian()"))
}

risk <- function(object, ...)
    UseMethod("risk")

risk.mboost <- function(object, ...) {
    object$risk()
}

downstream.test <- function(object, ...) {
    if (object$family@name != "Ratio of Correlated Gammas (RCG)")
        stop("downstream tests currently only implemented for RCG family")
    if (!inherits(object, "glmboost"))
        stop("downstream tests currently only implemented for linear models, i.e., glmboost")
    
    # calculate Fisher information matrix
    alpha <- nuisance(object)[1]
    rho <- nuisance(object)[2]
    coefs <- coef(object, which = "")
    y <- object$response
    ## do we need the original design matrix (X1) or the centered design (X) matrix?
    X <- as.data.frame(extract(object, what = "design"))
    X1 <- data.frame(model.matrix(~ x1 + x2))
    
    Fisher <- environment(object$family@ngradient)[["Fisher"]](y, alpha, rho, X1, coefs)
    
    # downstream hypothesis tests
    p_value <- sapply(1:length(coefs), function(i) (1 - pnorm(abs(coefs[i]) / sqrt(solve(Fisher)[i, i]))) * 2)
    return(p_value)
}
hofnerb/mboost documentation built on Nov. 13, 2019, 6:05 a.m.