R/clmtree.R
In loret: Logistic Regression Trees for Multinomial and Ordinal Responses

#There are two ways to do this: use a fitter function or a formula interface function.
#With CLM: clm.fit interface works, but there is no predict.clm.fit -> needs to be written first; problem here is that the clm.fit object does not return a model frame with the response or x matrix; need to find a god workaround that does not need to much memory (so appending x toe very fit is not acceptable)
#Second way: use clm and write a wrapper that allows to use it as clm(y,x); also how its done with mlogit 

## wrapper function to specify fitter and return class
clmtree <- function(formula, data, subset, na.action, weights, offset, link=c("logit","probit","cloglog","loglog","cauchit"),threshold=c("flexible","symmetric","symmetric2","equidistant"),epsilon = 1e-8, maxit = 25, ...)
{
  if(missing(threshold)) threshold <- "flexible"
  if(missing(link)) link <- "logit"
  if (threshold != "flexible") stop("only flexible thresholds implemented at the moment")
  ## use dots for setting up mob_control
  control <- mob_control(...)

  ## keep call
  cl <- match.call(expand.dots = TRUE)

  ## extend formula if necessary
  fo <- Formula::Formula(formula)
  if(length(fo)[2L] == 1L) {
    attr(fo, "rhs") <- c(list(1), attr(fo, "rhs"))
    formula[[3L]] <- formula(fo)[[3L]]
  } else {    #not sure this is what we want?
    fo <- NULL
  }

  #Check what to do with link/family as in glmtree
  
  ## call mob
  m <- match.call(expand.dots = FALSE)
  if(!is.null(fo)) m$formula <- formula
  m$fit <- clmfit
  m$control <- control
  m$epsilon <- epsilon
  m$maxit <- maxit
  if("..." %in% names(m)) m[["..."]] <- NULL
  m$link <- link
  m$threshold <- threshold
  m[[1L]] <- as.name("mob")
  rval <- eval(m, parent.frame())
  
  ## extend class and keep original call
  rval$info$call <- cl
  rval$info$link <- link
  rval$info$threshold <- threshold
  class(rval) <- c("clmtree", class(rval))
  return(rval)
}


#' A fitting function for cumulative link logit models. Based on ordinal:clm. 
#'
#' @param formula the model formula
#' @param scale 
#' @param nominal 
#' @param weights case weights
#' @param start  an optional list of starting values of the form c(alpha, beta, zeta) for the thresholds and nominal effects (alpha), regression parameters (beta) and scaleparameters (zeta).
#' @param subset
#' @param na.action
#' @param contrasts
#' @param model
#' @param link the link function
#' @param threshold the threshold structure, see \link{clm}
#' @param ... Additional control parameters passed to the fitting function, a list of control parameteres or a call to \link{clm.control}  
#' @param estfun estimating functions
#' @param object should the object be saved
#'
#' @return An object of class, a list with components
#' \itemize{
#'         \item{}
#'         \item{}
#'         \item{}
#'         \item{}
#'         \item{}
#'         \item{}
#'         \item{}
#' }
#' 
#' @export
#'
#'       if (missing(X)) 
#'        X <- cbind(`(Intercept)` = rep(1, length(y)))
#'  stopifnot(is.factor(y), is.matrix(X))
#'  if (!missing(S) && !is.null(S)) {
#'        stopifnot(is.matrix(S), length(y) == nrow(S))
#'    }
#'    if (!missing(N) && !is.null(N)) {
#'        stopifnot(is.matrix(N), length(y) == nrow(N))
#'    }

## clmfit <- function(y, x, scale=NULL, nominal=NULL, weights, start = NULL, na.action="na.exclude", contrasts=NULL, model=TRUE, link=c("logit","probit","cloglog","loglog","cauchit"), threshold=c("flexible","symmetric","symmetric2","equidistant"), ..., estfun = FALSE, object = FALSE)
##   {
##    #got rid of argument subset, data, scale and nominal 
##   if (!is.null(scale)) stop("Scale regression not implemented yet.")
##   if (!is.null(nominal)) stop("Nominal regression not implemented as cumulative link model. Use mlogittree().")
  
##   if(missing(weights)) weights <- rep(1,nrow(x))
  
##  ## catch control arguments
##   args <- list(...)
##   ctrl <- list()
##   for(n in c("relTol", "maxIter")) {
##     if(n %in% names(args)) {
##       ctrl[[n]] <- args[[n]]
##       args[[n]] <- NULL
##     }
##   }
##   args$control <- do.call("clm.control", ctrl)

##   x <- x[,-which(colnames(x)%in%"(Intercept)")]
  
##   formula <- formula(mf)$formula
##   mf <- data.frame(y,x)
##   colnames(mf)[1] <- as.character(formula)[2]
  
##   ## call clm fitting function
##   args <- c(list(formula=formula, data=mf, weights = weights, model=model, start = start, na.action=na.action, contrasts=contrasts, link=link,threshold=threshold), args)
##   z <- do.call("clm", args)

##   ## z2 <- rapply(z, function(x) {
##   ##                   if(is.character(x)) x <- sub("^x","", x) else x
##   ##               }
##   ##             , how="replace")

##   ## z2 <- rapply(z2, function(x) {
##   ##                        if(!is.null(attr(x,"names"))) attr(x,"names") <- sub("^x","", attr(x,"names")) else x
##   ##               }
##   ##             , how="replace")

##   ## z2 <- rapply(z2, function(x) {
##   ##                  if(!is.null(attr(x,"dimnames"))) lapply(attr(x,"dimnames"), function(l) l <- sub("^x","", l)) else x }
##   ##             , how="replace")

##   ## z2
##   ## attr(z$coefficients,"names") <- sub("^x", "", attr(z$coefficients,"names"))

      
##   ## degrees of freedom
##   df <- z$edf 

##   ## list structure
##   rval <- list(
##     coefficients = z$coefficients,
##     objfun= -z$logLik,  
##     estfun = NULL,
##     object = NULL
##   )

##   #add estimating functions (if desired)
##   #based on estfun.clm in sandwich
##   #only works for flexible threshold and no scale regression
##   if(estfun) {
##    rval$estfun <- sandwich::estfun(z)
## }

##   ## add model (if desired)
##   #TODO: add more info 
##   if(object) {
##    # class(z) <- c("clm")
##    # z$offset <- offset #made this to be the offset right from the start
##   #  z$contrasts <- attr(x, "contrasts")
##   #  z$xlevels <- attr(x, "xlevels")
##     #z$x <- x
##   #  cl <- as.call(expression(clm))
##    # cl$formula <- attr(x, "formula")	
##    # z$call <- cl
##    # z$terms <- attr(x, "terms")
##     #should we collect link information?
##     rval$object <- z
##   }

##   return(rval)
## }


#' A fitting function for cumulative link logit models. Based on ordinal:clm.fit. In loret use it with y=factor(y,ordered=TRUE)
#'
#' @param y the response variable; an ordered factor.
#' @param x design matrix for regression  parameters
#' @param s design matrix for scale  parameters  
#' @param n design matrix for nominal parameters 
#' @param start  an optional list of starting values of the form c(alpha, beta, zeta) for the thresholds and nominal effects (alpha), regression parameters (beta) and scaleparameters (zeta).
#' @param weights case weights
#' @param offset an optional offset
#' @param S.offset an optional offset for the scale
#' @param link the link function
#' @param threshold the threshold structure, see \link{clm}
#' @param ... Additional control parameters passed to the fitting function, a list of control parameteres or a call to \link{clm.control}  
#' @param estfun estimating functions
#' @param object should the object be saved
#'
#' @return An object of class, a list with components
#' \itemize{
#'         \item{}
#'         \item{}
#'         \item{}
#'         \item{}
#'         \item{}
#'         \item{}
#'         \item{}
#' }
#' 
#' @export
#'
#'       if (missing(X)) 
#'        X <- cbind(`(Intercept)` = rep(1, length(y)))
#'  stopifnot(is.factor(y), is.matrix(X))
#'  if (!missing(S) && !is.null(S)) {
#'        stopifnot(is.matrix(S), length(y) == nrow(S))
#'    }
#'    if (!missing(N) && !is.null(N)) {
#'        stopifnot(is.matrix(N), length(y) == nrow(N))
#'    }

clmfit <- function(y, x, s=NULL, n=NULL, start = NULL, weights = rep(1,nrow(x)), offset = rep(0,nrow(x)),S.offset=rep(0,nrow(x)), link=c("logit","probit","cloglog","loglog","cauchit"),threshold=c("flexible","symmetric","symmetric2","equidistant"), ..., estfun = FALSE, object = FALSE)
  {

  if (!is.null(s)) stop("Scale regression not implemented yet")
  if (!is.null(n)) stop("Nominal regression not implemented as cumulative link model. Use mlogittree().")
  
 ## catch control arguments
  args <- list(...)
  ctrl <- list()
  for(n in c("relTol", "maxIter")) {
    if(n %in% names(args)) {
      ctrl[[n]] <- args[[n]]
      args[[n]] <- NULL
    }
  }
  args$control <- do.call("clm.control", ctrl)
  
  ## call clm fitting function
  args <- c(list(y = y, X = x, weights = weights, offset = offset, S.offset=S.offset,start = start,link=link,threshold=threshold), args)
  z <- do.call("clm.fit", args)

  ## degrees of freedom
  df <- z$edf 

  ## list structure
  rval <- list(
    coefficients = z$coefficients,
    objfun= -z$logLik,  
    estfun = NULL,
    object = NULL
  )

  #add estimating functions (if desired)
  #based on estfun.clm in sandwich
  #only works for flexible threshold and no scale regression
  if(estfun) {
   if(length(link)>1L) link <- link[1]
  # cat("link:", link,"\n")   
   mueta <- make.link(link)$mu.eta
   xmat <- x[, -1L, drop = FALSE]
   n <- nrow(xmat)
   k <- ncol(xmat)
   m <- length(z$alpha)
   y <- as.numeric(y)
   prob <- z$fitted.values
   xb <- if (k >= 1L) 
        as.vector(xmat %*% z$beta)
   else rep(0, n)
   zeta <- z$alpha
   lp <- cbind(0, mueta(matrix(zeta, nrow = n, ncol = m, byrow = TRUE) - 
        xb), 0)
   rrval <- matrix(0, nrow = n, ncol = k + m + 2L)
   if (k >= 1L) 
        rrval[, 1L:k] <- (-xmat * as.vector(lp[cbind(1:n, y + 
            1L)] - lp[cbind(1:n, y)]))
   rrval[cbind(1:n, k + y)] <- -as.vector(lp[cbind(1:n, y)])
   rrval[cbind(1:n, k + y + 1L)] <- as.vector(lp[cbind(1:n, y + 
        1L)])
   rrval <- rrval[, -c(k + 1L, k + m + 2L), drop = FALSE]
   rrval <- weights/prob * rrval
   dimnames(rrval) <- list(rownames(xmat), c(colnames(xmat), names(z$alpha)))
   ix <- if (k >= 1L) 
        c((k + 1L):(k + m), 1L:k)
   else 1L:m
   rval$estfun <- rrval[, ix, drop = FALSE]
}

  ## add model (if desired)
  #TODO: add more info 
  if(object) {
    class(z) <- c("clm.fit")
    z$offset <- offset #made this to be the offset right from the start
    z$contrasts <- attr(x, "contrasts")
    z$xlevels <- attr(x, "xlevels")
    z$x <- x #added for predict
    cl <- as.call(expression(clm))
    cl$formula <- attr(x, "formula")	
    z$call <- cl
    z$terms <- attr(x, "terms")
    #should we collect link information?
    rval$object <- z
  }

  return(rval)
}

## methods
print.clmtree <- function(x,
  title = NULL, objfun = "negative log-likelihood", ...)
{
  if(is.null(title)) title <- paste("Cumulative link model tree for ordinal data (link: ", x$info$link,", threshold: ", x$info$threshold,")",sep="")
  print.modelparty(x, title = title, objfun = objfun, ...)
}


predict.clm.fit <- function(object,newdata=NULL,type = c("prob", "class"),se.fit=FALSE,interval=FALSE,level=0.95,na.action=na.pass,...)
    {
    #TODO look at predict.clm.fit from predict.clm
    type <- match.arg(type)
    se.fit <- as.logical(se.fit)[1]
    interval <- as.logical(interval)[1]
    stopifnot(length(level) == 1 && is.numeric(level) && level < 1 && level > 0)
    if (type == "class" && (se.fit || interval)) {
        warning("se.fit and interval set to FALSE for type = 'class'")
        se.fit <- interval <- FALSE
    }
    cov <- if (se.fit || interval) 
        object$object$vcov
    else NULL
    has.response <- TRUE
    if (type == "class" && missing(newdata)) 
        newdata <- object$object$x
    has.newdata <- !(missing(newdata) || is.null(newdata))
    if (has.newdata || type == "class") {
        if (has.newdata && sum(unlist(object$aliased)) > 0) 
            warning("predictions from column rank-deficient fit may be misleading")
        newdata <- as.data.frame(newdata)
        resp <- object$object$y 
        if (type == "class") 
            newdata <- newdata[!names(newdata) %in% resp]
        has.response <- resp %in% names(newdata)
        if (!has.response) {
            ylev <- object$y.levels
            nlev <- length(ylev)
            nnd <- nrow(newdata)
            newdata <- cbind(newdata[rep(1:nnd, each = nlev), 
                , drop = FALSE], factor(rep(ylev, nnd), levels = ylev, 
                ordered = TRUE))
            names(newdata)[ncol(newdata)] <- resp
        }
        if (is.null(attr(object$terms, "predvars"))) 
            warning(paste0("terms object does not have a predvars attribute: ", 
                "predictions may be misleading"))
        mf <- model.frame(object$terms, newdata, na.action = na.action, 
            xlev = object$xlevels)
        if (nrow(mf) != nrow(newdata)) 
            stop("length of variable(s) found do not match nrow(newdata)")
        if (!is.null(cl <- attr(object$terms, "dataClasses"))) 
            .checkMFClasses(cl, mf)
        X <- model.matrix(object$terms, mf, contrasts = object$contrasts)
        Xint <- match("(Intercept)", colnames(X), nomatch = 0L)
        n <- nrow(X)
        if (Xint <= 0) 
            X <- cbind(`(Intercept)` = rep(1, n), X)
        if (sum(object$aliased$beta) > 0) 
            X <- X[, !c(FALSE, object$aliased$beta), drop = FALSE]
        offset <- rep(0, nrow(X))
        if (!is.null(off.num <- attr(object$terms, "offset"))) 
            for (i in off.num) offset <- offset + eval(attr(object$terms, 
                "variables")[[i + 1]], newdata)
        y <- model.response(mf)
        if (any(!levels(y) %in% object$y.levels)) 
            stop(gettextf("response factor '%s' has new levels", 
                response.name(object$terms)))
        if (is.nom <- !is.null(object$nom.terms)) {
            nom.mf <- model.frame(object$nom.terms, newdata, 
                na.action = na.action, xlev = object$nom.xlevels)
            if (nrow(nom.mf) != nrow(newdata)) 
                stop("length of variable(s) found do not match nrow(newdata)")
            if (!is.null(cl <- attr(object$nom.terms, "dataClasses"))) 
                .checkMFClasses(cl, nom.mf)
            NOM <- model.matrix(object$nom.terms, nom.mf, contrasts = object$nom.contrasts)
            NOMint <- match("(Intercept)", colnames(NOM), nomatch = 0L)
            if (NOMint <= 0) 
                NOM <- cbind(`(Intercept)` = rep(1, n), NOM)
            alias <- t(matrix(object$aliased$alpha, nrow = length(object$y.levels) - 
                1))[, 1]
            if (sum(alias) > 0) 
                NOM <- NOM[, !c(FALSE, alias), drop = FALSE]
        }
        if (is.scale <- !is.null(object$S.terms)) {
            S.mf <- model.frame(object$S.terms, newdata, na.action = na.action, 
                xlev = object$S.xlevels)
            if (nrow(S.mf) != nrow(newdata)) 
                stop("length of variable(s) found do not match nrow(newdata)")
            if (!is.null(cl <- attr(object$S.terms, "dataClasses"))) 
                .checkMFClasses(cl, S.mf)
            S <- model.matrix(object$S.terms, S.mf, contrasts = object$S.contrasts)
            Sint <- match("(Intercept)", colnames(S), nomatch = 0L)
            if (Sint <= 0) 
                S <- cbind(`(Intercept)` = rep(1, n), S)
            if (sum(object$aliased$zeta) > 0) 
                S <- S[, !c(FALSE, object$aliased$zeta), drop = FALSE]
            Soff <- rep(0, nrow(S))
            if (!is.null(off.num <- attr(object$S.terms, "offset"))) 
                for (i in off.num) Soff <- Soff + eval(attr(object$S.terms, 
                  "variables")[[i + 1]], newdata)
        }
        tJac <- object$tJac
        dimnames(tJac) <- NULL
        env <- clm.newRho(parent.frame(), y = y, X = X, NOM = if (is.nom) 
            NOM
        else NULL, S = if (is.scale) 
            S
        else NULL, weights = rep(1, n), offset = offset, S.offset = if (is.scale) 
            Soff
        else rep(0, n), tJac = tJac)
        setLinks(env, link = object$link)
    }
    else {
        env <- get_clmRho.clm(object)
    }
    env$par <- as.vector(coef(object))
    env$par <- env$par[!is.na(env$par)]
    pred <- switch(type, prob = prob.predict.clm(env = env, cov = cov, 
        se.fit = se.fit, interval = interval, level = level), 
        class = prob.predict.clm(env = env, cov = cov, se.fit = se.fit, 
            interval = interval, level = level), cum.prob = cum.prob.predict.clm(env = env, 
            cov = cov, se.fit = se.fit, interval = interval, 
            level = level), linear.predictor = lin.pred.predict.clm(env = env, 
            cov = cov, se.fit = se.fit, interval = interval, 
            level = level))
    if (!has.response || type == "class") {
        pred <- lapply(pred, function(x) {
            x <- matrix(unlist(x), ncol = nlev, byrow = TRUE)
            dimnames(x) <- list(1:nrow(x), ylev)
            x
        })
        if (type == "class") 
            pred <- lapply(pred, function(x) {
                factor(max.col(x), levels = seq_along(ylev), 
                  labels = ylev)
            })
    }
    if (missing(newdata) && !is.null(object$na.action)) 
        pred <- lapply(pred, function(x) napredict(object$na.action, 
            x))
    return(pred)
}

predict.clmtree <- function(object, newdata = NULL, type = "node", ...)
{
  ## FIXME: This does not work currently
  if(is.null(newdata) & !identical(type, "node")) stop("newdata has to be provided")
  predict.modelparty(object, newdata = newdata, type = type, ...)
}



plot.clmtree <- function(x, terminal_panel = node_bivplot,
  tp_args = list(), tnex = NULL, drop_terminal = NULL, ...)
{
  ###FIXME: Should be rewritten; like in psychotree?   
  nreg <- if(is.null(tp_args$which)) x$info$nreg else length(tp_args$which)
  if(nreg < 1L & missing(terminal_panel)) {
    plot.constparty(as.constparty(x),
      tp_args = tp_args, tnex = tnex, drop_terminal = drop_terminal, ...)
  } else {
    if(is.null(tnex)) tnex <- if(is.null(terminal_panel)) 1L else 2L * nreg
    if(is.null(drop_terminal)) drop_terminal <- !is.null(terminal_panel)
    plot.modelparty(x, terminal_panel = terminal_panel,
      tp_args = tp_args, tnex = tnex, drop_terminal = drop_terminal, ...)
  }
}
Any scripts or data that you put into this service are public.
loret documentation built on May 2, 2019, 5:31 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
loret
Logistic Regression Trees for Multinomial and Ordinal Responses

R/clmtree.R
In loret: Logistic Regression Trees for Multinomial and Ordinal Responses

Try the loret package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

loret Logistic Regression Trees for Multinomial and Ordinal Responses

R/clmtree.R In loret: Logistic Regression Trees for Multinomial and Ordinal Responses

Try the loret package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

loret
Logistic Regression Trees for Multinomial and Ordinal Responses

R/clmtree.R
In loret: Logistic Regression Trees for Multinomial and Ordinal Responses