Nothing
#
# ggdendro/R/dendro_rpart.R by Andrie de Vries and Brian Ripley
# Copyright (C) 2011-2015
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 or 3 of the License
# (at your option).
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# http://www.r-project.org/Licenses/
#
#
# labels.rpart ----------------------------------------------------------------
#' Make the nice labels used by print and summary.
#'
#' @param digits obvious
#'
#' @param minlength 0 = don't abbrev factors; 1 = use single letters; 2+= the
#' same arg as the "abbreviate" function
#'
#' @param collapse an oddly named argument: FALSE = return a matrix with two
#' columns, containing the labels of the left and right descendants of each
#' node; TRUE = return a vector of 1 column, with the label of the parent
#'
#' @param pretty: for historical compatibility: 0 -> minlength = 0; NULL ->
#' minlength = 1; TRUE -> minlength = 4
#'
#' @param ... = other args for abbreviate()
#'
#' @keywords internal
#' @noRd
labels.rpart <- function(object, digits = 4, minlength = 1L, pretty,
collapse = TRUE, ...) {
if (missing(minlength) && !missing(pretty)) {
minlength <- if (is.null(pretty)) {
1L
} else if (is.logical(pretty)) {
if (pretty) 4L else 0L
} else {
0L
}
}
ff <- object$frame
n <- nrow(ff)
if (n == 1L) {
return("root")
} # special case of no splits
is.leaf <- (ff$var == "<leaf>")
whichrow <- !is.leaf
vnames <- ff$var[whichrow] # the variable names for the primary splits
index <- cumsum(c(1, ff$ncompete + ff$nsurrogate + !is.leaf))
irow <- index[c(whichrow, FALSE)] # we only care about the primary split
ncat <- object$splits[irow, 2L]
## Now to work: first create labels for the left and right splits,
## but not for leaves of course
##
lsplit <- rsplit <- character(length(irow))
if (any(ncat < 2L)) { # any continuous vars ?
jrow <- irow[ncat < 2L]
cutpoint <- formatg(object$splits[jrow, 4L], digits)
temp1 <- (ifelse(ncat < 0, "< ", ">="))[ncat < 2L]
temp2 <- (ifelse(ncat < 0, ">=", "< "))[ncat < 2L]
lsplit[ncat < 2L] <- paste0(temp1, cutpoint)
rsplit[ncat < 2L] <- paste0(temp2, cutpoint)
}
if (any(ncat > 1L)) { # any categorical variables ?
xlevels <- attr(object, "xlevels")
##
## jrow will be the row numbers of factors within lsplit and rsplit
## crow the row number in "csplit"
## and cindex the index on the "xlevels" list
##
jrow <- seq_along(ncat)[ncat > 1L]
crow <- object$splits[irow[ncat > 1L], 4L] # row number in csplit
cindex <- (match(vnames, names(xlevels)))[ncat > 1L]
## Now, abbreviate the levels
if (minlength == 1L) {
if (any(ncat > 52L)) {
warning("more than 52 levels in a predicting factor, truncated for printout",
domain = NA
)
}
xlevels <- lapply(xlevels, function(z) c(letters, LETTERS)[pmin(seq_along(z), 52L)])
} else if (minlength > 1L) {
xlevels <- lapply(xlevels, abbreviate, minlength, ...)
}
## Now tuck in the labels
## I'll let some other clever person vectorize this
for (i in seq_along(jrow)) {
j <- jrow[i]
splits <- object$csplit[crow[i], ]
## splits will contain 1=left, 3=right, 2= neither
cl <- if (minlength == 1L) "" else ","
lsplit[j] <-
paste((xlevels[[cindex[i]]])[splits == 1L], collapse = cl)
rsplit[j] <-
paste((xlevels[[cindex[i]]])[splits == 3L], collapse = cl)
}
}
if (!collapse) { # called by no routines that I know of
ltemp <- rtemp <- rep("<leaf>", n)
ltemp[whichrow] <- lsplit
rtemp[whichrow] <- rsplit
return(cbind(ltemp, rtemp))
}
lsplit <- paste0(ifelse(ncat < 2L, "", "="), lsplit)
rsplit <- paste0(ifelse(ncat < 2L, "", "="), rsplit)
## Now match them up to node numbers
## The output will have one label per row of object$frame, each
## corresponding the the line segement joining this node to its parent
varname <- (as.character(vnames))
node <- as.numeric(row.names(ff))
parent <- match(node %/% 2L, node[whichrow])
odd <- (as.logical(node %% 2L))
labels <- character(n)
labels[odd] <- paste0(varname[parent[odd]], rsplit[parent[odd]])
labels[!odd] <- paste0(varname[parent[!odd]], lsplit[parent[!odd]])
labels[1L] <- "root"
labels
}
# rpart_ggdendro_env ------------------------------------------------------
# rpart_ggdendro_env <- new.env()
# rpart.branch ------------------------------------------------------------
##
## Compute the "branches" to be drawn for an rpart object
##
## most likely this could simply default to branch = 1
rpart.branch <- function(x, y, node, branch) {
# if (missing(branch)) {
# pn <- paste0("device", dev.cur())
# if (!exists(pn, envir = rpart_ggdendro_env, inherits = FALSE))
# stop("no information available on parameters from previous call to plot()")
# parms <- get(pn, envir = rpart_ggdendro_env, inherits = FALSE)
# branch <- parms$branch
# }
if (missing(branch)) branch <- 1
## Draw a series of horseshoes, left son, up, over, down to right son
## NA's in the vector cause lines() to "lift the pen"
is.left <- (node %% 2L == 0L) # left hand sons
node.left <- node[is.left]
parent <- match(node.left / 2L, node)
sibling <- match(node.left + 1L, node)
temp <- (x[sibling] - x[is.left]) * (1 - branch) / 2
xx <- rbind(
x[is.left], x[is.left] + temp,
x[sibling] - temp, x[sibling], NA
)
yy <- rbind(y[is.left], y[parent], y[parent], y[sibling], NA)
list(x = xx, y = yy)
}
# tree.depth ------------------------------------------------------------------
tree.depth <- function(nodes) {
depth <- floor(log(nodes, base = 2) + 1e-7)
depth - min(depth)
}
# rpartco ---------------------------------------------------------------------
#' Compute the x-y coordinates for a tree.
#'
#' @param tree Tree model
#' @param parms Graphics parameters
#' @keywords internal
rpartco <- function(tree, parms) {
# if (missing(parms)) {
# pn <- paste0("device", dev.cur())
# if (!exists(pn, envir = rpart_ggdendro_env, inherits = FALSE))
# stop("no information available on parameters from previous call to plot()")
# parms <- get(pn, envir = rpart_ggdendro_env, inherits = FALSE)
# }
frame <- tree$frame
node <- as.numeric(row.names(frame))
depth <- tree.depth(node)
is.leaf <- (frame$var == "<leaf>")
if (length(parms)) {
uniform <- parms$uniform
nspace <- parms$nspace
minbranch <- parms$minbranch
} else {
uniform <- FALSE
nspace <- -1
minbranch <- 0.3
}
if (uniform) {
y <- (1 + max(depth) - depth) / max(depth, 4L)
} else { # make y- (parent y) = change in deviance
y <- dev <- frame$dev
temp <- split(seq(node), depth) # depth 0 nodes, then 1, then ...
parent <- match(node %/% 2L, node)
sibling <- match(ifelse(node %% 2L, node - 1L, node + 1L), node)
## assign the depths
for (i in temp[-1L]) {
temp2 <- dev[parent[i]] - (dev[i] + dev[sibling[i]])
y[i] <- y[parent[i]] - temp2
}
##
## For some problems, classification & loss matrices in particular
## the gain from a split may be 0. This is ugly on the plot.
## Hence the "fudge" factor of 0.3 * the average step
##
fudge <- minbranch * diff(range(y)) / max(depth)
for (i in temp[-1L]) {
temp2 <- dev[parent[i]] - (dev[i] + dev[sibling[i]])
haskids <- !(is.leaf[i] & is.leaf[sibling[i]])
y[i] <- y[parent[i]] - ifelse(temp2 <= fudge & haskids, fudge, temp2)
}
y <- y / (max(y))
}
# Now compute the x coordinates, by spacing out the leaves and then
# filling in
x <- double(length(node)) # allocate, then fill it in below
x[is.leaf] <- seq(sum(is.leaf)) # leaves at 1, 2, 3, ....
left.child <- match(node * 2L, node)
right.child <- match(node * 2L + 1L, node)
## temp is a list of non-is.leaf, by depth
temp <- split(seq(node)[!is.leaf], depth[!is.leaf])
for (i in rev(temp)) {
x[i] <- 0.5 * (x[left.child[i]] + x[right.child[i]])
}
if (nspace < 0) {
return(list(x = x, y = y))
}
##
## Now we get fancy, and try to do overlapping
##
## The basic algorithm is, at each node:
## 1: get the left & right edges, by depth, for the left and
## right sons, of the x-coordinate spacing.
## 2: find the minimal free spacing. If this is >0, slide the
## right hand son over to the left
## 3: report the left & right extents of the new tree up to the
## parent
## A way to visualize steps 1 and 2 is to imagine, for a given node,
## that the left son, with all its descendants, is drawn on a
## slab of wood. The left & right edges, per level, give the
## width of this board. (The board is not a rectangle, it has
## 'stair step' edges). Do the same for the right son. Now
## insert some spacers, one per level, and slide right hand
## board over until they touch. Glue the boards and spacer
## together at that point.
##
## If a node has children, its 'space' is considered to extend left
## and right by the amount "nspace", which accounts for space
## used by the arcs from this node to its children. For
## horseshoe connections nspace usually is 1.
##
compress <- function(x, me, depth) {
lson <- me + 1L
if (is.leaf[lson]) {
left <- list(
left = x[lson], right = x[lson],
depth = depth + 1L, sons = lson
)
} else {
left <- compress(x, me + 1L, depth + 1L)
x <- left$x
}
rson <- me + 1L + length(left$sons) # index of right son
if (is.leaf[rson]) {
right <- list(
left = x[rson], right = x[rson],
depth = depth + 1L, sons = rson
)
} else {
right <- compress(x, rson, depth + 1L)
x <- right$x
}
maxd <- max(left$depth, right$depth) - depth
mind <- min(left$depth, right$depth) - depth
## Find the smallest distance between the two subtrees
## But only over depths that they have in common
## 1 is a minimum distance allowed
slide <- min(right$left[1L:mind] - left$right[1L:mind]) - 1L
if (slide > 0) { # slide the right hand node to the left
x[right$sons] <- x[right$sons] - slide
x[me] <- (x[right$sons[1L]] + x[left$sons[1L]]) / 2
}
else {
slide <- 0
}
## report back
if (left$depth > right$depth) {
templ <- left$left
tempr <- left$right
tempr[1L:mind] <- pmax(tempr[1L:mind], right$right - slide)
} else {
templ <- right$left - slide
tempr <- right$right - slide
templ[1L:mind] <- pmin(templ[1L:mind], left$left)
}
list(
x = x,
left = c(x[me] - nspace * (x[me] - x[lson]), templ),
right = c(x[me] - nspace * (x[me] - x[rson]), tempr),
depth = maxd + depth, sons = c(me, left$sons, right$sons)
)
}
x <- compress(x, 1L, 1L)$x
list(x = x, y = y)
}
# text.rpart ------------------------------------------------------------------
## This is a modification of text.tree.
## Fancy option has been added in (to mimic post.tree)
##
text_rpart <- function(x, splits = TRUE, label, FUN = text, all = FALSE,
pretty = NULL, digits = getOption("digits") - 3L,
use.n = FALSE, fancy = FALSE, fwidth = 0.8, fheight = 0.8,
bg = par("bg"), minlength = 1L, parms, ...) {
string.bounding.box <- NULL
if (!inherits(x, "rpart")) stop("Not a legitimate \"rpart\" object")
if (nrow(x$frame) <= 1L) stop("fit is not a tree, just a root")
frame <- x$frame
if (!missing(label)) warning("argument 'label' is no longer used")
col <- names(frame)
ylevels <- attr(x, "ylevels")
if (!is.null(ylevels <- attr(x, "ylevels"))) col <- c(col, ylevels)
# cxy <- par("cxy") # character width and height
cxy <- c(0.1, 0.1)
if (!is.null(srt <- list(...)$srt) && srt == 90) cxy <- rev(cxy)
xy <- rpartco(x, parms = parms)
node <- as.numeric(row.names(frame))
is.left <- (node %% 2L == 0L) # left hand sons
node.left <- node[is.left]
parent <- match(node.left / 2L, node)
## Put left splits at the parent node
if (splits) {
left.child <- match(2L * node, node)
right.child <- match(node * 2L + 1L, node)
rows <- if (!missing(pretty) && missing(minlength)) {
labels(x, pretty = pretty)
} else {
labels(x, minlength = minlength)
}
if (fancy) {
## put split labels on branches instead of nodes
xytmp <- rpart.branch(x = xy$x, y = xy$y, node = node)
leftptx <- (xytmp$x[2L, ] + xytmp$x[1L, ]) / 2
leftpty <- (xytmp$y[2L, ] + xytmp$y[1L, ]) / 2
rightptx <- (xytmp$x[3L, ] + xytmp$x[4L, ]) / 2
rightpty <- (xytmp$y[3L, ] + xytmp$y[4L, ]) / 2
# FUN(leftptx, leftpty + 0.52 * cxy[2L],
# rows[left.child[!is.na(left.child)]], ...)
# FUN(rightptx, rightpty - 0.52 * cxy[2L],
# rows[right.child[!is.na(right.child)]], ...)
} else {
# FUN(xy$x, xy$y + 0.5 * cxy[2L], rows[left.child], ...)
### Insertion by Andrie de Vries to capture label splits
labelSplits <- data.frame(
x = xy$x,
y = xy$y,
label = rows[left.child]
)
}
labelSplits <- labelSplits[complete.cases(labelSplits), ]
labelSplits$type <= "splits"
}
leaves <- if (all) rep(TRUE, nrow(frame)) else frame$var == "<leaf>"
stat <-
x$functions$text(
yval = if (is.null(frame$yval2)) {
frame$yval[leaves]
} else {
frame$yval2[leaves, ]
},
dev = frame$dev[leaves], wt = frame$wt[leaves],
ylevel = ylevels, digits = digits,
n = frame$n[leaves], use.n = use.n
)
# if (fancy) {
# if (col2rgb(bg, alpha = TRUE)[4L, 1L] < 255) bg <- "white"
# oval <- function(middlex, middley, a, b)
# {
# theta <- seq(0, 2 * pi, pi/30)
# newx <- middlex + a * cos(theta)
# newy <- middley + b * sin(theta)
# polygon(newx, newy, border = TRUE, col = bg)
# }
#
# ## FIXME: use rect()
# rectangle <- function(middlex, middley, a, b)
# {
# newx <- middlex + c(a, a, -a, -a)
# newy <- middley + c(b, -b, -b, b)
# polygon(newx, newy, border = TRUE, col = bg)
# }
#
# ## find maximum length of stat
# maxlen <- max(string.bounding.box(stat)$columns) + 1L
# maxht <- max(string.bounding.box(stat)$rows) + 1L
#
# a.length <- if (fwidth < 1) fwidth * cxy[1L] * maxlen else fwidth * cxy[1L]
#
# b.length <- if (fheight < 1) fheight * cxy[2L] * maxht else fheight * cxy[2L]
#
# ## create ovals and rectangles here
# ## sqrt(2) creates the smallest oval that fits around the
# ## best fitting rectangle
# for (i in parent)
# oval(xy$x[i], xy$y[i], sqrt(2) * a.length/2, sqrt(2) * b.length/2)
# child <- match(node[frame$var == "<leaf>"], node)
# for (i in child)
# rectangle(xy$x[i], xy$y[i], a.length/2, b.length/2)
# }
## if FUN=text then adj=1 puts the split label to the left of the
## split rather than centered
## Allow labels at all or just leaf nodes
## stick values on nodes
# if (fancy) FUN(xy$x[leaves], xy$y[leaves] + 0.5 * cxy[2L], stat, ...)
# else FUN(xy$x[leaves], xy$y[leaves] - 0.5 * cxy[2L], stat, adj = 0.5, ...)
labelLeaves <- data.frame(
x = xy$x[leaves],
y = xy$y[leaves],
label = stat
)
# return(list(xy=xy, cxy=cxy))
list(
labels = labelSplits,
leaf_labels = labelLeaves
)
}
# formatg -----------------------------------------------------------------
## format a set of numbers using C's "g" format
## No longer exported.
formatg <- function(x, digits = getOption("digits"),
format = paste0("%.", digits, "g")) {
if (!is.numeric(x)) stop("'x' must be a numeric vector")
temp <- sprintf(format, x)
if (is.matrix(x)) matrix(temp, nrow = nrow(x)) else temp
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.