R/rp_coefficients.R
In rpanel: Simple Interactive Controls for R using the 'tcltk' Package

Documented in rp.coefficients

# Effect plots for linear or generalised linear regression models

rp.coefficients <- function(model, style = 'density',
                            ci = TRUE, point.estimate = !ci,
                            se.scale = FALSE, marks, cols, ngrid) {

   if (!requireNamespace("ggplot2", quietly = TRUE))
      stop("the ggplot2 package is not available.")

   if (!('lm' %in% class(model)) & !('glm' %in% class(model)))
      stop("'model' is not a linear or generalised linear model object.")
   if (is.null(attr(model$terms, 'term.labels')))
      stop('the model has no non-intercept terms.')

   if ((length(style) != 1) | !(style %in% c('density', 'shading'))) {
      warning("invalid value for 'style' - setting to 'density'.")
      style <- 'density'
   }
   
   # Missing arguments
   if (missing(marks)) {
      marks <- if ('lm' %in% class(model)) qt(0.975, model$df.residual) else 1.96
      marks <- c(-marks, marks)
   }
	if (missing(ngrid)) ngrid <- 200
	clrs <- if (missing(cols)) rp.colours() else rp.colours(cols)
	fill.col <- if (ci) clrs['estimate'] else clrs['reference']
	
   # subset and labels are currently disabled
	# sbst    <- if (missing(subset)) NA else subset
	# lbls    <- if (missing(labels)) NA else labels
	
	if (!("x" %in% names(model)))
	   model$x <- model.matrix(model, model.frame(model))

	tbl.full <- summary(model)$coefficients

	# Retain only coefficients generated by terms at the top of the model hierarchy
	involved <- attr(model$terms, 'factors')
	type     <- attr(model$terms, 'dataClasses')
	fn1 <- function(nm1) {
	   if (type[nm1] == 'factor') paste(nm1, colnames(contrasts(model$model[ , nm1])), sep = '')
	   else nm1
	}
	fn  <- function(nm) {
	   inv <- involved[ , nm]
	   inv <- as.list(names(inv)[inv == 1])
	   mat <- as.matrix(expand.grid(lapply(inv, fn1)))
	   apply(mat, 1, paste, collapse = ':')
	}
	keep        <- rownames(drop1(model))[-1]
   # Old code which doesn't work for general contrasts
	# keep.cfs <- lapply(keep, fn)
	# trms.cfs <- rep(keep, sapply(keep.cfs, length))
	# keep     <- unlist(keep.cfs)
	term.labels <- attr(model$terms, 'term.labels')
	ind         <- which(term.labels %in% keep)
	mm          <- model.matrix(formula(model), model$model)
	massign     <- attr(mm, 'assign')
	keep        <- massign %in% ind
	keep <- keep[!is.na(coefficients(model))]
	tbl         <- tbl.full[keep, , drop = FALSE]

	# Apply subset, removing the intercept if subset is not specified
   # Currently disabled
	sbst <- NA
	if (any(is.na(sbst))) {
	   sbst <- 1:nrow(tbl)
	   intcpt <- match('(Intercept)', rownames(tbl))
	   if (!is.na(intcpt)) sbst <- sbst[-intcpt] 
	}
	if (length(sbst) == 0)
	   stop('no non-intercept terms are present.')
	tbl <- tbl[sbst, , drop = FALSE]

	# Check the labels
	# Currently disabled
	lbls <- NA
	if (any(is.na(lbls)))
	   lbls  <- rownames(tbl)
	# else
	#    renamed <- (length(lbls) == nrow(tbl))
	if (length(lbls) < nrow(tbl))
	   stop(paste('the length of labels is smaller that the number of model terms',
	              '(after subset has been applied, if used).'))
	# Allow labels which are a superset
	else if (length(lbls) > nrow(tbl)) {
	   if (!all(rownames(tbl) %in% lbls))
	      stop('the labels do not contain all the model terms.')
	   else
	      lbls.coef <- match(rownames(tbl), lbls)
	}
	else
	   lbls.coef <- 1:nrow(tbl)
	# else if (length(lbls) != nrow(tbl))
	#    stop("the 'labels' argument does not match the number of model terms.")
	
	coeff  <- tbl[ , 1]
	se     <- tbl[ , 2]
	yname  <- all.vars(model$call)[1]
	x      <- model$x[ , rownames(tbl), drop = FALSE]
	involved  <- attr(model$terms, 'factors')[-1, , drop = FALSE]
	var.types <- attr(model$terms, 'dataClasses')[-1]
	
	rng.fn <- function(nm) {
	   ind <- match(nm, rownames(tbl.full))
	   # trm <- trms.cfs[ind]
	   trm <- term.labels[model$assign[ind]]
	   ind <- which((involved[ , trm, drop = FALSE] == 1) & (var.types == 'numeric'))
	   ind <- rownames(involved)[ind]
	   if (length(ind) > 1)
	       stop('rp.coefficients cannot handle interaction between numeric variables.')
	   result <- if (length(ind) == 0) 0:1 else range(model$x[ , ind])
	   result
	}
	rng <- sapply(rownames(tbl), rng.fn)
	for (i in 1:length(coeff)) {
	   if (abs(diff(rng[ , i]) - 1) > 1e-8)
	         lbls[lbls.coef[i]] <- paste(lbls[lbls.coef[i]], "\n(", signif(rng[1, i], 5),
	                            " - ", signif(rng[2, i], 5), ")", sep = "")
	}
	lbls   <- factor(lbls, levels = lbls)	# Order of the terms retained
	lbls   <- lbls[lbls.coef]
	ncoef  <- length(coeff)
	coeff  <- coeff * (rng[2, ] - rng[1, ])
	se     <-    se * (rng[2, ] - rng[1, ])
	mn     <- if (ci) coeff else rep(0, ncoef)
	yrange <- range(min(mn - 3 * se), max(mn + 3 * se), coeff, 0)
	mn     <- rep(mn, each = ngrid)
	se     <- rep(se, each = ngrid)
	ygrid  <- seq(yrange[1], yrange[2], length = ngrid + 2)[2:(ngrid + 1)] # Avoid end-points
	ygrid  <- rep(ygrid, ncoef)
	dgrid  <- dnorm(ygrid, mn, se) * 1.5 * min(se)
	x      <- rep(lbls, each = ngrid)
	y      <- ygrid
	d      <- dgrid
	dfrm   <- data.frame(x = x, y = y, d = d)

	# Create the plot
	plt <- ggplot2::ggplot(dfrm, ggplot2::aes(x, y))
	
	# Add point estimates if required
	if (point.estimate | !ci) {
	   dfrm.mn <- data.frame(x = as.numeric(lbls[lbls.coef]), y = coeff)
	   plt <- plt + ggplot2::geom_segment(ggplot2::aes(x = x - 0.3, xend = x  + 0.3),
	                                      col = clrs['estline'], data = dfrm.mn)
	}

   # 	Add the uncertainty
	if (style == 'density')
	   plt <- plt +
	      ggplot2::geom_tile(width = dgrid, fill = fill.col, show.legend = FALSE)
	else if (style == 'shading')
	   plt <- plt +
	      ggplot2::geom_tile(ggplot2::aes(fill = d), width = 0.6, show.legend = FALSE) +
	      ggplot2::scale_fill_gradient(low = "grey92", high = fill.col)
	
	# Add se scale if requested
	if (se.scale) {
	   se.marks     <- seq(-3, 3, by = 1)
	   nmarks       <- length(se.marks)
	   mn           <- if (ci) coeff else rep(0, ncoef)
	   se           <- tbl[ , 2] * (rng[2, ] - rng[1, ])
	   sc           <- rep(se.marks, ncoef)
	   dfrm.scale   <- data.frame(x  = rep(as.numeric(lbls), each = nmarks),
	                              mn = rep(mn, each = nmarks),
	                              se = rep(se, each = nmarks),
	                              sc = sc)
	   dfrm.scale$y <- dfrm.scale$mn + dfrm.scale$sc * dfrm.scale$se
	   ystart       <- mn - min(se.marks) * se
	   yend         <- mn + min(se.marks) * se
	   dfrm.axes    <- data.frame(x = lbls, y = ystart, yend = yend)
	   col.scale <- clrs['points']
	   plt <- plt +
	      ggplot2::geom_segment(ggplot2::aes(x = x, y = y, yend = yend),
	                            col = col.scale, data = dfrm.axes) +
	      ggplot2::geom_segment(ggplot2::aes(x = x - 0.02, xend = x + 0.02, y = y),
	                            col = col.scale, data = dfrm.scale) +
	      ggplot2::geom_text(ggplot2::aes(x = x - 0.08, y = y,
	                                      label = as.character(sc)),
	                         col = col.scale, size = 3, data = dfrm.scale)
	}
	
	# Add ci marks if requested
	if (!is.null(marks)) {
	   mn           <- if (ci) coeff else rep(0, ncoef)
	   se           <- tbl[ , 2] * (rng[2, ] - rng[1, ])
	   dfrm.scale   <- data.frame(x  = rep(as.numeric(lbls), each = length(marks)),
	                              mn = rep(  mn, each = length(marks)),
	                              se = rep(  se, each = length(marks)),
	                              sc = rep(marks, ncoef))
	   dfrm.scale$y <- dfrm.scale$mn + dfrm.scale$sc * dfrm.scale$se
	   dfrm.scale$d <- dnorm(dfrm.scale$y, dfrm.scale$mn, dfrm.scale$se) * 1.5 * min(dfrm.scale$se)
	   del <- if (style == 'density') 0.5 * dfrm.scale$d else 0.3
	   plt <- plt + ggplot2::geom_segment(ggplot2::aes(x    = x - del,
	                                                   xend = x + del,
	                                                   y    = y),
	                                      col = 'white', data = dfrm.scale)
	}
	
	# Other detailed aspects of the plot
	plt <-    plt + 
	          ggplot2::scale_x_discrete(drop = FALSE) +
	          ggplot2::geom_hline(yintercept = 0, linetype = "dashed") +
	          ggplot2::ylab(paste("Change in mean", yname)) +
	          ggplot2::xlab("Coefficients") +
	          ggplot2::ylim(yrange[1], yrange[2]) +
	          ggplot2::theme(panel.grid = ggplot2::element_blank(), legend.position = "none")
	
	print(plt)
	invisible(plt)
}
	
	# old code
#    cfs   <- coef(model)[-1]
#    p     <- length(cfs)
#    x     <- model$x[ , -1]
#    rng   <- diff(apply(x, 2, range))
#    chng  <- cfs * rng
#    se    <- coef(summary(model))[-1, 2] * rng
#    ylab  <- attr(model$terms, "variables")
#    ylab  <- strsplit(deparse(ylab), ",")[[1]][1]
#    ylab  <- substr(ylab, 6, nchar(ylab))
#    if (any(is.na(prng))) prng <- range(chng + 3 * se, chng - 3 * se)
#    par(mar = c(3, 3, 1, 1) + 0.1, mgp = c(1.5, 0.2, 0), tcl = -0.2)
#    plot(c(0.4, p + 0.6), prng, ylab = paste("Change in", ylab), xlab = "",
#    		 type = "n", axes = FALSE)
#    usr <- par("usr")
#    rect(usr[1], usr[3], usr[2], usr[4], col = grey(0.9), border = NA)
#    abline(h = axTicks(2), col = "white")
#    abline(h = 0, lty = 2, lwd = 2)
#    # grid(col = "white", lty = 1)
#    axis(1, 1:p, names(cfs), tick = FALSE, lwd = 0, mgp = c(3, 0, 0))
#    rng <- signif(apply(x, 2, range))
#    rng <- paste(rng[1, ], "-", rng[2, ])
#    axis(1, 1:p, rng,        tick = FALSE, lwd = 0, mgp = c(3, 1, 0), cex.axis = 0.7)
#    axis(2, lwd = 0, lwd.ticks = 2, col = grey(0.6), col.ticks = grey(0.6),
#    		 col.axis = grey(0.6), cex.axis = 0.8)
#    xgrid <- seq(usr[3], usr[4], length = 500)
#    for (i in 1:p) denstrip::denstrip(xgrid, dnorm(xgrid, chng[i], se[i]), i, 0.7,
#                               colmax = col, colmin = "transparent", horiz = FALSE)
#    # segments(usr[1], usr[3], usr[1], usr[4], lwd = 2)
#    invisible()
# }