R/cols_cntr_scatter.R

Defines functions cols.cntr.scatter

Documented in cols.cntr.scatter

#' Scatterplot for column categories contribution to dimensions
#'
#' Plots a scatterplot of the contributions (in permills) of the column
#' categories to two selected dimensions. Two dashed reference lines (in RED)
#' indicate the threshold above which a contribution can be considered
#' important for the determination of a dimension; the threshold is 1000
#' divided by the number of columns, rounded to the nearest integer. A
#' diagonal line (in BLACK) is just a visual aid to eyeball whether a category
#' is actually contributing more (in relative terms) to either of the two
#' dimensions.
#'
#' @param data name of the dataset (a two-dimensional table, e.g. a data
#'   frame or matrix, that is passed on to \code{CA()}).
#' @param x,y dimensions for which the contributions are reported (1st and
#'   2nd dimension by default). Each must be a single whole number between 1
#'   and the dimensionality of the table, i.e.
#'   \code{min(nrow(data), ncol(data)) - 1}.
#' @return A ggplot object holding the scatterplot.
#' @keywords scatterplot column categories contribution
#' @export
#' @examples
#' data(greenacre_data) #load the sample dataset
#' cols.cntr.scatter(greenacre_data,1,2) #plot the scatterplot for dimensions 1&2
#' 
cols.cntr.scatter <- function(data, x = 1, y = 2) {
  # Validate the inputs before any analysis is run so that misuse produces a
  # clear message rather than an obscure indexing error later on.
  if (length(dim(data)) != 2L) {
    stop("'data' must be a two-dimensional table (data frame or matrix)",
         call. = FALSE)
  }

  ncols <- ncol(data)
  # Dimensionality of the table: the smaller of (rows - 1) and (columns - 1).
  max_dim <- min(ncols, nrow(data)) - 1L

  dims <- c(x, y)
  if (!is.numeric(dims) || length(x) != 1L || length(y) != 1L ||
      anyNA(dims) || any(dims < 1) || any(dims > max_dim) ||
      any(dims != round(dims))) {
    stop(
      sprintf(
        paste0(
          "'x' and 'y' must each be a single whole number between 1 and %d ",
          "(the dimensionality of the table)"
        ),
        max_dim
      ),
      call. = FALSE
    )
  }

  res <- CA(data, ncp = max_dim, graph = FALSE)
  contrib <- res$col$contrib

  # Contributions are multiplied by 10 to express them in permills.
  dfr <- data.frame(
    lab = colnames(data),
    dim1 = contrib[, x] * 10,
    dim2 = contrib[, y] * 10
  )

  # Same upper limit on both axes so that the diagonal is a true 1:1 line.
  limit <- max(max(dfr$dim1), max(dfr$dim2)) + 10

  # Reference threshold (permills) drawn on both axes.
  threshold <- round((100 / ncols) * 10, digits = 0)

  ggplot(dfr, aes(x = dim1, y = dim2)) +
    geom_point(alpha = .80) +
    geom_hline(yintercept = threshold, colour = "red", linetype = "dashed") +
    geom_vline(xintercept = threshold, colour = "red", linetype = "dashed") +
    scale_y_continuous(limits = c(0, limit)) +
    scale_x_continuous(limits = c(0, limit)) +
    geom_abline(intercept = 0, slope = 1) +
    theme_bw() +
    # requires 'ggrepel'
    geom_text_repel(
      data = dfr,
      aes(label = lab),
      size = 2.7,
      colour = "black",
      box.padding = unit(0.35, "lines"),
      point.padding = unit(0.3, "lines")
    ) +
    labs(
      x = paste("Column categories' contribution (permills) to Dim", x),
      y = paste("Column categories' contribution (permills) to Dim", y)
    )
}
gianmarcoalberti/CAinterprTools_0.7 documentation built on May 14, 2017, 10:51 a.m.