R/rows_corr_scatter.R

Defines functions rows.corr.scatter

Documented in rows.corr.scatter

#' Scatterplot for row categories correlation with dimensions
#'
#' Plots, for each row category of a cross-tabulation, its correlation with
#' two selected dimensions of the Correspondence Analysis of the table. The
#' correlation is the square root of the squared cosine (\code{cos2}) that
#' \code{CA()} reports for the rows, rounded to 3 digits. A diagonal line
#' (in black) is a visual aid to eyeball whether a category is more
#' correlated (in relative terms) with either of the two dimensions.
#'
#' @param data cross-tabulation to analyse (two-dimensional, e.g. a data
#'   frame); its row names are used as point labels.
#' @param x,y dimensions for which the correlations are reported (1st and
#'   2nd dimension by default). Each must be a single whole number between 1
#'   and the dimensionality of the table, i.e.
#'   \code{min(nrow(data), ncol(data)) - 1}.
#' @return A ggplot object; both axes are limited to the range 0-1.
#' @keywords scatterplot row categories correlation
#' @export
#' @examples
#' data(greenacre_data) #load the sample dataset
#' rows.corr.scatter(greenacre_data,1,2) #plot the scatterplot for dimensions 1&2
#' 
rows.corr.scatter <- function (data, x = 1, y = 2){
  if (length(dim(data)) != 2L) {
    stop("'data' must be a two-dimensional table (rows by columns).",
         call. = FALSE)
  }

  # dimensionality of the table: the smaller of (rows - 1) and (columns - 1)
  a <- min(ncol(data), nrow(data)) - 1
  if (a < 1) {
    stop("'data' must have at least two rows and two columns.",
         call. = FALSE)
  }

  # Validate the requested dimensions up front: an out-of-range index would
  # otherwise fail later with an opaque "subscript out of bounds", and a
  # zero, negative or vector index would silently build a malformed data
  # frame.
  check_dim <- function(value, arg) {
    ok <- is.numeric(value) && length(value) == 1L && !is.na(value) &&
      value == round(value) && value >= 1 && value <= a
    if (!ok) {
      stop("'", arg, "' must be a single whole number between 1 and ", a,
           " (the dimensionality of the table).", call. = FALSE)
    }
  }
  check_dim(x, "x")
  check_dim(y, "y")

  # keep every available dimension so that any valid x/y can be extracted
  res <- CA(data, ncp = a, graph = FALSE)

  dfr <- data.frame(
    lab = rownames(data),
    dim1 = round(sqrt(res$row$cos2[, x]), digits = 3),
    dim2 = round(sqrt(res$row$cos2[, y]), digits = 3)
  )

  # geom_text_repel() requires 'ggrepel'
  ggplot(dfr, aes(x = dim1, y = dim2)) +
    geom_point(alpha = .80) +
    scale_y_continuous(limits = c(0, 1)) +
    scale_x_continuous(limits = c(0, 1)) +
    geom_abline(intercept = 0, slope = 1) +
    theme_bw() +
    geom_text_repel(
      data = dfr, aes(label = lab), size = 2.7, colour = "black",
      box.padding = unit(0.35, "lines"), point.padding = unit(0.3, "lines")
    ) +
    labs(
      x = paste("Row categories' correlation with Dim", x),
      y = paste("Row categories' correlation with Dim", y)
    )
}
gianmarcoalberti/CAinterprTools_0.7 documentation built on May 14, 2017, 10:51 a.m.