R/pcabiplot.R

#' Biplot of a PCA object
#'
#' @description This function creates a biplot from a pca object, which is
#' generated by the \code{prcomp} function from the \pkg{stats} package.
#'
#' @param PC A pca object generated by \code{prcomp} function.
#' @param x X axis (see \strong{Details}).
#' @param y Y axis (see \strong{Details}).
#' @param var.line A logical input, if variable lines are plotted.
#' @param colobj A vector to provide color in the objects (see
#' \strong{Details}).
#' @param o.size A numeric number to set the object size.
#'
#' @details This is a function to plot a pca biplot from a pca object. The x
#' and y axes can be supplied with any principle component. The length of the
#' \code{colobj} vector has to be equal to the number of objects. This argument
#' controls the color of the objects and is very convenient to explore the
#' clustering result. The default value is that all object have the same color.
#'
#' @return Function returns a plot of pca.
#'
#' @author Weksi Budiaji \cr Contact: \email{budiaji@untirta.ac.id}
#'
#' @examples
#' pcadat <- prcomp(iris[,1:4], scale. = TRUE)
#' pcabiplot(pcadat)
#'
#' @export

pcabiplot <- function (PC, x = "PC1", y = "PC2", var.line = TRUE,
                       colobj = rep(1, nrow(PC$x)), o.size = 1) {

  if (inherits(PC, "prcomp") == FALSE)
    stop("The object is not a prcomp class")

  if (length(colobj) != nrow(PC$x))
    stop("The vector of colobj must have the same length with the number of objects!")

  varpca <- PC$sdev^2
  names(varpca) <- colnames(PC$x)
  prop.exp <- (varpca[x] + varpca[y])/sum(varpca)
  if (is.null(rownames(PC$x)) == FALSE) {
    data <- data.frame(obs = row.names(PC$x), PC$x)
    plot <- ggplot(data, aes_string(x = x, y = y)) +
      geom_text(alpha = 0.4, size = o.size, aes_(label = ~obs),
                color = as.factor(colobj))
  }
  else {
    data <- data.frame(PC$x)
    plot <- ggplot(data, aes_string(x = x, y = y)) +
      geom_point(alpha = 0.4, size = o.size, color = as.factor(colobj))
  }
  datapc <- data.frame(varnames = rownames(PC$rotation), PC$rotation)
  mult <- min((max(data[, y]) - min(data[, y])/(max(datapc[,y]) - min(datapc[, y]))),
              (max(data[, x]) - min(data[, x])/(max(datapc[, x]) - min(datapc[, x]))))
  datapc <- transform(datapc, v1 = 0.7 * mult * (get(x)), v2 = 0.7 * mult * (get(y)))
  plot <- plot + coord_equal() +
    xlab(paste(names(varpca[x]), "(Variance explained =", round(varpca[x]/sum(varpca) * 100, 1), "%)")) +
    ylab(paste(names(varpca[y]), "(Variance explained =", round(varpca[y]/sum(varpca) * 100, 1), "%)")) +
    theme(panel.grid.major = element_blank(),
          panel.border = element_blank(), legend.position = "none") +
    labs(caption = paste("Total variance =", round(prop.exp * 100, 1), "%"))
  if (var.line != TRUE) {
    plot <- plot
  }
  else {
    plot <- plot +
      geom_text(data = datapc, aes_(x = ~v1, y = ~v2, label = ~varnames), size = 3, vjust = 1, color = "red") +
      geom_segment(data = datapc, aes_(x = 0, y = 0, xend = ~v1, yend = ~v2),
                   arrow = arrow(length = unit(0.2, "cm")), alpha = 0.75, color = "red")
  }
  return(plot)
}

Try the kmed package in your browser

Any scripts or data that you put into this service are public.

kmed documentation built on Aug. 29, 2022, 9:06 a.m.