R/plotseries.R

Defines functions ellipse plotHotellingEllipse plotRegressionLine plotPointsShape plotConvexHull plotConfidenceEllipse plotDensity plotBars plotErrorbars plotLines plotScatter showLabels getDataLabels getLabelsAsIndices getLabelsAsValues getConvexHull getConfidenceEllipse getPlotColors splitExcludedData splitPlotData preparePlotData plotseries

Documented in ellipse getConfidenceEllipse getConvexHull getDataLabels getLabelsAsIndices getLabelsAsValues getPlotColors plotBars plotConfidenceEllipse plotConvexHull plotDensity plotErrorbars plotHotellingEllipse plotLines plotPointsShape plotRegressionLine plotScatter plotseries preparePlotData showLabels splitExcludedData splitPlotData

#' Create plot series object based on data, plot type and parameters
#'
#' @param data
#' data to make the plot for (vector, matrix or data frame).
#' @param type
#' type of the plot.
#' @param cgroup
#' vector with values used to create a color grouping of the series instances.
#' @param col
#' color to show the series on plot with (user defined).
#' @param opacity
#' opacity of the colors (between 0 and 1).
#' @param colmap
#' colormap name to generate color/colors if they are not specified by user. See
#' \code{link{mdaplot.getColors}} for details.
#' @param labels
#' either vector with labels for the series instances or string ("names", "values", or "indices")
#' if labels should be generated automatically.
#'
#' @description
#' The `plotseries` object contains all necessary paremeters to create main plots
#' from data values, including values for x and y, correct handling of excluded rows
#' and columns, color grouping (if any), limits and labels.
#'
#' If both `col` and `cgroup` are specified, `cgroup` will be ignored.
#'
#' Labels can be either provided by user or generated automatically based on values, names or
#' indices of data rows and columns. If series is made for scatter plot `type="p"` then labels
#' are required for each row of the original dataset. Otherwise (for line, bar and errobar plot)
#' labels correspond to data columns (variables).
#'
#' The object has the following plotting methods once created:
#' \code{\link{plotScatter}}
#' \code{\link{plotLines}}
#' \code{\link{plotBars}}
#' \code{\link{plotDensity}}
#' \code{\link{plotErrorbars}}
#'
#' @export
plotseries <- function(data, type, cgroup = NULL, col = NULL, opacity = 1,
   colmap = "default", labels = NULL) {

   if (length(data) == 0) {
      stop("Seems you forgot to provide data values (vector, matrix of data frame).")
   }

   ps <- list()
   ps$type <- type
   ps$name <- attr(data, "name", exact = TRUE)
   ps$call <- match.call()
   class(ps) <- "plotseries"

   # prepare data values
   plot_data <- preparePlotData(data)
   ps <- c(ps, splitPlotData(plot_data$visible, type))

   # add values for excluded rows
   if (!is.null(plot_data$excluded)) {
      ps$excluded_cols <- plot_data$excluded_cols
      ps$excluded_rows <- plot_data$excluded_rows
      ps <- c(ps, splitExcludedData(plot_data$excluded, type))
   }

   # add labels
   ps <- c(ps, getDataLabels(ps, labels))

   # handle colors
   ps <- c(ps, getPlotColors(ps, col, opacity, cgroup, colmap))

   # compute axes limits
   ps$xlim <- range(ps$x_values)
   ps$ylim <- range(ps$y_values)
   class(ps) <- "plotseries"

   return(ps)
}

#' Take dataset and prepare them for plot
#'
#' @param data
#' dataset (vector, matrix or data frame)
#'
#' @description
#' The function checks that `data` contains correct numeric values, check for
#' mandatory attributes (row and column names, x- and y-axis values and names, etc.)
#' and add them if necessary.
#'
#' Another things is to remove hidden columns and split the rest to visible and hidden
#' values (if excluded rows are present).
#'
preparePlotData <- function(data) {

   # get data attributes to separate variable for shorten code
   attrs <- attributes(data)

   # if it is vector without dimension - make a matrix
   if (is.null(dim(data))) {
      data <- t(as.matrix(data))
   }

   # convert data frame to a matrix if needed
   if (is.data.frame(data)) {
      data <- mda.df2mat(data)
   }

   # if row names are missing - add them
   if (is.null(rownames(data))) {
      rownames(data) <- paste0("O", seq_len(nrow(data)))
   }

   # if column names are missing - add them
   if (is.null(colnames(data))) {
      colnames(data) <- paste0("X", seq_len(ncol(data)))
   }

   # if xaxis values are missing - add them
   if (is.null(attrs$xaxis.values)) {
      attrs$xaxis.values <- seq_len(ncol(data))
   }

   # if yaxis values are missing - add them
   if (is.null(attrs$yaxis.values)) {
      attrs$yaxis.values <- seq_len(nrow(data))
   }

   # handle excluded columns if any
   excluded_cols <- NULL
   if (length(attrs$exclcols) > 0) {
      excluded_cols <- mda.getexclind(attrs$exclcols, colnames(data), ncol(data))
      data <- data[, -excluded_cols, drop = FALSE]
      attrs$xaxis.values <- attrs$xaxis.values[-excluded_cols]
   }

   # check if data still has some columns
   if (is.null(ncol(data)) || ncol(data) == 0) {
      stop("No columns left after excluded hidden values.")
   }

   # handle excluded rows if any
   excluded_data <- NULL
   excluded_rows <- NULL
   if (length(attrs$exclrows > 0)) {
      excluded_rows <- mda.getexclind(attrs$exclrows, rownames(data), nrow(data))
      excluded_data <- data[excluded_rows, , drop = FALSE]
      excluded_yaxis_values <- attrs$yaxis.values[excluded_rows]

      data <- data[-excluded_rows, , drop = FALSE]
      attrs$yaxis.values <- attrs$yaxis.values[-excluded_rows]
   }

   # reassign some attribues to data
   attr(data, "xaxis.values") <- attrs$xaxis.values
   attr(data, "yaxis.values") <- attrs$yaxis.values
   attr(data, "xaxis.name") <- attrs$xaxis.name
   attr(data, "yaxis.name") <- attrs$yaxis.name
   attr(data, "name") <- attrs$name

   # add necessary attributes to excluded data
   if (!is.null(excluded_data)) {
      attr(excluded_data, "xaxis.values") <- attrs$xaxis.values
      attr(excluded_data, "yaxis.values") <- excluded_yaxis_values
   }

   return(list(
      visible = data,
      excluded = excluded_data,
      excluded_cols = excluded_cols,
      excluded_rows = excluded_rows
   ))
}

#' Split dataset to x and y values depending on plot type
#'
#' @param data
#' matrix with data values (visible or hidden)
#' @param type
#' type of plot
#'
splitPlotData <- function(data, type) {

   # shortcuts to some of parameters
   attrs <- attributes(data)


   if (type == "p" && ncol(data) == 1) {
      # if data has only one column add y values in front
      data <- cbind(attrs$yaxis.values, data)
      colnames(data)[1] <- if (is.null(attrs$yaxis.name)) "Objects" else attrs$yaxis.name
   }

   if (type %in%  c("p", "d")) {
      # scatter or density plot - take first and second columns as x and y
      x_values <- data[, 1]
      names(x_values) <- rownames(data)
      attr(x_values, "name") <- colnames(data)[1]
      y_values <- data[, 2, drop = FALSE]
      attr(y_values, "name") <- colnames(data)[2]
      return(list(x_values = x_values, y_values = y_values))
   }

   if (type == "e") {
      # errorbar plor - make data consist of three rows: (m, m - err, m + err)
      data <- rbind(
         data[1, ],
         data[1, ] - data[2, ],
         data[1, ] + if (nrow(data) > 2) data[3, ] else data[2, ]
      )
   }


   # 0.12.0: yaxis.name must not be used as axis label in line and bar plots
   if (type %in% c("b", "l", "e", "h")) {
      attrs$yaxis.name <- NULL
   }

   # prepare x-axis values for other types of plots
   y_values <- data
   x_values <- attrs$xaxis.values

   if (is.null(names(x_values))) names(x_values) <- colnames(data)
   attr(x_values, "name") <- if (is.null(attrs$xaxis.name)) "Variables" else attrs$xaxis.name

   # by default take all data values as y and assign name to the x-variable
   attr(y_values, "name") <- if (is.null(attrs$yaxis.name)) "" else attrs$yaxis.name

   return(list(x_values = x_values, y_values = y_values))
}

#' Split the excluded part of data
#'
#' @param data
#' matrix with hidden data values
#' @param type
#' type of plot
#'
splitExcludedData <- function(data, type) {

   if (type == "e") {
      stop("Errorbar can not be created for data with excluded rows.")
   }

   pd <- splitPlotData(data, type)
   return(list(x_values_excluded = pd$x_values, y_values_excluded = pd$y_values))
}

#' Define colors for plot series
#'
#' @param ps
#' `plotseries` object
#' @param col
#' color specified by user (if any)
#' @param opacity
#' opacity for the color
#' @param cgroup
#' vector for color grouping (if any)
#' @param colmap
#' name or values for colormap
#'
getPlotColors <- function(ps, col, opacity, cgroup, colmap) {

   # if user specified color - use it
   if (!is.null(col)) {
      return(list(
         col = adjustcolor(col, opacity),
         colmap = colmap,
         cgroup = NULL
      ))
   }

   # if user did not specify cgroup - get one color based on colmap and opacity
   if (is.null(cgroup) || ps$type == "e") {
      return(list(
         col = mdaplot.getColors(1, colmap = colmap, opacity = opacity),
         colmap = colmap,
         cgroup = NULL
      ))
   }

   # check if cgroup is provided for all values, also from excluded rows and correct this
   if (ps$type == "h") {
      cgroup_expected_length <- length(ps$x_values)
      cgroup_excluded_values <- ps$excluded_cols
   } else {
      cgroup_expected_length <- nrow(ps$y_values)
      cgroup_excluded_values <- ps$excluded_rows
   }

   if (length(cgroup) != cgroup_expected_length && length(cgroup_excluded_values) > 0) {
      cgroup <- cgroup[-cgroup_excluded_values]
   }

   if (length(cgroup) != cgroup_expected_length) {
      stop("Parameter 'cgroup' does not match size of the dataset.")
   }

   return(list(
      col = mdaplot.getColors(cgroup = cgroup, colmap = colmap, opacity = opacity),
      colmap = colmap,
      cgroup = cgroup
   ))
}

#' Compute confidence ellipse for a set of points
#'
#' @param points
#' matrix of data frame with coordinates of the points
#' @param conf.level
#' confidence level for the ellipse
#' @param n
#' number of points in the ellipse coordinates
#'
#' @return
#' matrix with coordinates of the ellipse points (x and y)
#'
getConfidenceEllipse <- function(points, conf.level = 0.95, n = 100) {

   # compute igen vectors and values
   e <- eigen(cov(points))

   # get angle between the x-axis and the largest eigenvector
   phi <- atan2(e$vectors[[2]], e$vectors[[1]])
   if (phi < 0) phi <- phi + 2 * pi

   # compute center and radii
   chisq <- qchisq(conf.level, 2)
   center <- apply(points, 2, mean)
   radii <- sqrt(chisq * e$values)

   # generate vector of angles
   theta_grid <- seq(0, 2 * pi, length.out = n)

   # compute x and y coordinates of the ellipse
   ellipse <- cbind(radii[1] * cos(theta_grid), radii[2] * sin(theta_grid))

   # rotate the coordinates to the angle found earlier and shift to center of points
   R <- rbind(c(cos(phi), sin(phi)), c(-sin(phi), cos(phi)))
   ellipse <- sweep(ellipse %*% R, 2, center, "+")

   return(ellipse)
}

#' Compute coordinates of a closed convex hull for data points
#'
#' @param points
#' matrix of data frame with coordinates of the points
#'
#' @importFrom grDevices chull
#'
getConvexHull <- function(points) {
   ch_ind <- chull(points)
   ch_ind <- c(ch_ind, ch_ind[1])
   return(points[ch_ind, ])
}

#' Create labels from data values
#'
#' @param ps
#' `plotseries` object
#'
getLabelsAsValues <- function(ps) {
   y_values <- ps$y_values
   y_values_excluded <- ps$y_values_excluded

   if (ps$type %in% c("l", "b")) {
      labels <- apply(y_values, 2, max)
      labels_excluded <- if (!is.null(y_values_excluded)) apply(y_values_excluded, 2, max)
      return(list(labels = labels, labels_excluded = labels_excluded))
   }

   if (ps$type %in% c("h", "e")) {
      labels <- y_values[1, ]
      labels_excluded <- y_values_excluded[1, ]
      return(list(labels = labels, labels_excluded = labels_excluded))
   }

   # otherwise prepare labels as for scatter plot
   labels <- y_values
   labels_excluded <- y_values_excluded
   return(list(labels = labels, labels_excluded = labels_excluded))
}

#' Create labels as column or row indices
#'
#' @param ps
#' `plotseries` object
#'
getLabelsAsIndices <- function(ps) {

   # function which returns indices for columns or rows
   f <- function(nvalues, excluded_values) {
      indices <- 1:(nvalues + length(excluded_values))
      if (!is.null(excluded_values)) {
         indices <- indices[-excluded_values]
      }
      return(list(labels = indices, labels_excluded = excluded_values))
   }

   if (ps$type == "p") {
      # if scatter plot row indices are used and excluded rows also have indices
      return(f(nrow(ps$y_values), ps$excluded_rows))
   }

   # if non scatter plot, column indices are used and no hidden cols shown
   out <- f(length(ps$x_values), ps$excluded_cols)
   return(list(labels = out$labels, labels_excluded = NULL))
}

#' Create a vector with labels for plot series
#'
#' @param ps
#' `plotseries` object
#' @param labels
#' vector with user defined labels or type of labels to show ("values", "names", "indices")
#'
#' @description
#' For scatter plots labels correspond to rows of the data (names, values, indices, etc.). For
#' non-scatter plots labels correspond to the columns (names, indices or max value for each column)
#'
getDataLabels <- function(ps, labels = NULL) {
   y_values <- ps$y_values
   x_values <- ps$x_values

   # if labels are not specified - use names
   if (is.null(labels)) labels <- "names"

   # if user provided labels - use them
   if (length(labels) > 1) {
      excluded_cols <- ps$excluded_cols
      excluded_rows <- ps$excluded_rows

      # get values and excluded values (rows or cols depending on plot type)
      n_values <- if (ps$type == "p") nrow(y_values) else length(x_values)
      excluded_values <- if (ps$type == "p") excluded_rows else excluded_cols

      # check that labels were specified for all values
      if (length(labels) != (n_values + length(excluded_values))) {
         stop("Labels must be provided for all values (also ones which are excluded).")
      }

      if (is.null(excluded_values)) {
         labels_excluded <- NULL
         labels <- labels
         return(list(labels = labels, labels_excluded = labels_excluded))
      }

      labels_excluded <- labels[excluded_values]
      labels <- labels[-excluded_values]
      return(list(labels = labels, labels_excluded = labels_excluded))
   }

   # if labels must be values use y-values for that
   if (labels == "values") {
      return(getLabelsAsValues(ps))
   }

   # if labels must be indices use row or column indices
   if (labels == "indices") {
      return(getLabelsAsIndices(ps))
   }

   # if nothing above works - use names as labels
   labels <- names(x_values)
   labels_excluded <- if (ps$type == "p") names(ps$x_values_excluded) else NULL
   return(list(labels = labels, labels_excluded = labels_excluded))
}

#' Show labels on plot
#'
#' @param ps
#' `plotseries` object
#' @param show.excluded
#' logical, are excluded rows also shown on the plot
#' @param pos
#' position of the labels relative to the data points
#' @param cex
#' size of the labels text
#' @param col
#' color of the labels text
#' @param force.x.values
#' vector with forced x-values (or NULL)
#' @param bwd
#' bar width in case of bar plot
#'
showLabels <- function(ps, show.excluded = FALSE, pos = 3, cex = 0.65, col = "darkgray",
   force.x.values = NULL, bwd = 0.8) {

   f <- function(x, y, labels, type) {

      if (length(labels) == 0) stop("No labels available.")
      if (ps$type %in% c("h", "e")) y <- y[1, ]
      if (ps$type %in% c("l", "b")) y <- apply(y, 2, max)

      # correct x_values if they were forced by bwd
      if (is.numeric(force.x.values)) {
         x <- x - bwd / 2 + (force.x.values[1] - 0.5) * bwd / force.x.values[2]
         bwd <- bwd / force.x.values[2]
      }

      x <- as.numeric(x)
      y <- as.numeric(y)
      labels <- mdaplot.formatValues(labels)

      if (ps$type == "h") pos <- ifelse(y < 0, 1, 3)
      text(x, y, labels, cex = cex, pos = pos, col = col)
   }

   f(ps$x_values, ps$y_values, ps$labels, ps$type)

   if (show.excluded && !is.null(ps$labels_excluded)) {
      f(ps$x_values_excluded, ps$y_values_excluded, ps$labels_excluded, ps$type)
   }
}

#' Show plot series as set of points
#'
#' @param ps
#' `plotseries` object
#' @param pch
#' size of point markers
#' @param col
#' color of the points
#' @param bg
#' background color of the points if `pch=21:25`
#' @param lwd
#' line width for the error bars
#' @param cex
#' scale factor for the marker
#' @param col.excluded
#' color for excluded values (if must be shown)
#' @param pch.colinv
#' logical, should `col` and `bg` be switched if `pch=21:25` and `cgroup` is used to create colors.
#' @param show.excluded
#' logical, show or not the excluded data points
#' @param ...
#' other arguments for function `points()`.
#'
#' @export
plotScatter <- function(ps, pch = 16, col = ps$col, bg = "white", lwd = 1, cex = 1,
   col.excluded = "lightgray", pch.colinv = FALSE, show.excluded = FALSE, ...) {

   if (pch.colinv) {
      # switch colors for main and background in case pch=21:25
      bg.old <- bg
      bg <- col
      col <- bg.old
   }

   # show main set of points
   points(ps$x_values, ps$y_values, col = col, bg = bg, pch = pch, lwd = lwd, cex = cex, ...)

   # show excluded points if any
   if (show.excluded && !is.null(ps$y_values_excluded)) {
      points(ps$x_values_excluded, ps$y_values_excluded, col = col.excluded, pch = pch,
         lwd = lwd, cex = cex, ...)
   }
}

#' Show plot series as set of lines
#'
#' @param ps
#' `plotseries` object
#' @param col
#' a color for markers or lines (same as \code{plot} parameter).
#' @param lty
#' line type
#' @param lwd
#' line width
#' @param cex
#' scale factor for the marker
#' @param col.excluded
#' color for the excluded lines.
#' @param show.excluded
#' logical, show or not the excluded data points
#' @param ...
#' other arguments for function `lines()`.
#'
#' @export
plotLines <- function(ps, col = ps$col, lty = 1, lwd = 1, cex = 1,
   col.excluded = "darkgray", show.excluded = FALSE, ...) {

   # show main set of lines
   matlines(ps$x_values, t(ps$y_values), type = ps$type, col = col, lty = lty,
      lwd = lwd, cex = cex, ...)

   # show excluded rows
   if (show.excluded && !is.null(ps$y_values_excluded)) {
      matlines(ps$x_values_excluded, t(ps$y_values_excluded), type = ps$type,
         lty = lty, lwd = lwd, cex = cex, col = col.excluded, ...)
   }
}

#' Show plot series as error bars
#'
#' @param ps
#' `plotseries` object
#' @param col
#' color for the error bars
#' @param pch
#' marker symbol for the plot
#' @param lwd
#' line width for the error bars
#' @param cex
#' scale factor for the marker
#' @param ...
#' other arguments for function `points()`.
#'
#' @description
#' It is assumed that first row of dataset contains the y-coordinates of points,
#' second rows contains size of lower error bar and third - size for upper error bar. If
#' only two rows are provided it is assumed that error bars are symmetric.
#'
#' @export
plotErrorbars <- function(ps, col = ps$col, pch = 16, lwd = 1, cex = 1, ...) {

   x <- ps$x_values
   y <- ps$y_values

   dx <- diff(ps$xlim) / max(50, (length(x) * 3))

   segments(x, y[2, ], x, y[3, ], col = ps$col, lwd = lwd)
   segments(x - dx, y[2, ], x + dx, y[2, ], col = ps$col, lwd = lwd)
   segments(x - dx, y[3, ], x + dx, y[3, ], col = ps$col, lwd = lwd)
   points(x, y[1, ], col = col, pch = pch, cex = cex, ...)
}

#' Show plot series as bars
#'
#'
#' @param ps
#' `plotseries` object
#' @param col
#' colors of the bars
#' @param bwd
#' width of the bars (as a ratio for max width)
#' @param border
#' color of bar edges
#' @param force.x.values
#' vector with corrected x-values for a bar plot (needed for group plots, do not change manually).
#'
#' @description
#' First row of the data matrix is taken for creating the bar series. In case of barplot
#' color grouping is made based on columns (not rows as for all other plots).
#'
#' @export
plotBars <- function(ps, col = ps$col, bwd = 0.8, border = NA, force.x.values = NA) {
   x <- ps$x_values
   y <- ps$y_values[1, ]

   if (length(x) > 1) {
      dx <- min(diff(x))
      bwd_right <- bwd_left <- dx * bwd / 2
   } else {
      bwd_left <- bwd_right <- bwd * x / 2
   }

   # correct x_values if they were forced by bwd
   if (is.numeric(force.x.values)) {
      x <- x - bwd / 2 + (force.x.values[1] - 0.5) * bwd / force.x.values[2]
      bwd_left <- bwd_right <- bwd / force.x.values[2] / 2
   }

   if (length(col) != length(y)) {
      col <- rep(col, length.out = length(y))
   }

   rect(x - bwd_left, 0, x + bwd_right, y, col = col, border = border)
}

#' Show plot series as density plot (using hex binning)
#'
#' @param ps
#' `plotseries` object
#' @param nbins
#' number of bins in one dimension
#' @param colmap
#' colormap name or values used to create color gradient
#'
plotDensity <- function(ps, nbins = 60, colmap = ps$colmap) {
   x <- ps$x_values
   y <- ps$y_values

   # Compute coordinates of nearest lattice center
   getNearest <- function(value, scale, round = 1) {
      div <- floor(value / (scale / 2))
      return(scale / 2 * (div + as.numeric(div %% 2 == round)))
   }

   # Compute x or y coordinates of hexagons around given centers coordinates
   getHexpoints <- function(center_coord, r, f = sin) {
      a <- seq(30, 390, by = 60) / 180 * pi
      nrow <- length(a)
      ncol <- length(center_coord)
      hex_coord <-
         matrix(r * f(a), nrow = nrow, ncol = ncol) +
         matrix(center_coord, nrow = nrow, ncol = ncol, byrow = TRUE)
      hex_coord <- rbind(hex_coord, rep(NA, ncol))
      return(as.numeric(hex_coord))
   }

   # Compute x coordinates of hexagons
   getHexpointsX <- function(x, rx) {
      return(getHexpoints(x, rx, cos))
   }

   # Compute y coordinates of hexagons
   getHexpointsY <- function(y, ry) {
      return(getHexpoints(y, ry, sin))
   }

   # compute size of bins based on range of data values and number of bins
   x_range <- diff(range(x))
   y_range <- diff(range(y))

   bin_width <- x_range / nbins
   bin_height <- y_range / nbins

   # compute coordinates of hexbin lattice based on points coordinates

   ## basic lattice
   xx1 <- getNearest(x, bin_width, 1)
   yy1 <- getNearest(y, bin_height * sqrt(3), 1)

   ## lattice shifted by 0.5 of bin width/height related to the basic
   xx2 <- getNearest(x, bin_width, 0)
   yy2 <- getNearest(y, bin_height * sqrt(3), 0)

   # find which of the two fits every point best
   d1 <- (x - xx1)^2 + (y - yy2)^2
   d2 <- (x - xx2)^2 + (y - yy2)^2

   xx <- ifelse(d1 < d2, xx1, xx2)
   yy <- ifelse(d1 < d2, yy1, yy2)

   # compute number of points around each lattice center
   density <- as.data.frame(table(xx, yy, exclude = FALSE))
   density[, 1] <- as.numeric(as.character(density[, 1]))
   density[, 2] <- as.numeric(as.character(density[, 2]))

   # remove centers with no points around
   density <- density[density[, 3] > 0, ]
   lattice_x <- density[, 1]
   lattice_y <- density[, 2]
   density <- density[, 3]

   # compute coordinates of hexagons as vectors
   rx <- bin_width / 2 / cos(30 / 180 * pi)
   ry <- bin_height / 2 / cos(30 / 180 * pi)
   hex_x <- getHexpointsX(lattice_x, rx)
   hex_y <- getHexpointsY(lattice_y, ry)

   # add polygons to the current axes
   polygon(hex_x, hex_y, border = NA, col = mdaplot.getColors(cgroup = density, colmap = colmap))
}

#' Add confidence ellipse for groups of points on scatter plot
#'
#' @param p
#' plot data returned by function `mdaplot()`.
#' @param lwd
#' thickness of line used to show the hull.
#' @param lty
#' type of line used to show the hull.
#' @param conf.level
#' confidence level to make the ellipse for (between 0 and 1).
#' @param opacity
#' of opacity is 0 ellipse is transparent otherwise semi-transparent.
#'
#' @description
#' The method shows confidence ellipse for groups of points on a scatter plot made using
#' `mdaplot()` function with `cgroup` parameter. It will work only if `cgroup` is a factor.
#'
#' @examples
#' # adds 90% confidence ellipse with semi-transparent area over two clusters of points
#'
#' library(mdatools)
#' data(people)
#' group <- factor(people[, "Sex"], labels = c("Male", "Female"))
#'
#' # first make plot and then add confidence ellipse
#' p <- mdaplot(people, type = "p", cgroup = group)
#' plotConfidenceEllipse(p, conf.level = 0.90, opacity = 0.2)
#'
#' @export
plotConfidenceEllipse <- function(p, conf.level = 0.95, lwd = 1, lty = 1, opacity = 0) {

   plotPointsShape(
      p,
      lwd = lwd,
      lty = lty,
      opacity = opacity,
      shape_function = getConfidenceEllipse,
      conf.level = conf.level
   )
}

#' Add convex hull for groups of points on scatter plot
#'
#' @param p
#' plot data returned by function `mdaplot()`.
#' @param lwd
#' thickness of line used to show the hull.
#' @param lty
#' type of line used to show the hull.
#' @param opacity
#' of opacity is larger than 0 a semi-transparent polygon is shown over points.
#'
#' @description
#' The method shows convex hull for groups of points on a scatter plot made using
#' `mdaplot()` function with `cgroup` parameter. It will work only if `cgroup` is a factor.
#'
#' @examples
#' # adds convex hull with semi-transparent area over two clusters of points
#'
#' library(mdatools)
#' data(people)
#' group <- factor(people[, "Sex"], labels = c("Male", "Female"))
#'
#' p <- mdaplot(people, type = "p", cgroup = group)
#' plotConvexHull(p)
#'
#' @importFrom grDevices chull
#' @importFrom graphics polygon
#'
#' @export
plotConvexHull <- function(p, lwd = 1, lty = 1, opacity = 0) {
   plotPointsShape(
      p,
      lwd = lwd,
      lty = lty,
      opacity = opacity,
      shape_function = getConvexHull
   )
}

#' Add confidence ellipse or convex hull for group of points
#'
#' @param p
#' plot data returned by function `mdaplot()`
#' @param lwd
#' thickness of line used to show the hull
#' @param lty
#' type of line used to show the hull
#' @param opacity
#' of opacity is larger than 0 a semi-transparent polygon is shown over points
#' @param shape_function
#' function which calculates and return coordinates of the shape
#' @param ...
#' extra parameters for shape_function
#'
#' @importFrom graphics polygon
#'
#' @export
plotPointsShape <- function(p, lwd, lty, opacity, shape_function, ...) {

   x <- p$x_values
   y <- p$y_values
   cgroup <- p$cgroup
   type <- p$type

   if (type != "p") {
      stop("Shape can be added only to scatter plots.")
   }

   if (!is.factor(cgroup)) {
      stop("Parameter 'cgroup' must be a factor if you want to show points shape.")
   }

   if (length(unique(cgroup)) != length(unique(p$col))) {
      stop("Number of colors should be the same as number of levels in parameter 'cgroup'.")
   }

   col <- split(p$col, f = p$cgroup, drop = TRUE)
   d <- split(data.frame(x, y), f = p$cgroup, drop = TRUE)

   plot_function <- function(i) {
      # compute indices for convex hull points and draw the hull
      shape <- shape_function(d[[i]], ...)
      color <- col[[i]][1]
      lines(shape[, 1], shape[, 2], col = color, lwd = lwd, lty = lty)

      # if opacity is not zero add a polygon on top
      if (opacity > 0) {
         polygon(shape[, 1], shape[, 2], border = NA, col = adjustcolor(color, alpha.f = opacity))
      }
   }

   sapply(seq_len(length(d)), plot_function)
   invisible(NULL)
}

#' Add regression line for data points
#'
#' @description
#' Shows linear fit line for data points.
#'
#' @param p
#' plot data returned by function `mdaplot()`
#' @param col
#' color of line
#' @param ...
#' other parameters available for `abline()` function
#'
plotRegressionLine <- function(p, col = p$col, ...) {
   abline(lm(p$y_values ~ p$x_values), col = col, ...)
}

#' Hotelling ellipse
#'
#' @description
#' Add Hotelling ellipse to a scatter plot
#'
#' @param p
#' plot series (e.g. from PCA scores plot)
#' @param conf.lim
#' confidence limit
#' @param col
#' color of the ellipse line
#' @param lty
#' line type (e.g. 1 for solid, 2 for dashed, etc.)
#' @param ...
#' any argument suitable for \code{lines} function
#'
#' @details
#' The method is created to be used with PCA and PLS scores plots, so it shows the statistical
#' limits computed using Hotelling T^2 distribution in form of ellipse. The function works similar
#' to \code{\link{plotConvexHull}} and \code{\link{plotConfidenceEllipse}} but does not require
#' grouping of data points. Can be used together with functions \code{\link{plotScores.pca}},
#' \code{\link{plotScores.ldecomp}}, \code{\link{plotXScores.pls}},
#' \code{\link{plotXScores.plsres}}.
#'
#' See examples for more details.
#'
#' @examples
#'
#' # create PCA model for People data
#' data(people)
#' m <- pca(people, 4, scale = TRUE)
#'
#' # make scores plot and show Hotelling ellipse with default settings
#' p <- plotScores(m, xlim = c(-8, 8), ylim = c(-8, 8))
#' plotHotellingEllipse(p)
#'
#' # make scores plot and show Hotelling ellipse with manual settings
#' p <- plotScores(m, xlim = c(-8, 8), ylim = c(-8, 8))
#' plotHotellingEllipse(p, conf.lim = 0.99, col = "red")
#'
#' # in case if you have both calibration and test set, 'plotScores()' returns
#' # plot series data for both, so you have to subset it and take the first series
#' # (calibration set) as shown below.
#' ind <- seq(1, 32, by = 4)
#' xc <- people[-ind, , drop = FALSE]
#' xt <- people[ind, , drop = FALSE]
#' m <- pca(xc, 4, scale = TRUE, x.test = xt)
#'
#' p <- plotScores(m, xlim = c(-8, 8), ylim = c(-8, 8))
#' plotHotellingEllipse(p[[1]])
#'
#' @export
plotHotellingEllipse <- function(p, conf.lim = 0.95, col = "#a0a0a0", lty = 3, ...) {

   if (!inherits(p, "plotseries")) {
      stop("Argument 'p' does not look like a plot series, try 'p[[1]]' instead.")
   }

   if (p$type != "p") {
      stop("Hotelling ellipse can be added to scatter plot only.")
   }

   t1 <- as.numeric(p$x_values)
   t2 <- as.numeric(p$y_values)

   s1 <- sd(t1)^2
   s2 <- sd(t2)^2

   nobj <- length(t1)
   T2lim <- (2 * (nobj - 1) / (nobj - 2)) * qf(conf.lim, 2, (nobj - 2))

   a <- sqrt(T2lim * s1)
   b <- sqrt(T2lim * s2)

   ellipse(a = a, b = b, col = col, lty = lty, ...)
}

#' Create ellipse on the current plot
#'
#' @param xc
#' coordinate of center (x)
#' @param yc
#' coordinate of center (y)
#' @param a
#' major axis
#' @param b
#' minor axis
#' @param col
#' color of the ellipse line
#' @param lty
#' type of the ellipse line
#' @param ...
#' any argument suitable for \code{lines} function
#'
#' @export
ellipse <- function(xc = 0, yc = 0, a, b, col = "black", lty = 1, ...) {
   t <- seq(0, 2 * pi, 0.01)
   x <- xc + a * cos(t)
   y <- yc + b * sin(t)
   lines(x, y, type = "l", col = col, lty = lty, ...)
}

Try the mdatools package in your browser

Any scripts or data that you put into this service are public.

mdatools documentation built on Aug. 13, 2023, 1:06 a.m.