R/examine.R

Defines functions pander.examine print.examine examine

Documented in examine pander.examine print.examine

#' Examine one or more variables
#'
#' These functions are one of many R functions enabling users to assess
#' variable descriptives. They have been developed to mimic SPSS' 'EXAMINE'
#' syntax command ('Explore' in the menu) as closely as possible to ease the
#' transition for new R users and facilitate teaching courses where both
#' programs are taught alongside each other.
#'
#' This function basically just calls the \code{\link{descr}} function,
#' optionally supplemented with calls to \code{\link{stem}},
#' [ufs::dataShape()].
#'
#' @param \dots The first argument is a list of variables to provide
#' descriptives for. Because these are the first arguments, the other arguments
#' must be named explicitly so R does not confuse them for something that
#' should be part of the dots.
#' @param stem Whether to display a stem and leaf plot.
#' @param plots Whether to display the plots generated by the
#' [ufs::dataShape()] function.
#' @param extremeValues How many extreme values to show at either end (the
#' highest and lowest values). When set to FALSE (or 0), no extreme values are
#' shown.
#' @param qqCI Whether to display confidence intervals in the QQ-plot.
#' @param conf.level The level of confidence of the confidence interval.
#' @return A list that is displayed when printed.
#' @author Gjalt-Jorn Peters
#'
#' Maintainer: Gjalt-Jorn Peters <gjalt-jorn@@userfriendlyscience.com>
#' @examples
#'
#' ### Look at the miles per gallon descriptives:
#' rosetta::examine(mtcars$mpg, stem=FALSE, plots=FALSE);
#'
#' ### Separate for the different number of cylinders:
#' rosetta::examineBy(
#'   mtcars$mpg, by=mtcars$cyl,
#'   stem=FALSE, plots=FALSE,
#'   extremeValues=FALSE
#' );
#'
#'
#' @rdname examine
#' @export
examine <- function(..., stem=TRUE, plots=TRUE,
                    extremeValues = 5,
                    qqCI=TRUE, conf.level=.95) {

  originalVarNames <- unlist(as.list(substitute(list(...)))[-1]);
  dotList <- list(...);

  if (!is.null(dotList$plot) && dotList$plot) {
    plots <- TRUE;
    dotList$plot <- NULL;
    originalVarNames$plot <- NULL;
  }

  varNames <- NULL;

  if (length(dotList) == 1) {
    dat <- dotList[[1]];
    if (is.data.frame(dat)) {
      vectorList <- dat;
      varNames <- names(dat);
    } else {
      vectorList <- dotList;
    }
  } else {
    vectorList <- dotList;
  }

  if (is.null(varNames)) {
    varNames <- originalVarNames;
  }

  ### Call functions to explore the variables
  res <- lapply(vectorList, function(x) {
    rsl <- list();

    # if (is.null(descr.include)) {
    #   rsl$descr <- descr(x, conf.level=conf.level);
    # } else {
      rsl$descr <- descr(x, conf.level=conf.level);
    # }

    tmpDf <- data.frame(rowNr = 1:length(x), value=x);
    tmpDf <- tmpDf[order(x), ];

    if (extremeValues) {
      rsl$xtrm <- list(lo = utils::head(stats::na.omit(tmpDf), extremeValues),
                       hi = utils::tail(stats::na.omit(tmpDf), extremeValues));
    }

    if (plots) {
      if (is.numeric(x)) {
        rsl$dataShapePlot <- suppressWarnings(ufs::dataShape(x, qqCI=qqCI)$output$plot);
      } else {
        rsl$dataShapePlot <- ggBarChart(x);
      }
    }

    if (stem) {
      rsl$stem <- paste0(
        utils::capture.output(
          graphics::stem(as.numeric(x))),
        collapse="\n"
      );
    }

    return(rsl);
  });

  ### Set the variable names
  names(res) <- varNames;

  ### Store them as variable names in the descriptives objects
  for (index in 1:length(res)) {
    attr(res[[index]]$descr, 'varName') <- names(res)[index];
  }

  # if (length(res) == 1) {
  #   res <- res[[1]];
  # }

  ### Set class for correct printing and return result
  class(res) <- 'examine';
  return(res);
}

#' @rdname examine
#' @export
#' @method print examine
print.examine <- function(x, ...) {
  for (currentName in names(x)) {
    if (inherits(x[[currentName]]$descr, 'freq')) {
      cat0("###### Frequencies for ", currentName, ":\n\n");
    }
    print(x[[currentName]]$descr);
    if (!is.null(x[[currentName]]$xtrm)) {
      cat0("\n");
      cat("###### Rows with lowest values:\n");
      print(x[[currentName]]$xtrm$lo[, 'value', drop=FALSE]);
      cat("\n###### Rows with highest values:\n");
      print(x[[currentName]]$xtrm$hi[, 'value', drop=FALSE]);
    }
    if (!is.null(x[[currentName]]$stem)) {
      cat("\n###### Stem and leaf plot:\n", x[[currentName]]$stem, "\n");
    }
    if (!is.null(x[[currentName]]$dataShapePlot)) {
      gridExtra::grid.arrange(grid::textGrob(paste0('Histogram, Q-Q plot & boxplot for ',
                                                    ufs::extractVarName(currentName)),
                            gp=grid::gpar(fontsize=14)),
                   x[[currentName]]$dataShapePlot,
                   ncol=1, heights=c(.1, .9));
    }
    cat('\n');
  }
}

#' @method pander examine
#' @rdname examine
#' @importFrom pander pander
#' @export
pander.examine <- function(x, headerPrefix = "", headerStyle = "**",
                           secondaryHeaderPrefix = "",
                           secondaryHeaderStyle="*", ...) {
  for (currentName in names(x)) {
    cat0("\n\n", headerPrefix, headerStyle,
         ufs::extractVarName(currentName),
         headerStyle, "\n\n");
    cat(pander::pander(x[[currentName]]$descr,
               headerPrefix=secondaryHeaderPrefix,
               headerStyle=secondaryHeaderStyle));
    if (!is.null(x[[currentName]]$xtrm)) {
      cat("\n\n");
      cat0(secondaryHeaderPrefix, secondaryHeaderStyle,
          "Rows with lowest values:",
          secondaryHeaderStyle, "  \n\n");
      pander::pander(t(x[[currentName]]$xtrm$lo[, 'value', drop=FALSE]));
      cat("\n\n");
      cat0(secondaryHeaderPrefix, secondaryHeaderStyle,
          "Rows with highest values:",
          secondaryHeaderStyle, "  \n\n");
      pander::pander(t(x[[currentName]]$xtrm$hi[, 'value', drop=FALSE]));
    }
    if (!is.null(x[[currentName]]$stem)) {
      cat("\n\n");
      cat0(secondaryHeaderPrefix, secondaryHeaderStyle,
           "Stem and leaf plot:",
           secondaryHeaderStyle, "  \n", x[[currentName]]$stem);
    }
    cat("\n");
    if (!is.null(x[[currentName]]$dataShapePlot)) {
      plotTitle <- ifelse('gTree' %in% class(x[[currentName]]$dataShapePlot),
                          paste0('Histogram, Q-Q plot & boxplot for ', ufs::extractVarName(currentName)),
                          paste0('Barchart for ', ufs::extractVarName(currentName)));
      gridExtra::grid.arrange(grid::textGrob(plotTitle,
                            gp=grid::gpar(fontsize=14)),
                   x[[currentName]]$dataShapePlot,
                   ncol=1, heights=c(.1, .9));
    }
  }
}

Try the rosetta package in your browser

Any scripts or data that you put into this service are public.

rosetta documentation built on March 7, 2023, 7:40 p.m.