R/regr.R

Defines functions print.rosettaRegr knit_print.rosettaRegr rosettaRegr_partial regr

Documented in knit_print.rosettaRegr print.rosettaRegr regr rosettaRegr_partial

#' regr: a simple regression analysis wrapper
#'
#' The \code{regr} function wraps a number of linear regression functions into
#' one convenient interface that provides similar output to the regression
#' function in SPSS. It automatically provides confidence intervals and
#' standardized coefficients. Note that this function is meant for teaching
#' purposes, and therefore it's only for very basic regression analyses; for
#' more functionality, use the base R function `lm` or e.g. the `lme4` package.
#'
#' @param formula The formula of the regression analysis, of the form \code{y ~
#' x1 + x2}, where y is the dependent variable and x1 and x2 are the
#' predictors.
#' @param data If the terms in the formula aren't vectors but variable names,
#' this should be the dataframe where those variables are stored.
#' @param conf.level The confidence of the confidence interval around the
#' regression coefficients.
#' @param digits Number of digits to round the output to.
#' @param pvalueDigits The number of digits to show for p-values; smaller
#' p-values will be shown as <.001 or <.0001 etc.
#' @param coefficients Which coefficients to show; can be "raw" to only show
#' the raw (unstandardized) coefficients; "scaled" to only show the scaled
#' (standardized) coefficients), or c("raw", "scaled') to show both.
#' @param plot For regression analyses with only one predictor (also sometimes
#' confusingly referred to as 'univariate' regression analyses), scatterplots
#' with regression lines and their standard errors can be produced.
#' @param pointAlpha The alpha channel (transparency, or rather: 'opaqueness')
#' of the points drawn in the plot.
#' @param collinearity Whether to compute and show collinearity diagnostics
#' (specifically, the tolerance (\emph{1 - R^2}, where \emph{R^2} is the one
#' obtained when regressing each predictor on all the other predictors) and the
#' Variance Inflation Factor (VIF), which is the reciprocal of the tolerance,
#' i.e. \emph{VIF = 1 / tolerance}).
#' @param influential Whether to compute diagnostics for influential cases.
#' These are stored in the returned object in the \code{lm.influence.raw} and
#' \code{lm.influence.scaled} objects in the \code{intermediate} object. They
#' are not printed.
#' @param ci.method,ci.method.note Which method to use for the confidence
#' interval around R squared, and whether to display a note about this choice.
#' @param headingLevel The number of hashes to print in front of the headings
#' when printing while knitting
#' @param x The object to print (i.e. as produced by `regr`).
#' @param forceKnitrOutput Force knitr output.
#' @param quiet Passed on to [knitr::knit()] whether it should b
#'  chatty (`FALSE`) or quiet (`TRUE`).
#' @param echoPartial Whether to show the executed code in the R Markdown
#' partial (`TRUE`) or not (`FALSE`).
#' @param partialFile This can be used to specify a custom partial file. The
#' file will have object `x` available.
#' @param \dots Any additional arguments are passed to the default print method
#' by the print method, and to [rmdpartials::partial()] when knitting an
#' RMarkdown partial.
#' @param env The enviroment where to evaluate the formula.
#' @return A list of three elements: \item{input}{List with input arguments}
#' \item{intermediate}{List of intermediate objects, such as the lm and confint
#' objects.} \item{output}{List with two dataframes, one with the raw
#' coefficients, and one with the scaled coefficients.}
#' @author Gjalt-Jorn Peters
#'
#' Maintainer: Gjalt-Jorn Peters <gjalt-jorn@@userfriendlyscience.com>
#' @examples
#'
#' ### Do a simple regression analysis
#' rosetta::regr(age ~ circumference, dat=Orange);
#'
#' ### Show more digits for the p-value
#' rosetta::regr(Orange$age ~ Orange$circumference, pvalueDigits=18);
#'
#' \dontrun{
#' ### An example with an interaction term, showing in the
#' ### viewer
#' rosetta::rosettaRegr_partial(
#'   rosetta::regr(
#'     mpg ~ wt + hp + wt:hp,
#'     dat=mtcars,
#'     coefficients = "raw",
#'     plot=TRUE,
#'     collinearity=TRUE
#'   )
#' );
#' }
#'
#' @rdname rosettaRegr
#' @export regr
regr <- function(formula, data=NULL, conf.level=.95, digits=2,
                 pvalueDigits = 3, coefficients=c("raw", "scaled"),
                 plot=FALSE, pointAlpha = .5,
                 collinearity = FALSE, influential = FALSE,
                 ci.method = c("widest", "r.con", "olkinfinn"),
                 ci.method.note = FALSE,
                 headingLevel = 3, env=parent.frame()) {

  dat <- data;

  ### Generate object to store input, intermediate outcomes, and results
  res <- list(input = as.list(environment()),
              intermediate = list(),
              output = list());

  ### Extract variables from formula
  res$intermediate$variableNames <- all.vars(formula);

  ### Convert formula to a character string
  res$intermediate$formula.as.character <-
    paste0(as.character(formula)[c(2, 1, 3)], collapse=" ");

  if (is.null(dat)) {
    ### Extract variables from formula
    res$intermediate$variables <-
      as.character(as.list(attr(stats::terms(formula), 'variables'))[-1]);

    ### Store variablesnames only for naming in raw dataframe
    res$intermediate$variables_namesOnly <- unlist(
      lapply(strsplit(res$intermediate$variables, "\\$"), utils::tail, 1));

    ### Store variables in a dataframe
    res$intermediate$dat.raw <- list();
    for (varToGet in 1:length(res$intermediate$variables)) {
      res$intermediate$dat.raw[[res$intermediate$variables_namesOnly[varToGet]]] <-
        eval(parse(text=res$intermediate$variables[varToGet]), envir=env);
    }
    ### Convert the list to a dataframe
    res$intermediate$dat.raw <- data.frame(res$intermediate$dat.raw);

    ### Convert variable names to bare variable names
    for (currentVariableIndex in 1:length(res$intermediate$variables)) {
      res$intermediate$formula.as.character <-
        gsub(res$intermediate$variables[currentVariableIndex],
             res$intermediate$variables_namesOnly[currentVariableIndex],
             res$intermediate$formula.as.character, fixed=TRUE);
    }

  } else {
    ### Store variables in a dataframe
    res$intermediate$dat.raw <- dat[, res$intermediate$variableNames];
    res$intermediate$variables_namesOnly <- res$intermediate$variableNames;
  }

  res$intermediate$formula <- formula(res$intermediate$formula.as.character,
                                      env = environment());

  ### Standardize numeric variables
  res$intermediate$dat.scaled <- res$intermediate$dat.raw;
  res$intermediate$dat.scaled[, sapply(res$intermediate$dat.scaled, is.numeric)] <-
    as.data.frame(scale(res$intermediate$dat.scaled[, sapply(res$intermediate$dat.scaled, is.numeric)]));

  ### Run and store lm objects
  res$intermediate$lm.raw <-
    stats::lm(formula=res$intermediate$formula, data=res$intermediate$dat.raw);
  res$intermediate$lm.scaled <-
    stats::lm(formula=res$intermediate$formula, data=res$intermediate$dat.scaled);

  ### R^2 confidence interval based on formula at
  ### http://www.danielsoper.com/statcalc3/calc.aspx?id=28
  res$intermediate$rsq <- rsq <- summary(res$intermediate$lm.raw)$r.squared;
  res$intermediate$k <- k <- length(res$intermediate$lm.raw$terms) - 1;
  res$intermediate$n <- n <- res$intermediate$lm.raw$df.residual + k + 1;
  res$intermediate$rsq.se <- sqrt((4*rsq*(1-rsq)^2*(n-k-1)^2)/
                                  ((n^2-1)*(3+n)));
  res$intermediate$rsq.t.crit <- stats::qt(p=1-(1-conf.level)/2, df=n-k-1);
  res$output$rsq.ci.olkinfinn <- c(res$intermediate$rsq -
                                   res$intermediate$rsq.t.crit *
                                   res$intermediate$rsq.se,
                                 res$intermediate$rsq +
                                   res$intermediate$rsq.t.crit *
                                   res$intermediate$rsq.se);

  res$output$rsq.ci.olkinfinn <- ifelse(res$output$rsq.ci.olkinfinn < 0,
                                        0, res$output$rsq.ci.olkinfinn);

  res$output$rsq.ci.r.con <- psych::r.con(sqrt(rsq), n);
  res$output$rsq.ci.r.con <- ifelse(res$output$rsq.ci.r.con < 0,
                                    0, res$output$rsq.ci.r.con) ^ 2;

  if ("widest" %IN% ci.method) {
    res$output$rsq.ci <- ifelse(range(res$output$rsq.ci.r.con) >
                                range(res$output$rsq.ci.olkinfinn),
                                res$output$rsq.ci.r.con,
                                res$output$rsq.ci.olkinfinn);
  } else if ("r.con" %IN% ci.method) {
    res$output$rsq.ci <- res$output$rsq.ci.r.con;
  } else {
    res$output$rsq.ci <- res$output$rsq.ci.olkinfinn;
  }


  ### Run confint on lm object
  res$intermediate$confint.raw <-
    stats::confint(res$intermediate$lm.raw, level=conf.level);
  res$intermediate$confint.scaled <-
    stats::confint(res$intermediate$lm.scaled, level=conf.level);

  ### Run lm.influence on lm object
  res$intermediate$lm.influence.raw <-
    stats::lm.influence(res$intermediate$lm.raw);
  res$intermediate$lm.influence.scaled <-
    stats::lm.influence(res$intermediate$lm.scaled);

  ### Get variance inflation factors and compute tolerances
  if (collinearity && (length(res$intermediate$variables) > 2)) {
    res$intermediate$vif.raw <- car::vif(res$intermediate$lm.raw);
    res$intermediate$vif.scaled <- car::vif(res$intermediate$lm.scaled);
    if (is.vector(res$intermediate$vif.raw)) {
      res$intermediate$tolerance.raw <- 1/res$intermediate$vif.raw;
      res$intermediate$tolerance.scaled <- 1/res$intermediate$vif.scaled;
    }
  }

  ### get summary for lm objects
  res$intermediate$summary.raw <- summary(res$intermediate$lm.raw);
  res$intermediate$summary.scaled <- summary(res$intermediate$lm.scaled);

  ### Generate output
  res$output$coef.raw <- cbind(data.frame(res$intermediate$confint.raw[, 1],
                                          res$intermediate$confint.raw[, 2]),
                               res$intermediate$summary.raw$coefficients);
  res$output$coef.scaled <- cbind(data.frame(res$intermediate$confint.scaled[, 1],
                                             res$intermediate$confint.scaled[, 2]),
                                  res$intermediate$summary.scaled$coefficients);

  names(res$output$coef.raw) <- names(res$output$coef.scaled) <-
    c(paste0(conf.level*100,"% CI, lo"),
      paste0(conf.level*100,"% CI, hi"),
      'estimate', 'se', 't', 'p');

  if (plot) {
    res$intermediate$dat.plot <-
      res$intermediate$dat.raw[
        stats::complete.cases(res$intermediate$dat.raw),
      ];
    if (length(res$intermediate$variables_namesOnly) == 2) {
      res$output$plot <-
        ggplot2::ggplot(res$intermediate$dat.plot,
                        ggplot2::aes_string(y=res$intermediate$variables_namesOnly[1],
                        x=res$intermediate$variables_namesOnly[2])) +
        ggplot2::geom_point(alpha = pointAlpha,
                            position = ggplot2::position_jitter()) +
        ggplot2::geom_smooth(method='lm', formula = "y ~ x") +
        ggplot2::theme_bw();
    } else if (nrow(res$output$coef.raw) == 4) {
    ### Used to be fourth one is the interaction term
    #} else if ((length(res$intermediate$variables_namesOnly) == 3)) {

      if (is.numeric(res$intermediate$dat.plot[, res$intermediate$variables_namesOnly[2]]) &&
          is.numeric(res$intermediate$dat.plot[, res$intermediate$variables_namesOnly[3]])) {
        ### Both numeric, so we treat the second one as moderator and compute the line

        predictorName <- res$intermediate$variables_namesOnly[2];
        moderatorName <- res$intermediate$variables_namesOnly[3];

        predictorMean <- mean(res$intermediate$dat.plot[, predictorName],
                              na.rm=TRUE);
        predictorSD <- stats::sd(res$intermediate$dat.plot[, predictorName],
                          na.rm=TRUE);
        # loPredictorValue <- predictorMean - predictorSD;
        # hiPredictorValue <- predictorMean + predictorSD;
        loPredictorValue <- min(res$intermediate$dat.plot[, predictorName],
                                na.rm=TRUE);
        hiPredictorValue <- max(res$intermediate$dat.plot[, predictorName],
                                na.rm=TRUE);

        moderatorMean <- mean(res$intermediate$dat.plot[, moderatorName],
                              na.rm=TRUE);
        moderatorSD <- stats::sd(res$intermediate$dat.plot[, moderatorName],
                                 na.rm=TRUE);
        loModeratorValue <- moderatorMean - moderatorSD;
        hiModeratorValue <- moderatorMean + moderatorSD;

        intercept <- res$output$coef.raw['(Intercept)', 'estimate'];
        predictorSlope <- res$output$coef.raw[predictorName, 'estimate'];
        moderatorSlope <- res$output$coef.raw[moderatorName, 'estimate'];
        interactionSlope <- res$output$coef.raw[paste0(predictorName,
                                                       ":",
                                                       moderatorName),
                                                'estimate'];

        depVarValue_loPred_loMod <- intercept +
          loPredictorValue * predictorSlope +
          loModeratorValue * moderatorSlope +
          loPredictorValue * loModeratorValue * interactionSlope;
        depVarValue_loPred_hiMod <- intercept +
          loPredictorValue * predictorSlope +
          hiModeratorValue * moderatorSlope +
          loPredictorValue * hiModeratorValue * interactionSlope;
        depVarValue_hiPred_loMod <- intercept +
          hiPredictorValue * predictorSlope +
          loModeratorValue * moderatorSlope +
          hiPredictorValue * loModeratorValue * interactionSlope;
        depVarValue_hiPred_hiMod <- intercept +
          hiPredictorValue * predictorSlope +
          hiModeratorValue * moderatorSlope +
          hiPredictorValue * hiModeratorValue * interactionSlope;

        moderatorDat <- data.frame(x = c(loPredictorValue, loPredictorValue),
                                   xend = c(hiPredictorValue, hiPredictorValue),
                                   y = c(depVarValue_loPred_loMod, depVarValue_loPred_hiMod),
                                   yend = c(depVarValue_hiPred_loMod, depVarValue_hiPred_hiMod),
                                   modVal = c("Low", "High"));
        names(moderatorDat)[5] <- res$intermediate$variables_namesOnly[3];

        res$intermediate$moderatorDat <- moderatorDat;

        res$output$plot <-
          ggplot2::ggplot(res$intermediate$dat.plot,
                          ggplot2::aes_string(y=res$intermediate$variables_namesOnly[1],
                                              x=res$intermediate$variables_namesOnly[2])) +
          ggplot2::geom_point(alpha = pointAlpha,
                              position = ggplot2::position_jitter()) +
          ggplot2::geom_smooth(method='lm', formula = "y ~ x") +
          ggplot2::theme_bw() +
          ggplot2::geom_segment(data = moderatorDat,
                                ggplot2::aes_string(x = 'x', xend = 'xend',
                                                    y = 'y', yend = 'yend',
                                                    group = res$intermediate$variables_namesOnly[3],
                                                    color = res$intermediate$variables_namesOnly[3]),
                                size=1) +
          ggplot2::scale_color_viridis_d();

      } else if ((is.numeric(res$intermediate$dat.plot[, res$intermediate$variables_namesOnly[2]]) &&
                 (is.factor(res$intermediate$dat.plot[, res$intermediate$variables_namesOnly[3]]))) ||
                 (is.numeric(res$intermediate$dat.plot[, res$intermediate$variables_namesOnly[3]]) &&
                 (is.factor(res$intermediate$dat.plot[, res$intermediate$variables_namesOnly[2]])))) {
        ### One of the predictors is a factor, so we just plot lines for each value
        if (is.numeric(res$intermediate$dat.plot[, res$intermediate$variables_namesOnly[2]])) {
          predictor <- res$intermediate$variables_namesOnly[2];
          moderator <- res$intermediate$variables_namesOnly[3];
        } else {
          predictor <- res$intermediate$variables_namesOnly[3];
          moderator <- res$intermediate$variables_namesOnly[2];
        }
        res$output$plot <-
          ggplot2::ggplot(res$intermediate$dat.plot,
                          ggplot2::aes_string(y=res$intermediate$variables_namesOnly[1],
                                              x=predictor)) +
          ggplot2::geom_point(alpha = pointAlpha,
                              position = ggplot2::position_jitter()) +
          ggplot2::geom_smooth(method='lm', formula = y ~ x) +
          ggplot2::theme_bw() +
          ggplot2::geom_smooth(ggplot2::aes_string(y=res$intermediate$variables_namesOnly[1],
                                                   x=predictor,
                                                   group=moderator,
                                                   color=moderator),
                               method="lm",
                               formula= y ~ x,
                               se = FALSE) +
          ggplot2::scale_color_viridis_d();
      }
    } else {
      warning("You requested a plot, but for now plots are ",
              "only available for regression analyses with one predictor or ",
              "with two predictors and an interaction term (i.e. three ",
              "predictors in total).");
    }
  }

  ### Use Z = (b1 - b2) / sqrt(SE_b1^2 + SE_b2^2) to test whether
  ### the coefficients differ; add arguments such as
  ### compareCoefficients=FALSE, p.adjust="fdr" to enable user to
  ### request & correct this.

  class(res) <- 'rosettaRegr';
  return(res);

}

###-----------------------------------------------------------------------------

#' @rdname rosettaRegr
#' @export
rosettaRegr_partial <- function(x,
                                digits = x$input$digits,
                                pvalueDigits=x$input$pvalueDigits,
                                headingLevel = x$input$headingLevel,
                                echoPartial = FALSE,
                                partialFile = NULL,
                                quiet=TRUE,
                                ...) {

  ### Get filename
  if (!is.null(partialFile) && file.exists(partialFile)) {
    rmdPartialFilename <-
      partialFile;
  } else {
    rmdPartialFilename <-
      system.file("partials", "_rosettaRegr_partial.Rmd",
                  package="rosetta");
  }

  rmdpartials::partial(rmdPartialFilename);

}

###-----------------------------------------------------------------------------

#' @rdname rosettaRegr
#' @method knit_print rosettaRegr
#' @importFrom knitr knit_print
#' @export
knit_print.rosettaRegr <- function(x,
                                   digits = x$input$digits,
                                   headingLevel = x$input$headingLevel,
                                   pvalueDigits=x$input$pvalueDigits,
                                   echoPartial = FALSE,
                                   partialFile = NULL,
                                   quiet=TRUE,
                                   ...) {
  rosettaRegr_partial(x = x,
                      headingLevel = headingLevel,
                      quiet = quiet,
                      echoPartial = echoPartial,
                      partialFile = partialFile,
                      digits = digits,
                      pvalueDigits = pvalueDigits,
                      ...);
}

###-----------------------------------------------------------------------------

#' @method print rosettaRegr
#' @rdname rosettaRegr
#' @export
print.rosettaRegr <- function(x, digits=x$input$digits,
                              pvalueDigits=x$input$pvalueDigits,
                              headingLevel = x$input$headingLevel,
                              forceKnitrOutput = FALSE,
                              ...) {


  if (isTRUE(getOption('knitr.in.progress')) || forceKnitrOutput) {

    rosettaRegr_partial(x = x,
                        digits = digits,
                        headingLevel = headingLevel,
                        ...);

  } else {

    cat(paste0("Regression analysis\n  Formula: ",
               x$intermediate$formula.as.character, "\n",
               "  Sample size: ",
               sum(stats::complete.cases(x$intermediate$dat.raw)),
               "\n\n",
               "Significance test of the entire model (all predictors together):\n",
               "  Multiple R-squared: ",
               ufs::formatCI(x$output$rsq.ci, noZero=TRUE, digits=digits),
               " (point estimate = ",
               round(x$intermediate$summary.raw$r.squared, digits),
               ", adjusted = ",
               round(x$intermediate$summary.raw$adj.r.squared, digits), ")",
               ifelse(x$input$ci.method.note, "*\n", "\n"),
               "  Test for significance: F[",
               x$intermediate$summary.raw$fstatistic[2], ", ",
               x$intermediate$summary.raw$fstatistic[3], "] = ",
               round(x$intermediate$summary.raw$fstatistic[1], digits),
               ", ", formatPvalue(stats::pf(x$intermediate$summary.raw$fstatistic[1],
                                                 x$intermediate$summary.raw$fstatistic[2],
                                                 x$intermediate$summary.raw$fstatistic[3],
                                                 lower.tail=FALSE), digits=pvalueDigits), "\n"));
    if ("raw" %in% x$input$coefficients) {
      cat("\nRaw regression coefficients (unstandardized beta values, called 'B' in SPSS):\n\n");
      tmpDat <- round(x$output$coef.raw[, 1:5], digits);
      tmpDat[[1]] <- paste0("[", tmpDat[[1]], "; ", tmpDat[[2]], "]");
      tmpDat[[2]] <- NULL;
      names(tmpDat)[1] <- paste0(x$input$conf.level*100, "% conf. int.");
      tmpDat$p <- formatPvalue(x$output$coef.raw$p,
                                    digits=pvalueDigits,
                                    includeP=FALSE);
      print(tmpDat, ...);
    }
    if ("scaled" %in% x$input$coefficients) {
      cat("\nScaled regression coefficients (standardized beta values, called 'Beta' in SPSS):\n\n");
      tmpDat <- round(x$output$coef.scaled[, 1:5], digits);
      tmpDat[[1]] <- paste0("[", tmpDat[[1]], "; ", tmpDat[[2]], "]");
      tmpDat[[2]] <- NULL;
      names(tmpDat)[1] <- paste0(x$input$conf.level*100, "% conf. int.");
      tmpDat$p <- formatPvalue(x$output$coef.scaled$p,
                                    digits=pvalueDigits,
                                    includeP=FALSE);
      print(tmpDat, ...);
    }

    if (x$input$collinearity && (!is.null(x$intermediate$vif.raw))) {
      cat0("\nCollinearity diagnostics:\n\n");
      if (is.vector(x$intermediate$vif.raw)) {
        if ("raw" %in% x$input$coefficients) {
          collinearityDat <- data.frame(
            VIF = round(x$intermediate$vif.raw, digits),
            Tolerance = round(x$intermediate$tolerance.raw, digits)
          );
          row.names(collinearityDat) <- paste0(ufs::repStr(4), names(x$intermediate$vif.raw));
          print(collinearityDat);
        }
      } else if (is.vector(x$intermediate$vif.scaled)) {
        if ("scaled" %in% x$input$coefficients) {
          collinearityDat <- data.frame(
            VIF = round(x$intermediate$vif.scaled, digits),
            Tolerance = round(x$intermediate$tolerance.scaled, digits)
          );
          row.names(collinearityDat) <- paste0(ufs::repStr(4), names(x$intermediate$vif.raw));
          print(collinearityDat);
        }
      }
    }

    ciMsg <- "\n* Note that the confidence interval for R^2 is based on ";
    if (x$input$ci.method[1] == 'r.con') {
      ciMsg <- paste0(ciMsg,
                      "the confidence interval for the Pearson Correlation of ",
                      "the multiple correlation using r.con from the 'psych' ",
                      "package because that was specified using the 'ci.method' ",
                      "argument.");
    } else if (x$input$ci.method[1] == 'olkinfinn') {
      ciMsg <- paste0(ciMsg,
                      "the formula reported by Olkin and Finn (1995) in their Correlation ",
                      "Redux paper, because this was specified using the 'ci.method' ",
                      "argument. This may not work well for very low values. Set the ",
                      "argument to 'widest' to also compute the confidence interval ",
                      "of the multiple correlation using the r.con function from ",
                      "the 'psych' package and selecting the widest interval.");
    } else if (identical(x$output$rsq.ci, x$output$rsq.ci.r.con)) {
      ciMsg <- paste0(ciMsg,
                      "the confidence interval for the Pearson Correlation of ",
                      "the multiple correlation using r.con from the 'psych' ",
                      "package because that was the widest interval, which ",
                      "should be used because the 'ci.method' was set to 'widest'.");
    } else if (identical(x$output$rsq.ci, x$output$rsq.ci.olkinfinn)) {
      ciMsg <- paste0(ciMsg,
                      "the formula reported by Olkin and Finn (1995) in their Correlation ",
                      "Redux paper, because this was the widest interval, which ",
                      "should be used because the 'ci.method' was set to 'widest'.");
    } else {
      ciMsg <- paste0(ciMsg,
                      " -- I don't know actually, something appears to have gone wrong. ",
                      "The 'ci.method' argument was set to ", vecTxtQ(x$input$ci.method),
                      ".");
    }

    if (x$input$ci.method.note) {
      cat("\n");
      cat(strwrap(ciMsg), sep="\n");
    }

    if (!is.null(x$output$plot)) {
      print(x$output$plot);
    }
    invisible();
  }
}

###-----------------------------------------------------------------------------

### Function to smoothly pander output from regr function in userfriendlyscience
#' @method pander rosettaRegr
#' @rdname rosettaRegr
#' @export
pander.rosettaRegr <- function (x, digits = x$input$digits, pvalueDigits = x$input$pvalueDigits, ...) {
  pander::pandoc.p(paste0("\n\n#### Regression analysis for formula: ", x$intermediate$formula.as.character));
  pander::pandoc.p("\n\n##### Significance test of the entire model (all predictors together):\n\n");
  pander::pandoc.p(paste0("Multiple R-squared: [", round(x$output$rsq.ci[1],
                                                 digits), ", ", round(x$output$rsq.ci[2], digits),
                  "] (point estimate = ", round(x$intermediate$summary.raw$r.squared,
                                                digits), ", adjusted = ", round(x$intermediate$summary.raw$adj.r.squared,
                                                                                digits), ")"))
  pander::pandoc.p(paste0("Test for significance: F[", x$intermediate$summary.raw$fstatistic[2],
                  ", ", x$intermediate$summary.raw$fstatistic[3], "] = ",
                  round(x$intermediate$summary.raw$fstatistic[1], digits),
                  ", ", formatPvalue(stats::pf(x$intermediate$summary.raw$fstatistic[1],
                                                    x$intermediate$summary.raw$fstatistic[2], x$intermediate$summary.raw$fstatistic[3],
                                                    lower.tail = FALSE), digits = pvalueDigits), "\n"));

  if ("raw" %in% x$input$coefficients) {
    pander::pandoc.p("\n\n##### Raw regression coefficients (unstandardized beta values, called 'B' in SPSS):\n\n");
    tmpDat <- round(x$output$coef.raw[, 1:5], digits);
    tmpDat[[1]] <- paste0("[", tmpDat[[1]], "; ", tmpDat[[2]], "]");
    tmpDat[[2]] <- NULL;
    names(tmpDat)[1] <- paste0(x$input$conf.level * 100, "% conf. int.");
    tmpDat$p <- formatPvalue(x$output$coef.raw$p, digits = pvalueDigits, includeP = FALSE);
    pander::pander(tmpDat, missing="");
  }
  if ("scaled" %in% x$input$coefficients) {
    pander::pandoc.p("\n\n##### Scaled regression coefficients (standardized beta values, called 'Beta' in SPSS):\n\n");
    tmpDat <- round(x$output$coef.scaled[, 1:5], digits);
    tmpDat[[1]] <- paste0("[", tmpDat[[1]], "; ", tmpDat[[2]], "]");
    tmpDat[[2]] <- NULL;
    names(tmpDat)[1] <- paste0(x$input$conf.level * 100, "% conf. int.");
    tmpDat$p <- formatPvalue(x$output$coef.scaled$p, digits = pvalueDigits, includeP = FALSE);
    pander::pander(tmpDat, missing="");
  }
}

Try the rosetta package in your browser

Any scripts or data that you put into this service are public.

rosetta documentation built on March 7, 2023, 7:40 p.m.