Nothing
#' @rdname normalityAssessment
#' @export
dataShape <- function(sampleVector, na.rm=TRUE, type=2, digits=2,
conf.level=.95, plots=TRUE, xLabs = NA,
yLabs = NA, qqCI=TRUE, labelOutliers = TRUE,
sampleSizeOverride = NULL) {
### Note: this is adapted from the 'skewness' and 'kurtosis' functions
### in 'e1071' and the way they're computed in 'describe' in 'psych', and
### for the variances, also based on Stan Browns page at
### https://www.tc3.edu/instruct/sbrown/stat/shape.htm, as well as of course
### on Joanes & Gill (1998)
res <- list(input = as.list(environment()),
intermediate = list(), output = list());
if (any(isNA <- is.na(sampleVector))) {
if (na.rm) {
res$intermediate$sampleVector <- sampleVector <- sampleVector[!isNA];
}
else {
return(NA);
}
} else {
res$intermediate$sampleVector <- sampleVector;
}
if (!(type %in% (1:3))) {
stop("Invalid 'type' argument. This must be 1, 2 or 3; see ?skewness for more info.");
}
### Number of observations
res$intermediate$n <- n <- length(sampleVector);
if (n < 3) {
stop("There are only ", n, " datapoints/observations - this is too few to ",
"compute a sensible estimate of the 'shape' of this data.");
}
### First moment (mean)
res$intermediate$m1 <- sum(sampleVector^1) / n;
### Center the data
res$intermediate$centeredVector <- sampleVector <- sampleVector - res$intermediate$m1;
### Second moment (variance)
res$intermediate$m2 <- m2 <- sum(sampleVector^2) / n;
### Third moment
res$intermediate$m3 <- m3 <- sum(sampleVector^3) / n;
### Fourth moment
res$intermediate$m4 <- m4 <- sum(sampleVector^4) / n;
### Sample skewness
res$intermediate$g1 <- m3 / (m2 ^ (3/2));
### Sample (excess) kurtosis
res$intermediate$g2 <- (m4 / (m2 ^ 2)) - 3;
### Population skewness
res$intermediate$G1 <- res$intermediate$g1 *
sqrt( n * (n-1) ) /
( n - 2 );
### Population kurtosis
res$intermediate$G2 <- (n-1) *
( (n+1) * res$intermediate$g2 + 6 ) /
( (n-2) * (n-3) );
### Type 3 correction for skewness
res$intermediate$b1 <- res$intermediate$g1 *
(((n-1) / n) ^ (3/2));
### Type 3 correction for kurtosis
res$intermediate$b2 <- (res$intermediate$g2 + 3) *
(((n-1) / n) ^ 2) - 3;
### Now for the variances. We compute the variance in a normal
### population, because when we want to test the skewness and
### kurtosis, we do so under the assumption that they come from a
### normal distribution. Plus, no idea how to compute the variance
### otherwise :-)
### These formulas come from the Joanes & Gill (1998) article.
### Note that we use the 'overridden sample size' if one was specified.
### We do this because when this function is used
if (is.numeric(sampleSizeOverride)) n <- sampleSizeOverride;
res$intermediate$var.g1 <- ( 6 * (n-2) ) /
( (n+1) * (n+3) );
res$intermediate$var.g2 <- ( 24*n * (n-2) * (n-3) ) /
( (n+1)^2 * (n+3) * (n + 5) );
res$intermediate$var.G1 <- res$intermediate$var.g1 *
sqrt(n * (n-1)) /
(n-2)^2;
res$intermediate$var.G2 <- res$intermediate$var.g2 *
(((n-1) * (n+1)) /
((n-2) * (n-3))) ^ 2;
res$intermediate$var.b1 <- res$intermediate$var.g1 *
((n-1) / n) ^ 3;
res$intermediate$var.b2 <- res$intermediate$var.g2 *
((n-1) / n) ^ 4;
### And the standard error for the skewness and the kurtosis,
### based on the formulas on https://www.tc3.edu/instruct/sbrown/stat/shape.htm
res$intermediate$se.G1 <- sqrt( ( 6 * n * (n-1) ) /
( (n-2) * (n+1) * (n+3) ) );
res$intermediate$se.G2 <- 2 * res$intermediate$se.G1 *
sqrt( (n^2 - 1) /
( (n-3) * (n+5) ) );
### Confidence interval for skewness and kurtosis in the population
res$intermediate$zMultiplier <- stats::qnorm(1 - (1-conf.level)/2);
res$intermediate$ci.G1 <- c(res$intermediate$G1 -
res$intermediate$zMultiplier *
res$intermediate$se.G1,
res$intermediate$G1 +
res$intermediate$zMultiplier *
res$intermediate$se.G1);
res$intermediate$ci.G2 <- c(res$intermediate$G2 -
res$intermediate$zMultiplier *
res$intermediate$se.G2,
res$intermediate$G2 +
res$intermediate$zMultiplier *
res$intermediate$se.G2);
### Z-test to test against normal distribution
res$intermediate$z.G1 <- res$intermediate$G1 / res$intermediate$se.G1;
res$intermediate$z.G2 <- res$intermediate$G2 / res$intermediate$se.G2;
res$intermediate$p.G1 <- 2*(1-stats::pnorm(abs(res$intermediate$z.G1)));
res$intermediate$p.G2 <- 2*(1-stats::pnorm(abs(res$intermediate$z.G2)));
### Hartigans' Dip Test
res$output$D.dip.test <- diptest::dip.test(sampleVector)$statistic[[1]];
res$output$p.dip.test <- diptest::dip.test(sampleVector)$p[[1]];
### Store requested estimates in output object
if (type == 1) {
res$output$skewness <- res$intermediate$g1;
res$output$kurtosis <- res$intermediate$g2;
res$output$type <- "g";
}
else if (type == 2) {
res$output$skewness <- res$intermediate$G1;
res$output$kurtosis <- res$intermediate$G2;
res$output$type <- "G";
}
else if (type == 3) {
res$output$skewness <- res$intermediate$b1;
res$output$kurtosis <- res$intermediate$b2;
res$output$type <- "b";
}
if (plots) {
res$intermediate$histogram <-
normalHist(res$intermediate$sampleVector)$plot;
res$intermediate$qq <-
ggqq(res$intermediate$sampleVector, ci=qqCI);
res$intermediate$boxplot <-
ggBoxplot(res$intermediate$sampleVector, labelOutliers=labelOutliers);
if (!is.null(xLabs)) {
if (is.na(xLabs)) {
res$intermediate$histogram <-
res$intermediate$histogram +
ggplot2::theme(axis.title.x = ggplot2::element_blank());
res$intermediate$qq <-
res$intermediate$qq +
ggplot2::theme(axis.title.x = ggplot2::element_blank());
res$intermediate$boxplot <-
res$intermediate$boxplot +
ggplot2::theme(axis.title.x = ggplot2::element_blank());
} else {
res$intermediate$histogram <-
res$intermediate$histogram + ggplot2::xlab(xLabs$hist);
res$intermediate$qq <-
res$intermediate$qq + ggplot2::xlab(xLabs$qq);
res$intermediate$boxplot <-
res$intermediate$boxplot + ggplot2::xlab(xLabs$box);
}
}
if (!is.null(yLabs)) {
if (is.na(yLabs)) {
res$intermediate$histogram <-
res$intermediate$histogram +
ggplot2::theme(axis.title.y = ggplot2::element_blank());
res$intermediate$qq <-
res$intermediate$qq +
ggplot2::theme(axis.title.y = ggplot2::element_blank());
res$intermediate$boxplot <-
res$intermediate$boxplot +
ggplot2::theme(axis.title.y = ggplot2::element_blank());
} else {
res$intermediate$histogram <-
res$intermediate$histogram + ggplot2::ylab(yLabs$hist);
res$intermediate$qq <-
res$intermediate$qq + ggplot2::ylab(yLabs$qq);
res$intermediate$boxplot <-
res$intermediate$boxplot + ggplot2::ylab(yLabs$box);
}
}
res$output$plot <-
gridExtra::arrangeGrob(res$intermediate$histogram,
res$intermediate$qq,
res$intermediate$boxplot, ncol=3);
}
### Return results object
class(res) <- "dataShape";
return(res);
}
#' @method print dataShape
#' @rdname normalityAssessment
#' @export
print.dataShape <- function(x, digits=x$input$digits, extraNotification=TRUE, ...) {
if (x$output$type == "G") {
skewness.inference <- paste0(" (se = ", format(x$intermediate$se.G1, digits=digits),
", confidence interval = [",
paste0(format(x$intermediate$ci.G1, digits=digits), collapse=", "),
"], z = ", format(x$intermediate$z.G1, digits=digits),
", ", formatPvalue(x$intermediate$p.G1, digits=digits), ")");
kurtosis.inference <- paste0(" (se = ", format(x$intermediate$se.G2, digits=digits),
", confidence interval = [",
paste0(format(x$intermediate$ci.G2, digits=digits), collapse=", "),
"], z = ", format(x$intermediate$z.G2, digits=digits),
", ", formatPvalue(x$intermediate$p.G2, digits=digits), ")");
}
else {
skewness.inference <- kurtosis.inference <- "";
}
cat(paste0("Skewness (", x$output$type, "1): ",
round(x$output$skewness, digits=digits),
skewness.inference, "\n"));
cat(paste0("Kurtosis (", x$output$type, "2): ",
round(x$output$kurtosis, digits=digits),
kurtosis.inference, "\n"));
cat(paste0("Hartigans' Dip Test: ",
round(x$output$D.dip.test, digits=digits), ", ",
formatPvalue(x$output$p.dip.test), "\n"));
if (x$input$plots) {
grid.draw(x$output$plot);
}
if (extraNotification && x$output$type == "g") {
cat("\nNote: g1 and g2 are biased estimates of the population skewness and kurtosis.",
"For unbiased estimates, use 'type=2' or 'type=3'.");
}
else if (extraNotification && x$output$type == "G") {
cat("\nNote: G1 and G2 are the estimates for skewness and kurtosis used by SPSS and SAS,",
"and corrected for the bias present in g1 and g2 ('type=1'). Note that b1 and b2 ('type=3')",
"may perform better in small samples from a normal distribution.");
}
else if (extraNotification && x$output$type == "b") {
cat("\nNote: b1 and b2 are estimates for skewness and kurtosis that have smaller mean-squared error in small",
"samples from a normal distribution than G1 and G2, which are used by SPSS and SAS ('type=2').");
}
}
#' @method pander dataShape
#' @rdname normalityAssessment
#' @export
pander.dataShape <- function(x, digits=x$input$digits, extraNotification=TRUE, ...) {
if (x$output$type == "G") {
skewness.inference <- paste0(" (se = ", format(x$intermediate$se.G1, digits=digits),
", confidence interval = [",
paste0(format(x$intermediate$ci.G1, digits=digits), collapse=", "),
"], z = ", format(x$intermediate$z.G1, digits=digits),
", ", formatPvalue(x$intermediate$p.G1, digits=digits), ")");
kurtosis.inference <- paste0(" (se = ", format(x$intermediate$se.G2, digits=digits),
", confidence interval = [",
paste0(format(x$intermediate$ci.G2, digits=digits), collapse=", "),
"], z = ", format(x$intermediate$z.G2, digits=digits),
", ", formatPvalue(x$intermediate$p.G2, digits=digits), ")");
}
else {
skewness.inference <- kurtosis.inference <- "";
}
cat(paste0("Skewness (", x$output$type, "1): ",
round(x$output$skewness, digits=digits),
skewness.inference, " \n"));
cat(paste0("Kurtosis (", x$output$type, "2): ",
round(x$output$kurtosis, digits=digits),
kurtosis.inference, " \n"));
cat(paste0("Hartigans' Dip Test: ",
round(x$output$D.dip.test, digits=digits), ", ",
formatPvalue(x$output$p.dip.test), "\n"));
if (extraNotification && x$output$type == "g") {
cat("\nNote: g1 and g2 are biased estimates of the population skewness and kurtosis.",
"For unbiased estimates, use 'type=2' or 'type=3'.\n");
}
else if (extraNotification && x$output$type == "G") {
cat("\nNote: G1 and G2 are the estimates for skewness and kurtosis used by SPSS and SAS,",
"and corrected for the bias present in g1 and g2 ('type=1'). Note that b1 and b2 ('type=3')",
"may perform better in small samples from a normal distribution.\n");
}
else if (extraNotification && x$output$type == "b") {
cat("\nNote: b1 and b2 are estimates for skewness and kurtosis that have smaller mean-squared error in small",
"samples from a normal distribution than G1 and G2, which are used by SPSS and SAS ('type=2').\n");
}
cat("\n");
if (x$input$plots) {
grid.draw(x$output$plot);
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.