## Install and Load Packages
# `remotes` is needed to install packages straight from GitHub.
if (!"remotes" %in% rownames(installed.packages())) {
  install.packages("remotes")
}

# Install rshinystatistics from GitHub when it is missing or older than the
# version filled in by the template. `||` short-circuits, so packageVersion()
# is only called once the package is known to be installed.
if (!"rshinystatistics" %in% rownames(installed.packages()) ||
    packageVersion("rshinystatistics") < "{{ rshinystatistics.version }}") {
  remotes::install_github("geiser/rshinystatistics")
}

# Install whichever of the remaining dependencies is still missing.
# installed.packages() is queried again on purpose: the steps above may have
# just installed something.
wants <- c("ggplot2", "ggpubr", "rshinystatistics", "utils")
has <- wants %in% rownames(installed.packages())
if (any(!has)) {
  install.packages(wants[!has])
}

library(utils)
library(ggpubr)
library(ggplot2)
library(rshinystatistics)
# Show the source code of every chunk in the rendered report.
library(knitr)
opts_chunk$set(echo = TRUE)

# Save the current warning level in defaultW before silencing warnings
# (a negative `warn` makes R ignore them); defaultW is used later to put
# the original level back.
defaultW <- getOption("warn")
options(warn = -1)
# Analysis settings filled in by the template: the identifier column, the
# optional covariate, the between-subject factors, the dependent variables and
# the per-variable skewness settings.
wid <- "{{ wid }}"
{{ ifelse(length(covar) > 0, paste0('covar <- "',covar,'"'), '') }}
between <- c({{ paste0(paste0('"', between, '"'), collapse = ',') }})
dvs <- c({{ paste0(paste0('"', dvs, '"'), collapse = ',') }})
names(dvs) <- dvs
{{
  if (length(skewness) > 0) {
    paste0('skewness <- c(', paste0(lapply(names(skewness), FUN = function(i) {
      paste0('"',i, '"="', skewness[[i]],'"')
    }), collapse = ','),')')
  } else {
    paste0('skewness <- c()')
  }
}}

# Read one CSV table per dependent variable. Because `dvs` is named, the
# resulting list is named by dependent variable. The identifier column
# supplies the row names of each table.
dat <- lapply(dvs, FUN = function(dv) {
  data <- read.csv(paste0("../data/table-for-",dv,".csv"))
  rownames(data) <- data[["{{ wid }}"]]
  return(data)
})

# Working copies of the loaded tables; both start out identical to `dat`.
rdat <- dat
sdat <- dat
{{
  # Generate the call that computes the initial descriptive statistics.
  # With a covariate, the first table is added to the list under the
  # covariate name before get.descriptives() is called.
  # The generated code spells out TRUE instead of the reassignable T.
  if (length(covar) > 0) {
    paste0(c(
      'df <- dat; df[[covar]] <- dat[[1]]',
      '(df <- get.descriptives(df, dvs, between, include.global = TRUE, symmetry.test = TRUE))'
    ), collapse = '\n')
  } else {
    paste0(c('(df <- get.descriptives(dat, dvs, between, include.global = TRUE, symmetry.test = TRUE))'), collapse = '\n')
  }
}}
kable(df, digits = 3)
{{
  # Right-hand side of the plot formula: the back-quoted between-subject
  # factors joined with '*'.
  sformula <- paste0(paste0('`',between,'`'), collapse = '*')
  # One car::Boxplot() call per dependent variable, one call per line.
  box.plot.code <- paste0(lapply(dvs, FUN = function(dv) {
    paste0("car::Boxplot(`",dv,"` ~ ",sformula,", data = dat[[\"",dv,"\"]], id = list(n = Inf))")
  }), collapse = '\n')
  # The value of the last expression is what the template inserts.
  box.plot.code
}}
{{ code.skewness }}
{{ code.outliers }}
# Entries selected for removal, filled in by the template.
non.normal <- {{ code.non.normal }}
# Rebuild the working tables from rdat without those entries
# (presumably matched through the `wid` column; confirm against
# removeFromDataTable()).
sdat <- removeFromDataTable(rdat, non.normal, wid)
{{
  # Generate the residual-based normality test call; the covariate is passed
  # only when one was configured.
  if (length(covar) > 0) {
    '(df <- normality.test.by.residual(sdat, dvs, between, c(), covar, skewness = skewness))'
  } else {
    '(df <- normality.test.by.residual(sdat, dvs, between, skewness = skewness))'
  }
}}
kable(df, digits = 3)
This is an optional validation and is only valid for groups with more than 30 observations.
# Descriptive statistics of the working data, without the global summary and
# with the normality test requested. TRUE/FALSE are spelled out because the
# shorthands T and F are ordinary variables that can be reassigned.
(df <- get.descriptives(
  sdat, dvs, between,
  include.global = FALSE, normality.test = TRUE, skewness = skewness
))
kable(df, digits = 3)
Observation:
As sample sizes increase, parametric tests remain valid even with the violation of normality [1].
According to the central limit theorem, the sampling distribution tends to be normal if the sample is large, i.e., more than 30 observations (`n > 30`).
Therefore, we performed parametric tests with large samples as described below:
In cases with a sample size greater than 100 (`n > 100`), we adopted a significance level of `p < 0.01`.
For samples with more than 50 observations (`n > 50`), we adopted the D'Agostino-Pearson test, which offers better accuracy for larger samples [2].
For sample sizes in the range `n > 100` and `n <= 200`, we ignored the normality test, and our decision on validating normality was based only on the interpretation of QQ-plots and histograms, because the Shapiro-Wilk and D'Agostino-Pearson tests tend to be too sensitive with more than 200 observations [3].
For samples with more than 200 observations (`n > 200`), we ignore the normality assumption based on the central limit theorem.
{{ linearity.code }}
{{
  # Generate the homogeneity test call; the covariate is passed only when one
  # was configured.
  if (length(covar) > 0) {
    '(df <- homogeneity.test(sdat, dvs, between, c(), covar, skewness = skewness))'
  } else {
    '(df <- homogeneity.test(sdat, dvs, between, skewness = skewness))'
  }
}}
kable(df, digits = 3)
# Combine the per-variable tables into a single data frame: start from the
# first table and merge() each remaining one into it (merge() joins on the
# columns the two tables have in common).
ndat <- sdat[[1]]
for (dv in names(sdat)[-1]) {
  ndat <- merge(ndat, sdat[[dv]])
}

# Save the combined table next to the input data.
write.csv(ndat, "../data/table-with-normal-distribution.csv")
Descriptive statistics of data with normal distribution
# Compute the descriptive statistics of the working data, print them, and
# render them as a table rounded to three digits.
df <- get.descriptives(sdat, dvs, between)
df
kable(df, digits = 3)
# Draw one box plot per dependent variable. The current variable is copied
# into a column literally named `dv`, so the same formula works on every pass.
for (dv in dvs) {
  car::Boxplot(
    `dv` ~ {{ paste0(paste0('`',between,'`'), collapse = '*') }},
    data = cbind(sdat[[dv]], dv = sdat[[dv]][[dv]]),
    id = list(n = Inf)
  )
}
{{ code.hypothesis }}
{{ code.hypothesis.tbl }}
{{ code.pwc }}
{{ code.pwc.tbl }}
{{ code.emms }}
{{ code.emms.tbl }}
{{ code.plots }}
{{ hypothesis.text }}
{{ hypothesis.pwc.text }}
Use the site https://www.tablesgenerator.com to convert the HTML tables into LaTeX format.
[2]: Miot, H. A. (2017). Assessing normality of data in clinical and experimental trials. J Vasc Bras, 16(2), 88-91.
[3]: Bárány, Imre; Vu, Van (2007). "Central limit theorems for Gaussian polytopes". Annals of Probability. Institute of Mathematical Statistics. 35 (4): 1593–1621.
# Put the warning level back to the value stored in defaultW.
options(warn = defaultW)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.