check.norm <- function(x) {
require(moments)
x <- x
x.res <- shapiro.test(x)
x.log <- log(x)
x.log.res <- shapiro.test(x.log)
x.sqrt <- sqrt(x)
x.sqrt.res <- shapiro.test(x.sqrt)
# https://stackoverflow.com/questions/4787332/how-to-remove-outliers-from-a-dataset
# super thanks!!!
remove_outliers <- function(x) {
qnt <- quantile(x, probs=c(.25, .75), na.rm = T)
H <- 1.5 * IQR(x, na.rm = T)
y <- x
y[x < (qnt[1] - H)] <- NA
y[x > (qnt[2] + H)] <- NA
y
}
y <- remove_outliers(x)
N.rem <- length(x)-length(na.omit(y))
y.res <- shapiro.test(y)
y.log <- log(y)
y.log.res <- shapiro.test(y.log)
y.sqrt <- sqrt(y)
y.sqrt.res <- shapiro.test(y.sqrt)
par(mfrow = c(3,3))
#################################################### normal
qqnorm(x,main = 'normal (b)'); qqline(x)
legend('topleft', bty = 'n', paste(
paste('W = ', round(x.res$statistic,3)),
paste('p = ', round(x.res$p.value,5)), sep = '\n')
)
qqnorm(x.log,main = 'log (b)'); qqline(x.log)
legend('topleft', bty = 'n', paste(
paste('W = ', round(x.log.res$statistic,3)),
paste('p = ', round(x.log.res$p.value,5)), sep = '\n')
)
qqnorm(x.sqrt,main = 'sqrt (b)'); qqline(x.sqrt)
legend('topleft', bty = 'n', paste(
paste('W = ', round(x.sqrt.res$statistic,3)),
paste('p = ', round(x.sqrt.res$p.value,5)), sep = '\n')
)
#################################################### normal
qqnorm(y,main = 'normal (b) \n(NoOut)'); qqline(y)
legend('topleft', bty = 'n', paste(
paste('W = ', round(y.res$statistic,3)),
paste('p = ', round(y.res$p.value,5)), sep = '\n')
)
qqnorm(y.log,main = 'log (b) \n(NoOut)'); qqline(y.log)
legend('topleft', bty = 'n', paste(
paste('W = ', round(y.log.res$statistic,3)),
paste('p = ', round(y.log.res$p.value,5)), sep = '\n')
)
qqnorm(y.sqrt,main = 'sqrt (b) \n(NoOut)'); qqline(y.sqrt)
legend('topleft', bty = 'n', paste(
paste('W = ', round(y.sqrt.res$statistic,3)),
paste('p = ', round(y.sqrt.res$p.value,5)), sep = '\n')
)
# par(mfrow = c(1,1))
df <- data.frame(
Transform = c(
'-',
'log',
'sqrt',
'-',
'log',
'sqrt'),
Outliers = c((rep('-', 3)), rep('removed', 3)),
Method = rep('before',6),
N.out.rm = c((rep(0, 3)), rep(N.rem, 3)),
Skewness = c(
round(moments::skewness(x, na.rm = T),3),
round(moments::skewness(x.log, na.rm = T),3),
round(moments::skewness(x.sqrt, na.rm = T),3),
round(moments::skewness(y, na.rm = T),3),
round(moments::skewness(y.log, na.rm = T),3),
round(moments::skewness(y.sqrt, na.rm = T),3)),
Kurtosis = c(
round(moments::kurtosis(x, na.rm = T),3),
round(moments::kurtosis(x.log, na.rm = T),3),
round(moments::kurtosis(x.sqrt, na.rm = T),3),
round(moments::kurtosis(y, na.rm = T),3),
round(moments::kurtosis(y.log, na.rm = T),3),
round(moments::kurtosis(y.sqrt, na.rm = T),3)),
W = c(
round(x.res$statistic,3),
round(x.log.res$statistic,3),
round(x.sqrt.res$statistic,3),
round(y.res$statistic,3),
round(y.log.res$statistic,3),
round(y.sqrt.res$statistic,3)),
p = c(
round(x.res$p.value,5),
round(x.log.res$p.value,5),
round(x.sqrt.res$p.value,5),
round(y.res$p.value,5),
round(y.log.res$p.value,5),
round(y.sqrt.res$p.value,5)))
# df <- df[order(-df$p, -df$W),]
# print( 'method: before')
# print(paste('Outliers removed: ', N.rem))
df.before <- df
######################################################################################
######################################################################################
x <- x
x.res <- shapiro.test(x)
x.log <- log(x)
x.log.res <- shapiro.test(x.log)
x.sqrt <- sqrt(x)
x.sqrt.res <- shapiro.test(x.sqrt)
# https://stackoverflow.com/questions/4787332/how-to-remove-outliers-from-a-dataset
# super thanks!!!
remove_outliers <- function(x) {
qnt <- quantile(x, probs=c(.25, .75), na.rm = T)
H <- 1.5 * IQR(x, na.rm = T)
y <- x
y[x < (qnt[1] - H)] <- NA
y[x > (qnt[2] + H)] <- NA
y
}
y <- remove_outliers(x)
y.res <- shapiro.test(y)
y.log <- remove_outliers(log(y))
y.log.res <- shapiro.test(y.log)
y.sqrt <- remove_outliers(sqrt(y))
y.sqrt.res <- shapiro.test(y.sqrt)
#################################################### normal
qqnorm(y,main = 'normal (a) \n(NoOut)'); qqline(y)
legend('topleft', bty = 'n', paste(
paste('W = ', round(y.res$statistic,3)),
paste('p = ', round(y.res$p.value,5)), sep = '\n')
)
qqnorm(y.log,main = 'log (a) \n(NoOut)'); qqline(y.log)
legend('topleft', bty = 'n', paste(
paste('W = ', round(y.log.res$statistic,3)),
paste('p = ', round(y.log.res$p.value,5)), sep = '\n')
)
qqnorm(y.sqrt,main = 'sqrt (a) \n(NoOut)'); qqline(y.sqrt)
legend('topleft', bty = 'n', paste(
paste('W = ', round(y.sqrt.res$statistic,3)),
paste('p = ', round(y.sqrt.res$p.value,5)), sep = '\n')
)
par(mfrow = c(1,1))
df <- data.frame(
Transform = c(
'log',
'sqrt'),
Outliers = rep('removed',2),
Method = rep('after',2),
N.out.rm = c(length(x.log)-length(na.omit(y.log)),
length(x.sqrt)-length(na.omit(y.sqrt))),
Skewness = c(round(moments::skewness(y.log, na.rm = T),3),
round(moments::skewness(y.sqrt, na.rm = T),3)),
Kurtosis = c(round(moments::kurtosis(y.log, na.rm = T),3),
round(moments::kurtosis(y.sqrt, na.rm = T),3)),
W = c(
round(y.log.res$statistic,3),
round(y.sqrt.res$statistic,3)),
p = c(
round(y.log.res$p.value,5),
round(y.sqrt.res$p.value,5))
)
df.after <- df
df <- rbind(df.after, df.before)
df <- df[order(-df$p, -df$W),]
df
}
#################################################################################################################################.
# -------------------------------------------------------------------------------------------------------------------------------.
# Norm_log_10.
# -------------------------------------------------------------------------------------------------------------------------------.
#################################################################################################################################.
Norm_log_10 <- function(x) {
require(moments)
x.res <- shapiro.test(x)
x.log <- log(x)
x.log.res <- shapiro.test(x.log)
x.log10 <- log10(x)
x.log10.res <- shapiro.test(x.log10)
par(mfrow=c(1,3),oma = c(0, 0, 2, 0))
qqnorm(x,main = 'normal'); qqline(x)
legend('topleft', bty = 'n', paste(
paste('W = ', round(x.res$statistic,3)),
paste('p = ', round(x.res$p.value,5)), sep = '\n')
)
qqnorm(x.log,main = 'log'); qqline(x.log)
legend('topleft', bty = 'n', paste(
paste('W = ', round(x.log.res$statistic,3)),
paste('p = ', round(x.log.res$p.value,5)), sep = '\n')
)
qqnorm(x.log10,main = 'log10'); qqline(x.log10)
legend('topleft', bty = 'n', paste(
paste('W = ', round(x.log10.res$statistic,3)),
paste('p = ', round(x.log10.res$p.value,5)), sep = '\n')
)
PlotTitle <- deparse(substitute(x))
mtext(text = PlotTitle, outer = T, cex = 1)
par(mfrow=c(1,1),oma = c(0, 0, 0, 0))
}
# Norm_log_10(mtcars$mpg)
#################################################################################################################################.
# -------------------------------------------------------------------------------------------------------------------------------.
# SaveQQplots
# -------------------------------------------------------------------------------------------------------------------------------.
#################################################################################################################################.
save.QQPlots <- function(dataset, measures, pic_ParentFolder, plotType) {
originalWD <- getwd()
if(missing(plotType)) stop('\n\n\nHey! plotType must either "3" ("Norm_log_10" function) or "9" ("check.norm" plots)')
else {
if(plotType == 3) {
TheFun <- as.function(Norm_log_10)
width = 10; height = 4
}
if(plotType == 9) {
TheFun <- as.function(check.norm)
width = 10; height = 10
}
}
for(i in 1:length(measures)) {
IVname <- measures[i]
parent_folder <- paste(originalWD, pic_ParentFolder, sep = '/')
dir.create(file.path(parent_folder), showWarnings = T,recursive = T)
setwd(file.path(parent_folder))
Mname <- measures[i]
filename_int <- paste(Mname,'.tiff', sep = '')
png(filename = filename_int, width = width, height = height, units = 'in', res = 72)
TheFun(dataset[, Mname])
title(main = Mname, outer = F)
dev.off()
setwd(originalWD)
}
}
# save.QQPlots(dataset = mtcars, measures = c('mpg', 'disp'), pic_ParentFolder = 'myFolder', plotType = 3)
# Norm_log_10(mtcars$mpg)
#
# sadfg <- x
#
# x <- c(0.98, 1.73, 0.83, 1.54, 0.85, 1.16, 1.03, 0.61, 1.17, 1.66, 0.71, 1.25, 0.58, 0.94,
# 2.35, NA, 0.76, 2.48, 1.12, 0.93, 1.14, 0.78, 1.01, 1.03, 1.14, 1.03, 0.72, 1.95, 0.85,
# 0.69, 0.74, 1.14, 1.55, 0.93, 0.78, 0.75, 1.61, 1, 0.57, 0.39, 0.8, 0.92, 0.8, 0.96, 0.55,
# 0.84, 0.87, 1.02, 1.1, 1.11, 0.63, 1.42, 0.77, 1.73, 1.5, 0.67, 0.75, 1.7, 1.21, 1.26, 0.97,
# 0.79, 2.96, 1.14, 0.53, 0.65, 1.96, 0.57, NA, 0.51, 0.98, 0.35, 1.02, 1.55, 1.18)
# check.norm(x)
#
# log(x)
# log10(x)
# shapiro.test(log10(x))
# shapiro.test(log(x))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.