Summary_explora <- function(dataset) {
require(moments)
nums <- sapply(dataset, is.numeric)
H <- dataset[ , nums]
# ------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------
# ################################### Shapiro test
Normal_log <- apply(H, 2, function(x) {
if((length(na.omit(x)) > 3 & length(na.omit(x)) < 5000) &&
(length(unique(x)) > 2) &&
(min(x,na.rm = T) >= 0))
{ if(min(x,na.rm = T) == 0) {x <- log((x)+1)} else {x <- log((x))}
x.res <- shapiro.test(na.omit(x))
W <- round(x.res$statistic,3)
P <- symnum(x.res$p.value, legend = F, corr = FALSE, na = FALSE,
cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 100), symbols = c("***", "**", "*", ".", " "))
return(paste(W, P, sep = ' '))
} else {return('----')}
})
Normal_exp <- apply(H, 2, function(x) {
if((length(na.omit(x)) > 3 & length(na.omit(x)) < 5000) &&
(length(unique(x)) > 2))
{
x <- exp((x))
x.res <- shapiro.test(na.omit(x))
W <- round(x.res$statistic,3)
P <- symnum(x.res$p.value, legend = F, corr = FALSE, na = FALSE,
cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 100), symbols = c("***", "**", "*", ".", " "))
return(paste(W, P, sep = ' '))
} else {return('----')}
})
Normal_raw <- apply(H, 2, function(x) {
if((length(na.omit(x)) > 3 & length(na.omit(x)) < 5000) &&
(length(unique(x)) > 2))
{
x.res <- shapiro.test(na.omit(x))
W <- round(x.res$statistic,3)
P <- symnum(x.res$p.value, legend = F, corr = FALSE, na = FALSE,
cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 100), symbols = c("***", "**", "*", ".", " "))
return(paste(W, P, sep = ' '))
} else {return('----')}
})
# ------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------
N_outliers <- apply(H, 2, function(x) {
x <- na.omit(x)
qnt <- quantile(x, probs=c(.25, .75))
H <- 1.5 * IQR(x, na.rm = T)
y <- x
y[x < (qnt[1] - H)] <- NA
y[x > (qnt[2] + H)] <- NA
sum(length(which(is.na(y))))
})
# ------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------
IntegerContiguous <- apply(H, 2, function(x) {
min.r <- min(x, na.rm=T)
max.r <- max(x, na.rm=T)
d <- diff(sort(unique(x)))
d.r <- ifelse(mean(d, na.rm=T) == 1, 'Yes', 'No')
return(d.r)
})
Min <- apply(H, 2, function(x) min(x,na.rm = T))
Max <- apply(H, 2, function(x) max(x,na.rm = T))
unique <- apply(H, 2, function(x) length(unique(x)))
N <- apply(H, 2, function(x) length(na.omit(x)))
N_NA <- apply(H, 2, function(x) sum(length(which(is.na(x)))))
Mean <- apply(H, 2, function(x) mean(x,na.rm = T))
Median <- apply(H, 2, function(x) median(x,na.rm = T))
SD <- apply(H, 2, function(x) sd(x,na.rm = T))
SEM <- apply(H, 2, function(x) sd(x, na.rm = T)/sqrt(length(na.omit(x))))
CV <- apply(H, 2, function(x) sd(x, na.rm = T)/mean(x, na.rm = T))
df <- data.frame(
N = N,
NAs = N_NA,
Min = round(Min,2),
Mean = round(Mean,2),
Median = round(Median,2),
Max = round(Max,2),
SD = round(SD,2),
SEM = round(SEM,2),
CV = round(CV,2),
IntContig = IntegerContiguous,
UniqueValues = unique,
W_raw = Normal_raw,
W_log = Normal_log,
W_exp = Normal_exp,
N_outliers = N_outliers
)
rownames(df) <- colnames(H)
return(df)
}
# Summary_explora(mtcars)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.