#' describeData
#'
#' @param x = est la variable decrite
#' @param head = 4 par defaut, nombre de lignes affichees en partant du haut
#' @param tail = 4 par defaut, nombre de lignes affichees en partant du bas
#'
#' @export
#'
describeData <- function(x, head = 4, tail = 4) {
valid <- function(x) {
sum(!is.na(x))
}
nvar <- ncol(x)
all.numeric <- nvar
ans <- matrix(NA, nrow = nvar, ncol = 2)
nobs <- nrow(x)
cc <- 0
cc <- try(complete.cases(x), silent = TRUE)
if (class(cc) == "try-error") {
print ("pouet")
cc <- NA
}
cc <- sum(cc, na.rm = TRUE)
for (i in 1:nvar) {
if (is.numeric(x[, i])) {
ans[i, 2] <- 1
} else {
if ((is.factor(x[, i])) || (is.logical(x[, i]))) {
ans[i, 2] <- 2
} else {
if (is.character(x[, i])) {
ans[i, 2] <- 3
} else {
ans[i, 2] <- 4
}
}
}
ans[i, 1] <- valid(x[, i])
}
if (is.numeric(unlist(x))) {
all.numeric <- TRUE
} else {
all.numeric <- FALSE
}
H1 <- t(x[1:head, 1:nvar])
T1 <- t(x[(nobs - tail + 1):nobs, 1:nvar])
temp <- data.frame(V = 1:nvar, ans, H1, T1)
colnames(temp) <- c("variable #", "n.obs", "type", paste("H", 1:head, sep = ""), paste("T", 1:tail, sep = ""))
rownames(temp)[temp[, "type"] != 1] <- paste(rownames(temp)[temp[, "type"] != 1], "*", sep = "")
result <- (list(n.obs = nobs, nvar = nvar, all.numeric = all.numeric, complete.cases = cc, variables = temp))
class(result) <- c("gftools", "describeData")
return(result)
}
#' describe
#'
#' @param x = est la variable decrite
#' @param na.rm = TRUE par defaut, outre-passe les valeurs NA
#' @param interp = FALSE par defaut, indique si la mediane est interpretee
#' @param ranges = TRUE par defaut, affiche le min, max, etendue et sum
#' @param trim = 0.1 par defaut
#' @param type = 3 par defaut
#' @param check = TRUE par defaut
#' @param fast = NULL par defaut, si TRUE affiche les valeurs fondamentales
#' @param quant = NULL par defaut, si TRUE affiche les quantiles
#' @param IQR = FALSE par defaut, si TRUE affiche les inter quartiles
#'
#' @export
#'
describe <- function(x, na.rm = TRUE, interp = FALSE, ranges = TRUE, trim = 0.1, type = 3, check = TRUE, fast = NULL, quant = NULL, IQR = FALSE) {
cl <- match.call()
# on definit une fonction locale
valid <- function(x) {
sum(!is.na(x))
}
# on complete les cases en erreur
if (!na.rm) {
x <- na.omit(x)
}
# par defaut utilisez fast pour les larges data
if (is.null(fast)) {
if (prod(dim(x)) > 10 ^ 7) {
fast <- TRUE
} else {
fast <- FALSE
}
}
numstats <- 10 + length(quant) + IQR
if (is.null(dim(x)[2])) {
len <- 1
nvar <- 1
# cree un tableau temporaire stats contenant toutes les stats
stats <- matrix(rep(NA, numstats), ncol = numstats)
stats[1, 1] <- valid(x)
stats[1, 2] <- mean(x, na.rm = na.rm)
if (interp) {
stats[1, 3] <- interp.median(x, na.rm = na.rm)
} else {
stats[1, 3] <- median(x, na.rm = na.rm)
}
stats[1, 4] <- min(x, na.rm = na.rm)
stats[1, 5] <- max(x, na.rm = na.rm)
stats[1, 6] <- sum(x, na.rm = na.rm)
stats[1, 7] <- mad(x, na.rm = na.rm)
stats[1, 8] <- mean(x, na.rm = na.rm, trim = trim)
stats[1, 9] <- sd(x, na.rm = na.rm)
stats[1, 10] <- sd(x, na.rm = na.rm) / mean(x, na.rm = na.rm)
vars <- 1
if (!is.null(quant)) {
Qnt <- quantile(x, prob = quant, na.rm = TRUE)
stats[1, (IQR + 11):numstats] <- t(Qnt)
}
if (IQR) {
Quart <- t(quantile(x, prob = c(0.25, 0.75), na.rm = TRUE))
Iqr <- Quart[, 2] - Quart[, 1]
stats[1, 10] <- Iqr
}
rownames(stats) <- "X1"
} else {
nvar <- ncol(x)
# cree un tableau temporaire
stats <- matrix(rep(NA, nvar * numstats), ncol = numstats)
if (is.null(colnames(x))) {
colnames(x) <- paste0("X", 1:ncol(x))
}
rownames(stats) <- colnames(x)
stats[, 1] <- apply(x, 2, valid)
vars <- c(1:nvar)
## ne fonctionne pas avec les matrices
if (!is.matrix(x) && check) {
for (i in 1:nvar) {
if (!is.numeric(x[[i]])) {
if (fast) {
x[[i]] <- NA
} else {
if (is.factor(unlist(x[[i]])) | is.character(unlist(x[[i]]))) {
x[[i]] <- as.numeric(x[[i]])
} else {
x[[i]] <- NA
}
}
rownames(stats)[i] <- paste(rownames(stats)[i], "*", sep = "")
}
}
}
x <- as.matrix(x)
if (!is.numeric(x)) {
message("Converted non-numeric matrix input to numeric. Are you sure you wanted to do this. Please check your data")
x <- matrix(as.numeric(x), ncol = nvar)
rownames(stats) <- paste0(rownames(stats), "*")
}
stats[, 2] <- apply(x, 2, mean, na.rm = na.rm)
stats[, 9] <- apply(x, 2, sd, na.rm = na.rm)
if (ranges) {
if (fast) {
stats[, 4] <- apply(x, 2, min, na.rm = na.rm)
stats[, 5] <- apply(x, 2, max, na.rm = na.rm)
stats[, 6] <- apply(x, 2, sum, na.rm = na.rm)
} else {
stats[, 4] <- apply(x, 2, min, na.rm = na.rm)
stats[, 5] <- apply(x, 2, max, na.rm = na.rm)
stats[, 6] <- apply(x, 2, sum, na.rm = na.rm)
stats[, 7] <- apply(x, 2, mad, na.rm = na.rm)
stats[, 8] <- apply(x, 2, mean, na.rm = na.rm, trim = trim)
if (interp) {
stats[, 3] <- apply(x, 2, interp.median, na.rm = na.rm)
} else {
stats[, 3] <- apply(x, 2, median, na.rm = na.rm)
}
}
}
if (!is.null(quant)) {
Qnt <- apply(x, 2, quantile, prob = quant, na.rm = TRUE)
stats[, (IQR + 11):numstats] <- t(Qnt)
}
if (IQR) {
Quart <- t(apply(x, 2, quantile, prob = c(0.25, 0.75), na.rm = TRUE))
Iqr <- Quart[, 2] - Quart[, 1]
stats[, 10] <- Iqr
}
}
# fin de la matrice maintenant on somme les resultats
if (numstats > (11 + IQR)) {
colnames(stats)[(11 + IQR):numstats] <- paste0("Q", quant[1:length(quant)])
}
# les autres options sont ranges, numstats > 11
if (fast) {
answer <- data.frame(vars = vars, n = stats[, 1], mean = stats[, 2], sd = stats[, 9], cv = stats[, 9] / stats[, 2], se = stats[, 9] / sqrt(stats[, 1]), sum = stats[, 6])
}
if (ranges) {
answer <- data.frame(vars = vars, n = stats[, 1], mean = stats[, 2], sd = stats[, 9], cv = stats[, 9] / stats[, 2], min = stats[, 4], max = stats[, 5], range = stats[
,
5
] - stats[, 4], se = stats[, 9] / sqrt(stats[, 1]), sum = stats[, 6])
} else {
answer <- data.frame(vars = vars, n = stats[, 1], mean = stats[, 2], sd = stats[, 9], cv = stats[, 9] / stats[, 2], se = stats[, 9] / sqrt(stats[, 1]), sum = stats[, 6])
}
if (IQR) {
answer <- data.frame(answer, IQR = stats[, 10])
}
if (numstats > (11 + IQR)) {
if (nvar > 1) {
# ajoute les quantiles
answer <- data.frame(answer, stats[, (IQR + 11):numstats])
} else {
answer <- data.frame(answer, t(stats[, (IQR + 11):numstats]))
}
}
class(answer) <- c("gftools", "describe", "data.frame")
return(answer)
}
#' describeBy
#'
#' @param x = data
#' @param group = NULL par defaut
#' @param mat = FALSE par defaut
#' @param type = 3 par defaut
#' @param digits = 15 apr defaut, nombre de decimales affichees
#' @param ... = other arguments
#'
#' @export
#'
describeBy <- function(x, group = NULL, mat = FALSE, type = 3, digits = 15, ...) {
# data are x, grouping variable is group
cl <- match.call()
if (is.null(group)) {
answer <- describe(x, type = type)
warning("no grouping variable requested")
} else {
if (!is.data.frame(group) && !is.list(group) && (length(group) < NROW(x))) {
group <- x[, group]
}
answer <- by(x, group, describe, type = type, ...)
class(answer) <- c("gftools", "describeBy") # probably better not to make of class psych (at least not yet)
}
if (mat) {
ncol <- length(answer[[1]]) # the more complicated case. How to reorder a list of data.frames
# the interesting problem is treating the case of multiple grouping variables.
n.var <- NROW(answer[[1]])
n.col <- NCOL(answer[[1]])
n.grouping <- length(dim(answer)) # this is the case of multiple grouping variables
n.groups <- prod(dim(answer))
names <- names(answer[[1]])
row.names <- attr(answer[[1]], "row.names")
dim.names <- attr(answer, "dimnames")
mat.ans <- matrix(NaN, ncol = ncol, nrow = n.var * n.groups)
labels.ans <- matrix(NaN, ncol = n.grouping + 1, nrow = n.var * n.groups)
colnames(labels.ans) <- c("item", paste("group", 1:n.grouping, sep = ""))
colnames(mat.ans) <- colnames(answer[[1]])
rn <- 1:(n.var * n.groups)
k <- 1
labels.ans[, 1] <- seq(1, (n.var * n.groups))
group.scale <- cumprod(c(1, dim(answer)))
for (var in 1:(n.var * n.groups)) {
for (group in 1:n.grouping) {
groupi <- ((trunc((var - 1) / group.scale[group])) %% dim(answer)[group]) + 1
labels.ans[var, group + 1] <- dim.names[[group]][[groupi]]
}
}
k <- 1
for (var in 1:n.var) {
for (group in 1:n.groups) {
rn[k] <- paste(row.names[var], group, sep = "")
# mat.ans[k,1] <- group
for (stat in 1:n.col) {
if (!is.null(answer[[group]][[stat]][var])) {
mat.ans[k, stat] <- round(answer[[group]][[stat]][var], digits)
} else {
mat.ans[k, stat] <- NA
}
}
k <- k + 1
}
}
answer <- data.frame(labels.ans, mat.ans)
rownames(answer) <- rn
}
return(answer)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.