R/describe.R

Defines functions describeData describe describeBy

Documented in describe describeBy describeData

#' describeData
#'
#' @param x = est la variable decrite
#' @param head = 4 par defaut, nombre de lignes affichees en partant du haut
#' @param tail = 4 par defaut, nombre de lignes affichees en partant du bas
#'
#' @export
#'

describeData <- function(x, head = 4, tail = 4) {
  valid <- function(x) {
    sum(!is.na(x))
  }
  nvar <- ncol(x)
  all.numeric <- nvar
  ans <- matrix(NA, nrow = nvar, ncol = 2)
  nobs <- nrow(x)
  cc <- 0
  cc <- try(complete.cases(x), silent = TRUE)
  if (class(cc) == "try-error") {
    print ("pouet")
    cc <- NA
  }
  cc <- sum(cc, na.rm = TRUE)
  for (i in 1:nvar) {
    if (is.numeric(x[, i])) {
      ans[i, 2] <- 1
    } else {
      if ((is.factor(x[, i])) || (is.logical(x[, i]))) {
        ans[i, 2] <- 2
      } else {
        if (is.character(x[, i])) {
          ans[i, 2] <- 3
        } else {
          ans[i, 2] <- 4
        }
      }
    }
    ans[i, 1] <- valid(x[, i])
  }
  if (is.numeric(unlist(x))) {
    all.numeric <- TRUE
  } else {
    all.numeric <- FALSE
  }
  H1 <- t(x[1:head, 1:nvar])
  T1 <- t(x[(nobs - tail + 1):nobs, 1:nvar])
  temp <- data.frame(V = 1:nvar, ans, H1, T1)

  colnames(temp) <- c("variable #", "n.obs", "type", paste("H", 1:head, sep = ""), paste("T", 1:tail, sep = ""))
  rownames(temp)[temp[, "type"] != 1] <- paste(rownames(temp)[temp[, "type"] != 1], "*", sep = "")
  result <- (list(n.obs = nobs, nvar = nvar, all.numeric = all.numeric, complete.cases = cc, variables = temp))
  class(result) <- c("gftools", "describeData")
  return(result)
}

#' describe
#'
#' @param x = est la variable decrite
#' @param na.rm = TRUE par defaut, outre-passe les valeurs NA
#' @param interp = FALSE par defaut, indique si la mediane est interpretee
#' @param ranges = TRUE par defaut, affiche le min, max, etendue et sum
#' @param trim = 0.1 par defaut
#' @param type = 3 par defaut
#' @param check = TRUE par defaut
#' @param fast = NULL par defaut, si TRUE affiche les valeurs fondamentales
#' @param quant = NULL par defaut, si TRUE affiche les quantiles
#' @param IQR = FALSE par defaut, si TRUE affiche les inter quartiles
#'
#' @export
#'

describe <- function(x, na.rm = TRUE, interp = FALSE, ranges = TRUE, trim = 0.1, type = 3, check = TRUE, fast = NULL, quant = NULL, IQR = FALSE) {
  cl <- match.call()
  # on definit une fonction locale
  valid <- function(x) {
    sum(!is.na(x))
  }
  # on complete les cases en erreur
  if (!na.rm) {
    x <- na.omit(x)
  }
  # par defaut utilisez fast pour les larges data
  if (is.null(fast)) {
    if (prod(dim(x)) > 10 ^ 7) {
      fast <- TRUE
    } else {
      fast <- FALSE
    }
  }
  numstats <- 10 + length(quant) + IQR
  if (is.null(dim(x)[2])) {
    len <- 1
    nvar <- 1
    # cree un tableau temporaire stats contenant toutes les stats
    stats <- matrix(rep(NA, numstats), ncol = numstats)
    stats[1, 1] <- valid(x)
    stats[1, 2] <- mean(x, na.rm = na.rm)
    if (interp) {
      stats[1, 3] <- interp.median(x, na.rm = na.rm)
    } else {
      stats[1, 3] <- median(x, na.rm = na.rm)
    }
    stats[1, 4] <- min(x, na.rm = na.rm)
    stats[1, 5] <- max(x, na.rm = na.rm)
    stats[1, 6] <- sum(x, na.rm = na.rm)
    stats[1, 7] <- mad(x, na.rm = na.rm)
    stats[1, 8] <- mean(x, na.rm = na.rm, trim = trim)
    stats[1, 9] <- sd(x, na.rm = na.rm)
    stats[1, 10] <- sd(x, na.rm = na.rm) / mean(x, na.rm = na.rm)

    vars <- 1
    if (!is.null(quant)) {
      Qnt <- quantile(x, prob = quant, na.rm = TRUE)
      stats[1, (IQR + 11):numstats] <- t(Qnt)
    }
    if (IQR) {
      Quart <- t(quantile(x, prob = c(0.25, 0.75), na.rm = TRUE))
      Iqr <- Quart[, 2] - Quart[, 1]
      stats[1, 10] <- Iqr
    }
    rownames(stats) <- "X1"
  } else {
    nvar <- ncol(x)
    # cree un tableau temporaire
    stats <- matrix(rep(NA, nvar * numstats), ncol = numstats)
    if (is.null(colnames(x))) {
      colnames(x) <- paste0("X", 1:ncol(x))
    }
    rownames(stats) <- colnames(x)
    stats[, 1] <- apply(x, 2, valid)
    vars <- c(1:nvar)
    ## ne fonctionne pas avec les matrices
    if (!is.matrix(x) && check) {
      for (i in 1:nvar) {
        if (!is.numeric(x[[i]])) {
          if (fast) {
            x[[i]] <- NA
          } else {
            if (is.factor(unlist(x[[i]])) | is.character(unlist(x[[i]]))) {
              x[[i]] <- as.numeric(x[[i]])
            } else {
              x[[i]] <- NA
            }
          }
          rownames(stats)[i] <- paste(rownames(stats)[i], "*", sep = "")
        }
      }
    }
    x <- as.matrix(x)
    if (!is.numeric(x)) {
      message("Converted non-numeric matrix input to numeric.  Are you sure you wanted to do this. Please check your data")
      x <- matrix(as.numeric(x), ncol = nvar)
      rownames(stats) <- paste0(rownames(stats), "*")
    }
    stats[, 2] <- apply(x, 2, mean, na.rm = na.rm)
    stats[, 9] <- apply(x, 2, sd, na.rm = na.rm)
    if (ranges) {
      if (fast) {
        stats[, 4] <- apply(x, 2, min, na.rm = na.rm)
        stats[, 5] <- apply(x, 2, max, na.rm = na.rm)
        stats[, 6] <- apply(x, 2, sum, na.rm = na.rm)
      } else {
        stats[, 4] <- apply(x, 2, min, na.rm = na.rm)
        stats[, 5] <- apply(x, 2, max, na.rm = na.rm)
        stats[, 6] <- apply(x, 2, sum, na.rm = na.rm)
        stats[, 7] <- apply(x, 2, mad, na.rm = na.rm)
        stats[, 8] <- apply(x, 2, mean, na.rm = na.rm, trim = trim)
        if (interp) {
          stats[, 3] <- apply(x, 2, interp.median, na.rm = na.rm)
        } else {
          stats[, 3] <- apply(x, 2, median, na.rm = na.rm)
        }
      }
    }
    if (!is.null(quant)) {
      Qnt <- apply(x, 2, quantile, prob = quant, na.rm = TRUE)
      stats[, (IQR + 11):numstats] <- t(Qnt)
    }
    if (IQR) {
      Quart <- t(apply(x, 2, quantile, prob = c(0.25, 0.75), na.rm = TRUE))
      Iqr <- Quart[, 2] - Quart[, 1]
      stats[, 10] <- Iqr
    }
  }
  # fin de la matrice maintenant on somme les resultats
  if (numstats > (11 + IQR)) {
    colnames(stats)[(11 + IQR):numstats] <- paste0("Q", quant[1:length(quant)])
  }
  # les autres options sont ranges, numstats > 11
  if (fast) {
    answer <- data.frame(vars = vars, n = stats[, 1], mean = stats[, 2], sd = stats[, 9], cv = stats[, 9] / stats[, 2], se = stats[, 9] / sqrt(stats[, 1]), sum = stats[, 6])
  }
  if (ranges) {
    answer <- data.frame(vars = vars, n = stats[, 1], mean = stats[, 2], sd = stats[, 9], cv = stats[, 9] / stats[, 2], min = stats[, 4], max = stats[, 5], range = stats[
      ,
      5
    ] - stats[, 4], se = stats[, 9] / sqrt(stats[, 1]), sum = stats[, 6])
  } else {
    answer <- data.frame(vars = vars, n = stats[, 1], mean = stats[, 2], sd = stats[, 9], cv = stats[, 9] / stats[, 2], se = stats[, 9] / sqrt(stats[, 1]), sum = stats[, 6])
  }
  if (IQR) {
    answer <- data.frame(answer, IQR = stats[, 10])
  }
  if (numstats > (11 + IQR)) {
    if (nvar > 1) {
      # ajoute les quantiles
      answer <- data.frame(answer, stats[, (IQR + 11):numstats])
    } else {
      answer <- data.frame(answer, t(stats[, (IQR + 11):numstats]))
    }
  }
  class(answer) <- c("gftools", "describe", "data.frame")
  return(answer)
}


#' describeBy
#'
#' @param x = data
#' @param group = NULL par defaut
#' @param mat = FALSE par defaut
#' @param type = 3 par defaut
#' @param digits = 15 apr defaut, nombre de decimales affichees
#' @param ... = other arguments
#'
#' @export
#'

describeBy <- function(x, group = NULL, mat = FALSE, type = 3, digits = 15, ...) {
  # data are x, grouping variable is group
  cl <- match.call()
  if (is.null(group)) {
    answer <- describe(x, type = type)
    warning("no grouping variable requested")
  } else {
    if (!is.data.frame(group) && !is.list(group) && (length(group) < NROW(x))) {
      group <- x[, group]
    }
    answer <- by(x, group, describe, type = type, ...)
    class(answer) <- c("gftools", "describeBy") # probably better not to make of class psych (at least not yet)
  }

  if (mat) {
    ncol <- length(answer[[1]]) # the more complicated case. How to reorder a list of data.frames
    # the interesting problem is treating the case of multiple grouping variables.
    n.var <- NROW(answer[[1]])
    n.col <- NCOL(answer[[1]])
    n.grouping <- length(dim(answer)) # this is the case of multiple grouping variables
    n.groups <- prod(dim(answer))
    names <- names(answer[[1]])
    row.names <- attr(answer[[1]], "row.names")
    dim.names <- attr(answer, "dimnames")

    mat.ans <- matrix(NaN, ncol = ncol, nrow = n.var * n.groups)
    labels.ans <- matrix(NaN, ncol = n.grouping + 1, nrow = n.var * n.groups)
    colnames(labels.ans) <- c("item", paste("group", 1:n.grouping, sep = ""))
    colnames(mat.ans) <- colnames(answer[[1]])
    rn <- 1:(n.var * n.groups)
    k <- 1
    labels.ans[, 1] <- seq(1, (n.var * n.groups))
    group.scale <- cumprod(c(1, dim(answer)))

    for (var in 1:(n.var * n.groups)) {
      for (group in 1:n.grouping) {
        groupi <- ((trunc((var - 1) / group.scale[group])) %% dim(answer)[group]) + 1
        labels.ans[var, group + 1] <- dim.names[[group]][[groupi]]
      }
    }

    k <- 1
    for (var in 1:n.var) {
      for (group in 1:n.groups) {
        rn[k] <- paste(row.names[var], group, sep = "")
        # mat.ans[k,1] <- group
        for (stat in 1:n.col) {
          if (!is.null(answer[[group]][[stat]][var])) {
            mat.ans[k, stat] <- round(answer[[group]][[stat]][var], digits)
          } else {
            mat.ans[k, stat] <- NA
          }
        }
        k <- k + 1
      }
    }
    answer <- data.frame(labels.ans, mat.ans)
    rownames(answer) <- rn
  }
  return(answer)
}
pobsteta/gftools documentation built on March 28, 2020, 8:25 p.m.