#added 1/11/14
#modified 12/29/14 to handle cases with non-numeric data
#for fast descriptions
describeData <-
function (x, head = 4, tail = 4)
{
valid <- function(x) {
sum(!is.na(x))
}
nvar <- ncol(x)
all.numeric <- nvar
ans <- matrix(NA,nrow=nvar,ncol=2)
nobs <- nrow(x)
cc <- 0
cc <- try(complete.cases(x),silent=TRUE)
if(class(cc) == "try-error") cc <- NA
cc <- sum(cc,na.rm=TRUE)
for (i in 1:nvar) {
if (is.numeric(x[,i])) {ans[i,2] <- 1 } else {
if ((is.factor(x[,i])) || (is.logical(x[,i]))) {
ans[i,2] <- 2
} else {
if (is.character(x[,i])) {
ans[i,2] <- 3
} else {ans[i,2] <- 4}
}
}
ans[i,1] <- valid(x[,i])
}
if (is.numeric(unlist(x))) {
all.numeric <- TRUE
}
else {
all.numeric <- FALSE
}
H1 <- t(x[1:head,1:nvar])
T1 <- t(x[(nobs-tail+1):nobs,1:nvar])
temp <- data.frame(V=1:nvar,ans,H1,T1)
colnames(temp) <- c("variable #", "n.obs", "type", paste("H",
1:head, sep = ""), paste("T", 1:tail, sep = ""))
rownames(temp)[temp[,"type"]!=1] <- paste(rownames(temp)[temp[,"type"]!=1],"*",sep="")
result <- (list(n.obs = nobs, nvar = nvar, all.numeric = all.numeric,
complete.cases = cc, variables = temp))
class(result) <- c("psych", "describeData")
return(result)
}
#changed October 12, 2011 to use apply because mean and sd are deprecated for data.frames
#modified January 10, 2014 to add the check option to improve speed. A few other improvements
#modified December 2014 to add the fast option for large data sets
#modified May 21, 2015 to allow non-numeric data to be described (but with a warning)
#further modified June 21, 2016 to allow for character input as well as well reporting quantiles
#tried to improve the speed by using multicores, but this requires using s or lapply which don't do what I need.
"describe" <-
function (x,na.rm=TRUE,interp=FALSE,skew=TRUE,ranges=TRUE,trim=.1,type=3,check=TRUE,fast=NULL,quant=NULL,IQR=FALSE) #basic stats after dropping non-numeric data
#slightly faster if we don't do skews
{
cl <- match.call()
#first, define a local function
valid <- function(x) {sum(!is.na(x))}
if(!na.rm) x <- na.omit(x) #just complete cases
if(is.null(fast)) {
if (prod(dim(x)) > 10^7) {fast <- TRUE } else {fast <- FALSE}} #the default is to use fast for large data sets
if(fast) {skew <- FALSE
}
numstats <- 10 + length(quant) + IQR
if ( is.null(dim(x)[2])) { #do it for vectors or
len <- 1
nvar <- 1
stats = matrix(rep(NA,numstats),ncol=numstats) #create a temporary array
stats[1, 1] <- valid(x )
stats[1, 2] <- mean(x, na.rm=na.rm )
stats[1,10] <- sd(x,na.rm=na.rm)
if(interp) {stats[1, 3] <- interp.median(x,na.rm=na.rm ) } else {stats[1,3] <- median(x,na.rm=na.rm) }
stats[1,9] <- mean(x,na.rm=na.rm, trim=trim)
stats[1, 4] <- min(x, na.rm=na.rm )
stats[1, 5] <- max(x, na.rm=na.rm )
stats[1, 6] <- skew(x,na.rm=na.rm,type=type )
stats[1,7] <- mad(x,na.rm=na.rm)
stats[1,8] <- kurtosi(x,na.rm=na.rm,type=type)
vars <- 1
if(!is.null(quant)) { Qnt <- quantile(x,prob=quant,na.rm=TRUE)
stats[1,(IQR+11):numstats] <- t(Qnt)}
if(IQR) {Quart <- t(quantile(x,prob=c(.25,.75),na.rm=TRUE))
Iqr <- Quart[,2] -Quart[,1]
stats[1,11] <- Iqr
}
rownames(stats) <- "X1"
} else {
nvar <- ncol(x)
stats = matrix(rep(NA,nvar*numstats),ncol=numstats) #create a temporary array
if(is.null(colnames(x))) colnames(x) <- paste0("X",1:ncol(x))
rownames(stats) <- colnames(x)
stats[,1] <- apply(x,2,valid)
vars <- c(1:nvar)
##adapted from the pairs function to convert logical or categorical to numeric
if(!is.matrix(x) && check) { #does not work for matrices
for(i in 1:nvar) {
if(!is.numeric(x[[i]] )) {
if(fast) {x[[i]] <- NA} else {
if(is.factor(unlist(x[[i]])) | is.character(unlist(x[[i]]))) { x[[i]] <- as.numeric(x[[i]])
# if(is.factor(unlist(x[[i]]))) { #fixed 5/21/15
# x[[i]] <- as.numeric(x[[i]])}
} else {x[[i]] <- NA} }
rownames(stats)[i] <- paste(rownames(stats)[i],"*",sep="")}
} }
x <- as.matrix(x)
if(!is.numeric(x)) {message("Converted non-numeric matrix input to numeric. Are you sure you wanted to do this. Please check your data")
x <- matrix(as.numeric(x),ncol=nvar)
rownames(stats) <- paste0(rownames(stats),"*")}
stats[,2] <- apply(x, 2,mean,na.rm=na.rm )
stats[,10] <- apply(x,2,sd,na.rm=na.rm)
if (skew) {stats[, 6] <- skew(x,na.rm=na.rm,type=type )
stats[,8] <- kurtosi(x,na.rm=na.rm,type=type)}
if(ranges) {
if(fast) {
stats[,4] <- apply(x,2,min,na.rm=na.rm)
stats[,5] <- apply(x,2,max,na.rm = na.rm)
} else {
stats[, 4] <- apply(x,2,min, na.rm=na.rm )
stats[, 5] <- apply(x,2,max, na.rm=na.rm )
stats[,7] <- apply(x,2,mad, na.rm=na.rm)
stats[,9] <- apply(x,2, mean,na.rm=na.rm,trim=trim)
if(interp) {stats[, 3] <- apply(x,2,interp.median,na.rm=na.rm ) } else {stats[,3] <- apply(x,2,median,na.rm=na.rm) }
}}
if(!is.null(quant)) { Qnt <- apply(x,2,quantile,prob=quant,na.rm=TRUE)
stats[,(IQR+11):numstats] <- t(Qnt)}
if(IQR) {Quart <- t(apply(x,2,quantile,prob=c(.25,.75),na.rm=TRUE))
Iqr <- Quart[,2] - Quart[,1]
stats[,11] <- Iqr
}
} #end of maxtrix input
#now summarize the results
if (numstats > (10 + IQR)) {
colnames(stats)[(11+IQR):numstats] <- paste0("Q",quant[1:length(quant)])}
#the following output was cleaned up on June 22, 2016 added the quantile information.
#the various options are ranges, skew, fast, numstats > 10
if(fast) { answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],se=stats[,10]/sqrt(stats[,1])) } #minimal case
#if((!skew) && ranges) {answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],min= stats[,4],max=stats[,5], range=stats[,5]-stats[,4],se=stats[,10]/sqrt(stats[,1])) }
if(skew) {
if(ranges) { answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10], median = stats[, 3],trimmed =stats[,9], mad = stats[,7], min= stats[,4],max=stats[,5],
range=stats[,5]-stats[,4],skew = stats[, 6], kurtosis = stats[,8],se=stats[,10]/sqrt(stats[,1])) } else {
answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],skew = stats[, 6], kurtosis = stats[,8],se=stats[,10]/sqrt(stats[,1])) }
} else {if(ranges) {answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],min= stats[,4],max=stats[,5], range=stats[,5]-stats[,4],se=stats[,10]/sqrt(stats[,1])) } else {
answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],se=stats[,10]/sqrt(stats[,1])) }
}
if(IQR) answer <- data.frame(answer,IQR=stats[,11])
if (numstats > (10+ IQR)) {if(nvar > 1 ) {answer <- data.frame(answer, stats[,(IQR+11):numstats]) #add the quantile information
} else {
answer <- data.frame(answer, t(stats[,(IQR+11):numstats])) }
}
# {if (ranges) {
# if(skew){
#
#
# if(numstats > 10) { answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10], median = stats[, 3],trimmed =stats[,9], mad = stats[,7], min= stats[,4],max=stats[,5],
# range=stats[,5]-stats[,4],skew = stats[, 6], kurtosis = stats[,8], se=stats[,10]/sqrt(stats[,1]), stats[,(11:numstats)])} else {
# answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10], median = stats[, 3],trimmed =stats[,9], mad = stats[,7], min= stats[,4],max=stats[,5],
# range=stats[,5]-stats[,4],skew = stats[, 6], kurtosis = stats[,8],se=stats[,10]/sqrt(stats[,1]))} #the typical (maximum) case
# } else {
# if(!fast) {
# if(numstats > 10) {
# answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10], median = stats[,3],trimmed =stats[,9],
# mad = stats[,7],min= stats[,4],max=stats[,5], range=stats[,5]-stats[,4],se=stats[,10]/sqrt(stats[,1])) #somewhat shorter
# } else {answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],min= stats[,4],max=stats[,5], range=stats[,5]-stats[,4],se=stats[,10]/sqrt(stats[,1])) #even shorter
# }
# }} else {
# if(skew){answer <- data.frame(vars,n = stats[,1],mean=stats[,2], sd =stats[,10],skew = stats[, 6], kurtosis = stats[,8],se=stats[,10]/sqrt(stats[,1]))} else {
# answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],se=stats[,10]/sqrt(stats[,1]))}} #the minimal case -- fixed 1/2/2014
# }
# answer <-data.frame(var=vars,temp, se = temp$sd/sqrt(temp$n)) #replaced with forming the answer in the if statements 10/1/2014 to improve memory management
class(answer) <- c("psych","describe","data.frame")
return(answer)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.