Nothing
#Added 2/10/24
#This is a parellel version of describe --much faster
#just calls the old describe and parallelizes it
describe <- function(x,na.rm=TRUE,interp=FALSE,skew=TRUE,ranges=TRUE,trim=.1,type=3,check=TRUE,fast=NULL,quant=NULL,
IQR=FALSE,omit=FALSE,data=NULL,size=50) {
if(inherits(x,"formula")) {ps <- fparse(x) #group was specified, call describeBy
if(missing(data)) {
x <- get(ps$y)
group <- x[,ps$x]} else {x <- data[ps$y]
group <- data[ps$x]
}
describeBy(x,group=group,na.rm=na.rm,interp=interp,skew=skew,ranges=ranges,trim=trim,type=type,check=check,fast=fast,quant=quant,IQR=IQR,omit=omit,data=data)
} else {
cl <- match.call()
#first, define a local function
valid <- function(x) {sum(!is.na(x))}
if(!na.rm) x <- na.omit(x) #just complete cases
if(is.null(fast)) { #don't reset fast inside describe.1 loop
if (prod(dim(x)) > 10^7) {fast <- TRUE } else {fast <- FALSE}
}
nvar <- NCOL(x)
v.names <- colnames(x)
if(nvar < size) {result <- describe.1(x=x,na.rm=na.rm, interp=interp, skew=skew, ranges=ranges,
trim=trim, type=type,check=check,fast=fast,quant=quant,
IQR = IQR, omit=omit, data=data)} else {
n.steps <- ceiling(nvar/size)
short <- function(i) {
loweri <- (i-1) * size + 1
upperi <- min(i * size,nvar)
res <- describe.1(x=x[,loweri:upperi],na.rm=na.rm, interp=interp, skew=skew, ranges=ranges,
trim=trim, type=type,check=check,fast=fast,quant=quant,
IQR = IQR, omit=omit, data=data)
return(res)
}
result <- mcmapply(short,c(1:n.steps))
n.result <- NROW(result)
names.result <- rownames(result)
temp <- NULL
for( i in 1:NCOL(result)) {tt <- matrix(unlist(result[,i]),ncol=n.result) #fix to be dynamic
# tt <- result[[i]]
temp <- rbind(temp,tt)}
# rownames(temp) <- colnames(x)
# colnames(temp) <- c("vars" , "n" , "mean", "sd", "median", "min" , "max", "range" , "se") #fix to be dynamic
colnames(temp) <- names.result
temp[,1 ] <-1:nvar
rownames(temp ) <- v.names
result <- as.data.frame(temp)}
class(result) <- c("psych","Pdescribe","data.frame")
return(result)
}
}
"describeFast" <- function(x) {
if(!inherits(x[1], "data.frame")) x <- fix.dplyr(x) #to get around a problem created by dplyr
nvar <- NCOL(x)
nobs <- NROW(x)
valid <- colSums(!is.na(x))
temp <- matrix(NA,nrow=nvar,ncol=4)
for(i in 1:nvar) {temp[i,1] <- is.numeric(x[1,i])
temp[i,2] <- is.factor(x[1,i])
temp[i,3] <- is.logical(x[1,i])
temp[i,4] <- is.character(x[1,i]) }
ttt <- which(temp[,1:4] == TRUE,arr.ind=TRUE)
if(nvar > 1) {
ttt <- dfOrder(ttt,"row")}
temp <- cbind(temp,ttt["col"])
colnames(temp) <- c("numeric","factor","logical","character","type")
cc <- try(complete.cases(x),silent=TRUE)
if(inherits(cc,"try-error")) cc <- NA
cc <- sum(cc,na.rm=TRUE)
all.numeric <- sum(temp[,1])
all.factor <- sum(temp[,2])
result.df <- data.frame(var=1:nvar,n.obs=valid,temp)
result<- list(nvar=nvar,n.obs =nobs,complete.cases = cc,numeric=all.numeric,factors=all.factor,result.df=result.df)
class(result) <- c("psych","describeFast")
return(result)
}
#added 1/11/14
#modified 12/29/14 to handle cases with non-numeric data
#for fast descriptions
describeData <-
function (x, head = 4, tail = 4)
{
valid <- function(x) {
sum(!is.na(x))
}
nvar <- ncol(x)
all.numeric <- nvar
ans <- matrix(NA,nrow=nvar,ncol=2)
nobs <- nrow(x)
cc <- 0
cc <- try(complete.cases(x),silent=TRUE)
if(inherits(cc, "try-error")) cc <- NA
cc <- sum(cc,na.rm=TRUE)
for (i in 1:nvar) {
if (is.numeric(x[,i])) {ans[i,2] <- 1 } else {
if ((is.factor(x[,i])) || (is.logical(x[,i]))) {
ans[i,2] <- 2
} else {
if (is.character(x[,i])) {
ans[i,2] <- 3
} else {ans[i,2] <- 4}
}
}
ans[i,1] <- valid(x[,i])
}
if (is.numeric(unlist(x))) {
all.numeric <- TRUE
}
else {
all.numeric <- FALSE
}
H1 <- t(x[1:head,1:nvar])
T1 <- t(x[(nobs-tail+1):nobs,1:nvar])
temp <- data.frame(V=1:nvar,ans,H1,T1)
colnames(temp) <- c("variable #", "n.obs", "type", paste("H",
1:head, sep = ""), paste("T", 1:tail, sep = ""))
rownames(temp)[temp[,"type"]!=1] <- paste(rownames(temp)[temp[,"type"]!=1],"*",sep="")
result <- (list(n.obs = nobs, nvar = nvar, all.numeric = all.numeric,
complete.cases = cc, variables = temp))
class(result) <- c("psych", "describeData")
return(result)
}
#changed October 12, 2011 to use apply because mean and sd are deprecated for data.frames
#modified January 10, 2014 to add the check option to improve speed. A few other improvements
#modified December 2014 to add the fast option for large data sets
#modified May 21, 2015 to allow non-numeric data to be described (but with a warning)
#further modified June 21, 2016 to allow for character input as well as well reporting quantiles
#tried to improve the speed by using multicores, but this requires using s or lapply which don't do what I need.
#Added the as.factor to the convert to numeric for non-numeric data
#Improved July 9, 2020 to allow formula input for grouping variables
#Further improved Sept 02, 2020 to allow formula input with data specified to match other formula features
#include median with fast output (has a very small slowing effect)
"describe.1" <-
function (x,na.rm=TRUE,interp=FALSE,skew=TRUE,ranges=TRUE,trim=.1,type=3,check=TRUE,fast=NULL,
quant=NULL,IQR=FALSE,omit=FALSE,data=NULL) #basic stats after dropping non-numeric data
#slightly faster if we don't do skews
{
if(inherits(x,"formula")) {ps <- fparse(x) #group was specified, call describeBy
if(missing(data)) {
x <- get(ps$y)
group <- x[,ps$x]} else {x <- data[ps$y]
group <- data[ps$x]
}
describeBy(x,group=group,na.rm=na.rm,interp=interp,skew=skew,ranges=ranges,trim=trim,type=type,check=check,fast=fast,quant=quant,IQR=IQR,omit=omit,data=data)
} else {
cl <- match.call()
#first, define a local function
valid <- function(x) {sum(!is.na(x))}
if(!na.rm) x <- na.omit(x) #just complete cases
#if(is.null(fast)) { #don't reset fast inside describe.1 loop
# if (prod(dim(x)) > 10^7) {fast <- TRUE } else {fast <- FALSE}} #the default is to use fast for large data sets
# if(fast) {skew <- FALSE
# }
numstats <- 10 + length(quant) + IQR
cn <- "X1"
if ( NCOL(x) < 2) {if(is.data.frame(x)) { #
if( !is.numeric(x[,1])) {warning ("You were trying to describe a non-numeric data.frame or vector which describe converted to numeric.")
x[,1] <- as.numeric(as.factor(x[,]))
}
if(!is.null(colnames(x))) {cn <- colnames(x)} else {cn <- "X1"}
x <- x[,1] } #getting around the problem of single column data frames
#do it for vectors or
len <- 1
nvar <- 1
stats = matrix(rep(NA,numstats),ncol=numstats) #create a temporary array
stats[1, 1] <- valid(x )
stats[1, 2] <- mean(x, na.rm=na.rm )
stats[1,10] <- sd(x,na.rm=na.rm)
if(interp) {stats[1, 3] <- interp.median(x,na.rm=na.rm ) } else {stats[1,3] <- median(x,na.rm=na.rm) }
stats[1,9] <- mean(x,na.rm=na.rm, trim=trim)
stats[1, 4] <- min(x, na.rm=na.rm )
stats[1, 5] <- max(x, na.rm=na.rm )
stats[1, 6] <- skew(x,na.rm=na.rm,type=type )
stats[1,7] <- mad(x,na.rm=na.rm)
stats[1,8] <- kurtosi(x,na.rm=na.rm,type=type)
vars <- 1
if(!is.null(quant)) { Qnt <- quantile(x,prob=quant,na.rm=TRUE)
stats[1,(IQR+11):numstats] <- t(Qnt)}
if(IQR) {Quart <- t(quantile(x,prob=c(.25,.75),na.rm=TRUE))
Iqr <- Quart[,2] -Quart[,1]
stats[1,11] <- Iqr
}
rownames(stats) <- cn
} else {
nvar <- ncol(x)
stats = matrix(rep(NA,nvar*numstats),ncol=numstats) #create a temporary array
if(is.null(colnames(x))) colnames(x) <- paste0("X",1:ncol(x))
rownames(stats) <- colnames(x)
stats[,1] <- apply(x,2,valid)
vars <- c(1:nvar)
##adapted from the pairs function to convert logical or categorical to numeric
select <- 1:nvar
if(!is.matrix(x) && check) { #does not work for matrices
for(i in 1:nvar) {
if(!is.numeric(x[[i]] )) {
if(fast) {x[[i]] <- NA} else {
if(omit) {select[i] <- NA}
if(is.factor(unlist(x[[i]])) | is.character(unlist(x[[i]]))) { x[[i]] <- as.numeric(as.factor(x[[i]]))
rownames(stats)[i] <- paste(rownames(stats)[i],"*",sep="")
} else {x[[i]] <- NA}
}
}
}
}
select <- select[!is.na(select)]
x <- as.matrix(x[,select])
vars <- vars[select]
stats <- stats[select,]
if(!is.numeric(x)) {message("Converted non-numeric matrix input to numeric. Are you sure you wanted to do this. Please check your data")
x <- matrix(as.numeric(x),ncol=nvar)
rownames(stats) <- paste0(rownames(stats),"*")}
stats[,2] <- apply(x, 2,mean,na.rm=na.rm )
stats[,10] <- apply(x,2,sd,na.rm=na.rm)
if (skew) {stats[, 6] <- skew(x,na.rm=na.rm,type=type )
stats[,8] <- kurtosi(x,na.rm=na.rm,type=type)}
if(ranges) {
if(fast) {
stats[,4] <- apply(x,2,min,na.rm=na.rm)
stats[,5] <- apply(x,2,max,na.rm = na.rm)
if(interp) {stats[, 3] <- apply(x,2,interp.median,na.rm=na.rm ) } else {stats[,3] <- apply(x,2,median,na.rm=na.rm) }
} else {
stats[, 4] <- apply(x,2,min, na.rm=na.rm )
stats[, 5] <- apply(x,2,max, na.rm=na.rm )
stats[,7] <- apply(x,2,mad, na.rm=na.rm)
stats[,9] <- apply(x,2, mean,na.rm=na.rm,trim=trim)
if(interp) {stats[, 3] <- apply(x,2,interp.median,na.rm=na.rm ) } else {stats[,3] <- apply(x,2,median,na.rm=na.rm) }
}}
if(!is.null(quant)) { Qnt <- apply(x,2,quantile,prob=quant,na.rm=TRUE)
stats[,(IQR+11):numstats] <- t(Qnt)}
if(IQR) {Quart <- t(apply(x,2,quantile,prob=c(.25,.75),na.rm=TRUE))
Iqr <- Quart[,2] - Quart[,1]
stats[,11] <- Iqr
}
} #end of maxtrix input
#now summarize the results
if (numstats > (10 + IQR)) {
colnames(stats)[(11+IQR):numstats] <- paste0("Q",quant[1:length(quant)])}
#the following output was cleaned up on June 22, 2016 added the quantile information.
#the various options are ranges, skew, fast, numstats > 10
if(fast) { answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],median=stats[,3],se=stats[,10]/sqrt(stats[,1])) } #minimal case
#if((!skew) && ranges) {answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],min= stats[,4],max=stats[,5], range=stats[,5]-stats[,4],se=stats[,10]/sqrt(stats[,1])) }
if(skew) {
if(ranges) {if(fast) { answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10], median = stats[, 3], min= stats[,4],max=stats[,5],
range=stats[,5]-stats[,4],skew = stats[, 6], kurtosis = stats[,8],se=stats[,10]/sqrt(stats[,1])) } else {
answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10], median = stats[, 3],trimmed =stats[,9], mad = stats[,7], min= stats[,4],max=stats[,5],
range=stats[,5]-stats[,4],skew = stats[, 6], kurtosis = stats[,8],se=stats[,10]/sqrt(stats[,1])) }
} else {
answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],skew = stats[, 6],
kurtosis = stats[,8],se=stats[,10]/sqrt(stats[,1])) }
} else {if(ranges) {answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],median=stats[,3],
min= stats[,4],max=stats[,5], range=stats[,5]-stats[,4],se=stats[,10]/sqrt(stats[,1])) } else {
answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],se=stats[,10]/sqrt(stats[,1])) }
}
if(IQR) answer <- data.frame(answer,IQR=stats[,11])
if (numstats > (10+ IQR)) {if(nvar > 1 ) {answer <- data.frame(answer, stats[,(IQR+11):numstats]) #add the quantile information
} else {
answer <- data.frame(answer, t(stats[,(IQR+11):numstats]))
rownames(answer) <- cn}
}
# {if (ranges) {
# if(skew){
#
#
# if(numstats > 10) { answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10], median = stats[, 3],trimmed =stats[,9], mad = stats[,7], min= stats[,4],max=stats[,5],
# range=stats[,5]-stats[,4],skew = stats[, 6], kurtosis = stats[,8], se=stats[,10]/sqrt(stats[,1]), stats[,(11:numstats)])} else {
# answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10], median = stats[, 3],trimmed =stats[,9], mad = stats[,7], min= stats[,4],max=stats[,5],
# range=stats[,5]-stats[,4],skew = stats[, 6], kurtosis = stats[,8],se=stats[,10]/sqrt(stats[,1]))} #the typical (maximum) case
# } else {
# if(!fast) {
# if(numstats > 10) {
# answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10], median = stats[,3],trimmed =stats[,9],
# mad = stats[,7],min= stats[,4],max=stats[,5], range=stats[,5]-stats[,4],se=stats[,10]/sqrt(stats[,1])) #somewhat shorter
# } else {answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],min= stats[,4],max=stats[,5], range=stats[,5]-stats[,4],se=stats[,10]/sqrt(stats[,1])) #even shorter
# }
# }} else {
# if(skew){answer <- data.frame(vars,n = stats[,1],mean=stats[,2], sd =stats[,10],skew = stats[, 6], kurtosis = stats[,8],se=stats[,10]/sqrt(stats[,1]))} else {
# answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],se=stats[,10]/sqrt(stats[,1]))}} #the minimal case -- fixed 1/2/2014
# }
# answer <-data.frame(var=vars,temp, se = temp$sd/sqrt(temp$n)) #replaced with forming the answer in the if statements 10/1/2014 to improve memory management
class(answer) <- c("psych","Pdescribe","data.frame")
return(answer)
}
}
#minor patch on 9/10/23 for case of single variables with quants
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.