R/densityBy.r

"violin"  <- function(x,data=NULL,var=NULL,grp=NULL,grp.name=NULL, xlab=NULL, ylab=NULL,main="Density plot",vertical=TRUE,
   dots=FALSE, rain=FALSE,jitter=.05,
	alpha=1,errors=FALSE,eyes=TRUE,adjust=1,
	restrict=TRUE,xlim=NULL,add=FALSE,col=NULL,pch=20,scale=NULL, ...) {#calls violinBy which is now defaulg

violinBy(x=x,data=data,var=var,grp=grp,grp.name=grp.name,xlab=xlab,ylab=ylab,main=main,vertical=vertical,
      dots=dots, rain=rain, jitter=jitter,
      alpha=alpha, errors=errors,eyes=eyes,adjust=adjust,restrict=restrict,xlim=xlim,
      add=add,col=col,pch=pch,scale=scale,...)
}

#switched from density = 50 to alpha =.5  to speed up the plotting 
"violinBy"  <- function(x,var=NULL,grp=NULL,data=NULL,grp.name=NULL,xlab=NULL, ylab=NULL, main="Density plot",vertical=TRUE,
    dots=FALSE,rain=FALSE, jitter=.05,alpha= 1,errors=FALSE,eyes=TRUE,adjust=1,
    restrict=TRUE,xlim=NULL,add=FALSE,col=NULL,pch=20,scale=NULL, ...) {
   if(is.null(scale)) SCALE=.3  #how wide are the plots?

count.valid <- function(x) {sum(!is.na(x)) }

if(is.null(col)) {col <- c("blue","red","grey","purple","green","yellow")}  #default colors

 formula <- FALSE
   if(inherits(x, "formula")) {  ps <- fparse(x)
   	formula <- TRUE
   if(is.null(data)) {data <- get(ps$y) 
      var <- NULL #we specified the entire dataframe} else { #stop("You must specify the data if you are using formula input") 
     x <- data} else {x <- data[,ps$y,drop=FALSE] }  #added the drop = FALSE 11/28/20
   grp <- data[ps$x]
   if(is.null(xlab)) xlab  <- ps$x 
   names <- ps$x
   grp.n <- ps$x
   if(is.null(ylab)) ylab <- ps$y
   }
  if(!formula) {ps <- list(x = grp, y = x)  #convert from x, grp   to y ~ x format 
     if(is.null(data))  {data <- x
       
       if(!is.null(var))  {data <- data[,var, drop=FALSE]}
        var <-  ps$y <- colnames(data)
        if(is.null(var))  var <- paste("V",1:NCOL(data))
    #    colnames(x) <- colnames(data) <- ps$y  <- var}
        colnames(data) <- ps$y  <- var}
        if(!is.null(grp)) {x <- cbind(data[,ps$y,drop=FALSE], x[,grp,drop=FALSE])} else {
     x <- data[,ps$y,drop=FALSE] }
     data <- x 
     if(is.null(ylab)) ylab = "Observed "
     if(is.null(xlab))  xlab = "" 
    }  
       if(!is.null(grp)) { 
    if(!is.data.frame(grp) && !is.list(grp) && (length(grp) < NROW(x))) grp <- x[,grp,drop=FALSE]}

 if(!is.null(var)) {if(missing(ylab) & (length(var) ==1)) {ylab <- var}
          x <- x[,var , drop = FALSE]}
 nvar <- nvarg <- NCOL(x)
 x <- char2numeric(x)
 
 col <- adjustcolor(col,alpha.f =alpha)        
#if(!is.null(grp)) {
# if(!is.data.frame(grp) && !is.list(grp) && (length(grp) < NROW(x))) grp <- x[,grp]

 if(!is.null(grp)) {stats <- describeBy(data[,ps$y],group = data[,ps$x],quant=c(0,1,.5,.25,.75),mat=TRUE,skew=FALSE) } else { stats <- describe(data[,ps$y,drop=FALSE],quant=c(0,1,.5,.25,.75),skew=FALSE)}
  meanX <- stats[,"mean"]
  minx <- stats[,"min"]
  maxx <- stats[,"max"]
  medx <- stats[,"Q0.5"]
   Q25 <- stats[,"Q0.25"]
  Q75  <- stats[,"Q0.75"]
  nvarg <- NROW(stats)
  n.col <- length(col)
  nX <- stats[,"n"]
  col <- rep(col,nvarg) #too many, but this will do

#  Qnt <-  apply(x,2,function(xx) by(xx,grp,quantile,prob=c(0,1,.5,.25,.75),na.rm=TRUE))
# meanX <- apply(x,2,function(xx) by(xx,grp,mean,na.rm=TRUE))
# nX <- apply(x,2,function(xx) by(xx,grp,count.valid))
# meanX <- matrix(unlist(meanX))
#    Qnt <- matrix(unlist(Qnt),nrow=5)
#    ngrp <- ncol(Qnt)/nvar
#    nvarg <- ncol(Qnt)
#    rangex <- matrix(c(Qnt[1,],Qnt[2,]),nrow=2,byrow=TRUE)
# } else {Qnt <- apply(x,2,quantile,prob=c(0,1,.5,.25,.75),na.rm=TRUE)
# meanX <- apply(x,2,mean,na.rm=TRUE)}
# minx <- Qnt[1,]
# maxx <- Qnt[2,]
# medx <- Qnt[3,]
# Q25 <-  Qnt[4,]
# Q75 <-  Qnt[5,]
# #rangex <- apply(x,2,range,na.rm=TRUE)
# rangex <- matrix(c(Qnt[1,],Qnt[2,]),nrow=2,byrow=TRUE)

tot.n.obs <- nrow(x)

                                                        
names <- grp.name  
if(is.null(grp.name) & !is.null(grp)) { grp.name <- paste(ps$x[1],stats[,"group1"])
names <- grp.name  } else {if(length(grp.name) != NROW(stats)) names <- rownames(stats)}

# names <- rep(grp.name, length(ps$x)) 
# #if(length(ps$x) > 1) ) ) {names <- paste(rep(names,each=ngrp),grp.name[1:ngrp],sep=" ")} else {names <- grp.name}
# if(length(ps$y) > 1) names <- rep(names,length(ps$x))
#      col <- rep(col,nvar* ngrp)
d <- list(nvar)
if(is.null(xlim)) xlim <- c(.5,nvarg+.5)

for (i in 1:nvar) {
#if(!is.null(grp)) { if(restrict) {d[[i]] <- by(x[,i], grp ,function(xx) density(xx,na.rm=TRUE,from=rangex[1,i],to=rangex[2,i]))}
if(!is.null(grp)) { if(restrict) {d[[i]] <- by(x[,i], grp ,function(xx) density(xx,na.rm=TRUE,adjust=adjust,from=min(xx,na.rm=TRUE),to=max(xx,na.rm=TRUE)))} else {
d[[i]] <- by(x[,i], grp ,function(xx) density(xx,na.rm=TRUE)) }} else {
if(restrict) {d[[i]] <- density(x[,i],na.rm=TRUE,adjust=adjust,from=minx[i],to=maxx[i])} else {
d[[i]] <- density(x[,i],na.rm=TRUE)} }
}

if(!add) {if(vertical){plot(1:nvarg,meanX,ylim=c(min(minx),max(maxx)),xlim=xlim,axes=FALSE,xlab=xlab,ylab=ylab,main=main,pch=pch,...)
  axis(1,1:nvarg,names,...)
  axis(2,...)} else {plot(meanX,1:nvarg,xlim=c(min(minx),max(maxx)),ylim=c(.5,nvarg+.5),axes=FALSE,xlab=ylab,ylab=xlab,main=main,pch=pch,...)
      axis(1,...)
     axis(2, 1:nvarg,names,...)}
  box()}
if(!is.null(grp)) d <- unlist(d,recursive=FALSE)
rev <- (length(d[[1]]$y):1) #this prevents a line down the middle


for(i in 1:nvarg) {
if(!is.null(scale)) {width <- scale*sqrt(nX[[i]]/tot.n.obs)/max(d[[i]]$y)} else {width <- SCALE/max(d[[i]]$y)}
#polygon(width*c(-d[[i]]$y,d[[i]]$y[rev])+i,c(d[[i]]$x,d[[i]]$x[rev]),density=density,col=col[i],...)
#if(vertical) {polygon(width*c(-d[[i]]$y,  d[[i]]$y[rev])+i,  c(d[[i]]$x,  d[[i]]$x[rev]),col=col[i ],...)} else {
 #        polygon(c(d[[i]]$x,  d[[i]]$x[rev]), width* c(-d[[i]]$y,  d[[i]]$y[rev])+i,col=col[i],...)}
zero <- rep(0,length(d[[i]]$x))


if(rain) {if(vertical) {polygon(width*c(d[[i]]$y,  zero)+i,c( d[[i]]$x,d[[i]]$x[rev]  ),col=col[i ],...)} else {
         polygon(c(d[[i]]$x,  d[[i]]$x[rev]), width* c(d[[i]]$y,  zero)+i,col=col[i],...)}} else {
         #not rain
     if(vertical) {polygon(width*c(-d[[i]]$y,  d[[i]]$y[rev])+i,  c(d[[i]]$x,  d[[i]]$x[rev]),col=col[i ],...)} else {
                   polygon(c(d[[i]]$x,  d[[i]]$x[rev]), width* c(-d[[i]]$y,  d[[i]]$y[rev])+i,col=col[i],...)}}
         
  # if(vertical) {polygon(width*c(-d[[i]]$y,  d[[i]]$y[rev])+i,  c(d[[i]]$x,  d[[i]]$x[rev]+i),col=col[i ],...)} else {
   #      polygon(c(d[[i]]$x,  d[[i]]$x[rev]), width* c(-d[[i]]$y,  d[[i]]$y[rev])+i,col=col[i],...)}}
         
#draw the quartiles and  medians        
dmd <- max(which(d[[i]]$x <= medx[i]))
d25 <- max(which(d[[i]]$x <= Q25[i]))
d75 <- max(which(d[[i]]$x <= Q75[i]))

if(vertical) {
segments(x0=width*d[[i]]$y[dmd] +i ,y0=d[[i]]$x[dmd],x1=-(1-rain)*width*d[[i]]$y[dmd]+i,y1=d[[i]]$x[dmd],lwd=2)
segments(x0=width*d[[i]]$y[d25] +i ,y0=d[[i]]$x[d25],x1=-(1-rain)*width*d[[i]]$y[d25]+i,y1=d[[i]]$x[d25])
segments(x0=width*d[[i]]$y[d75] +i ,y0=d[[i]]$x[d75],x1=-(1-rain)*width*d[[i]]$y[d75]+i,y1=d[[i]]$x[d75])
  } else {
 
  segments(y0=width*d[[i]]$y[dmd] +i ,x0=d[[i]]$x[dmd],y1=(rain-1)*width*d[[i]]$y[dmd]+i,x1=d[[i]]$x[dmd],lwd=2)
segments(y0=width*d[[i]]$y[d25] +i ,x0=d[[i]]$x[d25],y1=(rain-1)*width*d[[i]]$y[d25]+i,x1=d[[i]]$x[d25])
segments(y0=width*d[[i]]$y[d75] +i ,x0=d[[i]]$x[d75],y1=(rain-1)*width*d[[i]]$y[d75]+i,x1=d[[i]]$x[d75])
  }
 } 

 
 if(errors) {   #currently only works for one DV and one IV

 if(vertical) {
    error.bars.by(x[,1],group=grp[,1],add=TRUE,lines=FALSE,eyes=eyes,v.labels=names,...) 

 } else {
           message("error bars are not implemented for horizontal plots")}
 }
 

if(dots) { if(!is.null(grp)) {
if(jitter >0) {if(length(ps$x) > 1 ) {stripchart(x[,ps$y]~ grp[,ps$x[1] ] +grp[,ps$x[2]] ,vertical=vertical,method="jitter",jitter=jitter,add=TRUE,pch=pch,...)} else {
        stripchart(x[,ps$y]~ grp[,ps$x[1] ] ,vertical=vertical,method="jitter",jitter=jitter,add=TRUE,pch=pch,...) }} else {
  if(length(ps$x) > 1 ) {stripchart(x[,ps$y]~ grp[,ps$x[1]]+ grp[,ps$x[,2]],vertical=vertical,add=TRUE,pch=pch,...)} else {
       {stripchart(x[,ps$y]~ grp[,ps$x[1]],vertical=vertical,add=TRUE,pch=pch,...)} 
       }}
       } else {
       if(jitter >0) {stripchart(x ,vertical=vertical,method="jitter",jitter=jitter,add=TRUE,pch=pch,...)} else {
        stripchart(x ,vertical=vertical,method="jitter",jitter=jitter,add=TRUE,pch=pch,...) }} 
 
                   }
                   
if(rain)  {
for(i in 1:nvar) {if(!is.null(grp)) {
  
 tempat <- 1:length(table(grp)) -.1 + (i-1)*nvar
if(jitter >0) {if(length(ps$x) > 1 ) {stripchart(x[,ps$y[i]]~ grp[,ps$x[1] ] +grp[,ps$x[2]] ,vertical=vertical,method="jitter",jitter=jitter,add=TRUE,pch=pch, at=tempat,...)} else {
      
        stripchart(x[,ps$y[i]]~ grp[,ps$x[1] ] ,vertical=vertical,method="jitter",jitter=jitter,add=TRUE,pch=pch,at=tempat, ...) }} else {
  if(length(ps$x) > 1 ) {stripchart(x[,ps$y[i]]~ grp[,ps$x[1]]+ grp[,ps$x[,2]],vertical=vertical,add=TRUE,pch=pch,...)} else {
       {stripchart(x[,ps$y[i]]~ grp[,ps$x[1]],vertical=vertical,add=TRUE,pch=pch,...)} 
       }}
       }  else {
        tempat <- 1:nvar -.1 + (i-1)*nvar
       if(jitter >0) {stripchart(x ,vertical=vertical,method="jitter",jitter=jitter,add=TRUE,pch=pch,at=tempat,...)} else {
        stripchart(x ,vertical=vertical,method="jitter",jitter=jitter,add=TRUE,pch=pch,at=tempat,...) }} 
 
                   }
       
       }
       }
        
#created March 10, 2014 following a discussion of the advantage of showing distributional values
#modified March 22, 2014 to add the grouping variable
#basically just a violin plot.
#modified December, 2016 to allow for scaling of the widths of the plots by sample size. 
#modified  December 2020 to provide stripcharts and errorbar options

# 
# "histBy" <- function(x,grp=NULL,data=NULL,restrict=TRUE,xlim=NULL,ylab="Observed",xlab="",main="Density plot",density=20,scale=TRUE,col= c("blue","red"),...) {
# count.valid <- function(x) {sum(!is.na(x)) }
# formula <- FALSE
#    if(inherits(x, "formula")) {  ps <- fparse(x)
#    formula <- TRUE
#    if(is.null(data)) stop("You must specify the data if you are using formula input") 
#      x <- data[ps$y]
#    grp <- data[ps$x]
#    }
# if(
#(col)) {col <- c("blue","red")}
# if(restrict) {minx <- min(x,na.rm=TRUE)
#    maxx <- max(x,na.rm=TRUE)}
# x <- as.matrix(x,drop=FALSE)
# meanX <- apply(x,2,mean,na.rm=TRUE)
# nX <- apply(x,2,function(xx) by(xx,grp,count.valid))
# 
# if(!is.null(grp)) { if(restrict) {d <- by(x[,1], grp ,function(xx) density(xx,na.rm=TRUE,from=min(xx,na.rm=TRUE),to=max(xx,na.rm=TRUE)))} else {
# d <- by(x[,1], grp ,function(xx) density(xx,na.rm=TRUE)) }} else {
# if(restrict) {d <- density(x[,1],na.rm=TRUE,from=minx,to=maxx)} else {
# d <- density(x[,1],na.rm=TRUE)} }
# d <- unlist(d,recursive=FALSE)
# maxy <- max(d[[1]]$y,d[[2]]$y,na.rm=TRUE)
# plot(NA,ylim=c(0,maxy),xlim=xlim,axes=FALSE,xlab=xlab,ylab=ylab,main=main,pch=pch,...)
#   axis(1,1:nvarg,names,...)
#   axis(2,...)
#   box()
# if(!is.null(scale)) {width <- scale*sqrt(nX[[i]]/tot.n.obs)/max(d[[i]]$y)} else {width <- SCALE/max(d[[i]]$y)}
# polygon(width*c(-d[[i]]$x,d[[i]]$x[rev])+i,c(d[[i]]$y,d[[i]]$y[rev]),density=density,col=col[i],...)
# }


#11/28/17
#Improved 09/02/20

 "densityBy" <- function(x,var=NULL,grp=NULL,data=NULL,freq=FALSE,col=c("blue","red","black"),alpha=.5,adjust=1,ylim=NULL,xlim=NULL,xlab="Variable", ylab="Density",main="Density Plot",legend=NULL) {
 formula <- FALSE
   if(inherits(x, "formula")) {  ps <- fparse(x)
   formula <- TRUE
   if(is.null(data)) stop("You must specify the data if you are using formula input") 
     var <- ps$y
   grp <- ps$x
    x <- data
   
   }
   

 n.grp <- length(table(x[grp]))
 if(length(col) < n.grp) col <- rainbow(n.grp)
if(!is.null(var)) x <- x[,c(var,grp),drop=FALSE]
 x <- char2numeric(x)
 col <- adjustcolor(col,alpha.f =alpha)
 if(missing(main) && freq) main="Frequency Plot"
 if(missing(ylab) && freq) ylab <- "N * density"
 if(missing(xlab) && (length(var) ==1)) xlab <- var
 if(!is.null(grp)) { 
    if(!is.data.frame(grp) && !is.list(grp) && (length(grp) < NROW(x))) grp <- x[,grp,drop=FALSE]
   d <- by(x[,var],grp,function(xx) density(xx, adjust=adjust,na.rm=TRUE))
   quants <-by(x[,var],grp,function(xx) quantile(xx,c(.25,.5,.75),na.rm=TRUE))
   maxiy <- rep(NA,length(d))
   rangex <- matrix(NA, ncol=2,nrow= length(d))
   for(i in 1:length(d) ) {if(freq){ maxiy[i] <- max(d[[i]]$n *d[[i]]$y)} else { 
                   maxiy[i] <- max(d[[i]]$y)}
                   rangex[i,] <- range(d[[i]]$x)
                   }
   maxy <- max(maxiy)
   ranges <- range(rangex)
   if(missing(ylim)) ylim <- c(0,maxy)
   if(missing(xlim)) xlim <- ranges
   plot(d[[1]],ylim=ylim,xlim=xlim,main=main,ylab=ylab,xlab=xlab ) 
   
   for (i in 1:length(d)) {
   if(freq) {scal <- d[[i]]$n
      d[[i]]$y <- d[[i]]$y*scal} 
   polygon(d[[i]] ,col=col[i])

   #find the y value corresponding to x median
   medy <- max(which(d[[i]]$x <= quants[[i]][2]))
   segments(x0 = quants[[i]][2],y0=0,x1=quants[[i]][2],y1=d[[i]]$y[medy],  lty= i)
   }
 } else {
 d <- density(x[,var],na.rm=TRUE,adjust=adjust)
   plot(d,main=main,ylab=ylab,xlab=xlab )
    polygon(d,col=col[1])
 }
 if(!missing(legend)) {
 location <- c("topleft","topright","top","left","right")
  grp.names <- paste(names(grp),names(table(grp)))
 leg.text <- grp.names
 legend(location[legend],legend=leg.text,col=c(1:n.grp),fill=c(1:n.grp), lty=c(1:n.grp))

 }
 invisible(d)
  }

Try the psych package in your browser

Any scripts or data that you put into this service are public.

psych documentation built on Sept. 26, 2023, 1:06 a.m.