Nothing
"scatter.hist" <- "scatterHist" <-
function(x,y=NULL,smooth=TRUE,ab=FALSE, correl=TRUE,data=NULL, density=TRUE,means=TRUE, ellipse=TRUE,digits=2,method="pearson",cex.cor=1,cex.point=1,title="Scatter plot + density",
xlab=NULL,ylab=NULL,smoother=FALSE,nrpoints=0,xlab.hist=NULL,ylab.hist=NULL,grid=FALSE,xlim=NULL,ylim=NULL,x.breaks=11,y.breaks=11,
x.space=0,y.space=0,freq=TRUE,x.axes=TRUE,y.axes=TRUE,size=c(1,2),col=c("blue","red","black"),legend=NULL,alpha=.5,pch=21,
show.d=TRUE,
x.arrow=NULL,y.arrow=NULL,d.arrow=FALSE,cex.arrow=1,...) {
old.par <- par(no.readonly = TRUE) # save default
sb <- grp <- NULL
main <- title
if(inherits(x, "formula")) { ps <- fparse(x)
formula <- TRUE
if(is.null(data)) stop("You must specify the data if you are using formula input")
y <- ps$y
x <- ps$x
if(length(x) > 1) {
byGroup <- TRUE
freq <- FALSE
grp <- x[-1]
grp.name <- grp
grp <- data[,grp,drop=FALSE]
n.grp <- length(table(grp))
x <- x[1]
xy <-data[,c(x,y,grp.name),drop=FALSE]
sb <- statsBy(xy,group=grp.name,cors=TRUE)
} else {grp <- NULL
byGroup <- FALSE
Md <- NULL
}
if(is.null(xlab)) xlab <- x
if(is.null(ylab)) ylab <- y
x <- data[,x,drop=FALSE]
y <- data[,y,drop=FALSE]
}
col <- adjustcolor(col,alpha.f =alpha)
n.obs <- sum(!is.na(x))
if(missing(xlab)) {
if(!is.null(colnames(x))) {xlab=colnames(x)[1]
ylab=colnames(x)[2]} else {xlab="V1"
ylab="V2"} }
if (is.null(y)) {y <- x[,2]
x <- x[,1]} else {if(!is.null(dim(x))) {x <- x[,1,drop=TRUE]
# if(!is.null(colnames(y))) ylab <- colnames(y)
if(!is.null(dim(y))) {y <- y[,1,drop=TRUE]} }
}
if(missing(ylab)) { ylab <- colnames(y) }
if(is.null(grp)) grp <-1
#if((length(pch) ==1 )&&(pch ==".")) pch <- 45 #this is kludge because we add 1 to pch later
if(NROW(grp) > 1) { byGroup <- TRUE
dx <- by(x,grp,function(xx) density(xx, adjust=1,na.rm=TRUE))
dy <- by(y,grp,function(xx) density(xx, adjust=1,na.rm=TRUE)) #what does adjust do? I am copying this from my densityBy function
x.means <- by(x, grp, function (xx) mean(xx, na.rm=TRUE)) #could clean this up to use statsBy output
y.means <- by(y, grp, function(xx) mean(xx, na.rm=TRUE) )
x.sd <- by(x, grp, function (xx) sd(xx, na.rm=TRUE))
y.sd <- by(y, grp, function (xx) sd(xx, na.rm=TRUE))
x.n <-by(x,grp,function(xx) sum(!is.na(xx)))
y.n <-by(y,grp,function(xx) sum(!is.na(xx)))
x.d <- (x.means[2]-x.means[1])/sqrt(((x.sd[1]^2*(x.n[1]-1))+ x.sd[2]^2* (x.n[2]-1))/(x.n[1] + x.n[2])) #changed to n-1 7/29/21
y.d <- (y.means[2]-y.means[1])/sqrt(((y.sd[1]^2*(y.n[1]-1)+ y.sd[2]^2* (y.n[2]-2)))/(y.n[1] + y.n[2]))
R.inv <- solve(sb$rwg)
dist <- c(x.d,y.d)
Md <- sqrt(t(dist) %*% R.inv %*% dist)
if(show.d) {x.arrow=round(x.d,2)
y.arrow=round(y.d,2)
}
grp <- unlist(grp)
dx.max <- dy.max <- -9999
for(i in 1:length(dx) ) {
dx.max <- max(dx.max, dx[[i]]$y)
dy.max <- max(dy.max, dy[[i]]$y)}
} else {byGroup <- FALSE
x.means <- y.means <-x.d <- y.d<- stats<- NA #give them a value so we have something to report in the results
}
xrange <- range(x,na.rm=TRUE)
yrange <- range(y,na.rm=TRUE)
if(missing(xlim)) xlim <- xrange
if(missing(ylim)) ylim <- yrange
x.breaks <- seq(xlim[1],xlim[2],(xlim[2] - xlim[1])/x.breaks)
y.breaks <- seq(ylim[1],ylim[2],(ylim[2] - ylim[1])/y.breaks)
xhist <- hist(x,breaks=x.breaks,plot=FALSE)
yhist <- hist(y,breaks=y.breaks,plot=FALSE)
nf <- layout(matrix(c(2,4,1,3),2,2,byrow=TRUE), c(3,1), c(1,3), TRUE) #locations to plot the scatter plot
#first plot is in location 1
#nf <- layout(matrix(c(3,4,6,1, 2,5),2,3,byrow=TRUE),c(3,3,1),c(1,3)) #two x, 1 y
# nf <- layout(matrix(c(5,6,9,3,4,8,1, 2,7),3,3,byrow=TRUE),c(3,3,1),c(1,3,3)) #two x, 2 y
#layout.show(nf)
#the first figure is the plot
par(mar=c(5,4,1,1)) #bottom, left, top, right
if(smoother) {smoothScatter(x,y,nrpoints=nrpoints,xlim=xlim,ylim=ylim,xlab=xlab,ylab=ylab,...)} else {
if((length(pch) ==1 )&&(pch ==".")){plot(x,y,xlim=xlim,ylim=ylim,xlab=xlab,ylab=ylab,bg=col[grp],pch=pch)} else {
plot(x,y,xlim=xlim,ylim=ylim,xlab=xlab,ylab=ylab,col=col[grp],bg=col[grp],pch=pch+grp,cex=cex.point)} }
if(grid) grid()
if(ab) abline(lm(y~x))
if(smooth) {
ok <- is.finite(x) & is.finite(y)
if (any(ok))
# lines(stats::lowess(x[ok], y[ok], f = span, iter = iter), col = col.smooth, ...)
lines(stats::lowess(x[ok],y[ok]),col="red")}
if(ellipse) {
if(byGroup) { grp.values <- table(grp) #fix to work with multiple (>20 groups)
grp.names <- dimnames(grp.values)
ngrp <- length(grp.names$grp)
for (i in 1:ngrp) {x.grp <- x[grp==grp.names[[1]][[i]]]
y.grp <- y[grp==grp.names[[1]][[i]]]
#lowx <- x[grp==grp.names[[1]][[1]]]
#lowy <- y[grp==grp.names[[1]][[1]]]
#highx <- x[grp==grp.names[[1]][[2]]]
# highy <- y[grp==grp.names[[1]][[2]]]
ellipses(x.grp,y.grp,smooth=FALSE,add=TRUE,n=1,size=size,col=col[i],...)
}
if(d.arrow) dia.arrow(c(x.means[1],y.means[1]), c(x.means[2],y.means[2]),labels=round(Md,2),both=TRUE,cex=cex.arrow)
} else {
#do something (but how?)
# xy <- cbind(x,y,grp)
#temp <- by(xy,grp,function(xx) colMeans(xx,na.rm=TRUE))
#for (i in length(temp) ){
# points(temp[[i]][1:2],pch=25,cex=4)
ellipses(x,y,add=TRUE,size=size )}
}
if(!missing(legend) & byGroup) { #show the legend
location <- c("topleft","topright","top","left","right")
grp.names <- paste(grp.name,names(table(grp)))
n.grp <- length(grp.names)
leg.text <- grp.names
legend(location[legend],legend=leg.text,col=col[1:n.grp],fill=col[1:n.grp],pch=(pch+1:n.grp), lty=c(1:n.grp))
}
#this next figure is the density of the x axis
par(mar=c(.75,4,2,1)) #the left and right here should match the left and right from above (the location for the x density)
#now, if we have a grouping variable, then plot a barplot for each grp value
if(byGroup) {
plot(dx[[1]],main="",xlim=xrange,axes=FALSE,ylim =c(0,dx.max))
title(main,...)
for (i in 1:length(dx)) {
if(freq) {scal <- dx[[i]]$n
dx[[i]]$y <- dx[[i]]$y*scal}
polygon(dx[[i]] ,col=col[i])
#x.mean <- mean(dx[[i]]$x) #this is based upon the density, not the data
x.mean <- x.means[i]
y.mean <- mean(dx[[i]]$y[256]) #the median
y.mean <- dx[[i]]$y[which.max(dx[[i]]$x > x.means[i])]
segments(x.mean,0,x.mean,y.mean)
}
if(!is.null(x.arrow)) {
dia.arrow(c(x.means[1],.2*max(dx[[1]]$y)), c(x.means[2],.2*max(dx[[1]]$y)),labels=x.arrow,both=TRUE,cex=cex.arrow)}
} else {
if(freq) { mp <- barplot(xhist$counts, axes=x.axes, space=x.space,xlab=xlab.hist)} else { mp <- barplot(xhist$density, axes=x.axes, space=x.space,xlab=xlab.hist)}
#xhist <- hist(x,breaks=11,plot=TRUE,freq=FALSE,axes=FALSE,col="grey",main="",ylab="")
tryd <- try( d <- density(x,na.rm=TRUE,bw="nrd",adjust=1.2),silent=TRUE)
if(!inherits(tryd ,"try-error")) {
d$x <- (mp[length(mp)] - mp[1]+1) * (d$x - min(xhist$breaks))/(max(xhist$breaks)-min(xhist$breaks))
if(freq) d$y <- d$y * max(xhist$counts/xhist$density,na.rm=TRUE)
if(density) lines(d)}
}
# title(title,cex=cex.title) #doesn't actually do anything
#now show the y axis densities
par(mar=c(5,0.5,1,2)) #the location for the y density
if(byGroup) {
#if(freq) { mp <- barplot(xhist$counts, axes=x.axes, space=x.space,xlab=xlab.hist,plot=TRUE)} else { mp <- barplot(xhist$density, axes=x.axes, space=x.space,xlab=xlab.hist,plot=TRUE)}
plot(dy[[1]],main="",axes=FALSE,ylim=yrange,xlim = c(0,dy.max),xlab="Density")
for (i in 1:length(dy)) {
temp <- dy[[i]]$y #swap the x and y columns
dy[[i]]$y <- dy[[i]]$x
dy[[i]]$x <- temp
if(freq) {scal <- dy[[i]]$n
dy[[i]]$y <- dy[[i]]$y*scal}
polygon(dy[[i]] ,col=col[i])
x.mean <- y.means[i]
# x.mean <- mean(dy[[i]]$y)
y.mean <- dy[[i]]$x[which.max(dy[[i]]$y > y.means[i])] #this adjusts for the problem that means of density do not match real means
segments(0,x.mean,y.mean,x.mean)
}
if(!is.null(y.arrow)) {
dia.arrow(c(.2*max(dx[[1]]$y),y.means[1]), c(.2*max(dx[[1]]$y), y.means[2]),labels=y.arrow,both=TRUE,cex=cex.arrow)}
} else {
if(freq) {mp <- barplot(yhist$counts, axes=y.axes, space=y.space, horiz=TRUE,ylab=ylab.hist) } else {mp <- barplot(yhist$density, axes=y.axes, space=y.space, horiz=TRUE,ylab=ylab.hist)}
tryd <- try( d <- density(y,na.rm=TRUE,bw="nrd",adjust=1.2),silent=TRUE)
if(!inherits(tryd,"try-error")) {
temp <- d$y
d$y <- (mp[length(mp)] - mp[1]+1) * (d$x - min(yhist$breaks))/(max(yhist$breaks)-min(yhist$breaks))
d$x <- temp
if(freq) d$x <- d$x * max(yhist$counts/yhist$density,na.rm=TRUE)
if(density) lines(d)
}
}
#this is the upper right hand square -- show the correlation/Md
par(mar=c(1,1,1,1)) #the location for the correlations if we are not doing groups
if(correl) {
plot(1,1,type="n",axes=FALSE)
#plot(x,y)
med.x <- median(x,na.rm=TRUE)
med.y <- median(y,na.rm=TRUE)
if(missing(method)) method <- "pearson"
r = (cor(x, y,use="pairwise",method=method))
# txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste0("r = ", round(r,digits),"\n")
if(missing(cex.cor)) {cex <- 0.75/strwidth(txt)} else {cex <- cex.cor}
text(1,1, txt,cex=cex)
if(!is.null(sb))text(1,.8,paste0("r wg =",round(sb$rwg[1,2],2) ),cex=.8*cex)} else {
plot(1,1,type="n",axes=FALSE)
if(!is.null(Md) ) {
txt <- paste0("D = ",sprintf("%.2f",round(Md,2)),"\n")
if(missing(cex.cor)) {cex <- 0.75/strwidth(txt)} else {cex <- cex.cor}
text(1,1,txt,cex=cex)
#text(1,.8,paste0("r wg =",round(sb$rwg[1,2],2) ),cex=cex)
}
}
result <- list(x.means=x.means,y.means=y.means,x.d=x.d, y.d = y.d,stats=sb)
par(old.par)
invisible(result)
}
#version of March 7, 2011
#revised Sept 7, 2013 to include method option in cor
#revised October 3, 2020 to allow groups and to allow formula input
#revised June 6, 2021 to allow ellipses by groups
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.