R/scatter.R

Defines functions polygon.fill polygon.vertex bin scatter.hexbin scatter.sunflower scatter.heat scatter

Documented in bin scatter

#' Advanced scatter plots
#' 
#' Various functions that display bi-variate data in different ways. The
#' default presents the number of points falling into pixles of a 2D grid.
#' 
#' The 'heat' type produces a heat map type display whereas 'sunflower' and
#' 'hexbin' present alternative infographics. Scatter also takes arguments
#' similar to plot such as 'xlim', 'ylim', 'main', 'sub', 'xlab', and 'ylab.
#' 
#' @param x x-variable
#' @param y y-variable
#' @param type Type of scatter cplot \code{c('heat','sunflower','hexbin'}
#' @param verbose TRUE for diagnosing the internals of the function.
#' @param breaks ('heat' option) Set the colour scaling.
#' @param ignorezero ('heat' option - default = TRUE) Zeros are blank.
#' @param log ('heat' option - default = FALSE) Use logarithmic colour scaling.
#' @param dig ('heat' option) Resolution in number of decimal points/digits.
#' @param fig ('heat' option) Figure region for the colour bar.
#' @return Graphics and visualisation only.
#'
#' @author Kajsa M. Parding and Rasmus E. Benestad
#' @seealso \code{\link{vis}},\code{\link{map}},\code{\link{plot}}
#'
#' @importFrom graphics symbols lines grid par image
#'
#' @keywords graphics infographics
#' @examples
#' 
#' scatter(rnorm(10000),rnorm(10000),dig=1,log=TRUE)
#' scatter(rnorm(10000),rnorm(10000),type='sunflower',petalsize=7,dx=NULL,dy=NULL,
#'                               xgrid=NULL,ygrid=NULL,xlim=NULL,ylim=NULL,
#'                               xlab=NULL,ylab=NULL,main=NULL,leg=TRUE,rotate=TRUE,
#'                               alpha=0.6,leg.loc=2,new=TRUE, verbose=FALSE)
#' scatter(rnorm(10000),rnorm(10000),type='hexbin',new=TRUE,Nmax=NULL,
#'                            dx=NULL,dy=NULL,xgrid=NULL,ygrid=NULL,
#'                            xlim=NULL,ylim=NULL,
#'                            xlab=NULL,ylab=NULL,main=NULL,
#'                            leg=TRUE,col='blue',border='white',
#'                            colmap='heat.colors',
#'                            scale.col=TRUE,scale.size=FALSE, verbose=FALSE)
#' 
#' @export scatter
scatter <- function(x,y,type='heat', verbose=FALSE,...) {
  if (verbose) print(match.call())
  if (tolower(type)=='heat') scatter.heat(x,y,verbose=verbose,...)
  if (tolower(type)=='sunflower') scatter.sunflower(x,y,verbose=verbose,...)
  if (tolower(type)=='hexbin') scatter.hexbin(x,y,verbose=verbose,...)
}

scatter.heat <- function(x,y,xlim=NULL,ylim=NULL,breaks=NULL,main='Scatter',
                         xlab='',ylab='',sub='',
                         ignorezero=TRUE,col=NULL,log=FALSE,dig=NULL,
                         fig=NULL, fig.legend=NULL, show.legend=FALSE,
                         new=FALSE,add=FALSE,verbose=FALSE) {
  if (verbose) print('scatter.heat')
  if (is.null(dig)) dig <- max(c(-2*log(max(c(x,y,na.rm=TRUE)))/log(10),0),na.rm=TRUE)
  if (verbose) print(paste(dig,'digits. Max:',max(c(x,y,na.rm=TRUE))))
  txy <- table(round(x,dig),round(y,dig))
  if (ignorezero) txy[txy==0] <- NA
  if (log) txy <- log(txy)/log(10)
  
  if (is.null(breaks) & is.null(col)) {
    breaks <- pretty(as.numeric(txy))
    col <- heat.colors(length(breaks)-1)
  }
  if (is.null(breaks)) {
    breaks <- pretty(as.numeric(txy),n=length(col)-1)
    if (length(breaks) != length(col)-1)
    breaks <- seq(min(as.numeric(txy),na.rm=TRUE), max(as.numeric(txy),na.rm=TRUE),length=length(col)+1)
  }
  if (is.null(col)) col <- heat.colors(length(breaks) - 1)
  if (verbose) print(c(length(col),length(breaks)))
  if(is.null(xlim)) xlim <- range(x,na.rm=TRUE) + 0.1*c(-1,1)*diff(range(x,na.rm=TRUE))
  if(is.null(ylim)) ylim <- range(y,na.rm=TRUE) + 0.1*c(-1,1)*diff(range(y,na.rm=TRUE))
  if(new) dev.new()
  par0 <- par()
  if(!is.null(fig)) par(fig=fig, new=add) else fig <- par()$fig
  par(bty='n', xpd=FALSE)
  image(as.numeric(rownames(txy)),as.numeric(colnames(txy)),txy,breaks=breaks,
        xlim=xlim,ylim=ylim,main=main,xlab=xlab,ylab=ylab,sub=sub,col=col)
  grid()
  lines(c(0,0.6),c(0,0.6),lty=2)
  if(show.legend) {
    if(is.null(fig.legend)) {
      fig.legend <- c(fig[1] + diff(fig[1:2])*0.65, 
                      fig[1] + diff(fig[1:2])*0.85,
                      fig[3] + diff(fig[3:4])*0.22,
                      fig[3] + diff(fig[3:4])*0.32)
    }
    par(fig=fig.legend,yaxt='n',yaxt='n',mar=c(2,0,0,0),new=TRUE,
        cex.axis=0.75,cex.lab=0.75,col.axis='white')
    image(breaks,1:2,cbind(c(breaks),c(breaks)),breaks=breaks,
          col=heat.colors(length(breaks)-1),add=TRUE)
    par(xaxt='s',col.axis='grey')
    if (log) {
      axis(1,at=breaks[seq(1,length(breaks),by=2)],
           labels=paste0('10^',breaks[seq(1,length(breaks),by=2)]),cex=0.75) 
    } else {
      axis(1,at=breaks[seq(1,length(breaks),by=2)],
           labels=as.character(breaks[seq(1,length(breaks),by=2)]),cex=0.75)
    }
    par(fig=fig, yaxt='s',yaxt='s',col.axis='black',new=TRUE)
  }
  par(bty=par0$bty, xpd=par0$xpd, yaxt=par0$yaxt, yaxt=par0$yaxt, 
      col.axis=par0$col.axis)
}

scatter.sunflower <- function(x,y,petalsize=7,dx=NULL,dy=NULL,
                              xgrid=NULL,ygrid=NULL,xlim=NULL,ylim=NULL,
                              xlab=NULL,ylab=NULL,main=NULL,leg=TRUE,rotate=TRUE,
                              alpha=0.6,leg.loc=2,fig=NULL,add=FALSE,
                              new=TRUE, verbose=FALSE) {
  
  stopifnot(is.numeric(x) & is.numeric(y) & length(x)==length(y))
  i <- !(is.na(x) | is.na(y))
  x <- x[i]; y <- y[i]
  
  # Define grid
  if (is.null(dx) & length(xgrid)<=2) dx <- (max(x)-min(x))/20
  if (is.null(dy) & length(ygrid)<=2) dy <- (max(y)-min(y))/20
  
  if (is.null(xgrid)) {
    xgrid <- seq(min(x),max(x),dx*(1+sin(pi/6)))
  } else if (length(xgrid)==2) {
    xgrid <- seq(min(xgrid),max(xgrid),dx*(1+sin(pi/6)))
  } else if (length(xgrid)>2) {
    dx <- mean(xgrid[2:length(xgrid)]-xgrid[1:(length(xgrid)-1)])/(1+sin(pi/6))
  }
  
  if (is.null(ygrid)) {
    ygrid <- seq(min(y),max(y),2*dy*cos(pi/6))
  } else if (length(ygrid)==2) {
    ygrid <- seq(min(ygrid),max(ygrid),2*dy*cos(pi/6))
  } else if (length(ygrid)>2) {
    dy <- mean(ygrid[2:length(ygrid)]-ygrid[1:(length(ygrid)-1)])/(2*cos(pi/6))
  }
  
  Y <- replicate(length(xgrid),ygrid)
  X <- t(replicate(length(ygrid),xgrid))
  fn <- function(x) {
    dx <- x[2:length(x)]-x[1:(length(x)-1)]
    x[1:(length(x)-1)] <- x[1:(length(x)-1)]+dx/2
    x[length(x)] <- x[length(x)]+dx[length(dx)]/2
    return(x)
  }
  Y[,seq(2,dim(Y)[2],2)] <- apply(Y[,seq(2,dim(Y)[2],2)],2,fn)
  
  # Count observations in each grid point
  XYN <- bin(x,y,X,Y)
  X <- XYN[,1]; Y <- XYN[,2]; N <- XYN[,3]
  
  # Define stuff for plot
  dx <- 0.9*dx; dy <- dy*0.9
  if (is.null(xlim)) xlim <- c(min(x)-dx/2,max(x)+dx/2)
  if (is.null(ylim)) ylim <- c(min(y)-dy/2,max(y)+dy/2)
  xr <- 0.8*dx
  yr <- 0.8*dy
  n <- length(X)
  
  # Generate figure
  if(new) dev.new()
  par0 <- par()
  if(!is.null(fig)) par(fig=fig, new=add)
  par(bty='n', xpd=FALSE)
  plot(X,Y,xlim=xlim,ylim=ylim,xlab=xlab,ylab=ylab,main=main,type='n')
  
  # Grid points with 1 observation
  if (any(N==1)) symbols(X[N==1],Y[N==1],
                         circles=rep(1,sum(N==1)),fg='blue',bg=FALSE,
                         inches=xr/(max(xlim)-min(xlim)),add=TRUE)
  
  # Grid points with few observations
  if (any(N>1) & any(N<petalsize*2)) {
    i.multi <- (1L:n)[( N>1 & N<petalsize*2 )]
    # Plot hexagons
    mapply(polygon.fill,X[i.multi],Y[i.multi],dx,dy,
           col=adjustcolor('khaki1',alpha.f=alpha),
           border=adjustcolor('khaki3',alpha.f=alpha),n=6)
    # Draw sunflowers
    i.rep <- rep.int(i.multi, N[i.multi])
    z <- numeric()
    for(i in i.multi)
      z <- c(z, 1L:N[i] + if(rotate) stats::runif(1) else 0)
    deg <- (2 * pi * z)/N[i.rep]
    segments(X[i.rep], Y[i.rep],
             X[i.rep] + xr * sin(deg),
             Y[i.rep] + yr * cos(deg),
             col="orange", lwd=1.5)
  }
  
  # Grid points with many observations
  if (any(N>=(petalsize*2))) {
    N2 <- floor(N/petalsize)
    i.multi <- (1L:n)[( N2>1 )]
    # Plot hexagons
    mapply(polygon.fill,X[i.multi],Y[i.multi],dx,dy,
           col=adjustcolor('coral',alpha.f=alpha),
           border=adjustcolor('coral2',alpha.f=alpha),n=6)
    # Draw sunflowers
    i.rep <- rep.int(i.multi, N2[i.multi])
    z <- numeric()
    for(i in i.multi)
      z <- c(z, 1L:N2[i] + if(rotate) stats::runif(1) else 0)
    deg <- (2 * pi * z)/N2[i.rep]
    segments(X[i.rep], Y[i.rep],
             X[i.rep] + xr * sin(deg),
             Y[i.rep] + yr * cos(deg),
             col="tomato4", lwd=1.5)
  }
  
  # Legend
  if (leg) {
    
    dx <- (max(xlim)-min(xlim))/20
    dy <- (max(ylim)-min(ylim))/20
    
    if (any( leg.loc %in% c(1,'upper right'))) {
      xy <- polygon.vertex(
        max(xlim)-3.2*dx,max(ylim)-dy/2,5.3*dx,dy*1.5,n=4,rot=pi/4)
    } else if (any( leg.loc %in% c(2,'upper left',NULL))) {
      xy <- polygon.vertex(
        min(xlim)+3.2*dx,max(ylim)-dy/2,5.3*dx,dy*1.5,n=4,rot=pi/4)
    } else if (any( leg.loc %in% c(3,'lower left'))) {
      xy <- polygon.vertex(
        min(xlim)+3.2*dx,min(ylim)+dy/2,5.3*dx,dy*1.5,n=4,rot=pi/4)
    } else if (any( leg.loc %in% c(4,'lower right'))) {
      xy <- polygon.vertex(
        max(xlim)-3.2*dx,min(ylim)+dy/2,5.3*dx,dy*1.5,n=4,rot=pi/4)
    }
    
    polygon(xy[,1],xy[,2],col='white',border='gray')
    polygon.fill(xy[2,1]+dx,xy[2,2]-dy*0.6,dx*0.35,dy*0.35,
                 col=adjustcolor('khaki1',alpha.f=alpha),
                 border=adjustcolor('khaki3',alpha.f=alpha),n=6)
    polygon.fill(xy[3,1]+dx,xy[3,2]+dy*0.6,dx*0.35,dy*0.35,
                 col=adjustcolor('coral',alpha.f=alpha),
                 border=adjustcolor('coral2',alpha.f=alpha),n=6)
    points(xy[2,1]+dx,xy[2,2]-dy*0.6,pch=3,col="orange",lwd=1)
    points(xy[3,1]+dx,xy[3,2]+dy*0.6,pch=3,col="tomato4",lwd=1)
    text(xy[2,1]+dx*1.5,xy[2,2]-dy*0.7,'1 petal = 1 obs',pos=4)
    text(xy[3,1]+dx*1.5,xy[3,2]+dy*0.5,paste('1 petal = ',
                                             as.character(petalsize),' obs'),pos=4)
  }
  par(bty=par0$bty, xpd=par0$xpd)
}

# Binned scatterplot with hexagons
scatter.hexbin <- function(x, y, new=TRUE, Nmax=NULL,
                           dx=NULL, dy=NULL, xgrid=NULL, ygrid=NULL,
                           xlim=NULL, ylim=NULL,
                           xlab=NULL, ylab=NULL, main=NULL,
                           leg=TRUE, col='blue', border='white',
                           colmap='gray.colors', fig=NULL, add=FALSE,
                           scale.col=TRUE, scale.size=FALSE, 
                           add.loess=FALSE, span=2/3, degree=1, evaluation=50,
                           family=c("symmetric", "gaussian"), lpars=list(),
                           verbose=FALSE) {
  
  if(verbose) print("scatter.hexbin")
  stopifnot(is.numeric(x) & is.numeric(y) & length(x)==length(y))
  i <- !(is.na(x) | is.na(y))
  x <- x[i]; y <- y[i]
  # Define grid
  if (is.null(dx) & length(xgrid)<=2) dx <- (max(x)-min(x))/20
  if (is.null(dy) & length(ygrid)<=2) dy <- (max(y)-min(y))/20
  
  if (is.null(xgrid)) {
    xgrid <- seq(min(x),max(x),dx*(1+sin(pi/6)))
  } else if (length(xgrid)==2) {
    xgrid <- seq(min(xgrid),max(xgrid),dx*(1+sin(pi/6)))
  } else if (length(xgrid)>2) {
    dx <- mean(xgrid[2:length(xgrid)]-xgrid[1:(length(xgrid)-1)])/(1+sin(pi/6))
  }
  
  if (is.null(ygrid)) {
    ygrid <- seq(min(y),max(y),2*dy*cos(pi/6))
  } else if (length(ygrid)==2) {
    ygrid <- seq(min(ygrid),max(ygrid),2*dy*cos(pi/6))
  } else if (length(ygrid)>2) {
    dy <- mean(ygrid[2:length(ygrid)]-ygrid[1:(length(ygrid)-1)])/(2*cos(pi/6))
  }
  
  Y <- replicate(length(xgrid),ygrid)
  X <- t(replicate(length(ygrid),xgrid))
  fn <- function(x) {
    dx <- x[2:length(x)]-x[1:(length(x)-1)]
    x[1:(length(x)-1)] <- x[1:(length(x)-1)]+dx/2
    x[length(x)] <- x[length(x)]+dx[length(dx)]/2
    return(x)
  }
  Y[,seq(2,dim(Y)[2],2)] <- apply(Y[,seq(2,dim(Y)[2],2)],2,fn)
  
  # Count observations in each grid point
  XYN <- bin(x,y,X,Y)
  X <- XYN[,1]; Y <- XYN[,2]; N <- XYN[,3]
  if(is.null(Nmax)) Nmax <- max(N)
  Nf <- sapply(N/Nmax,function(x) 0.2+0.8*min(1,x))
  X <- X[N>0]; Y <- Y[N>0]
  Nf <- Nf[N>0]; N <- N[N>0]
  
  # Plot
  if (scale.col) {
    colorscale <- colscal(n=10,pal=colmap)[seq(10,1,-1)]
    col <- colorscale[round(Nf*10)]
    border <- colorscale[round(Nf*10)]
  }
  if (scale.size) {
    dX <- dx*Nf
    dY <- dy*Nf
  } else {
    dX <- dx
    dY <- dy
  }
  if (is.null(xlim)) xlim <- c(min(X)-dx,max(X)+dx)
  if (is.null(ylim)) ylim <- c(min(Y)-dy,max(Y)+dy)
  if(new) dev.new()
  #if(new) plot(x,y,xlim=xlim,ylim=ylim,xlab=xlab,ylab=ylab,main=main,type='n')
  par0 <- par()
  if(!is.null(fig)) par(fig=fig, new=add)
  par(bty='n', xpd=FALSE)
  plot(x,y,xlim=xlim,ylim=ylim,xlab=xlab,ylab=ylab,main=main,type='n')
  mapply(polygon.fill,X,Y,dX,dY,n=6,col=col,border=border)
  # Legend
  if (leg) {
    dn <- round(Nmax/7)
    if (dn>5 & dn<50) { dn <- round(dn/5)*5 
    } else if (dn>50) { dn <- signif(dn,digits=(nchar(dn)-1))}
    nticks <- seq(Nmax,1,-dn)
    if (length(dX)>1) {
      szticks <- sapply(nticks/Nmax,function(x) 0.2+0.8*min(1,x))
    } else {
      szticks <- rep(1,length(nticks))
    }
    if (length(col)>1) {
      cticks <- colorscale[round(nticks/Nmax*10)]
      bticks <- colorscale[round(nticks/Nmax*10)]
    } else {
      cticks <- rep(col,length(nticks))
      bticks <- rep(border,length(nticks))  
    }              
    x0 <- max(xlim)
    y0 <- max(ylim)
    dy0 <- max(szticks)*dy*2.1
    dx0 <- diff(range(xlim))/10 + max(szticks)*dy/2
    text(x0+dx0, y0+dy0/2+diff(range(ylim))/20,"count")
    polygon.fill(x0+dx0,y0,dx*szticks[1],dy*szticks[1],
                 n=6,col=cticks[1],border=bticks[1])
    if (max(N)>Nmax) {
      text(x0+dx0+max(szticks)*dx,y0,
           paste(">=",as.character(nticks[1])),pos=4)
           #paste("\u2265",as.character(nticks[1])),pos=4)
    } else {
      text(x0+dx0+max(szticks)*dx,y0,as.character(nticks[1]),pos=4)
    }
    
    ivec <- 2:10
    j <- 1
    for (i in ivec) {
      polygon.fill(x0+dx0,y0-j*dy0,
                   dx*szticks[i],dy*szticks[i],
                   n=6,col=cticks[i],border=bticks[i])
      text(x0+dx0+max(szticks)*dx,y0-j*dy0,
           as.character(nticks[i]),pos=4)
      j <- j+1
    }
  }
  
  if(add.loess) {
    pred <- loess.smooth(x, y, span, degree, family, evaluation)
    do.call(lines, c(list(pred), lpars))
  }
  par(bty=par0$bty, xpd=par0$xpd)
}

# Count observations (x,y) in grid points (X,Y)
bin <- function(x,y,X,Y) {
  fn <- function(x,y) {
    d <- sqrt( (X-x)**2 + (Y-y)**2 )
    imin <- which(d==min(d),arr.ind=TRUE)
    if (length(imin)>2) imin <- imin[1,]
    invisible(imin)
  }
  ibin <- mapply(fn,x,y)
  N <- matrix(rep(0,nrow(X)*ncol(X)),nrow(X),ncol(X))
  for (k in 1:(dim(ibin)[2])) {
    i <- ibin[1,k]
    j <- ibin[2,k]
    N[i,j] <- N[i,j]+1
  }
  N <- array(N,length(N))
  X <- array(X,length(X))
  Y <- array(Y,length(Y))
  invisible(cbind(X,Y,N))
}

# Vertices of polygon with n sides, width dx, height dy, center (x0,y0)
polygon.vertex <- function(x0,y0,dx,dy=NULL,n=6,rot=0) {
  if (is.null(dy)) dy <- dx
  if (!findInterval(rot,c(-2*pi,2*pi))) rot <- 0
  i <- seq(0,n-1,1)
  x <- x0 + dx*cos(2*pi*i/n + rot)
  y <- y0 + dy*sin(2*pi*i/n + rot)
  invisible(cbind(x,y))
}

# Plot polygon defined by polygon.vertex
polygon.fill <- function(x0,y0,dx,dy,n=6,col='white',border='black',rot=0) {
  xy <- polygon.vertex(x0,y0,dx,dy,n=n,rot=rot)
  polygon(xy[,1],xy[,2],col=col,border=border)
}
metno/esd documentation built on April 29, 2024, 3:34 p.m.