# sma: Statistical Microarray Analysis

###########################################################################
# Statistics for Microarray Analysis
# MA plots
#
# Date : March 19, 2001
#
# History:
#    May 17, 2001: Incorporate postscript fix from kadowaki@pharmadesign.co.jp
#    March 19, 2001: Some of the plot functions from Rarray.R.
#					
#
# Authors: Sandrine Dudoit and Yee Hwa (Jean) Yang.
##########################################################################

########################################################################/**
# \name{plot.mva}
# 
# \alias{plot.mva}
#   
# \title{M vs. A Plot}
# 
# \description{
# For a single slide, this function produces a scatter plot of log
# intensity ratios \eqn{M = log_2(R/G)} versus average log intensities
# \eqn{A = log_2 \sqrt{RG}}{A = log_2(R*G)/2}, where R and G represent
# the fluorescence intensities in the red and green channels
# respectively. 
# }
# 
# \usage{
# plot.mva(RG, layout, norm="p", pout=T, image.id=1, extra.type="tci",
# crit1=0.025,crit2=crit1, nclass=10, labs=NULL, plot.type="n",
# col.ex=NULL, ...) 
# }
# 
# \arguments{
#   \item{RG}{A list with at least 4 elements.  Each element of the list
#     being a matrix with p rows for p genes and n columns for n slides. 
#     The first element 'R' contains the raw red intensities,
#     the second element 'G' contains the raw green intensities,
#     the third element 'Rb' contains the background red intensities and
#     the 4th element 'Gb' contains the background green intensities.
#     This data structure can be generated by an interactive function
#     \code{\link{init.data}}.}  
# 
#   \item{layout}{a list specifying the dimensions of the spot matrix
#   and the grid matrix.  This can be generated by calling
#   \code{\link{init.grid}}.} 
# 
#   \item{norm}{character string, one of "n", "m", "l", "p" or "s".  This
#     argument  specifies the type of normalization method to be
#     performed: "n" no normalization between the 2 channels; "m"
#     \code{\link{median}} normalization, which sets the median of log
#     intensity ratios to zero; "l" global \code{\link{lowess}}
#     normalization; "p" print-tip group lowess normalization and "s"
#     scaled print-tip group lowess normalization.} 
#   
#   \item{pout}{if TRUE, an M vs. A plot will be produced.  Otherwise, the
#     function returns the normalized log intensity ratios M and the mean
#     log intensities A for each gene.} 
#   
#   \item{image.id}{integer value; the index of the slide which is considered.}
# 
#   \item{extra.type}{a character string, one of "t", "p", "tci","pci" or
#     "lci".  This argument specifies the type of plot to be drawn.  The
#     possible types are: \cr
#     * "t" for text, \cr
#     * "p" for points, \cr
#     * "tci" for highlighting a certain proportion of extreme `M' values
#     by text,\cr
#     * "pci" for highlighting a certain proportion of extreme `M' values
#     by points,\cr
#     * "lci" for including 2 intensity dependent lines where a 
#     prespecified proportion of points have more extreme `M' values. 
#   }
#   \item{crit1}{The number of points to be highlighted on the M vs A
#     plot.  If crit1 < 1, the crit1*100\% spots with the smallest M
#     values will be highlighted. If crit1 >= 1, the crit spots 
#    with the smallest M values are highlighted.} 
#  \item{crit2}{Similar to "crit1".   If crit2 < 1, the crit2*100\% spots
#    with the largest M values will be highlighted. If crit2 >= 1, the
#    crit2 spots with the largest M values are highlighted.}
#  \item{nclass}{A single number giving the approximate number of
#    intensity dependent groups to consider.}
#  \item{labs}{one or more character strings or expressions specifying the
#    text to be written.  If this string is not specified, by
#    default the index of the vector `M' will be used.}
#  \item{plot.type}{a character string, this argument is either "n", "r"
#    or "b".  The different number of plots to be included are:\cr
#    * "n" for normalised M vs A plot, \cr
#    * "r" for unnormalised M vs A plot, and \cr 
#    * "b" both unnormalised and normalised M vs A plots.
#   }
#  \item{col.ex}{The colour used for the highlighting extreme points,
#  lines or text.} 
#  \item{\dots}{graphical parameters may also be supplied as arguments to the
#     function (see \code{\link{par}}).  }
# }
# 
# \value{A plot is created on the current graphics device.  The top
# plot is based on unnormalized log ratios and the bottom plot is
# based on normalized log ratios.} 
# 
# \details{M vs. A plots tend to be more revealing than their log R
# vs. log G counterparts in terms of identifying spot artifacts and
# detecting intensity dependent patterns in the log ratios. They are
# also very useful for normalization.} 
# 
# \references{S. Dudoit, Y. H. Yang, M. J. Callow, and T. P. Speed. Statistical
# methods for identifying differentially expressed genes in replicated
# cDNA microarray experiments (Statistics, UC Berkeley, Tech Report \#
# 578).}
# 
# \author{
#   Yee Hwa Yang, \email{yeehwa@stat.berkeley.edu} \cr
#   Sandrine Dudoit, \email{sandrine@stat.berkeley.edu} \cr
#   Natalie Roberts, \email{nroberts@wehi.edu.au}
# }
# 
# \seealso{\code{\link{ma.func}}, \code{\link{plot.smooth.line}},
# \code{\link{stat.ma}}, \code{\link{lowess}}, \code{\link{plot}}.} 
# 
# \examples{
# data(MouseArray)
# # mouse.setup <- init.grid()
# # mouse.data <- init.data() ## see \emph{init.data}
# mouse.lratio <- stat.ma(mouse.data, mouse.setup)
# 
# ## Look at the normalized second data sets in the list using points to
# ## highlight large positive or large negative ratios.
# plot.mva(mouse.data, mouse.setup, norm="l", image.id=2, extra.type="pci",
# plot.type="n") 
# 
# ## Look at both the unnormalized and normalized first data sets in the
# ## list using text to highlight large positive or negative ratios.
# ## plot.mva(mouse.data, mouse.setup, norm="l", image.id=1, extra.type="tci", plot.type="b") 
# }     
# 
# \keyword{microarray}
# 
#*/########################################################################

plot.mva <- function(x, layout, norm="p", pout=TRUE, image.id=1,
                     extra.type="tci", crit1=0.025, crit2=crit1, nclass=10,
                     labs=NULL, plot.type="n", col.ex=NULL, pch=".", ...)
{
  # M vs. A plot for a single slide.
  #
  # x        : list holding the intensity matrices R, G, Rb and Gb
  #            (accessed below as x$R, x$G, x$Rb, x$Gb).
  # image.id : column of each matrix (i.e. the slide) to hand on.
  # All remaining arguments, including `...`, are forwarded untouched to
  # ma.func, whose return value is returned as is.
  #
  # The column extractions are written inline in the call (rather than
  # stored in locals first) so that they remain lazily evaluated by ma.func.
  ma.func(
    R          = x$R[, image.id],
    G          = x$G[, image.id],
    Rb         = x$Rb[, image.id],
    Gb         = x$Gb[, image.id],
    layout     = layout,
    norm       = norm,
    pout       = pout,
    extra.type = extra.type,
    crit1      = crit1,
    crit2      = crit2,
    nclass     = nclass,
    labs       = labs,
    plot.type  = plot.type,
    col.ex     = col.ex,
    pch        = pch,
    ...
  )
}

########################################################################/**
# \name{plot.smooth.line}
# 
# \alias{plot.smooth.line}
# 
# \title{Adding Lowess Lines to Current Plot}
# 
# \description{
#  This function adds a \code{\link{lowess}} line to the current
#  plot.  The  type of line can be specified as well as other
#  parameters.} 
# 
# \usage{
# plot.smooth.line(A, M, f=0.1, ...)
# }
# 
# \arguments{
#  \item{A}{a vector giving the x-coordinates of the points in the scatter
#           plot. In the microarray context, this could be a vector of
#           average log intensities.} 
# 
#  \item{M}{a vector giving the y-coordinates of the points in the scatter
#           plot. In the microarray context, this could be a vector of
#           log intensity ratios.} 
# 
#  \item{f}{the smoother span. This gives the proportion of points in the
#           plot which influence the smoothness at each value. Larger
# 	  values give greater smoothness. } 
# 
# \item{\dots}{graphical parameters may also be supplied as arguments
# to the function (see \code{\link{par}}).}        
# }
# 
# \value{
#  lines are added to the current plot.
# }
# 
# \note{An M vs A plot must be constructed \bold{prior} to the execution of this function.}
# 
# \references{ Chambers, J. M., Cleveland, W. S., Kleiner, B. and Tukey,
# P. A. (1983). Graphical Methods for Data Analysis. Wadsworth, Belmont,
# California. }
# 
# \seealso{ \code{\link{plot.mva}}, \code{\link{stat.ma}},
# \code{\link{lines}}, \code{\link{lowess}}, \code{\link{smooth}}. 
# }
# 
# \examples{
# data(MouseArray)
# ## mouse.setup <- init.grid()
# ## mouse.data <- init.data()
# 
# plot.mva(mouse.data, mouse.setup)
# plot.smooth.line(mouse.lratio$A, mouse.lratio$M)
# }
# 
# \keyword{microarray, lowess.}
#*/########################################################################

plot.smooth.line  <- function(x, M, f = 0.1, ...)
{
  # Adds a lowess curve of M against x to the current plot.
  #
  # x, M : coordinates of the scatter plot points.
  # f    : smoother span passed to lowess().
  # ...  : passed on to lines().

  # Keep only the pairs in which neither coordinate is NA or infinite.
  keep <- !is.na(x) & !is.na(M) & !is.infinite(x) & !is.infinite(M)

  # Fit the smoother, then draw it through approx()'s interpolation of the
  # fitted curve.
  fit <- lowess(x[keep], M[keep], f = f)
  lines(approx(fit), ...)
}

########################################################################/**
# \name{plot.confband.lines}
# 
# \alias{plot.confband.lines}
# 
# \title{Adding Lines Satisfying a Confidence Criterion to the Current M
#   vs A Plot}
# 
# \description{
#   This function adds 2 lines outlining the pointwise (intensity
#   dependent) confidence band on the M vs A plot.  The lines are drawn
#   such that a prespecified proportion of points are outside the 2
#   confidence curves.
#   The type of line may be specified as well as other parameters.}
# 
# \usage{
# plot.confband.lines(A, M, crit1=0.025, crit2=crit1, nclass=10, ...)
# }
# 
# \arguments{
#  \item{A}{a vector giving the x-coordinates of the points in the scatter
#           plot. In the microarray context, this could be a vector of
#           average log intensities.} 
# 
#  \item{M}{a vector giving the y-coordinates of the points in the scatter
#    plot. In the microarray context, this could be a vector of log
#    intensity ratios.} 
# 	
#  \item{crit1}{The proportion of points less than the lower confidence
#    curve.  This takes a decimal value between 0 and 1. }
#  \item{crit2}{The proportion of points greater than the upper confidence
#    curve.  By default, this has the same value as "crit1".}
#  \item{nclass}{A single number giving the approximate number of
#  intensity dependent groups to consider.} 
#  \item{\dots}{graphical parameters may also be supplied as arguments
#  to the function (see  \code{\link{par}}).}        
# }
# 
# \value{
#   Lines are added to the current plot.
# }
# 
# \note{
#   An M vs A plot must be constructed \bold{prior} to the execution
#   of this function.} 
# 
# \seealso{ \code{\link{plot.mva}}, \code{\link{stat.ma}},
#   \code{\link{lines}}, \code{\link{matlines}},
#   \code{\link{plot.confband.text}}, \code{\link{plot.confband.points}} .
# }
# 
# \examples{data(MouseArray)
# ## mouse.setup <- init.grid
# ## mouse.data <- init.data
# 
# ## To display an M vs A plot of the data 
# plot.mva(mouse.data, mouse.setup) 
# 
# ## Calculate M and A values 
# mouse.lratio <- stat.ma(mouse.data, mouse.setup)
# 
# ## To add default upper and lower confidence curves line to the M vs A plot
# plot.confband.lines(mouse.lratio$A, mouse.lratio$M) 
# }
# 
# \keyword{microarray, point-wise confidence band.}
#*/########################################################################

plot.confband.lines<-function (x, M, crit1=0.025, crit2=crit1, nclass=10, ...)
{
  # Adds lower and upper intensity-dependent cutoff curves to the current
  # M vs A plot.
  #
  # x      : x-coordinates (A values) of the points.
  # M      : y-coordinates (M values) of the points.
  # crit1  : lower-tail proportion; a value >= 1 is taken as a count and
  #          divided by length.na(M).
  # crit2  : upper-tail proportion; handled like crit1.
  # nclass : number of quantile bins of x in which cutoffs are computed.
  # ...    : passed on to matlines().
  #
  # Within each bin the crit1 and (1 - crit2) quantiles of M are computed
  # with quantile.na and drawn against the bin's upper edge.
  if (crit1 >= 1) crit1 <- crit1 / length.na(M)
  if (crit2 >= 1) crit2 <- crit2 / length.na(M)

  Abin <- quantile(x, probs=seq(0, nclass, 1)/nclass, na.rm=TRUE)

  # One row per bin, filled in place instead of growing with rbind().
  cutoff <- matrix(NA_real_, nrow=nclass, ncol=2)
  for (i in seq_len(nclass)) {
    # Bins are half-open [lower, upper) except the last one, which is closed
    # so that the spots with the largest x are not left out of every bin.
    below.upper <- if (i < nclass) x < Abin[i+1] else x <= Abin[i+1]
    in.bin <- (Abin[i] <= x) & below.upper
    # A missing x belongs to no bin.  Without this, the NA subscript would be
    # skipped by the assignment below and the corresponding M value would
    # leak into the quantiles of every bin.
    in.bin[is.na(in.bin)] <- FALSE
    xtmp <- M
    xtmp[!in.bin] <- NA
    cutoff[i, ] <- quantile.na(xtmp, probs=c(crit1, (1-crit2)))
  }
  matlines(Abin[-1], cutoff, ...)
}

########################################################################/**
# \name{plot.confband.points}
# 
# \alias{plot.confband.points}
# 
# \title{Highlights a Set of Points on the Current M vs A Plot}
# 
# \description{
#   This function highlights a prespecified proportion of extreme points
#   on the M vs A plots. 
# }
# 
# \usage{
# plot.confband.points(A, M, crit1=0.025, crit2=crit1, nclass=10, ...)
# }
# 
# \arguments{
#  \item{A}{a vector giving the x-coordinates of the points in the scatter
#           plot. In the microarray context, this could be a vector of
#           average log intensities.} 
# 
#  \item{M}{a vector giving the y-coordinates of the points in the scatter
#    plot. In the microarray context, this could be a vector of log
#    intensity ratios.} 
# 	
#  \item{crit1}{The number of points to be highlighted on the M vs A plot.
#    If crit1 < 1, the crit1*100\% spots with the smallest M values
#    will be highlighted. If crit1 >= 1, the crit spots with the
#    smallest M values are highlighted.}  
#  \item{crit2}{Similar to "crit1".   If crit2 < 1, the crit2*100\%
#  spots with the largest M values will be highlighted. If crit2 >= 1,
#  the crit2 spots with the largest M values are highlighted.}  
#  \item{nclass}{A single number giving the approximate number of
#  intensity dependent groups to consider.} 
#  \item{\dots}{graphical parameters may also be supplied as arguments
#  to the function (see \code{\link{par}}).}        
# }
# 
# \value{
#   Points are added to the current plot.
# }
# 
# 
# \seealso{\code{\link{plot.mva}}, \code{\link{stat.ma}},
#   \code{\link{lines}}, \code{\link{matlines}},
#   \code{\link{plot.confband.text}}, \code{\link{plot.confband.lines}} .
# }
# 
# \note{An M vs A plot must be constructed \bold{prior} to the
# execution of this function.} 
# 
# \examples{data(MouseArray)
# ## mouse.setup <- init.grid()
# ## mouse.data <- init.data()
# 
# plot.mva(mouse.data, mouse.setup) ## an M vs A plot 
# 
# mouse.lratio <- stat.ma(mouse.data, mouse.setup)
# 
# plot.confband.points(mouse.lratio$A, mouse.lratio$M)
# 
# ## 2.5\% of the spots with the smallest and largest M values are
# ## highlighted on the M vs A plot. 
# }
# 
# \keyword{microarray, point-wise confidence band.}
# 
#*/########################################################################
 

plot.confband.points<-function (x, M, crit1=0.025, crit2=crit1, nclass=10, col.ex=NULL, ...)
{
  # Highlights, on the current M vs A plot, the spots whose M value is
  # extreme within their intensity bin.
  #
  # x      : x-coordinates (A values) of the points.
  # M      : y-coordinates (M values) of the points.
  # crit1  : lower-tail proportion; a value >= 1 is taken as a count and
  #          divided by length.na(M).
  # crit2  : upper-tail proportion; handled like crit1.
  # nclass : number of quantile bins of x in which cutoffs are computed.
  # col.ex : colour handed to points() for the highlighted spots.
  # ...    : passed on to points().
  #
  # Cutoffs come from quantile.na; per the original note in this function it
  # removes infinite values too, which quantile(x, na.rm=F) does not.
  if (crit1 >= 1) crit1 <- crit1 / length.na(M)
  if (crit2 >= 1) crit2 <- crit2 / length.na(M)

  flagged <- rep(FALSE, length(x))
  Abin <- quantile(x, probs=seq(0, nclass, 1)/nclass, na.rm=TRUE)
  for (i in seq_len(nclass)) {
    # Bins are half-open [lower, upper) except the last one, which is closed
    # so that the spots with the largest x can be flagged as well.
    below.upper <- if (i < nclass) x < Abin[i+1] else x <= Abin[i+1]
    in.bin <- (Abin[i] <= x) & below.upper
    # A missing x belongs to no bin.  Without this, the NA subscript would be
    # skipped by the assignment below and the corresponding M value would
    # enter the cutoffs of every bin.
    in.bin[is.na(in.bin)] <- FALSE
    xtmp <- M
    xtmp[!in.bin] <- NA
    cutoff <- quantile.na(xtmp, probs=c(crit1, (1-crit2)))
    # which() drops the NA comparisons coming from spots outside the bin.
    extreme <- which((xtmp < cutoff[1]) | (xtmp > cutoff[2]))
    flagged[extreme] <- TRUE
  }
  points(x[flagged], M[flagged], pch=18, col=col.ex, ...)
}


########################################################################/**
# \name{plot.confband.text}
# 
# \alias{plot.confband.text}
# 
# \title{Add Selected Text to an M vs A Plot}
#   
# \description{`text' draws the strings given in the vector `labs' at the
#   coordinates given by `M' and `A'}
# 
# \usage{
# plot.confband.text(A, M, crit1=0.025, crit2=crit1, nclass=10,
# labs=NULL, output=F, ...)  
# }
# 
# \arguments{
#  \item{A}{a vector giving the x-coordinates of the points in the scatter
#           plot. In the microarray context, this could be a vector of
#           average log intensities.} 
#  
#  \item{M}{a vector giving the y-coordinates of the points in the scatter
#    plot. In the microarray context, this could be a vector of log
#    intensity ratios.} 
# 	
#  \item{crit1}{The number of points to be highlighted on the M vs A plot.
#    If crit1 < 1, the crit1*100\% spots with the smallest M values
#    will be highlighted. If crit1 >= 1, the crit spots  with the
#    smallest M values are highlighted.}  
#  \item{crit2}{Similar to "crit1".   If crit2 < 1, the crit2*100\%
#  spots with the largest M values will be highlighted. If crit2 >= 1,
#  the crit2 spots with the largest M values are highlighted.}  
#  \item{nclass}{A single number giving the approximate number of
#  intensity dependent groups to consider.} 
#  \item{labs}{ one or more character strings or expressions specifying the
#    text to be written.  If this string is not specified, by
#    default the index of the vector `M' will be used.}
#  \item{output}{logical, defaulting to `FALSE'. If `TRUE', a logical
#    vector flagging the elements of the vector `M' that are
#    highlighted is returned.} 
#  \item{\dots}{graphical parameters may also be supplied as arguments
#  to the function (see \code{\link{par}}).}        
# }
# 
# \note{An M vs A plot must be constructed \bold{prior} to the execution of this function.}
# 
# \examples{data(MouseArray)
# ## mouse.setup <- init.grid()
# ## mouse.data <- init.data()
# 
# plot.mva(mouse.data, mouse.setup) ## an M vs A plot
# 
# mouse.lratio <- stat.ma(mouse.data, mouse.setup)
# 
# plot.confband.text(mouse.lratio$A, mouse.lratio$M)
# ## 2.5\% of the spots with the largest and smallest M values are
# ## highlighted on the M vs A plot, and each spot is assigned the
# ## default label of its corresponding index value. 
# }
# 
# \seealso{ \code{\link{plot.mva}}, \code{\link{stat.ma}},
#   \code{\link{lines}}, \code{\link{matlines}},
#   \code{\link{plot.confband.lines}}, \code{\link{plot.confband.points}} .
# }
# 
# \keyword{microarray, point-wise confidence band.}
#*/########################################################################
 

 plot.confband.text <- 
function (x, M, crit1=0.025, crit2=crit1, nclass=10, labs=NULL, output=FALSE, ...) 
{
  # Labels, on the current M vs A plot, the spots whose M value is extreme
  # within their intensity bin.
  #
  # x      : x-coordinates (A values) of the points.
  # M      : y-coordinates (M values) of the points.
  # crit1  : lower-tail proportion; a value >= 1 is taken as a count and
  #          divided by length.na(M).
  # crit2  : upper-tail proportion; handled like crit1.
  # nclass : number of quantile bins of x in which cutoffs are computed.
  # labs   : labels, one per spot; defaults to the index into M.
  # output : if TRUE, return the logical vector marking the labelled spots,
  #          otherwise return NULL.
  # ...    : passed on to text().
  if (crit1 >= 1) crit1 <- crit1 / length.na(M)
  if (crit2 >= 1) crit2 <- crit2 / length.na(M)

  flagged <- rep(FALSE, length(x))
  Abin <- quantile.na(x, probs=seq(0, nclass, 1)/nclass)
  for (i in seq_len(nclass)) {
    # Bins are half-open [lower, upper) except the last one, which is closed
    # so that the spots with the largest x can be flagged as well.
    below.upper <- if (i < nclass) x < Abin[i+1] else x <= Abin[i+1]
    in.bin <- (Abin[i] <= x) & below.upper
    # A missing x belongs to no bin.  Without this, the NA subscript would be
    # skipped by the assignment below, and the corresponding M value would
    # enter every bin's cutoffs and could be flagged in the returned vector.
    in.bin[is.na(in.bin)] <- FALSE
    xtmp <- M
    xtmp[!in.bin] <- NA
    cutoff <- quantile.na(xtmp, probs=c(crit1, (1-crit2)))
    # which() drops the NA comparisons coming from spots outside the bin.
    extreme <- which((xtmp < cutoff[1]) | (xtmp > cutoff[2]))
    flagged[extreme] <- TRUE
  }
  if (is.null(labs)) labs <- as.character(seq_along(M))
  text(x[flagged], M[flagged], labels=labs[flagged], ...)
  if (output) flagged else NULL
}


##########################################################################
#                                End of file
##########################################################################