R/yilin.R
In pickgene: Adaptive Gene Picking for Microarray Expression Data Analysis

Documented in adjustlevel model.pickgene multipickgene pickgene pickgene2 pickgene.poly robustscale twowayanovapickgene

###########################################################################
##
## $Id$
##
##     Copyright (C) 2000 Yi Lin and Brian S. Yandell
##
## This program is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by the
## Free Software Foundation; either version 2, or (at your option) any
## later version.
##
## These functions are distributed in the hope that they will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## The text of the GNU General Public License, version 2, is available
## as http://www.gnu.org/copyleft or by writing to the Free Software
## Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
##
############################################################################
pickgene2 <- function( ... )
  pickgene.two( ... )
multipickgene<-function( ... )
  pickgene.poly( ... )
twowayanovapickgene <- function( x, fac1level, fac2level, ... )
  pickgene( x, faclevel = c(fac1level,fac2level), ... )
###########################################################################
## the following function robustly estimate the center and scale.
robustscale<-function(y,x,nslice=400,corcenter=TRUE,decrease=TRUE){
  n<-length(x)
  ox<-order(x)
  x<-x[ox]
  y<-y[ox]
  rx<-rank(x)
  k<-floor(n/nslice)
  slicef<-factor(cut(rx, breaks = c(k*(0:(nslice-1)),n)))
  slicex<-unlist(tapply(x, slicef, median), recursive = TRUE, use.names=FALSE)

  if (corcenter) {
    slicemedian<-unlist(tapply(y, slicef, median), recursive = TRUE, use.names=FALSE)
    slicemad<-unlist(tapply(y, slicef, mad), recursive = TRUE, use.names=FALSE)
  }
  else {
    slicemad<-unlist(tapply(y, slicef, mad, center=0), recursive = TRUE,
                     use.names=FALSE)
  }
  tmp <- slicemad > 0
  if( sum( tmp ) > 3 ) {
    preroblogscale <- smooth.spline(slicex[tmp],log(slicemad[tmp]))
    robscale <- exp(predict(preroblogscale,x)$y)
  }
  else {
    preroblogscale <- list( y = rep( log( median( slicemad[tmp] )),
                              length( slicemad )))
    robscale <- rep( median( slicemad ), length( x ))
  }
  if (decrease) {
    robscale[(ceiling(n/2)):1] <- cummax(robscale[(ceiling(n/2)):1])
    robscale[(ceiling(n/2)):n] <- cummin(robscale[(ceiling(n/2)):n])
  }
  if (corcenter) {
    if( length( slicex ) > 3 ) {
      prerobcenter <- smooth.spline(slicex,slicemedian/(exp(preroblogscale$y)))
      robcenter <- predict(prerobcenter,x)$y * robscale
    }
    else
      robcenter <- rep( median( slicemedian ), length( x ))
    adjustedrobscale <- mad((y-robcenter)/(robscale)) * robscale
  }
  else {
    robcenter <- rep(0,n)
    adjustedrobscale<-mad((y/robscale), center = 0) * robscale
  }
  return(list(center=robcenter,scale=adjustedrobscale,x=x,y=y))
}

###########################################################################
## significance level adjustment for multiple tests. (similar to Bonferroni)
adjustlevel<-function(ntest,alpha){
  singlelevel<- 1 - exp((log(1-alpha))/ntest)
  return(singlelevel)
}

###########################################################################
## the following function picks genes in 2 condition case with logratio (y)
## and logofproduct (intensity). It also serves as the building block for
## the later functions for multi-condition and ANOVA.
pickgene.two <- function ( y, intensity,
                          geneid = 1:length(y),
                          ## set elsewhere
                          singlelevel=0.0001,
                          ## automatic pick genes (or pick npickgene if >0)
                          npickgene = -1, meanrank = FALSE,
                          xlab="Average Intensity",
                          ylab="Trend", main = "",
                          plotit = TRUE, col = "blue",
                          negative = numeric( 0 ),
                          ... ) {
  n <- length( y ) - length( negative )
  if( length( negative )) {
    sc <- robustscale( y[-negative], intensity[-negative], ... )
    geneid <- geneid[ order( intensity[-negative] ) ]
  }
  else {
    sc <- robustscale( y, intensity, ... )
    geneid <- geneid[ order( intensity ) ]
  }
  z <- ( sc$y - sc$center ) / ( sc$scale )
  if( meanrank )
    logxy <- "y"
  else {
    logxy <- "xy"
    sc$x <- exp( sc$x )
  }
  sc$y <- exp( sc$y )
  zcut <- qnorm( singlelevel[1] / 2, lower.tail = FALSE)

  sc$lower <- exp( sc$center - zcut * sc$scale )
  sc$upper <- exp( sc$center + zcut * sc$scale )
  if ( npickgene < 0 ) {
    pickedzscore <- abs(z) > zcut
    if( plotit ) {
      rx <- range( sc$x )
      ry <- range( sc$y )
      if( length( negative )) {
        ry <- range( c( ry, exp( y[negative] )))
        if( !meanrank )
          rx <- range( c( rx, exp( intensity[negative] )))
      }
      plot( rx, ry, type = "n", log = logxy, xlab=xlab, ylab=ylab )
      if( main != "" )
        title( main )

      ## critical lines
      lines( sc$x, sc$lower, col = "red" )
      lines( sc$x, sc$upper, col = "red" )
      if( length( singlelevel ) > 1 )
        for( i in seq( 2, length( singlelevel ))) {
          zcut <- qnorm( singlelevel[i] / 2, lower.tail = FALSE)
          lines( sc$x, exp( sc$center - zcut * sc$scale ), lty = 2 )
          lines( sc$x, exp( sc$center + zcut * sc$scale ), lty = 2 )
        }
      ## negligible contrasts
      points( sc$x[!pickedzscore], sc$y[!pickedzscore], cex = 0.25 )
      ## signficant contrasts
      points( sc$x[pickedzscore], sc$y[pickedzscore], cex = 0.75, col = col )
      ## negative values
      if( length( negative ))
        points( exp( intensity[negative] ), exp( y[negative] ),
               cex = 0.25, col = "red" )

      # center line
      lines( sc$x, exp( sc$center ), col = "red", lty = 2 )
    }
    ## get the order and zscore for the top picks
    pickedorder <- seq( n )[pickedzscore]
    z <- z[pickedorder]
    tmp <- order( (-1) * abs( z ))
    pickedorder <- pickedorder[tmp]
    pickedzscore <- z[tmp]
  }
  else {
    pickedorder <- order( (-1) * abs( z ))[1:npickgene]
    pickedzscore <- z[pickedorder]
  }
  ## data frame of picked genes
  fold <- sc$y[pickedorder]
  fold[ fold < 1 ] <- - 1 / fold[ fold < 1 ]
  pick <- data.frame( probe = geneid[pickedorder],
                     average = round( sc$x[pickedorder], 3 ),
                     fold = round( fold, 2 ),
                     pvalue = round( 1 - ( 1 - 2 * pnorm( abs( pickedzscore ),
                       lower.tail = FALSE )) ^ n, 4 ),
                     row.names = NULL )
  ## score structure with centered values
  sc <- as.data.frame( sc )
  sc$y <- ( log( sc$y ) - sc$center ) / sc$scale
  sc$center <- sc$scale <- NULL
  names( sc ) <- c("intensity","score","lower","upper")
  sc$probe <- geneid
  list( pick = pick, score = sc )
}

###########################################################################
## the following function picks genes in the situation of a linear sequence
## of conditions. The argument condi can be numerical (quantitative description
## of the conditions) or ordinal (1 : numberofconditions), depending on the
## situation. The function picks genes according to linear trend (d=1), or
## linear trend and quadratic trend (d=2). Cubic trend can be included if d is
## set to be 3. The argument x is a numberofgenes by numberofconditions matrix
## consisting of logmeasurements.
pickgene.poly <-function( x, # data matrix
                         condi = 1:min(ncol(x),2), # condition levels
                         geneID = NULL,
                         overalllevel = 0.05,
                         npickgene= -1,
                         d=2, # polynomial order (1-3)
                         ylabs = paste( contrastnames, "Trend" ),
                         contrastnames = c("Linear","Quadratic","Cubic"),
                         ... ) {
  if ( d > 3 | d < 1 )
    return( cat( "d should be 1, 2, or 3.\n" ))
  x <- as.matrix( x )
  if( ncol( x ) < 2 )
    return( cat( "x must have at least 2 columns.\n" ))
  d <- min( d, ncol(x)-1 )
  if( is.null( geneID ) | length( geneID ) != nrow( x ))
    geneID <- 1:nrow(x)
  ## get orthogonal polynomial contrasts
  y <- x %*% poly( condi, d )
  intensity <- x %*% rep( 1, length( condi )) / length( condi )

  singlelevel <- adjustlevel( nrow( x ) * d , overalllevel )
  npickgene <- floor( npickgene / d )
  pickedgene <- list()
  pickedgene$pick <- pickedgene$score <- list()

  par( mfrow = c(d,1) )
  for( i in 1:d) {
    tmp <- pickgene.two( y[,i], intensity,
                        geneid = geneID,
                        singlelevel = singlelevel,
                        npick = npickgene,
                        ylab = ylabs[i],
                        ... )
    pickedgene$pick[[i]] <- tmp$pick
    pickedgene$score[[i]] <- tmp$score
  }
  names( pickedgene$pick ) <- contrastnames[1:d]
  names( pickedgene$score ) <- contrastnames[1:d]
  return( pickedgene )
}

###########################################################################
## the following function picks genes in the two-factor (ANOVA) situation.
## It ignores interaction, and picks genes according to trends in each factor.
## Similar functions can easily be written in the same vein for multi-factor
## situation and taking (some) interactions into consideration.
model.pickgene <- function( faclevel,
                           facnames = letters[ seq( length( faclevel )) ],
                           contrasts.fac = "contr.poly",
                           collapse = "+",
                           show = NULL,
                           renorm = 1,
                           modelexpr = formula( paste( "~",
                             paste( facnames, collapse = collapse ))),
                           contrasts.list = contr.list ) {
  ## set up polynomial contrasts
  dd <- expand.grid( apply( as.matrix( rev( faclevel )), 1, seq ))
  names( dd ) <- rev( facnames )
  for( i in facnames )
    dd[[i]] <- factor( dd[[i]] )

  contr.list <- as.list( array( contrasts.fac, length( faclevel )))
  names( contr.list ) <- facnames
  mat <- model.matrix( modelexpr, dd, contrasts = contr.list )
  if( any( renorm != 1 )) {
    if( is.null( show ))
      show <- seq( ncol( mat ) - 1 )
    for( i in seq( length( show )))
      mat[,1+i] <- mat[,1+i] * renorm[show[i]]
  }
  mat
}

###########################################################################
pickgene <- function( data,
                     geneID = 1:nrow(data),
                     overalllevel=0.05,
                     npickgene= -1,
                     marginal = FALSE,
                     rankbased = TRUE, allrank = FALSE,
                     meanrank = FALSE,
                     offset = 0,
                     modelmatrix = model.pickgene( faclevel, facnames,
                       contrasts.fac, collapse, show, renorm ),
                     faclevel = ncol( data ),
                     facnames = letters[ seq( length( faclevel )) ],
                     contrasts.fac = "contr.poly",
                     show = NULL,
                     main = "", renorm = 1, drop.negative = FALSE,
                     plotit = npickgene < 1,
                     mfrow = c(nr,nc), mfcol = NULL,
                     ylab = paste( shownames, "Trend" ),
                     ... ){
  data <- as.matrix( data )
  if( rankbased | allrank ) {
    tmpfn <- function( data ) qnorm( rank( data ) / ( 1 + length( data )))
    data <- if( allrank )
      matrix( tmpfn( data ), nrow( data ), ncol( data ),
             dimnames = dimnames( data ))
    else
      apply( data, 2, tmpfn)
    negative <- numeric(0)
  }
  else { # log transformation, and drop zeroes
    xmin <- apply( data, 1, min, na.rm = TRUE )
    negative <- seq( length( xmin ))[ xmin <= -offset ]
    data <- offset + data
    if( drop.negative ) {
      data <- data[-negative,]
      warn <-  "probes dropped with values below"
    }
    else {
      tmp <- c( data ) > 0
      data[!tmp] <- min( data[tmp] ) / 2
      warn <-  "probes truncated to"
    }
    if( length( negative ))
      warning( paste( length( negative ), warn, offset, "\n" ))
    if( drop.negative )
      negative <- numeric( 0 )
    data <- log( data )
  }

  collapse <- if( marginal ) "+" else "*"
  numcontr <- ncol( modelmatrix ) - 1
  y <- data %*% modelmatrix
  if( is.null( show ))
    show <- seq( numcontr )

  if( meanrank )
    intensity <- rank( y[,1] )
  else
    intensity <- y[,1] / prod( faclevel )
  singlelevel <- adjustlevel( nrow( data ) * numcontr, overalllevel )
  npickgene <- floor ( npickgene / numcontr )

  if( plotit ) {
    nc <- min( 2, ceiling( length( show ) / 3 ))
    nr <- min( 3, ceiling( length( show ) / nc ))
    if( missing( mfcol )) {
      if( !is.null( mfrow ))
        par( mfrow = mfrow )
    }
    else {
      if( !is.null( mfcol ))
        par( mfcol = mfcol )
    }
  }
  pickedgene <- list()
  pickedgene$pick <- pickedgene$score <- list()
  shownames <- dimnames( y )[[2]][1+show]
  main <- array( main, length( shownames ), dimnames = list( shownames ))
  names( ylab ) <- shownames
  for( i in shownames ) {
    tmp <- pickgene.two( y[,i], intensity, geneID, singlelevel,
                        npickgene, main = main[i], plotit = plotit,
                        meanrank = meanrank,
                        negative = negative, ylab = ylab[i], ... )
    pickedgene$pick[[i]] <- tmp$pick
    pickedgene$score[[i]] <- tmp$score
  }
  invisible(pickedgene)
}

Any scripts or data that you put into this service are public.

pickgene documentation built on Nov. 8, 2020, 6:50 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

pickgene
Adaptive Gene Picking for Microarray Expression Data Analysis

R/yilin.R
In pickgene: Adaptive Gene Picking for Microarray Expression Data Analysis

Defines functions pickgene model.pickgene pickgene.poly adjustlevel robustscale twowayanovapickgene multipickgene pickgene2

Documented in adjustlevel model.pickgene multipickgene pickgene pickgene2 pickgene.poly robustscale twowayanovapickgene

Try the pickgene package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

pickgene Adaptive Gene Picking for Microarray Expression Data Analysis

R/yilin.R In pickgene: Adaptive Gene Picking for Microarray Expression Data Analysis

Defines functions pickgene model.pickgene pickgene.poly adjustlevel robustscale twowayanovapickgene multipickgene pickgene2

Documented in adjustlevel model.pickgene multipickgene pickgene pickgene2 pickgene.poly robustscale twowayanovapickgene

Try the pickgene package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

pickgene
Adaptive Gene Picking for Microarray Expression Data Analysis

R/yilin.R
In pickgene: Adaptive Gene Picking for Microarray Expression Data Analysis