R/yandell.R
In pickgene: Adaptive Gene Picking for Microarray Expression Data Analysis

Documented in holms orangene pickedchisq pickedhist pickedpair pickedscore robustbox toprankgene

#####################################################################
##
## $Id$
##
##     Copyright (C) 2001 Brian S. Yandell
##
## This program is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by the
## Free Software Foundation; either version 2, or (at your option) any
## later version.
##
## These functions are distributed in the hope that they will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## The text of the GNU General Public License, version 2, is available
## as http://www.gnu.org/copyleft or by writing to the Free Software
## Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
##
###############################################################################
rangene <- function (n = 10000, center = 4, spread = 2, contamination = 0.05,
    alpha = c(1,1), noise = 0.5, omega = c(20,20))
{
    contam <- round(n * contamination)
    data <- list()
    r <- rnorm(n, center, spread)
    rr <- rank(r) / ( 1 + n )
    ## contaminate preferentially genes with smaller intensities
    cc <- seq( n )[ contam >= rank( runif( n )^2 * (2 - rr )) ]
    ## half of contaminated genes upregulate
    upreg <- .5 < runif( contam )
    for (i in 1:2) {
        data[[i]] <- r
        s <- cc[upreg]
        if( length( s ))
          data[[i]][s] <- data[[i]][s] + rnorm(length(s),3-rr[s],0.25)
        upreg <- !upreg
    }
    for (i in 1:2)
        data[[i]] <- data[[i]] + rnorm(n, 0, noise)

    known <- data <- as.data.frame(data)
    for (i in 1:2) {
        data[[i]] <- alpha[i] * exp(data[[i]])
        data[[i]] <- data[[i]] + rnorm(n, 0, omega[i])
    }
    list(observed = data, true = known,contam=cc)
}
### Find top ranked genes
toprankgene <- function( yy, n = 500 )
{
  tmpy2 <- rank( apply( yy$true, 1, function(x) -abs( diff( x )) ))
  tmpy <- pickgene(yy$obs, npickgene = n )$pick[[1]]$probe

  count <- rep(NA,n)
  for( i in 1:n ) {
    count[i] <- sum( tmpy2[ tmpy[1:i] ] <= i )
  }
  count
}
### Yi Lin's original random gene generator
### Classical contamination model for robust statistics
orangene <- function( n = 10000, center = 4, spread = 2, contamination = .05,
         alpha = c(3,2), noise = 0.1, omega = c(20,30) )
{
  contam <- round( n * contamination )

  data <- list()
  r <- rnorm(n,center,spread)
  for( i in 1:2 ) {
    ## gene expression
    data[[i]] <- r

    ## contamination = differential expression
    data[[i]][seq(contam)] <- data[[i]][seq(contam)] + rnorm(contam)

    ## intrinsic noise
    data[[i]] <- data[[i]] + rnorm(n,0,noise)
  }
  known <- data <- as.data.frame( data )
  for( i in 1:2 ) {
    ## attenuation
    data[[i]] <- alpha[i] * exp( data[[i]] )

    ## measurement error
    data[[i]] <- data[[i]] + rnorm(n,0,omega[i])
  }
  list( observed = data, true = known )
}
### Organize scores from pickgene structure
pickedhist <- function( pick, show = names( pick ), title = show, p1 = .05,
                       plotit = TRUE, rotate = FALSE, mfrow = c(nr,nc), bw = NULL )
{
  pick <- pick$score
  if( is.numeric( show ))
    show <- names( pick )[show]
  else {
    if( any( is.na( match( show, names( pick )))))
      stop( paste( "show choices do not match contrasts:",
                  paste( names( pick ), collapse = ", " )))
  }
  n <- nrow( pick[[1]] )
  if( plotit ) {
    nr <- min( 2, ceiling( length( show ) / 3 ))
    nc <- min( 3, ceiling( length( show ) / nr ))
    if( !is.null( mfrow ))
      par( mfrow = mfrow, pty = "s" )
    tmplvl <- qnorm( adjustlevel( n * 2, .05 ) / 2, lower.tail
                    = FALSE)
    names( title ) <- show
    if( !is.null( bw )) {
      bw <- array( bw, length( show ))
      names( bw ) <- show
    }
    for( i in show ) {
      if( is.null( bw ))
        tmp <- density( pick[[i]]$score )
      else
        tmp <- density( pick[[i]]$score, bw = bw[i] )
      cat( "density bandwidth:", tmp$bw, "\n" )
      if( rotate )
        plot( tmp$y, tmp$x, type = "l", ylab = title[i],
             xlab = "Relative Frequency", main = "" )
      else
        plot( tmp$x, tmp$y, type = "l", xlab = title[i],
             ylab = "Relative Frequency", main = "" )
      tmpd <- dnorm( tmp$x )
      if( rotate )
        lines( tmpd, tmp$x, lty = 2, col = "blue" )
      else
        lines( tmp$x, tmpd, lty = 2, col = "blue" )
      tmphi <- 2 * tmpd < tmp$y
      xx <-  range( tmp$x[ abs( tmp$x ) < tmplvl & !tmphi] )
      if( rotate ) {
        lines( tmp$y * tmphi, tmp$x, lty = 4, col = "red" )
        abline( h = c(-1,1) * tmplvl, lty = 3, col = "red" )
        axis( 2, xx, labels = round( xx, 1 ))
        lines(( tmp$y - (1 - p1) * tmpd ) / p1, tmp$x, lty = 5, col = "purple" )
      }
      else {
        lines( tmp$x, tmp$y * tmphi, lty = 4, col = "red" )
        abline( v = c(-1,1) * tmplvl, lty = 3, col = "red" )
        axis( 1, xx, labels = round( xx, 1 ))
        lines( tmp$x, ( tmp$y - (1 - p1) * tmpd ) / p1, lty = 5, col = "purple" )
      }
    }
  }
  invisible( tmp )
}
### Organize scores from pickgene structure
pickedchisq <- function( pick, show = names( pick ),
                        title = "Squared Distance",
                        plotit = TRUE, alpha = .05 )
{
  pick <- pick$score
  if( is.numeric( show ))
    show <- names( pick )[show]
  else {
    if( any( is.na( match( show, names( pick )))))
      stop( paste( "show choices do not match contrasts:",
                  paste( names( pick ), collapse = ", " )))
  }
  n <- nrow( pick[[1]] )
  if( plotit ) {
    tmplvl <- sqrt( qchisq(( 1 - alpha ) ^ ( 1 /n ), length( show )))
    sumsq <- 0
    for( i in show )
      sumsq <- sumsq + pick[[i]]$score^2
    tmp <- density( sumsq )
    plot( tmp$x, tmp$y, type = "l", log = "x", xlab = title,
             ylab = "Relative Frequency", main = "" )
    tmpd <- dchisq( tmp$x, length( show ))
    lines( tmp$x, tmpd, col = "blue" )
    tmphi <- 2 * tmpd < tmp$y
    xx <-  max( tmp$x[ tmp$x < tmplvl & !tmphi ] )
    lines( tmp$x, tmp$y * tmphi, col = "red" )
    abline( v = tmplvl, col = "red" )
    axis( 1, xx, labels = round( xx, 1 ))
    lines( tmp$x, tmp$y - tmpd, col = "purple" )
  }
  invisible( tmp )
}
holms <- function( x, alpha = .05, cut = TRUE ) {
  n <- length( x )
  x <- sort( 2 * ( 1 - pnorm( abs( x ))))
  x[x==0] <- min( x[x>0] ) / 2
  if( cut )
    x <- x[ x <= alpha ]
  nx <- length( x )
  plot( x, log = "y", xlab = "rank order", ylab = "raw p-value" )
  axis( 2, alpha )
  abline( h = c(alpha,alpha/n), col = "red" )
  text( nx, 0.5 * ( alpha / n ), "Bonferroni", adj = 1, col = "red" )
  abline( h = 1-(1-alpha)^(1/n), col = "blue" )
  text( nx, 2 * ( 1-(1-alpha)^(1/n)), "Sidak", adj = 1, col = "blue" )
  lines( alpha / ( 1 + n - seq( x )), col = "purple" )
  text( nx, alpha / sqrt(n), "Holms", adj = 1, col = "purple" )
  abline( v = max( seq( x )[ x < alpha / n ] ), col = "blue", lty = 2  )
  abline( v = max( seq( x )[ x < alpha / ( 1 + n - seq( x )) ] ),
    col = "purple", lty = 2 )
  points( seq( x ) / ( 1 + n ), col = "grey" )
  invisible( x )
}
### Organize scores from pickgene structure
pickedscore <- function( pick, description, show = 1:2, alpha = .05,
                        xlab = show[1], ylab = show[2], main = "",
                        mfrow = c(1,1))
{
  score <- list()
  for( i in names( pick$score ))
    score[[i]] <- pick$score[[i]]$score
  score <- data.frame( score )

  show <- array( show, 2 )
  if( is.numeric( show ))
    show <- names( pick$pick )[show]
  else {
    if( any( is.na( match( show, names( pick$pick )))))
      stop( paste( "show choices do not match contrasts:",
                  paste( names( pick$pick ), collapse = ", " )))
  }
  if( !is.null( mfrow ))
    par( mfrow = mfrow, pty = "s" )

  eqscplot( score[[show[1]]], score[[show[2]]], type = "n",
           xlab = xlab, ylab = ylab )
  title( main )
  points( score[[show[1]]], score[[show[2]]], cex = .25 )
  n <- length( score[[1]] )
  tmplvl <- qnorm( adjustlevel( n * 2, alpha ) / 2, lower.tail
                  = FALSE)
  abline( h = c(-1,1)*tmplvl, v = c(-1,1) * tmplvl, col = "red" )
  tmp <- sqrt( qchisq(( 1 - alpha ) ^ ( 1 /n ), 2 ))
  x <- seq( -tmp, tmp, length = 200)
  y <- sqrt( tmp^2 - x^2 )
  lines(x,y,col="blue")
  lines(x,-y,col="blue")
  tmp <- abs( score[[show[1]]] ) > tmplvl | abs( score[[show[2]]] ) >
    tmplvl
  points( score[[show[1]]][tmp], score[[show[2]]][tmp], cex = .75, col =
         "blue" )

  probes <- character()
  for( i in names( pick$pick ))
    probes <- c( probes, as.character( pick$pick[[i]]$probe ))
  probes <- sort( unique( probes ))
  score <- score[ match( probes, pick$score[[1]]$probe, nomatch = 0 ), ]
  dimnames( score ) <- list( probes, names( score ))

  pick <- pick$pick
  fold <- pvalue <- matrix( NA, length( probes ), length( pick ),
                           dimnames = list( probes, names( pick )))
  for( i in names( pick )) {
    fold[ as.character( pick[[i]]$probe ), i ] <- pick[[i]]$fold
    pvalue[ as.character( pick[[i]]$probe ), i ] <- pick[[i]]$pvalue
  }
  ll <- list( score = score, fold = fold, pvalue = pvalue )
  ## add descriptions if provided
  if( !missing( description ))
    ll$description <- description[probes]
  ### want to add probe description to list
  ll
}
pickedpair <- function( x, columns, description,
                       probe = "Probe.Set",
                       renorm = c(sqrt(2),sqrt(6)),
                       pick = pickgene( x[,columns],
                         x[,probe], ...,
                         renorm = renorm, plotit = FALSE ),
                       main = "", ... )
{
  par(mfrow=c(2,2),pty = "s" )
  score <- pickedscore( pick, description, mfrow = NULL,
                              xlab = "Additive Effect",
                              ylab = "Dominance Effect",
                              main = main )
  pickedhist( pick, 2, title = "Dominance Effect", mfrow = NULL,
             rotate = TRUE )
  pickedhist( pick, 1, title = "Additive Effect", mfrow = NULL )
  invisible( score )
}
robustbox<-function(y,x,nslice=400,
  xlab = "Log Average Intensity", ylab = "Standardized Difference",
  shrink = FALSE,
  crit = qnorm( adjustlevel( n , overalllevel ) / 2, lower.tail = FALSE),
  overalllevel = .05, cex = 0.1, lwd = 2, plotit = TRUE )
{
  n<-length(x)
  ylim <- if( shrink )
    c(-1,1) * crit
  else
    range( y )

  ox<-order(x)
  x<-x[ox]
  y<-y[ox]
  rx<-rank(x)
  k<-floor(n/nslice)
  slicef<-factor(cut(rx, breaks = c(k*(0:(nslice-1)),n)))
  slicex<-unlist(tapply(x, slicef, median), recursive = TRUE,  use.names=FALSE)

  slicebox <- t( matrix( unlist( tapply( y, slicef, function(x)
    boxplot.stats(x)$stats ), recursive = TRUE, use.names=FALSE), 5 ))
  robmed <- apply( slicebox, 2, median )
  plot(x,y, type = "n", xlab = xlab, ylab = ylab, ylim = ylim )
  if( plotit )
    points(x,y,cex = cex)
  for( i in seq( ncol( slicebox ))) {
    if( diff( range( slicebox[,i] )) > 0 ) {
      tmp <- smooth.spline( slicex, slicebox[,i] )
      tmp <- predict( tmp, x )$y
      lines( x, tmp, col = "blue", lwd = lwd )
    }
    else
      lines( range( x ), rep( robmed[i], 2 ), col = "red", lwd = lwd )
  }
  invisible()
}

Any scripts or data that you put into this service are public.

pickgene documentation built on Nov. 8, 2020, 6:50 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

pickgene
Adaptive Gene Picking for Microarray Expression Data Analysis

R/yandell.R
In pickgene: Adaptive Gene Picking for Microarray Expression Data Analysis

Defines functions robustbox pickedpair pickedscore holms pickedchisq pickedhist orangene toprankgene

Documented in holms orangene pickedchisq pickedhist pickedpair pickedscore robustbox toprankgene

Try the pickgene package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

pickgene Adaptive Gene Picking for Microarray Expression Data Analysis

R/yandell.R In pickgene: Adaptive Gene Picking for Microarray Expression Data Analysis

Defines functions robustbox pickedpair pickedscore holms pickedchisq pickedhist orangene toprankgene

Documented in holms orangene pickedchisq pickedhist pickedpair pickedscore robustbox toprankgene

Try the pickgene package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

pickgene
Adaptive Gene Picking for Microarray Expression Data Analysis

R/yandell.R
In pickgene: Adaptive Gene Picking for Microarray Expression Data Analysis