R/plots.R
In EBarrays: Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

Documented in checkCCV checkModel checkVarsQQ plot.ebarraysEMfit

checkCCV <- function(data, useRank = FALSE, f = 1/2)
{
    cv <- function(x) {
        return(sqrt(var(x))/mean(x))
    }
    if (is(data, "ExpressionSet"))
        data <- exprs(data)
    means <- apply(data, 1, mean)
    cvs <- apply(data, 1, cv)

    if (useRank)
    {
        plot(rank(means), rank(cvs), pch=".",
             xlab=c("Rank of Mean"),
             ylab=c("Rank of CV") )
        lines(lowess(rank(means), rank(cvs), f = f), col = "red")
    }
    else
    {
        plot(means, cvs, pch = ".",
             xlab = "Mean",
             ylab = "CV",
             log="xy")
        lines(lowess(means, cvs, f = f), col="red")
    }
    invisible()
}

checkModel <- function(data, fit, model = c("gamma", "lognormal", "lnnmv"),
                       number = 9, nb = 10, cluster=1, groupid=NULL){
  if (!require(lattice, quietly = TRUE)) stop("The lattice package could not be loaded")
  model <- match.arg(model)
  if (is(data, "ExpressionSet")) data <- exprs(data)
  
  flag <- apply(data<=0,1,sum)
  data <- data[flag==0,]

  if(model=="gamma" | model=="lognormal"){
    clusters <- postprob(fit,data)$cluster
  
    pp.to.cluster <- function(pps){
      return(order(-pps)[1])
    }
  
    clusters <- apply(clusters,1,pp.to.cluster)
    data <- data[, unlist(fit@hypotheses@patterns[[1]])]
  
    curr.data <- data[clusters==cluster,]
  } else if (model=="lnnmv"){
    curr.data <- data[, unlist(fit@hypotheses@patterns[[1]])]
  }

  if (model == "gamma"){
    means <- apply(curr.data, 1, mean)
    mean.ranks <- equal.count(rank(means), number = number,
                              overlap = (number - length(means) / nb) / (number - 1))
    theta <- fit@family@invlink(fit@thetaEst[cluster,])
    ans <- qqmath(~ as.vector(curr.data) | mean.ranks, 
                  scales = list(relation = "free", draw = TRUE),
                  prepanel = function(x, ...) {
                    scale.gg <- mean(x) / theta[1]
                    xx <- qgamma(ppoints(length(x)), shape = theta[1], scale = scale.gg)
                    list(xlim = range(xx), ylim = range(x))
                  },
                  panel = function(x, ...) {
                    scale.gg <- mean(x) / theta[1]
                    xx <- qgamma(ppoints(length(x)), shape = theta[1], scale = scale.gg)
                    panel.qqmathline(x, distribution = function(p) qgamma(p, shape = theta[1], scale = scale.gg))
                    panel.qqmath(x, distribution = function(p) qgamma(p, shape = theta[1], scale = scale.gg))
                  },
                  xlab = "Quantiles of fitted Gamma distribution",
                  ylab = "Expression")
  } else if (model == "lognormal"){
    curr.data <- log(curr.data)
    means <- apply(curr.data, 1, mean)
    curr.data <- curr.data-outer(means, rep(1,dim(curr.data)[2]))
    mean.ranks <- equal.count(rank(means), number = number,
                              overlap = (number - length(means) / nb) / (number - 1))
    theta <- fit@family@invlink(fit@thetaEst[cluster,])
    ans <- qqmath(~ as.vector(curr.data) | mean.ranks,
                  distribution = function(p){qnorm(p, mean=0, sd=sqrt(theta[2]))},
                  scales = list(relation = "free", draw = TRUE),
                  panel = function(x, distribution, ...) {
                      panel.qqmathline(x, distribution = distribution, ...)
                      panel.qqmath(x, distribution = distribution, ...)
                    },
                  xlab = paste("Quantiles of Normal N(0,", round(theta[2],3),")", sep=""),
                  ylab = "log Expression centered at 0")
  } else if (model== "lnnmv"){
    curr.data <- log(curr.data)
    means <- apply(curr.data, 1, mean)
    if(is.null(groupid)) stop("Please specify groupid")
    vars <- fit@family@f0.arglist(data, fit@hypotheses@patterns, groupid)$common.args$sigmag2
    curr.data <- (curr.data-outer(means, rep(1,dim(curr.data)[2])))/outer(sqrt(vars), rep(1,dim(curr.data)[2]))
    mean.ranks <- equal.count(rank(means), number = number,
                              overlap = (number - length(means) / nb) / (number - 1))
    ans <- qqmath(~ as.vector(curr.data) | mean.ranks,
                  distribution = qnorm,
                  scales = list(relation = "free", draw = TRUE),
                  panel = function(x, distribution, ...) {
                      panel.qqmathline(x, distribution = distribution, ...)
                      panel.qqmath(x, distribution = distribution, ...)
                    },
                  xlab = "Quantiles of Standard Normal",
                  ylab = "Standardized log Expression")                
  }
  ans
}

checkVarsQQ <- function(data, groupid, ...){
  if (!require(lattice, quietly = TRUE)) stop("The lattice package could not be loaded")
  if (is(data, "ExpressionSet")) data <- exprs(data)
  flag <- apply(data<=0,1,sum)
  data <- data[flag==0,]
  data <- log(data)
  n <- sum(groupid!=0)
  groups <- unique(groupid)
  groups <- groups[groups!=0]
  ngroup <- length(groups)
  vars <- 0
  for(i in 1:ngroup){
    temp <- data[,groupid==groups[i]]
    ncoltemp <- sum(groupid==groups[i])
    if(ncoltemp==1){
      vars <- vars  
    }else{
      vars <- vars + (ncoltemp-1)*apply(temp,1,FUN=var)
    }
  }

  # gene specific sample variance
  svars <- vars/(n-ngroup)
  
  # The following is the moment estimate of the (v0,sigma02)
  v0 <- mean(svars)^2/((length(svars)-1)/length(svars)*var(svars))*2+4
  sigma02 <- mean(svars)*(v0-2)/v0
                  
  ans <- qqmath(~ svars, 
                distribution = function(p){v0*sigma02/qchisq(1-p, v0)},
                panel = function(x, distribution, ...) {
                  panel.qqmathline(x, distribution = distribution, ...)
                  panel.qqmath(x, distribution = distribution, ...)
                },               
                xlab = expression(paste("Quantiles of Inv-",
                  chi^2, "(", list(hat(nu)[0], hat(sigma)[0]^2), ")")),
                ylab = "Gene Specific Sample Variances", ...)
  ans
}

checkVarsMar <- function (data, groupid, xlab, ylab, ...){
  if (!require(lattice, quietly = TRUE)) stop("The lattice package could not be loaded")
  if (is(data, "ExpressionSet")) data <- exprs(data)

  # density of scaled inverse chi-square distribution
  dsichisq <- function(x, df, scale){
    return(dchisq(df*scale/x, df)*df*scale/(x^2))
  }

  flag <- apply(data<=0,1,sum)
  data <- data[flag==0,]
  data <- log(data)
  nn <- sum(groupid!=0)
  groups <- unique(groupid)
  groups <- groups[groups!=0]
  ngroup <- length(groups)
  vars <- 0
  for(i in 1:ngroup){
    temp <- data[,groupid==groups[i]]
    ncoltemp <- sum(groupid==groups[i])
    if(ncoltemp==1){
      vars <- vars  
    }else{
      vars <- vars + (ncoltemp-1)*apply(temp,1,FUN=var)
      }
  }
  ## gene specific sample variance
  svars <- vars/(nn-ngroup)

  # The following is the moment estimate of the (v0,sigma02)
  v0 <- mean(svars)^2/((length(svars)-1)/length(svars)*var(svars))*2+4
  sigma02 <- mean(svars)*(v0-2)/v0
  
  if (missing(xlab)) xlab <- "Gene Specific Sample Variances"
  if (missing(ylab)) ylab <- "Density"
 
  ans <- histogram( ~ svars, endpoints=c(min(svars), max(svars)),
                   type = "density", ...)
  y.limit <- ans$y.limit
  ans <- histogram( ~ svars, endpoints=c(min(svars), max(svars)),
                   xlab = xlab, ylab = ylab, type = "density",
                   prepanel = function(x,...){
                     xx <- seq(min(x), max(x), 0.0001)
                     yy <- dsichisq(xx,v0,sigma02)
                     list(ylim = c(range(yy)[1], max(range(yy)[2], y.limit[2])))
                   },
                   panel = function(x, ...) {
                     panel.histogram(x, ...)
                     xx <- seq(min(x), max(x), 0.0001)
                     yy <- dsichisq(xx,v0,sigma02)
                     panel.xyplot(xx, yy, type="l", col="red", lwd=2)
                   }, ... )
  ans
}

#plots
plotMarginal <- function (fit, data, kernel = "rect", n = 100, bw = "nrd0", adjust = 1, xlab, ylab, ...)
{

    ## Input: data is a matrix or ExpressionSet. fit is the
    ## output from emfit

    ## Output: Plot comparing the marginal distribution under the
    ## theoretical model to the empirical distribution, on the log
    ## scale.

    ## Note: x_ij values have the same marginal, but x_i1, ... x_im
    ## are not necessarily independent. Still OK to estimate density
    ## from all data, since the result can be thought of as the
    ## average of several density estimates, one for each column.

    if (!require(lattice, quietly = TRUE)) stop("The lattice package could not be loaded")

    n.cluster<-dim(fit@thetaEst)[1]

    if (is(data, "ExpressionSet")) data <- exprs(data)

    flag <- apply(data<=0,1,sum)
    data <- data[flag==0,]

    clusters <- postprob(fit,data)$cluster

    pp.to.cluster<-function(pps)
    {
        return(order(-pps)[1])
    }

    clusters<-apply(clusters,1,pp.to.cluster)


    data <- log(data)

    #lims <- range(data)

    ## By default, plot average shifted histogram of log expressions
    ## instead of plain histogram. Other kernels also possible

    #logmarg<-NULL
    #y<-NULL
    #supp<-NULL
    #cluster<-NULL
    data <- data[,unlist(fit@hypotheses@patterns[[1]])]
    empmarg <- density(data, bw = "nrd0", adjust = 1,
                       kernel = kernel, n = n)

    all.marg<-0
    ps<-apply(fit@probEst,1,sum)
    
    for (i in 1:n.cluster)
    {
        theta <- fit@family@invlink(fit@thetaEst[i,])
        all.marg <- all.marg+ps[i]*exp(fit@family@logDensity(empmarg$x, theta))
    }
        
    supp <- empmarg$x
    marg <- all.marg
    y <- empmarg$y
    cluster <- rep(paste("All Genes:",dim(data)[1],"genes"),length(empmarg$x))

    if (n.cluster>1) {
        for (i in 1:n.cluster) {
            curr.data<-data[clusters==i,]
        
            if (sum(curr.data>0)>100)
            {
                empmarg <- density(curr.data, bw = "nrd0", adjust = 1,
                                   kernel = kernel, n = n)
                                        #from = lims[1], to = lims[2])
                
                supp <- c(supp, empmarg$x)
                theta <- fit@family@invlink(fit@thetaEst[i,])
                marg <- c(marg,exp(fit@family@logDensity(empmarg$x, theta)))
                
                if (i>9)
                {
                    cluster<-c(cluster,rep(paste("Cluster",i,":",sum(clusters==i),"genes"),length(empmarg$x)))
                } else {
                    cluster<-c(cluster,rep(paste("Cluster ",i,":",sum(clusters==i),"genes"),length(empmarg$x)))
                }
            
                y<-c(y,empmarg$y)
            }
        }
    }

    if (missing(xlab)) xlab <- "log(expressions)"
    if (missing(ylab)) ylab <- "Density"

    xyplot(marg + y ~ supp|cluster, type = 'l', as.table=TRUE,
           allow.mult = TRUE, scales=list(y="free"),
           key = simpleKey(text = c("Marginal Density of log Expressions from Fitted Model",
                           "Empirical Kernel Density of log Expressions"),
           lines = TRUE, points = FALSE),
           xlab = xlab,
           ylab = ylab, ...)
}


plotCluster<-function (fit, data, cond = NULL, ncolors = 123, sep=TRUE, transform=NULL) 
{
    pps<-postprob(fit,data)
    clu<-pps$cluster
    pat<-pps$pattern[,2]

    pp.to.clus<-function(pps)
    {
        return(order(-pps)[1])
    }

    clus<-apply(clu,1,pp.to.clus)
    
    o<-order(clus*10+pat)
    z<-(log(data[o,]))
    
    if (!is.null(cond)) {
      o<-order(cond)
      z<-t(z[,o])
    }
    
    zr <- range(z, na.rm = TRUE)
    zmax <- max(abs(zr))
    zmin <- zr[1]

    low <- c(0, 1, 0)
    high <- c(1, 0, 0)

    zerocenter <- (zmin < 0)
    if (zerocenter) {
        zlim <- c(-zmax, zmax)
    } else {
        zlim <- c(zmin, zmax)
    }

    col <- rgb(seq(low[1], high[1], len = ncolors), seq(low[2], 
        high[2], len = ncolors), seq(low[3], high[3], len = ncolors))
    x<-1:nrow(z)
    y<-1:ncol(z)

    if (!is.null(transform)) {
      image(y,x,t(transform(z)), zlim = transform(zlim), col = col, axes = FALSE, xlab = "Replicate", ylab = "Gene")
    } else {
      image(y,x,t(z), zlim = zlim, col = col, axes = FALSE, xlab = "Replicate", ylab = "Gene")
    }
    #axis(1, at = x)
    #len<-max(10,length(x)+5)
    #axis(2, at = seq(1,floor(ncol(z)/len)*len,length=len))

    if (sep==TRUE) {
      clus<-table(clus)
    
      clusline<-0
      for (i in 1:(length(clus)-1))
      {
        clusline<-clusline+clus[i]
        abline(h=clusline,lwd=3)
      }
    
      if (!is.null(cond)) {
        cond<-table(cond)
    
        condline<-0.5
        for (i in 1:(length(cond)-1))
          {
            condline<-condline+cond[i]
            abline(v=condline)
          }
      }
    }
}

plot.ebarraysEMfit<-function(x,data,plottype="cluster",...)
{
    if (plottype=="cluster")
    {
        plotCluster(fit=x,data=data,...)
    } else if (plottype=="marginal") {
        plotMarginal(fit=x,data=data,...)
    }
}

Any scripts or data that you put into this service are public.

EBarrays documentation built on Nov. 8, 2020, 8:27 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

EBarrays
Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

R/plots.R
In EBarrays: Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

Defines functions plot.ebarraysEMfit checkVarsQQ checkModel checkCCV

Documented in checkCCV checkModel checkVarsQQ plot.ebarraysEMfit

Try the EBarrays package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EBarrays Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

R/plots.R In EBarrays: Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

Defines functions plot.ebarraysEMfit checkVarsQQ checkModel checkCCV

Documented in checkCCV checkModel checkVarsQQ plot.ebarraysEMfit

Try the EBarrays package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EBarrays
Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

R/plots.R
In EBarrays: Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification