R/edd.R
In edd: expression density diagnostics

Documented in centerScale edd eddObsolete makeCandmat.raw maxKSp mkt pmixnorm qmixnorm rmixnorm rmixnorm.alt

pmixnorm <- function(x,p1,m1,s1,m2,s2)
   p1 * pnorm(x,m1,s1) + (1-p1) * pnorm(x, m2, s2)
qmixnorm <- function(pct,p1,m1,s1,m2,s2)
   sapply(pct, function(z) optimize(function(x)
     (pmixnorm(x,p1,m1,s1,m2,s2)-z)^2,c(-20,20))$minimum)
rmixnorm <- function(n,p1,m1,s1,m2,s2)
     qmixnorm(runif(n),p1,m1,s1,m2,s2)
dmixnorm <- function (x, p1, m1, s1, m2, s2)
{
    if (p1 >= 1)
        stop("mixture fraction must be less than 1")
    p1 * dnorm(x, m1, s1) + (1 - p1) * dnorm(x, m2, s2)
}


makeCandmat.theor <- function (nPerRow, distList, pctiles=
  ppoints(nPerRow))
{
#
# each row is the sorted rescaledQuantiles of the
# 9 reference distributions N01, chisq1 t3 ln01, u01, mix1, mix2, beta2,8, beta8,2
#
    rescaledQuantiles <- function(eddist, pctiles) {
        (qfun(eddist)(pctiles) - med(eddist))/Mad(eddist)
    }
    t(sapply(distList, rescaledQuantiles, pctiles ))
}

eddObsolete <- function(eset, 
    ref=c("multiCand","uniCand","test","nnet")[1], k=10, l=6,
    nnsize=6, nniter=200)
 {
#
# oddities of this function
#  1) it uses the flatQQnorm transformation for data when
#     it should suffice to just use the edf
#  2) it has odd defaults and some hardcoded options inside
#  3) the logic is strange
#
 require(class)
 require(nnet)
 nsamp <- ncol(exprs(eset))
 if (ref=="multiCand" || ref=="nnet") 
      {
      refmat <- makeCandmat.raw(nPerRow=nsamp,nRowPerCand=100)
      refmat <- t(apply(refmat,1,centerScale))
      fq.ref <- fq.matrows(refmat)
      if (ref == "nnet")
        {
        Y <- row.names(fq.ref)
        row.names(fq.ref) <- NULL ##so we don't get a warning next
        dfcx <- data.frame(Y=Y,fq.ref)
        refnet <- nnet(Y~.,data=dfcx,size=nnsize,iter=nniter, decay=0.01)
        }
      }
 else if (ref=="uniCand")
      fq.ref <- makeCandmat.theor(nPerRow=nsamp)
 else if (ref=="test")
      return( t(apply(exprs(eset), 1, function(x)maxKSp(centerScale(x)))) )
 fq.test <- fq.matrows(t(apply(exprs(eset),1,centerScale)))
 if (ref == "nnet")
   return(predict(refnet,newdata=data.frame(fq.test),type="class"))
 else knn( train=fq.ref, cl=row.names(fq.ref), test=fq.test, k=k, l=l )
 }

edd <- function( eset, distList=eddDistList, tx=c(sort, flatQQNormY)[[1]],
  refDist=c("multiSim", "theoretical")[1],
  method=c("knn", "nnet", "test")[1], nRowPerCand=100, ...) {
    nsamp <- ncol(exprs(eset))
    testSet <- data.frame(t(apply(exprs(eset),1,function(x)tx(centerScale(x)))))
    if (method=="test") return(t(apply(testSet,1,maxKSp, dists=distList, ...)))
    if (refDist == "multiSim")
      ref <- makeCandmat.raw(nPerRow=nsamp, nRowPerCand=nRowPerCand,
                             dists=distList)
    else
      ref <- makeCandmat.theor(nPerRow=nsamp, distList=distList)
    ref <- t(apply(ref,1,tx))
    if (method == "knn")
      ans <- knn( train=ref, cl=row.names(ref), 
		test=testSet, ...)
    else
        {
        Y <- row.names(ref)
	dimnames(ref) <- list(NULL,NULL)
        dfcx <- data.frame(Y=Y,ref)
        names(dfcx) <- c("Y",names(testSet))
        refnet <- nnet(Y~.,data=dfcx,...)
	ans <- predict(refnet, newdata=testSet, type="class")
        }
    ans
}
	
			  

mkt <- function() {
# a test matrix with samples from some of the reference dists
test <- matrix(NA,nr=140,nc=50)
test[1:20,] <- rnorm(1000)
test[21:40,] <- rchisq(1000,1)
test[41:60,] <- rt(1000,3)
test[61:80,] <- rlnorm(1000)
test[81:100,] <- runif(1000)
test[101:120,] <- rmixnorm(1000,.75,0,1,4,1)
test[121:140,] <- rbeta(1000,2,8)
test
}

# labels for the test matrix entries
testcl <- c(rep("n01",20), rep("csq1",20), rep("t3",20), 
      rep("ln",20), rep("u",20), rep("mix1",20),rep("b28",20))

"fq.matrows" <-
function(mat)
 t(apply(mat,1,function(x,...){tmp <- flatQQNorm(x); tmp$y[order(tmp$x)]}))

#"ctr" <-
#function(x) (x-median(x))/mad(x)

"flatQQNorm" <-
function (y)
{
    n <- length(y)
    x <- qnorm(ppoints(n))[order(order(y))]
    y <- y - x
    list(x = x, y = y)
}
"flatQQNormY" <- function(x) flatQQNorm(x)$y

#"matbox.strat" <-
#function (x, strat, ...) 
#{
## split a matrix by strat, then plot boxplots
## for each strat (separate boxplots within panel for each stratum)
#    spar <- par()
#    p <- ncol(x)
#    m <- list()
#    cl <- strat
#    cltags <- unique(cl)
#    ncl <- length(cltags)
#    for (i in 1:ncl) {
#        m[[i]] <- list()
#        for (j in 2:p) m[[i]][[j - 1]] <- x[cl == cltags[i], 
#            j]
#        n <- length(m[[i]][[1]])
#        boxplot(m[[i]], main = paste("class", cltags[i], "size=", 
#            n), ...)
#    }
#    par(spar)
#    invisible(0)
#}

centerScale <- function(x) (x-median(x))/mad(x)

rmixnorm.alt <- function(n,p1,m1,s1,m2,s2) 
  c( rnorm(floor(p1*n),m1,s1), rnorm(ceiling((1-p1)*n),m2,s2) )

pmixnorm <- function(x,p1,m1,s1,m2,s2)  {
  if (p1 >= 1) stop("mixture fraction must be less than 1")
  p1*pnorm(x,m1,s1)+(1-p1)*pnorm(x,m2,s2)
  }

#
# maxKSp is a good illustration of what should be  done
# generically to compare a vector to a collection of
# distributional models
#
# in this setting we have 3 possible comparisons
# a and b treat edf(x) as an n-dimensional multivariate datum
# and use multivariate classification methods like knn or nnet
#  a) compare edf(x) to theoretical edf 
#  b) compare edf(x) to edfs of simulated 
#            realizations from theoretical dists
#  c) compare edf(x) to theoretical edf using KS test
#
# problem: to specify conveniently and flexibly the
# various theoretical edfs of interest so that the user
# can add new ones or omit some in the default collection
#
# for maxKSp we need to be able to do do.call("ks.test", list
# (x, pfun, parms) ) and we may have to rescale x to match
# the median and mad for pfun
#
# for makeCandmat.raw we need to do do.call(gen,parmlist)
#
# for makeCandmat.theor we need to evaluate quantiles at
# selected probabilities
#
# thus an eddDist object would have a character stub
# for the name (e.g., "norm" or "t") from which the
# generator, quantile function or cdf can be derived
# by pasting, a numeric parameter vector assumed to
# be conformable for the distribution in hand, numerical
# precomputed values of median and mad
#
setClass("eddDist", representation(stub="character",
		parms="numeric", median="numeric", mad="numeric",
		tag="character", plotlim="numeric", latexTag="character"))
setGeneric("stub", function(x)standardGeneric("stub"))
setMethod("stub", "eddDist", function(x)x@stub)
setGeneric("plotlim", function(x)standardGeneric("plotlim"))
setMethod("plotlim", "eddDist", function(x)x@plotlim)
setGeneric("tag", function(x)standardGeneric("tag"))
setMethod("tag", "eddDist", function(x)x@tag)
setGeneric("latexTag", function(x)standardGeneric("latexTag"))
setMethod("latexTag", "eddDist", function(x)x@latexTag)
setGeneric("parms", function(x)standardGeneric("parms"))
setMethod("parms", "eddDist", function(x)x@parms)
setGeneric("med", function(x)standardGeneric("med"))
setMethod("med", "eddDist", function(x)x@median)
setGeneric("Mad", function(x)standardGeneric("Mad"))
setMethod("Mad", "eddDist", function(x)x@mad)
setGeneric("CDFname", function(x)standardGeneric("CDFname"))
setMethod("CDFname", "eddDist", function(x) paste("p",stub(x),sep=""))
setGeneric("qfName", function(x)standardGeneric("qfName"))
setMethod("qfName", "eddDist", function(x) paste("q",stub(x),sep=""))
setGeneric("qfun", function(e)standardGeneric("qfun"))
setMethod("qfun", "eddDist", function(e)  {
  argl <- list()
  pn <- names(parms(e))
  for (i in 1:length(parms(e)))
     argl[[pn[i]]] <- parms(e)[i]
# following assumes arg to quantile func is always named p
  function(x) { argl[["p"]] <- x; do.call(qfName(e),argl) }
})
setGeneric("genName", function(x)standardGeneric("genName"))
setMethod("genName", "eddDist", function(x) paste("r",stub(x),sep=""))
setGeneric("testVec", function(x,eddd,is.centered)standardGeneric("testVec"))
setMethod("testVec", c("numeric","eddDist","logical"), 
  function(x,eddd,is.centered) {
    if (is.centered) x <- Mad(eddd)*x + med(eddd)
    argl <- list(x, CDFname(eddd))
    if (length(Parms <- parms(eddd))>0)
       for (j in 1:length(Parms)) argl[[2+j]] <- Parms[j]
 oldopt = options()
 options(warn=-1)
 on.exit(options(oldopt))
    do.call(ks.test, argl)
})

N01 <- new("eddDist", stub="norm", parms=c(mean=0,sd=1), 
  median=0, mad=1, tag="N(0,1)", plotlim=c(-3,3),
  latexTag="$\\Phi$")
CS1 <- new("eddDist", stub="chisq", parms=c(df=1), median=.46, mad=.6,
 tag="X^2(1)", plotlim=c(0.001,6), latexTag="$\\chi^2_1$")
T3 <- new("eddDist", stub="t", parms=c(df=3), median=.0, mad=1.15,
 tag="t(3)", plotlim=c(-4,4), latexTag="$t_3$")
LN01 <- new("eddDist", stub="lnorm", parms=c(meanlog=0,sdlog=1), 
   median=1, mad=.88, tag="logN(0,1)", plotlim=c(0,5),latexTag="$LN_{0,1}$")
U01 <- new("eddDist", stub="unif", parms=c(min=0,max=1), median=.5, mad=.37,
 tag="U(0,1)", plotlim=c(-.1,1.1), latexTag="$U_{0,1}$")
MIXN1 <- new("eddDist", stub="mixnorm", parms=c(p1=.75,m1=0,s1=1,m2=4,s2=1),
     median=.42, mad=1.57, tag=".75N(0,1)+.25N(4,1)", plotlim=c(-3,7),
	latexTag="$\\frac{3}{4}\\Phi+\\frac{1}{4}\\Phi_{4,1}$")
MIXN2 <- new("eddDist", stub="mixnorm", parms=c(p1=.25,m1=0,s1=1,
     m2=4,s2=1),
     median=3.58, mad=1.57, tag=".25N(0,1)+.75N(4,1)", plotlim=c(-3,7),
	latexTag="$\\frac{1}{4}\\Phi+\\frac{3}{4}\\Phi_{4,1}$")
B28 <- new("eddDist", stub="beta", parms=c(shape1=2,shape2=8), 
	median=.18, mad=.12, tag="B(2,8)", plotlim=c(0,1),
	latexTag="$\\beta_{2,8}$")
B82 <- new("eddDist", stub="beta", parms=c(shape1=8,shape2=2), 
	median=.82, mad=.12, tag="B(8,2)", plotlim=c(0,1),
	latexTag="$\\beta_{8,2}$")

# put these in order of similarity
eddDistList <- list(N01=N01, T3=T3, LN01=LN01, CS1=CS1, 
   B82=B82, U01=U01, B28=B28, MIXN1=MIXN1, MIXN2=MIXN2)

maxKSp <- function( x, is.centered=TRUE, dists=eddDistList, thresh=.1 ) {
 ps <- sapply(dists, function(z) testVec(x,z,is.centered)$p.value)
 if (all(ps<thresh)) return("outlier") # need to handle doubt as well
 ind <- (1:length(ps))[ps>=max(ps)][1]
 return(tag(dists[[ind]]))
}


makeCandmat.raw <- function(nPerRow=20, nRowPerCand=20, dists=
  eddDistList, centerScale=TRUE)
{
 ncand <- length(dists)
 out <- matrix(NA, nr=nRowPerCand*ncand, nc=nPerRow)
 gnames <- rep(NA,ncand)
 for (i in 1:ncand)
  {
  gnames[i] <- tag(dists[[i]])
  inds <- ((i-1)*nRowPerCand+1):(i*nRowPerCand)
  n2gen <- nRowPerCand*nPerRow
  argl <- list(n2gen)
  for (j in 1:length(pl <- parms(dists[[i]])))
      argl[[1+j]] <- pl[j]
  out[inds,] <- do.call(genName(dists[[i]]), argl)
  if (centerScale) {
    me <- med(dists[[i]])
    ma <- Mad(dists[[i]])
    out[inds,] <- (out[inds,]-me)/ma
    }
  }
 row.names(out) <- rep(gnames,each=nRowPerCand)
 out
}

Any scripts or data that you put into this service are public.

edd documentation built on May 2, 2019, 6:10 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

edd
expression density diagnostics

R/edd.R
In edd: expression density diagnostics

Defines functions pmixnorm qmixnorm rmixnorm eddObsolete edd mkt centerScale rmixnorm.alt pmixnorm maxKSp makeCandmat.raw

Documented in centerScale edd eddObsolete makeCandmat.raw maxKSp mkt pmixnorm qmixnorm rmixnorm rmixnorm.alt

Try the edd package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

edd expression density diagnostics

R/edd.R In edd: expression density diagnostics

Defines functions pmixnorm qmixnorm rmixnorm eddObsolete edd mkt centerScale rmixnorm.alt pmixnorm maxKSp makeCandmat.raw

Documented in centerScale edd eddObsolete makeCandmat.raw maxKSp mkt pmixnorm qmixnorm rmixnorm rmixnorm.alt

Try the edd package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

edd
expression density diagnostics

R/edd.R
In edd: expression density diagnostics