##
## fdr2d.R
##
## Core functions for computing the two-dimensional local false discovery rate:
## fdr2d
## approx2d
## plot.fdr2d.result
## Volcanoplot
## Tornadoplot
## DoConstrain
## smooth2d.direct
## smooth2d.basic
## df2var
## dgfree
## rmatrix
## gausei
##
##
## Alexander.Ploner@meb.ki.se 220605
##
## 090805 prepare for OCplus
## 120805 modified fdr2d
## 150805 modified gridding, added smoothing adjustment
## 010905 integrated into OCplus
##
###############################################################################
##----------------------- Computing the FDR2D ------------------------------##
## 090805
fdr2d = function(xdat, grp, test, p0, nperm=100, nr=15, seed=NULL, null=NULL,
constrain = TRUE, smooth=0.2, verb=TRUE, ...)
#
# Name: fdr2d
# Desc: compute two-dimensional local fdr for t-statistics plus se
# Uses:
# Auth: Alexander.Ploner@meb.ki.se 090805
# based on earlier code by AP & Yudi.Pawitan@meb.ki.se
#
# TODO:
#
# Chng: 120805 AP changed smoothing, scaling of fdr, added p0 warning
## 150805 modified gridding, added smoothing adjustment
# 031105 AP changed function name to tstatistics
#
{
vcat = function(...) if (verb) cat(...)
# The default function
if (missing(test)) {
test = function(...) tstatistics(..., logse=TRUE)
}
# some validity checks
if (ncol(xdat)!=length(grp))
stop("grouping variable has invalid length")
# If null is explicitly specified, we don't need nperm or seed
if (!is.null(null)) {
if (!is.null(seed))
warning("Parameter null overrides parameter seed - ignored\n")
}
# remove NAs from the expression data, grouping
if (any(is.na(grp))) {
Prefilt = !is.na(grp)
xdat = xdat[, Prefilt]
grp = grp[Prefilt]
vcat("Removed",length(which(!Prefilt)), "samples with missing grp\n")
}
ng = nrow(xdat)
nc = ncol(xdat)
# Degree of smoothing all right?
if ((smooth < 0.01) | (smooth > 0.99)) {
smooth = min(max(smooth, 0.01), 0.99)
warning("smooth must be between 0.01 and 0.99 - reset to ",smooth)
}
# compute the original statistics
Z = test(xdat, grp, ...)
# Breaks and such
xbreaks = MidBreaks.t(Z[,1], nr)
ybreaks = MidBreaks(Z[,2], nr)
xmid = brk2mid(xbreaks)
ymid = brk2mid(ybreaks)
# Do the permutations: we are not efficient here in that we store them
# all instead of binning them immediately
if (is.null(null)) {
zstar = matrix(0, nrow=ng*nperm, ncol=2)
if (!is.null(seed)) set.seed(seed)
vcat("Starting permutations...\n")
for (i in 1:nperm){
vcat("\t", i,"\n")
perm = sample(nc)
zstar[((i-1)*ng+1):(i*ng), 1:2] = as.matrix(test(xdat, grp[perm]))
}
} else { # Use pre-packaged null - not efficient either (double allocation)
nperm = nrow(null)/ng
if (nperm != floor(nperm))
stop("number of rows of null is not a multiple of the number of genes")
zstar = null
vcat("Using pre-computed permutations (nperm =",nperm,", seed =",
attr(null, "seed"),")\n")
}
# Table it
cut.stat.x = cut(c(Z[,1], zstar[,1]), xbreaks, include.lowest=TRUE)
cut.stat.y = cut(c(Z[,2], zstar[,2]), ybreaks, include.lowest=TRUE)
cut.null.x = cut(zstar[,1], xbreaks, include.lowest=TRUE)
cut.null.y = cut(zstar[,2], ybreaks, include.lowest=TRUE)
tab.stat = table(cut.stat.x, cut.stat.y)
tab.null = table(cut.null.x, cut.null.y)
# Smooth & scale
df.smooth = (1-smooth)*(nr-1)^2
b = smooth2d.direct(tab.null, tab.stat, df=df.smooth)
f0fz = b$fit/((1-b$fit)*nperm)
# An explicit estimate for p0, based on the maximum of f0/f over a central
# area of the grid, comparable to the 1D case
if (missing(p0)) {
zndx = findInterval(0,xbreaks)
nn = length(ybreaks)
mndx = floor(nn/4):ceiling(3*nn/4)
p0 = min(1/f0fz[zndx, mndx])
#p0 = 1/mean(f0fz[zndx, mndx])
p0.est = TRUE
} else {
p0.est = FALSE
}
# Check how sensible p0 is
if (p0 > 1) {
warning("p0 estimate > 1. This may be due to oversmoothing",
" - consider reducing smooth")
}
# The fdr
fdr = p0*f0fz
dimnames(fdr) = dimnames(tab.stat)
# Constrain fdr to be monotone, if required
if (constrain)
fdr = DoConstrain(fdr, xmid)
# Compute the individual approximations
ifdr = approx2d(xmid, ymid, fdr, Z[,1], Z[,2])
# Output
ret = data.frame(cbind(Z, ifdr))
colnames(ret)[3] = "fdr.local"
param = list(p0=p0, p0.est=p0.est, fdr=fdr, xbreaks=xbreaks, ybreaks=ybreaks)
attr(ret, "param") = param
class(ret) = c("fdr2d.result","fdr.result","data.frame")
ret
}
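## Illustrative usage sketch (assumes the simulated-data generator MAsim() from
## OCplus, which stores the group labels in the column names; all parameter
## values are arbitrary):
##   xdat = MAsim(ng = 1000, n = 10)              # 1000 genes, two groups of 10 arrays
##   grp  = colnames(xdat)                        # group labels
##   res  = fdr2d(xdat, grp, nperm = 25, seed = 17)
##   head(res)                                    # tstat, logse, fdr.local per gene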
##------------------- Helper: approximate linearly on a 2D grid --------------##
##
approx2d = function(x,y, z, xout, yout, ...)
#
# Name: approx2d
# Desc: Compute bilinear approximation on a grid, reasonably but not crazily
# efficient; points outside the grid are assigned to the closest grid point
# Auth: Alexander.Ploner@meb.ki.se
#
# Chng:
#
{
n = length(x)
m = length(y)
no = length(xout)
zout = rep(NA, no)
for (i in 2:n) {
for (j in 2:m) {
ndx = x[i-1]<=xout & xout<=x[i] & y[j-1]<=yout & yout<=y[j]
lower = z[i-1,j-1] + (xout[ndx]-x[i-1])*(z[i,j-1]-z[i-1,j-1])/(x[i]-x[i-1])
upper = z[i-1,j] + (xout[ndx]-x[i-1])*(z[i,j] -z[i-1,j])/(x[i]-x[i-1])
zout[ndx] = lower + (yout[ndx]-y[j-1])*(upper-lower)/(y[j]-y[j-1])
}
}
# For the undefined guys, we take the closest grid point
undef = which(is.na(zout))
if (length(undef) > 0) {
gg = as.matrix(expand.grid(x, y))
for (i in undef) {
d2 = (xout[i]-gg[,1])^2 + (yout[i]-gg[,2])^2
nn = which.min(d2)
zout[i] = z[nn]
}
}
zout
}
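## Quick sketch of approx2d on a toy grid (values arbitrary): for a surface that
## is linear in x and y, bilinear interpolation reproduces it exactly.
##   x = 1:4; y = 1:3
##   z = outer(x, y, "+")
##   approx2d(x, y, z, xout = c(1.5, 3.2), yout = c(2.5, 1.1))   # 4.0, 4.3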
##---------------------- Plot a 2D fdr result ------------------------------##
## 220605
## 180805 added akima
plot.fdr2d.result = function(x, levels, nr.plot=20, add=FALSE, grid=FALSE,
pch=".", xlab, ylab, vfont=c("sans serif", "plain"),
lcol="black", ...)
#
# Name: plot.fdr2d.result
# Desc: a S3 plotting method for fdr2d results
# Uses: akima for pretty smoothing
# Auth: Alexander.Ploner@meb.ki.se 180805
#
# Chng:
#
{
if (missing(levels))
levels = c(0.05, 0.1, 0.2, 0.3)
if (missing(xlab))
xlab = colnames(x)[1]
if (missing(ylab))
ylab = colnames(x)[2]
p = attr(x, "param")
if (add) {
points(x[,1], x[,2], pch=pch, ...)
} else {
plot(x[,1], x[,2], xlab=xlab, ylab=ylab, pch=pch, ...)
}
cfdr = list()
# No smoothing
if (nr.plot<1) {
cfdr$x = brk2mid(p$xbreaks)
cfdr$y = brk2mid(p$ybreaks)
cfdr$z = p$fdr
} else {
# An evenly spaced plotting grid (rather than MidBreaks.t/MidBreaks) gives nicer contours
xo = seq(min(x[,1]), max(x[,1]), len=nr.plot)
yo = seq(min(x[,2]), max(x[,2]), len=nr.plot)
cfdr = interp(x[,1], x[,2], x[,3] , xo, yo)
}
contour(cfdr, add=TRUE, col=lcol, levels=levels, vfont=vfont)
if (grid) {
abline(v=p$xbreaks, lty=3)
abline(h=p$ybreaks, lty=3)
}
invisible(x)
}
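## Illustrative call (assumes 'res' from the fdr2d() sketch above and that
## interp() from the akima package is available for the smoothed contours):
##   plot(res, levels = c(0.05, 0.1, 0.2), grid = TRUE)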
##------------------ Plotting on alternative scales ------------------------##
## 090805
## 190805
Tornadoplot = function(x, levels, nr.plot=20, label=FALSE, constrain=FALSE, pch=".",
xlab, ylab, vfont=c("sans serif", "plain"), lcol="black", ...)
{
# We check whether we really have a suitable argument
if (!("fdr2d.result" %in% class(x)))
stop("requires object of class fdr2d.result")
cc = colnames(x)
if (cc[1] != "tstat" | cc[2] != "logse")
stop("requires fdrd2d based on tstat and logse")
# Some defaults
if (missing(levels))
levels = c(0.05, 0.1, 0.2, 0.3)
if (missing(xlab))
xlab = "Mean Difference"
if (missing(ylab))
ylab = "log(Standard Error)"
# Conversion routines
conv = function(t, logse)
{
md = t*exp(logse)
cbind(md, logse)
}
lconv = function(x)
{
xx = conv(x$x, x$y)
list(level=x$level, x=xx[,1], y=xx[,2])
}
# Now go
cc = conv(x[,1], x[,2])
md = cc[,1]
logse = cc[,2]
# Compute the contour lines in the original scale
cfdr = list()
if (nr.plot<1) {
p = attr(x, "param")
cfdr$x = brk2mid(p$xbreaks)
cfdr$y = brk2mid(p$ybreaks)
cfdr$z = p$fdr
} else {
# An evenly spaced plotting grid (rather than MidBreaks.t/MidBreaks) gives nicer contours
xo = seq(min(x[,1]), max(x[,1]), len=nr.plot)
yo = seq(min(x[,2]), max(x[,2]), len=nr.plot)
cfdr = interp(x[,1], x[,2], x[,3] , xo, yo)
}
if (constrain) {
cfdr$z = DoConstrain(cfdr$z, cfdr$x)
}
cl = contourLines(cfdr, levels=levels)
cl.conv = lapply(cl, lconv)
# Do it
plot(md, logse, xlab=xlab, ylab=ylab, pch=pch, ...)
DrawContourlines(cl.conv, label=label, col=lcol, vfont=vfont)
invisible(x)
}
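## Illustrative call (assumes 'res' from the fdr2d() sketch above, i.e. an
## fdr2d result with columns tstat and logse):
##   Tornadoplot(res, label = TRUE)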
Volcanoplot = function(x, df, levels, nr.plot=20, label=FALSE, constrain=FALSE,
pch=".", xlab, ylab, vfont=c("sans serif", "plain"),
lcol="black", ...)
{
# We check whether we really have a suitable argument
if (!("fdr2d.result" %in% class(x)))
stop("requires object of class fdr2d.result")
cc = colnames(x)
if (cc[1] != "tstat" | cc[2] != "logse")
stop("requires fdrd2d based on tstat and logse")
# Some defaults
if (missing(levels))
levels = c(0.05, 0.1, 0.2, 0.3)
if (missing(xlab))
xlab = "Mean Difference"
if (missing(ylab))
ylab = "-log10(p-Value)"
# Conversion routine
conv = function(t, logse)
{
md = t*exp(logse)
logp = -log10(2*pt(-abs(t), df=df))
logp[logp==0] = min(logp[logp!=0])
cbind(md, logp)
}
lconv = function(x)
{
xx = conv(x$x, x$y)
list(level=x$level, x=xx[,1], y=xx[,2])
}
# Now go
cc = conv(x[,1], x[,2])
md = cc[,1]
logp = cc[,2]
# Compute the contour lines in the original scale
cfdr = list()
if (nr.plot<1) {
p = attr(x, "param")
cfdr$x = brk2mid(p$xbreaks)
cfdr$y = brk2mid(p$ybreaks)
cfdr$z = p$fdr
} else {
# An evenly spaced plotting grid (rather than MidBreaks.t/MidBreaks) gives nicer contours
xo = seq(min(x[,1]), max(x[,1]), len=nr.plot)
yo = seq(min(x[,2]), max(x[,2]), len=nr.plot)
cfdr = interp(x[,1], x[,2], x[,3] , xo, yo)
}
if (constrain) {
cfdr$z = DoConstrain(cfdr$z, cfdr$x)
}
cl = contourLines(cfdr, levels=levels)
cl.conv = lapply(cl, lconv)
plot(md, logp, xlab=xlab, ylab=ylab, pch=pch, ...)
DrawContourlines(cl.conv, label=label, col=lcol, vfont=vfont)
invisible(x)
}
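## Illustrative call (assumes 'res' from the fdr2d() sketch above; df is the
## degrees of freedom of the two-sample t-test, here 10 + 10 - 2 = 18):
##   Volcanoplot(res, df = 18, label = TRUE)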
# Another helper function: constrain the fdr to decrease away from t = 0
DoConstrain = function(fdr, xmid)
{
rhs = xmid >= 0
lhs = xmid < 0
for (i in 1:ncol(fdr)){
aa = fdr[rhs,i]
fdr[rhs,i] = cummin(aa)
bb = rev(fdr[lhs,i])
fdr[lhs,i] = rev(cummin(bb))
}
fdr
}
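## Toy sketch of the constraint (values arbitrary): within each column, the fdr
## is forced to be non-increasing away from zero in both directions of xmid.
##   xmid = c(-2, -1, 1, 2)
##   fdr  = matrix(c(0.5, 0.2, 0.3, 0.4), ncol = 1)
##   DoConstrain(fdr, xmid)   # returns 0.2, 0.2, 0.3, 0.3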
###############################################################################
## The smoothing section: all written by Yudi.Pawitan@meb.ki.se, but
## prettified by Alexander.Ploner@meb.ki.se
###############################################################################
smooth2d.direct = function(y, n, df = 100, err=0.01, edge.count=3, mid.start=6)
{
# Technically, we can allow different nx/ny, but note that we use the
# same edge/middle definition for both!
nx = nrow(y)
ny = ncol(y)
# Sanity check: nx must not be too small, because otherwise the edge/middle
# indices below would run backwards
if (nx < mid.start | edge.count > ny)
stop("Grid size too small")
# Implicit imputation around the corners: all zero counts are set to
# fdr zero
n = ifelse(n <= 1,1,n)
# Modify this in the lower middle: empty cells there are treated as pure null
edge = 1:edge.count
mid = mid.start:(nx-mid.start+1)
y[mid,edge]= ifelse(n[mid,edge]==1, 1, y[mid,edge])
p = y/n
sv2 = df2var(nx,df=df)
fit = smooth2d.basic(p,sv2=sv2,err=err)
list(fit=fit, df=df, sv2=sv2)
}
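## Illustrative sketch with simulated counts (all values arbitrary): ynull holds
## the "null" counts out of ntot total counts per cell of a 15 x 15 grid, and
## out$fit is the smoothed estimate of the proportion ynull/ntot.
##   set.seed(2)
##   ntot  = matrix(rpois(225, 50), 15, 15)
##   ynull = matrix(rbinom(225, c(ntot), 0.7), 15, 15)
##   out   = smooth2d.direct(ynull, ntot, df = 0.8 * 14^2)
##   summary(c(out$fit))                      # smoothed proportions, close to 0.7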
# transform df to sv2
df2var = function(nx,df=nx, lower=0.0001, upper=1000)
{
ff = function(x) dgfree(nx,x)-df
out = uniroot(ff, lower=lower, upper=upper)
return(out$root)
}
# degrees of freedom of basic smooth
dgfree = function(nx, sv2=1, ny=nx)
{
imp = matrix(0,nrow=nx, ncol=ny) ## impulse function
imp[nx/2, ny/2] = 1
imp.resp = smooth2d.basic(imp,sv2=sv2) ## impulse response
df = nx*ny*imp.resp[nx/2, ny/2]
return(df)
}
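## Illustrative check of the df <-> sv2 relationship (grid size as in the fdr2d
## default nr = 15; target df as for smooth = 0.2):
##   sv2 = df2var(15, df = 0.8 * 14^2)   # smoothing variance giving ~157 df
##   dgfree(15, sv2)                     # should be close to 0.8 * 14^2 = 156.8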
# y is data matrix on regular grid
smooth2d.basic = function(y, sv2=0.01,se2=1,err=0.01)
{
W = 1/se2
nx = nrow(y)
ny = ncol(y)
Y = c(y) # vectorized
R = rmatrix(nx,ny)
d = R$d/sv2 + W # diagonal elements
xx = R$x # neighbors index
ybar = mean(Y)
B = W*(Y-ybar) # Z'W (Y-Xb)
v = gausei(d,xx,B,sv2, err=err, maxiter=100)
est = ybar+v
matrix(est,ncol=ny)
}
# Index of the neighbours on a regular nx x ny grid; the cell encoding below
# assumes fewer than 1000 columns
rmatrix = function(nx,ny)
{
xymat = matrix(0,nrow=nx, ncol=ny)
grid = c((row(xymat)-1)*1000 + col(xymat)-1)
neighbor= c(grid+1000,grid-1000,grid+1,grid-1)
loc = match(neighbor,grid,0)
loc = matrix(loc,ncol=4)
num = c((loc>0)%*% rep(1,4))
list(x=loc,d=num)
}
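## Toy sketch of the neighbour structure on a 2 x 3 grid (cells in column-major order):
##   R = rmatrix(2, 3)
##   R$d   # neighbours per cell: corners have 2, middle-column cells have 3
##   R$x   # per cell: index of the next-row/previous-row/next-column/previous-column
##         # neighbour, 0 where there is none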
#/*************** Gauss Seidel Function **********************/
# solving A v =B
# d contains diagonal elements of A
# x contains index of neighbours
# sv2 = sigma2v
gausei=function(d,xx,B,sv2, err=0.01, maxiter=10)
{
ng = length(d) # number of grid points
xx[xx<1] = ng+1 # non-neighbors = last location
v = c(B/(d+0.000001),0) # rough solution, use 0 for non-neighbors
# at last location
vold = 3*v
iter = 1
while ( (sd(vold-v, na.rm=TRUE)/sd(v, na.rm=TRUE) > err) & (iter< maxiter) )
{
vold = v
vmat = matrix(v[xx],ncol=4)
v[1:ng] = (B + c(vmat%*%rep(1,4))/sv2) /d
iter = iter+1
} # end while
v[1:ng]
}
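## Minimal sketch of the basic smoother on a noisy surface (seed and sv2 arbitrary):
##   set.seed(1)
##   y  = outer(1:20, 1:20, function(i, j) sin(i / 3) + cos(j / 4)) +
##        rnorm(400, sd = 0.3)
##   ys = smooth2d.basic(y, sv2 = 0.1)   # 20 x 20 matrix of smoothed values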