########################################################################################################
# This R code implements the SPLOSH method for the analysis of microarray data
# described by Pounds and Cheng (Improving False Discovery Rate Estimation - Bioinformatics 2004).
#
# Last Update: July 14, 2004
#
# Function name: splosh
# Arguments: pvals - a vector of p-values obtained by applying a statistical hypothesis
# test to the expression values for each gene individually
# LC - (optional) a list containing tuning parameters for the loess algorithm,
# see help(loess.control) for more details, defaults to S-plus defaults
# dplaces - (optional) round p-values to this many decimal places to avoid
# numerical difficulties that can arise by division by small numbers, default = 3
# notes - (optional) may pass any string describing notes regarding the analysis, default = NULL
#
# Outputs: a list with the following elements
# cfdr - a vector of the cfdr estimates at the values specified by the user in pvals
# q - a vector of corresponding monotone fdr estimates, similar to Storey's q-value
# cdf - vector of the SPLOSH cdf estimate evaluated at pvals
# fp - vector of estimated no. of false positives incurred by setting threshold
# at corresponding value of pvals
# fn - vector of estimated no. of false negatives incurred by setting threshold
# at corresponding value of pvals
# toterr - vector giving the sum of fp and fn for each entry
# ebp - the empirical Bayes posterior that the null hypothesis is true, see Efron et al. (JASA 2001)
# pi - estimate of the mixing parameter giving the probability of the null hypothesis
# ord - a vector giving index numbers to order results according to ascending p-values
# dplaces - echoes value of dplaces used in algorithm
# LC - echoes value of LC used in algorithm
# fdr.method - gives bibliographic reference for the method
# notes - returns notes regarding the analysis
###########################################################################################################
splosh<-function(pvals,LC=loess.control(),dplaces=3,notes=NULL)
{
notes<-c(notes,"SPLOSH used to estimate FDR.")
x<-round(pvals,dplaces) # Prevent numerical difficulties
n<-length(pvals) # Obtain no. of points
r<-rank(x) # rank observations
ux<-sort(unique(x)) # ordered unique p-values
ur<-(sort(unique(r)))-.5 # ordered unique ranks
#plot(ux,ur)
if (max(ux)<1) # edge adjustments
{
ux<-c(ux,1)
ur<-c(ur,n)
}
if (min(ux)>0)
{
ux<-c(0,ux)
ur<-c(0,ur)
}
nux<-length(ux) # No. of unique points
ur<-ur/n # Divide ranks by n
dx<-diff(ux) # spacings of unique p-values
dr<-diff(ur) # spacings of unique ranks
dFdx<-exp(log(dr)-log(dx)) # spacing-interval slopes
mr<-(ur[1:(nux-1)]+ur[2:nux])/(2) # Unitized Medians of Unique Rank Intervals
mx<-(ux[1:(nux-1)]+ux[2:nux])/2 # Unitized Medians of Unique p-value Intervals
tr<-asin(2*(mr-.5)) # Arc-Sine Transform Rank Midpoints
fit<-loess(log(dFdx)~tr,loess.control=LC) # Lowess of log-transformed EDQF and transformed rank-midpoints
tr2<-asin(2*(ur-.5)) # Arc-Sine Transform Unique
yhat<-(predict(fit,tr2)) # Obtain Estimated Derivative of CDF (up to constant multiplier)
yhat[c(1,nux)]<-approx(tr2[2:(nux-1)],
yhat[2:(nux-1)],
xout=tr2[c(1,nux)],
rule=3)$y # Extrapolate to get endpoint estimates
pdf1<-exp(yhat) # Obtain PDF up to a constant
trap<-0.5*(pdf1[1:(nux-1)]+pdf1[2:nux])*dx # Trapezoid rule terms
const<-sum(trap) # Constant via trapezoid rule
pdf2<-pdf1/const # Adjust pdf by constant
pdf<-approx(ux,pdf2,xout=pvals,rule=3)$y # Obtain pdf estimate for p-values
cdf<-approx(ux[2:nux],
cumsum(trap),
xout=pvals,
rule=2)$y/const # Obtain CDF
pi<-min(pdf) # Take pi as min of PDF
p.pi<-max(pvals[pdf==pi]) # largest p-value at which pi occurs
cdf.pi<-max(cdf[pvals==p.pi]) # CDF evaluated at p.pi
cfdr<-pi*pvals/cdf # Estimate cFDR for p-values
ebp<-pi/pdf # Estimate EBP of null for p-values
cfdr[pvals==0]<-ebp[pvals==0] # L'Hospital's Rule
cfdr[cfdr>1]<-1 # Set max(fdr)=1
qval<-cfdr # Initialize q-value vector
ordering<-order(pvals) # Report Results
qval[rev(ordering)]<-cummin(qval[rev(ordering)]) # Compute q-value
fp<-n*cfdr*cdf # Compute Estimated No. False Positives
fn<-n*(cdf.pi-cdf-pi*(p.pi-pvals))*(pvals<=p.pi) # Compute Estimated No. False Negatives
toterr<-fp+fn # Compute Estimated Total No. Errors
return(list(cfdr=matrix(cfdr,n,1), # cfdr estimate
q=matrix(qval,n,1), # q-value estimate
cdf=matrix(cdf,n,1), # cdf estimate
fp=matrix(fp,n,1), # estimate of no. of false postives
fn=matrix(fn,n,1), # estimate of no. of false negatives
toterr=matrix(toterr,n,1), # estimated total no. of errors
pdf=matrix(pdf,n,1), # estimate of pdf
ebp=matrix(ebp,n,1), # estimate of ebp
pi=pi, # estimate of null proportion
p.pi=p.pi, # p-value at which estimated pdf achieves minimum
cdf.pi=cdf.pi, # cdf evaluated at p.pi
ord=ordering, # ordering vector
dplaces=dplaces, # rounded to this no. of decimal places
LC=LC, # loess.control options used
fdr.method="Pounds and Cheng (2004)", # Bibliographic Information
notes=notes)) # Notes regarding analysis
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.