# R/fdr-library-from-stjuderesearch.R


##########################################################################
# Function: compute.fdr
# Purpose: Apply the requested fdr method to the set(s) of p-values provided
# Arguments: p - a vector or matrix of p-values
#            opts - a list with the following components
#                   LC = loess.control options for SPLOSH
#                   dplaces = pre-rounding option for SPLOSH
#                   lam = lambda for Storey's q-value
#                   robust = robust option for Storey's q-value
#                   adj.eps = epsilon adjustment for BUM to avoid applying the MLE to p-values equal to zero
#                   n.adj = adjustment factor for BUM
#            notes - a vector of strings giving some notes about the analysis
# Returns: a list with the following components:
#          p - the set of p-values supplied to the function
#          fdr - an estimate of the ratio of the number of false positives to the number of results with lesser or equal p-value
#          q - the q-value or fdr-adjusted p-value
#          cdf - the estimate of the expected proportion of tests with lesser or equal p-value
#          ord - gives the value of indices to order results by ascending p-value
#          pi - estimate of the proportion of tests with a true null hypothesis (n.a. for BH95 & BY01)
#          fp - estimate of the total number of false positives among results with equal or lesser p-value
#          fn - estimate of the total number of false negatives among results with larger p-value
#          toterr - sum of fp and fn
#          fdr.method - indicates which method was used
#          notes - appends string description of method used to notes argument supplied
#########################################################################

compute.fdr<-function(p,
                      fdr.method="PC04",
                      opts=default.opts(fdr.method),
                      notes=NULL
                     )

{
	res<-"Choose a valid fdr.method"
	if (fdr.method=="PC04")    res<-splosh(p,opts$LC,opts$dplaces,notes)
	if (fdr.method=="St02")    res<-storey(p,opts$lam,opts$robust,notes)
	if (fdr.method=="PM03")    res<-bum(p,opts$adj.eps,opts$n.adj,notes)
	if (fdr.method=="BH95")    res<-BH95(p,notes)
	if (fdr.method=="Ch04")    res<-Ch04(p,opts$qpos,notes)
	if (fdr.method=="BY01")    res<-BY01(p,notes)
	return(res)
}
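
# Example (not part of the original library): a minimal sketch on simulated
# p-values; the 900/100 null/alternative split and the beta(0.2,1) alternative
# are illustrative assumptions.  Wrapped in if (FALSE) so nothing is executed
# when this file is sourced.
if (FALSE)
{
	set.seed(1)
	p <- c(runif(900), rbeta(100, 0.2, 1))    # 900 null, 100 alternative p-values
	res <- compute.fdr(p, fdr.method="PC04")  # SPLOSH with default options
	sum(res$q <= 0.05)                        # no. of features significant at q <= 0.05
	res$pi                                    # estimated null proportion
}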

#############################################################
# Function: fdr.plots
# Purpose: Produce diagnostic and illustrative plots for fdr analysis
# Arguments: fdrobj - object returned from compute.fdr
#            alpha - plot only results with p-values at or below this threshold (default=1)
#            sub.labels - subtitle(s) for the plots, one per column of p-values (default="")
#            main - main title(s) for the plots (default="")
# Returns: NULL
#############################################################

fdr.plots<-function(fdrobj,alpha=1,sub.labels="",main="")

{
	if(is.vector(fdrobj$p)) fdrobj$p<-matrix(fdrobj$p,length(fdrobj$p),1)
	h<-dim(fdrobj$p)[2]
	g<-dim(fdrobj$p)[1]
	
	if (length(main)==1) main<-rep(main,h)
	if (length(sub.labels)==1) 
	{
		if ((sub.labels=="")&&(h>1)) sub.labels<-paste("Null Hyp.",1:h)
		sub.labels<-rep(sub.labels,h)
	}
	nscreen<-3+2*(!is.element(fdrobj$fdr.method,c("BH95","BY01","St02")))+
                 any(!c(is.null(fdrobj$fp),is.null(fdrobj$fn),is.null(fdrobj$toterr)))
        split.screen(c(h,nscreen))
	k<-0
	for (i in 1:h)
	{
		# p-value histogram
		k<-k+1
                screen(k)
		hist(fdrobj$p[,i],prob=T,xlab="p-value",ylab="density",main=main[i],sub=sub.labels[i],col=16)
		if (!is.null(fdrobj$pdf)) 
		{
			H<-hist(fdrobj$p[,i],prob=T,plot=F)
			x<-fdrobj$p[fdrobj$ord[,i],i]
			y<-fdrobj$pdf[fdrobj$ord[,i],i]
			inc<-y<max(H$counts)
			lines(x[inc],y[inc],lwd=5)
		}		
		if (!is.null(fdrobj$pi)) lines(c(0,1),rep(fdrobj$pi[i],2),lwd=5)

		inc<-(fdrobj$p[,i]<=alpha)
		g.alpha<-sum(inc)
		ord.inc<-1:g.alpha
		# P-P plots
		if (!is.element(fdrobj$fdr.method,c("BH95","BY01","St02")))
		{
			k<-k+1
			screen(k)
			plot(ord.inc/g,fdrobj$cdf[fdrobj$ord[ord.inc,i],i],
			     xlab="Empirical p-value CDF",
			     ylab="Estimated p-value CDF",
			     main=main[i],sub=sub.labels[i],type="l")
			lines(c(0,max(g.alpha/g,fdrobj$cdf[fdrobj$ord[ord.inc,i],i])),c(0,max(g.alpha/g,fdrobj$cdf[fdrobj$ord[ord.inc,i],i])))			
			k<-k+1
			screen(k)
			plot(fdrobj$p[fdrobj$ord[ord.inc,i],i],
			     fdrobj$cdf[fdrobj$ord[ord.inc,i],i]-ord.inc/g,
			     xlab="p-value",ylab="p-value CDF Estimate - p-value EDF",
			     main=main[i],sub=sub.labels[i])
			lines(c(0,1),c(0,0))
		}
		
		k<-k+1
		screen(k)
		plot(ord.inc/g,fdrobj$fdr[fdrobj$ord[ord.inc,i],i],
		     xlab="Proportion of Tests Declared Significant",
		     ylab="Raw Empirical FDR",type="p",main=main[i],sub=sub.labels[i])

		k<-k+1
		screen(k)
		plot(fdrobj$p[fdrobj$ord[ord.inc,i],i],fdrobj$q[fdrobj$ord[ord.inc,i],i],
		     type="l",xlab="p-value",ylab="q",sub=sub.labels[i],main=main[i])

		if (any(!c(is.null(fdrobj$fp),is.null(fdrobj$fn),is.null(fdrobj$toterr))))
		{
			r<-range(fdrobj$fp[inc,i],fdrobj$fn[inc,i],fdrobj$toterr[inc,i])
			k<-k+1
			screen(k)
			plot(c(0,alpha),r/g,xlab="p-value threshold",ylab="Error Estimates",
			     type="n",sub=sub.labels[i],main=main[i])
			if (!is.null(fdrobj$fp))     lines(fdrobj$p[fdrobj$ord[ord.inc,i],i],fdrobj$fp[fdrobj$ord[ord.inc,i],i]/g,col=3)
			if (!is.null(fdrobj$fn))     lines(fdrobj$p[fdrobj$ord[ord.inc,i],i],fdrobj$fn[fdrobj$ord[ord.inc,i],i]/g,col=2)
			if (!is.null(fdrobj$toterr)) lines(fdrobj$p[fdrobj$ord[ord.inc,i],i],fdrobj$toterr[fdrobj$ord[ord.inc,i],i]/g,col=1)
			legend(x="topleft",legend=c("FPs","FNs","TEs"),lty=1,col=3:1)
		}
	}
}
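
# Example (sketch, with simulated data as above; the alpha=0.2 zoom is an
# illustrative choice).  close.screen releases the screens that fdr.plots
# opens via split.screen.  Not executed when this file is sourced.
if (FALSE)
{
	res <- compute.fdr(c(runif(900), rbeta(100, 0.2, 1)), fdr.method="PC04")
	fdr.plots(res, alpha=0.2, main="Simulated p-values")
	close.screen(all.screens=TRUE)
}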

#########################################################
# Function: fdr.table
# Purpose: Write results to a tab-delimited text file (Excel will read if the filename has .xls extension)
# Arguments: fdrobj - an object returned by compute.fdr
#            outfile - string or string-vector giving output file name(s)
#            stat - vector or matrix of test statistics corresponding to p-values in fdrobj (default=NULL)
#            top - no. of features to include in the file, e.g. top=100 keeps the 100 most significant (default=100)
#            sep - delimiter for output file(s) (default = "\t", i.e. tab)
#            type - type of table: "abb" for abbreviated or "full" for complete
#            features - vector giving the feature names (default=NULL)
#            annotations - vector, matrix, or data.frame giving the annotations of the features (default=NULL)
# Returns: NULL
#############################################################

fdr.table<-function(fdrobj,outfile,stat=NULL,top=100,sep="\t",type="abb",features=NULL,annotations=NULL)

{
	if(is.vector(fdrobj$p)) fdrobj$p<-matrix(fdrobj$p,length(fdrobj$p),1)
	h<-dim(fdrobj$p)[2]
	g<-dim(fdrobj$p)[1]
	top<-min(top,g)
	if (length(outfile)!=h) return(paste("Your p-value matrix has",h,
		                                  "columns but you have specified",length(outfile),"output file names.",
		                                  "Please specify",h,"output file names."))
	if (!is.null(stat))
	{
		if(is.vector(stat)) stat<-matrix(stat,length(stat),1)
		if(any(dim(stat)!=dim(fdrobj$p))) return(paste("The p-value matrix has dimension ",dim(fdrobj$p)[1]," by ",dim(fdrobj$p)[2],
			                                    ".  The stat matrix has dimension ",dim(stat)[1]," by ",dim(stat)[2],
			                                    ".  Please give matrices with equal dimension."))
		
	}
	if (is.null(features)) features<-paste("Feature",1:g)
	if (type=="abb")
	{
		for (i in 1:h)
		{
			if (!is.null(stat)) this.stat<-stat[,i]
			else this.stat<-NULL
			ord<-fdrobj$ord[,i]
			X<-cbind(features,stat=this.stat,p=fdrobj$p[,i],q=fdrobj$q[,i],annotations)
			write.table(X[ord[1:top],],outfile[i],sep=sep,row.names=FALSE)
		}
	}
	if (type=="full")
	{
		for (i in 1:h)
		{
			if (!is.null(stat)) this.stat<-stat[,i]
			else this.stat<-NULL	
			
			if (!is.null(fdrobj$fp)) this.fp<-fdrobj$fp[,i]
			else this.fp<-NULL
			
			if (!is.null(fdrobj$fn)) this.fn<-fdrobj$fn[,i]
			else this.fn<-NULL
			
			if (!is.null(fdrobj$toterr)) this.te<-fdrobj$toterr[,i]
			else this.te<-NULL
				
			if (!is.null(fdrobj$ebp)) this.ebp<-fdrobj$ebp[,i]
			else this.ebp<-NULL
				
			if (!is.null(fdrobj$pdf)) this.pdf<-fdrobj$pdf[,i]
			else this.pdf<-NULL	
			
			ord<-fdrobj$ord[,i]
			X<-cbind(features,stat=this.stat,p=fdrobj$p[,i],q=fdrobj$q[,i],fdr=fdrobj$fdr[,i],ebp=this.ebp,fp=this.fp,fn=this.fn,
			         toterr=this.te,cdf=fdrobj$cdf[,i],pdf=this.pdf,annotations)
			write.table(X[ord[1:top],],outfile[i],sep=sep,row.names=FALSE)			
		}
	}
}
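
# Example (sketch; "splosh-results.txt" and the "Gene" labels are illustrative
# names, not part of the library).  Not executed when this file is sourced.
if (FALSE)
{
	res <- compute.fdr(c(runif(900), rbeta(100, 0.2, 1)), fdr.method="PC04")
	fdr.table(res, outfile="splosh-results.txt", top=50, type="abb",
	          features=paste("Gene", 1:length(res$p)))
}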

#############################################################
# Function: default.opts
# Purpose: Return the default opts list for the requested fdr method
#############################################################

default.opts<-function(fdr.method)

{
	if (fdr.method=="PC04") return(list(LC=loess.control(),dplaces=4))
	if (fdr.method=="St02") return(list(lam=NULL,robust=F))
	if (fdr.method=="PM03")    return(list(adj.eps=0.05,n.adj=10000))
	if (fdr.method=="Ch04")    return(list(qpos=c(0.00625,0.01,0.0125,0.025,0.05,0.1,0.25,0.5,0.75)))
}

# The wrapper functions below (splosh, storey, BH95, BY01, bum, Ch04) apply the
# corresponding single-column routines to each column of a p-value matrix.

splosh<-function(p,LC=loess.control(),dplaces=6,notes=NULL)

{
	if (is.vector(p)) p<-matrix(p,length(p),1)
	h<-dim(p)[2]
	g<-dim(p)[1]
	fdr<-q<-cdf<-fp<-fn<-toterr<-pdf<-ebp<-ord<-matrix(0,g,h)
	pi<-p.pi<-rep(0,h)
	for (i in 1:h)
	{
		res<-splosh.1(p[,i],LC,dplaces,notes)
		fdr[,i]<-res$fdr
		q[,i]<-res$q
		cdf[,i]<-res$cdf
		fp[,i]<-res$fp
		fn[,i]<-res$fn
		toterr[,i]<-res$toterr
		pdf[,i]<-res$pdf
		ebp[,i]<-res$ebp
		ord[,i]<-res$ord
		pi[i]<-res$pi
		p.pi[i]<-res$p.pi
	}
	return(list(p=p,fdr=fdr,q=q,cdf=cdf,fp=fp,fn=fn,toterr=toterr,pdf=pdf,ebp=ebp,ord=ord,pi=pi,p.pi=p.pi,
	            dplaces=dplaces,                       # rounded to this no. of decimal places
	            LC=LC,                                 # loess.control options used
	            fdr.method="PC04",
	            notes=c(notes,res$notes)))
	            
}

storey<-function(p,lam=NULL,robust=F,notes=NULL)

{
	if (is.vector(p)) p<-matrix(p,length(p),1)
	h<-dim(p)[2]
	g<-dim(p)[1]
	fdr<-q<-cdf<-fp<-fn<-toterr<-ord<-matrix(0,g,h)
	pi<-rep(0,h)
	for (i in 1:h)
	{
		res<-storey.1(p[,i],lam,robust,notes)
		fdr[,i]<-res$fdr
		q[,i]<-res$q
		cdf[,i]<-res$cdf
		fp[,i]<-res$fp
		fn[,i]<-res$fn
		toterr[,i]<-res$toterr
		ord[,i]<-res$ord
		pi[i]<-res$pi
	}
	return(list(p=p,fdr=fdr,q=q,cdf=cdf,ord=ord,pi=pi,
	            lam=lam,                       
	            robust=robust,                                 
	            fdr.method="St02",
	            notes=c(notes,res$notes)))
	            
}

BH95<-function(p,notes=NULL)

{
	if (is.vector(p)) p<-matrix(p,length(p),1)
	h<-dim(p)[2]
	g<-dim(p)[1]
	fdr<-q<-cdf<-fp<-fn<-toterr<-ord<-matrix(0,g,h)
	pi<-rep(0,h)
	for (i in 1:h)
	{
		res<-BH95.1(p[,i])
		fdr[,i]<-res$fdr
		q[,i]<-res$q
		cdf[,i]<-res$cdf
		fp[,i]<-res$fp
		fn[,i]<-res$fn
		toterr[,i]<-res$toterr
		ord[,i]<-res$ord
		pi[i]<-res$pi
	}
	return(list(p=p,fdr=fdr,q=q,cdf=cdf,ord=ord,
	            fdr.method="BH95",
	            notes=c(notes,res$notes)))
}

BY01<-function(p,notes=NULL)

{
	if (is.vector(p)) p<-matrix(p,length(p),1)
	g<-dim(p)[1]
	h<-dim(p)[2]
	const<-sum(1/(1:g))
	res95<-BH95(p,notes)
	fdr<-const*res95$fdr
	q<-const*res95$q
	fdr[fdr>1]<-1
	q[q>1]<-1
	pi<-rep(const,h)
	cdf<-res95$cdf
	ord<-res95$ord
	return(list(p=p,fdr=fdr,q=q,cdf=cdf,ord=ord,
	            fdr.method="BY01",
	            notes=c("Benjamini and Yekutieli (2001) Used to Control FDR.",notes)))
}

bum<-function(p,adj.eps=0.5,n.adj=length(p),notes=NULL)

{
	if (is.vector(p)) p<-matrix(p,length(p),1)
	h<-dim(p)[2]
	g<-dim(p)[1]
	fdr<-q<-cdf<-fp<-fn<-toterr<-pdf<-ebp<-ord<-matrix(0,g,h)
	a<-lambda<-pi<-rep(0,h)
	for (i in 1:h)
	{
		res<-bum.1(p[,i],adj.eps,n.adj,notes)
		fdr[,i]<-res$fdr
		q[,i]<-res$q
		cdf[,i]<-res$cdf
		fp[,i]<-res$fp
		fn[,i]<-res$fn
		toterr[,i]<-res$toterr
		pdf[,i]<-res$pdf
		ebp[,i]<-res$ebp
		ord[,i]<-res$ord
		pi[i]<-res$pi
		a[i]<-res$a
		lambda[i]<-res$lambda
	}
	return(list(p=p,fdr=fdr,q=q,cdf=cdf,fp=fp,fn=fn,toterr=toterr,pdf=pdf,ebp=ebp,ord=ord,pi=pi,a=a,lambda=lambda,
	            adj.eps=adj.eps,                       # epsilon used to adjust zero p-values
	            n.adj=n.adj,                           # adjustment factor used for zero p-values
	            fdr.method="PM03",
	            notes=c(notes,res$notes)))
	            
}

Ch04<-function(p,qpos=c(0.00625,0.01,0.0125,0.025,0.05,0.1,0.25,0.5,0.75),notes=NULL)

{
	if (is.vector(p)) p<-matrix(p,length(p),1)
	h<-dim(p)[2]
	g<-dim(p)[1]
	fdr<-q<-cdf<-pdf<-ord<-matrix(0,g,h)
	pi<-rep(NA,h)
	for (i in 1:h)
	{
		res<-Ch04.1(p[,i],qpos)
		fdr[,i]<-res$fdr
		q[,i]<-res$q
		cdf[,i]<-res$cdf
		pdf[,i]<-res$pdf
		ord[,i]<-res$ord
		pi[i]<-res$pi
	}
	return(list(p=p,fdr=fdr,q=q,cdf=cdf,pdf=pdf,ord=ord,pi=pi,
	            fdr.method="Ch04",notes=c(notes,res$notes)))
}



#####################################
# Function: BH95.1
# Purpose: Compute the quantity that Benjamini and Hochberg use to control the FDR
#####################################

BH95.1<-function(p,                  # p-value vector
                 notes=NULL)         # vector of notes gathered to date

{
	m<-length(p)                   # number of genes examined in analysis
	u<-order(p)                    # vector to order results by ascending p-values
	v<-rank(p)                     # rank of p-values
	cdf<-v/m                       # empirical distribution function used to estimate cdf
	fdr<-p/cdf                     # quantity used to control fdr
	fdr[fdr>1]<-1                  # any larger than 1, assign 1

	q<-fdr                         # initialize q-value vector
	q[u[m]]<-min(fdr[u[m]],1)      # initialize at the largest p-value
	q[rev(u)]<-cummin(q[rev(u)])   # find q-value
	fp<-m*cdf*fdr                  # estimated false positive count, very conservative
	fn<-(1-cdf-(1-p))*m            # estimated false negative count; so conservative it is often near 0
	fn[fn<0]<-0                    # set false negative count to 0 if less than 0
	toterr<-fp+fn                  # compute total error criterion
	# return results
	return(list(fdr=matrix(fdr,m,1),           # fdr estimate
	            q=matrix(q,m,1),               # q-value estimate               
	            cdf=matrix(cdf,m,1),           # cdf estimate
	            fp=matrix(fp,m,1),             # estimated no. of false positives incurred
	            fn=matrix(fn,m,1),             # estimated no. of false negatives incurred
	            toterr=matrix(toterr,m,1),     # total estimated no. of errors
	            ord=u,                         # ordering vector
	            pi=1,                          # implied estimate of null proportion
	            fdr.method="Benjamini and Hochberg (1995)",
	            notes=c(notes,"Benjamini and Hochberg 1995 Used to Estimate FDR.")
	           ))
}
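
# Worked example (illustrative): for p = (0.01, 0.02, 0.5, 0.8), m = 4, the raw
# estimates are fdr = m*p/rank = (0.04, 0.04, 0.667, 0.8); these happen to be
# nondecreasing in p, so the cummin step leaves them unchanged and q = fdr.
if (FALSE)
{
	BH95.1(c(0.01, 0.02, 0.5, 0.8))$q
}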

###########################################################
# storey.1
# computes FDR and q-values according to Storey's method
###########################################################

storey.1<-function(
                 p,            # vector of p-values
                 lam=NULL,     # smoothing parameter for estimating pi
                 robust=F,     # indicates whether to compute robust q-values
                 notes=NULL    # notes or comments on analysis
                )

{
    #This is just some pre-processing
    m <- length(p)
    notes<-c(notes,"Storey's method used to estimate FDR.")
    #These next few functions are the various ways to automatically choose lam
    #and estimate pi0
    if(!is.null(lam)) {
        pi0 <- mean(p>lam)/(1-lam)
        pi0 <- min(pi0,1)
        notes <- c(notes,"The user prespecified lam in the calculation of pi0.")
    }
    else{
        notes <- c(notes,"A smoothing method was used in the calculation of pi0.")
        lam <- seq(0,0.95,0.01)
        pi0 <- rep(0,length(lam))
        for(i in 1:length(lam)) {
        pi0[i] <- mean(p>lam[i])/(1-lam[i])
        }
        spi0 <- smooth.spline(lam,pi0,df=3,w=(1-lam))
        pi0 <- predict(spi0,x=0.95)$y
        pi0 <- min(pi0,1)
    }

    #The q-values are actually calculated here
    u <- order(p)
    v <- rank(p)
    fdr <- pi0*m*p/v
    fdr[fdr>1]<-1
    q<-fdr
    if(robust) 
    {
        q <- pi0*m*p/(v*(1-(1-p)^m))
        notes <- c(notes, "The robust version of the q-value was calculated. See Storey JD (2002) JRSS-B 64: 479-498.")
    }
    q[u[m]] <- min(q[u[m]],1)
    q[rev(u)]<-cummin(q[rev(u)])
    cdf<-v/m
    fp<-m*cdf*fdr
    fn<-(1-cdf-pi0*(1-p))*m
    fn[fn<0]<-0

   return(list(
               fdr=matrix(fdr,m,1),         # FDR estimate
               q=matrix(q,m,1),             # q-values
               cdf=matrix(cdf,m,1),         # CDF estimate (EDF)
               fp=matrix(fp,m,1),           # Est. No. False Positives
               fn=matrix(fn,m,1),           # Est. No. False Negatives
               toterr=matrix(fp+fn,m,1),    # Total of FP and FN
               ord=u,                       # ordering vector, places in order of increasing p-values
               pi=pi0,                      # pi - null proportion

               lam=lam,                     # lam - smoothing parameter for estimating pi
               robust=robust,               # robust q-values or not
               fdr.method="Storey (2002), Storey and Tibshirani (2003)",

               notes=notes      # notes regarding analysis
              ))
}
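
# Example (sketch): with lam fixed at 0.5, pi0 is just the proportion of
# p-values above 0.5 divided by (1 - 0.5); with lam=NULL the spline-smoothed
# estimate over lam = 0, 0.01, ..., 0.95 is used instead.  The simulated
# mixture is an illustrative assumption.
if (FALSE)
{
	p <- c(runif(900), rbeta(100, 0.2, 1))
	storey.1(p, lam=0.5)$pi    # fixed-lambda estimate: mean(p > 0.5)/0.5
	storey.1(p)$pi             # smoothed estimate
}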
########################################################################################################
# This S-plus code implements the SPLOSH method for the analysis of microarray data
# described by Pounds and Cheng (Improving False Discovery Rate Estimation - Bioinformatics 2004).
#
# Last Update: July 14, 2004
# 
# Function name: splosh
# Arguments: pvals - a vector of p-values obtained by applying a statistical hypothesis
#                    test to the expression values for each gene individually
#            LC - (optional) a list containing tuning parameters for the loess algorithm, 
#                 see help(loess.control) for more details, defaults to S-plus defaults
#            dplaces - (optional) round p-values to this many decimal places to avoid
#                      numerical difficulties that can arise by division by small numbers, default = 6
#            notes - (optional) may pass any string describing notes regarding the analysis, default = NULL
#
# Outputs: a list with the following elements
#         fdr - a vector of the cfdr estimates at the values specified by the user in pvals
#         q - a vector of corresponding monotone fdr estimates, similar to Storey's q-value
#         cdf - vector of the SPLOSH cdf estimate evaluated at pvals
#         fp - vector of estimated no. of false positives incurred by setting threshold 
#              at corresponding value of pvals
#         fn - vector of estimated no. of false negatives incurred by setting threshold 
#              at corresponding value of pvals
#         toterr - vector giving the sum of fp and fn for each entry
#         ebp - the empirical Bayes posterior that the null hypothesis is true, see Efron et al. (JASA 2001)
#         pi - estimate of the mixing parameter giving the probability of the null hypothesis
#         ord - a vector giving index numbers to order results according to ascending p-values
#         dplaces - echoes value of dplaces used in algorithm
#         LC - echoes value of LC used in algorithm
#         fdr.method - gives bibliographic reference for the method
#         notes - returns notes regarding the analysis
###########################################################################################################

splosh.1<-function(pvals,LC=loess.control(),dplaces=3,notes=NULL)
     
{
	notes<-c(notes,"SPLOSH used to estimate FDR.")
	x<-round(pvals,dplaces)                                      # Prevent numerical difficulties
	n<-length(pvals)                                             # Obtain no. of points
	r<-rank(x)                                                   # rank observations
	ux<-sort(unique(x))                                          # ordered unique p-values
	ur<-(sort(unique(r)))-.5                                     # ordered unique ranks
	#plot(ux,ur)	
	if (max(ux)<1)                                               # edge adjustments
	{
		ux<-c(ux,1)
		ur<-c(ur,n)
	}
	if (min(ux)>0)
	{
		ux<-c(0,ux)
		ur<-c(0,ur)
	}  
	nux<-length(ux)                                              # No. of unique points
	ur<-ur/n                                                     # Divide ranks by n
	dx<-diff(ux)                                                 # spacings of unique p-values
	dr<-diff(ur)                                                 # spacings of unique ranks
	dFdx<-exp(log(dr)-log(dx))                                   # spacing-interval slopes
	mr<-(ur[1:(nux-1)]+ur[2:nux])/2                              # Midpoints of Unique Rank Intervals (unit scale)
	mx<-(ux[1:(nux-1)]+ux[2:nux])/2                              # Midpoints of Unique p-value Intervals
	tr<-asin(2*(mr-.5))                                          # Arc-Sine Transform Rank Midpoints
	fit<-loess(log(dFdx)~tr,control=LC)                          # Loess of log-transformed EDQF slopes vs transformed rank-midpoints
	tr2<-asin(2*(ur-.5))                                         # Arc-Sine Transform Unique Ranks
	yhat<-predict(fit,tr2)                                       # Obtain Estimated Derivative of CDF (up to constant multiplier)
	yhat[c(1,nux)]<-approx(tr2[2:(nux-1)],
	                       yhat[2:(nux-1)],
	                       xout=tr2[c(1,nux)],
	                       rule=3)$y                             # Extrapolate to get endpoint estimates
	pdf1<-exp(yhat)                                              # Obtain PDF up to a constant
	trap<-0.5*(pdf1[1:(nux-1)]+pdf1[2:nux])*dx                   # Trapezoid rule terms
	const<-sum(trap)                                             # Constant via trapezoid rule
	pdf2<-pdf1/const                                             # Adjust pdf by constant
	pdf<-approx(ux,pdf2,xout=pvals,rule=3)$y                     # Obtain pdf estimate for p-values
	cdf<-approx(ux,c(0,cumsum(trap)),
	            xout=pvals,rule=2)$y/const                       # Obtain cdf
	pi<-min(pdf)                                                 # Take pi as min of PDF
	p.pi<-max(pvals[pdf==pi])                                    # largest p-value at which pi occurs
	cdf.pi<-max(cdf[pvals==p.pi])                                # CDF evaluated at p.pi
	cfdr<-pi*pvals/cdf                                           # Estimate cFDR for p-values
	ebp<-pi/pdf                                                  # Estimate EBP of null for p-values
	cfdr[pvals==0]<-ebp[pvals==0]                                # L'Hospital's Rule
	cfdr[cfdr>1]<-1                                              # Set max(fdr)=1
	qval<-cfdr                                                   # Initialize q-value vector
	ordering<-order(pvals)                                       # Report Results
	qval[rev(ordering)]<-cummin(qval[rev(ordering)])             # Compute q-value
	fp<-n*cfdr*cdf                                               # Compute Estimated No. False Positives
	fn<-n*(cdf.pi-cdf-pi*(p.pi-pvals))*(pvals<=p.pi)             # Compute Estimated No. False Negatives
	toterr<-fp+fn                                                # Compute Estimated Total No. Errors
	
	return(list(fdr=matrix(cfdr,n,1),                  # cfdr estimate
	            q=matrix(qval,n,1),                    # q-value estimate
	            cdf=matrix(cdf,n,1),                   # cdf estimate
	            fp=matrix(fp,n,1),                     # estimate of no. of false positives
	            fn=matrix(fn,n,1),                     # estimate of no. of false negatives
	            toterr=matrix(toterr,n,1),             # estimated total no. of errors
	            pdf=matrix(pdf,n,1),                   # estimate of pdf
	            ebp=matrix(ebp,n,1),                   # estimate of ebp
	            pi=pi,                                 # estimate of null proportion
	            p.pi=p.pi,                             # p-value at which estimated pdf achieves minimum
	            cdf.pi=cdf.pi,                         # cdf evaluated at p.pi
	            ord=ordering,                          # ordering vector
	            dplaces=dplaces,                       # rounded to this no. of decimal places
	            LC=LC,                                 # loess.control options used
	            fdr.method="Pounds and Cheng (2004)",  # Bibliographic Information
	            notes=notes))                          # Notes regarding analysis
}

bum.1<-function(p,adj.eps=0.5,n.adj=length(p),notes=NULL)

#################################################################
# Function: bum.1
# Purpose: Compute FDR estimates using beta-uniform mixture model
#          proposed by Pounds and Morris (2003).
#################################################################

{
	m<-length(p)   # No. of genes
	notes<-c(notes,"Pounds and Morris (2003) BUM model used to estimate FDR.")
	# adjust p-values if necessary
	if (any(p==0)) 
	{
		p<-(n.adj*p+adj.eps)/(n.adj+2*adj.eps)
		notes<-c(notes,"The p-values were adjusted because some p-values equal zero.")
	}
	res<-bum.mle(p)                                                 # find MLE's of parameters
	pi<-ext.pi(res$a,res$lambda)                                    # determine pi from MLE's
	fdr<-bum.FDR(p,res$a,res$lambda)                                # determine fdr estimates
	q<-fdr                                                          # q-value = fdr due to implied monotonicity
	cdf<-pbum(p,res$a,res$lambda)                                   # cdf estimate
	fp<-m*bum.false.positive(p,res$a,res$lambda)                    # estimated no. of false positives
	fn<-m*bum.false.negative(p,res$a,res$lambda)                    # estimated no. of false negatives
	toterr<-fp+fn                                                   # estimated total no. of errors
	pdf<-dbum(p,res$a,res$lambda)                                   # estimated pdf
	ebp<-1-bum.emp.post(p,res$a,res$lambda)                         # estimated ebp
	# return results
	return(list(fdr=matrix(fdr,m,1),                                # fdr estimate
	            q=matrix(q,m,1),                                    # q-value estimate
	            cdf=matrix(cdf,m,1),                                # cdf estimate
	            fp=matrix(fp,m,1),                                  # estimated no. of false positives
	            fn=matrix(fn,m,1),                                  # estimated no. of false negatives
	            toterr=matrix(toterr,m,1),                          # total error
	            pdf=matrix(pdf,m,1),                                # pdf estimate
	            ebp=matrix(ebp,m,1),                                # ebp estimate
	            pi=pi,                                              # pi estimate (null proportion)
	            ord=order(p),                                       # ordering vector
	            a=res$a,                                            # MLE of BUM parameter a
	            lambda=res$lambda,                                  # MLE of BUM parameter lambda
	            fdr.method="Pounds and Morris (2003)",              # method used to estimate FDR
	            notes=notes))
}


##########################################################################
# Function: dbum
# Purpose: Compute the pdf of the bum distribution
# Arguments: x - point or vector of points at which to compute pdf
#            a - shape parameter of beta component
#            lambda - mixture parameter, proportion of uniform component
# Returns: value of the pdf of the bum distribution for x
##########################################################################

dbum<-function(x,a,lambda)

{
	return(lambda+(1-lambda)*a*x^(a-1))
}
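
# Sanity check (sketch): dbum integrates to 1 over [0,1], since
# lambda + (1-lambda)*a*x^(a-1) has antiderivative lambda*x + (1-lambda)*x^a.
# The parameter values are illustrative.
if (FALSE)
{
	integrate(dbum, 0, 1, a=0.5, lambda=0.6)$value   # approximately 1
}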

##########################################################################
# Function: pbum
# Purpose: Compute the cdf of the bum distribution
# Arguments: x - point or vector of points at which to compute the cdf
#            a - shape parameter of the beta component
#            lambda - mixing parameter, weight of uniform component
# Returns: value of the cdf of the bum distribution for x
##########################################################################

pbum<-function(x,a,lambda)
{
	return(lambda*x+(1-lambda)*x^a)
}

##########################################################################
# Function: qbum
# Purpose: Compute the quantile of the bum distribution
# Arguments: p - percentile or vector of percentiles
#            a - shape parameter of the beta component
#            lambda - mixing parameter, weight of uniform component
#            nbisect - the number of bisections to perform
# Returns: the values x such that the cdf at x equals the percentiles
# Notes: Uses the bisection method to find quantiles, a larger value of
#        nbisect will result in more accurate results.  The results will
#        be accurate to within 2^(-nbisect).
##########################################################################

qbum<-function(p,a,lambda,nbisect=20)

{

	n<-length(p)
	mid<-rep(0,n)
	top<-rep(1,n)
	bot<-rep(0,n)
	gohigher<-rep(F,n)
	for (j in 1:nbisect)
	{
		mid<-(top+bot)/2
		gohigher<-(pbum(mid,a,lambda)<p)
		bot[gohigher]<-mid[gohigher]
		top[!gohigher]<-mid[!gohigher]
	}
	return(mid)
}
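
# Sanity check (sketch): qbum should invert pbum to within the documented
# 2^(-nbisect) tolerance; parameter values are illustrative.
if (FALSE)
{
	u <- seq(0.05, 0.95, by=0.05)
	x <- qbum(u, a=0.5, lambda=0.6)
	max(abs(pbum(x, 0.5, 0.6) - u))   # small; x is accurate to about 2^(-20)
}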

#########################################################################
# Function: rbum
# Purpose: generate random bum observations
# Arguments: n - number of observations
#            a - parameter a
#            lambda - parameter lambda
# Returns: a vector of random bum observations
##########################################################################

rbum<-function(n,a,lambda)

{
	u<-runif(n)
	return(qbum(u,a,lambda))
}

#########################################################################
# Function: dalt
# Purpose: compute the density of the alternative distribution
# Arguments: x - p-value of interest
#            a - shape parameter of beta component
#            lambda - mixing parameter, proportion of uniform component
# Returns: vector of density at x
########################################################################

dalt<-function(x,a,lambda)

{
	pi<-ext.pi(a,lambda)
	return((dbum(x,a,lambda)-pi)/(1-pi))
}

#########################################################################
# Function: palt
# Purpose: compute the cdf of the extracted alternative component of 
#          the BUM distribution
# Arguments: x - pvalue of interest
#            a - shape parameter of beta component
#            lambda - mixing parameter, proportion of uniform component
# Returns: vector of cdf at x
#########################################################################

palt<-function(x,a,lambda)

{
	pi<-ext.pi(a,lambda)
	return((pbum(x,a,lambda)-pi*x)/(1-pi))
}

##########################################################################
# Function: qalt
# Purpose: compute the quantile of the alternative component of BUM
# Arguments: p - the percentile
#            a - shape parameter of beta component
#            lambda - mixing parameter, proportion of uniform component
# Returns: a vector of the p quantiles
###########################################################################

qalt<-function(p,a,lambda,nbisect=20)

{
	n<-length(p)
	mid<-rep(0,n)
	for (i in 1:n)
	{
		top<-1
		bot<-0
		for (j in 1:nbisect)
		{
			mid[i]<-mean(c(top,bot))
			if(palt(mid[i],a,lambda)<p[i]) bot<-mid[i]
			else top<-mid[i]	
		}
	}
	return(mid)
}

#########################################################################
# Function: bum.logL
# Purpose: for a set of p-values x, compute the log-likelihood of 
#          parameters a and lambda
# Arguments: a - the shape parameter of the beta component
#            lambda - mixing parameter, proportion of uniform component
#            x - vector of p-values
# Returns: the log-likelihood of the parameters
# Notes: Permutation techniques can result in p-values of zero.  This
#        routine will provide non-numeric results if a p-value of zero
#        is included.  A recommendation is to adjust permutation p-values
#        before calling the function by letting the new p-values
#        equal (nperms*old p-values + .5)/(nperms+1).
#########################################################################

bum.logL<-function(a,lambda,x)

{
  return(sum(log(dbum(x,a,lambda))))
}

#########################################################################
# Function: logit
# Purpose: Compute the logit of an x in (0,1)
# Arguments: x - point or vector of points in (0,1)
# Returns: logit of x
#########################################################################

logit<-function(x)

{
	return(log(x)-log(1-x))
}

#########################################################################
# Function: inv.logit
# Purpose: Compute the inverse logit of x
# Arguments: x - point or vector of points
# Returns: inverse logit of x
#########################################################################

inv.logit<-function(x)

{
	return(exp(x)/(1+exp(x)))
}

#########################################################################
# Function: neg.bum.logL
# Purpose: Compute the negative log-likelihood for use by nlminb in
#          bum.mle.
# Arguments: x - a vector with two components 
#                x[1] corresponds to logit of a, beta shape parameter
#                x[2] corresponds to logit of lambda, the prop. uniform
#            pvals - a vector of non-zero p-values
# Returns: the negative log-likelihood 
# Notes: used in bum.mle
#########################################################################

neg.bum.logL<-function(x,pvals)

{
	return(-1*sum(log(dbum(pvals,inv.logit(x[1]),inv.logit(x[2])))))
}

#########################################################################
# Function: bum.mle
# Purpose: For a set of p-values, compute MLE's for a and lambda
# Arguments: pvals - the set of p-values
#            nstartpts - number of starting points to randomly generate
#            starta - vector of logit of starting a's for optimization routine, default = 0
#            startlambda - vector of logit of starting lambdas for optimization, default = 0
#            nadj - number for adjusting p-values if necessary, default = 100000
#            adjeps - epsilon for adjusting pvalues, if necessary, default = 0.5
# Returns: optimal values of a and lambda for given starting values
# Notes: May not provide global mle.  Repeating with multiple starting
#        points may yield different results.
#########################################################################

bum.mle<-function(pvals,nstartpts=0,starta=0,startlambda=0,nadj=100000,adjeps=0.5)
{
	if (nstartpts>0)
	{
		starta<-logit(runif(nstartpts))
		startlambda<-logit(runif(nstartpts))
	}
	adjusted<-min(pvals)<=0
	if (adjusted) pvals<-(nadj*pvals+adjeps)/(nadj+2*adjeps)
	bestlogL<- -Inf
	nstartpts<-length(starta)	
	for (i in 1:nstartpts)
	{
		results <- nlm( neg.bum.logL, c(starta[i],startlambda[i]),pvals=pvals )		#	OK in R
		#	(the minimized value is results$minimum in R; results$objective in S+)
		if ( -results$minimum > bestlogL)
		{
			a<-inv.logit(results$estimate[1])		# 	parameters[1])  # S+
			lambda<-inv.logit(results$estimate[2])	# 	parameters[2])	# S+
			logL<- -results$minimum
			nits<- results$iterations
			termination <- results$code				#	message		#	S+
			bestlogL<-logL
		}
	}
	return(list(a=a,lambda=lambda,logL=logL,pvals.adjusted=adjusted,nstartpts=nstartpts,nits=nits,termination=termination))
}
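
# Example (sketch): fit the BUM model to simulated p-values and compare the
# estimates with the generating parameters; a=0.3 and lambda=0.4 are
# illustrative choices.
if (FALSE)
{
	set.seed(2)
	p.sim <- rbum(2000, a=0.3, lambda=0.4)
	fit <- bum.mle(p.sim)
	c(a=fit$a, lambda=fit$lambda)   # should be near 0.3 and 0.4
}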

#########################################################################
# Function: qqbum
# Purpose: Produce a quantile-quantile plot for a set of p-values
# Arguments: pvals - a vector of p-values
#            a - shape parameter for beta component of bum distribution
#            lambda - mixing parameter for BUM distribution
#            main - primary plot title, default = "BUM QQ Plot"
#            xlab - label of x-axis, default = "BUM Expected p-value"
#            ylab - label of y-axis, default = "Observed p-value"
# Returns: a quantile-quantile plot
# Notes: If values of a and lambda are not provided, bum.mle
#        will be used to estimate a and lambda.  May take a few minutes.
#########################################################################


qqbum<-function(pvals,a=NULL,lambda=NULL,main="BUM QQ Plot",xlab="BUM Expected p-value",ylab="Observed p-value",nstartpts=0,starta=0,startlambda=0)

{
	n<-length(pvals)
	pvals<-sort(pvals)	
	if(is.null(a)||is.null(lambda))
	{
		al<-bum.mle(pvals,nstartpts=nstartpts,starta=starta,startlambda=startlambda)
		plot(c(0,1),c(0,1),main=main,xlab=xlab,ylab=ylab,type="n")
		lines(qbum((rank(pvals)-.5)/n,al$a,al$lambda),pvals,lty=2)
		lines(c(0,1),c(0,1))
	}
	else
	{
		plot(c(0,1),c(0,1),main=main,xlab=xlab,ylab=ylab,type="n")
		lines(qbum((rank(pvals)-.5)/n,a,lambda),pvals,lty=2)
		lines(c(0,1),c(0,1))		
	}
}

#########################################################################
# Function: bum.histogram
# Purpose: Compare the fitted bum curve to the histogram
# Arguments: pvalues - vector of p-values
#            a - a for bum curve, default = estimated MLE
#            lambda - lambda for bum curve, default = estimated MLE
#             main - primary title of plot, default = "Histogram"
#             xlab - label of x-axis, default = "p-value"
#             ylab - label of y-axis, default = "Density"
#########################################################################

bum.histogram<-function(pvalues,a=NA,lambda=NA,main="Histogram",xlab="p-value",ylab="Density",nstartpts=0,starta=0,startlambda=0)

{
	if(is.na(a)||is.na(lambda))
	{
		MLE<-bum.mle(pvalues,nstartpts=nstartpts,starta=starta,startlambda=startlambda)
		a<-MLE$a
		lambda<-MLE$lambda
	}
	hist(pvalues,probability=T,main=main,xlab=xlab,ylab=ylab)
	x<-1:100/100
	lines(x,dbum(x,a,lambda),lwd=3)
}

#########################################################################
# Function: ext.pi
# Purpose: Extract the maximal uniform component from a bum density
# Arguments: a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, component of bum that is uniform
# Returns: the proportion of the density that can be extracted as a uniform
#########################################################################

ext.pi<-function(a,lambda)

{
	lambda[(lambda>1)+(lambda<0)>0]<-NA      # invalid mixing parameters become NA
	a[(a>1)+(a<0)>0]<-NA                     # invalid shape parameters become NA
	return((lambda+(1-lambda)*a))
}
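
# Note: for a in (0,1) the BUM density is decreasing, so its minimum on [0,1]
# is attained at x = 1, where dbum(1,a,lambda) = lambda + (1-lambda)*a; that
# minimum is exactly the extractable uniform component returned by ext.pi.
if (FALSE)
{
	all.equal(ext.pi(0.3, 0.4), dbum(1, 0.3, 0.4))   # TRUE
}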

#########################################################################
# Function: bum.emp.post
# Purpose: Compute the upper bound of BUM based empirical Bayes posterior
#          probability of the alternative hypothesis
# Arguments: x - the point or vector of points at which to compute EB post
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
# Returns: the upper bound of the BUM based empirical Bayes posterior at x
# Notes: Computes an upper bound because maximal extraction of uniform
#        is performed.
#########################################################################

bum.emp.post<-function(x,a,lambda)

{
   pi<-ext.pi(a,lambda)
   return((dbum(x,a,lambda)-pi)/dbum(x,a,lambda))
}

#########################################################################
# Function: bum.FDR
# Purpose: Compute an estimated upper bound for the false discovery rate
#          when significance is determined by comparing a p-value to
#          a threshold tau
# Arguments: tau - the threshold of comparison (point or vector)
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
# Returns: Estimated upper bound of FDR
# Notes: FDR is the expected proportion of rejections that are false
#        positives, or Type I errors
#########################################################################

bum.FDR<-function(tau,a,lambda)

{
   pi<-ext.pi(a,lambda)
   return(tau*pi/pbum(tau,a,lambda))
}

#########################################################################
# Function: bum.false.positive
# Purpose: Compute an estimated upper bound for the proportion of all tests
#          that will be false positives when significance is determined
#          by comparing the p-value to a threshold tau
# Arguments: tau - the threshold of comparison (point or vector)
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
# Returns: Estimated upper bound of proportion of all tests that are
#          false positives
#########################################################################

bum.false.positive<-function(tau,a,lambda)

{
   pi<-ext.pi(a,lambda)
   return(tau*pi)
}

#########################################################################
# Function: bum.false.negative
# Purpose: Compute an estimated lower bound for the proportion of all
#          tests resulting in false negatives when significance is
#          determined by comparing the p-value to a threshold tau
# Arguments: tau - threshold
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
# Returns: Estimated lower bound for proportion of all tests resulting
#          false negatives (Type II errors)
#########################################################################

bum.false.negative<-function(tau,a,lambda)

{
   pi<-ext.pi(a,lambda)
   return(1-pbum(tau,a,lambda)-pi*(1-tau))
}

#########################################################################
# Function: bum.true.positive
# Purpose: Compute an estimated lower bound for the proportion of all
#          tests resulting in true positives when significance is
#          determined by comparing the p-value to a threshold tau
# Arguments: tau - threshold
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
# Returns: Estimated lower bound for proportion of all tests resulting
#          true positives
#########################################################################

bum.true.positive<-function(tau,a,lambda)

{
   pi<-ext.pi(a,lambda)
   return(pbum(tau,a,lambda)-pi*tau)
}

#########################################################################
# Function: bum.true.negative
# Purpose: Compute an estimated lower bound for the proportion of all
#          tests resulting in true negatives when significance is
#          determined by comparing the p-value to a threshold tau
# Arguments: tau - threshold
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
# Returns: Estimated lower bound for proportion of all tests resulting
#          true negatives
#########################################################################

bum.true.negative<-function(tau,a,lambda)


{
   pi<-ext.pi(a,lambda)
   return(pi*(1-tau))
}


#########################################################################
# Function: bum.weighted.error
# Purpose: Compute a linear combination of the upper bound for the 
#          proportion of false positives and the lower bound for the
#          proportion of false negatives resulting when significance
#          is determined by comparing p-values to a threshold 
# Arguments: tau - threshold
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
#            wfp - weight of the false positives
#            wfn - weight of the false negatives
# Returns: the weighted combination of error rates
#########################################################################

bum.weighted.error<-function(tau,a,lambda,wfp=1,wfn=1)

{
   return(wfp*bum.false.positive(tau,a,lambda)+wfn*bum.false.negative(tau,a,lambda))
}

#########################################################################
# Function: find.FDR.threshold
# Purpose: Find a p-value threshold so that all p-values less than the
#          threshold have an FDR lower than a desired level
# Arguments: fdr - desired fdr
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
# Returns: p-value threshold that maintains the desired fdr
#########################################################################

find.FDR.threshold<-function(fdr,a,lambda)

{
   pi<-ext.pi(a,lambda)
   return(((pi-fdr*lambda)/(fdr*(1-lambda)))^(1/(a-1)))
}
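
# Sanity check (sketch): plugging the returned threshold back into bum.FDR
# should reproduce the requested FDR; parameter values are illustrative.
if (FALSE)
{
	tau <- find.FDR.threshold(0.1, a=0.3, lambda=0.4)
	bum.FDR(tau, 0.3, 0.4)   # approximately 0.1
}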



#########################################################################
# Function: find.EB.threshold
# Purpose: Find a p-value threshold so that all p-values less than the
#          threshold have an empirical Bayes (EB) posterior probability
#          greater than a desired level
# Arguments: EB - desired empirical Bayes' posterior
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
# Returns: p-value threshold that maintains the desired EB posterior
#########################################################################

find.EB.threshold<-function(EB,a,lambda)

{
   pi<-ext.pi(a,lambda)
   return(((EB*lambda+a*(1-lambda))/(a*(1-EB)*(1-lambda)))^(1/(a-1)))
}

##########################################################################
# Function: inv.dbum
# Purpose: Compute the inverse of the pdf  of the bum distribution
# Arguments: y - value of the pdf of the bum of interest
#            a - shape parameter of the beta component
#            lambda - mixing parameter, weight of uniform component
#            nbisect - the number of bisections to perform
# Returns: x so that dbum(x,a,lambda) = y
# Notes: Uses the bisection method.  The results will
#        be accurate to within 2^(-nbisect).  Used by find.WE.threshold.
##########################################################################

inv.dbum<-function(y,a,lambda,nbisect=20)

{

	n<-length(y)
	mid<-rep(0,n)
	for (i in 1:n)
	{
		top<-1
		bot<-0
		for (j in 1:nbisect)
		{
			mid[i]<-mean(c(top,bot))
			if(dbum(mid[i],a,lambda)>y[i]) bot<-mid[i]
			else top<-mid[i]	
		}
	}
	return(mid)
}


#=======================================================================
# Function: Ch04.1
# Purpose: Estimate pFDR and FDR from p-values using the variation-
#          diminishing (V-D) spline cdf/pdf estimators of Cheng et al (2004)
# Returns: a list with
#   p -- the p-values supplied
#   fdr -- FDR estimate evaluated at each p-value
#   q -- corresponding monotone q-value estimate
#   cdf -- p-value distribution cdf evaluated at each p-value
#   pdf -- p-value distribution pdf evaluated at each p-value
#   ord -- ordering vector for ascending p-values
#   pi -- estimate of pi_0, the null proportion
#   notes -- notes regarding the analysis
#=======================================================================
Ch04.1 <- function(Pvals,qpos=c(0.00625,0.01,0.0125,0.025,0.05,0.1,0.25,0.5,0.75),notes=NULL)
{  
  G <- length(Pvals)
  Pgrid <- alph.grid <- alpha.seq <- Pvals
  nPg <- length(Pgrid)
  Z <- VDS.unif.est.Qknots(Pgrid,Pvals,qpos=qpos,degree=4,print=F)
  #pi0 <- min(Z[,2])
  pi0 <- min(1,Z[nPg,2],na.rm=T)
  cdf <- Z[,1]
  pdf <- Z[,2]

# Estimate pFDR and FDR over a grid of alpha values:
  Z <- VDS.unif.est.Qknots(alpha.seq,Pvals,qpos=qpos,degree=4,print=F)
  pFDR <- (pi0*alpha.seq)/(Z[,1]*(1-(1-alpha.seq)^G))
  FDR <- (pi0*alpha.seq)/Z[,1]
  
  ord<-order(Pvals)
  q<-FDR
  q[ord[G]]<-min(1,FDR[ord[G]])
  q[rev(ord)]<-cummin(q[rev(ord)])   # enforce monotonicity and the cap at 1
  return(list(p=matrix(Pvals,G,1),
              fdr=matrix(FDR,G,1),
              q=matrix(q,G,1),
              cdf=matrix(cdf,G,1),
              pdf=matrix(pdf,G,1),
              ord=matrix(ord,G,1),
              pi=pi0,
              notes=c(notes,"Cheng et al (2004) used to obtain FDR estimates.")))
}

#========================================================
# The empirical distribution function
# Input: X -- Data (missing values are deleted)
#        x1 -- grid over which to evaluate the EDF
# Output: vector of EDF values over x1
#========================================================
EDF <- function(X,x1) {  
  Xodr <- sort(X)
  n <- length(Xodr)
  if(n==0) return(NA)
  if(n==1) {  
    y <- approx(c(0.5*Xodr[1],Xodr),c(0,1),x1,
           method="constant",rule=2,f=0)
    return(y[[2]])
  }
  LB <- Xodr[1]-(Xodr[2]-Xodr[1])
  uniqueXodr<-unique(Xodr)
  z <- cumsum(table(Xodr))/n
  y <- approx(c(LB,uniqueXodr),c(0,z),x1,method="constant",
         rule=2,f=0)
  return(y[[2]])
}
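
# Sanity check (sketch): for complete data EDF should agree with the
# right-continuous step function returned by stats::ecdf.
if (FALSE)
{
	X <- runif(50)
	x1 <- seq(0, 1, by=0.01)
	all.equal(EDF(X, x1), ecdf(X)(x1))   # TRUE
}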

#========================================================

#==================================================================
# Compute a B-spline approximation of a continuous function over a
# closed interval [a,b]. 
# Input:
#   x -- a grid over which to evaluate the approximation 
#   fv -- a vector of sampled function values
#   knots.int -- the interior knot sequence. 
#            m=length(knots.int).
#   degree -- spline degree (default 3). degree=order-1
#   a -- left boundary of interval (default=0)
#   b -- right boundary of interval (default=1)
#   The length of fv must be equal to m+degree+1, i.e., the number of 
#   interior knots plus order of the spline; otherwise NULL is returned.
# Output: a vector of the spline values over x.
#====================================================================
Bspline <- function(x,fv,knots.int,degree=3,a=0,b=1) {  
  m <- length(knots.int)
  nf <- length(fv)
  if(nf!=m+degree+1) return(NULL)
  Smat <- bsmat(x,knots.int=knots.int,degree=degree,
            a=a,b=b)
  func.val <- Smat%*%fv
  return(func.val)
}

#=====================================================================
#  Compute B-spline matrix evaluated over a grid of values on a closed 
#  interval [a,b].
# Input:
#   x -- grid
#   knots.int -- the interior knot sequence
#   degree -- spline degree (default 3). degree=order-1
#   a -- left boundary of interval (default=0)
#   b -- right boundary of interval (default=1)
#======================================================================
bsmat <- function(x,knots.int,degree=3,a=0,b=1) {  
# To be conformal with de Boor's notation:
  k <- degree+1  # order of spline
# Number of interior knots:
  m <- length(knots.int)
# Number of Splines:
  n <- m+k
  d <- length(x)
  smat1 <- smat2 <- matrix(0,d,n)
# The initial (i.e., k=1) B spline matrix
  kt <- unique(sort(c(a,knots.int,b))) # initial extended knot seq
  el <- 1; nel <- m+el
  indx <- numeric(0)
  for(j in 1:nel) {  
    indx <- (1:d)[x>=kt[j]&x<kt[j+1]]
    smat1[indx,j] <- 1
  }
# Make the last piece (order-1 B spline) left-continuous at b:
  indx <- (1:d)[x==b]
  smat1[indx,nel] <- 1
  if(k==1) return(smat1)
# Start the iteration, done if el=k:
  while(el<k) {  
    el <- el+1
    nel <- m+el
    kt <- c(a,kt,b)
    for(j in 2:(nel-1)) {  
      smat2[,j] <- ((x-kt[j])/(kt[j+el-1]-kt[j]))*smat1[,j-1]+
                   ((kt[j+el]-x)/(kt[j+el]-kt[j+1]))*smat1[,j]
    }
    smat2[,1] <- ((kt[1+el]-x)/(kt[1+el]-kt[2]))*smat1[,1]
    smat2[,nel] <- ((x-kt[nel])/(kt[nel+el-1]-kt[nel]))*smat1[,nel-1]
    smat1 <- smat2
  }
  return(smat1)
}
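
# Sanity check (sketch): with clamped boundary knots the B-spline basis should
# form a partition of unity on [a,b], so each row of bsmat sums to 1; the knot
# sequence here is illustrative.
if (FALSE)
{
	S <- bsmat(seq(0, 1, by=0.01), knots.int=c(0.25, 0.5, 0.75), degree=3)
	range(rowSums(S))   # both endpoints approximately 1
}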

#=====================================================================

#==================================================================
# Compute variation-diminishing spline estimators of cdf and pdf on a 
# closed interval [0,1] over a grid of values, under the constraint 
# that the population distr. is stochastically smaller than the U(0,1). 
# The knot sequence is determined by linearly interpolated sample 
# quantiles at positions specified by user.
# Input:
#   x -- grid over which to evaluate the estimators
#   X.data -- vector of data based on which to compute the estimators
#   NOTE: The range of X.data must be in [0,1] in order to get 
#         meaningful estimates.
#   qpos -- quantile pos for knot generation, default (1:16)/17
#   a -- left boundary of interval, default 0
#   b -- right boundary of interval, default 1
#   degree -- degree of B spline for the cdf estimator, default 3
# Output: a two-column matrix; first column contains the cdf estimate
#         over x, 2nd column contains the pdf estimate over x 
#====================================================================
VDS.unif.est.Qknots <- function(x,X.data,qpos=(1:16)/17,a=0,b=1,degree=3,
                          print=T) 
{  
# Get unique interior knots:
# i.knots <- unique(pmin(quantile(X.data,qpos),qpos))
  i.knots <- unique(quantile(X.data,qpos))
  m <- length(i.knots)
# make sure that the interior knots don't touch the boundary:
  indx <- max((1:m)[i.knots<=a])
  if((!is.na(indx))&&(is.finite(indx))) {i.knots <- i.knots[(indx+1):m]; m <- m-indx}
  indx <- min((1:m)[i.knots>=b])
  if((!is.na(indx))&&(is.finite(indx))) {i.knots <- i.knots[1:(indx-1)]; m <- indx-1}
  m <- length(i.knots)
  knots <- unique(c(a,i.knots,b))
  nknots <- length(knots)
# To be conformal with de Boor's notation:
  k <- degree+1 
  n <- nknots+k-2  # so n = number of unique interior knots + order
  knots.ext <- c(rep(a,k-1),knots,rep(b,k-1))
  ts <- apply(matrix(1:n,n,1),1,
          function(i,tk,k) {  
            return(sum(tk[(i+1):(i+k-1)])/(k-1))
          },knots.ext,k
        )
# set the empirical cdf above the unif(0,1) cdf for the estimator
# to obey the stochastic order constraint:
  Ecdf <- pmax(EDF(X.data,ts),ts)
# Ecdf <- EDF(X.data,ts)
# lambn <- (ts[n-1]-ts[n-2])/(ts[n]-ts[n-2])
# Ecdf[n-1] <- lambn*Ecdf[n]+(1-lambn)*Ecdf[n-2]
  Epdf <- (k-1)*(Ecdf[2:n]-Ecdf[1:(n-1)])/
                (knots.ext[(2:n)+k-1]-knots.ext[2:n])
  if(print) {  
    cat("knots =",knots,"\n") 
    cat("knots.ext =",knots.ext,"\n")
    cat("ts =",ts,"\n")
    cat("Ecdf =",Ecdf,"\n")
    cat("Epdf =",Epdf,"\n")
  }
  VDcdf <- Bspline(x,Ecdf,knots[2:(nknots-1)],degree)
  VDpdf <- Bspline(x,Epdf,knots[2:(nknots-1)],degree-1)
  return(cbind(VDcdf,VDpdf))
}
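
# Example (sketch): evaluate the V-D spline cdf/pdf estimates for simulated
# data on a grid; the boundary-knot check may emit a harmless warning from
# max()/min() on an empty index set when no knot touches 0 or 1.
if (FALSE)
{
	Z <- VDS.unif.est.Qknots(seq(0, 1, by=0.01), runif(200), print=FALSE)
	head(Z)   # column 1: cdf estimate, column 2: pdf estimate
}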