DSS: Dispersion shrinkage for sequencing data

locfdr <-
function(zz, bre = 120, df = 7, pct = 0, pct0 = 1/4, nulltype = 1, type = 0, plot = 1, mult, mlests, main = " ", sw = 0)
{
  call = match.call()
	if(length(bre) > 1) {
		lo <- min(bre)
		up <- max(bre)
		bre <- length(bre)
	}
	else {
		if(length(pct) > 1) {
			lo <- pct[1]
			up <- pct[2]
		}
		else {
			if(pct == 0) {
				lo <- min(zz)
				up <- max(zz)
			}
			if(pct < 0) {
				med = median(zz)
				ra = med + (1 - pct) * (range(zz) - med)
				lo = ra[1]
				up = ra[2]
			}
			if(pct > 0) {
				v <- quantile(zz, c(pct, 1 - pct))
				lo <- v[1]
				up <- v[2]
			}
		}
	}
	zzz <- pmax(pmin(zz, up), lo)
	breaks <- seq(lo, up, length = bre)
	zh <- hist(zzz, breaks = breaks, plot = F)
	x <- (breaks[-1] + breaks[ - length(breaks)])/2
	yall <- y <- zh$counts
	K <- length(y)
	N <- length(zz)
	if(pct > 0) {
		y[1.] <- min(y[1.], 1.)
		y[K] <- min(y[K], 1.)
	}
	if(type == 0) {
                X <- cbind(1, ns(x, df = df))
		f <- glm(y ~ ns(x, df = df), poisson)$fit
	}
	else {
                X <- cbind(1, poly(x, df = df))
		f <- glm(y ~ poly(x, df = df), poisson)$fit
	}
	l <- log(f)
	Fl <- cumsum(f)
	Fr <- cumsum(rev(f))
	D <- (y - f)/(f + 1)^0.5
	D <- sum(D[2:(K - 1)]^2)/(K - 2 - df)
	if(D > 1.5)
          warning(paste("f(z) misfit = ", round(
			D, 1), ".  Rerun with increased df", sep=""))
        # ............. create fp0 matrix ..........................
        if (nulltype == 3) {
                fp0 = matrix(NA, 6, 4)
                colnames(fp0) = c("delta", "sigleft", "p0", "sigright")
        }
        else {
                fp0 = matrix(NA, 6, 3)
                colnames(fp0) = c("delta", "sigma", "p0")
        }
        rownames(fp0) = c("thest", "theSD", "mlest", "mleSD", "cmest", "cmeSD")
        fp0["thest", 1:2] = c(0,1)
        fp0["theSD", 1:2] = 0
	# ..............begin central matching f0 calcs...............        
	imax <- seq(l)[l == max(l)][1]
	xmax <- x[imax]
	if(length(pct0) == 1) {
		pctup <- 1 - pct0
		pctlo <- pct0
	}
	else {
		pctlo <- pct0[1]
		pctup <- pct0[2]
	}
	lo0 <- quantile(zz, pctlo)
	hi0 <- quantile(zz, pctup)
	nx <- length(x)
	i0 <- (1.:nx)[x > lo0 & x < hi0]
	x0 <- x[i0]
	y0 <- l[i0]
	if(nulltype == 3) {
		X00 <- cbind((x0 - xmax)^2, pmax(x0 - xmax, 0)^2)
	}
	else {
		X00 <- cbind(x0 - xmax, (x0 - xmax)^2)
	}
	lr <- lm(y0 ~ X00)
	co <- lr$coef
        ## Error messages for failed CM estimation ##
        if (nulltype == 3) {
          cmerror = I(is.na(co[3]) | is.na(co[2]))
          if (!cmerror) cmerror = I(co[2] >= 0 | co[2]+co[3]>=0)
        }
        else {
          cmerror = is.na(co[3])
          if (!cmerror) cmerror = I(co[3] >= 0)
        }
        if (cmerror) {
          if (nulltype == 3)
            stop("CM estimation failed.  Rerun with nulltype = 1 or 2.")
          else
            if (nulltype == 2)
            stop("CM estimation failed.  Rerun with nulltype = 1.")
          else {
            X0 <- cbind(1, x - xmax, (x - xmax)^2)
            warning("CM estimation failed, middle of histogram non-normal")
          }
        }
        else {
	  if(nulltype == 3) {
		X0 <- cbind(1, (x - xmax)^2, pmax(x - xmax, 0)^2)
                sigs <- 1/sqrt(-2 * (c(co[2], co[2] + co[3])))
                fp0["cmest", c(1,2,4)] <- c(xmax, sigs)
	  }
	  else {
		X0 <- cbind(1, x - xmax, (x - xmax)^2)
                xmaxx <-  - co[2.]/(2. * co[3.]) + xmax
                sighat <- 1./sqrt(-2. * co[3.])
                fp0["cmest", 1:2] <- c(xmaxx, sighat)
	  }
	  l0 <- as.vector(X0 %*% co)
	  f0 <- exp(l0)
	  p0 <- sum(f0)/sum(f)
	  f0 <- f0/p0
          fp0["cmest", 3] <- p0
        }
	#............... begin MLE f0 calcs ........................
        b = 4.3 * exp(-0.26*log(N,10))
        if(missing(mlests)){
          med = median(zz);sc=diff(quantile(zz)[c(2,4)])/(2*qnorm(.75))
          mlests = locmle(zz, xlim=c(med, b*sc))
          if (N>500000) {
            warning("length(zz) > 500,000: For ML estimation, a wider interval than optimal was used.  To use the optimal interval, rerun with mlests = c(", mlests[1], ", ", b * mlests[2], ").\n", sep="")
            mlests = locmle(zz, xlim=c(med, sc))
          }
        }
	if (!is.na(mlests[1])) {
          if (N>500000) b = 1
          if (nulltype == 1) {
              Cov.in = list(x=x, X=X, f=f, sw=sw)
              ml.out = locmle(zz, xlim = c(mlests[1], b * mlests[2]),
                d=mlests[1], s=mlests[2], Cov.in=Cov.in)
              mlests = ml.out$mle
            }
            else  mlests = locmle(zz, xlim = c(mlests[1], b * mlests[2]),
                d=mlests[1], s=mlests[2])
            fp0["mlest", 1:3] = mlests[1:3]
            fp0["mleSD", 1:3] = mlests[4:6]
        }
        if (sum(is.na(fp0[c(3,5),1:2])) == 0 & nulltype > 1)
          if(abs(fp0["cmest",1] - mlests[1]) > 0.050000000000000003 |
             abs(log(fp0["cmest",2]/mlests[2])) > 0.050000000000000003)
		warning("Discrepancy between central matching and maximum likelihood estimates.\nConsider rerunning with nulltype = 1")
        ## Error messages for failed ML estimation ##
        if (is.na(mlests[1])) {
          if (nulltype == 1) {
            if (is.na(fp0["cmest", 1]))
              stop("CM and ML Estimation failed, middle of histogram non-normal")
            else stop("ML estimation failed.  Rerun with nulltype=2")
          }
          else warning("ML Estimation failed")
        }
	if(nulltype < 2) {
		delhat = xmax = xmaxx = mlests[1]
		sighat = mlests[2]
		p0 = mlests[3]
		f0 = dnorm(x, delhat, sighat)
		f0 = (sum(f) * f0)/sum(f0)
	}
        fdr = pmin((p0 * f0)/f, 1)
	f00 <- exp( - x^2/2)
	f00 <- (f00 * sum(f))/sum(f00)
	p0theo <- sum(f[i0])/sum(f00[i0])
        fp0["thest", 3] = p0theo
	fdr0 <- pmin((p0theo * f00)/f, 1)
	f0p <- p0 * f0
	if(nulltype == 0)
		f0p <- p0theo * f00
	F0l <- cumsum(f0p)
	F0r <- cumsum(rev(f0p))
	Fdrl <- F0l/Fl
	Fdrr <- rev(F0r/Fr)
	Int <- (1 - fdr) * f * (fdr < 0.90000000000000002)
	##### raise fdr to 1 near xmax .............
        if (sum(x <= xmax & fdr == 1) > 0)
          xxlo <- min(x[x <= xmax & fdr == 1])
        else xxlo = xmax
        if (sum(x >= xmax & fdr == 1) > 0)
          xxhi <- max(x[x >= xmax & fdr == 1])
        else xxhi = xmax
        if (sum(x >= xxlo & x <= xxhi) > 0)
          fdr[x >= xxlo & x <= xxhi] <- 1
        if (sum(x <= xmax & fdr0 == 1) > 0)
          xxlo <- min(x[x <= xmax & fdr0 == 1])
        else xxlo = xmax
        if (sum(x >= xmax & fdr0 == 1) > 0)
          xxhi <- max(x[x >= xmax & fdr0 == 1])
        else xxhi = xmax
	if (sum(x >= xxlo & x <= xxhi) > 0)
          fdr0[x >= xxlo & x <= xxhi] <- 1
	##################### raise fdr to 1 for mle option
	if(nulltype == 1) {
		fdr[x >= mlests[1] - mlests[2] & x <= mlests[1] + mlests[
			2]] = 1
		fdr0[x >= mlests[1] - mlests[2] & x <= mlests[1] + mlests[
			2]] = 1
	}
	p1 <- sum((1 - fdr) * f)/N
	p1theo <- sum((1 - fdr0) * f)/N
	fall <- f + (yall - y)
	########Efdr1 calculations
	Efdr <- sum((1 - fdr) * fdr * fall)/sum((1 - fdr) * fall)
	Efdrtheo <- sum((1 - fdr0) * fdr0 * fall)/sum((1 - fdr0) * fall)
	iup <- (1:K)[x >= xmax]
	ido <- (1:K)[x <= xmax]
	Eleft <- sum((1 - fdr[ido]) * fdr[ido] * fall[ido])/sum((1 - fdr[
		ido]) * fall[ido])
	Eleft0 <- sum((1 - fdr0[ido]) * fdr0[ido] * fall[ido])/sum((1 -
		fdr0[ido]) * fall[ido])
	Eright <- sum((1 - fdr[iup]) * fdr[iup] * fall[iup])/sum((1 - fdr[
		iup]) * fall[iup])
	Eright0 <- sum((1 - fdr0[iup]) * fdr0[iup] * fall[iup])/sum((
		1 - fdr0[iup]) * fall[iup])
	Efdr <- c(Efdr, Eleft, Eright, Efdrtheo, Eleft0, Eright0)
        Efdr[which(is.na(Efdr))] = 1
	names(Efdr) <- c("Efdr", "Eleft", "Eright", "Efdrtheo", "Eleft0",
		"Eright0")
	if(nulltype == 0)
		f1 <- (1 - fdr0) * fall
	else f1 <- (1 - fdr) * fall
	############ multiple sample size Efdr1 calculation
	if(!missing(mult)) {
		mul = c(1, mult)
		EE = rep(0, length(mul))
		for(m in 1:length(EE)) {
			xe = sqrt(mul[m]) * x
			f1e = approx(xe, f1, x, rule = 2, ties=mean)$y
			f1e = (f1e * sum(f1))/sum(f1e)
			f0e = f0
			p0e = p0
			if(nulltype == 0) {
				f0e = f00
				p0e = p0theo
			}
			fdre = (p0e * f0e)/(p0e * f0e + f1e)
			EE[m] = sum(f1e * fdre)/sum(f1e)
		}
		EE = EE/EE[1]
		names(EE) = mul
	}
	#................. Accuracy Calcs .................................
        Cov2.out = loccov2(X, X0, i0, f, fp0["cmest",], N)
        Cov0.out = loccov2(X, matrix(1, length(x), 1), i0, f, fp0["thest",], N)
        if(sw == 3) {
                if (nulltype==0) Ilfdr = Cov0.out$Ilfdr
                else if (nulltype==1) Ilfdr = ml.out$Ilfdr
		else if (nulltype==2) Ilfdr = Cov2.out$Ilfdr
                else stop("With sw=3, nulltype must equal 0, 1, or 2.")
	        return(Ilfdr)
              }
        if (nulltype == 0) Cov = Cov0.out$Cov
        else if (nulltype == 1) Cov = ml.out$Cov.lfdr
        else Cov = Cov2.out$Cov
	lfdrse <- diag(Cov)^0.5
        fp0["cmeSD",1:3] = Cov2.out$stdev[c(2,3,1)]
        if (nulltype==3) fp0["cmeSD",4] = fp0["cmeSD",2]
        fp0["theSD",3] = Cov0.out$stdev[1]
	########### sw==2 returns Influence function pds. ##########
	if(sw == 2) {
          if (nulltype==0) {
             pds = fp0["thest", c(3,1,2)]
             stdev = fp0["theSD", c(3,1,2)]
             pds. = t(Cov0.out$pds.)
          }
          else if (nulltype==1) {
            pds = fp0["mlest",c(3,1,2)]
            stdev = fp0["mleSD",c(3,1,2)]
            pds. = t(ml.out$pds.)
          }
          else if (nulltype==2) {
            pds = fp0["cmest",c(3,1,2)]
            stdev = fp0["cmeSD", c(3,1,2)]
            pds. = t(Cov2.out$pds.)
          }
          else stop("With sw=2, nulltype must equal 0, 1, or 2.")
          colnames(pds.) = names(pds) = c("p0", "delhat", "sighat")
          names(stdev) = c("sdp0", "sddelhat", "sdsighat")
	  return(list(pds=pds, x=x, f=f, pds.=pds., stdev=stdev))
        }
	# find cdf1, the cdf of fdr according to f1 density..................
	p1 <- seq(0.01, 0.99, 0.01)
	cdf1 <- rep(0, 99)
	fd <- fdr
	if(nulltype == 0)
		fd <- fdr0
	for(i in 1:99)
		cdf1[i] <- sum(f1[fd <= p1[i]])
        cdf1 <- cbind(p1, cdf1/cdf1[99])
	mat <- cbind(x, fdr, Fdrl, Fdrr, f, f0, f00, fdr0, yall, lfdrse,
		f1)
	namat <- c("x", "fdr", "Fdrleft", "Fdrright", "f", "f0", "f0theo",
		"fdrtheo", "counts", "lfdrse", "p1f1")
	if(nulltype == 0)
		namat[c(3, 4, 10)] <- c("Fdrltheo", "Fdrrtheo", 
			"lfdrsetheo")
	dimnames(mat) <- list(NULL, namat)
        ############## Locations of triangles ###########
        z.2 = rep(NA, 2)
        m = order(fd)[nx]
        if (fd[nx] < 0.2) {
          z.2[2] = approx(fd[m:nx], x[m:nx], 0.20000000000000001,
                               ties=mean)$y
        }
        if (fd[1] < 0.2) {
          z.2[1] = approx(fd[1:m], x[1:m], 0.20000000000000001,
                              ties=mean)$y
        }
        ################### Plotting ####################
	if(plot > 0) {
		if(plot == 2 | plot == 3)
			oldpar <- par(mfrow = c(1, 2), pty = "m")
                else if (plot ==4) oldpar = par(mfrow = c(1, 3), pty = "m")
		hist(zzz, breaks = breaks, xlab = " ", main = main)
		################### make yt positive ##############
		yt <- pmax(yall * (1 - fd), 0)
		for(k in 1:K)
		  lines(c(x[k], x[k]), c(0, yt[k]), lwd = 2, col = 6)
		if(nulltype == 3)
			title(xlab = paste("delta=", round(xmax, 3),
				"sigleft=", round(sigs[1], 3), 
				" sigright=", round(sigs[2], 3), "p0=",
				round(fp0["cmest", 3], 3)))
		if(nulltype == 1 | nulltype == 2)
			title(xlab = paste("MLE: delta:", round(
				mlests[1], 3), "sigma:", round(mlests[
				2], 3), "p0:", round(mlests[3], 3)),
                                sub = paste("CME: delta:",
                                            round(fp0["cmest",1], 3),
                                            "sigma:", round(fp0["cmest",2], 3),
                                            "p0:", round(fp0["cmest", 3], 3)))
		lines(x, f, lwd = 3, col = 3)
		if(nulltype == 0)
			lines(x, p0theo*f00, lwd = 2, lty = 2, col = 4)
		else
			lines(x, p0*f0, lwd = 2, lty = 2, col = 4)
                ################## Plot triangles ###############
                if (!is.na(z.2[2]))
		   points(z.2[2], -0.5, pch = 24, col="red", bg="yellow")
                if(!is.na(z.2[1]))
		   points(z.2[1], -0.5, pch = 24, col="red", bg="yellow")
		if(nulltype == 1 | nulltype ==2)
			Ef <- Efdr[1]
		else if (nulltype == 0) Ef <- Efdr[4]
		if (plot == 2 | plot == 4) {
			if(nulltype == 0)
				fdd <- fdr0
			else fdd = fdr
			matplot(x, cbind(fdd, Fdrl, Fdrr), type = "l",
				lwd = 3, xlab = " ", ylim = c(0, 
				1.1000000000000001), main = 
				"fdr (solid); Fdr's (dashed)")
			title(xlab = paste("Efdr= ", round(Ef, 3)))
			abline(0, 0, lty = 3, col = 2)
			lines(c(0, 0), c(0, 1), lty = 3, col = 2)
		}
		if (plot == 3 | plot == 4) {
			if(sum(is.na(cdf1[, 2])) == nrow(cdf1))
				warning("cdf1 not available")
			else {
				plot(cdf1[, 1], cdf1[, 2], type = "l",
					lwd = 3, xlab = "fdr level", ylim
					 = c(0, 1), ylab = 
					"f1 proportion < fdr level", main
					 = "f1 cdf of estimated fdr")
				title(sub = paste("Efdr= ", round(Ef,
					3)))
				lines(c(0.20000000000000001, 
					0.20000000000000001), c(0, cdf1[
					20, 2]), col = 4, lty = 2)
				lines(c(0, 0.20000000000000001), rep(
					cdf1[20, 2], 2), col = 4, lty = 2)
				text(0.050000000000000003, cdf1[20, 2],
					round(cdf1[20, 2], 2))
				abline(0, 0, col = 2)
				lines(c(0, 0), c(0, 1), col = 2)
			}
		}
		if(plot > 1)
			par(oldpar)
	}
	if(nulltype == 0) {
		ffdr <- approx(x, fdr0, zz, rule = 2, ties="ordered")$y
	}
	else ffdr <- approx(x, fdr, zz, rule = 2, ties="ordered")$y
	vl = list(fdr = ffdr, fp0 = fp0, Efdr = Efdr, cdf1 = cdf1,  mat = mat,
          z.2 = z.2)
	if(!missing(mult))
		vl$mult = EE
        vl$call = call
	vl
}