inst/doc/OPWeight.R

## ----setup, echo = FALSE------------------------------------------------------
knitr::opts_chunk$set(tidy = FALSE, cache = FALSE, autodep = TRUE)

## ----loadlibs, message=FALSE, warning=FALSE-----------------------------------
# install OPWeight package
if (!requireNamespace("BiocManager", quietly=TRUE))
    install.packages("BiocManager")
BiocManager::install("OPWeight", suppressUpdates=TRUE)

# load packages
library("OPWeight")
library("ggplot2")
library("airway")
library("DESeq2")
library(cowplot)
library(tibble)
library(qvalue)
library(MASS)

## ----dataProcessing, message=FALSE, warning=FALSE-----------------------------
data("airway", package = "airway")
dds <- DESeqDataSet(se = airway, design = ~ cell + dex)
dds <- DESeq(dds)
de_res_air <- results(dds)

## ----colnames_de_res_air------------------------------------------------------
colnames(de_res_air)
dim(de_res_air)

## ----loadOPWeight, message=FALSE, warning=FALSE-------------------------------
filters = de_res_air$baseMean + .0001  # add a small constant to make all values positive
set.seed(123)
# one should use more nrep to obtain more acurate results
opw_results <- opw(pvalue = de_res_air$pvalue, filter = filters, nrep = 1000,
                   alpha = .1, tail = 2, effectType = "continuous", method = "BH")

## ----outputsName--------------------------------------------------------------
names(opw_results)

## ----nullProp-----------------------------------------------------------------
opw_results$nullProp

## ----no.OfRejections----------------------------------------------------------
opw_results$rejections

## ----probVsWeight-------------------------------------------------------------
m = opw_results$totalTests
testRanks = 1:m
probs = opw_results$ranksProb
weights = opw_results$weight
dat = tibble(testRanks, probs, weights)
p_prob = ggplot(dat, aes(x = testRanks, y = probs)) + geom_point(size = .5, col="firebrick4") + 
    labs(x = "Test rank" , y = "p(rank | effect)")
p_weight = ggplot(dat, aes(x = testRanks, y = weights)) + geom_point(size = .5, col="firebrick4")+
    labs(x = "Test rank" , y = "Weight")
plot_grid(p_prob, p_weight, labels = c("a", "b"))

## ----summaryPlots-------------------------------------------------------------
Data <- tibble(pval=de_res_air$pvalue, filter=de_res_air$baseMean)


hist_pval <- ggplot(Data, aes(x = Data$pval)) +
        geom_histogram(colour = "#1F3552", fill = "#4281AE")+
		labs(x = "P-values")

hist_filter <- ggplot(Data, aes(x = Data$filter)) +
        geom_histogram( colour = "#1F3552", fill = "#4274AE")+
		labs(x = "Filter statistics")
        
pval_filter <- ggplot(Data, aes(x = rank(-Data$filter), y = -log10(pval))) +
		geom_point()+
		labs(x = "Ranks of filters", y = "-log(pvalue)")
		
p_ecdf <- ggplot(Data, aes(x = pval)) +
			stat_ecdf(geom = "step")+
			labs(x = "P-values", title="Empirical cumulative distribution")+
			theme(plot.title = element_text(size = rel(.7)))

p <- plot_grid(hist_pval, hist_filter, pval_filter, p_ecdf, 
               labels = c("a", "b", "c", "d"), ncol = 2)
# now add the title
title <- ggdraw() + draw_label("Airway: data summary")
plot_grid(title, p, ncol = 1, rel_heights=c(0.1, 1)) 


## ----dataAnalysis-------------------------------------------------------------
# initial stage--------
pvals = de_res_air$pvalue
tests = qnorm(pvals/2, lower.tail = FALSE)
filters = de_res_air$baseMean + .0001    # to ensure filters are postive for the box-cox

# formulate a data set-------------
Data = tibble(pvals, filters)
OD <- Data[order(Data$filters, decreasing=TRUE), ]
Ordered.pvalue <- OD$pvals


# estimate the true null and alternative test sizes------
m = length(Data$pvals); m
nullProp = qvalue(Data$pvals, pi0.method="bootstrap")$pi0; nullProp
m0 = ceiling(nullProp*m); m0
m1 = m - m0; m1

# fit box-cox regression
#--------------------------------

bc <- boxcox(filters ~ tests)
lambda <- bc$x[which.max(bc$y)]; lambda
model <- lm(filters^lambda ~ tests)

# etimated test efects of the true altrnatives------------
test_effect = if(m1 == 0) {0
} else {sort(tests, decreasing = TRUE)[1:m1]}		# two-tailed test

# for the continuous effects etimated mean effects
mean_testEffect = mean(test_effect, na.rm = TRUE)
mean_testEffect
mean_filterEffect = model$coef[[1]] + model$coef[[2]]*mean_testEffect
mean_filterEffect

## ----ranksProb----------------------------------------------------------------
set.seed(123)
prob_cont <- sapply(1:m, prob_rank_givenEffect, et = mean_filterEffect,
                   ey = mean_filterEffect, nrep = 1000, m0 = m0, m1 = m1)

## ----weights------------------------------------------------------------------
# delInterval can be smaller to otain the better results
wgt <- weight_continuous(alpha = .1, et = mean_testEffect, m = m, 
                         delInterval = .01, ranksProb = prob_cont)

## ----results------------------------------------------------------------------
alpha = .1
padj <- p.adjust(Ordered.pvalue/wgt, method = "BH")
rejections_list = OD[which((padj <= alpha) == TRUE), ]
n_rejections = dim(rejections_list)[1]
n_rejections

## -----------------------------------------------------------------------------
opw_results2 <- opw(pvalue = pvals, filter = filters, weight = wgt, 
                     effectType = "continuous", alpha = .1, method = "BH")
opw_results2$rejections

Try the OPWeight package in your browser

Any scripts or data that you put into this service are public.

OPWeight documentation built on Nov. 8, 2020, 11:06 p.m.