setwd("U:/Documents/My Research (UGA)/Multiple Hypoetheses/Article-1")
library(IHW)
library(IHWpaper)
library(OPWeight) # proposed library
library(dplyr) # group_by function
library(reshape2) # melt
library(ggplot2) # ggplot
library(cowplot) # nice plots
library(MASS) # box-cox transformation
library(gridExtra) # grid.arrange fucntion
# data processing
#-------------------------
proteomics_file <- system.file("extdata/real_data","science_signaling.csv", package = "IHWpaper")
proteomics_df <- read.csv(proteomics_file, stringsAsFactors = F)
# pvalues were adjusted by BH method so rewrite to obtain orginal pvlaues
proteomics_df$pvalue <- rank(proteomics_df$p1, ties.method="first")*proteomics_df$p1/nrow(proteomics_df)
proteomics_df$test = qnorm(proteomics_df$pvalue, lower.tail = F)
names(proteomics_df)
pval = proteomics_df$pvalue
test = qnorm(pval, lower.tail = FALSE)
test[which(!is.finite(test))] <- NA
covariate = proteomics_df$X..peptides
Data <- tibble(test, pval, covariate) # data of covariate covariate and pvlaues
bc2 <- boxcox(covariate ~ test)
trans2 <- bc2$x[which.max(bc2$y)]
model_prot <- lm(covariate^trans2 ~ test)
# summary statistics of the data
#------------------------------------
barlines <- "#1F3552"
hist_test <- ggplot(Data, aes(x = Data$test)) +
geom_histogram(aes(y = ..density..), binwidth = 1,
colour = barlines, fill = "#4271AE") +
labs(x = "Test statistics")
hist_covariate <- ggplot(Data, aes(x = Data$covariate)) +
geom_histogram(aes(y = ..density..),
colour = barlines, fill = "#4274AE") +
labs(x = "covariate")
hist_pval <- ggplot(Data, aes(x = Data$pval)) +
geom_histogram(aes(y = ..density..),
colour = barlines, fill = "#4281AE")+
labs(x = "P-values")
pval_covariate <- ggplot(Data, aes(x = rank(-Data$covariate), y = -log10(pval))) +
geom_point()+
labs(x = "Ranks of covariates", y = "-log(pvalue)")
p_ecdf <- ggplot(Data, aes(x = pval)) +
stat_ecdf(geom = "step")+
labs(x = "P-values", title="empirical cumulative distribution")+
theme(plot.title = element_text(size = rel(.7)))
qqplot.data <- function (vec) # argument: vector of numbers
{
# following four lines from base R's qqline()
y <- quantile(vec[!is.na(vec)], c(0.25, 0.75))
x <- qnorm(c(0.25, 0.75))
slope <- diff(y)/diff(x)
int <- y[1L] - slope * x[1L]
d <- data.frame(resids = vec)
ggplot(d, aes(sample = resids)) + stat_qq() +
geom_abline(slope = slope, intercept = int, col="red")+
labs(x = "Normal quantiles", y = "Fitted values",
title = expression(paste("Model: ", covariate^(-1.414), " ~ ", beta[0] + beta[1]*test)))+
theme(plot.title = element_text(size = rel(.7)))
}
qqplot <- qqplot.data(model_prot$fit)
p_prot = plot_grid(hist_test, hist_covariate, hist_pval, pval_covariate, p_ecdf, qqplot,
ncol = 3, labels = letters[1:6], align = 'hv')
title_prot <- ggdraw() + draw_label("Proteomics: data summary")
plot_grid(title_prot, p_prot, ncol = 1, rel_heights=c(.1, 1))
# applying proposed methods------------
n_rej_cont <- NULL
n_rej_bin <- NULL
for(alphaVal in seq(.05, .1, length = 5))
{
set.seed(123)
res_cont = opw(pvalue = pval, covariate = covariate, effectType = "continuous",
alpha = alphaVal, method = "BH")
res_bin = opw(pvalue = pval, covariate = covariate, effectType = "binary",
alpha = alphaVal, method = "BH")
n_rej_cont <- c(n_rej_cont, res_cont$rejections)
n_rej_bin <- c(n_rej_bin, res_bin$rejections)
}
# from IHW paper-------------
alpha = seq(.05, .1, length = 5)
proteomics_file <- system.file("real_data_examples/result_files", "proteomics_benchmark.Rds", package = "IHWpaper")
proteomics_data <- readRDS(file=proteomics_file)
panel_c_data <- group_by(proteomics_data$alpha_df, alpha) %>%
summarize(BH = max(bh_rejections), IHW=max(rejections))
# rejected test from different methods-------------
rej_mat_prot_FDR <- data.frame(n_rej_bin, n_rej_cont, panel_c_data)
colnames(rej_mat_prot_FDR) <- c("PRO_bin","PRO_cont", "alpha", "BH","IHW")
rej_mat_prot_FDR2 <- melt(rej_mat_prot_FDR, id.var = "alpha")
p_fdr_prot <- ggplot(rej_mat_prot_FDR2, aes(x = alpha, y = value, group = variable,
colour = variable)) +
geom_line(aes(linetype = variable), size = 1.5) +
labs(x = expression(bold(paste("Nominal ",alpha))),
y = "Discoveries", title = "Proteomics data") +
theme(legend.title = element_blank(), legend.position="bottom")
p_fdr_prot
# save the results-------------------
save.image("proteomics_data_example.RDATA")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.