In mshasan/OPWpaper: Reproduce figures and simulations of OPWeight

knitr::opts_chunk$set(fig.width = 8.5, fig.height = 4)
knitr::opts_chunk$set(tidy = FALSE, cache = FALSE, autodep = TRUE)

Relationship between covariate-effect and test-effect

The basis of the Covariate Rank Weighting (CRW) method is that there is a positive association between the test and covariate effect sizes. Therefore, we want to see the relationship between the test-effects and the ranks obtained from the covariate-effects. One way of doing so is to compute the ranks probability of the tests when the effect size is given from the data instead of the independent source, and expect similar probability curves for the strong relationship.

We observed the relationship via simulations. For the simulation, we assumed that $\varepsilon_y, \varepsilon_t \sim BVN(0, 0, 1, 1, \rho)$ and $\varepsilon_y \sim N(0,1)$ and $\varepsilon_t \sim N(0,1)$, where the correlation coefficient, $\rho$, is chosen arbitrarily. Consequently, the conditional distribution of $\varepsilon_y$ given $\varepsilon_t$ is $\varepsilon_y \mid \varepsilon_t \sim Normal(\rho \varepsilon_t, 1-\rho^2 )$. Our goal is to observe the change of the relationship between the covariate and the test effects with the change of $\rho$. We would expect that the probability plots computed from test effects are similar to the probability plots compute from the covariate effects if the correlation between the effects are strong.

Below we generate the necessary plots to demonstrate the relationship between the covariate-effect and the test-effect sizes.

Load the necessary libraries

library(OPWpaper)       
library(ggplot2)
library(reshape2)       # library for the melt function
library(cowplot)        # plot_grid function

Load the data stored in r Biocpkg("OPWpaper")

load(system.file("simulations/results", "simu_probRel_CovVsTest_effect.RDATA",
                 package = "OPWpaper"), envir = environment())

# nice plots---------
ranks = 1:100

# extract the legend from one of the plots--------------------------------------
datRelaion1 <- data.frame(ranks, prob0_ed2, prob1_ed2, prob_test0_cor.2_ed2, prob_test1_cor.2_ed2)
colnames(datRelaion1) <- c("ranks", "CH0","CH1","TH0","TH1")
datRelaion1_melt1 <- melt(datRelaion1, id.var="ranks")

p_.2 <- ggplot(datRelaion1_melt1, aes(x = ranks, y = value, group = variable, colour = variable)) +
    geom_line(aes(linetype = variable), size = 1.5) +
    labs(x = "ranks", y = "p(rank | effect)", title = "cor = .5") +
    theme(legend.title = element_blank(), legend.position="bottom")

legend_rel <- get_legend(p_.2 + theme(legend.direction="horizontal", legend.position="bottom"))

# for 90% ---------
dat_.2_.9_ed2<- cbind(prob02_ed2, prob12_ed2, prob_test0_cor.22_ed2, prob_test1_cor.22_ed2)
p_.2_.9_ed2 <-  nice_plots(x_vec = ranks, y_matrix = dat_.2_.9_ed2, cor = .2, figure = "ranksProb")

dat_.5_.9_ed2<- cbind(prob02_ed2, prob12_ed2, prob_test0_cor.52_ed2, prob_test1_cor.52_ed2)
p_.5_.9_ed2 <-  nice_plots(x_vec = ranks, y_matrix = dat_.5_.9_ed2, cor = .5, figure = "ranksProb")

dat_.8_.9_ed2<- cbind(prob02_ed2, prob12_ed2, prob_test0_cor.82_ed2, prob_test1_cor.82_ed2)
p_.8_.9_ed2 <-  nice_plots(x_vec = ranks, y_matrix = dat_.8_.9_ed2, cor = .8, figure = "ranksProb")


p_rel2 = plot_grid(p_.2_.9_ed2, p_.5_.9_ed2, p_.8_.9_ed2, ncol = 3, labels = letters[1:3], align = 'hv')
title2 <- ggdraw() + draw_label("et = 2, m0 = 90, m1 = 10")
plot_grid(title2, p_rel2, legend_rel, ncol = 1, rel_heights=c(.1, 1, .1))

Figure 1: The relationship between the test effect ($T$) and the covariate effect ($F$) in terms of the ranks probability of the test given the test-effect size, $P(r_y=k \mid \varepsilon_t )$. In the legend, the first letter represents the source of the effects, and $H0$ and $H1$ represent the null and the alternative hypothesis, respectively. The number of hypothesis tests is $m=100$, of which $m_0 = 90$ are true null and $m_1 = 10$ are true alternative tests; the mean test-effect ($ey$) of the alternative test is $\varepsilon_t=2$; and the correlation varies by $\rho={.2,.5,.8}$.

# for 50% -----------------
dat_.2_.5_ed2<- cbind(prob0_ed2, prob1_ed2, prob_test0_cor.2_ed2, prob_test1_cor.2_ed2)
p_.2_.5_ed2 <-  nice_plots(x_vec = ranks, y_matrix = dat_.2_.5_ed2, cor = .2, figure = "ranksProb")

dat_.5_.5_ed2<- cbind(prob0_ed2, prob1_ed2, prob_test0_cor.5_ed2, prob_test1_cor.5_ed2)
p_.5_.5_ed2 <-  nice_plots(x_vec = ranks, y_matrix = dat_.5_.5_ed2, cor = .5, figure = "ranksProb")

dat_.8_.5_ed2<- cbind(prob0_ed2, prob1_ed2, prob_test0_cor.8_ed2, prob_test1_cor.8_ed2)
p_.8_.5_ed2 <-  nice_plots(x_vec = ranks, y_matrix = dat_.8_.5_ed2, cor = .8, figure = "ranksProb")


p_rel1 = plot_grid(p_.2_.5_ed2, p_.5_.5_ed2, p_.8_.5_ed2, ncol = 3, labels = letters[1:3], align = 'hv')
title1 <- ggdraw() + draw_label("et = 2, m0 = 50, m1 = 50")
plot_grid(title1, p_rel1, legend_rel, ncol = 1, rel_heights=c(.1, 1, .1))

Figure 2: Relationship when the number of hypothesis tests is $m=100$, of which $m_0 = 50$ are true null and $m_1 = 50$ are true alternative tests; the mean test-effect ($ey$) of the alternative test is $\varepsilon_t=2$; and the correlation varies by $\rho={.2,.5,.8}$.