knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(simulateGP)
library(MASS)
library(tidyverse)
library(ggplot2)

Sample overlap

$$ \begin{bmatrix} \hat{\beta}_1\ \hat{\beta}_2 \end{bmatrix} = MVN\left ( \begin{bmatrix} \beta_1\ \beta_2 \end{bmatrix}, \boldsymbol{S} \right ) $$

$$ \boldsymbol{C} = \boldsymbol{S}

\begin{bmatrix} N_1 & \rho_{1,2}\frac{N_O}{\sqrt{N_1 N_2}}\ \rho_{1,2}\frac{N_O}{\sqrt{N_1 N_2}} & N_2 \end{bmatrix}

\boldsymbol{S} $$

betas <- function(b1, b2, se1, se2, n1, n2, pcor, n_overlap, Nrep=1)
{
    mu <- c(b1,b2)
    # se1 and se2 are computed as per your formula
    ses <- matrix(c(se1,0, 0, se2), 2, 2)
    # pcor is the phenotypic correlation between traits, N_0 is the sample overlap, N1 is sample size 1, N2 is sample size 2
    r <- pcor * n_overlap / sqrt(n1*n2)
    cor = matrix(c(1,r, r, 1), 2, 2)  
    cov <- ses %*% cor %*% ses
    sample <- mvrnorm(Nrep, mu, cov)
    sample
}

maf <- 0.4
vy <- 1
n1 <- 10000
n2 <- 10000
b1 <- 0.1
bxy <- 0
b2 <- b1 * bxy

se1 <- expected_se(b1, maf, n1, 1)
(b1 / se1)^2
se2 <- expected_se(b2, maf, n2, 1)

betas(b1, b2, se1, se2, 1000, 1000, 0.5, n_overlap=1000, 10000) %>% colMeans %>% {.[2]/.[1]}
betas(b1, b2, se1, se2, 1000, 1000, 0.5, n_overlap=0, 10000) %>% colMeans %>% {.[2]/.[1]}

betas(b1, b2, se1, se2, 1000, 1000, 0.5, n_overlap=1000, 10000) %>% cor
betas(b1, b2, se1, se2, 1000, 1000, 0.5, n_overlap=0, 10000) %>% cor

# rsq = F / (F + n -1)

# rsq F + rsq(n-1) = F
# rsq F - F = -rsq(n-1)
# F (rsq - 1) = - rsq(n - 1)
# F = (rsq - rsq n) / rsq - 1

# rsq = 20 / (20 + 1000 - 1)


nrep=2000
nsnp=100
n1 <- 1000
n2 <- 1000
maf <- 0.4
pcor <- 0.6

a <- generate_gwas_params(tibble(snp=1:nsnp, af=maf), 1, S=5)
a <- expand.grid(b1=a$beta, overlap=c(0, 0.5, 1), bxy=c(0, 0.3)) %>% as_tibble
a$se1 <- expected_se(a$b1, maf, n1, 1)
a$fval <- (a$b1/a$se1)^2
hist(a$fval)

a$b2 <- a$b1 * a$bxy
a$se2 <- expected_se(a$b2, maf, n2, 1)

l <- list()
for(i in 1:nrow(a))
{
    r <- betas(a$b1[i], a$b2[i], a$se1[i], a$se2[i], n1, n2, pcor, a$overlap[i] * n1, nrep)
    d <- a[i,] %>% slice(rep(row_number(), nrep))
    d$bx <- r[,1]
    d$by <- r[,2]
    l[[i]] <- d
}
dat <- bind_rows(l)
dat$wr <- dat$by/dat$bx
ggplot(subset(dat, wr < 3 & wr > -3), aes(x=fval, y=by/bx)) +
# geom_point(size=0.1, aes(colour=as.factor(overlap))) +
geom_smooth(aes(colour=as.factor(overlap))) +
facet_grid(. ~ bxy) +
scale_colour_brewer(type="qual")
nsim <- 500
dat1 <- tibble(rsq1=runif(nsim, 0.001, 0.01), overlap=1)
dat2 <- tibble(rsq1=dat1$rsq1, overlap=0)
dat3 <- tibble(rsq1=dat1$rsq1, overlap=0.5)

n1 <- 1000
n2 <- 1000
g1 <- make_geno(n1, 1, 0.4)
g2 <- make_geno(n2, 1, 0.4)
u1 <- rnorm(n1)
u2 <- rnorm(n2)
mid <- round(n2/2)
for(i in 1:nrow(dat1))
{
    b <- choose_effects(1, dat1$rsq1[i])
    x1 <- make_phen(c(b, sqrt(0.5)), cbind(g1, u1))
    x2 <- make_phen(c(b, sqrt(0.5)), cbind(g2, u2))
    y1 <- make_phen(c(0.1, -sqrt(0.5)), cbind(x1, u1))
    y2 <- make_phen(c(0.1, -sqrt(0.5)), cbind(x2, u2))
    e1 <- get_effs(x1, y1, g1)
    ex1 <- fast_assoc(x1, g1)
    ey1 <- fast_assoc(y1, g1)
    ey2 <- fast_assoc(y2, g2)
    ey3 <- fast_assoc(c(y1[1:mid], y2[(mid+1):n2]), c(g1[1:mid], g2[(mid+1):n2]))
    dat1$fval[i] <- ex1$fval
    dat2$fval[i] <- ex1$fval
    dat3$fval[i] <- ex1$fval
    dat1$bx[i] <- ex1$bhat
    dat1$by[i] <- ey1$bhat
    dat2$bx[i] <- ex1$bhat
    dat2$by[i] <- ey2$bhat
    dat3$bx[i] <- ex1$bhat
    dat3$by[i] <- ey3$bhat
}

dat <- bind_rows(dat1, dat2, dat3)
ggplot(dat, aes(x=fval, y=by/bx)) +
geom_point(size=0.1, aes(colour=as.factor(overlap))) +
geom_smooth(aes(colour=as.factor(overlap))) +
xlim(c(0.1, max(dat$fval))) +
ylim(c(-3, 3)) +
scale_colour_brewer(type="qual")


dat1 %>% subset(fval > 0.1) %>% {cor(.$bx, .$by)}
dat2 %>% subset(fval > 0.1) %>% {cor(.$bx, .$by)}
dat3 %>% subset(fval > 0.1) %>% {cor(.$bx, .$by)}


explodecomputer/simulateGP documentation built on April 3, 2024, 2:38 a.m.