# population standard deviation (denominator n, not n - 1);
# returns 1 when the SD is exactly 0 so that downstream likelihood terms
# avoid log(0) and division by zero
std <- function(x) {
s <- sqrt(mean((x - mean(x, na.rm = TRUE))^2, na.rm = TRUE))
if (s == 0) {
return(1)
} else {
return(s)
}
}
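# Quick sanity check of the convention above (hypothetical values, not part
# of the pipeline): std() uses denominator n and returns 1 for constant input.
# std(c(1, 2, 3, 4)) # population SD ~1.118 (sd() would give ~1.291)
# std(rep(5, 10))    # zero-variance input: returns 1, not 0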
# LRT
# quantitative trait
my_lrt_method <- function(E, G, idv.rare, e = exp(1), c = NULL, grid.search = TRUE, dist = "norm.var") {
# each row of G / idv.rare is an individual, each column a variant
# split expression levels into carriers and non-carriers of each rare variant
# (lapply guarantees a list even when every variant has the same carrier count)
E.y = lapply(seq_len(ncol(idv.rare)), function(j) E[idv.rare[, j]]) # carriers
E.x = lapply(seq_len(ncol(idv.rare)), function(j) E[!idv.rare[, j]]) # non-carriers
K = ncol(G)
N = nrow(G)
n = colSums(idv.rare)
m = colSums(!idv.rare)
# set the causal rate c: one value per variant (drawn uniformly if unspecified)
if (is.null(c)) {
c = runif(K)
} else {
c = rep(c, K)
}
# parameter estimates (x: non-carriers, y: carriers)
mu.all = mean(E, na.rm = TRUE)
sigma.all = std(E)
mu.x = as.numeric(lapply(E.x, mean))
sigma.x = as.numeric(lapply(E.x, std))
sigma.x[sigma.x==0] <- sigma.all
mu.y = as.numeric(lapply(E.y, mean))
sigma.y = as.numeric(lapply(E.y, std))
sigma.y[sigma.y==0] <- sigma.all
# calculate log-transformed L_0
l0 = sum(log(1 - c, base = e)) - (K * N / 2) * log(2 * pi * sigma.all ^ 2, base = e) - K * N / 2
# calculate log-transformed L0 + L1
# we use sum here because ln.a and ln.b are vectors
ln.a = log(1 - c, base = e) - ((m + n) / 2) * log(2 * pi * sigma.all ^ 2, base = e) - N / 2
if (grid.search) {
# residual sums of squares around the carrier / non-carrier means
exp.y.constant <- numeric(length(mu.y))
exp.x.constant <- numeric(length(mu.x))
for (i in 1:length(mu.y)) {
exp.y.constant[i] <- sum((E.y[[i]] - mu.y[i]) ^ 2)
exp.x.constant[i] <- sum((E.x[[i]] - mu.x[i]) ^ 2)
}
# grid of candidate SDs around each estimate, floored at 0.01
space <- seq(-2, 2, 0.01)
sigma.x.range <- outer(sigma.x, space, "+")
sigma.x.range[sigma.x.range <= 0] = 0.01
sigma.y.range <- outer(sigma.y, space, "+")
sigma.y.range[sigma.y.range <= 0] = 0.01
sigma.x.max <- numeric(length(c))
sigma.y.max <- numeric(length(c))
# profile log-likelihoods: vary sigma.x (resp. sigma.y) over the grid
# while holding the other SD at its point estimate
ln.bx.range <- log(c, base = e) -
(m / 2) * log(2 * pi * sigma.x.range ^ 2, base = e) -
1 / 2 * (exp.x.constant / sigma.x.range ^ 2) -
(n / 2) * log(2 * pi * sigma.y ^ 2, base = e) -
1 / 2 * (exp.y.constant / sigma.y ^ 2)
ln.by.range <- log(c, base = e) -
(m / 2) * log(2 * pi * sigma.x ^ 2, base = e) -
1 / 2 * (exp.x.constant / sigma.x ^ 2) -
(n / 2) * log(2 * pi * sigma.y.range ^ 2, base = e) -
1 / 2 * (exp.y.constant / sigma.y.range ^ 2)
# per variant, pick the grid SD that maximizes each profile likelihood
max.x.idx <- apply(ln.bx.range, 1, which.max)
max.y.idx <- apply(ln.by.range, 1, which.max)
for (i in 1:length(c)) {
sigma.x.max[i] <- sigma.x.range[i, max.x.idx[i]]
sigma.y.max[i] <- sigma.y.range[i, max.y.idx[i]]
}
# evaluate the ln(L1) contribution at the selected SDs
ln.b = log(c, base = e) -
(m / 2) * log(2 * pi * sigma.x.max ^ 2, base = e) -
1 / 2 * (exp.x.constant / sigma.x.max ^ 2) -
(n / 2) * log(2 * pi * sigma.y.max ^ 2, base = e) -
1 / 2 * (exp.y.constant / sigma.y.max ^ 2)
} else if (dist == "norm.var") {
ln.b = log(c, base = e) - (m / 2) * log(2 * pi * sigma.x ^ 2, base = e) -
(n / 2) * log(2 * pi * sigma.y ^ 2, base = e) - N / 2
} else if (dist == "norm.mean") {
sigma.var <- sqrt( ( unlist(lapply(E.x, function(x) {sum( (x-mean(x))^2 )})) +
unlist(lapply(E.y, function(y) {sum( (y-mean(y))^2 )})) ) /
(m+n) )
ln.b = log(c, base = e) -
((m+n) / 2) * log(2 * pi * sigma.var ^ 2, base = e) -
N / 2
} else if (dist == "y.norm.var") {
# sigma.null = 1, mu.null = 0
lt.y.null <- unlist(lapply(E.y, function(y) {sum(log(dnorm(y, mean = 0, sd = 1)))}))
lt.y.alt <- unlist(lapply(E.y, function(y) {sum(log(dnorm(y, mean = mean(y), sd = std(y))))}))
ln.a = log(1 - c, base = e) + lt.y.null
ln.b = log(c, base = e) + lt.y.alt
l0 = sum(ln.a)
} else if (dist == "y.norm.mean") {
# sigma.null = 1, mu.null = 0
lt.y.null <- unlist(lapply(E.y, function(y) {sum(log(dnorm(y, mean = 0, sd = 1)))}))
lt.y.alt <- unlist(lapply(E.y, function(y) {sum(log(dnorm(y, mean = mean(y), sd = 1)))}))
ln.a = log(1 - c, base = e) + lt.y.null
ln.b = log(c, base = e) + lt.y.alt
l0 = sum(ln.a)
}
# combine per-variant terms: ln(L0 + L1), summed over variants
l0l1 = sum(ln.a) + sum(log(1 + e ^ (ln.b - ln.a), base = e))
l1 = l0l1 + log(1 - e ^ (l0 - l0l1), base = e)
# log-likelihood ratio (l0 and l1 are already on the log scale)
lr = l1 - l0
return(lr)
}
LRT <- function(y, X, maf = 0.05, perm = 1000, e = exp(1), c = NULL, grid.search = TRUE, dist = "norm.var") {
# get minor allele frequencies
MAF = colMeans(X, na.rm=TRUE) / 2
# flag variants with 0 < MAF < maf
rare.maf = MAF < maf & MAF > 0
rare = sum(rare.maf)
# keep only the rare variants and flag the carriers of each
X = X[, rare.maf]
idv.rare = X > 0
# permutation
# unpermuted LRT statistics
if (dist == "both") {
lrt.stat = max(my_lrt_method(y, X, idv.rare, e, c, grid.search = grid.search, dist = "norm.var"),
my_lrt_method(y, X, idv.rare, e, c, grid.search = grid.search, dist = "norm.mean"))
} else {
lrt.stat = my_lrt_method(y, X, idv.rare, e, c, grid.search = grid.search, dist = dist)
}
# permutation p-value: how often permuted data scores at least as high as the observed statistic
perm.pval = NA
if (perm > 0)
{
x.perm = rep(0, perm)
for (i in 1:perm)
{
perm.sample = sample(1:length(y))
if (dist == "both") {
x.perm[i] = max(my_lrt_method(y[perm.sample], X, idv.rare, e, c, grid.search = grid.search, dist = "norm.var"),
my_lrt_method(y[perm.sample], X, idv.rare, e, c, grid.search = grid.search, dist = "norm.mean"))
} else {
x.perm[i] = my_lrt_method(y[perm.sample], X, idv.rare, e, c, grid.search = grid.search, dist = dist)
}
}
# p-value
perm.pval = (sum(x.perm >= lrt.stat) + 1) / (perm + 1)
}
# results
name = "LRT: Likelihood Ratio Test"
arg.spec = c(length(y), ncol(X), maf, perm)
names(arg.spec) = c("samples", "variants", "maf", "n.perms")
res = list(lrt.stat = lrt.stat,
perm.pval = perm.pval,
args = arg.spec,
name = name)
return(res)
}
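# Example usage (a minimal sketch; `geno.demo` and `expr.demo` are
# hypothetical simulated inputs, not part of the original pipeline):
# set.seed(1)
# geno.demo <- matrix(rbinom(500 * 20, 2, 0.02), nrow = 500,
#                     dimnames = list(NULL, paste0("v", 1:20)))
# expr.demo <- rnorm(500)
# LRT(expr.demo, geno.demo, maf = 0.05, perm = 100)$perm.pval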
# VT
compute.vt <- function(y, z.num.pre, z.denom) {
y.new = y - mean(y)
z.num = unlist(lapply(z.num.pre, function(x) {sum(x * y.new, na.rm = T)}))
z.scores = z.num / z.denom
vt.stat = max(z.scores, na.rm = T)
return(vt.stat)
}
VT <- function(y, X, perm=1000, ksi=NULL) {
## running vt method
mafs = (1 + colSums(X, na.rm=TRUE)) / (2 + 2*nrow(X))
h.maf = sort(unique(mafs))
z.num.pre = vector("list", length(h.maf)-1)
z.denom = rep(0, length(h.maf)-1)
if (length(h.maf) == 1) {
# only one distinct MAF: there is no threshold to vary, so return NA
name = "VT: Variable Threshold"
arg.spec = c(length(y), ncol(X), perm)
names(arg.spec) = c("samples", "variants", "n.perms")
res = list(vt.stat = NA,
perm.pval = NA,
sig.perm = NA,
args = arg.spec,
name = name)
return(res)
}
# precompute quantities that do not change across permutations
for (i in 1:(length(h.maf)-1))
{
if (is.null(ksi)) {
selected.var = mafs < h.maf[i+1]
z.num.pre[[i]] = X[, selected.var]
z.denom[i] = sqrt(sum((X[, selected.var]) ^ 2, na.rm=TRUE))
} else {
selected.var <- mafs < h.maf[i+1]
z.num.pre[[i]] = t(ksi * t(X))[, selected.var]
z.denom[i] = sqrt(sum((t(ksi * t(X)))[, selected.var] ^ 2, na.rm=TRUE))
}
}
## Unpermuted VT statistics
vt.stat = compute.vt(y, z.num.pre, z.denom)
## permutations
## permutation p-value: how often permuted data scores at least as high as the observed statistic
perm.pval = NA
sig.perm = 0
if (perm > 0)
{
y.perm = replicate(perm, sample(y))
x.perm = apply(y.perm, 2, function(y) compute.vt(y, z.num.pre, z.denom))
sig.perm = sum(x.perm >= vt.stat)
## p-value (computed only when permutations were run)
perm.pval = (sig.perm + 1) / (perm + 1)
}
## results
name = "VT: Variable Threshold"
arg.spec = c(length(y), ncol(X), perm)
names(arg.spec) = c("samples", "variants", "n.perms")
res = list(vt.stat = vt.stat,
perm.pval = perm.pval,
sig.perm = sig.perm,
args = arg.spec,
name = name)
return(res)
}
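# Example usage (a minimal sketch with hypothetical simulated inputs):
# set.seed(2)
# geno.demo <- matrix(rbinom(500 * 20, 2, 0.02), nrow = 500)
# expr.demo <- rnorm(500)
# VT(expr.demo, geno.demo, perm = 100)$perm.pval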
# Weighted methods - WSS
# use linear regression + Wald statistic
WSS.agg <- function(y, X, threshold = 1.64, covariates = NULL, maf = 0.05) {
require(car)
# each row represents an individual, each column a variant;
# assume all variants lie in one small gene, so we do not need to
# separate rare variants into different groups
# get minor allele frequencies
MAF = colMeans(X, na.rm=TRUE) / 2
# how many variants < maf
rare.maf = MAF < maf & MAF > 0
rare = sum(rare.maf)
# threshold 1.64 is roughly the 95th percentile of the standard normal:
# the top ~5% of expression values are treated as affected
unaffected <- y <= threshold
# calculate the WSS weight of each variant from the unaffected group
mU <- colSums(X[unaffected, ], na.rm = TRUE)
nU <- colSums(!is.na(X)[unaffected, ], na.rm = TRUE)
q = (mU+1) / (2*nU+2)
n = colSums(!is.na(X))
w = sqrt(n * q * (1 - q))
# WSS aggregation: weight each variant's genotype column
X <- t(t(X) * w)
if (rare <= 1)
{
# if rare variants <= 1, then NO aggregation is needed
X.new = X
} else {
# aggregate rare variants into one column
X.agg = rowSums(X[, rare.maf], na.rm=T)
# joining aggregated rare variants to common variants
X.new = cbind(X[, !rare.maf], X.agg)
}
# linear regression and Wald statistic
if (is.null(covariates)) {
model <- lm(y ~ ., data = as.data.frame(X.new))
} else {
model <- lm(y ~ covariates + ., data = as.data.frame(X.new))
}
# drop aliased (NA-coefficient) variant columns by name and refit, so that
# covariate coefficients cannot shift the column indexing
if (anyNA(model$coefficients)) {
X.df <- as.data.frame(X.new)
drop.cols <- intersect(names(which(is.na(model$coefficients))), colnames(X.df))
X.df <- X.df[, !(colnames(X.df) %in% drop.cols), drop = FALSE]
if (is.null(covariates)) {
model <- lm(y ~ ., data = X.df)
} else {
model <- lm(y ~ covariates + ., data = X.df)
}
}
wald <- Anova(model, type = "II", test.statistic = "Wald")
## results
name = "Weighted Aggregation: WSS Aggregation Method (Linear regression + Wald statistic)"
arg.spec = c(length(y), ncol(X), rare, maf)
arg.spec = as.character(arg.spec)
names(arg.spec) = c("samples", "variants", "rarevar", "maf")
# report the F statistic and p-value of the last predictor term
# (the row just before Residuals in the Anova table)
res = list(wss.stat = rev(wald$`F value`)[2],
asym.pval = rev(wald$`Pr(>F)`)[2],
args = arg.spec,
name = name)
return(res)
}
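# Example usage (a minimal sketch; hypothetical inputs, requires the `car`
# package to be installed):
# set.seed(3)
# geno.demo <- matrix(rbinom(500 * 20, 2, 0.02), nrow = 500)
# expr.demo <- rnorm(500)
# WSS.agg(expr.demo, geno.demo, threshold = 1.64)$asym.pval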
# treat low expression levels as controls and high expression levels as cases
my_wss_method <- function(casecon, gen) {
controls = !casecon
n.loc = ncol(gen)
## calculate weights
w <- rep(0, n.loc)
for (j in 1:n.loc)
{
nNA = is.na(gen[,j])
mU = sum(gen[controls,j], na.rm=TRUE)
nU = sum(controls[!nNA])
q = (mU+1) / (2*nU+2)
n = sum(!nNA)
w[j] = sqrt(n * q * (1-q))
}
## genetic score: genotypes weighted by 1/w and summed per individual
score = rowSums(gen %*% diag(1/w), na.rm=TRUE)
rank.score = rank(score)
## statistic: sum of the score ranks of the cases (a rank-sum statistic)
x = sum(rank.score[casecon==1])
return(x)
}
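# Worked toy example (hypothetical values): with 4 individuals the statistic
# is the sum of the genetic-score ranks of the two cases.
# gen.demo <- rbind(c(0, 1), c(0, 0), c(2, 0), c(1, 1))
# my_wss_method(casecon = c(1, 0, 0, 1), gen = gen.demo)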
WSS <- function(y, X, perm=1000, threshold = 1.64) {
# threshold 1.64 is roughly the 95th percentile of the standard normal:
# the top ~5% of expression levels are treated as cases
case = y > threshold
y[case] = 1
y[!case] = 0
## running wss method
wss.stat = my_wss_method(y, X)
## permutations
perm.pval = NA
if (perm > 0)
{
x.perm = rep(0, perm)
for (i in 1:perm)
{
perm.sample = sample(1:length(y))
x.perm[i] = my_wss_method(y[perm.sample], X)
}
# p-value
perm.pval = (sum(x.perm >= wss.stat) + 1) / (perm + 1)
}
## results
name = "WSS: Weighted Sum Statistic"
arg.spec = c(sum(y), length(y)-sum(y), ncol(X), perm)
names(arg.spec) = c("cases", "controls", "variants", "n.perms")
res = list(wss.stat = wss.stat,
perm.pval = perm.pval,
args = arg.spec,
name = name)
return(res)
}
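# Example usage (a minimal sketch; hypothetical inputs — expression is
# dichotomized internally at `threshold`):
# set.seed(4)
# geno.demo <- matrix(rbinom(500 * 20, 2, 0.02), nrow = 500)
# expr.demo <- rnorm(500)
# WSS(expr.demo, geno.demo, perm = 100)$perm.pval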
# Aggregation methods - BURDEN
BURDEN <- function(y, X, maf=0.05, covariates = NULL) {
require(car)
# each row represents an individual, each column a variant;
# assume all variants lie in one small gene, so we do not need to
# separate rare variants into different groups
# get minor allele frequencies
MAF = colMeans(X, na.rm=TRUE) / 2
# how many variants < maf
rare.maf = MAF < maf & MAF > 0
rare = sum(rare.maf)
# aggregation
if (rare <= 1)
{
# if rare variants <= 1, then NO aggregation is needed
X.new = X
} else {
# aggregate rare variants into one column
X.agg = rowSums(X[,rare.maf], na.rm=TRUE)
# joining aggregated rare variants to common variants
X.new = cbind(X[, !rare.maf], X.agg)
}
# linear regression and Wald statistic
if (is.null(covariates)) {
model <- lm(y ~ ., data = as.data.frame(X.new))
} else {
model <- lm(y ~ covariates + ., data = as.data.frame(X.new))
}
# drop aliased (NA-coefficient) variant columns by name and refit
if (anyNA(model$coefficients)) {
X.df <- as.data.frame(X.new)
drop.cols <- intersect(names(which(is.na(model$coefficients))), colnames(X.df))
X.df <- X.df[, !(colnames(X.df) %in% drop.cols), drop = FALSE]
if (is.null(covariates)) {
model <- lm(y ~ ., data = X.df)
} else {
model <- lm(y ~ covariates + ., data = X.df)
}
}
wald <- Anova(model, type = "II", test.statistic = "Wald")
## results
name = "BURDEN: Aggregation Method (Linear regression + Wald statistic)"
arg.spec = c(length(y), ncol(X), rare, maf)
arg.spec = as.character(arg.spec)
names(arg.spec) = c("samples", "variants", "rarevar", "maf")
res = list(burden.stat = rev(wald$`F value`)[2],
asym.pval = rev(wald$`Pr(>F)`)[2],
args = arg.spec,
name = name)
return(res)
}
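# Example usage (a minimal sketch; hypothetical inputs, requires `car`):
# set.seed(5)
# geno.demo <- matrix(rbinom(500 * 20, 2, 0.02), nrow = 500)
# expr.demo <- rnorm(500)
# BURDEN(expr.demo, geno.demo)$asym.pval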
# Collapsing methods - CMC
my_cmc_method <- function(casecon, X.new) {
# internal helper for the CMC method: computes Hotelling's T2 statistic
## number of individuals N, cases nA, controls nU
N = nrow(X.new)
nA = sum(casecon)
nU = N - nA
## matrix of genotypes in cases
Xx = X.new[casecon==1,]
## matrix of genotypes in controls
Yy = X.new[casecon==0,]
## get means
Xx.mean = colMeans(Xx, na.rm=TRUE)
Yy.mean = colMeans(Yy, na.rm=TRUE)
## center matrices Xx and Yy
Dx = sweep(Xx, 2, Xx.mean)
Dy = sweep(Yy, 2, Yy.mean)
## pooled covariance matrix
if (sum(complete.cases(X.new)) == N) # no missing values
{
COV = (t(Dx) %*% Dx + t(Dy) %*% Dy) / (N-2)
} else { # with missing values
## covariance matrix of cases
tDx = t(Dx)
Sx = matrix(0, ncol(X.new), ncol(X.new))
for (i in 1:nrow(tDx))
{
for (j in i:ncol(Dx))
{
Sx[i,j] = sum(tDx[i,] * Dx[,j], na.rm=TRUE)
}
}
sx.diag = diag(Sx)
Sx = Sx + t(Sx)
diag(Sx) = sx.diag
## covariance matrix of controls
tDy = t(Dy)
Sy = matrix(0, ncol(X.new), ncol(X.new))
for (i in 1:nrow(tDy))
{
for (j in i:ncol(Dy))
{
Sy[i,j] = sum(tDy[i,] * Dy[,j], na.rm=TRUE)
}
}
sy.diag = diag(Sy)
Sy = Sy + t(Sy)
diag(Sy) = sy.diag
## pooled covariance matrix
COV = (1/(N-2)) * (Sx + Sy)
}
## generalized inverse of the covariance matrix
if (nrow(COV) == 1) # only one variant
{
if (COV < 1e-8) COV = 1e-8
COV.inv = 1 / COV
} else {
COV.eigen = eigen(COV)
eig.vals = COV.eigen$values
inv.vals = ifelse(abs(eig.vals) <= 1e-8, 0, 1/eig.vals)
EV = solve(COV.eigen$vectors)
COV.inv = t(EV) %*% diag(inv.vals) %*% EV
}
## Hotelling's T2 statistic
stat = t(Xx.mean - Yy.mean) %*% COV.inv %*% (Xx.mean - Yy.mean) * nA * nU / N
as.numeric(stat)
}
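# Toy check of the T2 computation (hypothetical recoded genotypes in
# {-1, 0, 1}, three cases and three controls):
# X.demo <- cbind(c(1, 0, -1, 0, 1, -1), c(0, 1, 0, -1, 1, -1))
# my_cmc_method(casecon = c(1, 1, 1, 0, 0, 0), X.new = X.demo)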
CMC.casecon <- function(y, X, maf=0.05, perm=1000, threshold = 1.64) {
# threshold 1.64 is roughly the 95th percentile of the standard normal:
# the top ~5% of expression levels are treated as cases
case = y > threshold
y[case] = 1
y[!case] = 0
## number of individuals N
N = nrow(X)
## get minor allele frequencies
MAF = colMeans(X, na.rm=TRUE) / 2
## how many variants < maf
rare.maf = MAF < maf & MAF > 0
rare = sum(rare.maf)
## collapsing
if (rare <= 1)
{
# if rare variants <= 1, then NO collapse is needed
X.new = X
} else {
# collapsing rare variants into one column
X.collaps = rowSums(X[,rare.maf], na.rm=TRUE)
X.collaps[X.collaps != 0] = 1
# joining collapsed to common variants
X.new = cbind(X[,!rare.maf], X.collaps)
}
## shift genotype codes down by one (0/1/2 becomes -1/0/1)
X.new = X.new - 1
## number of new variants
M = ncol(X.new)
## Hotellings T2 statistic
cmc.stat = my_cmc_method(y, X.new)
## Asymptotic p-value
# under the null hypothesis the rescaled T2 statistic follows an F distribution
f.stat = cmc.stat * (N-M-1)/(M*(N-2))
df1 = M # numerator degrees of freedom
df2 = N - M - 1 # denominator degrees of freedom
asym.pval = 1 - pf(f.stat, df1, df2)
## under the alternative hypothesis T2 follows a chi-square distribution
# pval = 1 - pchisq(cmc.stat, df=M)
## permutations
perm.pval = NA
if (perm > 0)
{
x.perm = rep(0, perm)
for (i in 1:perm)
{
perm.sample = sample(1:length(y))
x.perm[i] = my_cmc_method(y[perm.sample], X.new)
}
# p-value
perm.pval = (sum(x.perm >= cmc.stat) + 1) / (perm + 1)
}
## results
name = "CMC: Combined Multivariate and Collapsing Method"
arg.spec = c(sum(y), length(y)-sum(y), ncol(X), rare, maf, perm)
arg.spec = as.character(arg.spec)
names(arg.spec) = c("cases", "controls", "variants", "rarevar", "maf", "perm")
res = list(cmc.stat = cmc.stat,
asym.pval = asym.pval,
perm.pval = perm.pval,
args = arg.spec,
name = name)
return(res)
}
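# Example usage (a minimal sketch; hypothetical inputs mixing rare and
# common variants so that collapsing leaves a multi-column matrix):
# set.seed(6)
# geno.demo <- cbind(matrix(rbinom(500 * 15, 2, 0.02), nrow = 500),
#                    matrix(rbinom(500 * 5, 2, 0.30), nrow = 500))
# expr.demo <- rnorm(500)
# CMC.casecon(expr.demo, geno.demo, perm = 100)$asym.pval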
CMC <- function(y, X, maf=0.05, covariates = NULL) {
require(car)
# each row represents an individual, each column a variant;
# assume all variants lie in one small gene, so we do not need to
# separate rare variants into different groups
# get minor allele frequencies
MAF = colMeans(X, na.rm=TRUE) / 2
# how many variants < maf
rare.maf = MAF < maf & MAF > 0
rare = sum(rare.maf)
# collapsing
if (rare <= 1)
{
# if rare variants <= 1, then NO collapse is needed
X.new = X
} else {
# collapsing rare variants into one column
X.collaps = rowSums(X[,rare.maf], na.rm=TRUE)
X.collaps[X.collaps != 0] = 1
# if every individual carries a rare variant, the collapsed column is
# constant, so the model cannot be fit; return NA
if (all(X.collaps == 1)) {
## results
name = "CMC: Combined Multivariate and Collapsing Method (Linear regression + Wald statistic)"
arg.spec = c(length(y), ncol(X), rare, maf)
arg.spec = as.character(arg.spec)
names(arg.spec) = c("samples", "variants", "rarevar", "maf")
res = list(cmc.stat = NA,
asym.pval = NA,
args = arg.spec,
name = name)
return(res)
}
# joining collapsed to common variants
X.new = cbind(X[, !rare.maf], X.collaps)
}
# linear regression and Wald statistic
if (is.null(covariates)) {
model <- lm(y ~ ., data = as.data.frame(X.new))
} else {
model <- lm(y ~ covariates + ., data = as.data.frame(X.new))
}
# drop aliased (NA-coefficient) variant columns by name and refit
if (anyNA(model$coefficients)) {
X.df <- as.data.frame(X.new)
drop.cols <- intersect(names(which(is.na(model$coefficients))), colnames(X.df))
X.df <- X.df[, !(colnames(X.df) %in% drop.cols), drop = FALSE]
if (is.null(covariates)) {
model <- lm(y ~ ., data = X.df)
} else {
model <- lm(y ~ covariates + ., data = X.df)
}
}
wald <- Anova(model, type = "II", test.statistic = "Wald")
## results
name = "CMC: Combined Multivariate and Collapsing Method (Linear regression + Wald statistic)"
arg.spec = c(length(y), ncol(X), rare, maf)
arg.spec = as.character(arg.spec)
names(arg.spec) = c("samples", "variants", "rarevar", "maf")
res = list(cmc.stat = rev(wald$`F value`)[2],
asym.pval = rev(wald$`Pr(>F)`)[2],
args = arg.spec,
name = name)
return(res)
}
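# Example usage (a minimal sketch; hypothetical inputs as above, requires `car`):
# set.seed(7)
# geno.demo <- cbind(matrix(rbinom(500 * 15, 2, 0.02), nrow = 500),
#                    matrix(rbinom(500 * 5, 2, 0.30), nrow = 500))
# expr.demo <- rnorm(500)
# CMC(expr.demo, geno.demo)$asym.pval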
# SKAT
SKAT_ <- function(y, X, covariates = NULL, c) {
require(SKAT)
# flat weights: the same weight c is applied to every variant
weights <- rep(c, ncol(X))
if (is.null(covariates)) {
obj <- SKAT_Null_Model(y ~ 1, out_type = "C")
skat.result <- SKAT(X, obj, method="SKATO", weights = weights)
p <- skat.result$p.value
} else {
obj <- SKAT_Null_Model(y ~ covariates, out_type = "C")
skat.result <- SKAT(X, obj, method="SKATO", weights = weights)
p <- skat.result$p.value
}
## results
name = "SKAT-O: SNP-set (Sequence) Kernel Association Test"
arg.spec = c(length(y), ncol(X))
names(arg.spec) = c("samples", "variants")
res = list(p.value = p,
skat.obj = obj,
skat.result = skat.result,
args = arg.spec,
name = name)
return(res)
}
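# Example usage (a minimal sketch; hypothetical inputs, requires the SKAT
# package; `c` is the flat per-variant weight):
# set.seed(8)
# geno.demo <- matrix(rbinom(500 * 20, 2, 0.02), nrow = 500)
# expr.demo <- rnorm(500)
# SKAT_(expr.demo, geno.demo, c = 0.30)$p.value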
power.simulate <- function(cosi.hap, perm=1000, method = 6, repeats = 100, individual = 1000, causal.ratio = 0.10, a = 1.5, maf.cutoff = 0.05) {
require(RNOmni)
# one column of p-values per method
p = as.data.frame(matrix(nrow = repeats, ncol = method))
colnames(p) <- c("CMC", "WSS-Agg", "BURDEN", "VT", "SKAT-O", "LRT-q")
for (i in 1:repeats) {
# draw 2N haplotypes and pair them into genotypes for N individuals
hap4k <- cosi.hap[sample(nrow(cosi.hap), 2*individual),]
hap2k <- hap4k[1:individual, ] + hap4k[(individual+1):(2*individual), ]
hap2k <- as.matrix(hap2k)
maf.list <- colMeans(hap2k, na.rm = TRUE) / 2
# flip alleles so the minor allele is counted, then drop monomorphic sites
hap2k[, maf.list > 0.5] <- 2 - hap2k[, maf.list > 0.5]
hap2k <- hap2k[, maf.list != 0 & maf.list != 1]
maf.list <- colMeans(hap2k, na.rm = TRUE) / 2
# only use rare variants
hap2k <- hap2k[, maf.list < maf.cutoff]
maf.list <- colMeans(hap2k, na.rm = T) / 2
rare.var.maf <- maf.list[maf.list < maf.cutoff]
causal.pos <- sample(length(rare.var.maf), ceiling(causal.ratio * length(rare.var.maf)))
causal.var.maf <- rare.var.maf[causal.pos]
causal.genotypes <- hap2k[, names(causal.var.maf)]
# per-variant effect sizes scale with |log10(MAF)| and get a random sign
effect.size <- a * abs(log10(causal.var.maf)) *
sample(c(1, -1), replace = TRUE, length(causal.var.maf))
if (is.matrix(causal.genotypes)) {
var.effect <- as.numeric(causal.genotypes %*% effect.size)
} else if (is.integer(causal.genotypes) || is.numeric(causal.genotypes)) {
var.effect <- causal.genotypes * effect.size
}
# expression = genetic effect plus noise, then rank-based inverse normal transform
expr <- rnorm(individual) + var.effect + rnorm(individual)
expr <- rankNorm(expr)
covariates <- NULL
print(paste("rare variants", length(rare.var.maf)))
print(paste("causal variants", length(causal.pos)))
p[i, 1] = CMC(expr, hap2k, covariates = covariates)$asym.pval
p[i, 2] = WSS.agg(expr, hap2k, covariates = covariates)$asym.pval
p[i, 3] = BURDEN(expr, hap2k, covariates = covariates)$asym.pval
p[i, 4] = VT(expr, hap2k, perm = perm)$perm.pval
p[i, 5] = SKAT_(expr, hap2k, covariates = covariates, c = 0.30)$p.value
p[i, 6] = (lrt_perm(expr = expr, geno = hap2k, perm = perm, causal_ratio = rep(0.30, ncol(hap2k))) + 1) / (perm + 1)
if (i %% (repeats / 10) == 0) {
print(paste("round", i))
}
}
return(p)
}
# main
args <- commandArgs(trailingOnly=TRUE)
cosi.hap.file <- args[1]
repeats <- as.numeric(args[2])
causal.ratio <- as.numeric(args[3])
a <- as.numeric(args[4])
result.file <- args[5]
print(args)
require(Rcpp)
sourceCpp("./lrt.cpp")
# hap is read from cosi result
require(data.table)
cosi.hap <- fread(cosi.hap.file)
power.p.values <- power.simulate(cosi.hap=cosi.hap, repeats=repeats, causal.ratio=causal.ratio, a = a)
write.csv(power.p.values, result.file, quote=F, row.names=F)
print(warnings())
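# Example invocation (hypothetical file names; the script name and argument
# order follow the commandArgs parsing above):
# Rscript power_simulation.R cosi.hap 100 0.10 1.5 power_pvalues.csv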