#' Compute weighted burden test
#'
#' The function takes in a gene expression model in MOSTWAS form
#' and GWAS summary statistics and
#' carries out the weighted burden Z-test for a trait
#'
#' @param mod data.frame, model for a given isoform
#' @param ld matrix, ld reference matrix
#' @param gene character, gene name
#' @param sumStats data frame, GWAS summary statistics
#' @param chr character, colnames in sumStats that keeps the chromosome
#' @param pos character, colnames in sumStats that keeps the position
#' @param a1 character, colnames in sumStats that keeps the ALT allele
#' @param a2 character, colnames in sumStats that keeps the REF allele
#' @param a1_mod character, colnames in model that keeps the ALT allele
#' @param a2_mod character, colnames in model that keeps the REF allele
#' @param snpName character, colnames in sumStats that keeps the SNP id
#' @param Z character, colnames in sumStats that keeps the Z score
#' @param beta character, colnames in sumStats that keeps the effect size
#' @param se character, colnames in sumStats that keeps the standard error
#' @param featureName character, colname in model that keeps the feature name
#' @param R2cutoff numeric, predictive R2 cutoff
#' @param alpha numeric, P-value threshold for permutation testing
#' @param nperms numeric, number of permutations
#' @param usePos logical, use SNP positions vs. SNP ids
#'
#' @return list of results for burden and permutation tests
#'
#' @importFrom boot boot
#' @importFrom stats pnorm
#'
#' @export
burdenTest <- function(mod,
ld,
gene,
sumStats,
chr,
pos,
a1,
a2,
a1_mod = 'ALT',
a2_mod = 'REF',
snpName = 'SNP',
Z = NULL,
beta = NULL,
se = NULL,
featureName = 'Feature',
R2cutoff = 0.01,
alpha = 2.5e-6,
nperms = 1e3,
usePos = F){
if (is.null(Z)){
if (any(is.null(c(beta,se)))){
stop('Please provide a column name for the Z-score or beta and SE.')
}
}
colnames(mod)[which(colnames(mod) == featureName)] = 'Feature'
colnames(sumStats)[which(colnames(sumStats) == snpName)] = 'SNP'
if (chr %in% colnames(sumStats)){
colnames(sumStats)[which(colnames(sumStats) == chr)] = 'Chromosome'
}
if (pos %in% colnames(sumStats)){
colnames(sumStats)[which(colnames(sumStats) == pos)] = 'Position'
}
colnames(sumStats)[which(colnames(sumStats) == a1)] = 'A1_GWAS'
colnames(sumStats)[which(colnames(sumStats) == a2)] = 'A2_GWAS'
colnames(mod)[which(colnames(mod) == a1_mod)] = 'A1_Mod'
colnames(mod)[which(colnames(mod) == a2_mod)] = 'A2_Mod'
if (!is.null(Z)){
colnames(sumStats)[which(colnames(sumStats) == Z)] = 'Z'
}
if (!all(is.null(c(beta,se)))){
colnames(sumStats)[which(colnames(sumStats) == beta)] = 'Beta'
colnames(sumStats)[which(colnames(sumStats) == se)] = 'SE'
}
if (!'Z' %in% colnames(sumStats)){
sumStats$Z = sumStats$Beta/sumStats$SE
}
if (mod$R2[1] <= R2cutoff){
return(paste0('The isoform is not predicted at R2 > ',
R2cutoff))
}
if (usePos){
sumStats$SNP = paste(sumStats$Chromosome,sumStats$Position,sep=':')
mod$SNP = paste(mod$Chromosome,mod$Position,sep=':')
}
tot = merge(mod,sumStats,by = 'SNP')
# making sure all alleles are in the same case
cols_toupper <- c("A1_GWAS", "A2_GWAS", "A1_Mod", "A2_Mod")
tot$A1_GWAS = toupper(tot$A1_GWAS)
tot$A2_GWAS = toupper(tot$A1_GWAS)
tot$A1_Mod = toupper(tot$A1_Mod)
tot$A2_Mod = toupper(tot$A2_Mod)
#tot[, c(cols_toupper) := lapply(.SD, toupper), .SDcols = cols_toupper]
if (nrow(tot) == 0){
return('SNPs not found.')
}
tot$Z = ifelse(tot$A1_GWAS == tot$A1_Mod,
tot$Z,
-1 * tot$Z)
calculateTWAS <- function(effects,
Z,
LD,
indices){
effects = effects[indices]
twasZ = as.numeric(effects %*% Z)
twasr2pred = as.numeric(effects %*% LD %*% effects)
if (twasr2pred > 0){
twas = as.numeric(twasZ/sqrt(twasr2pred))
} else {
twas = 0
}
return(twas)
}
twasLD = as.numeric(tot$Weight %*% tot$Z) /
sqrt(as.numeric(tot$Weight) %*% ld[tot$SNP,tot$SNP] %*% as.numeric(tot$Weight))
twasLD = as.numeric(twasLD)
P = 2*stats::pnorm(-abs(twasLD))
if (P <= alpha){
permutationLD = boot::boot(data = tot$Weight,
statistic = calculateTWAS,
R = nperms,
sim = 'permutation',
Z = tot$Z,
LD = ld[tot$SNP,tot$SNP])
permute.p = (nperms * mean(abs(permutationLD$t) >
abs(permutationLD$t0)) + 1)/(nperms+1)
} else {
permute.p = 1}
return(data.frame(Gene = gene,
Feature = tot$Feature[1],
Z = twasLD,
P = 2*pnorm(-abs(twasLD)),
permute.P = permute.p,
topSNP = tot$SNP[which.max(abs(tot$Z))],
topSNP.P = 2*pnorm(-abs(max(abs(tot$Z))))))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.