#' Estimate germline heritability of a biomarker
#'
#' The function invokves GCTA in the system and estimates the germline
#' heritability of a biomarker given cis-regions around the biomarker
#' and any associated mediators of interest.
#'
#' @param biomInt character, identifier for biomarker of interest
#' @param snps data frame, SNP dosages
#' @param snpLocs data frame, MatrixEQTL locations for SNPs
#' @param mediator data frame, mediator intensities
#' @param medLocs data frame, MatrixEQTL locations for mediators
#' @param covariates data frame, covariates
#' @param numMed integer, number of top mediators to include
#' @param fileName character, throw away name for PLINK files
#' @param dimNumeric integer, number of continuous covariates
#' @param numMed integer, maximum number of mediators to find
#' @param cisDist numeric, base pair window of cis definition
#' @param needMed logical, T/F if the biomarker of interest has associated mediators
#' @param medList character vector, vector of mediator names
#' @param verbose logical, T/F for detailed output
#' @param windowSize integer, window size for PLINK pruning
#' @param numSNPShift integer, shifting window from PLINK pruning
#' @param ldThresh numeric, maximum threshold for linkage disequilibrium
#' @param ldScrRegion numeric, number of SNPs per bin in GCTA-LDMS
#' @param LDMS logical, T/F to run GCTA-LDMS
#' @param supplySNP logical, T/F to run GCTA on the full provided snps
#'
#'
#' @return heritability estimate and associated LRT P-value
#'
#' @importFrom data.table fread
#' @importFrom data.table fwrite
#' @importFrom abind abind
#'
#' @export
estimateHeritability <- function(biomInt,
snps,
pheno,
snpLocs,
mediator,
medLocs,
covariates,
fileName,
dimNumeric,
numMed = 5,
cisDist = 5e5,
needMed = T,
prune = F,
medList,
verbose = F,
windowSize,
numSNPShift,
ldThresh,
ldScrRegion = 200,
LDMS = F,
supplySNP = F,
snpAnnot = NULL){
if (!supplySNP){
if (needMed) {
cisGeno = lapply(c(biomInt,medList),
getCisGenotypes,
locs = medLocs,
snps = snps,
snpLocs = snpLocs,
cisDist = cisDist)
W = abind::abind(lapply(cisGeno,function(x) x[[1]]),
along=1)
snpList = unlist(lapply(cisGeno, function(x) x[[2]]))
thisSNP = as.data.frame(abind::abind(lapply(cisGeno,function(x) x[[3]]),
along=1))
}
if (!needMed) {
cisGeno = getCisGenotypes(biomInt,
medLocs,
snps,
snpLocs,
cisDist)
W = cisGeno$snpCur
snpList = cisGeno$snpList
thisSNP = cisGeno$thisSNP
}}
if (supplySNP){
snps = snps[!duplicated(snps$SNP),]
snpList = snps$SNP
W = as.matrix(subset(snps,SNP %in% snpList)[,-1])
rownames(W) = snpList
thisSNP = subset(snpLocs,snpid %in% snps$SNP)
}
if (nrow(W) == 0){return(list(h2 = 0,P = 1))}
fileName = paste0('h2_',biomInt)
tempDir = paste0(biomInt,'_h2_temp/')
if (!dir.exists(tempDir)){ dir.create(tempDir) }
genfile = paste0(tempDir,fileName,'.gen')
samplefile = paste0(tempDir,fileName,'.sample')
bedfile = paste0(tempDir,fileName)
outFile = paste0(tempDir,fileName)
phenFile = paste0(tempDir,fileName,'.phen')
covarFile = paste0(tempDir,fileName,'.qcovar')
df = as.data.frame(matrix(nrow = 1,ncol =2))
ids = colnames(W)
geno = as.data.frame(matrix(ncol=ncol(W)+4,nrow = nrow(W)))
colnames(geno) <- c('SNP','Pos','A1','A2',ids)
W = W[match(snpList,rownames(W)),]
geno[,5:ncol(geno)] = W
geno$SNP = snpList
onlyThese <- thisSNP[thisSNP$snpid %in% geno$SNP,]
onlyThese = onlyThese[match(rownames(W),onlyThese$snpid),]
geno <- geno[geno$SNP %in% snpLocs$snpid,]
onlyThese <- onlyThese[match(onlyThese$snpid,geno$SNP),]
geno = geno[!duplicated(geno$SNP),]
onlyThese = onlyThese[!is.na(onlyThese$snpid),]
onlyThese = onlyThese[!duplicated(onlyThese$snpid),]
onlyThese = subset(onlyThese,snpid %in% geno$SNP)
geno = subset(geno,SNP %in% onlyThese$snpid)
chr <- onlyThese$chr
geno$Pos <- onlyThese$pos
if (is.null(snpAnnot)){
geno$A1 <- unlist(lapply(strsplit(geno$SNP,':'),function(x) as.character(x[3])))
geno$A2 <- unlist(lapply(strsplit(geno$SNP,':'),function(x) as.character(x[4])))}
if (!is.null(snpAnnot)){
snpA = subset(snpAnnot,SNP %in% geno$SNP)
snpA = snpA[order(snpA$SNP),]
geno$A1 = snpA$REF
geno$A2 = snpA$ALT
}
chr_dosage <- cbind(chr,geno)
rm(geno)
new.levels <- c('1 0 0','0 1 0','0 0 1')
matrix.alleles <- round(as.matrix(chr_dosage[,6:ncol(chr_dosage)] + 1))
impute2.format <- matrix(new.levels[matrix.alleles],ncol=ncol(matrix.alleles))
gen <- cbind(chr_dosage[,1:5],impute2.format)
gen[is.na(gen)] <- '<NA>'
colnames(gen) = colnames(chr_dosage)
require(data.table)
data.table::fwrite(gen,genfile,row.names=FALSE,
col.names = FALSE, quote = FALSE, sep='\t')
sample <- as.data.frame(matrix(nrow=(ncol(chr_dosage)-4),ncol=5))
colnames(sample) <- c('ID_1','ID_2','missing','gender','pheno')
sample$ID_1[2:nrow(sample)] <- paste('1A',2:nrow(sample),sep='')
sample$ID_2[2:nrow(sample)] <- colnames(chr_dosage)[6:ncol(chr_dosage)]
sample$missing[2:nrow(sample)] <- 0
sample$gender[2:nrow(sample)] <- 0
sample$pheno[2:nrow(sample)] <- pheno
sample[1,] <- c(0,0,0,'D','P')
write.table(sample,samplefile,row.names=FALSE,
col.names = TRUE, quote = FALSE)
write.table(sample[-1,c(1,2,5)],phenFile,row.names = F,
col.names = F, quote = F)
write.table(cbind(sample[-1,1:2],t(covariates[1:dimNumeric,-1])),
covarFile,row.names = F,col.names = F,quote=F)
if (!verbose){
sink('/dev/null')
}
system(paste('plink',
'--gen',genfile,
'--sample',samplefile,
'--make-bed',
'--allow-no-sex',
'--out',bedfile),
intern = !verbose)
a = data.table::fread(paste0(bedfile,'.fam'))
a$V5 = 0
data.table::fwrite(a,paste0(bedfile,'.fam'),col.names=F,row.names=F,quote=F,sep='\t')
if (prune){
system(paste('plink','--bfile',bedfile,
'--indep-pairwise',windowSize,numSNPShift,ldThresh,
'--out',bedfile),
intern = !verbose)
system(paste('plink','--bfile',bedfile,
'--extract',paste0(bedfile,'.prune.in'),
'--make-bed',
'--out',paste0(bedfile,'_prune')),
intern = !verbose)
bedfile = paste0(bedfile,'_prune')}
if (LDMS){
system(paste('gcta64',
'--bfile',bedfile,
'--ld-score-region',ldScrRegion,
'--out',bedfile),
intern = !verbose)
lds_seg = read.table(paste0(bedfile,".score.ld"),
header=T,
colClasses=c("character",rep("numeric",8)))
quartiles=summary(lds_seg$ldscore_SNP)
lb1 = which(lds_seg$ldscore_SNP <= quartiles[2])
lb2 = which(lds_seg$ldscore_SNP > quartiles[2] & lds_seg$ldscore_SNP <= quartiles[3])
lb3 = which(lds_seg$ldscore_SNP > quartiles[3] & lds_seg$ldscore_SNP <= quartiles[5])
lb4 = which(lds_seg$ldscore_SNP > quartiles[5])
lb1_snp = lds_seg$SNP[lb1]
lb2_snp = lds_seg$SNP[lb2]
lb3_snp = lds_seg$SNP[lb3]
lb4_snp = lds_seg$SNP[lb4]
write.table(lb1_snp,
paste0(bedfile,"snp_group1.txt"),
row.names=F,
quote=F,
col.names=F)
write.table(lb2_snp,
paste0(bedfile,"snp_group2.txt"),
row.names=F,
quote=F,
col.names=F)
write.table(lb3_snp,
paste0(bedfile,"snp_group3.txt"),
row.names=F,
quote=F,
col.names=F)
write.table(lb4_snp,
paste0(bedfile,"snp_group4.txt"),
row.names=F,
quote=F,
col.names=F)
system(paste('gcta64',
'--bfile',bedfile,
'--extract',paste0(bedfile,"snp_group1.txt"),
'--make-grm',
'--out',paste0(bedfile,'_group1')),
intern = !verbose)
system(paste('gcta64',
'--bfile',bedfile,
'--extract',paste0(bedfile,"snp_group2.txt"),
'--make-grm',
'--out',paste0(bedfile,'_group2')),
intern = !verbose)
system(paste('gcta64',
'--bfile',bedfile,
'--extract',paste0(bedfile,"snp_group3.txt"),
'--make-grm',
'--out',paste0(bedfile,'_group3')),
intern = !verbose)
system(paste('gcta64',
'--bfile',bedfile,
'--extract',paste0(bedfile,"snp_group4.txt"),
'--make-grm',
'--out',paste0(bedfile,'_group4')),
inter = !verbose)
write.table(paste0(bedfile,'_group',1:4),
paste0(bedfile,'_multiGRM.txt'),
col.names = F,
row.names = F,
quote=F)
system(paste('gcta64',
'--reml',
'--mgrm',paste0(bedfile,'_multiGRM.txt'),
'--pheno',phenFile,
'--qcovar',covarFile,
'--reml-no-constrain',
'--out',paste0(bedfile,'_multi')),
intern = !verbose)
if (file.exists(paste0(bedfile,'_multi.hsq'))) {
hsq = data.table::fread(paste0(bedfile,'_multi.hsq'),
fill = T)
h2 = hsq$Variance[10]
P = hsq$Variance[17]
}
if (!file.exists(paste0(bedfile,'_multi.hsq'))){
h2 = 0
P = 1
}
system(paste('rm -r',
tempDir))
}
if (!LDMS) {
system(paste('gcta64',
'--bfile',bedfile,
'--autosome --make-grm',
'--out',bedfile),
intern = !verbose)
system(paste('gcta64',
'--reml',
'--grm',bedfile,
'--pheno',phenFile,
'--qcovar',covarFile,
'--reml-no-constrain --reml-bendV',
'--out',paste0(bedfile,'_multi')),
intern = !verbose)
if (file.exists(paste0(bedfile,'_multi.hsq'))){
hsq = data.table::fread(paste0(bedfile,'_multi.hsq'),
fill = T)
h2 = hsq$Variance[4]
P = hsq$Variance[9]
}
if (!file.exists(paste0(bedfile,'_multi.hsq'))){
h2 = 0
P = 1
}
system(paste('rm -r',
tempDir))
}
if (!verbose){
sink()
}
return(list(h2 = h2, P = P))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.