#' Perform conditional analysis for single variant tests
#'
#' @param candidate.variant Candidate variant position;
#' @param score.stat.file files of score statistics
#' @param cov.file covariance matrix files
#' @param known.variant.vec range of candidate variant, expressed in a vector, e.g. c("1:12345","1:234567");
#' @param refaltList A list of ref, alt alleles, as well as variant frequencies, whether it needs to check AF as for flipping alleles;
#' @param test test of rare variant tests
#' @param maf.cutoff Cutoffs of MAF used for determining rare variants
#' @param alternative Alternative hypothesis to be tested
#' @param out.digits The number of digits used in the output
#' @param callrate.cutoff Cutoff of call rates. Sites with callrates lower than the cutoff will be labeled as missing
#' @param hwe.cutoff Cutoff of HWE p-values. Sites with HWE pvalues lower than the cutoff will be labeled as missing
#' @param knownCoding knownCoding option allows users to specify if a linear transformation needs to be applied known variants. Possible choices for knownCoding option include identical, burden, randomm-eff.
#' @return return a list of meta-analysis results
#' @export
conditional.rareMETALS.single.group <- function(candidate.variant,score.stat.file,cov.file,known.variant.vec,refaltList,maf.cutoff,alternative=c('two.sided','greater','less'),out.digits=4,callrate.cutoff=0,hwe.cutoff=0,correctFlip=TRUE,knownCoding="identity",impMissing=TRUE)
{
cat('Conditional analysis',candidate.variant[1],"\n",sep=" ");
res <- conditional.rareMETALS.single.group.core(candidate.variant[1],score.stat.file,cov.file,known.variant.vec,refaltList,maf.cutoff,alternative,out.digits,callrate.cutoff,hwe.cutoff,correctFlip,knownCoding,impMissing);
res.mat <- matrix(nrow=length(candidate.variant),ncol=ncol(res$res.out))
res.mat[1,] <- res$res.out;
colnames(res.mat) <- c("CHROM","POS","REF","ALT","SAMPLE_SIZE","PVALUE","AF","BETA_EST","BETA_SD","DIRECTION","POS_REF_ALT_KNOWN","LOG","NOTE");
res.list <- list();
res.list[[1]] <- res$res;
if(length(candidate.variant)>1)
{
for(ii in 2:length(candidate.variant))
{
cat('Conditional analysis',candidate.variant[ii],"\n",sep=" ");
res.ii <- conditional.rareMETALS.single.group.core(candidate.variant[ii],score.stat.file,cov.file,known.variant.vec,refaltList,maf.cutoff,alternative,out.digits,callrate.cutoff,hwe.cutoff,correctFlip,knownCoding,impMissing);
res.mat[ii,] <- res.ii$res.out;
res.list[[ii]] <- res.ii$res;
}
}
return(list(res=res.list,
res.out=res.mat))
}
#' Perform conditional analysis for single variant tests
#'
#' @param candidate.variant Candidate variant position;
#' @param score.stat.file files of score statistics
#' @param cov.file covariance matrix files
#' @param known.variant.vec range of candidate variant, expressed in a vector, e.g. c("1:12345","1:234567");
#' @param refaltList A list of ref, alt alleles, as well as variant frequencies, whether it needs to check AF as for flipping alleles;
#' @param test test of rare variant tests
#' @param maf.cutoff Cutoffs of MAF used for determining rare variants
#' @param alternative Alternative hypothesis to be tested
#' @param out.digits The number of digits used in the output
#' @param callrate.cutoff Cutoff of call rates. Sites with callrates lower than the cutoff will be labeled as missing
#' @param hwe.cutoff Cutoff of HWE p-values. Sites with HWE pvalues lower than the cutoff will be labeled as missing
#' @param knownCoding knownCoding option allows users to specify if a linear transformation needs to be applied known variants. Possible choices for knownCoding option include identical, burden, randomm-eff.
#' @return return a list of meta-analysis results
#' @export
conditional.rareMETALS.single.group.core <- function(candidate.variant,score.stat.file,cov.file,known.variant.vec,refaltList,maf.cutoff,alternative=c('two.sided','greater','less'),out.digits=4,callrate.cutoff=0,hwe.cutoff=0,correctFlip=TRUE,knownCoding='identity',impMissing=FALSE)
{
alpha <- 0.05;maf.cutoff <- 1;no.boot <- 0;test <- "SINGLE";
both.vec <- set.intersect(candidate.variant,known.variant.vec);
gene.file <- "refFlat_hg19.txt.gz";
extra.pars <- list(alpha=alpha,out.digits=out.digits,QC.par=list(callrate.cutoff=callrate.cutoff,hwe.cutoff=hwe.cutoff));
raw.data.all <- list();
tabix.range <- get.tabix.range(c(candidate.variant,known.variant.vec));
capture.output(raw.data.all[[1]] <- rvmeta.readDataByRange( score.stat.file, cov.file,tabix.range)[[1]]);
tmp <- c("CHROM","POS","REF","ALT","SAMPLE_SIZE","PVALUE","AF","BETA_EST","BETA_SD","DIRECTION_BY_STUDY","POS_REF_ALT_ANNO_KNOWN","LOG","NOTE");
res.single.out.0 <- matrix(NA,nrow=1,ncol=length(tmp));
colnames(res.single.out.0) <- tmp;
chrpos <- unlist(strsplit(candidate.variant,split=":"));
res.single.out.0[1,1:2] <- chrpos;
res.0 <- list(res.list=list(),res.out=res.single.out.0);
if(length(raw.data.all)==0)
{
return(res.0)
}
no.sample <- 0;
res <- list(list(p.value=NA,statistic=NA,no.var=NA,no.sample=NA));
p.value <- double(0);
ref <- character(0);
alt <- ref;
direction.by.study <- character(0);
ix.gold <- extra.pars$ix.gold;
if(length(extra.pars$ix.gold)==0) {
ix.gold <- 1;
}
out.digits <- extra.pars$out.digits;
if(length(out.digits)==0) out.digits <- 4;
statistic <- double(0);pos <- integer(0);anno <- character(0);direction <- integer(0);res.maf.vec <- double(0);beta1.est.vec <- double(0);beta1.sd.vec <- double(0);
gene.name.out <- 0;p.value.out <- 0;statistic.out <- 0;no.site.out <- 0;beta1.est.out <- 0;beta1.sd.out <- 0;maf.cutoff.out <- 0;direction.burden.by.study.out <- 0;direction.meta.single.var.out <- 0;pos.ref.alt.out <- 0;pos.ref.alt.known.out <- 0;p.value.known.out <- 0;anno.known.out <- 0;
pos.single.out <- candidate.variant;p.value.single.out <- NA;ref.single.out <- NA;alt.single.out <- NA;anno.single.out <- NA;maf.single.out <- NA;beta1.est.single.out <- NA;beta1.sd.single.out <- NA;pos.ref.alt.known.single.out <- NA;direction.single.out <- NA;p.value.known.single.out <- NA;anno.known.single.out <- NA;af.single.out <- NA;
kk <- 1;
candidate.variant <- candidate.variant;
raw.data <- raw.data.all[[1]];
raw.data.ori <- raw.data;
if(length(extra.pars$QC.par)>0)
raw.data <- QC(raw.data,extra.pars$QC.par,cov=1);
log.mat <- raw.data$log.mat;
candidate.variant <- set.intersect(candidate.variant,raw.data$pos);
known.variant.vec <- set.intersect(known.variant.vec,raw.data$pos);
ix.var <- 1:length(raw.data$ustat[[1]]);
ix.match <- match(raw.data$pos,refaltList$pos);
ref.gold <- refaltList$ref[ix.match];
alt.gold <- refaltList$alt[ix.match];
af.gold <- refaltList$af[ix.match];
anno.gold <- refaltList$anno[ix.match];
pos.gold <- refaltList$pos[ix.match];
if(length(ix.match)==0) return(res.0);
refaltList <- list(pos=pos.gold,
ref=ref.gold,
alt=alt.gold,
anno=anno.gold,
af.diff.max=refaltList$af.diff.max,
checkAF=refaltList$checkAF,
af=af.gold);
ix.candidate <- match(candidate.variant,pos.gold);
ix.known <- match(known.variant.vec,pos.gold);
p.value.single.out <- NA;
pos.single.out <- matrix(unlist(strsplit(pos.gold,split=":")),ncol=2,byrow=TRUE);
direction.single.out <- NA;
ref.single.out <- paste(ref.gold[ix.candidate],collapse=",");
alt.single.out <- paste(alt.gold[ix.candidate],collapse=",");
beta1.est.single.out <- NA;
beta1.sd.single.out <- NA;
maf.single.out <- NA;
anno.single.out <- paste(anno.gold[ix.candidate],collapse=",");
p.value.known.single.out <- NA;
anno.known.single.out <- paste(anno.gold[ix.known],sep=",",collapse=",");
pos.ref.alt.known.single.out <- paste(pos.gold[ix.known],ref.gold[ix.known],alt.gold[ix.known],anno.gold[ix.known],sep="/",collapse=",");
res.single.out <- res.single.out.0;
res.extra <- list(ref.candidate=refaltList$ref[ix.candidate],
alt.candidate=refaltList$alt[ix.candidate],
anno.candidate=refaltList$anno[ix.candidate],
pos.candidate=refaltList$pos[ix.candidate],
ref.known=refaltList$ref[ix.known],
tabix.range=tabix.range,
alt.known=refaltList$alt[ix.known],
anno.known=refaltList$anno[ix.known],
pos.known=refaltList$pos[ix.known],
raw.data=raw.data.ori,
clean.data=raw.data);
res.null <- list(res.list=res.extra,
res.out=res.single.out);
note <- "";
if(length(both.vec)>0)
{
warning("Candidate variants overlaps with known variants");
note <- "isKnownVariant";
}
if(length(candidate.variant)==0 | length(known.variant.vec)==0)
{
warning("No candidate variant or no known variants")
note <- "noCandidate|noKnown";
return(res.null);
}
if(length(candidate.variant)>0 & length(known.variant.vec)>0)
{
ix.pop <- 1:length(raw.data$nSample);
score.stat.vec.list <- list();mac.vec.list <- list();maf.vec.list <- list();cov.mat.list <- list();var.Y.list <- list();N.list <- list();mean.Y.list <- list();pos.list <- list();anno.list <- list();ac.vec.list <- list();af.vec.list <- list();
af.mat <- matrix(NA,ncol=length(raw.data$ustat[[1]]),nrow=length(raw.data$ustat));
ac.mat <- af.mat;
N.mat <- af.mat;
ref.list <- list();
alt.list <- list();V.list <- list();
for(ii in 1:length(ix.pop))
{
if(length(raw.data$covXZ[[ii]])>0) {
warning(paste0("Study ",ii," is analyzed as binary trait. It is advised to use rareMETALS2 for meta-analysis"))
}
for(jj in 1:length(ix.var))
{
res.flipAllele <- flipAllele(raw.data,raw.data.ori,refaltList,ii,ix.var[jj],log.mat[ix.var[jj],],correctFlip);
raw.data <- res.flipAllele$raw.data;
log.mat[ix.var[jj],] <- res.flipAllele$log.mat.var;
}
}
U.stat <- 0;V.stat.sq <- 0;
ustat.list <- list();
vstat.list <- list();
ix.X1 <- ix.candidate;
ix.X2 <- ix.known;
conditional.U.all <- 0;
conditional.V.all <- 0;
direction.single.vec <- 0;
for(ii in 1:length(ix.pop)) {
N.list[[ii]] <- rm.na(as.integer(mean(raw.data$nSample[[ii]],na.rm=TRUE)));
N.mat[ii,] <- raw.data$nSample[[ii]];
no.sample <- no.sample+N.list[[ii]];
ustat.list[[ii]] <- raw.data$ustat[[ii]];
vstat.list[[ii]] <- raw.data$vstat[[ii]];
if(impMissing==FALSE) {
ustat.list[[ii]] <- rm.na(ustat.list[[ii]])
vstat.list[[ii]] <- rm.na(vstat.list[[ii]])
}
cov.mat.list[[ii]] <- matrix(raw.data$cov[[ii]][ix.var,ix.var],nrow=length(ix.var),ncol=length(ix.var));
V.list[[ii]] <- cov.mat.list[[ii]]*N.list[[ii]];
}
impState <- NULL;
if(impMissing==TRUE) {
res.impute <- imputeMeta(ustat.list,vstat.list,cov.mat.list,N.mat,NULL,ix.known);
ustat.list <- res.impute$ustat.list.imp;
vstat.list <- res.impute$vstat.list.imp;
V.list <- res.impute$V.list;
N.mat <- res.impute$N.mat;
impState <- res.impute$impState;
cov.mat.list <- res.impute$cov.mat.list.imp;
}
for(ii in 1:length(ix.pop)) {
ustat.tmp <- ustat.list[[ii]];
if(knownCoding=="identity") {
if(impMissing==TRUE)
res.tmp <- get.conditional.score.stat(ustat.tmp,V.list[[ii]],mean(N.mat[ii,],na.rm=TRUE),ix.candidate,ix.known,res.impute$impState[ii,])
if(impMissing==FALSE)
res.tmp <- get.conditional.score.stat(ustat.tmp,V.list[[ii]],mean(N.mat[ii,],na.rm=TRUE),ix.candidate,ix.known,NULL)
conditional.U.ii <- as.numeric(res.tmp$conditional.ustat);
conditional.V.ii <- as.numeric(res.tmp$conditional.V)
}
if(knownCoding=="burden") {
ustat.tmp <- c(ustat.tmp[ix.candidate],sum(ustat.tmp[ix.known]))
V.ii <- rbind(cbind(matrix(V.list[[ii]][ix.candidate,ix.candidate],nrow=length(ix.candidate),ncol=length(ix.candidate)),
matrix(rowSums(matrix(V.list[[ii]][ix.candidate,ix.known],nrow=length(ix.candidate),ncol=length(ix.known))),nrow=length(ix.candidate),ncol=1)),
cbind(matrix(rowSums(matrix(V.list[[ii]][ix.candidate,ix.known],nrow=length(ix.candidate),ncol=length(ix.known))),ncol=length(ix.candidate),nrow=1),
matrix(sum(V.list[[ii]][ix.known,ix.known]),nrow=1,ncol=1)));
res.tmp <- get.conditional.score.stat(ustat.tmp,V.ii,mean(N.mat[ii,],na.rm=T),ix.candidate,ix.known)
conditional.U.ii <- as.numeric(res.tmp$conditional.ustat);
conditional.V.ii <- as.numeric(res.tmp$conditional.V)
}
conditional.U.all <- conditional.U.all+rm.na(conditional.U.ii);
conditional.V.all <- conditional.V.all+rm.na(conditional.V.ii);
if(is.na(conditional.U.ii)) direction.single.vec[ii] <- "X";
if(!is.na(conditional.U.ii))
{
if(conditional.U.ii>0) direction.single.vec[ii] <- "+";
if(conditional.U.ii==0) direction.single.vec[ii] <- "=";
if(conditional.U.ii<0) direction.single.vec[ii] <- "-";
}
var.Y.list[[ii]] <- 1;
mean.Y.list[[ii]] <- 0;
af.vec.list[[ii]] <- (raw.data$af[[ii]]);
ac.vec.list[[ii]] <- (raw.data$ac[[ii]]);
af.mat[ii,] <- af.vec.list[[ii]][ix.var];
ac.mat[ii,] <- ac.vec.list[[ii]][ix.var];
pos.list[[ii]] <- (raw.data$pos)[ix.var];
ref.list[[ii]] <- refaltList$ref;
alt.list[[ii]] <- refaltList$alt;
anno.list[[ii]] <- refaltList$anno;
}
statistic <- conditional.U.all/sqrt(conditional.V.all);
if(alternative=="two.sided") {
statistic <- statistic^2;
p.value <- pchisq(statistic,df=1,lower.tail=FALSE);
}
beta1.est <- conditional.U.all/conditional.V.all;
beta1.sd <- sqrt(1/conditional.V.all);
maf.vec <- rep(0,length(af.vec.list[[1]]));
af.vec <- maf.vec;
mac.vec <- 0;
ac.vec <- 0;
af.vec <- colSums(af.mat*N.mat,na.rm=TRUE)/colSums(N.mat,na.rm=TRUE);
ac.vec <- colSums(ac.mat,na.rm=TRUE);
N.vec <- colSums(N.mat,na.rm=TRUE);
maf.vec <- af.vec;
mac.vec <- ac.vec;
res.extra$cov.mat.list <- cov.mat.list;
res.extra$ustat.list <- ustat.list;
res.extra$vstat.list <- vstat.list;
res.extra$af.mat <- af.mat;
res.extra$ac.mat <- ac.mat;
res.extra$N.mat <- N.mat;
res.extra$log.mat <- log.mat;
p.value.single.out <- format(p.value,digit=out.digits);
pos.single.out <- paste(pos.gold[ix.candidate],sep='');
direction.single.out <- paste(direction.single.vec,sep='',collapse='');
ref.single.out <- c(ref.gold[ix.candidate]);
alt.single.out <- c(alt.gold[ix.candidate]);
beta1.est.single.out <- c(format(beta1.est,digits=out.digits));
beta1.sd.single.out <- c(format(beta1.sd,digits=out.digits));
af.single.out <- c(format(af.vec[ix.candidate],digits=out.digits));
anno.single.out <- anno.gold[ix.candidate];
log.out <- paste(log.mat[ix.candidate,],sep=",",collapse=",")
}
res.single.out <- cbind(matrix(unlist(strsplit(pos.single.out,split=":")),ncol=2,byrow=TRUE),ref.single.out,alt.single.out,N.vec[ix.candidate],p.value.single.out,af.single.out,beta1.est.single.out,beta1.sd.single.out,direction.single.out,pos.ref.alt.known.single.out,log.out,note);
colnames(res.single.out) <- c("CHROM","POS","REF","ALT","SAMPLESIZE","PVALUE","AF","BETA_EST","BETA_SD","DIRECTION","POS_REF_ALT_KNOWN","LOG","NOTE");
return(list(res=res.extra,
res.out=res.single.out));
}
r2cov.mat <- function(r2.mat,maf.vec)
{
var.vec <- sqrt(maf.vec*(1-maf.vec)*2);
var.mat <- (var.vec%*%t(var.vec));
cov.mat <- r2.mat*var.mat;
return(cov.mat);
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.