#' Summarize individual-variant analysis results generated by \code{STAARpipeline} package
#'
#' The \code{Individual_Analysis_Results_Summary} function takes in the objects of individual analysis results
#' generated by \code{STAARpipeline} package,
#' the object from fitting the null model, and the set of known variants to be adjusted for in conditional analysis
#' to summarize the individual analysis results and analyze the conditional association between a quantitative/dichotomous phenotype and
#' the unconditional significant single variants.
#' @param agds_dir a data farme containing directory of GDS/aGDS files.
#' @param jobs_num a data frame containing the number of analysis results, including the number of individual analysis results,
#' the number of sliding window analysis results, and the number of dynamic window analysis results.
#' @param input_path the directory of individual analysis results that generated by \code{STAARpipeline} package.
#' @param output_path the directory for the output files.
#' @param individual_results_name the file name of individual analysis results generated by \code{STAARpipeline} package.
#' @param obj_nullmodel an object from fitting the null model, which is either the output from \code{fit_nullmodel} function in the \code{STAARpipeline} package,
#' or the output from \code{fitNullModel} function in the \code{GENESIS} package and transformed using the \code{genesis2staar_nullmodel} function in the \code{STAARpipeline} package.
#' @param known_loci the data frame of variants to be adjusted for in conditional analysis and should
#' contain 4 columns in the following order: chromosome (CHR), position (POS), reference allele (REF),
#' and alternative allele (ALT) (default = NULL).
#' @param method_cond a character value indicating the method for conditional analysis.
#' \code{optimal} refers to regressing residuals from the null model on \code{known_loci}
#' as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals;
#' \code{naive} refers to regressing residuals from the null model on \code{known_loci}
#' and taking the residuals (default = \code{optimal}).
#' @param QC_label channel name of the QC label in the GDS/aGDS file.
#' @param variant_type type of variant included in the analysis. Choices include "variant", "SNV", or "Indel" (default = "variant").
#' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean").
#' @param alpha p-value threshold of significant results (default = 5E-09).
#' @param manhattan_plot output manhattan plot or not (default = FALSE).
#' @param QQ_plot output Q-Q plot or not (default = FALSE).
#' @param SPA_p_filter logical: are only the variants with a score-test-based p-value smaller than a pre-specified threshold use the SPA method to recalculate the p-value, only used for imbalanced case-control setting (default = FALSE).
#' @param p_filter_cutoff threshold for the p-value recalculation using the SPA method, only used for imbalanced case-control setting (default = 0.05)
#' @param cond_null_model_name the null model name for conditional analysis in the SPA setting, only used for imbalanced case-control setting (default = NULL).
#' @param cond_null_model_dir the directory of storing the null model for conditional analysis in the SPA setting, only used for imbalanced case-control setting (default = NULL).
#' @return The function returns the following analysis results:
#' @return \code{results_individual_analysis_genome.Rdata}: a matrix contains the score test p-value and effect size estimation of each variant across the genome.
#' @return \code{results_individual_analysis_sig.Rdata} and \code{results_individual_analysis_sig.csv}: a matrix contains the score test p-values and effect size estimations of significant results (p-value < alpha).
#' @return \code{results_sig_cond.Rdata} and \code{results_sig_cond.csv}: a matrix contains the conditional score test p-values for each significant variant (available if known_loci is not a NULL).
#' @return manhattan plot (optional) and Q-Q plot (optional) of the individual analysis results.
#' @references Li, Z., Li, X., et al. (2022). A framework for detecting
#' noncoding rare-variant associations of large-scale whole-genome sequencing
#' studies. \emph{Nature Methods}, \emph{19}(12), 1599-1611.
#' (\href{https://doi.org/10.1038/s41592-022-01640-x}{pub})
#' @export
Individual_Analysis_Results_Summary <- function(agds_dir,jobs_num,input_path,output_path,individual_results_name,
obj_nullmodel,known_loci=NULL,
method_cond=c("optimal","naive"),
QC_label="annotation/filter",variant_type=c("variant","SNV","Indel"),geno_missing_imputation=c("mean","minor"),
alpha=5E-09,manhattan_plot=FALSE,QQ_plot=FALSE,
SPA_p_filter=FALSE,p_filter_cutoff=0.05,
cond_null_model_name=NULL,cond_null_model_dir=NULL){
## evaluate choices
method_cond <- match.arg(method_cond)
geno_missing_imputation <- match.arg(geno_missing_imputation)
## set SPA
if(!is.null(obj_nullmodel$use_SPA))
{
use_SPA <- obj_nullmodel$use_SPA
}else
{
use_SPA <- FALSE
}
## Summarize Individual Analysis Results
results_individual_analysis_genome <- c()
num <- 0
for(chr in 1:22)
{
print(chr)
if(chr > 1)
{
num <- num + jobs_num$individual_analysis_num[chr-1]
}
results_individual_analysis_chr <- c()
for(kk in 1:jobs_num$individual_analysis_num[chr])
{
print(kk)
job_id <- kk + num
results_individual_analysis <- get(load(paste0(input_path,individual_results_name,"_",job_id,".Rdata")))
results_individual_analysis_chr <- rbind(results_individual_analysis_chr,results_individual_analysis)
}
results_individual_analysis_genome <- rbind(results_individual_analysis_genome,results_individual_analysis_chr)
rm(results_individual_analysis_chr)
}
# save results
save(results_individual_analysis_genome,file=paste0(output_path,"results_individual_analysis_genome.Rdata"))
## Significant findings
results_sig <- results_individual_analysis_genome[results_individual_analysis_genome$pvalue<alpha,]
# save significant results
save(results_sig,file=paste0(output_path,"results_individual_analysis_sig.Rdata"))
write.csv(results_sig,paste0(output_path,"results_individual_analysis_sig.csv"))
## manhattan plot
if(manhattan_plot)
{
png(paste0(output_path,"manhattan_MAC_20.png"), width = 9, height = 6, units = 'in', res = 600)
pvalue <- results_individual_analysis_genome$pvalue
if(min(pvalue)==0)
{
if(!use_SPA)
{
print(manhattan_plot(results_individual_analysis_genome$CHR, results_individual_analysis_genome$POS, results_individual_analysis_genome$pvalue_log10, use_logp=TRUE, col = c("blue4", "orange3"),sig.level=alpha))
}else
{
pvalue_log10 <- -log10(pvalue)
pvalue_log10[!is.finite(pvalue_log10)] <- 308
print(manhattan_plot(results_individual_analysis_genome$CHR, results_individual_analysis_genome$POS, pvalue_log10, use_logp=TRUE, col = c("blue4", "orange3"),sig.level=alpha))
rm(pvalue_log10)
}
}else
{
print(manhattan_plot(results_individual_analysis_genome$CHR, results_individual_analysis_genome$POS, results_individual_analysis_genome$pvalue, col = c("blue4", "orange3"),sig.level=alpha))
}
rm(pvalue)
gc()
dev.off()
}
## Q-Q plot
if(QQ_plot)
{
observed <- sort(results_individual_analysis_genome$pvalue)
if(min(observed)==0)
{
if(!use_SPA)
{
lobs <- sort(results_individual_analysis_genome$pvalue_log10,decreasing = TRUE)
}else
{
lobs <- -(log10(observed))
lobs[!is.finite(lobs)] <- 308
}
}else
{
lobs <- -(log10(observed))
}
expected <- c(1:length(observed))
lexp <- -(log10(expected / (length(expected)+1)))
rm(results_individual_analysis_genome)
gc()
png(paste0(output_path,"qqplot_MAC_20.png"), width = 9, height = 9, units = 'in', res = 600)
par(mar=c(5,6,4,4))
plot(lexp,lobs,pch=20, cex=1, xlim = c(0, max(lexp)), ylim = c(0, max(lobs)),
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)
abline(0, 1, col="red",lwd=2)
dev.off()
}
## Conditional Analysis
if(length(known_loci)!=0)
{
if(!use_SPA)
{
results_sig_cond <- c()
for(chr in 1:22)
{
if(sum(results_sig$CHR==chr)>=1)
{
results_sig_chr <- results_sig[results_sig$CHR==chr,]
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
results_sig_cond_chr <- Individual_Analysis_cond(chr=chr,individual_results=results_sig_chr,genofile=genofile,obj_nullmodel=obj_nullmodel,
known_loci=known_loci,method_cond=method_cond,QC_label=QC_label,
variant_type=variant_type,geno_missing_imputation=geno_missing_imputation)
results_sig_cond <- rbind(results_sig_cond,results_sig_cond_chr)
seqClose(genofile)
}
}
save(results_sig_cond,file=paste0(output_path,"results_sig_cond.Rdata"))
write.csv(results_sig_cond,paste0(output_path,"results_sig_cond.csv"))
}else
{
results_sig_cond <- c()
for(chr in 1:22)
{
if(sum(results_sig$CHR==chr)>=1)
{
if(file.exists(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
{
results_sig_chr <- results_sig[results_sig$CHR==chr,,drop=FALSE]
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
obj_nullmodel_cond <- get(load(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
results_sig_cond_chr <- Individual_Analysis_cond_spa(chr=chr,individual_results=results_sig_chr,genofile=genofile,obj_nullmodel=obj_nullmodel_cond,
QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)
results_sig_cond <- rbind(results_sig_cond,results_sig_cond_chr)
seqClose(genofile)
}else
{
results_sig_chr <- results_sig[results_sig$CHR==chr,,drop=FALSE]
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
results_sig_cond_chr <- Individual_Analysis_cond_spa(chr=chr,individual_results=results_sig_chr,genofile=genofile,obj_nullmodel=obj_nullmodel,
QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)
results_sig_cond <- rbind(results_sig_cond,results_sig_cond_chr)
seqClose(genofile)
}
}
}
save(results_sig_cond,file=paste0(output_path,"results_sig_cond.Rdata"))
write.csv(results_sig_cond,paste0(output_path,"results_sig_cond.csv"))
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.