#' Summarize gene-centric noncoding analysis results generated by \code{STAARpipeline} package
#'
#' The \code{Gene_Centric_Noncoding_Results_Summary} function takes in the objects of gene-centric noncoding analysis results
#' generated by \code{STAARpipeline} package,
#' the object from fitting the null model, and the set of known variants to be adjusted for in conditional analysis
#' to summarize the gene-centric noncoding analysis results and analyze the conditional association between a quantitative/dichotomous phenotype
#' (including imbalanced case-control design) and
#' the rare variants in the unconditional significant noncoding masks.
#' @param agds_dir a data farme containing directory of GDS/aGDS files.
#' @param gene_centric_noncoding_jobs_num the number of results for gene-centric noncoding analysis of protein-coding genes generated by \code{STAARpipeline} package.
#' @param input_path the directory of gene-centric noncoding analysis results for protein-coding genes that generated by \code{STAARpipeline} package.
#' @param output_path the directory for the output files of the summary of gene-centric noncoding analysis results for protein-coding genes.
#' @param gene_centric_results_name the file name of gene-centric noncoding analysis results for protein-coding genes generated by \code{STAARpipeline} package.
#' @param ncRNA_jobs_num the number of results for gene-centric noncoding analysis of ncRNA genes generated by \code{STAARpipeline} package..
#' @param ncRNA_input_path the directory of gene-centric noncoding analysis results for ncRNA genes that generated by \code{STAARpipeline} package.
#' @param ncRNA_output_path the directory for the output files of the summary of gene-centric noncoding analysis results for ncRNA genes.
#' @param ncRNA_results_name file name of gene-centric noncoding analysis results for ncRNA genes that generated by \code{STAARpipeline} package.
#' @param obj_nullmodel an object from fitting the null model, which is either the output from \code{fit_nullmodel} function in the \code{STAARpipeline} package,
#' or the output from \code{fitNullModel} function in the \code{GENESIS} package and transformed using the \code{genesis2staar_nullmodel} function in the \code{STAARpipeline} package.
#' @param known_loci the data frame of variants to be adjusted for in conditional analysis and should
#' contain 4 columns in the following order: chromosome (CHR), position (POS), reference allele (REF),
#' and alternative allele (ALT) (default = NULL).
#' @param cMAC_cutoff the cutoff of the minimum number of the cumulative minor allele of variants in the masks
#' when summarizing the results (default = 0).
#' @param method_cond a character value indicating the method for conditional analysis.
#' \code{optimal} refers to regressing residuals from the null model on \code{known_loci}
#' as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals;
#' \code{naive} refers to regressing residuals from the null model on \code{known_loci}
#' and taking the residuals (default = \code{optimal}).
#' @param rare_maf_cutoff the cutoff of maximum minor allele frequency in
#' defining rare variants (default = 0.01).
#' @param QC_label channel name of the QC label in the GDS/aGDS file (default = "annotation/filter").
#' @param variant_type type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV").
#' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean").
#' @param Annotation_dir channel name of the annotations in the aGDS file \cr (default = "annotation/info/FunctionalAnnotation").
#' @param Annotation_name_catalog a data frame containing the name and the corresponding channel name in the aGDS file.
#' @param Use_annotation_weights use annotations as weights or not (default = FALSE).
#' @param Annotation_name a vector of annotation names used in STAAR (default = NULL).
#' @param alpha p-value threshold of significant results of protein coding genes (default = 2.5E-06).
#' @param alpha_ncRNA p-value threshold of significant results of ncRNA genes (default = 2.5E-06).
#' @param ncRNA_pos positions of ncRNA genes, required for generating the Manhattan plot and Q-Q plot of the results of ncRNA genes (default=NULL).
#' @param manhattan_plot output manhattan plot or not (default = FALSE).
#' @param QQ_plot output Q-Q plot or not (default = FALSE).
#' @param cond_null_model_name the null model name for conditional analysis in the SPA setting, only used for imbalanced case-control setting (default = NULL).
#' @param cond_null_model_dir the directory of storing the null model for conditional analysis in the SPA setting, only used for imbalanced case-control setting (default = NULL).
#' @param SPA_p_filter logical: are only the variants with a normal approximation based p-value smaller than a pre-specified threshold use the SPA method to recalculate the p-value, only used for imbalanced case-control setting (default = FALSE).
#' @param p_filter_cutoff threshold for the p-value recalculation using the SPA method, only used for imbalanced case-control setting (default = 0.05).
#' @return The function returns the following analysis results:
#' @return \code{noncoding_sig.csv}: a matrix that summarized the unconditional significant region detected by STAAR-O or STAAR-B in imbalanced case-control setting (STAAR-O/-B pvalue smaller than the threshold alpha),
#' including gene name ("Gene name"), chromosome ("chr"), coding functional category ("Category"), number of variants ("#SNV"),
#' and the unconditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting).
#' @return \code{noncoding_sig_cond.csv}: a matrix that summarized the conditional analysis results of the unconditional significant region detected by STAAR-O or STAAR-B in imbalanced case-control setting (available if known_loci is not a NULL),
#' including gene name ("Gene name"), chromosome ("chr"), coding functional category ("Category"), number of variants ("#SNV"),
#' and the conditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting).
#' @return \code{results_UTR_genome}: a matrix contains the STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the noncoding masks defined by UTR variants (UTR) for all protein-coding genes across the genome.
#' @return \code{UTR_sig.csv}: a matrix contains the unconditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the unconditional significant UTR masks.
#' @return \code{UTR_sig_cond.csv}: a matrix contains the conditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the unconditional significant UTR masks (available if known_loci is not a NULL).
#' @return \code{results_upstream_genome}: a matrix contains the STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the noncoding masks defined by upstream variants (upstream) for all protein-coding genes across the genome.
#' @return \code{upstream_sig.csv}: a matrix contains the unconditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the unconditional significant upstream masks.
#' @return \code{upstream_sig_cond.csv}: a matrix contains the conditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the unconditional significant upstream masks (available if known_loci is not a NULL).
#' @return \code{results_downstream_genome}: a matrix contains the STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the noncoding masks defined by downstream variants (downstream) for all protein-coding genes across the genome.
#' @return \code{downstream_sig.csv}: a matrix contains the unconditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the unconditional significant downstream masks.
#' @return \code{downstream_sig_cond.csv}: a matrix contains the conditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the unconditional significant downstream masks (available if known_loci is not a NULL).
#' @return \code{results_promoter_CAGE_genome}: a matrix contains the STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the noncoding masks defined by variants overlaid with CAGE sites in the promoter (promoter_CAGE) for all protein-coding genes across the genome.
#' @return \code{promoter_CAGE_sig.csv}: a matrix contains the unconditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the unconditional significant promoter_CAGE masks.
#' @return \code{promoter_CAGE_sig_cond.csv}: a matrix contains the conditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the unconditional significant promoter_CAGE masks (available if known_loci is not a NULL).
#' @return \code{results_promoter_DHS_genome}: a matrix contains the STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the noncoding masks defined by variants overlaid with DHS sites in the promoter (promoter_DHS) for all protein-coding genes across the genome.
#' @return \code{promoter_DHS_sig.csv}: a matrix contains the unconditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the unconditional significant promoter_DHS masks.
#' @return \code{promoter_DHS_sig_cond.csv}: a matrix contains the conditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the unconditional significant promoter_DHS masks (available if known_loci is not a NULL).
#' @return \code{results_enhancer_CAGE_genome}: a matrix contains the STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the noncoding masks defined by variants overlaid with CAGE sites in the enhancer (enhancer_CAGE) for all protein-coding genes across the genome.
#' @return \code{enhancer_CAGE_sig.csv}: a matrix contains the unconditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the unconditional significant enhancer_CAGE masks.
#' @return \code{enhancer_CAGE_sig_cond.csv}: a matrix contains the conditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the unconditional significant enhancer_CAGE masks (available if known_loci is not a NULL).
#' @return \code{results_enhancer_DHS_genome}: a matrix contains the STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the noncoding masks defined by variants overlaid with DHS sites in the enhancer (enhancer_DHS) for all protein-coding genes across the genome.
#' @return \code{enhancer_DHS_sig.csv}: a matrix contains the unconditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the unconditional significant enhancer_DHS masks.
#' @return \code{enhancer_DHS_sig_cond.csv}: a matrix contains the conditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the unconditional significant enhancer_DHS masks (available if known_loci is not a NULL).
#' @return \code{results_ncRNA_genome}: a matrix contains the STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the noncoding masks defined by exonic and splicing ncRNA variants (ncRNA) for all ncRNA genes across the genome.
#' @return \code{ncRNA_sig.csv}: a matrix contains the unconditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the unconditional significant ncRNA masks.
#' @return \code{ncRNA_sig_cond.csv}: a matrix contains the conditional STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) of the unconditional significant ncRNA masks (available if known_loci is not a NULL).
#' @return manhattan plot (optional) and Q-Q plot (optional) of the gene-centric noncoding analysis results.
#' @references Li, Z., Li, X., et al. (2022). A framework for detecting
#' noncoding rare-variant associations of large-scale whole-genome sequencing
#' studies. \emph{Nature Methods}, \emph{19}(12), 1599-1611.
#' (\href{https://doi.org/10.1038/s41592-022-01640-x}{pub})
#' @export
Gene_Centric_Noncoding_Results_Summary <- function(agds_dir,gene_centric_noncoding_jobs_num,input_path,output_path,gene_centric_results_name,
ncRNA_jobs_num,ncRNA_input_path,ncRNA_output_path,ncRNA_results_name,
obj_nullmodel,known_loci=NULL,cMAC_cutoff=0,
method_cond=c("optimal","naive"),rare_maf_cutoff=0.01,
QC_label="annotation/filter",variant_type=c("SNV","Indel","variant"),geno_missing_imputation=c("mean","minor"),
Annotation_dir="annotation/info/FunctionalAnnotation",Annotation_name_catalog,
Use_annotation_weights=FALSE,Annotation_name=NULL,
alpha=2.5E-06,alpha_ncRNA=2.5E-06,
ncRNA_pos=NULL,manhattan_plot=FALSE,QQ_plot=FALSE,
cond_null_model_name=NULL,cond_null_model_dir=NULL,
SPA_p_filter=FALSE,p_filter_cutoff=0.05){
## evaluate choices
method_cond <- match.arg(method_cond)
variant_type <- match.arg(variant_type)
geno_missing_imputation <- match.arg(geno_missing_imputation)
## SPA status
if(!is.null(obj_nullmodel$use_SPA))
{
use_SPA <- obj_nullmodel$use_SPA
}else
{
use_SPA <- FALSE
}
#######################################################
# summarize unconditional analysis results
#######################################################
results_noncoding_genome <- c()
for(kk in 1:gene_centric_noncoding_jobs_num)
{
print(kk)
results_noncoding <- get(load(paste0(input_path,gene_centric_results_name,"_",kk,".Rdata")))
results_noncoding_genome <- c(results_noncoding_genome,results_noncoding)
}
results_UTR_genome <- c()
results_upstream_genome <- c()
results_downstream_genome <- c()
results_promoter_CAGE_genome <- c()
results_promoter_DHS_genome <- c()
results_enhancer_CAGE_genome <- c()
results_enhancer_DHS_genome <- c()
for(kk in 1:length(results_noncoding_genome))
{
results <- results_noncoding_genome[[kk]]
if(is.null(results)==FALSE)
{
### UTR
if(results[3]=="UTR")
{
results_UTR_genome <- rbind(results_UTR_genome,results)
}
### upstream
if(results[3]=="upstream")
{
results_upstream_genome <- rbind(results_upstream_genome,results)
}
### downstream
if(results[3]=="downstream")
{
results_downstream_genome <- rbind(results_downstream_genome,results)
}
### promoter_CAGE
if(results[3]=="promoter_CAGE")
{
results_promoter_CAGE_genome <- rbind(results_promoter_CAGE_genome,results)
}
### promoter_DHS
if(results[3]=="promoter_DHS")
{
results_promoter_DHS_genome <- rbind(results_promoter_DHS_genome,results)
}
### enhancer_CAGE
if(results[3]=="enhancer_CAGE")
{
results_enhancer_CAGE_genome <- rbind(results_enhancer_CAGE_genome,results)
}
### enhancer_DHS
if(results[3]=="enhancer_DHS")
{
results_enhancer_DHS_genome <- rbind(results_enhancer_DHS_genome,results)
}
}
if(kk%%1000==0)
{
print(kk)
}
}
###### cMAC_cutoff
# UTR
results_UTR_genome <- results_UTR_genome[results_UTR_genome[,"cMAC"]>cMAC_cutoff,]
# upstream
results_upstream_genome <- results_upstream_genome[results_upstream_genome[,"cMAC"]>cMAC_cutoff,]
# downstream
results_downstream_genome <- results_downstream_genome[results_downstream_genome[,"cMAC"]>cMAC_cutoff,]
# promoter_CAGE
results_promoter_CAGE_genome <- results_promoter_CAGE_genome[results_promoter_CAGE_genome[,"cMAC"]>cMAC_cutoff,]
# promoter_DHS
results_promoter_DHS_genome <- results_promoter_DHS_genome[results_promoter_DHS_genome[,"cMAC"]>cMAC_cutoff,]
# enhancer_CAGE
results_enhancer_CAGE_genome <- results_enhancer_CAGE_genome[results_enhancer_CAGE_genome[,"cMAC"]>cMAC_cutoff,]
# enhancer_DHS
results_enhancer_DHS_genome <- results_enhancer_DHS_genome[results_enhancer_DHS_genome[,"cMAC"]>cMAC_cutoff,]
###### whole-genome results
# UTR
save(results_UTR_genome,file=paste0(output_path,"UTR.Rdata"))
# upstream
save(results_upstream_genome,file=paste0(output_path,"upstream.Rdata"))
# downstream
save(results_downstream_genome,file=paste0(output_path,"downstream.Rdata"))
# promoter CAGE
save(results_promoter_CAGE_genome,file=paste0(output_path,"promoter_CAGE.Rdata"))
# promoter DHS
save(results_promoter_DHS_genome,file=paste0(output_path,"promoter_DHS.Rdata"))
# enhancer CAGE
save(results_enhancer_CAGE_genome,file=paste0(output_path,"enhancer_CAGE.Rdata"))
# enahncer DHS
save(results_enhancer_DHS_genome,file=paste0(output_path,"enhancer_DHS.Rdata"))
###### ncRNA
results_ncRNA_genome <- c()
for(kk in 1:ncRNA_jobs_num)
{
print(kk)
results_ncRNA <- get(load(paste0(ncRNA_input_path,ncRNA_results_name,"_",kk,".Rdata")))
results_ncRNA_genome <- rbind(results_ncRNA_genome,results_ncRNA)
}
###### cMAC_cutoff
results_ncRNA_genome <- results_ncRNA_genome[results_ncRNA_genome[,"cMAC"]>cMAC_cutoff,]
###### whole-genome results
save(results_ncRNA_genome,file=paste0(ncRNA_output_path,"results_ncRNA_genome.Rdata"))
###### significant results
if(!use_SPA)
{
### UTR
UTR_sig <- results_UTR_genome[results_UTR_genome[,"STAAR-O"]<alpha,,drop=FALSE]
write.csv(UTR_sig,file=paste0(output_path,"UTR_sig.csv"))
noncoding_sig <- c()
noncoding_sig <- rbind(noncoding_sig,UTR_sig)
### upstream
upstream_sig <- results_upstream_genome[results_upstream_genome[,"STAAR-O"]<alpha,,drop=FALSE]
write.csv(upstream_sig,file=paste0(output_path,"upstream_sig.csv"))
noncoding_sig <- rbind(noncoding_sig,upstream_sig)
### downstream
downstream_sig <- results_downstream_genome[results_downstream_genome[,"STAAR-O"]<alpha,,drop=FALSE]
write.csv(downstream_sig,file=paste0(output_path,"downstream_sig.csv"))
noncoding_sig <- rbind(noncoding_sig,downstream_sig)
### promoter_CAGE
promoter_CAGE_sig <- results_promoter_CAGE_genome[results_promoter_CAGE_genome[,"STAAR-O"]<alpha,,drop=FALSE]
write.csv(promoter_CAGE_sig,file=paste0(output_path,"promoter_CAGE_sig.csv"))
noncoding_sig <- rbind(noncoding_sig,promoter_CAGE_sig)
### promoter_DHS
promoter_DHS_sig <- results_promoter_DHS_genome[results_promoter_DHS_genome[,"STAAR-O"]<alpha,,drop=FALSE]
write.csv(promoter_DHS_sig,file=paste0(output_path,"promoter_DHS_sig.csv"))
noncoding_sig <- rbind(noncoding_sig,promoter_DHS_sig)
### enhancer_CAGE
enhancer_CAGE_sig <- results_enhancer_CAGE_genome[results_enhancer_CAGE_genome[,"STAAR-O"]<alpha,,drop=FALSE]
write.csv(enhancer_CAGE_sig,file=paste0(output_path,"enhancer_CAGE_sig.csv"))
noncoding_sig <- rbind(noncoding_sig,enhancer_CAGE_sig)
### enhancer_DHS
enhancer_DHS_sig <- results_enhancer_DHS_genome[results_enhancer_DHS_genome[,"STAAR-O"]<alpha,,drop=FALSE]
write.csv(enhancer_DHS_sig,file=paste0(output_path,"enhancer_DHS_sig.csv"))
noncoding_sig <- rbind(noncoding_sig,enhancer_DHS_sig)
### ncRNA
ncRNA_sig <- results_ncRNA_genome[results_ncRNA_genome[,"STAAR-O"]<alpha_ncRNA,,drop=FALSE]
write.csv(ncRNA_sig,file=paste0(ncRNA_output_path,"ncRNA_sig.csv"))
noncoding_sig <- rbind(noncoding_sig,ncRNA_sig)
write.csv(noncoding_sig,file=paste0(output_path,"noncoding_sig.csv"))
}else
{
### UTR
UTR_sig <- results_UTR_genome[results_UTR_genome[,"STAAR-B"]<alpha,,drop=FALSE]
write.csv(UTR_sig,file=paste0(output_path,"UTR_sig.csv"))
noncoding_sig <- c()
noncoding_sig <- rbind(noncoding_sig,UTR_sig)
### upstream
upstream_sig <- results_upstream_genome[results_upstream_genome[,"STAAR-B"]<alpha,,drop=FALSE]
write.csv(upstream_sig,file=paste0(output_path,"upstream_sig.csv"))
noncoding_sig <- rbind(noncoding_sig,upstream_sig)
### downstream
downstream_sig <- results_downstream_genome[results_downstream_genome[,"STAAR-B"]<alpha,,drop=FALSE]
write.csv(downstream_sig,file=paste0(output_path,"downstream_sig.csv"))
noncoding_sig <- rbind(noncoding_sig,downstream_sig)
### promoter_CAGE
promoter_CAGE_sig <- results_promoter_CAGE_genome[results_promoter_CAGE_genome[,"STAAR-B"]<alpha,,drop=FALSE]
write.csv(promoter_CAGE_sig,file=paste0(output_path,"promoter_CAGE_sig.csv"))
noncoding_sig <- rbind(noncoding_sig,promoter_CAGE_sig)
### promoter_DHS
promoter_DHS_sig <- results_promoter_DHS_genome[results_promoter_DHS_genome[,"STAAR-B"]<alpha,,drop=FALSE]
write.csv(promoter_DHS_sig,file=paste0(output_path,"promoter_DHS_sig.csv"))
noncoding_sig <- rbind(noncoding_sig,promoter_DHS_sig)
### enhancer_CAGE
enhancer_CAGE_sig <- results_enhancer_CAGE_genome[results_enhancer_CAGE_genome[,"STAAR-B"]<alpha,,drop=FALSE]
write.csv(enhancer_CAGE_sig,file=paste0(output_path,"enhancer_CAGE_sig.csv"))
noncoding_sig <- rbind(noncoding_sig,enhancer_CAGE_sig)
### enhancer_DHS
enhancer_DHS_sig <- results_enhancer_DHS_genome[results_enhancer_DHS_genome[,"STAAR-B"]<alpha,,drop=FALSE]
write.csv(enhancer_DHS_sig,file=paste0(output_path,"enhancer_DHS_sig.csv"))
noncoding_sig <- rbind(noncoding_sig,enhancer_DHS_sig)
### ncRNA
ncRNA_sig <- results_ncRNA_genome[results_ncRNA_genome[,"STAAR-B"]<alpha_ncRNA,,drop=FALSE]
write.csv(ncRNA_sig,file=paste0(ncRNA_output_path,"ncRNA_sig.csv"))
noncoding_sig <- rbind(noncoding_sig,ncRNA_sig)
write.csv(noncoding_sig,file=paste0(output_path,"noncoding_sig.csv"))
}
#######################################################
# conditional analysis
#######################################################
if(length(known_loci)!=0)
{
if(!use_SPA)
{
### UTR
UTR_sig_cond <- c()
if(length(UTR_sig)!=0)
{
if((class(UTR_sig)[1]!="matrix")&(class(UTR_sig)[1]!="data.frame"))
{
UTR_sig <- matrix(UTR_sig,nrow=1)
}
for(k in 1:dim(UTR_sig)[1])
{
chr <- as.numeric(UTR_sig[k,2])
gene_name <- as.character(UTR_sig[k,1])
category <- as.character(UTR_sig[k,3])
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
res_cond <- Gene_Centric_Noncoding_cond(chr=chr,gene_name=gene_name,category=category,genofile=genofile,
obj_nullmodel=obj_nullmodel,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,QC_label=QC_label,
Annotation_dir=Annotation_dir,Use_annotation_weights=Use_annotation_weights,
Annotation_name_catalog=Annotation_name_catalog,Annotation_name=Annotation_name)
UTR_sig_cond <- rbind(UTR_sig_cond,res_cond)
seqClose(genofile)
}
}
write.csv(UTR_sig_cond,file=paste0(output_path,"UTR_sig_cond.csv"))
noncoding_sig_cond <- c()
noncoding_sig_cond <- rbind(noncoding_sig_cond,UTR_sig_cond)
### upstream
upstream_sig_cond <- c()
if(length(upstream_sig)!=0)
{
if((class(upstream_sig)[1]!="matrix")&(class(upstream_sig)[1]!="data.frame"))
{
upstream_sig <- matrix(upstream_sig,nrow=1)
}
for(k in 1:dim(upstream_sig)[1])
{
chr <- as.numeric(upstream_sig[k,2])
gene_name <- as.character(upstream_sig[k,1])
category <- as.character(upstream_sig[k,3])
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
res_cond <- Gene_Centric_Noncoding_cond(chr=chr,gene_name=gene_name,category=category,genofile=genofile,
obj_nullmodel=obj_nullmodel,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,QC_label=QC_label,
Annotation_dir=Annotation_dir,Use_annotation_weights=Use_annotation_weights,
Annotation_name_catalog=Annotation_name_catalog,Annotation_name=Annotation_name)
upstream_sig_cond <- rbind(upstream_sig_cond,res_cond)
seqClose(genofile)
}
}
write.csv(upstream_sig_cond,file=paste0(output_path,"upstream_sig_cond.csv"))
noncoding_sig_cond <- rbind(noncoding_sig_cond,upstream_sig_cond)
### downstream
downstream_sig_cond <- c()
if(length(downstream_sig)!=0)
{
if((class(downstream_sig)[1]!="matrix")&(class(downstream_sig)[1]!="data.frame"))
{
downstream_sig <- matrix(downstream_sig,nrow=1)
}
for(k in 1:dim(downstream_sig)[1])
{
chr <- as.numeric(downstream_sig[k,2])
gene_name <- as.character(downstream_sig[k,1])
category <- as.character(downstream_sig[k,3])
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
res_cond <- Gene_Centric_Noncoding_cond(chr=chr,gene_name=gene_name,category=category,genofile=genofile,
obj_nullmodel=obj_nullmodel,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,QC_label=QC_label,
Annotation_dir=Annotation_dir,Use_annotation_weights=Use_annotation_weights,
Annotation_name_catalog=Annotation_name_catalog,Annotation_name=Annotation_name)
downstream_sig_cond <- rbind(downstream_sig_cond,res_cond)
seqClose(genofile)
}
}
write.csv(downstream_sig_cond,file=paste0(output_path,"downstream_sig_cond.csv"))
noncoding_sig_cond <- rbind(noncoding_sig_cond,downstream_sig_cond)
### promoter_CAGE
promoter_CAGE_sig_cond <- c()
if(length(promoter_CAGE_sig)!=0)
{
if((class(promoter_CAGE_sig)[1]!="matrix")&(class(promoter_CAGE_sig)[1]!="data.frame"))
{
promoter_CAGE_sig <- matrix(promoter_CAGE_sig,nrow=1)
}
for(k in 1:dim(promoter_CAGE_sig)[1])
{
chr <- as.numeric(promoter_CAGE_sig[k,2])
gene_name <- as.character(promoter_CAGE_sig[k,1])
category <- as.character(promoter_CAGE_sig[k,3])
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
res_cond <- Gene_Centric_Noncoding_cond(chr=chr,gene_name=gene_name,category=category,genofile=genofile,
obj_nullmodel=obj_nullmodel,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,QC_label=QC_label,
Annotation_dir=Annotation_dir,Use_annotation_weights=Use_annotation_weights,
Annotation_name_catalog=Annotation_name_catalog,Annotation_name=Annotation_name)
promoter_CAGE_sig_cond <- rbind(promoter_CAGE_sig_cond,res_cond)
seqClose(genofile)
}
}
write.csv(promoter_CAGE_sig_cond,file=paste0(output_path,"promoter_CAGE_sig_cond.csv"))
noncoding_sig_cond <- rbind(noncoding_sig_cond,promoter_CAGE_sig_cond)
### promoter_DHS
promoter_DHS_sig_cond <- c()
if(length(promoter_DHS_sig)!=0)
{
if((class(promoter_DHS_sig)[1]!="matrix")&(class(promoter_DHS_sig)[1]!="data.frame"))
{
promoter_DHS_sig <- matrix(promoter_DHS_sig,nrow=1)
}
for(k in 1:dim(promoter_DHS_sig)[1])
{
chr <- as.numeric(promoter_DHS_sig[k,2])
gene_name <- as.character(promoter_DHS_sig[k,1])
category <- as.character(promoter_DHS_sig[k,3])
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
res_cond <- Gene_Centric_Noncoding_cond(chr=chr,gene_name=gene_name,category=category,genofile=genofile,
obj_nullmodel=obj_nullmodel,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,QC_label=QC_label,
Annotation_dir=Annotation_dir,Use_annotation_weights=Use_annotation_weights,
Annotation_name_catalog=Annotation_name_catalog,Annotation_name=Annotation_name)
promoter_DHS_sig_cond <- rbind(promoter_DHS_sig_cond,res_cond)
seqClose(genofile)
}
}
write.csv(promoter_DHS_sig_cond,file=paste0(output_path,"promoter_DHS_sig_cond.csv"))
noncoding_sig_cond <- rbind(noncoding_sig_cond,promoter_DHS_sig_cond)
### enhancer_CAGE
enhancer_CAGE_sig_cond <- c()
if(length(enhancer_CAGE_sig)!=0)
{
if((class(enhancer_CAGE_sig)[1]!="matrix")&(class(enhancer_CAGE_sig)[1]!="data.frame"))
{
enhancer_CAGE_sig <- matrix(enhancer_CAGE_sig,nrow=1)
}
for(k in 1:dim(enhancer_CAGE_sig)[1])
{
chr <- as.numeric(enhancer_CAGE_sig[k,2])
gene_name <- as.character(enhancer_CAGE_sig[k,1])
category <- as.character(enhancer_CAGE_sig[k,3])
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
res_cond <- Gene_Centric_Noncoding_cond(chr=chr,gene_name=gene_name,category=category,genofile=genofile,
obj_nullmodel=obj_nullmodel,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,QC_label=QC_label,
Annotation_dir=Annotation_dir,Use_annotation_weights=Use_annotation_weights,
Annotation_name_catalog=Annotation_name_catalog,Annotation_name=Annotation_name)
enhancer_CAGE_sig_cond <- rbind(enhancer_CAGE_sig_cond,res_cond)
seqClose(genofile)
}
}
write.csv(enhancer_CAGE_sig_cond,file=paste0(output_path,"enhancer_CAGE_sig_cond.csv"))
noncoding_sig_cond <- rbind(noncoding_sig_cond,enhancer_CAGE_sig_cond)
### enhancer_DHS
enhancer_DHS_sig_cond <- c()
if(length(enhancer_DHS_sig)!=0)
{
if((class(enhancer_DHS_sig)[1]!="matrix")&(class(enhancer_DHS_sig)[1]!="data.frame"))
{
enhancer_DHS_sig <- matrix(enhancer_DHS_sig,nrow=1)
}
for(k in 1:dim(enhancer_DHS_sig)[1])
{
chr <- as.numeric(enhancer_DHS_sig[k,2])
gene_name <- as.character(enhancer_DHS_sig[k,1])
category <- as.character(enhancer_DHS_sig[k,3])
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
res_cond <- Gene_Centric_Noncoding_cond(chr=chr,gene_name=gene_name,category=category,genofile=genofile,
obj_nullmodel=obj_nullmodel,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,QC_label=QC_label,
Annotation_dir=Annotation_dir,Use_annotation_weights=Use_annotation_weights,
Annotation_name_catalog=Annotation_name_catalog,Annotation_name=Annotation_name)
enhancer_DHS_sig_cond <- rbind(enhancer_DHS_sig_cond,res_cond)
seqClose(genofile)
}
}
write.csv(enhancer_DHS_sig_cond,file=paste0(output_path,"enhancer_DHS_sig_cond.csv"))
noncoding_sig_cond <- rbind(noncoding_sig_cond,enhancer_DHS_sig_cond)
### ncRNA
ncRNA_sig_cond <- c()
if(length(ncRNA_sig)!=0)
{
if((class(ncRNA_sig)[1]!="matrix")&(class(ncRNA_sig)[1]!="data.frame"))
{
ncRNA_sig <- matrix(ncRNA_sig,nrow=1)
}
for(k in 1:dim(ncRNA_sig)[1])
{
chr <- as.numeric(ncRNA_sig[k,2])
gene_name <- as.character(ncRNA_sig[k,1])
category <- as.character(ncRNA_sig[k,3])
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
res_cond <- ncRNA_cond(chr=chr,gene_name=gene_name,genofile=genofile,
obj_nullmodel=obj_nullmodel,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,QC_label=QC_label,
Annotation_dir=Annotation_dir,Use_annotation_weights=Use_annotation_weights,
Annotation_name_catalog=Annotation_name_catalog,Annotation_name=Annotation_name)
ncRNA_sig_cond <- rbind(ncRNA_sig_cond,res_cond)
seqClose(genofile)
}
}
write.csv(ncRNA_sig_cond,file=paste0(ncRNA_output_path,"ncRNA_sig_cond.csv"))
noncoding_sig_cond <- rbind(noncoding_sig_cond,ncRNA_sig_cond)
## noncoding
write.csv(noncoding_sig_cond,file=paste0(output_path,"noncoding_sig_cond.csv"))
}else
{
### UTR
UTR_sig_cond <- c()
if(length(UTR_sig)!=0)
{
if((class(UTR_sig)[1]!="matrix")&(class(UTR_sig)[1]!="data.frame"))
{
UTR_sig <- matrix(UTR_sig,nrow=1)
}
for(k in 1:dim(UTR_sig)[1])
{
chr <- as.numeric(UTR_sig[k,2])
gene_name <- as.character(UTR_sig[k,1])
category <- as.character(UTR_sig[k,3])
if(file.exists(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
{
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
obj_nullmodel_cond <- get(load(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
res_cond <- Gene_Centric_Noncoding_cond_spa(chr=chr,gene_name=gene_name,category=category,genofile=genofile,
obj_nullmodel=obj_nullmodel_cond,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,QC_label=QC_label,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)
seqClose(genofile)
}else
{
res_cond <- UTR_sig[k,,drop=FALSE]
res_cond[1,3] <- "UTR_cond"
}
UTR_sig_cond <- rbind(UTR_sig_cond,res_cond)
}
}
write.csv(UTR_sig_cond,file=paste0(output_path,"UTR_sig_cond.csv"))
noncoding_sig_cond <- c()
noncoding_sig_cond <- rbind(noncoding_sig_cond,UTR_sig_cond)
### upstream
upstream_sig_cond <- c()
if(length(upstream_sig)!=0)
{
if((class(upstream_sig)[1]!="matrix")&(class(upstream_sig)[1]!="data.frame"))
{
upstream_sig <- matrix(upstream_sig,nrow=1)
}
for(k in 1:dim(upstream_sig)[1])
{
chr <- as.numeric(upstream_sig[k,2])
gene_name <- as.character(upstream_sig[k,1])
category <- as.character(upstream_sig[k,3])
if(file.exists(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
{
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
obj_nullmodel_cond <- get(load(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
res_cond <- Gene_Centric_Noncoding_cond_spa(chr=chr,gene_name=gene_name,category=category,genofile=genofile,
obj_nullmodel=obj_nullmodel_cond,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,QC_label=QC_label,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)
seqClose(genofile)
}else
{
res_cond <- upstream_sig[k,,drop=FALSE]
res_cond[1,3] <- "upstream_cond"
}
upstream_sig_cond <- rbind(upstream_sig_cond,res_cond)
}
}
write.csv(upstream_sig_cond,file=paste0(output_path,"upstream_sig_cond.csv"))
noncoding_sig_cond <- rbind(noncoding_sig_cond,upstream_sig_cond)
### downstream
downstream_sig_cond <- c()
if(length(downstream_sig)!=0)
{
if((class(downstream_sig)[1]!="matrix")&(class(downstream_sig)[1]!="data.frame"))
{
downstream_sig <- matrix(downstream_sig,nrow=1)
}
for(k in 1:dim(downstream_sig)[1])
{
chr <- as.numeric(downstream_sig[k,2])
gene_name <- as.character(downstream_sig[k,1])
category <- as.character(downstream_sig[k,3])
if(file.exists(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
{
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
obj_nullmodel_cond <- get(load(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
res_cond <- Gene_Centric_Noncoding_cond_spa(chr=chr,gene_name=gene_name,category=category,genofile=genofile,
obj_nullmodel=obj_nullmodel_cond,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,QC_label=QC_label,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)
seqClose(genofile)
}else
{
res_cond <- downstream_sig[k,,drop=FALSE]
res_cond[1,3] <- "downstream_cond"
}
downstream_sig_cond <- rbind(downstream_sig_cond,res_cond)
}
}
write.csv(downstream_sig_cond,file=paste0(output_path,"downstream_sig_cond.csv"))
noncoding_sig_cond <- rbind(noncoding_sig_cond,downstream_sig_cond)
### promoter_CAGE
promoter_CAGE_sig_cond <- c()
if(length(promoter_CAGE_sig)!=0)
{
if((class(promoter_CAGE_sig)[1]!="matrix")&(class(promoter_CAGE_sig)[1]!="data.frame"))
{
promoter_CAGE_sig <- matrix(promoter_CAGE_sig,nrow=1)
}
for(k in 1:dim(promoter_CAGE_sig)[1])
{
chr <- as.numeric(promoter_CAGE_sig[k,2])
gene_name <- as.character(promoter_CAGE_sig[k,1])
category <- as.character(promoter_CAGE_sig[k,3])
if(file.exists(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
{
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
obj_nullmodel_cond <- get(load(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
res_cond <- Gene_Centric_Noncoding_cond_spa(chr=chr,gene_name=gene_name,category=category,genofile=genofile,
obj_nullmodel=obj_nullmodel_cond,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,QC_label=QC_label,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)
seqClose(genofile)
}else
{
res_cond <- promoter_CAGE_sig[k,,drop=FALSE]
res_cond[1,3] <- "promoter_CAGE_cond"
}
promoter_CAGE_sig_cond <- rbind(promoter_CAGE_sig_cond,res_cond)
}
}
write.csv(promoter_CAGE_sig_cond,file=paste0(output_path,"promoter_CAGE_sig_cond.csv"))
noncoding_sig_cond <- rbind(noncoding_sig_cond,promoter_CAGE_sig_cond)
### promoter_DHS
promoter_DHS_sig_cond <- c()
if(length(promoter_DHS_sig)!=0)
{
if((class(promoter_DHS_sig)[1]!="matrix")&(class(promoter_DHS_sig)[1]!="data.frame"))
{
promoter_DHS_sig <- matrix(promoter_DHS_sig,nrow=1)
}
for(k in 1:dim(promoter_DHS_sig)[1])
{
chr <- as.numeric(promoter_DHS_sig[k,2])
gene_name <- as.character(promoter_DHS_sig[k,1])
category <- as.character(promoter_DHS_sig[k,3])
if(file.exists(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
{
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
obj_nullmodel_cond <- get(load(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
res_cond <- Gene_Centric_Noncoding_cond_spa(chr=chr,gene_name=gene_name,category=category,genofile=genofile,
obj_nullmodel=obj_nullmodel_cond,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,QC_label=QC_label,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)
seqClose(genofile)
}else
{
res_cond <- promoter_DHS_sig[k,,drop=FALSE]
res_cond[1,3] <- "promoter_DHS_cond"
}
promoter_DHS_sig_cond <- rbind(promoter_DHS_sig_cond,res_cond)
}
}
write.csv(promoter_DHS_sig_cond,file=paste0(output_path,"promoter_DHS_sig_cond.csv"))
noncoding_sig_cond <- rbind(noncoding_sig_cond,promoter_DHS_sig_cond)
### enhancer_CAGE
enhancer_CAGE_sig_cond <- c()
if(length(enhancer_CAGE_sig)!=0)
{
if((class(enhancer_CAGE_sig)[1]!="matrix")&(class(enhancer_CAGE_sig)[1]!="data.frame"))
{
enhancer_CAGE_sig <- matrix(enhancer_CAGE_sig,nrow=1)
}
for(k in 1:dim(enhancer_CAGE_sig)[1])
{
chr <- as.numeric(enhancer_CAGE_sig[k,2])
gene_name <- as.character(enhancer_CAGE_sig[k,1])
category <- as.character(enhancer_CAGE_sig[k,3])
if(file.exists(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
{
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
obj_nullmodel_cond <- get(load(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
res_cond <- Gene_Centric_Noncoding_cond_spa(chr=chr,gene_name=gene_name,category=category,genofile=genofile,
obj_nullmodel=obj_nullmodel_cond,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,QC_label=QC_label,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)
seqClose(genofile)
}else
{
res_cond <- enhancer_CAGE_sig[k,,drop=FALSE]
res_cond[1,3] <- "enhancer_CAGE_cond"
}
enhancer_CAGE_sig_cond <- rbind(enhancer_CAGE_sig_cond,res_cond)
}
}
write.csv(enhancer_CAGE_sig_cond,file=paste0(output_path,"enhancer_CAGE_sig_cond.csv"))
noncoding_sig_cond <- rbind(noncoding_sig_cond,enhancer_CAGE_sig_cond)
### enhancer_DHS
enhancer_DHS_sig_cond <- c()
if(length(enhancer_DHS_sig)!=0)
{
if((class(enhancer_DHS_sig)[1]!="matrix")&(class(enhancer_DHS_sig)[1]!="data.frame"))
{
enhancer_DHS_sig <- matrix(enhancer_DHS_sig,nrow=1)
}
for(k in 1:dim(enhancer_DHS_sig)[1])
{
chr <- as.numeric(enhancer_DHS_sig[k,2])
gene_name <- as.character(enhancer_DHS_sig[k,1])
category <- as.character(enhancer_DHS_sig[k,3])
if(file.exists(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
{
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
obj_nullmodel_cond <- get(load(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
res_cond <- Gene_Centric_Noncoding_cond_spa(chr=chr,gene_name=gene_name,category=category,genofile=genofile,
obj_nullmodel=obj_nullmodel_cond,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,QC_label=QC_label,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)
seqClose(genofile)
}else
{
res_cond <- enhancer_DHS_sig[k,,drop=FALSE]
res_cond[1,3] <- "enhancer_DHS_cond"
}
enhancer_DHS_sig_cond <- rbind(enhancer_DHS_sig_cond,res_cond)
}
}
write.csv(enhancer_DHS_sig_cond,file=paste0(output_path,"enhancer_DHS_sig_cond.csv"))
noncoding_sig_cond <- rbind(noncoding_sig_cond,enhancer_DHS_sig_cond)
### ncRNA
ncRNA_sig_cond <- c()
if(length(ncRNA_sig)!=0)
{
if((class(ncRNA_sig)[1]!="matrix")&(class(ncRNA_sig)[1]!="data.frame"))
{
ncRNA_sig <- matrix(ncRNA_sig,nrow=1)
}
for(k in 1:dim(ncRNA_sig)[1])
{
chr <- as.numeric(ncRNA_sig[k,2])
gene_name <- as.character(ncRNA_sig[k,1])
category <- as.character(ncRNA_sig[k,3])
if(file.exists(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
{
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)
obj_nullmodel_cond <- get(load(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
res_cond <- ncRNA_cond_spa(chr=chr,gene_name=gene_name,genofile=genofile,
obj_nullmodel=obj_nullmodel_cond,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,QC_label=QC_label,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)
seqClose(genofile)
}else
{
res_cond <- ncRNA_sig[k,,drop=FALSE]
res_cond[1,3] <- "ncRNA_cond"
}
ncRNA_sig_cond <- rbind(ncRNA_sig_cond,res_cond)
}
}
write.csv(ncRNA_sig_cond,file=paste0(ncRNA_output_path,"ncRNA_sig_cond.csv"))
noncoding_sig_cond <- rbind(noncoding_sig_cond,ncRNA_sig_cond)
## noncoding
write.csv(noncoding_sig_cond,file=paste0(output_path,"noncoding_sig_cond.csv"))
}
}
## manhattan plot for protein coding genes
if(manhattan_plot)
{
############## noncoding
### UTR
results_STAAR <- results_UTR_genome[,c(1,2,dim(results_UTR_genome)[2])]
results_m <- c()
for(i in 1:dim(results_STAAR)[2])
{
results_m <- cbind(results_m,unlist(results_STAAR[,i]))
}
colnames(results_m) <- colnames(results_STAAR)
results_m <- data.frame(results_m,stringsAsFactors = FALSE)
results_m[,2] <- as.numeric(results_m[,2])
results_m[,3] <- as.numeric(results_m[,3])
genes_info_manhattan <- dplyr::left_join(genes_info,results_m,by=c("chromosome_name"="Chr","hgnc_symbol"="Gene.name"))
genes_info_manhattan[is.na(genes_info_manhattan)] <- 1
colnames(genes_info_manhattan)[dim(genes_info_manhattan)[2]] <- "UTR"
### upstream
results_STAAR <- results_upstream_genome[,c(1,2,dim(results_upstream_genome)[2])]
results_m <- c()
for(i in 1:dim(results_STAAR)[2])
{
results_m <- cbind(results_m,unlist(results_STAAR[,i]))
}
colnames(results_m) <- colnames(results_STAAR)
results_m <- data.frame(results_m,stringsAsFactors = FALSE)
results_m[,2] <- as.numeric(results_m[,2])
results_m[,3] <- as.numeric(results_m[,3])
genes_info_manhattan <- dplyr::left_join(genes_info_manhattan,results_m,by=c("chromosome_name"="Chr","hgnc_symbol"="Gene.name"))
genes_info_manhattan[is.na(genes_info_manhattan)] <- 1
colnames(genes_info_manhattan)[dim(genes_info_manhattan)[2]] <- "upstream"
### downstream
results_STAAR <- results_downstream_genome[,c(1,2,dim(results_downstream_genome)[2])]
results_m <- c()
for(i in 1:dim(results_STAAR)[2])
{
results_m <- cbind(results_m,unlist(results_STAAR[,i]))
}
colnames(results_m) <- colnames(results_STAAR)
results_m <- data.frame(results_m,stringsAsFactors = FALSE)
results_m[,2] <- as.numeric(results_m[,2])
results_m[,3] <- as.numeric(results_m[,3])
genes_info_manhattan <- dplyr::left_join(genes_info_manhattan,results_m,by=c("chromosome_name"="Chr","hgnc_symbol"="Gene.name"))
genes_info_manhattan[is.na(genes_info_manhattan)] <- 1
colnames(genes_info_manhattan)[dim(genes_info_manhattan)[2]] <- "downstream"
### promoter_CAGE
results_STAAR <- results_promoter_CAGE_genome[,c(1,2,dim(results_promoter_CAGE_genome)[2])]
results_m <- c()
for(i in 1:dim(results_STAAR)[2])
{
results_m <- cbind(results_m,unlist(results_STAAR[,i]))
}
colnames(results_m) <- colnames(results_STAAR)
results_m <- data.frame(results_m,stringsAsFactors = FALSE)
results_m[,2] <- as.numeric(results_m[,2])
results_m[,3] <- as.numeric(results_m[,3])
genes_info_manhattan <- dplyr::left_join(genes_info_manhattan,results_m,by=c("chromosome_name"="Chr","hgnc_symbol"="Gene.name"))
genes_info_manhattan[is.na(genes_info_manhattan)] <- 1
colnames(genes_info_manhattan)[dim(genes_info_manhattan)[2]] <- "promoter_CAGE"
### promoter_DHS
results_STAAR <- results_promoter_DHS_genome[,c(1,2,dim(results_promoter_DHS_genome)[2])]
results_m <- c()
for(i in 1:dim(results_STAAR)[2])
{
results_m <- cbind(results_m,unlist(results_STAAR[,i]))
}
colnames(results_m) <- colnames(results_STAAR)
results_m <- data.frame(results_m,stringsAsFactors = FALSE)
results_m[,2] <- as.numeric(results_m[,2])
results_m[,3] <- as.numeric(results_m[,3])
genes_info_manhattan <- dplyr::left_join(genes_info_manhattan,results_m,by=c("chromosome_name"="Chr","hgnc_symbol"="Gene.name"))
genes_info_manhattan[is.na(genes_info_manhattan)] <- 1
colnames(genes_info_manhattan)[dim(genes_info_manhattan)[2]] <- "promoter_DHS"
### enhancer_CAGE
results_STAAR <- results_enhancer_CAGE_genome[,c(1,2,dim(results_enhancer_CAGE_genome)[2])]
results_m <- c()
for(i in 1:dim(results_STAAR)[2])
{
results_m <- cbind(results_m,unlist(results_STAAR[,i]))
}
colnames(results_m) <- colnames(results_STAAR)
results_m <- data.frame(results_m,stringsAsFactors = FALSE)
results_m[,2] <- as.numeric(results_m[,2])
results_m[,3] <- as.numeric(results_m[,3])
genes_info_manhattan <- dplyr::left_join(genes_info_manhattan,results_m,by=c("chromosome_name"="Chr","hgnc_symbol"="Gene.name"))
genes_info_manhattan[is.na(genes_info_manhattan)] <- 1
colnames(genes_info_manhattan)[dim(genes_info_manhattan)[2]] <- "enhancer_CAGE"
### enhancer_DHS
results_STAAR <- results_enhancer_DHS_genome[,c(1,2,dim(results_enhancer_DHS_genome)[2])]
results_m <- c()
for(i in 1:dim(results_STAAR)[2])
{
results_m <- cbind(results_m,unlist(results_STAAR[,i]))
}
colnames(results_m) <- colnames(results_STAAR)
results_m <- data.frame(results_m,stringsAsFactors = FALSE)
results_m[,2] <- as.numeric(results_m[,2])
results_m[,3] <- as.numeric(results_m[,3])
genes_info_manhattan <- dplyr::left_join(genes_info_manhattan,results_m,by=c("chromosome_name"="Chr","hgnc_symbol"="Gene.name"))
genes_info_manhattan[is.na(genes_info_manhattan)] <- 1
colnames(genes_info_manhattan)[dim(genes_info_manhattan)[2]] <- "enhancer_DHS"
## ylim
noncoding_minp <- min(genes_info_manhattan[,(dim(genes_info_manhattan)[2]-6):dim(genes_info_manhattan)[2]])
min_y <- ceiling(-log10(noncoding_minp)) + 1
pch <- c(0,1,2,3,4,5,6)
figure1 <- manhattan_plot(genes_info_manhattan[,2], (genes_info_manhattan[,3]+genes_info_manhattan[,4])/2, genes_info_manhattan$UTR,sig.level=alpha,pch=pch[1],col = c("blue4", "orange3"),ylim=c(0,min_y),
auto.key=T,key=list(space="top", columns=5, title="Functional Category", cex.title=1, points=TRUE,pch=pch,text=list(c("UTR","upstream","downstream","promoter_CAGE","promoter_DHS","enhancer_CAGE","enhancer_DHS"))))
figure2 <- manhattan_plot(genes_info_manhattan[,2], (genes_info_manhattan[,3]+genes_info_manhattan[,4])/2, genes_info_manhattan$upstream,sig.level=alpha,pch=pch[2],col = c("blue4", "orange3"),ylim=c(0,min_y),
auto.key=T,key=list(space="top", columns=5, title="Functional Category", cex.title=1, points=TRUE,pch=pch,text=list(c("UTR","upstream","downstream","promoter_CAGE","promoter_DHS","enhancer_CAGE","enhancer_DHS"))))
figure3 <- manhattan_plot(genes_info_manhattan[,2], (genes_info_manhattan[,3]+genes_info_manhattan[,4])/2, genes_info_manhattan$downstream,sig.level=alpha,pch=pch[3],col = c("blue4", "orange3"),ylim=c(0,min_y),
auto.key=T,key=list(space="top", columns=5, title="Functional Category", cex.title=1, points=TRUE,pch=pch,text=list(c("UTR","upstream","downstream","promoter_CAGE","promoter_DHS","enhancer_CAGE","enhancer_DHS"))))
figure4 <- manhattan_plot(genes_info_manhattan[,2], (genes_info_manhattan[,3]+genes_info_manhattan[,4])/2, genes_info_manhattan$promoter_CAGE,sig.level=alpha,pch=pch[4],col = c("blue4", "orange3"),ylim=c(0,min_y),
auto.key=T,key=list(space="top", columns=5, title="Functional Category", cex.title=1, points=TRUE,pch=pch,text=list(c("UTR","upstream","downstream","promoter_CAGE","promoter_DHS","enhancer_CAGE","enhancer_DHS"))))
figure5 <- manhattan_plot(genes_info_manhattan[,2], (genes_info_manhattan[,3]+genes_info_manhattan[,4])/2, genes_info_manhattan$promoter_DHS,sig.level=alpha,pch=pch[5],col = c("blue4", "orange3"),ylim=c(0,min_y),
auto.key=T,key=list(space="top", columns=5, title="Functional Category", cex.title=1, points=TRUE,pch=pch,text=list(c("UTR","upstream","downstream","promoter_CAGE","promoter_DHS","enhancer_CAGE","enhancer_DHS"))))
figure6 <- manhattan_plot(genes_info_manhattan[,2], (genes_info_manhattan[,3]+genes_info_manhattan[,4])/2, genes_info_manhattan$enhancer_CAGE,sig.level=alpha,pch=pch[6],col = c("blue4", "orange3"),ylim=c(0,min_y),
auto.key=T,key=list(space="top", columns=5, title="Functional Category", cex.title=1, points=TRUE,pch=pch,text=list(c("UTR","upstream","downstream","promoter_CAGE","promoter_DHS","enhancer_CAGE","enhancer_DHS"))))
figure7 <- manhattan_plot(genes_info_manhattan[,2], (genes_info_manhattan[,3]+genes_info_manhattan[,4])/2, genes_info_manhattan$enhancer_DHS,sig.level=alpha,pch=pch[7],col = c("blue4", "orange3"),ylim=c(0,min_y),
auto.key=T,key=list(space="top", columns=5, title="Functional Category", cex.title=1, points=TRUE,pch=pch,text=list(c("UTR","upstream","downstream","promoter_CAGE","promoter_DHS","enhancer_CAGE","enhancer_DHS"))))
print("Manhattan plot")
png(paste0(output_path,"gene_centric_noncoding_manhattan.png"), width = 9, height = 6, units = 'in', res = 600)
print(figure1)
print(figure2,newpage = FALSE)
print(figure3,newpage = FALSE)
print(figure4,newpage = FALSE)
print(figure5,newpage = FALSE)
print(figure6,newpage = FALSE)
print(figure7,newpage = FALSE)
dev.off()
}
## manhattan plot for ncRNA genes
if(manhattan_plot)
{
if(!is.null(ncRNA_pos))
{
results_ncRNA_genome_temp <- data.frame(results_ncRNA_genome[,c(1,2,dim(results_ncRNA_genome)[2])],stringsAsFactors = FALSE)
results_ncRNA_genome_temp[,2] <- as.numeric(results_ncRNA_genome_temp[,2])
results_ncRNA_genome_temp[,1] <- as.character(results_ncRNA_genome_temp[,1])
results_ncRNA_genome_temp[,3] <- as.numeric(results_ncRNA_genome_temp[,3])
ncRNA_gene_pos_results <- dplyr::left_join(ncRNA_pos,results_ncRNA_genome_temp,by=c("chr"="Chr","ncRNA"="Gene.name"))
ncRNA_gene_pos_results[is.na(ncRNA_gene_pos_results[,5]),5] <- 1
print("ncRNA Manhattan plot")
png(paste0(ncRNA_output_path,"gene_centric_ncRNA_manhattan.png"), width = 9, height = 6, units = 'in', res = 600)
print(manhattan_plot(as.numeric(ncRNA_gene_pos_results[,1]), (as.numeric(ncRNA_gene_pos_results[,3])+as.numeric(ncRNA_gene_pos_results[,4]))/2, as.numeric(ncRNA_gene_pos_results[,5]), col = c("blue4", "orange3"),sig.level=alpha_ncRNA))
dev.off()
}
}
## Q-Q plot for protein coding genes
if(QQ_plot)
{
if(!manhattan_plot)
{
############## noncoding
### UTR
results_STAAR <- results_UTR_genome[,c(1,2,dim(results_UTR_genome)[2])]
results_m <- c()
for(i in 1:dim(results_STAAR)[2])
{
results_m <- cbind(results_m,unlist(results_STAAR[,i]))
}
colnames(results_m) <- colnames(results_STAAR)
results_m <- data.frame(results_m,stringsAsFactors = FALSE)
results_m[,2] <- as.numeric(results_m[,2])
results_m[,3] <- as.numeric(results_m[,3])
genes_info_manhattan <- dplyr::left_join(genes_info,results_m,by=c("chromosome_name"="Chr","hgnc_symbol"="Gene.name"))
genes_info_manhattan[is.na(genes_info_manhattan)] <- 1
colnames(genes_info_manhattan)[dim(genes_info_manhattan)[2]] <- "UTR"
### upstream
results_STAAR <- results_upstream_genome[,c(1,2,dim(results_upstream_genome)[2])]
results_m <- c()
for(i in 1:dim(results_STAAR)[2])
{
results_m <- cbind(results_m,unlist(results_STAAR[,i]))
}
colnames(results_m) <- colnames(results_STAAR)
results_m <- data.frame(results_m,stringsAsFactors = FALSE)
results_m[,2] <- as.numeric(results_m[,2])
results_m[,3] <- as.numeric(results_m[,3])
genes_info_manhattan <- dplyr::left_join(genes_info_manhattan,results_m,by=c("chromosome_name"="Chr","hgnc_symbol"="Gene.name"))
genes_info_manhattan[is.na(genes_info_manhattan)] <- 1
colnames(genes_info_manhattan)[dim(genes_info_manhattan)[2]] <- "upstream"
### downstream
results_STAAR <- results_downstream_genome[,c(1,2,dim(results_downstream_genome)[2])]
results_m <- c()
for(i in 1:dim(results_STAAR)[2])
{
results_m <- cbind(results_m,unlist(results_STAAR[,i]))
}
colnames(results_m) <- colnames(results_STAAR)
results_m <- data.frame(results_m,stringsAsFactors = FALSE)
results_m[,2] <- as.numeric(results_m[,2])
results_m[,3] <- as.numeric(results_m[,3])
genes_info_manhattan <- dplyr::left_join(genes_info_manhattan,results_m,by=c("chromosome_name"="Chr","hgnc_symbol"="Gene.name"))
genes_info_manhattan[is.na(genes_info_manhattan)] <- 1
colnames(genes_info_manhattan)[dim(genes_info_manhattan)[2]] <- "downstream"
### promoter_CAGE
results_STAAR <- results_promoter_CAGE_genome[,c(1,2,dim(results_promoter_CAGE_genome)[2])]
results_m <- c()
for(i in 1:dim(results_STAAR)[2])
{
results_m <- cbind(results_m,unlist(results_STAAR[,i]))
}
colnames(results_m) <- colnames(results_STAAR)
results_m <- data.frame(results_m,stringsAsFactors = FALSE)
results_m[,2] <- as.numeric(results_m[,2])
results_m[,3] <- as.numeric(results_m[,3])
genes_info_manhattan <- dplyr::left_join(genes_info_manhattan,results_m,by=c("chromosome_name"="Chr","hgnc_symbol"="Gene.name"))
genes_info_manhattan[is.na(genes_info_manhattan)] <- 1
colnames(genes_info_manhattan)[dim(genes_info_manhattan)[2]] <- "promoter_CAGE"
### promoter_DHS
results_STAAR <- results_promoter_DHS_genome[,c(1,2,dim(results_promoter_DHS_genome)[2])]
results_m <- c()
for(i in 1:dim(results_STAAR)[2])
{
results_m <- cbind(results_m,unlist(results_STAAR[,i]))
}
colnames(results_m) <- colnames(results_STAAR)
results_m <- data.frame(results_m,stringsAsFactors = FALSE)
results_m[,2] <- as.numeric(results_m[,2])
results_m[,3] <- as.numeric(results_m[,3])
genes_info_manhattan <- dplyr::left_join(genes_info_manhattan,results_m,by=c("chromosome_name"="Chr","hgnc_symbol"="Gene.name"))
genes_info_manhattan[is.na(genes_info_manhattan)] <- 1
colnames(genes_info_manhattan)[dim(genes_info_manhattan)[2]] <- "promoter_DHS"
### enhancer_CAGE
results_STAAR <- results_enhancer_CAGE_genome[,c(1,2,dim(results_enhancer_CAGE_genome)[2])]
results_m <- c()
for(i in 1:dim(results_STAAR)[2])
{
results_m <- cbind(results_m,unlist(results_STAAR[,i]))
}
colnames(results_m) <- colnames(results_STAAR)
results_m <- data.frame(results_m,stringsAsFactors = FALSE)
results_m[,2] <- as.numeric(results_m[,2])
results_m[,3] <- as.numeric(results_m[,3])
genes_info_manhattan <- dplyr::left_join(genes_info_manhattan,results_m,by=c("chromosome_name"="Chr","hgnc_symbol"="Gene.name"))
genes_info_manhattan[is.na(genes_info_manhattan)] <- 1
colnames(genes_info_manhattan)[dim(genes_info_manhattan)[2]] <- "enhancer_CAGE"
### enhancer_DHS
results_STAAR <- results_enhancer_DHS_genome[,c(1,2,dim(results_enhancer_DHS_genome)[2])]
results_m <- c()
for(i in 1:dim(results_STAAR)[2])
{
results_m <- cbind(results_m,unlist(results_STAAR[,i]))
}
colnames(results_m) <- colnames(results_STAAR)
results_m <- data.frame(results_m,stringsAsFactors = FALSE)
results_m[,2] <- as.numeric(results_m[,2])
results_m[,3] <- as.numeric(results_m[,3])
genes_info_manhattan <- dplyr::left_join(genes_info_manhattan,results_m,by=c("chromosome_name"="Chr","hgnc_symbol"="Gene.name"))
genes_info_manhattan[is.na(genes_info_manhattan)] <- 1
colnames(genes_info_manhattan)[dim(genes_info_manhattan)[2]] <- "enhancer_DHS"
## ylim
noncoding_minp <- min(genes_info_manhattan[,(dim(genes_info_manhattan)[2]-6):dim(genes_info_manhattan)[2]])
min_y <- ceiling(-log10(noncoding_minp)) + 1
}
print("Q-Q plot")
cex_point <- 1
png(paste0(output_path,"gene_centric_noncoding_qqplot.png"), width = 8, height = 8, units = 'in', res = 600)
### UTR
## remove unconverged p-values
observed <- sort(genes_info_manhattan$UTR[genes_info_manhattan$UTR < 1])
lobs <- -(log10(observed))
expected <- c(1:length(observed))
lexp <- -(log10(expected / (length(expected)+1)))
par(mar=c(5,6,4,4))
plot(lexp,lobs,pch=0, cex=cex_point, xlim = c(0, 5), ylim = c(0, min_y),
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)
abline(0, 1, col="red",lwd=2)
### upstream
## remove unconverged p-values
observed <- sort(genes_info_manhattan$upstream[genes_info_manhattan$upstream < 1])
lobs <- -(log10(observed))
expected <- c(1:length(observed))
lexp <- -(log10(expected / (length(expected)+1)))
par(new=T)
par(mar=c(5,6,4,4))
plot(lexp,lobs,pch=1, cex=cex_point, xlim = c(0, 5), ylim = c(0, min_y),
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)
abline(0, 1, col="red",lwd=2)
### downstream
## remove unconverged p-values
observed <- sort(genes_info_manhattan$downstream[genes_info_manhattan$downstream < 1])
lobs <- -(log10(observed))
expected <- c(1:length(observed))
lexp <- -(log10(expected / (length(expected)+1)))
par(new=T)
par(mar=c(5,6,4,4))
plot(lexp,lobs,pch=2, cex=cex_point, xlim = c(0, 5), ylim = c(0, min_y),
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)
abline(0, 1, col="red",lwd=2)
### promoter_CAGE
## remove unconverged p-values
observed <- sort(genes_info_manhattan$promoter_CAGE[genes_info_manhattan$promoter_CAGE < 1])
lobs <- -(log10(observed))
expected <- c(1:length(observed))
lexp <- -(log10(expected / (length(expected)+1)))
par(new=T)
par(mar=c(5,6,4,4))
plot(lexp,lobs,pch=3, cex=cex_point, xlim = c(0, 5), ylim = c(0, min_y),
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)
abline(0, 1, col="red",lwd=2)
### promoter_DHS
## remove unconverged p-values
observed <- sort(genes_info_manhattan$promoter_DHS[genes_info_manhattan$promoter_DHS < 1])
lobs <- -(log10(observed))
expected <- c(1:length(observed))
lexp <- -(log10(expected / (length(expected)+1)))
par(new=T)
par(mar=c(5,6,4,4))
plot(lexp,lobs,pch=4, cex=cex_point, xlim = c(0, 5), ylim = c(0, min_y),
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)
abline(0, 1, col="red",lwd=2)
### enhancer_CAGE
## remove unconverged p-values
observed <- sort(genes_info_manhattan$enhancer_CAGE[genes_info_manhattan$enhancer_CAGE < 1])
lobs <- -(log10(observed))
expected <- c(1:length(observed))
lexp <- -(log10(expected / (length(expected)+1)))
par(new=T)
par(mar=c(5,6,4,4))
plot(lexp,lobs,pch=5, cex=cex_point, xlim = c(0, 5), ylim = c(0, min_y),
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)
abline(0, 1, col="red",lwd=2)
### enhancer_DHS
## remove unconverged p-values
observed <- sort(genes_info_manhattan$enhancer_DHS[genes_info_manhattan$enhancer_DHS < 1])
lobs <- -(log10(observed))
expected <- c(1:length(observed))
lexp <- -(log10(expected / (length(expected)+1)))
par(new=T)
par(mar=c(5,6,4,4))
plot(lexp,lobs,pch=6, cex=cex_point, xlim = c(0, 5), ylim = c(0, min_y),
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)
abline(0, 1, col="red",lwd=2)
legend("topleft",legend=c("UTR","upstream","downstream","promoter_CAGE","promoter_DHS","enhancer_CAGE","enhancer_DHS"),ncol=1,bty="o",box.lwd=1,pch=0:6,cex=1.5,text.font=2)
dev.off()
}
## Q-Q plot for ncRNA genes
if(QQ_plot)
{
if(!is.null(ncRNA_pos))
{
if(!manhattan_plot)
{
results_ncRNA_genome_temp <- data.frame(results_ncRNA_genome[,c(1,2,dim(results_ncRNA_genome)[2])],stringsAsFactors = FALSE)
results_ncRNA_genome_temp[,2] <- as.numeric(results_ncRNA_genome_temp[,2])
results_ncRNA_genome_temp[,1] <- as.character(results_ncRNA_genome_temp[,1])
results_ncRNA_genome_temp[,3] <- as.numeric(results_ncRNA_genome_temp[,3])
ncRNA_gene_pos_results <- dplyr::left_join(ncRNA_pos,results_ncRNA_genome_temp,by=c("chr"="Chr","ncRNA"="Gene.name"))
ncRNA_gene_pos_results[is.na(ncRNA_gene_pos_results[,5]),5] <- 1
}
observed <- sort(ncRNA_gene_pos_results[ncRNA_gene_pos_results[,5] < 1,5])
lobs <- -(log10(observed))
expected <- c(1:length(observed))
lexp <- -(log10(expected / (length(expected)+1)))
ncRNA_minp <- min(ncRNA_gene_pos_results[,5])
min_ncRNA_y <- ceiling(-log10(ncRNA_minp)) + 1
print("ncRNA Q-Q plot")
png(paste0(ncRNA_output_path,"gene_centric_ncRNA_qqplot.png"), width = 8, height = 8, units = 'in', res = 600)
par(mar=c(5,6,4,4))
plot(lexp,lobs,pch=20, cex=cex_point, xlim = c(0, 5), ylim = c(0, min_ncRNA_y),
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)
abline(0, 1, col="red",lwd=2)
dev.off()
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.