# MERIT: MastEr Regulator Inference Tool

# Emit a start-up message, then source the three project scripts. The
# relative paths resolve against the current working directory.
message("initiate MERIT...")
source(file = "R/params.R")
source(file = "R/functions.R")
source(file = "R/load_datasets.R")


# Disabled call, kept for reference:
# install_and_load_libraries()

# Read the input files through load_datasets() (presumably provided by
# R/load_datasets.R -- confirm). The returned list is unpacked into
# top-level variables further down in this script.
l.data <- load_datasets(
  filename.genes = "MERIT_athaliana_PMN_2017/athaliana_gene_expression/genes.txt",
  filename.experiment_ids = "MERIT_athaliana_PMN_2017/athaliana_gene_expression/ids_differentialExpressionSamples.txt",
  filename.foldChange_differentialExpression = "MERIT_athaliana_PMN_2017/athaliana_gene_expression/m.foldChange_differentialExpression.txt",
  filename.pvalue_differentialExpression = "MERIT_athaliana_PMN_2017/athaliana_gene_expression/m.pvalue_differentialExpression.txt",
  filename.experiment_condition_tissue_annotation = "MERIT_athaliana_PMN_2017/athaliana_gene_expression/experiment_annotation_He_et_al_2015.txt",
  filename.transcriptionfactor_annotation = "MERIT_athaliana_PMN_2017/data/df.transcriptionFactorAnnotation.txt",
  filename.geneGroups = "MERIT_athaliana_PMN_2017/data/df.enzymes_w_metabolic_domains.txt"
)


## run MERIT

# "Load" switches, stored as the character value "yes" (not logicals).
# Only b.load_TFBS_inference is consumed by an active call in this script.
# NOTE(review): presumably these select loading stored results instead of
# recomputing -- confirm in the called functions.
b.load_grn_inference <- "yes"
b.load_TFBS_inference <- "yes"
b.load_treatment_tissue_inference <- "yes"

# Unpack the elements of l.data into top-level variables.
m.foldChange_differentialExpression <- l.data$m.foldChange_differentialExpression
m.pvalue_differentialExpression <- l.data$m.pvalue_differentialExpression
df.experiment_condition_annotation <- l.data$df.experiment_condition_annotation
tb.condition_treatments <- l.data$tb.condition_treatments
tb.condition_tissues <- l.data$tb.condition_tissues
df.transcriptionFactorAnnotation <- l.data$df.transcriptionFactorAnnotation
df.geneGroups <- l.data$df.geneGroups
tb.geneGroups <- l.data$tb.geneGroups
v.geneGroups <- l.data$v.geneGroups
l.geneGroups <- l.data$l.geneGroups

# Settings handed to
# compute_ensemble_regression_with_montecarlo_based_stability_selection();
# that call is currently commented out further down.
# n.lead_method_expression_shuffling is also passed to
# load_lead_support_grn().
n.cpus <- 3
seed <- 1234
importance.measure <- "impurity"
n.trees <- 1000
n.lead_method_expression_shuffling <- 1
n.bootstrap <- 100
n.stepsLARS <- 5

# Settings passed to load_lead_support_grn().
th.lead_grn_method <- 0.95
th.support_grn_methods <- 0.95
n.grnSupport <- 1

# Input files and thresholds passed to transcriptionFactorBindingInference().
file.TF_to_Motif_IDs <- "MERIT_athaliana_PMN_2017/data/TF_to_Motif_IDs.txt"
file.TFBS_motifs <- "MERIT_athaliana_PMN_2017/data/Transcription_factor_weight_matrix_Arabidopsis_thaliana.txt"
file.promoterSeq <- "MERIT_athaliana_PMN_2017/data/TAIR10_upstream_1000_20101104.txt"
file.geneSeq <- "MERIT_athaliana_PMN_2017/data/TAIR10_seq_20110103_representative_gene_model_updated.txt"
th.pre_tss <- 1000
th.post_tss <- 200
genome_nucleotide_distribution <- c(0.3253439, 0.1746561, 0.1746561, 0.3253439)
th.pval.known_motifs <- 0.05

# Thresholds referenced by the commented-out
# annotate_links_with_treatments_and_tissues() call. That call passes the
# literal "none" rather than s.multipleTestCorrection.
th.diffexp <- 0.05
th.pval.treatment <- 0.05
th.pval.tissue <- 0.05
th.min.samples <- 1
s.multipleTestCorrection <- "none"

# Thresholds referenced by the commented-out
# do_master_regulator_hierarchy_inference() call.
th.min_number_targets <- 2
th.min_number_MR_targets <- 2
th.pval_masterRegulator <- 0.05

# Folders created right after this block if they do not exist yet.
# NOTE(review): both are absolute paths at the filesystem root, while the
# active inference calls are given folder = "MERIT_athaliana_PMN_2017/tmp/"
# -- confirm this is intended.
foldername.tmp <- "/tmp"
foldername.results <- "/results"



# Create the temporary and the results folder if they are missing.
# dir.exists() is used rather than file.exists() so that a regular file
# carrying the folder's name is not mistaken for the folder (dir.create()
# then warns instead of the step being skipped silently). recursive = TRUE
# allows nested folder paths whose parent does not exist yet.
if (!dir.exists(foldername.tmp)) {
  dir.create(foldername.tmp, recursive = TRUE)
}

if (!dir.exists(foldername.results)) {
  dir.create(foldername.results, recursive = TRUE)
}

#
# source("R/compute_ensemble_regression_with_montecarlo_based_stability_selection.R")
# l.res.grn = compute_ensemble_regression_with_montecarlo_based_stability_selection(m.foldChange_differentialExpression=m.foldChange_differentialExpression,
#                                                                                   df.transcriptionFactorAnnotation=df.transcriptionFactorAnnotation,
#                                                                                   df.geneGroups=df.geneGroups,
#                                                                                   seed=seed,
#                                                                                   importance.measure=importance.measure,
#                                                                                   n.trees=n.trees,
#                                                                                   n.lead_method_expression_shuffling = n.lead_method_expression_shuffling,
#                                                                                   n.bootstrap=n.bootstrap,
#                                                                                   n.stepsLARS=n.stepsLARS,
#                                                                                   n.cpus=n.cpus)


# Source the script for load_lead_support_grn() and call it with the
# transcription factor annotation, the gene groups and the row names of the
# fold-change matrix taken from l.data, plus the thresholds set above.
source(file = "R/load_lead_support_grn.R")
l.res.grn <- load_lead_support_grn(
  df.transcriptionFactorAnnotation = l.data$df.transcriptionFactorAnnotation,
  df.geneGroups = l.data$df.geneGroups,
  v.genes = rownames(l.data$m.foldChange_differentialExpression),
  targetSet = "all",
  th.lead_grn_method = th.lead_grn_method,
  n.lead_method_expression_shuffling = n.lead_method_expression_shuffling,
  th.support_grn_methods = th.support_grn_methods,
  n.grnSupport = n.grnSupport,
  folder = "MERIT_athaliana_PMN_2017/tmp/"
)


# Source the script for transcriptionFactorBindingInference() and call it
# on the m.lead_support.grn element of the previous step's result. The
# multiple-hypothesis correction is fixed to "bonferroni" here, and b.load
# is taken from b.load_TFBS_inference.
source(file = "R/transcriptionFactorBindingInference.R")
l.res.grn_tfbs <- transcriptionFactorBindingInference(
  m.grn = l.res.grn$m.lead_support.grn,
  file.TF_to_Motif_IDs = file.TF_to_Motif_IDs,
  file.TFBS_motifs = file.TFBS_motifs,
  file.promoterSeq = file.promoterSeq,
  file.geneSeq = file.geneSeq,
  th.pre_tss = th.pre_tss,
  th.post_tss = th.post_tss,
  genome_nucleotide_distribution = genome_nucleotide_distribution,
  th.pval.known_motifs = th.pval.known_motifs,
  th.multipleHypothesisTest = "bonferroni",
  b.load = b.load_TFBS_inference,
  folder = "MERIT_athaliana_PMN_2017/tmp/"
)




#
# source("R/annotate_links_with_treatments_and_tissues.R")
# l.res.link_annotation = annotate_links_with_treatments_and_tissues(m.lead_support_w_motif.grn=l.res.grn_tfbs$m.lead_suppport_w_motif.grn,
#                                                                    m.pvalue_differentialExpression=m.pvalue_differentialExpression,
#                                                                    df.experiment_condition_annotation=df.experiment_condition_annotation,
#                                                                    tb.condition_treatments=tb.condition_treatments,
#                                                                    tb.condition_tissues=tb.condition_tissues,
#                                                                    th.diffexp = th.diffexp,
#                                                                    th.pval.treatment = th.pval.treatment,
#                                                                    th.pval.tissue = th.pval.tissue,
#                                                                    th.min.samples = th.min.samples,
#                                                                    s.multipleTestCorrection = "none",
#                                                                    b.load = b.load_treatment_tissue_inference,
#                                                                    folder="MERIT_athaliana_PMN_2017/tmp/")
#
#
#
#
# l.res.MR_hierarchy = do_master_regulator_hierarchy_inference(m.grn = l.res.link_annotation$m.grn,
#                                                              l.grn_subnetworks = l.res.link_annotation$l.grn_subnetworks,
#                                                              df.transcriptionFactorAnnotation = df.transcriptionFactorAnnotation,
#                                                              df.geneGroups,
#                                                              tb.geneGroups,
#                                                              v.geneGroups,
#                                                              l.geneGroups,
#                                                              th.min_number_targets = th.min_number_targets,
#                                                              th.min_number_MR_targets = th.min_number_MR_targets,
#                                                              th.pval = th.pval_masterRegulator)
#
#








#
#
# if(b.run_statistics_and_figures){
#
#   prepare_results_and_figures(m.lead_suppport.grn,
#                               m.lead_suppport_w_motif.grn,
#                               l.res_ensemble_grn_based_on_stability_selection,
#                               l.res_master_regulator_hierarchy)
# }
#
#
#
# if(b.run_comparative_evaluation){
#
#   message("Performance evaluation on resulting networks ")
#   l.results = run_comparative_evaluation(l.grns)
#
#   print(l.results$df.comparativeEvaluation.total)
#   plot(l.results$plot_auroc_curves)
#   print(l.results$df.auroc)
#   plot(l.results$plot_auroc)
#
#
# }