run_MERIT: run algorithm

Description Usage Arguments Value

View source: R/functions.R View source: R/MERIT_functions.R

Description

run algorithm

Usage

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
run_MERIT(b.load_grn_inference = "yes", b.load_TFBS_inference = "yes",
  b.load_treatment_tissue_inference = "yes",
  m.foldChange_differentialExpression = m.foldChange_differentialExpression,
  m.pvalue_differentialExpression = m.pvalue_differentialExpression,
  df.experiment_condition_annotation = df.experiment_condition_annotation,
  tb.condition_treatments = tb.condition_treatments,
  tb.condition_tissues = tb.condition_tissues,
  df.transcriptionFactorAnnotation = df.transcriptionFactorAnnotation,
  df.geneGroups = df.geneGroups, tb.geneGroups = tb.geneGroups,
  v.geneGroups = v.geneGroups, l.geneGroups = l.geneGroups,
  n.cpus = 3, seed = 1234, importance.measure = "impurity",
  n.trees = 1000, n.lead_method_expression_shuffling = 1,
  n.bootstrap = 100, n.stepsLARS = 5, th.lead_grn_method = 0.95,
  th.support_grn_methods = 0.95, n.grnSupport = 1,
  file.TF_to_Motif_IDs = "data/TF_to_Motif_IDs.txt",
  file.TFBS_motifs = "data/Transcription_factor_weight_matrix_Arabidopsis_thaliana.txt",
  file.promoterSeq = "data/TAIR10_upstream_1000_20101104.txt",
  file.geneSeq = "data/TAIR10_seq_20110103_representative_gene_model_updated.txt",
  th.pre_tss = 1000, th.post_tss = 200,
  genome_nucleotide_distribution = c(A = 0.3253439, C = 0.1746561, G =
  0.1746561, T = 0.3253439), th.pval.known_motifs = 0.05,
  th.diffexp = 0.05, th.pval.treatment = 0.05, th.pval.tissue = 0.05,
  th.min.samples = 1, s.multipleTestCorrection = "none",
  th.min_number_targets = 2, th.min_number_MR_targets = 2,
  th.pval_masterRegulator = 0.05, foldername.tmp = "tmp/",
  foldername.results = "results/")

Arguments

b.load_grn_inference

("yes","no")

b.load_TFBS_inference

("yes","no")

b.load_treatment_tissue_inference

("yes","no")

df.transcriptionFactorAnnotation
seed

(default = 1234)

importance.measure

(default = "impurity")

n.trees

(default = 1000)

n.lead_method_expression_shuffling

(default = 1)

th.lead_grn_method

(default = 0.95)

th.support_grn_methods

(default = 0.95)

n.grnSupport

(default = 1)

file.TF_to_Motif_IDs

(default = "data/TF_to_Motif_IDs.txt")

file.TFBS_motifs

(default = "data/Transcription_factor_weight_matrix_Arabidopsis_thaliana.txt")

file.promoterSeq

(default = "data/TAIR10_upstream_1000_20101104.txt")

file.geneSeq

(default = "data/TAIR10_seq_20110103_representative_gene_model_updated.txt")

th.pre_tss

(default = 1000)

th.post_tss

(default = 200)

genome_nucleotide_distribution

ACGT distribution (default = c(A = 0.3253439, C = 0.1746561, G = 0.1746561, T = 0.3253439))

th.pval.known_motifs

(default = 0.05)

th.diffexp

(default = 0.05)

th.pval.treatment

(default = 0.05)

th.pval.tissue

(default = 0.05)

th.min.samples

(default = 1)

s.multipleTestCorrection

(default = "none")

th.min_number_targets

(default = 2)

th.min_number_MR_targets

(default = 2)

th.pval_masterRegulator

(default = 0.05)

foldername.tmp

(default = "tmp/")

foldername.results

(default = "results/")

n.bootstrap

(default = 100)

n.stepsLARS

(default = 5)

Value

a list of results from all 4 steps

setwd(...) # set to dataset

l.data = load_datasets(filename.genes = "data/genes.txt", filename.experiment_ids = "data/experiment_ids.txt", filename.foldChange_differentialExpression = "data/m.foldChange_differentialExpression.txt", filename.pvalue_differentialExpression = "data/m.pvalue_differentialExpression.txt", filename.experiment_condition_tissue_annotation = "data/experiment_annotation.txt", filename.transcriptionfactor_annotation = "data/df.transcriptionFactorAnnotation.txt", filename.geneGroups = "data/df.enzymes_w_metabolic_domains.txt")

l.results = run_MERIT(b.load_grn_inference = "yes", b.load_TFBS_inference = "yes", b.load_treatment_tissue_inference = "yes", m.foldChange_differentialExpression=l.data$m.foldChange_differentialExpression, m.pvalue_differentialExpression=l.data$m.pvalue_differentialExpression, df.experiment_condition_annotation=l.data$df.experiment_condition_annotation, tb.condition_treatments=l.data$tb.condition_treatments, tb.condition_tissues=l.data$tb.condition_tissues, df.transcriptionFactorAnnotation=l.data$df.transcriptionFactorAnnotation, df.geneGroups=l.data$df.geneGroups, tb.geneGroups=l.data$tb.geneGroups, v.geneGroups=l.data$v.geneGroups, l.geneGroups=l.data$l.geneGroups, n.cpus = 3, seed=1234, importance.measure="impurity", n.trees=1000, n.lead_method_expression_shuffling = 1, n.bootstrap=100, n.stepsLARS=5, th.lead_grn_method = 0.95, th.support_grn_methods = 0.95, n.grnSupport = 1, file.TF_to_Motif_IDs = "data/TF_to_Motif_IDs.txt", file.TFBS_motifs = "data/Transcription_factor_weight_matrix_Arabidopsis_thaliana.txt", file.promoterSeq = "data/TAIR10_upstream_1000_20101104.txt", file.geneSeq = "data/TAIR10_seq_20110103_representative_gene_model_updated.txt", th.pre_tss = 1000, th.post_tss = 200, genome_nucleotide_distribution = c(A = 0.3253439, C = 0.1746561, G = 0.1746561, T = 0.3253439 ), th.pval.known_motifs = 0.05, th.diffexp = 0.05, th.pval.treatment = 0.05, th.pval.tissue = 0.05, th.min.samples = 1, s.multipleTestCorrection = "none", th.min_number_targets = 2, th.min_number_MR_targets = 2, th.pval_masterRegulator = 0.05, foldername.tmp = "tmp/", foldername.results = "results/")

# Step 1 - Gene regulatory network inference using ensemble regression with Monte Carlo based threshold selection
l.res.grn = l.results$l.res.grn

# Step 2 - Transcription factor direct target promoter binding based filtering of gene regulatory link predictions
l.res.grn_tfbs = l.results$l.res.grn_tfbs

# Step 3 - Context specific annotation and filtering of gene regulatory link predictions
l.res.link_annotation = l.results$l.res.link_annotation

# Step 4 - Master regulator hierarchy inference
l.res.MR_hierarchy = l.results$l.res.MR_hierarchy

format_results(l.grn_subnetworks = l.res.link_annotation$l.grn_subnetworks,
  tb.condition_tissue_differentialExpression = l.res.link_annotation$tb.condition_tissue_differentialExpression,
  l.Hierarchy=l.res.MR_hierarchy$l.Hierarchy,
  l.Hierarchy_tfs_per_tier=l.res.MR_hierarchy$l.Hierarchy_tfs_per_tier,
  l.Hierarchy_nb_tfs_per_tier=l.res.MR_hierarchy$l.Hierarchy_nb_tfs_per_tier,
  l.df.masterRegulatorHierarchy=l.res.MR_hierarchy$l.df.masterRegulatorHierarchy,
  v.number_tiers=l.res.MR_hierarchy$v.number_tiers,
  m.MR_vs_conditions = l.res.MR_hierarchy$m.MR_vs_conditions, # A) TFs versus Conditions (Matrix plot) P(TF,C)
  l.MR_vs_geneGroups_given_condition = l.res.MR_hierarchy$l.MR_vs_geneGroups_given_condition, # B) per condition - TFs versus Domains (P(TF,D|C)) => also cumulative plot
  number_of_conditions_per_master_regulator=l.res.MR_hierarchy$number_of_conditions_per_master_regulator,
  tb.condition_treatments=l.data$tb.condition_treatments,
  tb.condition_tissues=l.data$tb.condition_tissues,
  df.transcriptionFactorAnnotation=l.data$df.transcriptionFactorAnnotation,
  df.geneGroups=l.data$df.geneGroups,
  tb.geneGroups=l.data$tb.geneGroups,
  v.geneGroups=l.data$v.geneGroups,
  l.geneGroups=l.data$l.geneGroups,
  th.pval = 0.05,
  foldername.results = "results/",
  file.subGeneGroups = "data/aracyc_pathways.20180327")


mbanf/MERIT documentation built on June 16, 2021, 1:07 p.m.