README.md

M2D

The goal of M2D is to find Molecule(s) TO Disease by reverse trancsriptome.

Installation

You can install the development version of M2D from GitHub with:

# install.packages("devtools")
devtools::install_github("lishensuo/M2D")

Analysis pipeline

This is a basic analysis pipeline shows you how to use this package to find approprate molecule(s) for specific complex disease.

library(M2D)
## basic example code

Step1 : create m2d object

dir_disease = system.file("extdata",
                          "demo_BRCA_disease.csv",
                          package = "M2D")
disease = read.csv(dir_disease)
head(disease)
#>     Symbol    logFC  P.Value
#> 1   GPR146 2.741035 1.31e-08
#> 2 FAM189A2 2.849698 1.69e-08
#> 3    ENPP2 2.580100 2.82e-08
#> 4  CCDC178 2.690297 3.70e-08
#> 5   TSPAN7 3.328692 7.23e-08
#> 6     CD36 4.197532 9.23e-08
dir_molecule = system.file("extdata",
                           "demo_molecules_321_12320.csv",
                           package = "M2D")

molecules = data.table::fread(dir_molecule,
                              data.table = FALSE) %>%
  tibble::column_to_rownames("V1")
molecules[1:4,1:4]
#>       racecadotril   alfuzosin strychnine sulpiride
#> PSME1   0.20403610 -0.73459315  0.5252931   0.73325
#> ATF1    0.87755609 -0.20398252 -0.2814721   0.13225
#> RHEB    0.07486866 -0.08269265 -0.3476646   1.10205
#> FOXO3   0.29977050 -0.15295053 -0.0912619  -1.19990
m2dObj = list(disease=disease,
              molecule=molecules)

Step2 : find disease related (GOBP) pathway

# load pre-calculated result for saving time
#disease_pw_list <- find_disease_pw(m2dObj)
data("find_disease_pw_result")
disease_pw_list = find_disease_pw_result
disease_pw <- disease_pw_list$term_df
head(disease_pw)
#>         Term   Gene                       Description   weight size
#> 1 GO:0030198    A2M extracellular matrix organization 2.718282  393
#> 2 GO:0030198   ABL1 extracellular matrix organization 2.718282  393
#> 3 GO:0030198  ADAM8 extracellular matrix organization 2.718282  393
#> 4 GO:0030198 ADAM10 extracellular matrix organization 2.718282  393
#> 5 GO:0030198  AEBP1 extracellular matrix organization 2.718282  393
#> 6 GO:0030198   ACAN extracellular matrix organization 2.718282  393

#visualize pathways correlation
pwIDs <- unique(disease_pw$Term)
p = vis_pw_cor_ph(pwIDs)
#> 

#visualize weight of pathways
vis_pw_wt_radar(disease_pw_list)

Step2 : calculate RRS(Reverse Regulated Score) for each moleculs against above pathways

(1) merge degs of the disease and molecules

merge_pw_degs <- merge_pw_deg(m2dObj,
  disease_pw = disease_pw_list$term_df
)
head(merge_pw_degs)
#>         Term    Gene disease_logFC
#> 1 GO:0045765   ENPP2      2.580100
#> 2 GO:0002237    CD36      4.197532
#> 3 GO:2000377    CD36      4.197532
#> 4 GO:0030198 COL11A1     -6.448583
#> 5 GO:0001503 COL11A1     -6.448583
#> 6 GO:0001501 COL11A1     -6.448583
#>                                               Description   weight size
#> 1                              regulation of angiogenesis 1.680587  335
#> 2                response to molecule of bacterial origin 1.451683  346
#> 3 regulation of reactive oxygen species metabolic process 1.000000  192
#> 4                       extracellular matrix organization 2.718282  393
#> 5                                            ossification 1.365460  401
#> 6                             skeletal system development 1.008163  486
#>   size_disease     molecule  mol_logFC sign size_trans size_zero size_cis
#> 1           77 racecadotril -1.1681212   -1         34         6       37
#> 2           75 racecadotril -0.6960892   -1         30         9       36
#> 3           36 racecadotril -0.6960892   -1         17         2       17
#> 4          106 racecadotril -0.6138223    1         38        14       54
#> 5           96 racecadotril -0.6138223    1         34         8       54
#> 6          113 racecadotril -0.6138223    1         42        15       56

# visualization of Trans gene distribution between a molecule and the disease
sle_pw <- "GO:0001501"
sle_mol <- "CGP-20712"
vis_bell_term(
  score_RRS = merge_pw_degs,
  sle_pw = sle_pw,
  sle_mol = sle_mol
)

(2) calculate RRS(Reverse Regulated Score)

scores_RRS <- score_RRS(merge_pw_degs)
#> Joining, by = c("Term", "molecule")
head(scores_RRS)
#>         Term     molecule    Gene disease_logFC  mol_logFC size size_disease
#> 1 GO:0001501 racecadotril    ACAN    -0.3578067 -0.4739981  486          113
#> 2 GO:0001501 racecadotril  AKAP13     0.6688817 -0.4329134  486          113
#> 3 GO:0001501 racecadotril    ANKH    -1.6881350 -0.4016990  486          113
#> 4 GO:0001501 racecadotril ANKRD11     0.8630050 -0.2253753  486          113
#> 5 GO:0001501 racecadotril   ANXA6    -0.8165067  0.2647382  486          113
#> 6 GO:0001501 racecadotril  ARID5B     1.1655967 -0.1979710  486          113
#>   size_trans      RRS   RRS_sum                 Description   weight sign
#> 1         42 -21.6755 -2449.332 skeletal system development 1.008163    1
#> 2         42 -21.6755 -2449.332 skeletal system development 1.008163   -1
#> 3         42 -21.6755 -2449.332 skeletal system development 1.008163    1
#> 4         42 -21.6755 -2449.332 skeletal system development 1.008163   -1
#> 5         42 -21.6755 -2449.332 skeletal system development 1.008163   -1
#> 6         42 -21.6755 -2449.332 skeletal system development 1.008163   -1
#>   size_zero size_cis
#> 1        15       56
#> 2        15       56
#> 3        15       56
#> 4        15       56
#> 5        15       56
#> 6        15       56

#ternplot visualization of trans/cis/zero distribution
sle_pw <- "GO:0001501"
sle_mol <- "CGP-20712"
vis_tern_one(
  score_RRS = score_RRS_result,
  sle_pw = sle_pw,
  sle_mol = sle_mol
)
#> Registered S3 methods overwritten by 'ggtern':
#>   method           from   
#>   grid.draw.ggplot ggplot2
#>   plot.ggplot      ggplot2
#>   print.ggplot     ggplot2

Step3 : disease up/down geneset GSEA enrichment to molecules’ signature

enrich_double_list <- gsea_disease_set(
  m2dObj = m2dObj,
  demo = TRUE
)
enrich_signi <- enrich_double_list$gsea_sig
dim(enrich_signi)
#> [1] 73 14
enrich_signi[1:10,1:6]
#>                    Type      molecule disease_deg_set sign       pvalue
#> pantoprazole.UP    both  pantoprazole              UP   -1 2.547912e-06
#> flavokavain-b.UP   both flavokavain-b              UP   -1 8.558145e-06
#> practolol.DOWN     both     practolol            DOWN   -1 3.919290e-04
#> prazosin.UP        both      prazosin              UP   -1 5.907788e-03
#> PNU-282987.DOWN    both    PNU-282987            DOWN   -1 1.141806e-02
#> PNU-282987.UP      both    PNU-282987              UP   -1 1.282106e-02
#> flavokavain-b.DOWN both flavokavain-b            DOWN   -1 1.592946e-02
#> practolol.UP       both     practolol              UP   -1 2.038932e-02
#> pantoprazole.DOWN  both  pantoprazole            DOWN   -1 2.499728e-02
#> prazosin.DOWN      both      prazosin            DOWN   -1 3.427709e-02
#>                          NES
#> pantoprazole.UP    -1.696903
#> flavokavain-b.UP   -1.669828
#> practolol.DOWN      1.452859
#> prazosin.UP        -1.363451
#> PNU-282987.DOWN     1.314022
#> PNU-282987.UP      -1.313321
#> flavokavain-b.DOWN  1.278665
#> practolol.UP       -1.323774
#> pantoprazole.DOWN   1.255052
#> prazosin.DOWN       1.274183

#Barplot visualization: single/double gsea enrichment(Down geneset with negative NES or Up geneset with positive NES)
data("get_Formula_result") #see the follow  step
gsea_df <- enrich_double_list$gsea_sig
sle_mols <- get_Formula_result$formula1$molecule[1:10]
vis_bar_double_enrich(gsea_df = gsea_df, sle_mols = sle_mols)

Step4 : get 2 types of formula

formula_list <- get_Formula(
  score_RRS = scores_RRS,
  enrich_double = enrich_signi
)
#> Joining, by = "molecule"
#Type1: best global molecule rank for the disease
formula_1 = formula_list$formula1
head(formula_1)
#>          molecule  RRS_sum Rank   Type
#> 1    xanthoxyline 2791.099    1 single
#> 2         GW-6471 2107.564    5 single
#> 3       CGP-57380 2107.564    7 single
#> 4       ezetimibe 1879.720    8 single
#> 5 brompheniramine 1651.875   11 single
#> 6     alaproclate 1424.030   15 single
#Type2: appropriate molecule formula against all disease related pathway
formula_2 = formula_list$formula2
head(formula_2)
#>         Term   molecule       RRS   RRS_sum
#> 1 GO:0001501  CGP-20712 0.5040814  56.96120
#> 2 GO:0001503 securinine 2.7309205 262.16837
#> 3 GO:0001822 homosalate 1.8728313 146.08084
#> 4 GO:0002237 ornidazole 2.1775252 163.31439
#> 5 GO:0003158 securinine 2.0457444  81.82978
#> 6 GO:0030198  BRL-37344 2.7182818 288.13787

# chord visualization:  formula molecule against disease related pathways
sle_mols <- unique(formula_list$formula2$molecule)
vis_formula_RRS_chord(score_RRS = scores_RRS, sle_mols = sle_mols)

# chord visualization:  formula molecule against disease up/down geneset
vis_formula_gsea_chord(gsea_df = enrich_signi, sle_mols = sle_mols)

What is special about using README.Rmd instead of just README.md? You can include R chunks like so: You’ll still need to render README.Rmd regularly, to keep README.md up-to-date. devtools::build_readme() is handy for this. You could also use GitHub Actions to re-render README.Rmd every time you push. An example workflow can be found here: https://github.com/r-lib/actions/tree/v1/examples. In that case, don’t forget to commit and push the resulting figure files, so they display on GitHub and CRAN.



lishensuo/M2D documentation built on Jan. 4, 2022, 9:44 a.m.