README.md
In lishensuo/M2D: find Molecule(s) to Disease by reverse transcriptome

M2D

The goal of M2D is to find Molecule(s) TO Disease by reverse trancsriptome.

You can install the development version of M2D from GitHub with:

# install.packages("devtools")
devtools::install_github("lishensuo/M2D")

This is a basic analysis pipeline shows you how to use this package to find approprate molecule(s) for specific complex disease.

library(M2D)
## basic example code

(BRCA)disease degs

dir_disease = system.file("extdata",
                          "demo_BRCA_disease.csv",
                          package = "M2D")
disease = read.csv(dir_disease)
head(disease)
#>     Symbol    logFC  P.Value
#> 1   GPR146 2.741035 1.31e-08
#> 2 FAM189A2 2.849698 1.69e-08
#> 3    ENPP2 2.580100 2.82e-08
#> 4  CCDC178 2.690297 3.70e-08
#> 5   TSPAN7 3.328692 7.23e-08
#> 6     CD36 4.197532 9.23e-08

(LINCS)molecules signature

dir_molecule = system.file("extdata",
                           "demo_molecules_321_12320.csv",
                           package = "M2D")

molecules = data.table::fread(dir_molecule,
                              data.table = FALSE) %>%
  tibble::column_to_rownames("V1")
molecules[1:4,1:4]
#>       racecadotril   alfuzosin strychnine sulpiride
#> PSME1   0.20403610 -0.73459315  0.5252931   0.73325
#> ATF1    0.87755609 -0.20398252 -0.2814721   0.13225
#> RHEB    0.07486866 -0.08269265 -0.3476646   1.10205
#> FOXO3   0.29977050 -0.15295053 -0.0912619  -1.19990

create a list of m2d object

m2dObj = list(disease=disease,
              molecule=molecules)

# load pre-calculated result for saving time
#disease_pw_list <- find_disease_pw(m2dObj)
data("find_disease_pw_result")
disease_pw_list = find_disease_pw_result

disease_pw <- disease_pw_list$term_df
head(disease_pw)
#>         Term   Gene                       Description   weight size
#> 1 GO:0030198    A2M extracellular matrix organization 2.718282  393
#> 2 GO:0030198   ABL1 extracellular matrix organization 2.718282  393
#> 3 GO:0030198  ADAM8 extracellular matrix organization 2.718282  393
#> 4 GO:0030198 ADAM10 extracellular matrix organization 2.718282  393
#> 5 GO:0030198  AEBP1 extracellular matrix organization 2.718282  393
#> 6 GO:0030198   ACAN extracellular matrix organization 2.718282  393

#visualize pathways correlation
pwIDs <- unique(disease_pw$Term)
p = vis_pw_cor_ph(pwIDs)
#>

#visualize weight of pathways
vis_pw_wt_radar(disease_pw_list)

(1) merge degs of the disease and molecules

merge_pw_degs <- merge_pw_deg(m2dObj,
  disease_pw = disease_pw_list$term_df
)
head(merge_pw_degs)
#>         Term    Gene disease_logFC
#> 1 GO:0045765   ENPP2      2.580100
#> 2 GO:0002237    CD36      4.197532
#> 3 GO:2000377    CD36      4.197532
#> 4 GO:0030198 COL11A1     -6.448583
#> 5 GO:0001503 COL11A1     -6.448583
#> 6 GO:0001501 COL11A1     -6.448583
#>                                               Description   weight size
#> 1                              regulation of angiogenesis 1.680587  335
#> 2                response to molecule of bacterial origin 1.451683  346
#> 3 regulation of reactive oxygen species metabolic process 1.000000  192
#> 4                       extracellular matrix organization 2.718282  393
#> 5                                            ossification 1.365460  401
#> 6                             skeletal system development 1.008163  486
#>   size_disease     molecule  mol_logFC sign size_trans size_zero size_cis
#> 1           77 racecadotril -1.1681212   -1         34         6       37
#> 2           75 racecadotril -0.6960892   -1         30         9       36
#> 3           36 racecadotril -0.6960892   -1         17         2       17
#> 4          106 racecadotril -0.6138223    1         38        14       54
#> 5           96 racecadotril -0.6138223    1         34         8       54
#> 6          113 racecadotril -0.6138223    1         42        15       56

# visualization of Trans gene distribution between a molecule and the disease
sle_pw <- "GO:0001501"
sle_mol <- "CGP-20712"
vis_bell_term(
  score_RRS = merge_pw_degs,
  sle_pw = sle_pw,
  sle_mol = sle_mol
)

(2) calculate RRS(Reverse Regulated Score)

scores_RRS <- score_RRS(merge_pw_degs)
#> Joining, by = c("Term", "molecule")
head(scores_RRS)
#>         Term     molecule    Gene disease_logFC  mol_logFC size size_disease
#> 1 GO:0001501 racecadotril    ACAN    -0.3578067 -0.4739981  486          113
#> 2 GO:0001501 racecadotril  AKAP13     0.6688817 -0.4329134  486          113
#> 3 GO:0001501 racecadotril    ANKH    -1.6881350 -0.4016990  486          113
#> 4 GO:0001501 racecadotril ANKRD11     0.8630050 -0.2253753  486          113
#> 5 GO:0001501 racecadotril   ANXA6    -0.8165067  0.2647382  486          113
#> 6 GO:0001501 racecadotril  ARID5B     1.1655967 -0.1979710  486          113
#>   size_trans      RRS   RRS_sum                 Description   weight sign
#> 1         42 -21.6755 -2449.332 skeletal system development 1.008163    1
#> 2         42 -21.6755 -2449.332 skeletal system development 1.008163   -1
#> 3         42 -21.6755 -2449.332 skeletal system development 1.008163    1
#> 4         42 -21.6755 -2449.332 skeletal system development 1.008163   -1
#> 5         42 -21.6755 -2449.332 skeletal system development 1.008163   -1
#> 6         42 -21.6755 -2449.332 skeletal system development 1.008163   -1
#>   size_zero size_cis
#> 1        15       56
#> 2        15       56
#> 3        15       56
#> 4        15       56
#> 5        15       56
#> 6        15       56

#ternplot visualization of trans/cis/zero distribution
sle_pw <- "GO:0001501"
sle_mol <- "CGP-20712"
vis_tern_one(
  score_RRS = score_RRS_result,
  sle_pw = sle_pw,
  sle_mol = sle_mol
)
#> Registered S3 methods overwritten by 'ggtern':
#>   method           from   
#>   grid.draw.ggplot ggplot2
#>   plot.ggplot      ggplot2
#>   print.ggplot     ggplot2

enrich_double_list <- gsea_disease_set(
  m2dObj = m2dObj,
  demo = TRUE
)
enrich_signi <- enrich_double_list$gsea_sig
dim(enrich_signi)
#> [1] 73 14
enrich_signi[1:10,1:6]
#>                    Type      molecule disease_deg_set sign       pvalue
#> pantoprazole.UP    both  pantoprazole              UP   -1 2.547912e-06
#> flavokavain-b.UP   both flavokavain-b              UP   -1 8.558145e-06
#> practolol.DOWN     both     practolol            DOWN   -1 3.919290e-04
#> prazosin.UP        both      prazosin              UP   -1 5.907788e-03
#> PNU-282987.DOWN    both    PNU-282987            DOWN   -1 1.141806e-02
#> PNU-282987.UP      both    PNU-282987              UP   -1 1.282106e-02
#> flavokavain-b.DOWN both flavokavain-b            DOWN   -1 1.592946e-02
#> practolol.UP       both     practolol              UP   -1 2.038932e-02
#> pantoprazole.DOWN  both  pantoprazole            DOWN   -1 2.499728e-02
#> prazosin.DOWN      both      prazosin            DOWN   -1 3.427709e-02
#>                          NES
#> pantoprazole.UP    -1.696903
#> flavokavain-b.UP   -1.669828
#> practolol.DOWN      1.452859
#> prazosin.UP        -1.363451
#> PNU-282987.DOWN     1.314022
#> PNU-282987.UP      -1.313321
#> flavokavain-b.DOWN  1.278665
#> practolol.UP       -1.323774
#> pantoprazole.DOWN   1.255052
#> prazosin.DOWN       1.274183

#Barplot visualization: single/double gsea enrichment(Down geneset with negative NES or Up geneset with positive NES)
data("get_Formula_result") #see the follow  step
gsea_df <- enrich_double_list$gsea_sig
sle_mols <- get_Formula_result$formula1$molecule[1:10]
vis_bar_double_enrich(gsea_df = gsea_df, sle_mols = sle_mols)

filter Step2 RRS result by Step3 gsea result

formula_list <- get_Formula(
  score_RRS = scores_RRS,
  enrich_double = enrich_signi
)
#> Joining, by = "molecule"

#Type1: best global molecule rank for the disease
formula_1 = formula_list$formula1
head(formula_1)
#>          molecule  RRS_sum Rank   Type
#> 1    xanthoxyline 2791.099    1 single
#> 2         GW-6471 2107.564    5 single
#> 3       CGP-57380 2107.564    7 single
#> 4       ezetimibe 1879.720    8 single
#> 5 brompheniramine 1651.875   11 single
#> 6     alaproclate 1424.030   15 single

#Type2: appropriate molecule formula against all disease related pathway
formula_2 = formula_list$formula2
head(formula_2)
#>         Term   molecule       RRS   RRS_sum
#> 1 GO:0001501  CGP-20712 0.5040814  56.96120
#> 2 GO:0001503 securinine 2.7309205 262.16837
#> 3 GO:0001822 homosalate 1.8728313 146.08084
#> 4 GO:0002237 ornidazole 2.1775252 163.31439
#> 5 GO:0003158 securinine 2.0457444  81.82978
#> 6 GO:0030198  BRL-37344 2.7182818 288.13787

# chord visualization:  formula molecule against disease related pathways
sle_mols <- unique(formula_list$formula2$molecule)
vis_formula_RRS_chord(score_RRS = scores_RRS, sle_mols = sle_mols)

# chord visualization:  formula molecule against disease up/down geneset
vis_formula_gsea_chord(gsea_df = enrich_signi, sle_mols = sle_mols)

What is special about using README.Rmd instead of just README.md? You can include R chunks like so: You’ll still need to render README.Rmd regularly, to keep README.md up-to-date. devtools::build_readme() is handy for this. You could also use GitHub Actions to re-render README.Rmd every time you push. An example workflow can be found here: https://github.com/r-lib/actions/tree/v1/examples. In that case, don’t forget to commit and push the resulting figure files, so they display on GitHub and CRAN.