R/ttd_check.R

Defines functions ttd_check

Documented in ttd_check

#' Gene therapy targets
#'
#' Identify the overlap between your prioritised list of gene therapy targets
#' and currently existing gene therapy targets that are currently on the market
#' or are in clinical trials. Uses data from the
#' \href{https://db.idrblab.net/ttd/full-data-download}{
#' Therapeutic Target Database}.
#' @param top_targets Top targets generated by
#'  \link[MSTExplorer]{prioritise_targets}.
#' @param drug_types Filter results by drug type.
#' @param keep_status Filter results by drug approval status.
#' @param failed_status Drug approval status categories that indicate
#' the drug failed.
#' @param remove_status Remove results by drug approval status.
#' @param as_percent Show the plot's y-axis as a percentage within each
#'  x-axis group (TRUE).
#'  Otherwise, show y-axis as the number of drugs within that x-axis group.
#' @param base_size Base size for ggplot2 theme.
#' @param label_size Size of labels in plot.
#' @inheritParams plot_
#' @inheritParams load_example_ctd
#' @inheritParams data.table::merge.data.table
#' @inheritParams KGExplorer::get_ttd
#' @inheritParams HPOExplorer::add_disease
#'
#' @export
#' @importFrom utils tail
#' @examples
#' top_targets <- MSTExplorer::example_targets$top_targets
#' res <- ttd_check(top_targets=top_targets,
#'                  run_map_genes=FALSE)
ttd_check <- function(top_targets,
                      drug_types = NULL,
                      failed_status = c(
                        "Terminated",
                        "Withdrawn from market",
                        "Discontinued*",
                        NA
                      ),
                      keep_status = NULL,
                      remove_status=c(NA),
                      allow.cartesian = FALSE,
                      run_map_genes = FALSE,
                      add_descriptions = FALSE,
                      force_new=FALSE,
                      as_percent = TRUE,
                      base_size = 9,
                      label_size = 2,
                      show_plot = TRUE,
                      save_path = NULL,
                      height=NULL,
                      width=NULL,
                      phenotype_to_genes=HPOExplorer::load_phenotype_to_genes()
                      ){
  # top_targets <- prioritise_targets()$top_targets
  # drug_types <- c("Gene therapy"
    # "Antisense drug",
    # "Antisense oligonucleotide",
    # "Aptamer",
    # "Combination drug (Antisense drug)",
    # "CAR T Cell Therapy",
    # "CAR T Cell Therapy (Dual specific)",
    # "CAR-NK Cell therapy",
    # "CAR-NKT Cell therapy",
    # "CAR-PBMC Cell therapy",
    # "mRNA therapy",
    # "RNAi therapeutics",
    # "Short hairpin RNA",
    # "siRNA drug",
    # "Small activating RNA",
    # "Small interfering RNA",
    # "TCR-T cell therapy",
    # "Peptide",
    # "Protein",
    # "Protein/peptide",
    # "Protein/peptide drug"
  # )

  TARGETID <- DRUGNAME <- DRUGTYPE <- DRUGID <-
    GENENAME2 <- GENENAME3 <- prioritised <- HIGHEST_STATUS <- NULL;

  {
    top_targets <- HPOExplorer::add_disease(top_targets,
                                            add_descriptions = add_descriptions)
    ttdi <- KGExplorer::get_ttd(force_new = force_new,
                                run_map_genes = run_map_genes)
    #### Remove results that can't be linked to specific genes #####
    dat_sub <- ttdi$merged[!is.na(TARGETID) &
                           !is.na(GENENAME2) &
                           GENENAME2!="",]
    #### Create baseline ####
    # Must be computed BEFORE drug_types filtering
    baseline <- dat_sub[,list(
      baseline_prop=sum(GENENAME3 %in% unique(phenotype_to_genes$gene_symbol))/.N
    ), by=c("HIGHEST_STATUS")]
    #### Filter by drug type ####
    if(!is.null(drug_types)){
      dat_sub <- dat_sub[
        tolower(DRUGNAME) %in% tolower(drug_types) |
          tolower(DRUGTYPE) %in% tolower(drug_types) |
          grepl(paste(drug_types,collapse = "|"),DRUGNAME,ignore.case = TRUE) |
          grepl(paste(drug_types,collapse = "|"),DRUGTYPE,ignore.case = TRUE),
      ]
    }
    #### Filter by status ####
    if(!is.null(keep_status)){
      dat_sub <- dat_sub[HIGHEST_STATUS %in% keep_status,]
    }
    if(!is.null(remove_status)){
      dat_sub <- dat_sub[!HIGHEST_STATUS %in% remove_status,]
    }
    dat_sub[,failed:=(
      tolower(HIGHEST_STATUS) %in% tolower(failed_status) |
        (grepl(paste(failed_status,collapse = "|"), HIGHEST_STATUS,
               ignore.case = TRUE))
    )]
    #### Filter to only those in top_targets ####
    dat_sub2 <- merge(
      dat_sub[failed==FALSE],
      top_targets,
      allow.cartesian = allow.cartesian,
      by.x = "GENENAME3",
      by.y = "gene_symbol")
    cols <- intersect(names(dat_sub2),
                      c("GENENAME2","TARGETID","TARGNAME",
                        "INDICATI","DRUGID","DRUGNAME",
                        "HIGHEST_STATUS",
                        "disease_name","disease_id","hpo_name",
                        "CellType","ontLvl"))
    dat_sub2 <- dat_sub2[,cols, with=FALSE] |> unique()
    #### Count proportion of drugs that our analyses captured ####
    pct_captured <- length(unique(dat_sub2$DRUGID)) /
      length(unique(dat_sub$DRUGID))*100
    dat_sub[,prioritised:=(DRUGID %in% dat_sub2$DRUGID)]
    #### Hypergeometric test ####
    fail <- dat_sub[failed==TRUE,drop=FALSE]
    notfail <- dat_sub[failed==FALSE,drop=FALSE]
    ttd_hypergeo_out <- ttd_hypergeo(fail=fail,
                                     notfail=notfail,
                                     top_targets=top_targets,
                                     p2g=phenotype_to_genes)
  }
  #### Plot ####
  plt <- plot_ttd(dat_sub = dat_sub,
                  as_percent = as_percent,
                  baseline = baseline,
                  base_size = base_size,
                  label_size = label_size)
  #### Show ####
  if(isTRUE(show_plot)) methods::show(plt)
  #### Save ####
  KGExplorer::plot_save(plt = plt,
                        save_path=save_path,
                        height=height,
                        width=width)
  #### Return ####
  return(
    list(data=dat_sub,
         data_overlap=dat_sub2,
         pct_captured,
         plot=plt,
         ttd_hypergeo_out=ttd_hypergeo_out)
  )
}
neurogenomics/MultiEWCE documentation built on April 17, 2025, 9:27 p.m.