R/utils.R

Defines functions file_is_writable add_excel_sheet validate_db_df validate_db validate_query_genes

Documented in validate_db validate_db_df

validate_query_genes <- function(qgenes,
                               q_id_type = "symbol",
                               qtype = "target",
                               ignore_id_err = F,
                               genedb = NULL,
                               transcript_xref = NULL) {

  lgr::lgr$appenders$console$set_layout(
    lgr::LayoutFormat$new(timestamp_fmt = "%Y-%m-%d %T"))

  stopifnot(!is.null(q_id_type))
  stopifnot(is.character(qgenes))
  stopifnot(q_id_type == "symbol" |
              q_id_type == "entrezgene" |
              q_id_type == "ensembl_mrna" |
              q_id_type == "ensembl_protein" |
              q_id_type == "refseq_protein" |
              q_id_type == "refseq_mrna" |
              q_id_type == "uniprot_acc" |
              q_id_type == "ensembl_gene")
  stopifnot(is.character(qgenes))
  qgenes <- qgenes[!is.na(qgenes)]
  stopifnot(!is.null(genedb))
  stopifnot(!is.null(transcript_xref))
  validate_db_df(genedb, dbtype = "genedb")
  validate_db_df(transcript_xref, dbtype = "transcript_xref")

  alias2entrez <-
    transcript_xref |>
    dplyr::filter(.data$property == "alias") |>
    dplyr::rename(alias = "value") |>
    dplyr::select(-c("property"))

  target_genes <- data.frame(
    'qid' = unique(qgenes),
    stringsAsFactors = F)

  target_genes$qid <- stringr::str_trim(
    target_genes$qid
  )

  if (q_id_type == "entrezgene") {
    target_genes$qid <- as.integer(target_genes$qid)
  }

  if (q_id_type == "ensembl_gene" |
      q_id_type == "ensembl_protein" |
      q_id_type == "ensembl_mrna" |
      q_id_type == "refseq_mrna" |
      q_id_type == "refseq_peptide") {
    target_genes$qid <- stringr::str_replace(
      target_genes$qid, "\\.[0-9]{1,}","")
  }


  gdb <- genedb |>
    dplyr::select(c("symbol",
                  "entrezgene",
                  "name")) |>
    dplyr::distinct()

  queryset <- list()
  queryset[['found']] <- data.frame()
  queryset[['not_found']] <- data.frame()
  queryset[['all']] <- data.frame()
  queryset[['match_status']] <- "perfect_go"

  qtype_id <- 'symbol'
  if (q_id_type == 'entrezgene' |
     q_id_type == 'symbol'){

    qtype_id <- q_id_type

    target_genes <- target_genes |>
      dplyr::left_join(gdb,
                       by = c("qid" = qtype_id), relationship = "many-to-many") |>
      dplyr::mutate(!!rlang::sym(qtype_id) := qid) |>
      dplyr::distinct()

  } else {

    qtype_id <- q_id_type
    if (q_id_type == "refseq_protein") {
      qtype_id <- "refseq_peptide"
    }
    if (q_id_type == "ensembl_mrna") {
      qtype_id <- "ensembl_transcript_id"
    }
    if (q_id_type == "ensembl_protein") {
      qtype_id <- "ensembl_protein_id"
    }
    if (q_id_type == "ensembl_gene") {
      qtype_id <- "ensembl_gene_id"
    }

    gene_xref_map <-
      transcript_xref |>
      dplyr::filter(.data$property == qtype_id) |>
      dplyr::rename(!!rlang::sym(qtype_id) := value) |>
      dplyr::select(c("entrezgene"), rlang::sym(qtype_id))

    target_genes <- as.data.frame(
      target_genes |>

        ## map query to entrezgene
        dplyr::left_join(
          gene_xref_map,
          by = c("qid" = qtype_id), relationship = "many-to-many") |>
        dplyr::mutate(!!rlang::sym(qtype_id) := qid) |>
        ## append other gene annotations
        dplyr::left_join(gdb, by = c("entrezgene"), relationship = "many-to-many") |>
        dplyr::distinct() |>
        dplyr::group_by(
          .data$symbol,
          .data$entrezgene,
          .data$name) |>
        dplyr::summarise(
          !!rlang::sym(qtype_id) := paste(
            !!rlang::sym(qtype_id), collapse = ","),
          qid = paste(.data$qid, collapse = ","),
          .groups = "drop")
    )

  }

  queryset[['found']] <- target_genes |>
    dplyr::filter(!is.na(.data$symbol) &
                    !is.na(.data$entrezgene)) |>
    dplyr::mutate(alias = F)

  queryset[['not_found']] <- target_genes |>
    dplyr::filter(is.na(.data$symbol) |
                    is.na(.data$entrezgene))

  if (nrow(queryset[['not_found']]) > 0) {
    if (ignore_id_err == T) {
      queryset[['match_status']] <- "imperfect_go"

      if (q_id_type == 'symbol') {
        lgr::lgr$info( paste0("WARNING: ", qtype, " gene identifiers NOT found as primary symbols: ",paste0(queryset[['not_found']]$qid, collapse = ", ")))
        lgr::lgr$info( paste0("Trying to map ", qtype, " gene identifiers as gene aliases/synonyms: ",paste0(queryset[['not_found']]$qid, collapse = ", ")))

        query_as_alias <-
          dplyr::inner_join(
            dplyr::select(queryset[['not_found']], c("qid")),
            alias2entrez,
            by = c("qid" = "alias"), relationship = "many-to-many")

        ## Check that alias is not an alias for existing query entries (found)
        ## anti_join against found entries

        if (nrow(query_as_alias) > 0) {

          if (nrow(queryset[['found']]) > 0) {
            query_as_alias <- query_as_alias |>
              dplyr::anti_join(queryset[['found']],
                               by = "entrezgene") |>
              dplyr::distinct()
          }
          if (nrow(query_as_alias) > 0) {

            query_as_alias <- query_as_alias |>
              dplyr::left_join(gdb,
                               by = "entrezgene", relationship = "many-to-many") |>
              dplyr::distinct() |>
              dplyr::mutate(alias = T)

            lgr::lgr$info(
              paste0("Mapped query identifiers as gene aliases ",
                     paste0(query_as_alias$qid, collapse = ", ")," ---> ",
                     paste0(query_as_alias$symbol, collapse = ", ")))

            queryset[['found']] <-
              dplyr::bind_rows(queryset[['found']], query_as_alias)
            queryset[['not_found']] <- queryset[['not_found']] |>
              dplyr::anti_join(query_as_alias, by = "qid")

            if (nrow(queryset[['not_found']]) > 0) {
              lgr::lgr$warn(
                paste0(stringr::str_to_title(qtype)," gene identifiers NOT found: ",
                       paste0(queryset[['not_found']]$qid, collapse = ", "),
                       " (make sure that unambiguous primary identifiers/symbols are used)"))
            } else {
              queryset[['match_status']] <- "perfect_go"
            }
          }
        } else {
          lgr::lgr$warn(
            paste0("Query gene identifiers NOT found: ",
                   paste0(queryset[['not_found']]$qid, collapse = ", "),
                   " (make sure that unambiguous primary identifiers/symbols are used)"))
        }

      } else {
        lgr::lgr$warn(paste0(
          stringr::str_to_title(qtype), " gene identifiers NOT found: ",
          paste0(queryset[['not_found']]$qid, collapse = ", ")))
      }

      ## Indicate that processing should stop when encountering invalid query identifiers
    } else {
      queryset[['match_status']] <- "imperfect_stop"

      if (q_id_type == 'symbol') {
        lgr::lgr$warn( paste0("WARNING: ", qtype, " gene identifiers NOT found as primary symbols: ",paste0(queryset[['not_found']]$qid, collapse = ", ")))
        lgr::lgr$info( paste0("Trying to map ", qtype, " gene identifiers as gene aliases/synonyms: ",paste0(queryset[['not_found']]$qid, collapse = ", ")))

        query_as_alias <-
          dplyr::inner_join(
            dplyr::select(queryset[['not_found']], c("qid")),
            alias2entrez,
            by = c("qid" = "alias"), relationship = "many-to-many")

        if (nrow(query_as_alias) > 0) {
          query_as_alias <- query_as_alias |>
            dplyr::left_join(gdb,
                             by = "entrezgene", relationship = "many-to-many") |>
            dplyr::distinct() |>
          dplyr::mutate(alias = T)


          lgr::lgr$info(
            paste0("Mapped ", qtype, " gene identifiers as gene aliases ",
                   paste0(query_as_alias$qid, collapse = ", ")," ---> ",
                   paste0(query_as_alias$symbol, collapse = ", ")))

          queryset[['found']] <-
            dplyr::bind_rows(queryset[['found']], query_as_alias)
          queryset[['not_found']] <- queryset[['not_found']] |>
            dplyr::anti_join(query_as_alias, by = "qid")

          if (nrow(queryset[['not_found']]) > 0) {
            lgr::lgr$info(
              paste0("ERROR: ", qtype, " gene identifiers NOT found: ",
                     paste0(queryset[['not_found']]$qid, collapse = ", "),
                     " (make sure that unambiguous primary identifiers/symbols are used)"))
          } else {
            queryset[['match_status']] <- "perfect_go"

          }
        } else {
          lgr::lgr$info( paste0("ERROR: ", qtype, " gene identifiers NOT found: ",
                         paste0(queryset[['not_found']]$qid, collapse = ", "),
                         " (make sure that unambiguous primary identifiers/symbols are used)"))

        }
      }
      else{
        lgr::lgr$info( paste0("ERROR: ", qtype, " gene identifiers NOT found: ",
                       paste0(queryset[['not_found']]$qid, collapse = ", "),
                       " (make sure that unambiguous primary identifiers/symbols are used)"))
      }
    }
  }
  if (nrow(queryset[['found']]) == length(qgenes)) {
    lgr::lgr$info( paste0('SUCCESS: Identified all genes (n = ',
                             nrow(queryset[['found']]),') in ',qtype,' set'))
  } else {
    if (nrow(queryset[['found']]) == 0) {
      lgr::lgr$info( paste0(
        "ERROR: NO ", qtype, " gene identifiers found: ",
        paste0(target_genes$qid, collapse = ", "),
        " - wrong query_id_type (",q_id_type,")?","\n"))
      queryset[['match_status']] <- "imperfect_stop"
    } else {
      lgr::lgr$info(
        paste0('Identified n = ',
               nrow(queryset[['found']]),' entries in ', qtype,
               ' set (n = ',
               nrow(queryset[['not_found']]),' invalid entries)'))
    }

  }

  if (nrow(queryset[['found']]) > 0) {
    queryset[['found']] <- queryset[['found']] |>
      dplyr::mutate(status = 'found') |>
      dplyr::mutate(
        status = dplyr::if_else(
          .data$status == "found" & .data$alias == T,
          as.character("found_as_alias"),
          as.character(.data$status))
      ) |>
      dplyr::arrange(dplyr::desc(.data$status), .data$symbol) |>
      dplyr::mutate(
        genename = paste0("<a href='https://www.ncbi.nlm.nih.gov/gene/",
                          .data$entrezgene,"' target='_blank'>",.data$name,"</a>")
      )
  }
  if (nrow(queryset[['not_found']]) > 0) {
    queryset[['not_found']] <- queryset[['not_found']] |>
      dplyr::mutate(status = 'not_found') |>
      dplyr::mutate(genename = NA) |>
      dplyr::arrange(.data$qid)
  }

  if (nrow(queryset[['found']]) > 0 | nrow(queryset[['not_found']]) > 0) {

    queryset[['all']] <- as.data.frame(
      queryset[['not_found']] |>
      dplyr::bind_rows(queryset[['found']]) |>
      dplyr::rename(query_id = "qid") |>
      dplyr::select(c("query_id",
                    "status",
                    "symbol",
                    "genename")) |>
        dplyr::rowwise() |>
        dplyr::mutate(
          symbol = dplyr::if_else(
            .data$status == "not_found",
            as.character(NA),
            as.character(.data$symbol)))
    )
  }

  queryset[['found']]$qid <- NULL
  queryset[['not_found']]$qid <- NULL


  return(queryset)

}

#' Function that validates the oncoEnrichR database object
#'
#' @param db list object with annotation data for oncoEnrichR
#'
#' @keywords internal
#'
validate_db <- function(oe_db) {

  ## check that db is of list type

  db_entries <-
    c("cancerdrugdb",
      "genedb",
      "hpa",
      "ligandreceptordb",
      "otdb",
      "pathwaydb",
      "pfamdb",
      "depmapdb",
      "survivaldb",
      "release_notes",
      "slparalogdb",
      "subcelldb",
      "tcgadb",
      "tftargetdb",
      "tissuecelldb")

  for (db in db_entries) {
    if (!(db %in% names(oe_db))) {
      lgr::lgr$info(paste0("ERROR: '",db,"' NOT found in oncoEnrichR db object"))
      return(-1)
    }

  }
  return(0)
}

#' Function that validates a particular db object (data.frame) in oncoEnrichR
#'
#' @param df data.frame with annotation data for oncoEnrichR
#' @param dbtype type of oncoEnrichR datasource
#'
#' @keywords internal
#'

validate_db_df <- function(df, dbtype = "genedb") {

  val <- assertthat::validate_that(
    is.data.frame(df)
  )
  if (!is.logical(val)) {
    stop(val)
  }

  dbtypes <- c("genedb",
               "hpadb",
               "tcga_aberration",
               "tcga_diagnosis_code",
               "tcga_site_code",
               "tcga_clinical_strata_code",
               "tcga_coexpression",
               "tcga_recurrent_variants",
               "protein_complex",
               "protein_domain",
               "dorothea",
               "ligand_receptor_db",
               "ligand_receptor_xref",
               "transcript_xref",
               "biogrid",
               "compartments",
               "oeDB",
               "tf_target_interactions",
               "go_gganatogram",
               "opentarget_disease_assoc",
               "opentarget_disease_site_rank",
               "enrichment_db_hpa_singlecell",
               "enrichment_db_hpa_tissue",
               "ppi_nodes",
               "slparalog",
               "fitness_scores",
               "target_priority_scores",
               "survival_km_cshl",
               "ppi_edges")
  if (!(dbtype %in% dbtypes)) {
    stop(
      paste0("dbtype '",dbtype,
             "' not recognized, possible s are: ",
             paste(sort(dbtypes), collapse = ", ")))
  }

  if (dbtype == "genedb") {
    cols <- c('symbol',
              'entrezgene',
              'oncogene',
              'oncogene_confidence_level',
              'tumor_suppressor',
              'tsg_confidence_level',
              'cancer_driver',
              'hgnc_id',
              'tsg_support',
              'oncogene_support',
              'driver_support',
              'cancergene_evidence',
              'go_term_link',
              'name',
              'gene_summary',
              'gene_biotype',
              'AB_tractability_category',
              'AB_tractability_support',
              'SM_tractability_category',
              'SM_tractability_support',
              'genename',
              'targeted_cancer_drugs_lp',
              'targeted_cancer_drugs_ep',
              'approved_drugs',
              'num_go_terms',
              'cancer_max_rank',
              'unknown_function_rank',
              'has_gene_summary')
  }

  if (dbtype == "opentarget_disease_assoc") {
    cols <- c("disease_efo_id",
              "direct_ot_association",
              "ot_association_score",
              "ot_datatype_support",
              "ensembl_gene_id",
              "efo_name",
              "primary_site",
              "cancer_phenotype",
              "ot_link")
  }

  if (dbtype == "biogrid") {
    cols <- c("entrezgene_A",
              "entrezgene_B",
              "method",
              "pmid",
              "throughput")
  }

  if (dbtype == "tcga_aberration") {
    cols <- c("symbol",
              "variant_type",
              "samples_mutated",
              "tot_samples",
              "percent_mutated",
              "percentile",
              "decile",
              "clinical_strata_code",
              "diagnosis_code",
              "site_code")
  }

  if (dbtype == "slparalog") {
    cols <- c("entrezgene_A1",
              "symbol_A1",
              "entrezgene_A2",
              "symbol_A2",
              "prediction_score",
              "prediction_percentile",
              "sequence_identity_pct",
              "family_size",
              "conservation_score",
              "ppi_overlap")
  }

  if (dbtype == "tcga_coexpression") {
    cols <- c("symbol",
              "symbol_partner",
              "r",
              "p_value",
              "tumor",
              "tumor_suppressor",
              "oncogene",
              "cancer_driver")
  }

  if (dbtype == "tcga_diagnosis_code") {
    cols <- c("primary_diagnosis",
              "diagnosis_code")
  }

  if (dbtype == "tcga_site_code") {
    cols <- c("primary_site",
              "site_code")
  }

  if (dbtype == "tcga_clinical_strata_code") {
    cols <- c("clinical_strata",
              "clinical_strata_code")
  }


  if (dbtype == "opentarget_disease_site_rank") {
    cols <- c("primary_site",
              "ensembl_gene_id",
              "tissue_assoc_score",
              "tissue_assoc_rank",
              "global_assoc_score",
              "global_assoc_rank")
  }

  if (dbtype == "enrichment_db_hpa_singlecell") {
    cols <- c("ensembl_gene_id",
              "category",
              "cell_type")
  }

  if (dbtype == "enrichment_db_hpa_tissue") {
    cols <- c("ensembl_gene_id",
              "category",
              "tissue")
  }

  if (dbtype == "hpadb") {
    cols <- c("ensembl_gene_id",
              "property",
              "value")
  }


  if (dbtype == "tf_target_interactions") {
    cols = c("queryset_overlap",
                 "literature_support",
                 "interaction_sources",
                 "regulator_cancer_max_rank",
                 "target_cancer_max_rank",
                 "regulator",
                 "regulator_name",
                 "target",
                 "target_name",
                 "confidence_level",
                 "mode_of_regulation")
  }


  if (dbtype == "go_gganatogram") {
    cols <- c("ggcompartment",
                  "go_id")
  }
  if (dbtype == "protein_complex") {
    cols <- c('complex_id',
              'complex_name',
              'purification_method',
              'complex_comment',
              'disease_comment',
              'sources',
              'confidence',
              'complex_literature',
              'complex_literature_support')
  }
  if (dbtype == "protein_domain") {
    cols <- c('uniprot_acc',
              'pfam_id',
              'pfam_short_name',
              'pfam_long_name',
              'domain_freq')
  }

  if (dbtype == "ligand_receptor_xref") {
    cols <- c('interaction_id',
              'symbol',
              'class')
  }

  if (dbtype == "ligand_receptor_db") {
    cols <- c('interaction_id',
              'interaction_name',
              'annotation',
              'pathway_name',
              'interaction_members',
              'ligand',
              'receptor',
              'agonist',
              'antagonist',
              'co_A_receptor',
              'co_I_receptor',
              'literature_support')
  }
  if (dbtype == "dorothea") {
    cols <- c('regulator',
              'target',
              'interaction_sources',
              'confidence_level',
              'mode_of_regulation',
              'tf_target_literature_support',
              'tf_target_literature')
  }
  if (dbtype == "transcript_xref") {
    cols <- c('entrezgene',
              'property',
              'value')
  }
  if (dbtype == "survival_km_cshl") {
    cols <- c('symbol',
              'tcga_cohort',
              'z_score')
  }

  if (dbtype == "target_priority_scores") {
    cols <- c('symbol',
              'gene_id',
              'priority_score',
              'tumor_type')
  }

  if (dbtype == "fitness_scores") {
    cols <- c('symbol',
              'model_name',
              'model_id',
              'loss_of_fitness',
              'scaled_BF',
              'tissue',
              'cancer_type',
              'tissue_status',
              'entrezgene',
              'sample_site',
              'gene_id_project_score')
  }

  if (dbtype == "comppidb") {
    cols <- c('uniprot_acc',
              'go_id',
              'go_term',
              'confidence',
              'annotation_source',
              'annotation_type')
  }

  if (dbtype == "compartments") {
    cols <- c('entrezgene',
              'go_id',
              'go_term',
              'confidence',
              'annotation_source',
              'annotation_channel')
  }


  if (dbtype == "ppi_nodes") {
    cols <- c('symbol',
              'entrezgene',
              'genename',
              'query_node',
              'cancer_driver',
              'id',
              'tumor_suppressor',
              'oncogene')
  }

  if (dbtype == "ppi_edges") {
    cols <- c('symbol_A',
              'symbol_B',
              'entrezgene_A',
              'entrezgene_B',
              'oncogene_A',
              'oncogene_B',
              'tsgene_A',
              'tsgene_B',
              'cdriver_A',
              'cdriver_B',
              'querynode_A',
              'querynode_B',
              'weight',
              'from',
              'to',
              'interaction_symbol')

  }
  assertable::assert_colnames(df,
                              colnames = cols,
                              only_colnames = F,
                              quiet = T)

  if (dbtype == 'transcript_xref') {
    identifiers_expected <- c('alias',
                     'ensembl_gene_id',
                     'ensembl_protein_id',
                     'ensembl_transcript_id',
                     'refseq_mrna',
                     'refseq_peptide',
                     'symbol',
                     'uniprot_acc')

    identifiers_found <- sort(unique(df$property))

    assertthat::assert_that(
      identical(identifiers_expected, identifiers_found),
      msg = paste0("Identifier types present in 'transcript_xref' data frame",
                   " ('property' column) does not have all necessary s")
    )

    assertthat::assert_that(
      typeof(df$entrezgene) == "integer",
      msg = paste0("Type of 'entrezgene' column in 'transcript_xref' ",
                   "is not of type 'integer'")
    )


  }

  return(0)

}

add_excel_sheet <- function(
  report = NULL,
  workbook = NULL,
  analysis_output = "disease_association",
  tableStyle = "TableStyleMedium15") {

  invisible(assertthat::assert_that(!is.null(report)))
  invisible(assertthat::assert_that(!is.null(report$data)))
  invisible(assertthat::assert_that(!is.null(workbook)))

  target_df <- data.frame()

  lgr::lgr$info(
    paste0("Adding Excel sheet to workbook - ", analysis_output))

  if (analysis_output == "settings") {
    target_df <- data.frame(
      category = 'CANCER_ASSOCIATION',
      configuration = 'show_disease',
      value = as.character(report$config$show$disease),
      stringsAsFactors = F
    )
    target_df <- target_df |>
      dplyr::bind_rows(
        data.frame(
          category = 'CANCER_ASSOCIATION',
          configuration = 'show_top_diseases',
          value = as.character(report$config$disease$show_top_diseases),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'CANCER_HALLMARK',
          configuration = 'show_hallmark',
          value = as.character(report$config$show$cancer_hallmark),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'UNKNOWN_FUNCTION',
          configuration = 'show_unknown_function',
          value = as.character(report$config$show$unknown_function),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'DRUG_KNOWN',
          configuration = 'show_drug',
          value = as.character(report$config$show$drug),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'SYNTHETIC_LETHALITY',
          configuration = 'show_synleth',
          value = as.character(report$config$show$synleth),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'PROTEIN_COMPLEX',
          configuration = 'show_protein_complex',
          value = as.character(report$config$show$protein_complex),
          stringsAsFactors = F
        ),

        data.frame(
          category = 'ENRICHMENT',
          configuration = 'show_enrichment',
          value = as.character(report$config$show$enrichment),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'ENRICHMENT',
          configuration = 'enrichment_p_value_adj',
          value = as.character(report$config$enrichment$p_adjust_method),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'ENRICHMENT',
          configuration = 'enrichment_p_value_cutoff',
          value = as.character(report$config$enrichment$p_value_cutoff),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'ENRICHMENT',
          configuration = 'enrichment_q_value_cutoff',
          value = as.character(report$config$enrichment$q_value_cutoff),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'ENRICHMENT',
          configuration = 'enrichment_min_geneset_size',
          value = as.character(report$config$enrichment$min_gs_size),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'ENRICHMENT',
          configuration = 'enrichment_max_geneset_size',
          value = as.character(report$config$enrichment$max_gs_size),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'ENRICHMENT',
          configuration = 'enrichment_simplify_go',
          value = as.character(report$config$enrichment$simplify_go),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'ENRICHMENT',
          configuration = 'bgset_description',
          value = as.character(report$config$enrichment$bgset_description),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'ENRICHMENT',
          configuration = 'bgset_size',
          value = as.character(report$config$enrichment$bgset_size),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'REGULATORY',
          configuration = 'show_regulatory',
          value = as.character(report$config$show$regulatory),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'REGULATORY',
          configuration = 'regulatory_min_confidence',
          value = as.character(report$config$regulatory$min_confidence),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'SUBCELL',
          configuration = 'show_subcell',
          value = as.character(report$config$show$subcellcomp),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'SUBCELL',
          configuration = 'subcellcomp_min_confidence',
          value = as.character(report$config$subcellcomp$minimum_confidence),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'SUBCELL',
          configuration = 'subcellcomp_min_channels',
          value = as.character(report$config$subcellcomp$minimum_channels),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'SUBCELL',
          configuration = 'subcellcomp_channels',
          value = paste(
            as.character(
              report$config$subcellcomp$channels),
            collapse = ", "),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'SUBCELL',
          configuration = 'show_cytosol',
          value = as.character(report$config$subcellcomp$show_cytosol),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'CELL_TISSUE',
          configuration = 'show_cell_tissue',
          value = as.character(report$config$show$cell_tissue),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'PPI',
          configuration = 'show_ppi',
          value = as.character(report$config$show$ppi),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'PPI',
          configuration = 'ppi_string_min_score',
          value = as.character(report$config$ppi$string$minimum_score),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'PPI',
          configuration = 'ppi_string_network_type',
          value = as.character(report$config$ppi$string$network_type),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'PPI',
          configuration = 'ppi_biogrid_min_evidence',
          value = as.character(report$config$ppi$biogrid$minimum_evidence),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'PPI',
          configuration = 'ppi_show_drugs',
          value = as.character(report$config$ppi$string$show_drugs),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'PPI',
          configuration = 'ppi_add_nodes',
          value = as.character(report$config$ppi$string$add_nodes),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'PPI',
          configuration = 'ppi_show_isolated_nodes',
          value = as.character(report$config$ppi$string$show_isolated_nodes),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'FITNESS',
          configuration = 'show_fitness',
          value = as.character(report$config$show$fitness),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'FITNESS',
          configuration = 'fitness_max_score',
          value = as.character(report$config$fitness$max_BF_score),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'ABERRATION',
          configuration = 'show_aberration',
          value = as.character(report$config$show$aberration),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'COEXPRESSION',
          configuration = 'show_coexpression',
          value = as.character(report$config$show$coexpression),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'LIGAND_RECEPTOR',
          configuration = 'show_ligand_receptor',
          value = as.character(report$config$show$ligand_receptor),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'PROGNOSTIC',
          configuration = 'show_prognostic',
          value = as.character(report$config$show$cancer_prognosis),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'OTHER',
          configuration = 'project_title',
          value = as.character(report$config$project_title),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'OTHER',
          configuration = 'project_description',
          value = as.character(report$config$project_description),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'OTHER',
          configuration = 'project_owner',
          value = as.character(report$config$project_owner),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'OTHER',
          configuration = 'query_ignore_err',
          value = as.character(report$config$query$ignore_err),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'OTHER',
          configuration = 'query_id_type',
          value = as.character(report$config$query$id_type),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'OTHER',
          configuration = 'bgset_id_type',
          value = as.character(report$config$bgset$id_type),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'OTHER',
          configuration = 'floating_toc',
          value = as.character(report$config$rmarkdown$floating_toc),
          stringsAsFactors = F
        ),
        data.frame(
          category = 'OTHER',
          configuration = 'report_theme',
          value = as.character(report$config$rmarkdown$theme),
          stringsAsFactors = F
        )

      )

  }

  if (analysis_output == "query") {
    if (is.data.frame(report$data$query$target)) {
      if (NROW(report$data$query$target) > 0) {
        target_df <- report$data$query$target |>
          dplyr::mutate(
            genename =
              stringr::str_squish(
                stringr::str_trim(
                  textclean::replace_html(.data$genename)
                )
              )
          )
      }
    }
  }

  if (analysis_output == "unknown_function") {
    if (is.data.frame(report$data$unknown_function$hits_df)) {
      if (NROW(report$data$unknown_function$hits_df) > 0) {
        target_df <- report$data$unknown_function$hits_df |>
          dplyr::rename(go_terms = .data$go_term_link) |>
          dplyr::mutate(
            annotation_source = "Gene Ontology (2022-12)/NCBI Gene/UniProt (2022_05)",
            version = NA) |>
          dplyr::select(c("annotation_source",
                        "version"),
                        dplyr::everything()) |>
          dplyr::mutate(
            genename =
              stringr::str_trim(
                textclean::replace_html(.data$genename)
              )
          ) |>
          dplyr::mutate(
            go_terms =
              stringr::str_trim(
                textclean::replace_html(.data$go_terms)
              )
          ) |>
          dplyr::mutate(
            gene_summary =
              stringr::str_squish(
                stringr::str_trim(
                  textclean::replace_html(.data$gene_summary)
                )
              )
          )
      }
    }
  }

  if (analysis_output == "cancer_association") {
    if (is.data.frame(report$data$disease$target)) {
      if (NROW(report$data$disease$target) > 0) {
        target_df <- report$data$disease$target |>
          dplyr::mutate(
            annotation_source = report$config$resources$opentargets$name,
            version = report$config$resources$opentargets$version) |>
          dplyr::select(-c("cancer_association_links",
                           "disease_association_links",
                           "cancergene_evidence",
                           "gene_summary")) |>
          dplyr::select(c("annotation_source", "version"),
                        dplyr::everything()) |>
          dplyr::mutate(
            genename =
              stringr::str_trim(
                textclean::replace_html(.data$genename)
              )
          )
      }
    }
  }

  if (analysis_output == "cancer_hallmark") {
    if (is.data.frame(report$data$cancer_hallmark$target)) {
      if (NROW(report$data$cancer_hallmark$target) > 0) {
        target_df <- report$data$cancer_hallmark$target |>
          dplyr::mutate(
            symbol =
              stringr::str_squish(
                stringr::str_trim(
                  textclean::replace_html(.data$symbol)
                )
              )
          ) |>
          dplyr::select(-c("literature_support"))
      }
    }
  }


  if (analysis_output == "drug_known") {
    if (is.data.frame(report$data$drug$target_drugs)) {
      if (NROW(report$data$drug$target_drugs)) {
        target_df <- report$data$drug$target_drugs |>
          dplyr::mutate(
            annotation_source = report$config$resources$opentargets$name,
            version = report$config$resources$opentargets$version) |>
          dplyr::select(c("annotation_source", "version"),
                        dplyr::everything()) |>
          dplyr::mutate(
            genename =
              stringr::str_trim(
                textclean::replace_html(.data$genename)
              )
          ) |>
          dplyr::mutate(
            approved_drugs =
              stringr::str_trim(
                textclean::replace_html(.data$approved_drugs)
              )
          ) |>
          dplyr::mutate(
            drugs_late_phase =
              stringr::str_replace_all(
                stringr::str_squish(
                  stringr::str_trim(
                    textclean::replace_html(.data$drugs_late_phase)
                  )
                ),
                " , ",
                ", "
              )
          ) |>
          dplyr::mutate(
            drugs_early_phase =
              stringr::str_replace_all(
                stringr::str_squish(
                  stringr::str_trim(
                    textclean::replace_html(.data$drugs_early_phase)
                  )
                ),
                " , ",
                ", "
              )
          )
      }
    }
  }

  if (analysis_output == "drug_tractability") {
    if (is.data.frame(report$data$drug$tractability_ab) &
        is.data.frame(report$data$drug$tractability_sm)) {
      if (NROW(report$data$drug$tractability_ab) |
          NROW(report$data$drug$tractability_sm)) {

        target_df <- data.frame()

        if (NROW(report$data$drug$tractability_sm)) {
          df <- report$data$drug$tractability_sm |>
            dplyr::mutate(
              annotation_source = report$config$resources$opentargets$name,
              version = report$config$resources$opentargets$version) |>
            dplyr::rename(tractability_category = "SM_tractability_category",
                          tractability_support = "SM_tractability_support") |>
            dplyr::mutate(tractability_drugtype = "Small molecules/compounds") |>
            dplyr::select(c("annotation_source", "version",
                          "tractability_drugtype"),
                          dplyr::everything()) |>
            dplyr::mutate(
              symbol =
                stringr::str_trim(
                  textclean::replace_html(.data$symbol)
                )
            ) |>
            dplyr::mutate(
              tractability_support =
                stringr::str_trim(
                  textclean::replace_html(.data$tractability_support)
                )
            )

          target_df <- target_df |>
            dplyr::bind_rows(df)
        }

        if (NROW(report$data$drug$tractability_ab)) {
          df <- report$data$drug$tractability_ab |>
            dplyr::mutate(
              annotation_source = report$config$resources$opentargets$name,
              version = report$config$resources$opentargets$version) |>
            dplyr::rename(tractability_category = .data$AB_tractability_category,
                          tractability_support = .data$AB_tractability_support) |>
            dplyr::mutate(tractability_drugtype = "Antibody") |>
            dplyr::select(c("annotation_source", "version",
                          "tractability_drugtype"),
                          dplyr::everything()) |>
            dplyr::mutate(
              symbol =
                stringr::str_trim(
                  textclean::replace_html(.data$symbol)
                )
            ) |>
            dplyr::mutate(
              tractability_support =
                stringr::str_trim(
                  textclean::replace_html(.data$tractability_support)
                )
            )

          target_df <- target_df |>
            dplyr::bind_rows(df)
        }
      }
    }
  }

  if (analysis_output == "ligand_receptor") {
    for (c in c('secreted_signaling','cell_cell_contact',
               'ecm_receptor')) {

      if (is.data.frame(report$data$ligand_receptor[[c]])) {
        if (NROW(report$data$ligand_receptor[[c]]) > 0) {

          df <- report$data$ligand_receptor[[c]] |>
            dplyr::mutate(
              annotation_source = report$config$resources[['cellchatdb']]$name,
              version = report$config$resources[['cellchatdb']]$version) |>
            dplyr::select(c("annotation_source", "version"),
                          dplyr::everything()) |>
            dplyr::mutate(
              literature_support =
                stringr::str_trim(
                  textclean::replace_html(.data$literature_support)
                )
            )

          target_df <- target_df |>
            dplyr::bind_rows(df)
        }
      }

    }
  }

  if (analysis_output == "protein_domain") {

    if (is.data.frame(report$data$protein_domain$target)) {
      if (NROW(report$data$protein_domain$target) > 0) {

        df <- report$data$protein_domain$target |>
          dplyr::mutate(
            annotation_source = report$config$resources[['pfam']]$name,
            version = report$config$resources[['pfam']]$version) |>
          dplyr::select(c("annotation_source",
                        "version"),
                        dplyr::everything()) |>
          dplyr::mutate(
            protein_domain =
              stringr::str_trim(
                textclean::replace_html(.data$protein_domain)
              ),
            target_genes =
              stringr::str_trim(
                textclean::replace_html(.data$target_genes)
              )
          )

        target_df <- target_df |>
          dplyr::bind_rows(df)
      }
    }
  }

  if (analysis_output == "protein_complex") {

    for (c in c('omnipath','humap2')) {

      if (is.data.frame(report$data$protein_complex[[c]])) {
        if (NROW(report$data$protein_complex[[c]]) > 0) {

          res_name <- c
          if(c == 'omnipath'){
            res_name <- 'omnipathr'
          }

          df <- report$data$protein_complex[[c]] |>
            dplyr::mutate(
              annotation_source = report$config$resources[[res_name]]$name,
              version = report$config$resources[[res_name]]$version) |>
            dplyr::select(c("annotation_source",
                            "version"),
                          dplyr::everything()) |>
            dplyr::mutate(
              complex_genes =
                stringr::str_replace_all(
                  stringr::str_squish(
                    stringr::str_trim(
                      textclean::replace_html(.data$complex_genes)
                    )
                  ),
                  " , ",
                  ", "
                )
            )

          target_df <- target_df |>
            dplyr::bind_rows(df)
        }
      }
    }

    if (NROW(target_df) > 0 &
       "literature" %in% colnames(target_df)) {
      target_df <- target_df |>
        dplyr::mutate(
          literature =
            stringr::str_trim(
              textclean::replace_html(.data$literature)
            )
        ) |>
        dplyr::mutate(
          complex_name =
            stringr::str_trim(
              textclean::replace_html(.data$complex_name)
            )
        )
    }

  }


  if (analysis_output == "prognostic_association_I") {
    if (is.data.frame(report$data$cancer_prognosis$hpa$assocs)) {
      if (NROW(report$data$cancer_prognosis$hpa$assocs) > 0) {
        target_df <- report$data$cancer_prognosis$hpa$assocs |>
          dplyr::mutate(
            annotation_source = report$config$resources$hpa$name,
            version = report$config$resources$hpa$version) |>
          dplyr::select(c("annotation_source",
                        "version"),
                        dplyr::everything()) |>
          dplyr::mutate(
            tumor_types =
              stringr::str_trim(
                textclean::replace_html(.data$tumor_types)
              )
          )
      }
    }
  }

  if (analysis_output == "synthetic_lethality") {
    for (t in c('single_pair_member','both_in_pair')) {

      if (is.data.frame(report$data$synleth[[t]])) {
        if (NROW(report$data$synleth[[t]]) > 0) {
          df <- report$data$synleth[[t]] |>
            dplyr::mutate(
              annotation_source = "De Kegel et al., Cell Systems, 2021",
              version = "v1") |>
            dplyr::mutate(feature_type = t) |>
            dplyr::mutate(
              genename_A =
                stringr::str_trim(
                  textclean::replace_html(.data$genename_A)
                )
            ) |>
            dplyr::mutate(
              genename_B =
                stringr::str_trim(
                  textclean::replace_html(.data$genename_B)
                )
            ) |>
            dplyr::arrange(.data$feature_type,
                           dplyr::desc(.data$prediction_score)) |>
            dplyr::select(c("annotation_source",
                            "version",
                          "feature_type"), dplyr::everything())

          target_df <- target_df |>
            dplyr::bind_rows(df)

        }
      }
    }
  }


  if (analysis_output == "prognostic_association_II") {
    for (t in c('cna','mut','exp','meth')) {
      if (is.data.frame(report$data$cancer_prognosis$km_cshl$assocs[[t]])) {
        if (NROW(report$data$cancer_prognosis$km_cshl$assocs[[t]]) > 0) {
          df <- report$data$cancer_prognosis$km_cshl$assocs[[t]] |>
            dplyr::mutate(
              annotation_source = report$config$resources$tcga_survival$name,
              version = report$config$resources$tcga_survival$version) |>
            dplyr::mutate(feature_type = t) |>
            dplyr::arrange(.data$feature_type, .data$z_score) |>
            dplyr::select(c("annotation_source", "version"),
                          dplyr::everything())

          target_df <- target_df |>
            dplyr::bind_rows(df)

        }
      }
    }
  }

  if (analysis_output == "coexpression") {

    ## co-expression
    if (is.data.frame(report$data$tcga$coexpression)) {
      if (NROW(report$data$tcga$coexpression) > 0) {
        target_df <- report$data$tcga$coexpression |>
          dplyr::mutate(
            annotation_source = report$config$resources$tcga$name,
            version = report$config$resources$tcga$version) |>
          dplyr::rename(tcga_cohort = "tumor") |>
          dplyr::select(-c("entrezgene")) |>
          dplyr::rename(partner_oncogene = "oncogene",
                        partner_tumor_suppressor = "tumor_suppressor",
                        partner_cancer_driver = "cancer_driver") |>
          dplyr::select(c("annotation_source", "version"),
                        dplyr::everything())
      }
    }
  }

  if (analysis_output == "regulatory") {

    ## regulatory interactions
    for (c in c('pancancer','global')) {

      if (is.data.frame(report$data$regulatory$interactions[[c]])) {
        if (NROW(report$data$regulatory$interactions[[c]]) > 0) {
          df <- report$data$regulatory$interactions[[c]] |>
            dplyr::mutate(
              dorothea_collection = c,
              annotation_source = report$config$resources$dorothea$name,
              version = report$config$resources$dorothea$version) |>
            dplyr::select(c("annotation_source",
                            "version",
                          "dorothea_collection"),
                          dplyr::everything()) |>
            dplyr::mutate(
              target_name =
                stringr::str_trim(
                  textclean::replace_html(.data$target_name)
                )
            ) |>
            dplyr::mutate(
              regulator_name =
                stringr::str_trim(
                  textclean::replace_html(.data$regulator_name)
                )
            ) |>
            dplyr::mutate(
              literature_support =
                stringr::str_trim(
                  textclean::replace_html(.data$literature_support)
                )
            )

          target_df <- target_df |>
            dplyr::bind_rows(df)
        }
      }
    }

  }

  if (analysis_output == "recurrent_variants") {

    if (is.data.frame(report$data$tcga$recurrent_variants)) {
      if (NROW(report$data$tcga$recurrent_variants) > 0) {
        df <-
          report$data$tcga$recurrent_variants

        colnames(df) <- tolower(colnames(df))
        df <- as.data.frame(
          df |>
            dplyr::mutate(
              site_recurrence = as.numeric(.data$site_recurrence)
            ) |>
            dplyr::arrange(
              dplyr::desc(.data$total_recurrence),
              dplyr::desc(.data$site_recurrence)) |>
            dplyr::mutate(
              annotation_source = report$config$resources$tcga$name,
              version = report$config$resources$tcga$version) |>
            dplyr::mutate(
              ensembl_gene_id =
                stringr::str_trim(
                  textclean::replace_html(.data$ensembl_gene_id)
                )
            ) |>
            dplyr::mutate(
              ensembl_transcript_id =
                stringr::str_trim(
                  textclean::replace_html(.data$ensembl_transcript_id)
                )
            ) |>
            dplyr::mutate(
              protein_domain =
                stringr::str_trim(
                  textclean::replace_html(.data$protein_domain)
                )
            ) |>
            dplyr::mutate(
              cosmic_mutation_id =
                stringr::str_trim(
                  textclean::replace_html(.data$cosmic_mutation_id)
                )
            ) |>
            dplyr::mutate(
              site_recurrence = paste(.data$primary_site,
                                      .data$site_recurrence, sep =":")
            ) |>
            dplyr::group_by(
              .data$symbol, .data$consequence,
              .data$protein_change, .data$protein_domain,
              .data$mutation_hotspot,
              .data$loss_of_function,
              .data$ensembl_gene_id,
              .data$ensembl_transcript_id,
              .data$total_recurrence,
              .data$cosmic_mutation_id
            ) |>
            dplyr::summarise(site_recurrence = paste(
              .data$site_recurrence, collapse = ", "
            ), .groups = "drop") |>
            dplyr::arrange(
              dplyr::desc(.data$total_recurrence)
            )

        )

        target_df <- target_df |>
          dplyr::bind_rows(df)
      }
    }
  }

  if (analysis_output == "aberration") {

    ## cna aberrations
    for (t in c('cna_ampl','cna_homdel')) {
      if (is.data.frame(report$data$tcga$aberration$table[[t]])) {
        if (NROW(report$data$tcga$aberration$table[[t]]) > 0) {
          df <-
            report$data$tcga$aberration$table[[t]] |>
            dplyr::mutate(
              annotation_source = report$config$resources$tcga$name,
              version = report$config$resources$tcga$version) |>
            dplyr::mutate(
              symbol =
                stringr::str_trim(
                  textclean::replace_html(.data$gene)
                )
            ) |>
            dplyr::select(-c("gene")) |>
            dplyr::select(c("annotation_source", "version",
                          "symbol"), dplyr::everything())

          target_df <- target_df |>
            dplyr::bind_rows(df)
        }
      }
    }

  }

  if (analysis_output == "subcellcomp") {
    if (is.data.frame(report$data$subcellcomp$all)) {
      if (NROW(report$data$subcellcomp$all) > 0) {
        target_df <- report$data$subcellcomp$all |>
          dplyr::mutate(
            annotation_source = report$config$resources$compartments$name,
            version = report$config$resources$compartments$version) |>
          dplyr::select(-c("ggcompartment")) |>
          dplyr::select(c("annotation_source", "version"),
                        dplyr::everything()) |>
          dplyr::mutate(
            compartment =
              stringr::str_trim(
                textclean::replace_html(.data$compartment)
              )
          ) |>
          dplyr::mutate(
            genename =
              stringr::str_trim(
                textclean::replace_html(.data$genename)
              )
          )
      }
    }
  }

  if (analysis_output == "enrichment") {
    enrichment_df <- data.frame()
    for (e in c('go','wikipathway','kegg','msigdb','netpath')) {
      if (NROW(report$data$enrichment[[e]]) > 0) {
        enrichment_df <- enrichment_df |>
          dplyr::bind_rows(
            report$data$enrichment[[e]] |>
              dplyr::mutate(
                annotation_source = report$config$resources[[e]]$name,
                version = report$config$resources[[e]]$version) |>
              dplyr::rename(category = "db",
                            entrezgene = "gene_id")
          ) |>
          dplyr::select(-c("gene_symbol_link",
                           "description_link")) |>
          dplyr::select(c("annotation_source", "version",
                        "category", "description"),
                        dplyr::everything())
      }
    }
    target_df <- enrichment_df
  }



  if (analysis_output == "fitness_scores") {
    if (is.data.frame(report$data$fitness$fitness_scores$targets)) {
      if (NROW(report$data$fitness$fitness_scores$targets) > 0) {
        target_df <- report$data$fitness$fitness_scores$targets |>
          dplyr::mutate(
            annotation_source = report$config$resources$depmap$name,
            version = report$config$resources$depmap$version) |>
          dplyr::select(-c("symbol_link_ps", "model_link_ps",
                           "n_gene")) |>
          dplyr::select(c("annotation_source", "version"),
                        dplyr::everything())
      }
    }
  }

  if (analysis_output == "fitness_prioritized") {
    if (is.data.frame(report$data$fitness$target_priority_scores$targets)) {
      if (NROW(report$data$fitness$target_priority_scores$targets) > 0) {
        target_df <- report$data$fitness$target_priority_scores$targets |>
          dplyr::mutate(
            annotation_source = report$config$resources$depmap$name,
            version = report$config$resources$depmap$version) |>
          dplyr::select(c("annotation_source", "version"),
                        dplyr::everything())
      }
    }
  }

  if (analysis_output == "ppi_string") {

    target_df <- data.frame()
    if (is.data.frame(report$data$ppi$string$complete_network$edges)) {
      if (NROW(report$data$ppi$string$complete_network$edges) > 0) {
        target_df <- report$data$ppi$string$complete_network$edges |>
          dplyr::filter(!is.na(.data$entrezgene_A) &
                          !is.na(.data$entrezgene_B)) |>
          dplyr::arrange(dplyr::desc(.data$score)) |>
          dplyr::rename(is_target_A = "querynode_A",
                        is_target_B = "querynode_B",
                        string_score = "score",
                        string_nscore = "nscore",
                        string_fscore = "fscore",
                        string_pscore = "pscore",
                        string_ascore = "ascore",
                        string_escore = "escore",
                        string_dscore = "dscore",
                        string_tscore = "tscore"
                        ) |>
          dplyr::select(c("symbol_A",
                        "symbol_B",
                        "is_target_A",
                        "is_target_B",
                        "string_score",
                        "string_nscore",
                        "string_fscore",
                        "string_pscore",
                        "string_ascore",
                        "string_escore",
                        "string_dscore",
                        "string_tscore")) |>
          dplyr::mutate(
            annotation_source = report$config$resources$string$name,
            version = report$config$resources$string$version) |>
          dplyr::select(c("annotation_source", "version"),
                        dplyr::everything())
      }
    }

  }

  if (analysis_output == "ppi_biogrid") {

    target_df <- data.frame()
    if (is.data.frame(report$data$ppi$biogrid$complete_network$edges)) {
      if (NROW(report$data$ppi$biogrid$complete_network$edges) > 0) {
        target_df <- report$data$ppi$biogrid$complete_network$edges |>
          dplyr::filter(!is.na(.data$entrezgene_A) &
                          !is.na(.data$entrezgene_B)) |>
          dplyr::rename(is_target_A = "querynode_A",
                        is_target_B = "querynode_B",
                        biogrid_evidence = "evidence",
                        biogrid_num_evidence = "num_evidence_items"
          ) |>
          dplyr::select(c("symbol_A",
                          "symbol_B",
                          "is_target_A",
                          "is_target_B",
                          "biogrid_evidence",
                          "biogrid_num_evidence")) |>
          dplyr::arrange(
            dplyr::desc(.data$biogrid_num_evidence)) |>
          dplyr::mutate(
            annotation_source = report$config$resources$biogrid$name,
            version = report$config$resources$biogrid$version) |>
          dplyr::select(c("annotation_source", "version"),
                        dplyr::everything())
      }
    }

  }

  if (analysis_output == "cell_tissue") {

    target_df <- data.frame()
    for (e in c("tissue_enrichment", "scRNA_enrichment")) {
      if (is.data.frame(report$data$cell_tissue[[e]]$per_gene)) {
        if (NROW(report$data$cell_tissue[[e]]$per_gene) > 0) {
          if (e == "tissue_enrichment") {
            df <-
              report$data$cell_tissue[[e]]$per_gene |>
              dplyr::mutate(
                annotation_source = report$config$resources$gtex$name,
                version = report$config$resources$gtex$version,
                category = stringr::str_replace(e,"_enrichment","")) |>
              dplyr::select(c("annotation_source",
                            "version",
                            "category"),
                            dplyr::everything()) |>
              dplyr::rename(tissue_or_celltype = "tissue")

          } else {
            df <-
              report$data$cell_tissue[[e]]$per_gene |>
              dplyr::mutate(
                annotation_source = report$config$resources$hpa$name,
                version = report$config$resources$hpa$version,
                category = stringr::str_replace(e,"_enrichment","")) |>
              dplyr::select(c("annotation_source",
                              "version",
                            "category"), dplyr::everything()) |>
              dplyr::rename(tissue_or_celltype = "cell_type")
          }
          target_df <- target_df |>
            dplyr::bind_rows(df) |>
            dplyr::mutate(
              genename =
                stringr::str_trim(
                  textclean::replace_html(.data$genename)
                )
            )
        }
      }
    }
  }

  if (nrow(target_df) > 0) {

    analysis_output = stringr::str_replace(
      analysis_output,"_ps_","_")

    openxlsx::addWorksheet(workbook,
                           sheetName = toupper(analysis_output))

    ## set automatic column widths
    openxlsx::setColWidths(workbook,
                           sheet = toupper(analysis_output),
                           cols = 1:ncol(target_df),
                           widths = "auto")

    ## write with default Excel Table style
    openxlsx::writeDataTable(workbook,
                             sheet = toupper(analysis_output),
                             x = target_df,
                             startRow = 1,
                             startCol = 1,
                             colNames = TRUE,
                             tableStyle = tableStyle)
  }

  return(workbook)
}


file_is_writable <- function(path) {
  assertthat::is.string(path) &&
    file.exists(path) &&
    assertthat::is.writeable(path)
}

#' Tidy eval helpers
#'
#' <https://cran.r-project.org/web/packages/dplyr/vignettes/programming.html>
#'
#' @name tidyeval
#' @keywords internal
#' @importFrom rlang .data :=
NULL

#' GSEAbase imports
#'
#' @name gseabase
#' @keywords internal
#' @importFrom GSEABase GeneSet SymbolIdentifier ENSEMBLIdentifier EntrezIdentifier
NULL

utils::globalVariables(c("."))
sigven/oncoEnrichR documentation built on Aug. 31, 2023, 8:05 a.m.