R/rankGenes.R

Defines functions rankGenes

#' Rank network genes by summed pathway PCI and resample-averaged DEA results
#'
#' For one subtype, combines the per-resample differential expression results
#' with the pathway-pair PCI scores and returns one summary row per network
#' gene, ordered by ascending summed PCI and then ascending harmonic mean
#' p-value.
#'
#' @param subtype Key used to index `final_results$DEG` entries,
#'   `complete_pathway_results` and `gene_results` via `[[`.
#' @param final_results List with a `DEG` element. Each entry of `DEG`
#'   (treated as one resample) is indexed by `subtype` and must yield a data
#'   frame with `canon_entrez`, `logFC` and `pvalue` columns.
#' @param complete_pathway_results List indexed by `subtype`; the selected
#'   element must have `edge` and `PCI` columns.
#' @param gene_results Optional list indexed by `subtype`; the selected element
#'   must have `entrezgene_id`, `external_gene_name` and `description` columns
#'   and share a join column with the PCI table (presumably `edge` -- confirm
#'   against the caller). When `NULL` (the default) an object named
#'   `gene_results` is looked up from the function's enclosing environment,
#'   which is what earlier versions of this function always did.
#' @return A data frame with columns `entrezgene_id`, `external_gene_name`,
#'   `description`, `avg_logFC`, `hmp` and `PCI_sum`, de-duplicated and sorted
#'   by `PCI_sum`, then `hmp`.
rankGenes <- function(subtype, final_results, complete_pathway_results,
                      gene_results = NULL) {

    print(subtype)

    # Backward compatibility: historically `gene_results` was a free variable.
    # Reproduce that lexical lookup when the caller does not pass it.
    if (is.null(gene_results)) {
        enclosing_env <- parent.env(environment())
        if (!exists("gene_results", envir = enclosing_env)) {
            stop("`gene_results` was not supplied and no object of that name ",
                 "is visible to rankGenes()", call. = FALSE)
        }
        gene_results <- get("gene_results", envir = enclosing_env)
    }

    # select DEG results (logFCs, pvals) from step 1, stacked across resamples
    DEG_results <- purrr::map(final_results$DEG, function(res) res[[subtype]])
    DEG_results <- dplyr::bind_rows(DEG_results, .id = "resample")

    # keep only the first id of '///'-separated entries; an empty entry gives NA
    id_pieces <- strsplit(as.character(DEG_results$canon_entrez), "///",
                          fixed = TRUE)
    DEG_results$entrezgene_id <- vapply(id_pieces,
                                        function(piece) trimws(piece[1]),
                                        character(1))

    # select pathway pair PCI scores
    pathway_PCI <- complete_pathway_results[[subtype]][, c("edge", "PCI")]

    # select gene list from union of repeated pathway pairs in final network
    network_genes <- gene_results[[subtype]]
    network_gene_list <- network_genes$entrezgene_id

    # calculate average logFC and harmonic mean p-value for one gene across
    # resamples
    resampleAverageDEA <- function(gene, DEG_results) {
        # `.env$` forces `gene` to mean the argument even if the DEG table
        # happens to carry a column with the same name.
        gene_DEG_results <- dplyr::filter(DEG_results,
                                          entrezgene_id == .env$gene)
        avg_fc <- mean(gene_DEG_results$logFC)
        harmonic_p <- harmonicmeanp::p.hmp(gene_DEG_results$pvalue,
                                           L = nrow(gene_DEG_results))
        data.frame(entrezgene_id = gene,
                   avg_logFC = avg_fc,
                   hmp = harmonic_p)
    }

    # A gene can appear on several rows of `network_genes`; summarise each
    # distinct id once instead of recomputing and de-duplicating afterwards.
    annotated_genes <- purrr::map(
        unique(network_gene_list),
        function(gene_id) resampleAverageDEA(gene = gene_id,
                                             DEG_results = DEG_results)
    )
    annotated_genes <- dplyr::bind_rows(annotated_genes)

    # Natural joins on the shared column names, exactly as before.
    ranked_genes <- dplyr::left_join(network_genes, annotated_genes)
    ranked_genes <- dplyr::left_join(ranked_genes, pathway_PCI)

    # add per-gene PCI sums in a single grouped pass, kept as doubles (no
    # round-trip through character, which truncated to 15 significant digits)
    gene_keys <- as.character(ranked_genes[["entrezgene_id"]])
    pci_totals <- tapply(ranked_genes[["PCI"]], gene_keys, sum)
    pci_sum <- as.numeric(pci_totals[gene_keys])
    # rows without a gene id matched nothing in the old per-row filter and so
    # received sum(<empty>) == 0; keep that result
    pci_sum[is.na(gene_keys)] <- 0
    ranked_genes$PCI_sum <- pci_sum

    # create unique summary
    summary_cols <- c("entrezgene_id", "external_gene_name", "description",
                      "avg_logFC", "hmp", "PCI_sum")
    ranked_genes_unique <- unique(ranked_genes[, summary_cols])
    ranked_genes_unique <- dplyr::arrange(ranked_genes_unique, PCI_sum, hmp)

    return(ranked_genes_unique)
}
hemoshear/pathwayTalk documentation built on July 16, 2022, 12:09 a.m.