R/fct_gene_verse.R

Defines functions getDotPlot_selInt getNumLR ensemblLink uniprotLink getGeneTable

Documented in ensemblLink getDotPlot_selInt getGeneTable getNumLR uniprotLink

#' Get table for gene-verse
#'
#' @param input.data preprocessed input data
#'
#' @return gene table with unique intpairs (no connection to clusters)
#' 
#' @importFrom dplyr select %>% distinct
#' @importFrom utils read.csv
#' @importFrom tibble add_column
#' @importFrom scales rescale
#' @importFrom biomaRt getBM useMart
#' @export
#' @examples 
#' data(input.data)
#' gene_table <- getGeneTable(input.data)
getGeneTable <- function(input.data){
    if("annotation_strategy" %in% colnames(input.data)){
    gene_tab <- input.data %>%
        dplyr::select(int_pair, geneA, geneB, 
                      typeA, typeB, annotation_strategy) %>%
        distinct()
    } else if("pathway_cellchat" %in% colnames(input.data)){
        gene_tab <- input.data %>%
            dplyr::select(int_pair, geneA, geneB, 
                          typeA, typeB, pathway_cellchat,
                          annotation_cellchat, evidence_cellchat) %>%
            distinct()
    } else {
        gene_tab <- input.data %>%
            dplyr::select(int_pair, geneA, geneB, 
                          typeA, typeB) %>%
            distinct()
    }
    if("annotation_strategy" %in% colnames(input.data)){ # CPDB
        # get protein info from cpdb_v2 file
        gene_input <- read.csv(app_sys("app", "extdata", "cpdb_gene_input.csv"))
        
        ### Adding for geneA
        gene_input_sub <- gene_input[match(gene_tab$geneA, gene_input$hgnc_symbol), 
                                     c("uniprot", "ensembl")]
        gene_tab <- add_column(gene_tab, 
                               "uniprotA" = uniprotLink(as.character(gene_input_sub$uniprot)), 
                               .after = "geneA")
        gene_tab <- add_column(gene_tab, 
                               "ensemblA" = ensemblLink(as.character(gene_input_sub$ensembl)), 
                               .after = "uniprotA")
        # considering complexes
        ind_compl <- grep(",", gene_tab$geneA)
        for(i in ind_compl){
            genes <- unlist(strsplit(gene_tab$geneA[i], ","))
            prots <- uniprotLink(as.character(
                gene_input[match(genes, gene_input$hgnc_symbol), "uniprot"]))
            ensembls <- ensemblLink(as.character(
                gene_input[match(genes, gene_input$hgnc_symbol), "ensembl"]))
            gene_tab$uniprotA[i] <- paste(prots, collapse = ",")
            gene_tab$ensemblA[i] <- paste(ensembls, collapse = ",")
        }
        
        ### Adding for geneB
        gene_input_sub <- gene_input[match(gene_tab$geneB, gene_input$hgnc_symbol), 
                                     c("uniprot", "ensembl")]
        gene_tab <- add_column(gene_tab, "uniprotB" = uniprotLink(as.character(gene_input_sub$uniprot)), 
                               .after = "geneB")
        gene_tab <- add_column(gene_tab, "ensemblB" = ensemblLink(as.character(gene_input_sub$ensembl)), 
                               .after = "uniprotB")
        # considering complexes
        ind_compl <- grep(",", gene_tab$geneB)
        for(i in ind_compl){
            genes <- unlist(strsplit(gene_tab$geneB[i], ","))
            prots <- uniprotLink(as.character(
                gene_input[match(genes, gene_input$hgnc_symbol), "uniprot"]))
            ensembls <- ensemblLink(as.character(
                gene_input[match(genes, gene_input$hgnc_symbol), "ensembl"]))
            gene_tab$uniprotB[i] <- paste(prots, collapse = ",")
            gene_tab$ensemblB[i] <- paste(ensembls, collapse = ",")
        }
    } else { #cellchat, icellnet and custom
        ### prepare db with all possible genes to query biomart
        all_genes <- character()
        # separating simple genes from complexes
        # simple A
        ind_compl <- grepl(",", gene_tab$geneA)
        all_genes <- c(all_genes, gene_tab$geneA[!ind_compl])
        # simple B
        ind_compl <- grepl(",", gene_tab$geneB)
        all_genes <- c(all_genes, gene_tab$geneB[!ind_compl])
        # complex A
        ind_compl <- grep(",", gene_tab$geneA)
        for(i in ind_compl){
            genes <- unlist(strsplit(gene_tab$geneA[i], ","))
            all_genes <- c(all_genes, genes)
        }
        # complex B
        ind_compl <- grep(",", gene_tab$geneB)
        for(i in ind_compl){
            genes <- unlist(strsplit(gene_tab$geneB[i], ","))
            all_genes <- c(all_genes, genes)
        }
        
        # unique all genes
        all_genes <- unique(all_genes)
        
        ensembl <- biomaRt::useMart("ensembl", dataset="hsapiens_gene_ensembl")
        # query biomaRt for all genes
        bm <- biomaRt::getBM(attributes = c('hgnc_symbol', 
                                            'uniprotswissprot', 
                                            'ensembl_gene_id'), 
                             filters = 'hgnc_symbol', 
                             values = all_genes, 
                             mart = ensembl,
                             useCache = TRUE)
        
        # remove rows with empty values
        bm <- bm %>%
            filter(uniprotswissprot != "" & ensembl_gene_id != "") %>%
            group_by(hgnc_symbol)
        
        ### Adding for geneA
        bm_sub <- bm[match(gene_tab$geneA, bm$hgnc_symbol),]
        gene_tab <- add_column(gene_tab, 
                               "uniprotA" = uniprotLink(as.character(bm_sub$uniprotswissprot)), 
                               .after = "geneA")
        gene_tab <- add_column(gene_tab, 
                               "ensemblA" = ensemblLink(as.character(bm_sub$ensembl_gene_id)), 
                               .after = "uniprotA")
        # considering complexes for geneA
        ind_compl <- grep(",", gene_tab$geneA)
        for(i in ind_compl){
            genes <- unlist(strsplit(gene_tab$geneA[i], ","))
            prots <- uniprotLink(as.character(unlist(
                bm[match(genes, bm$hgnc_symbol), "uniprotswissprot"])))
            ensembls <- ensemblLink(as.character(unlist(
                bm[match(genes, bm$hgnc_symbol), "ensembl_gene_id"])))
            gene_tab$uniprotA[i] <- paste(prots, collapse = ",")
            gene_tab$ensemblA[i] <- paste(ensembls, collapse = ",")
        }
        
        ### Adding for geneB
        bm_sub <- bm[match(gene_tab$geneB, bm$hgnc_symbol),]
        
        gene_tab <- add_column(gene_tab, "uniprotB" = uniprotLink(as.character(bm_sub$uniprotswissprot)), 
                               .after = "geneB")
        gene_tab <- add_column(gene_tab, "ensemblB" = ensemblLink(as.character(bm_sub$ensembl_gene_id)), 
                               .after = "uniprotB")
        # considering complexes for geneB
        ind_compl <- grep(",", gene_tab$geneB)
        for(i in ind_compl){
            genes <- unlist(strsplit(gene_tab$geneB[i], ","))
            prots <- uniprotLink(as.character(unlist(
                bm[match(genes, bm$hgnc_symbol), "uniprotswissprot"])))
            ensembls <- ensemblLink(as.character(unlist(
                bm[match(genes, bm$hgnc_symbol), "ensembl_gene_id"])))
            gene_tab$uniprotB[i] <- paste(prots, collapse = ",")
            gene_tab$ensemblB[i] <- paste(ensembls, collapse = ",")
        }
    }
    # } else {
    #     ensembl <- biomaRt::useMart("ensembl", dataset="hsapiens_gene_ensembl")
    #     # query biomaRt for geneA
    #     bm <- biomaRt::getBM(attributes = c('hgnc_symbol', 
    #                                'uniprotswissprot', 
    #                                'ensembl_gene_id'), 
    #           filters = 'hgnc_symbol', 
    #           values = gene_tab$geneA, 
    #           mart = ensembl)
    #     # remove rows with empty values
    #     bm <- bm %>%
    #         filter(uniprotswissprot != "" & ensembl_gene_id != "")
    #     ### Adding for geneA
    #     bm_sub <- bm[match(gene_tab$geneA, bm$hgnc_symbol),]
    #     gene_tab <- add_column(gene_tab, 
    #                            "uniprotA" = uniprotLink(as.character(bm_sub$uniprotswissprot)), 
    #                            .after = "geneA")
    #     gene_tab <- add_column(gene_tab, 
    #                            "ensemblA" = ensemblLink(as.character(bm_sub$ensembl_gene_id)), 
    #                            .after = "uniprotA")
    #     # query biomaRt for geneB
    #     bm <- biomaRt::getBM(attributes = c('hgnc_symbol', 
    #                                'uniprotswissprot', 
    #                                'ensembl_gene_id'), 
    #                 filters = 'hgnc_symbol', 
    #                 values = gene_tab$geneB, 
    #                 mart = ensembl)
    #     # remove rows with empty values
    #     bm <- bm %>%
    #         filter(uniprotswissprot != "" & ensembl_gene_id != "")
    #     ### Adding for geneB
    #     bm_sub <- bm[match(gene_tab$geneB, bm$hgnc_symbol),]
    #     
    #     gene_tab <- add_column(gene_tab, "uniprotB" = uniprotLink(as.character(bm_sub$uniprotswissprot)), 
    #                            .after = "geneB")
    #     gene_tab <- add_column(gene_tab, "ensemblB" = ensemblLink(as.character(bm_sub$ensembl_gene_id)), 
    #                            .after = "uniprotB")
    # }
    
    
    

    return(gene_tab)
}


#' Get html link to uniprot
#'
#' @param uniprot symbol
#'
#' @return html link to website
#'

uniprotLink <- function(uniprot){
    paste0('<a href="https://www.uniprot.org/uniprot/', uniprot, 
           '" target="_blank">', uniprot, '</a>')
}

#' Get html link to ensembl
#'
#' @param ensembl symbol
#'
#' @return html link to website
#'

ensemblLink <- function(ensembl){
    paste0('<a href="https://www.ensembl.org/Homo_sapiens/geneview?gene=', 
           ensembl, '" target="_blank">', ensembl, '</a>')
}


# 
#' Get number of unique ligands and receptors
#'
#' @param gene.table gene table of unique int-pairs
#' @param type either L or R
#'
#' @return number of L or R genes
#'

#' @importFrom dplyr distinct filter
getNumLR <- function(gene.table, type){
    LR.df <- data.frame(gene = c(gene.table$geneA, gene.table$geneB),
                        type = c(gene.table$typeA, gene.table$typeB))
    LR.df <- distinct(LR.df)
    if(type == "L"){
        return(nrow(filter(LR.df, type == "L")))
    } else if(type == "R"){
        return(nrow(filter(LR.df, type == "R")))
    }
}






#' Functions to plot DotPlots 
#'
#' @param selected_tab selected rows of filt.data by selection from gene table
#' @param clust.order how to order clusters
#' @param low_color of dotplot
#' @param high_color of dotplot
#'
#' @return list with modified selected data and ggplot2 dotplot
#' @importFrom tidyr unite
#' @import ggplot2 

getDotPlot_selInt <- function(selected_tab, clust.order, 
                              low_color = "aquamarine", 
                              high_color = "#131780"){
    selected_tab$groups_x <- factor(selected_tab$clustA, levels = clust.order)
    selected_tab <- selected_tab %>%
        unite(col="cluster_pair", clustA:clustB, sep = "::", remove = TRUE)
    p <- ggplot(selected_tab, aes(x = int_pair, y = cluster_pair)) +
        geom_point(aes(color = score, size=3.5)) + 
        theme_minimal() +
        scale_color_gradient(low = low_color, high = high_color) + 
        facet_grid(groups_x ~ ., scales = "free", space = "free") +
        theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1), 
              text = element_text(size=20),
              strip.text = element_blank()) +
        guides(size = "none") + 
        labs(x = "Int-pairs", y = "Cluster-pairs")
    
    return(list(data_dot = selected_tab, p = p))
}
martaint/InterCellar documentation built on April 7, 2022, 11:44 a.m.