R/path_genes_by_cell_type.R

Defines functions path_genes_by_cell_type

Documented in path_genes_by_cell_type

#' See how many pathway gene knock-outs are available from data
#' @description Check quantity of data across cell lines available from LINCS
#'  corresponding to the pathway of interest
#' @details Every cell line present in the packaged \code{KO_data} table is
#' reported, including cell lines with no matching knock-outs at the requested
#' time point (these get a count of 0). A knock-out counts towards a cell line
#' when its \code{pert_time} equals \code{pert_time} and its \code{pert_desc}
#' is the symbol of a pathway gene. Coverage is the count divided by the
#' number of unique pathway genes; bars are coloured red above 60\%, yellow
#' above 40\% and up to 60\%, and blue at or below 40\%. If the pathway
#' contains no gene entries, coverage is reported as 0 for every cell line.
#' Graphical parameters changed for the plot are restored on exit.
#' @export
#' @importFrom graphics barplot legend par
#' @importFrom utils data
#' @param KEGG_mappings The data.frame object generated by the
#' function expand_KEGG_mappings
#' @param pert_time Choose from (6,24,48,96,120,144,168)
#' @param generate_plot Logical indicator to generate the bar plot
#' @param get_KOs  Logical indicator to have data frame returned
#' @return If \code{generate_plot} is \code{TRUE}, a bar plot depicting the
#' percentage of pathway genes knocked-out by cell line is drawn. If
#' \code{get_KOs} is \code{TRUE}, a data frame sorted by increasing number of
#' knock-outs is returned with columns \code{cell_type},
#' \code{num_pathway_KOs}, \code{knock_outs} (a list column holding the
#' knocked-out gene symbols for that cell line) and \code{color}; otherwise
#' \code{NULL} is returned invisibly.
#' @examples
#' p53_KGML <- get_KGML("hsa04115")
#' p53_KEGG_mappings <- expand_KEGG_mappings(p53_KGML)
#'
#' path_genes_by_cell_type(p53_KEGG_mappings)
path_genes_by_cell_type <-
function(KEGG_mappings, pert_time = 96, get_KOs = FALSE, generate_plot = TRUE){
    # Unique pathway genes: one row per Entrez ID together with its symbol.
    keeps <- c("entryACCESSION", "entrySYMBOL")
    path_genes <- KEGG_mappings[KEGG_mappings$entryTYPE == "gene", keeps]
    names(path_genes)[2] <- "SYMBOL"
    path_genes <- data.frame(ENTREZID = unlist(path_genes$entryACCESSION),
                            SYMBOL = unlist(path_genes$SYMBOL),
                            stringsAsFactors = FALSE)
    path_genes <- path_genes[!duplicated(path_genes$ENTREZID), ]
    n_genes <- nrow(path_genes)

    # Load the packaged knock-out table into this function's environment.
    data("KO_data", envir = environment())

    # Record every cell line *before* filtering so that cell lines without
    # any matching knock-out still appear in the result.
    cell_types <- data.frame(cell_type = unique(KO_data$cell_id),
                             stringsAsFactors = FALSE)
    KO_data <- KO_data[KO_data$pert_time == pert_time, ]
    KO_data <- KO_data[KO_data$pert_desc %in% path_genes$SYMBOL, ,
                       drop = FALSE]

    # Knock-out counts per cell line; built directly from the table so an
    # empty selection still yields a well-formed two-column data frame.
    ko_counts <- table(KO_data$cell_id)
    by_cell_type <- data.frame(cell_type = as.character(names(ko_counts)),
                               num_pathway_KOs = as.vector(ko_counts),
                               stringsAsFactors = FALSE)
    by_cell_type <- merge(cell_types, by_cell_type,
                          by = "cell_type", all.x = TRUE)

    # Cell lines absent from the filtered data have no count: treat as 0.
    num_KOs <- by_cell_type$num_pathway_KOs
    num_KOs[is.na(num_KOs)] <- 0
    by_cell_type$num_pathway_KOs <- num_KOs

    # List column: the knocked-out gene symbols recorded for each cell line.
    by_cell_type$knock_outs <- lapply(by_cell_type$cell_type, function(cell) {
        KO_data$pert_desc[KO_data$cell_id == cell]
    })

    # Fraction of pathway genes knocked out; 0 when the pathway has no genes
    # (avoids 0/0 = NaN).
    coverage <- if (n_genes > 0) {
        num_KOs / n_genes
    } else {
        rep(0, length(num_KOs))
    }
    color <- rep("blue", length(coverage))
    color[coverage > 0.4] <- "yellow"
    color[coverage > 0.6] <- "red"
    by_cell_type$color <- color

    ord <- order(by_cell_type$num_pathway_KOs)
    by_cell_type <- by_cell_type[ord, ]
    coverage <- coverage[ord]

    if (generate_plot && nrow(by_cell_type) > 0) {
        # Widen the right margin and allow drawing outside the plot region so
        # the legend fits; restore the caller's settings when we leave.
        old_par <- par(mar = c(5.1, 4.1, 4.1, 8.1), xpd = TRUE)
        on.exit(par(old_par), add = TRUE)
        barplot(coverage * 100,
                names.arg = by_cell_type$cell_type, las = 2,
                col = by_cell_type$color,
                ylab = "Percentage of Pathway Genes Knocked Out",
                cex.names = 0.8, main = "Pathway Coverage by Cell-Type")
        legend("topright", inset = c(-0.47, 0.2),
                legend = c("60% +", "40% to 60%",
                "Below 40%"), fill = c("red", "yellow", "blue"))
    }
    if (get_KOs) {
        return(by_cell_type)
    }
    invisible(NULL)
}
uc-bd2k/KEGGlincs documentation built on Dec. 8, 2017, 3:32 a.m.