# R/refine_mappings.R

#' Refine pathway by cell type
#' @description Reduce the KEGG pathway by only including genes that are
#' expressed within a given cell type
#' @details Baseline expression is read from the `gene_cell_info` data set
#' (columns `cell`, `pr_gene_symbol` and `is_expressed`). Rows of
#' `KEGG_mappings` whose `entryTYPE` is not "gene" are always kept. Gene rows
#' are kept only when their `entrySYMBOL` has an entry for the chosen cell line
#' that is flagged as expressed; genes with no baseline entry are dropped.
#' If the cell line is not present in `gene_cell_info`, a warning is issued
#' and `KEGG_mappings` is returned unchanged.
#' @export
#' @param KEGG_mappings The data.frame object generated by the function
#' expand_KEGG_mappings
#' @param cell_line Choose from the set of cell lines with baseline data;
#' cell-lines may or may not have corresponding KO data
#' @return A dataframe object with reduced set of pathway mappings to be
#'  passed on to other functions. It has the same columns as `KEGG_mappings`;
#'  non-gene rows come first, followed by the retained gene rows.
#' @examples
#' p53_KGML <- get_KGML("hsa04115")
#' p53_KEGG_mappings <-  expand_KEGG_mappings(p53_KGML)
#' MCF7_p53_mappings <- refine_mappings(p53_KEGG_mappings, "MCF7")
refine_mappings <- function(KEGG_mappings, cell_line){
  data("gene_cell_info", envir = environment())
  gene_cell_info <- get("gene_cell_info")
  
  if (!cell_line %in% gene_cell_info$cell){
    warning("Baseline expression data not available for selected cell type; 
            pathway will not be refined")
    return(KEGG_mappings)
  }
  
  # %in% never yields NA, so rows with a missing cell / entryTYPE cannot
  # turn into all-NA phantom rows during logical subsetting.
  chosen_cell_info <- gene_cell_info[gene_cell_info$cell %in% cell_line, ,
                                     drop = FALSE]
  is_gene <- KEGG_mappings$entryTYPE %in% "gene"
  gene_nodes <- KEGG_mappings[is_gene, , drop = FALSE]
  non_gene_nodes <- KEGG_mappings[!is_gene, , drop = FALSE]
  
  # Vectorised lookup: position of each gene symbol in the cell-line table
  # (NA when the gene has no baseline data). match() takes the first hit, so
  # duplicated symbols in the table no longer cause length-mismatch warnings,
  # and zero gene rows simply give a zero-length result instead of an error.
  info_row <- match(gene_nodes$entrySYMBOL, chosen_cell_info$pr_gene_symbol,
                    incomparables = NA)
  expressed <- chosen_cell_info$is_expressed[info_row]
  keep <- !is.na(expressed) & expressed == TRUE
  
  # No helper column is ever added to the gene rows, so the result has
  # exactly the columns of KEGG_mappings regardless of how many there are.
  genes_expressed_in_cell_type <- gene_nodes[keep, , drop = FALSE]
  
  rbind(non_gene_nodes, genes_expressed_in_cell_type)
}
# uc-bd2k/KEGGlincs documentation built on May 3, 2019, 2:13 p.m.