R/expand_KEGG_edges.R

Defines functions expand_KEGG_edges

Documented in expand_KEGG_edges

#' Get detailed KEGG mapping information for each relation [edge] documented 
#' in KEGG
#' @description Extract relationship information from KGML object and re-map
#'  based on normalized node information
#'
#' @param KGML_file An object of formal class KEGGPathway
#' @param KEGG_mappings The data.frame object generated by the function 
#' expand_KEGG_mappings
#'
#' @return A dataframe object with unique entry information for all edges 
#' documented in the KEGG pathway. 
#'         Note that each row has a unique combination of values for
#'          (entry1, entry2, entry1symbol, entry2symbol).
#' @export
#' @importMethodsFrom KEGGgraph edges
#' @examples
#' p53_KGML <- get_KGML("hsa04115")
#' p53_KEGG_mappings <- expand_KEGG_mappings(p53_KGML, FALSE)
#' p53_edges <- expand_KEGG_edges(p53_KGML, p53_KEGG_mappings)


expand_KEGG_edges <-
function(KGML_file,KEGG_mappings){
    num_edges <- length(KGML_file@edges)
    if (num_edges == 0){
        print("No Documented Edges in Pathway")
        expanded_edges <- data.frame("edgeID"=1, "entry1"= "1", "entry2" = "1",
                                    "entry1accession"="dummy",  "
                                    entry2accession"="dummy",
                                    "type"= "dummy", "subtype1"="dummy",       
                                    "value"="dummy", "subtype2" = "dummy", 
                                    "value2" = "dummy", 
                                    "specific_subtype"="dummy", 
                                    "is_direct"= 0, "entry1type"="dummy",     
                                    "entry2type"= "dummy", "entry1symbol"= "1",
                                    "entry2symbol"="1", 
                                    stringsAsFactors = FALSE) 
    return(expanded_edges)
    }
    map_edge_data<- KGML_file@edges
    get_edges<- data.frame(edgeID = seq(1:length(map_edge_data )))
    for (i in 1:nrow(get_edges)){
        get_edges$entry1[i] <- as.numeric(map_edge_data[[i]]@entry1ID)
        get_edges$entry2[i] <- as.numeric(map_edge_data[[i]]@entry2ID)
        get_edges$type[i] <- map_edge_data[[i]]@type
        if (length(map_edge_data[[i]]@subtype) == 1){
            get_edges$subtype1[i] <- map_edge_data[[i]]@subtype[[1]]@name
            if (get_edges$subtype1[i] == "binding/association"){
                get_edges$subtype1[i] <- "binding_association"
                }
            get_edges$value[i] <- map_edge_data[[i]]@subtype[[1]]@value
            get_edges$subtype2[i] <- NA
            get_edges$value2[i] <- NA
            get_edges$specific_subtype[i] <- get_edges$subtype1[i]
            get_edges$is_direct[i] <- 1
            if (get_edges$subtype1[i] == "indirect effect"){
                get_edges$is_direct[i] <- 0
                get_edges$subtype1[i] <- "indirect_effect"
            }
        }
        else if (length(map_edge_data[[i]]@subtype) == 2){
            get_edges$subtype1[i] <- map_edge_data[[i]]@subtype[[1]]@name
                if (get_edges$subtype1[i] == "binding/association"){
                    get_edges$subtype1[i] <- "binding_association"
                    }
            get_edges$value[i] <- map_edge_data[[i]]@subtype[[1]]@value
            get_edges$subtype2[i] <- map_edge_data[[i]]@subtype[[2]]@name
            get_edges$specific_subtype[i] <- paste0(get_edges$subtype1[i], "_",
                                                    get_edges$subtype2[i])
            get_edges$value2[i] <- map_edge_data[[i]]@subtype[[2]]@value
            get_edges$is_direct[i] <- 1
            if (get_edges$subtype2[i] == "indirect effect"){
                get_edges$subtype2[i] <- "indirect"
                get_edges$is_direct[i] <- 0
            }
        }
        else {
            get_edges$subtype1[i] <- "Not defined in KEGG"
            get_edges$value[i] <- "Not defined in KEGG"
            get_edges$subtype2[i] <- NA
            get_edges$value2[i] <- NA
            get_edges$specific_subtype[i] <- NA
            get_edges$is_direct[i] <- 1
        }
        get_edges$entry1type[i] <- 
            KEGG_mappings$entryTYPE[which(KEGG_mappings$entryID == 
                                         get_edges$entry1[i])][1]
        get_edges$entry2type[i] <- 
            KEGG_mappings$entryTYPE[which(KEGG_mappings$entryID == 
                                         get_edges$entry2[i])][1]
    }
    get_edges <- get_edges[!is.na(get_edges$entry1type) & 
                            !is.na(get_edges$entry2type),]
    if (nrow(get_edges) ==0){
        print("No Documented Edges in Pathway for selected cell type; 
            all edges are between non-expressed genes")
        expanded_edges <- data.frame("edgeID"=1, "entry1"= "1", "entry2" = "1",
                                     "entry1accession"="dummy",  
                                    "entry2accession"="dummy","type"= "dummy", 
                                    "subtype1"="dummy", "value"="dummy", 
                                    "subtype2" = "dummy", "value2" = "dummy",
                                    "specific_subtype"="dummy", "is_direct"= 0,
                                    "entry1type"="dummy", 
                                    "entry2type"= "dummy","entry1symbol"= "1",
                                    "entry2symbol"="1", 
                                    stringsAsFactors = FALSE)
        return(expanded_edges)
    }

  ##At some point include option not to ungroup edges
    edges_no_groups <- subset(get_edges, get_edges$entry1type != "group" 
                            & get_edges$entry2type != "group", select = -c(1))
    edges_with_groups <- subset(get_edges, get_edges$entry1type == "group" | 
                                get_edges$entry2type == "group")
    edges_one_group <- subset(get_edges, get_edges$entry1type == "group" & 
                                !get_edges$entry2type == "group" | 
                                get_edges$entry2type == "group" & 
                                !get_edges$entry1type == "group")
    edges_two_groups <- subset(get_edges, get_edges$entry1type == "group" & 
                                get_edges$entry2type == "group")
    if (nrow(edges_with_groups) > 0){
        if (nrow(edges_one_group) > 0){
            edges_one_group$entry1all <- NA
            edges_one_group$entry2all <- NA
            for (i in 1:nrow(edges_one_group)){
                if (edges_one_group$entry1type[i] == "group"){
                    edges_one_group$entry1all[i] <-  
                    KEGG_mappings$groupMEMBERS[which(KEGG_mappings$entryID == 
                                                edges_one_group$entry1[i])]
                    l <- length(unlist(edges_one_group$entry1all[i]))
                    edges_one_group$entry2all[i] <- 
                        list(as.character(rep(edges_one_group$entry2[i], l)))
                    }
                else if (edges_one_group$entry2type[i] == "group"){
                    edges_one_group$entry2all[i] <-  
                    KEGG_mappings$groupMEMBERS[which(KEGG_mappings$entryID == 
                                                edges_one_group$entry2[i])]
                    l = length(unlist(edges_one_group$entry2all[i]))
                    edges_one_group$entry1all[i] <- 
                        list(as.character(rep(edges_one_group$entry1[i], l)))
                    }
            l = length(unlist(edges_one_group$entry1all[i]))
            edges_one_group$edgeID_list[i] <- 
                list(rep(edges_one_group$edgeID[i],l))
            }
            ewg_temp <- data.frame("edgeID" = 
                                        unlist(edges_one_group$edgeID_list), 
                            "entry1" = unlist(edges_one_group$entry1all), 
                            "entry2" = unlist(edges_one_group$entry2all),
                            stringsAsFactors = FALSE)
            edges_one_group <- edges_one_group[,-c(2:3, 13:15)]
            edges_one_group <- merge(ewg_temp,edges_one_group, by = "edgeID")
            edges_one_group <- edges_one_group[,-c(1)]
        }
        if (nrow(edges_two_groups) > 0){
            edges_two_groups$entry1all <- NA
            edges_two_groups$entry2all <- NA
                for (i in 1:nrow(edges_two_groups)){
                    edges_two_groups$entry1all[i] <-  
                    KEGG_mappings$groupMEMBERS[which(KEGG_mappings$entryID == 
                                                edges_two_groups$entry1[i])]
                    edges_two_groups$entry2all[i] <-  
                    KEGG_mappings$groupMEMBERS[which(KEGG_mappings$entryID == 
                                                edges_two_groups$entry2[i])]
                    x <- length(unlist(edges_two_groups$entry1all[i]))
                    y <- length(unlist(edges_two_groups$entry2all[i]))
                    if (x == 1 & y >1){
                        edges_two_groups$entry1all[i] <- 
                            list(rep(edges_two_groups$entry1all[i], y))
                        }
                    else if (x > 1 & y == 1){
                        edges_two_groups$entry2all[i] <- 
                            list(rep(edges_two_groups$entry2all[i], x))
                        }
                    else if (x > 1 & y > 1){
                        edges_two_groups$entry1all[i] <- 
                            list(rep(edges_two_groups$entry1all[i],y))
                        edges_two_groups$entry1all[i] <- 
                            list(unlist(edges_two_groups$entry1all[i])[
                            sort.list(unlist(edges_two_groups$entry1all[i]))])
                        edges_two_groups$entry2all[i] <- 
                            list(rep(edges_two_groups$entry2all[i],x))
                    }
                    l <- length(unlist(edges_two_groups$entry1all[i]))
                    edges_two_groups$edgeID_list[i] <- 
                        list(rep(edges_two_groups$edgeID[i],l))
                    }
            ewg_temp <- data.frame("edgeID" = 
                                    unlist(edges_two_groups$edgeID_list),
                                "entry1" = unlist(edges_two_groups$entry1all), 
                                "entry2" = unlist(edges_two_groups$entry2all), 
                                stringsAsFactors = FALSE)
            edges_two_groups <- edges_two_groups[,-c(2:3, 13:15)]
            edges_two_groups <- merge(ewg_temp,edges_two_groups, by = "edgeID")
            edges_two_groups <- edges_two_groups[,-c(1)]
        }
        if (nrow(edges_one_group) > 0 & nrow(edges_two_groups) > 0){
            all_edges <- rbind(edges_no_groups, edges_one_group, 
                                edges_two_groups)
        }
        else if (nrow(edges_one_group) > 0){
            all_edges <- rbind(edges_no_groups, edges_one_group)
        }
        else if (nrow(edges_two_groups) > 0){
             all_edges <- rbind(edges_no_groups, edges_two_groups)
        }
    }
    else {
        all_edges <- edges_no_groups
    }
    for (i in 1:nrow(all_edges)){
        all_edges$entry1all_accession[i] <- 
            list(KEGG_mappings$entryACCESSION[which(KEGG_mappings$entryID == 
                                                all_edges$entry1[i])])
        all_edges$entry2all_accession[i] <- 
            list(KEGG_mappings$entryACCESSION[which(KEGG_mappings$entryID ==
                                                all_edges$entry2[i])])
        x <- length(unlist(all_edges$entry1all_accession[i]))
        y <- length(unlist(all_edges$entry2all_accession[i]))
        if (x == 1 & y >1){
            all_edges$entry1all_accession[i] <- 
                list(rep(all_edges$entry1all_accession[i], y))
            }
        else if (x > 1 & y == 1) {
            all_edges$entry2all_accession[i] <- 
                list(rep(all_edges$entry2all_accession[i], x))
        }
        else if (x > 1 & y > 1) {
            all_edges$entry1all_accession[i] <-
                list(rep(all_edges$entry1all_accession[i],y))
            all_edges$entry1all_accession[i] <- 
                list(unlist(all_edges$entry1all_accession[i])[sort.list(unlist(
                    all_edges$entry1all_accession[i]))])
            all_edges$entry2all_accession[i] <- 
                list(rep(all_edges$entry2all_accession[i],x))
            }
        l <- length(unlist(all_edges$entry1all_accession[i]))
        all_edges$edgeID[i] = list(rep(i,l))
    }
    for (i in 1:nrow(all_edges)){
        all_edges$l1[i] <- length(all_edges$entry1all_accession[[i]])
        all_edges$l2[i] <- length(all_edges$entry2all_accession[[i]])
    }
    all_edges <- all_edges[all_edges$l1 >0 & all_edges$l2 > 0, -c(15,16)]
    expanded_edges <- data.frame("edgeID" = unlist(all_edges$edgeID), 
                            "entry1accession" = 
                                unlist(all_edges$entry1all_accession), 
                            "entry2accession" =
                                unlist(all_edges$entry2all_accession), 
                            stringsAsFactors = FALSE)
  
    all_edges <- all_edges[,-c(12:14)]
    all_edges$edgeID <- seq(1:nrow(all_edges))
    expanded_edges <- merge(expanded_edges, all_edges, by = "edgeID")
    for (i in 1:nrow(expanded_edges)){
        expanded_edges$entry1type[i] <- 
            KEGG_mappings$entryTYPE[which(KEGG_mappings$entryID == 
                                      expanded_edges$entry1[i])][1]
        expanded_edges$entry2type[i] <- 
            KEGG_mappings$entryTYPE[which(KEGG_mappings$entryID ==
                                      expanded_edges$entry2[i])][1]
        if (expanded_edges$entry1type[i] == "gene"|
            expanded_edges$entry1type[i] == "compound") {
            expanded_edges$entry1symbol[i] <- 
            KEGG_mappings$entrySYMBOL[which(KEGG_mappings$entryACCESSION == 
                                        expanded_edges$entry1accession[i])][1]
            }
        else {
            expanded_edges$entry1symbol[i] <- NA
        }
        if (expanded_edges$entry2type[i] == "gene" |
            expanded_edges$entry2type[i] == "compound") {
            expanded_edges$entry2symbol[i] <- 
            KEGG_mappings$entrySYMBOL[which(KEGG_mappings$entryACCESSION == 
                                        expanded_edges$entry2accession[i])][1]
        }
        else {
            expanded_edges$entry2symbol[i] <- NA
        }
    }
    expanded_edges$entry1symbol <- unlist(expanded_edges$entry1symbol)
    expanded_edges$entry2symbol <- unlist(expanded_edges$entry2symbol)
    expanded_edges$is_direct <- as.numeric(expanded_edges$is_direct)
    return(expanded_edges)
}

Try the KEGGlincs package in your browser

Any scripts or data that you put into this service are public.

KEGGlincs documentation built on Nov. 8, 2020, 5:47 p.m.