R/emapplot_utilities.R

Defines functions add_pie_node get_pie_data add_category_nodes add_ellipse groupNode add_node_label add_group_label get_label_location list2df list2graph build_ggraph merge_compareClusterResult get_igraph build_emap_graph has_pairsim get_similarity_matrix

##' Get the similarity matrix
##'
##' @param y A data.frame of enrichment result
##' @param geneSets A list, the names of geneSets are term ids,
##' and every object is a vertor of genes.
##' @param method Method of calculating the similarity between nodes,
##' one of "Resnik", "Lin", "Rel", "Jiang" , "Wang"  and
##' "JC" (Jaccard similarity coefficient) methods
##' @param semData GOSemSimDATA object
##' @noRd
get_similarity_matrix <- function(y, geneSets, method, semData = NULL) {
    id <- y[, "ID"]
    geneSets <- geneSets[id]
    n <- nrow(y)
    y_id <- unlist(strsplit(y$ID[1], ":"))[1]
    ## Choose the method to calculate the similarity
    if (method == "JC") {
        w <- matrix(NA, nrow=n, ncol=n)
        colnames(w) <- rownames(w) <- y$Description
        for (i in seq_len(n-1)) {
            for (j in (i+1):n) {
                w[i,j] <- overlap_ratio(geneSets[id[i]], geneSets[id[j]])
            }
        }
        return(w)
    }

    if (y_id == "GO") {
        if(is.null(semData)) {
            stop("The semData parameter is missing,
                and it can be obtained through godata function in GOSemSim package.")
        }
        w <- GOSemSim::mgoSim(id, id, semData=semData, measure=method,
                              combine=NULL)
    }

    if (y_id == "DOID") w <- DOSE::doSim(id, id, measure=method)
    rownames(y) <- y$ID
    rownames(w) <- colnames(w) <- y[colnames(w), "Description"]
    return(w)
}


##' Check whether the similarity matrix exists
##'
##' @param x result of enrichment analysis
##'
##' @noRd
has_pairsim <- function(x) {
    if (length(x@termsim) == 0) {
        error_message <- paste("Term similarity matrix not available.",
            "Please use pairwise_termsim function to",
            "deal with the results of enrichment analysis.")
        stop(error_message)
    }

}


##' Get graph.data.frame() result
##'
##' @importFrom igraph graph.empty
##' @importFrom igraph graph.data.frame
##' @param enrichDf A data.frame of enrichment result.
##' @param geneSets A list gene sets with the names of enrichment IDs
##' @param color a string, the column name of y for nodes colours
##' @param cex_line Numeric, scale of line width
##' @param min_edge The minimum similarity threshold for whether 
##' two nodes are connected, should between 0 and 1, default value is 0.2.
##' @param pair_sim Semantic similarity matrix.
##' @param method Method of calculating the similarity between nodes,
##' one of "Resnik", "Lin", "Rel", "Jiang" , "Wang"  and
##' "JC" (Jaccard similarity coefficient) methods
##' @return result of graph.data.frame()
##' @noRd
build_emap_graph <- function(enrichDf, geneSets, color, cex_line, min_edge,
                             pair_sim, method) {

    if (!is.numeric(min_edge) | min_edge < 0 | min_edge > 1) {
    	stop('"min_edge" should be a number between 0 and 1.')
    }

    if (is.null(dim(enrichDf)) | nrow(enrichDf) == 1) {  # when just one node
        g <- graph.empty(0, directed=FALSE)
        g <- add_vertices(g, nv = 1)
        V(g)$name <- as.character(enrichDf$Description)
        V(g)$color <- "red"
        return(g)
    } else {
        w <- pair_sim[as.character(enrichDf$Description), 
            as.character(enrichDf$Description)]
    }

    wd <- melt(w)
    wd <- wd[wd[,1] != wd[,2],]
    # remove NA
    wd <- wd[!is.na(wd[,3]),]
    if (method != "JC") {
        # map id to names
        wd[, 1] <- enrichDf[wd[, 1], "Description"]
        wd[, 2] <- enrichDf[wd[, 2], "Description"]
    }

    g <- graph.data.frame(wd[, -3], directed=FALSE)
    E(g)$width <- sqrt(wd[, 3] * 5) * cex_line
    # Use similarity as the weight(length) of an edge
    E(g)$weight <- wd[, 3]
    g <- delete.edges(g, E(g)[wd[, 3] < min_edge])
    idx <- unlist(sapply(V(g)$name, function(x) which(x == enrichDf$Description)))
    cnt <- sapply(geneSets[idx], length)
    V(g)$size <- cnt
    colVar <- enrichDf[idx, color]
    V(g)$color <- colVar
    return(g)
}



##' Get an iGraph object
##'
##' @param x Enrichment result.
##' @param nCategory Number of enriched terms to display.
##' @param color variable that used to color enriched terms, e.g. 'pvalue',
##' 'p.adjust' or 'qvalue'.
##' @param cex_line Scale of line width.
##' @param min_edge The minimum similarity threshold for whether 
##' two nodes are connected, should between 0 and 1, default value is 0.2.
##'
##' @return an iGraph object
##' @noRd
get_igraph <- function(x, nCategory, color, cex_line, min_edge){
    y <- as.data.frame(x)
    geneSets <- geneInCategory(x) ## use core gene for gsea result
    if (is.numeric(nCategory)) {
        y <- y[1:nCategory, ]
    } else {
        y <- y[match(nCategory, y$Description),]
        nCategory <- length(nCategory)
    }

    if (nCategory == 0) {
        stop("no enriched term found...")
    }

    g <- build_emap_graph(enrichDf = y, geneSets = geneSets, color = color,
             cex_line = cex_line, min_edge = min_edge,
             pair_sim = x@termsim, method = x@method)
}


##' Merge the compareClusterResult file
##'
##' @param yy A data.frame of enrichment result.
##'
##' @return a data.frame
##' @noRd
merge_compareClusterResult <- function(yy) {
    yy_union <- yy[!duplicated(yy$ID),]
    yy_ids <- lapply(split(yy, yy$ID), function(x) {
        ids <- unique(unlist(strsplit(x$geneID, "/")))
        cnt <- length(ids)
        list(ID=paste0(ids, collapse="/"), cnt=cnt)
    })

    ids <- vapply(yy_ids, function(x) x$ID, character(1))
    cnt <- vapply(yy_ids, function(x) x$cnt, numeric(1))

    yy_union$geneID <- ids[yy_union$ID]
    yy_union$Count <- cnt[yy_union$ID]
    yy_union$Cluster <- NULL
    yy_union
}

##' Get the an ggraph object
##'
##' @importFrom ggplot2 ylim
##' @param x enrichment result.
##' @param enrichDf A data.frame of enrichment result.
##' @param mergedEnrichDf A data.frame of merged enrichment result.
##' @param cex_category Numeric, scale of pie plot.
##' @param pie Proportion of clusters in the pie chart, one of 'equal' (default) or 'Count'.
##' @param layout Layout of the map.
##' @param color a string, the column name of y for nodes colours
##' @param cex_line Numeric, scale of line width
##' @param min_edge The minimum similarity threshold for whether 
##' two nodes are connected, should between 0 and 1, default value is 0.2.
##' @param pair_sim Semantic similarity matrix.
##' @param method Method of calculating the similarity between nodes,
##' one of "Resnik", "Lin", "Rel", "Jiang" , "Wang"  and
##' "JC" (Jaccard similarity coefficient) methods.
##' @param with_edge Logical, if TRUE (the default), draw the edges of the network diagram.
##' @noRd
build_ggraph <- function(x, enrichDf, mergedEnrichDf, cex_category, pie, 
                         layout, coords, cex_line, min_edge, pair_sim,
                         method, with_edge){
    geneSets <- setNames(strsplit(as.character(mergedEnrichDf$geneID), "/",
                              fixed = TRUE), mergedEnrichDf$ID) 
                              
    g <- build_emap_graph(enrichDf=mergedEnrichDf,geneSets=geneSets,color="p.adjust",
        cex_line=cex_line, min_edge=min_edge, pair_sim = x@termsim, 
        method = x@method)
    ## when enrichDf just have one line
    if(is.null(dim(enrichDf)) | nrow(enrichDf) == 1) {
        title <- enrichDf$Cluster
        p <- ggraph(g, "tree") + geom_node_point(color="red", size=5 * cex_category) +
            geom_node_text(aes_(label=~name)) + theme_void() +
            labs(title=title)
        return(p)
    }

    if(is.null(dim(mergedEnrichDf)) | nrow(mergedEnrichDf) == 1) {
        p <- ggraph(g, "tree")
        ID_Cluster_mat <- prepare_pie_category(enrichDf = enrichDf, pie=pie)

        ID_Cluster_mat <- cbind(ID_Cluster_mat,1,1,0.1*cex_category)
        colnames(ID_Cluster_mat) <- c(colnames(ID_Cluster_mat)[1:(ncol(ID_Cluster_mat)-3)],
            "x", "y", "radius")


        p <- p + geom_scatterpie(aes_(x=~x,y=~y,r=~radius), data=ID_Cluster_mat,
                cols=names(ID_Cluster_mat)[1:(ncol(ID_Cluster_mat)-3)],
                color=NA)+
            xlim(-3,3) + ylim(-3,3) + coord_equal()+
            geom_node_text(aes_(label=~name), repel=TRUE) +
            theme_void()+labs(fill = "Cluster")
        return(p)

    }
    if (!is.null(layout)) {
        p <- ggraph(g, layout=layout)
    } else {
        p <- ggraph(g, layout="nicely")
        if (!is.null(coords)) {
            ggData <- p$data
            ggData$x <- coords$x
            ggData$y <- coords$y
            p$data <- ggData  
        }
    }
    
    
    ## add edges
    if (with_edge & length(E(g)$width) > 0) {
        p <- p + geom_edge_link(alpha=.8, aes_(width=~I(width)),
                                colour='darkgrey')
    }
    return(p)
}

## Keep selected category in enrichment result
##
## @param showCategory A number or a vector of terms. If it is a number, 
## the first n terms will be displayed. If it is a vector of terms, 
## the selected terms will be displayed.
## @param x Enrichment result
## @param split Separate result by 'category' variable.
## @noRd
# get_selected_category <- function(showCategory, x, split) {
#     if (is.numeric(showCategory)) {
#         y <- fortify(x, showCategory = showCategory,
#                                       includeAll = TRUE, split = split)

#     } else {
#         y <- as.data.frame(x)
#         y <- y[y$Description %in% showCategory, ]
#         y <- fortify(y, showCategory=NULL,
#                                       includeAll = TRUE, split = split)
#     }
#     y$Cluster <- sub("\n.*", "", y$Cluster)
#     return(y)
# }

##' Convert a list of gene IDs to igraph object.
##'
##'
##' @title Convert gene IDs to igraph object
##' @param inputList A list of gene IDs.
##' @return A igraph object.
##' @importFrom igraph graph.data.frame
##' @author Guangchuang Yu
##' @noRd
list2graph <- function(inputList) {
    x <- list2df(inputList)
    g <- graph.data.frame(x, directed=FALSE)
    return(g)
}

##' Convert a list of gene IDs to data.frame object.
##'
##'
##' @title Convert gene IDs to data.frame object
##' @param inputList A list of gene IDs
##' @return a data.frame object.
##' @noRd
list2df <- function(inputList) {
    # ldf <- lapply(1:length(inputList), function(i) {
    ldf <- lapply(seq_len(length(inputList)), function(i) {
        data.frame(categoryID=rep(names(inputList[i]),
                                  length(inputList[[i]])),
                   Gene=inputList[[i]])
    })

    do.call('rbind', ldf)
}

##' Get the location of group label
##'
##' @param ggData data of a ggraph object
##' @param label_format A numeric value sets wrap length, alternatively a
##' custom function to format axis labels.
##' @return a data.frame object.
##' @noRd
get_label_location <- function(ggData, label_format) {
    label_func <- default_labeller(label_format)
    if (is.function(label_format)) {
        label_func <- label_format
    }
    label_x <- stats::aggregate(x ~ color2, ggData, mean)
    label_y <- stats::aggregate(y ~ color2, ggData, mean)
    data.frame(x = label_x$x, y = label_y$y,
        label = label_func(label_x$color2))
}

##' Add group label to a ggplot2 object
##'
##' @param label_style style of group label, one of "shadowtext" and "ggforce".
##' @param repel a logical value, whether to correct the position of the label.
##' @param shadowtext a logical value, whether to use shadow font. 
##' @param p a ggplot2 object.
##' @param label_location a data.frame with the location of group label.
##' @param label_group a numeric value, default size of group label.
##' @param cex_label_group scale of group labels size.
##' @param ... additional parameters.
##' @return a ggplot2 object.
##' @noRd
add_group_label <- function(label_style, repel, shadowtext, p, label_location, 
                            label_group, cex_label_group, ...) {
    if (label_style != "shadowtext") return(p)
    if (!repel) {
        if (shadowtext) {
            p <- p + geom_shadowtext(data = label_location,
                aes_(x =~ x, y =~ y, label =~ label), colour = "black",
                size = label_group * cex_label_group, bg.color = "white", bg.r = 0.1)
        } else {
            p <- p + geom_text(data = label_location,
                aes_(x =~ x, y =~ y, label =~ label), colour = "black",
                size = label_group * cex_label_group)
        }
        
        return(p)
    }

    if (shadowtext) {
        p <- p + ggrepel::geom_text_repel(data = label_location,
            aes_(x =~ x, y =~ y, label =~ label), colour = "black",
            size = label_group * cex_label_group, bg.color = "white", bg.r = 0.1,
            show.legend = FALSE, ...)
    } else {
        p <- p + ggrepel::geom_text_repel(data = label_location,
            aes_(x =~ x, y =~ y, label =~ label), colour = "black",
            size = label_group * cex_label_group, 
            show.legend = FALSE, ...)
    }
    return(p)   
}

##' Add node label to a ggplot2 object
##'
##' @param p a ggplot2 object.
##' @param data it is uesd as the `data` parameter of function `ggraph::geom_node_text`, a data.frame or NULL.
##' @param label_size_node a numeric value to indicate the font size of the node label.
##' @param cex_label_node a numeric value to indicate the scale of node label size.
##' @param shadowtext  a logical value, whether to use shadow font. 
##' @return a ggplot2 object.
##' @noRd
add_node_label <- function(p, data, label_size_node, cex_label_node, shadowtext) {
    if (shadowtext) {
        p <- p + geom_node_text(aes_(label=~name), data = data,
            size = label_size_node * cex_label_node, bg.color = "white", repel=TRUE)
    } else {
        p <- p + geom_node_text(aes_(label=~name), data = data,
            size = label_size_node * cex_label_node, repel=TRUE)
    }
    return(p)
}




##' Cluster similar nodes together by k-means
##' 
##' @param p a ggraph object.
##' @param enrichDf data.frame of enrichment result.
##' @param nWords Numeric, the number of words in the cluster tags.
##' @param clusterFunction function of Clustering method, such as stats::kmeans, cluster::clara,
##' cluster::fanny or cluster::pam.
##' @param nCluster Numeric, the number of clusters, 
##' the default value is square root of the number of nodes.
##' @noRd
groupNode <- function(p, enrichDf, nWords, clusterFunction =  stats::kmeans, nCluster) {
    ggData <- p$data
    wrongMessage <- paste("Wrong clusterFunction parameter or unsupported clustering method;",
         "set to default `clusterFunction = kmeans`")
    if (!"color2" %in% colnames(ggData)) {
        dat <- data.frame(x = ggData$x, y = ggData$y)
        nCluster <- ifelse(is.null(nCluster), floor(sqrt(nrow(dat))), 
                           min(nCluster, nrow(dat)))
        ggData$color2 <- tryCatch(expr  = clusterFunction(dat, nCluster)$cluster, 
                                  error = function(e) {
                                      message(wrongMessage)
                                      stats::kmeans(dat, nCluster)$cluster
                                  })
        if (is.null(ggData$color2)) {
            message(wrongMessage)
            ggData$color2 <- stats::kmeans(dat, nCluster)$cluster
        }
    }
    goid <- enrichDf$ID
    cluster_color <- unique(ggData$color2)
    clusters <- lapply(cluster_color, function(i){goid[which(ggData$color2 == i)]})
    cluster_label <- sapply(cluster_color, get_wordcloud, ggData = ggData,
                            nWords=nWords)
    names(cluster_label) <- cluster_color
    ggData$color2 <- cluster_label[as.character(ggData$color2)] 
    return(ggData)
}

##' add ellipse to group the node
##'
##' @param p ggplot2 object
##' @param group_legend Logical, if TRUE, the grouping legend will be displayed.
##' The default is FALSE.
##' @param label_style style of group label, one of "shadowtext" and "ggforce".
##' @param ellipse_style style of ellipse, one of "ggforce" an "polygon".
##' @param ellipse_pro numeric indicating confidence value for the ellipses
##' @param alpha the transparency of ellipse fill.
##' @noRd
add_ellipse <- function(p, group_legend, label_style, 
    ellipse_style = "ggforce", ellipse_pro = 0.95, alpha = 0.3, ...) {
    show_legend <- c(group_legend, FALSE)
    names(show_legend) <- c("fill", "color") 
    ellipse_style <- match.arg(ellipse_style, c("ggforce", "polygon"))
    if (ellipse_style == "ggforce") {
        if (label_style == "shadowtext") {
            p <- p + ggforce::geom_mark_ellipse(aes_(x =~ x, y =~ y, color =~ color2,
                        fill =~ color2), alpha = alpha, show.legend = show_legend)
        } else {
            p <- p + ggforce::geom_mark_ellipse(aes_(x =~ x, y =~ y, color =~ color2,
                        fill =~ color2, label =~ color2), alpha = alpha,
                        show.legend = show_legend)
        }
        if (group_legend) p <- p + scale_fill_discrete(name = "groups")
     } 
    
    if (ellipse_style == "polygon") {
        p <- p + ggplot2::stat_ellipse(aes_(x =~ x, y =~ y, fill =~ color2),
                                       geom = "polygon", level = ellipse_pro,
                                       alpha = alpha,
                                       show.legend = group_legend, ...)
    }

    return(p)
}
# add_ellipse <- function(p, group_legend, label_style) {
#     show_legend <- c(group_legend, FALSE)
#     names(show_legend) <- c("fill", "color") 
#     if (label_style == "shadowtext") {
#             p <- p + ggforce::geom_mark_ellipse(aes_(x =~ x, y =~ y, color =~ color2,
#                          fill =~ color2), show.legend = show_legend)
#         } else {
#             p <- p + ggforce::geom_mark_ellipse(aes_(x =~ x, y =~ y, color =~ color2,
#                          fill =~ color2, label =~ color2), show.legend = show_legend)
#         }
#     if (group_legend) p <- p + scale_fill_discrete(name = "groups")
#     return(p)
# }


##' add category nodes 
##'
##' @param p ggplot2 object
##' @param cex_category Number indicating the amount by which plotting category
##' nodes should be scaled relative to the default.
##' @param color Variable that used to color enriched terms, e.g. 'pvalue',
##' 'p.adjust' or 'qvalue'.
##' @noRd
add_category_nodes <- function(p, cex_category, color) {
    p + ggnewscale::new_scale_fill() +
        geom_point(shape = 21, aes_(x =~ x, y =~ y, fill =~ color,
                                    size =~ size)) +
        scale_size_continuous(name = "number of genes",
                              range = c(3, 8) * cex_category) +
        scale_fill_continuous(low = "red", high = "blue", name = color,
                              guide = guide_colorbar(reverse = TRUE)) + 
        theme(legend.title = element_text(size = 10),
                   legend.text  = element_text(size = 10)) +
        theme(panel.background = element_blank()) 
}



##' Get data for pie plot 
##'
##' @param enrichDf A data.frame of enrichment result.
##' @param pie proportion of clusters in the pie chart, one of 'equal' (default) and 'Count'
##' @param mergedEnrichDf A data.frame of merged enrichment result.
##' @param cex_pie2axis It is used to adjust the relative size of the pie chart on the coordinate axis.
##' @param p a ggraph object.
##' @param cex_category Number indicating the amount by which plotting category
##' nodes should be scaled relative to the default.
##' @noRd
get_pie_data <- function(enrichDf, pie, mergedEnrichDf, cex_pie2axis, p, cex_category) {
    ggData <- p$data
    ID_Cluster_mat <- prepare_pie_category(enrichDf = enrichDf, pie=pie) 
    desc <- mergedEnrichDf$Description[match(rownames(ID_Cluster_mat),
                                      mergedEnrichDf$Description)]
    i <- match(desc, ggData$name)
    ID_Cluster_mat$x <- ggData$x[i]
    ID_Cluster_mat$y <- ggData$y[i]
    ID_Cluster_mat$radius <- sqrt(ggData$size[i] / sum(ggData$size) * cex_category * cex_pie2axis)
    return(ID_Cluster_mat)
}

##' Add category node(pie plot) 
##'
##' @param p ggplot2 object.
##' @param ID_Cluster_mat a matrix data for pie plot
##' @param node_label Select which labels to be displayed,
##' one of 'category', 'group', 'all' and 'none'.
##' @param cex_category Number indicating the amount by which plotting category
##' nodes should be scaled relative to the default.
##' @param cex_pie2axis It is used to adjust the relative size of the pie chart on the coordinate axis.
##' @param cex_label_category Scale of category node label size.
##' @param shadowtext a logical value, whether to use shadow font.
##' @param legend_n number of circle in legend
##' @param label_size_category Base size of node label.
##' @noRd
add_pie_node <- function(p, ID_Cluster_mat, node_label, 
                         cex_category, cex_pie2axis,
                         cex_label_category,
                         shadowtext, legend_n,
                         label_size_category) {
    color <- NULL
    if(ncol(ID_Cluster_mat) > 4) {
        p <- p + ggnewscale::new_scale_fill() + 
            geom_scatterpie(aes_(x=~x,y=~y,r=~radius), data=ID_Cluster_mat,
            cols=colnames(ID_Cluster_mat)[1:(ncol(ID_Cluster_mat)-3)],color=NA) +
            coord_equal() 

        if (node_label == "all" || node_label == "category") {
            p <- add_node_label(p = p, data = NULL, label_size_node = label_size_category,
                cex_label_node = cex_label_category, shadowtext = shadowtext)
        }
        
        p <- p + theme_void() +
            geom_scatterpie_legend(ID_Cluster_mat$radius, 
                x=min(ID_Cluster_mat$x), y=min(ID_Cluster_mat$y),
                n = legend_n,
                labeller=function(x) round(sum(p$data$size) * x^2 / cex_category/ cex_pie2axis)) +
            labs(fill = "Cluster")
    } else {
        title <- colnames(ID_Cluster_mat)[1]
        p <- p + theme_void() + geom_node_point(aes_(color=~color, size=~size))
        if (node_label == "all" || node_label == "category") {
            p <- add_node_label(p = p, data = NULL, label_size_node = label_size_category,
                cex_label_node = cex_label_category, shadowtext = shadowtext)
        }
        p <- p + scale_color_continuous(low="red", high="blue", name = color,
                                        guide=guide_colorbar(reverse=TRUE)) +
            scale_size(range=c(3, 8) * cex_category)  +labs(title= title)  
    }  
    return(p)
}
GuangchuangYu/enrichplot documentation built on Aug. 31, 2021, 7:48 p.m.