R/geom_msa.R

Defines functions geom_msa

Documented in geom_msa

##' Multiple sequence alignment layer for ggplot2. It creats background tiles with/without sequence characters.
##'
##' @title geom_msa
##' @param data sequence alignment with data frame, generated by tidy_msa().
##' @param font font families, possible values are 'helvetical', 'mono', and 'DroidSansMono', 'TimesNewRoman'. Defaults is 'helvetical'.
##' @param mapping aes mapping
##' If font = NULL, only plot the background tile.
##' @param color A Color scheme. One of 'Clustal', 'Chemistry_AA', 'Shapely_AA', 'Zappo_AA', 'Taylor_AA', 'LETTER','CN6',, 'Chemistry_NT', 'Shapely_NT', 'Zappo_NT', 'Taylor_NT'. Defaults is 'Chemistry_AA'.
##' @param custom_color A data frame with two cloumn called "names" and "color".Customize the color scheme.
##' @param char_width a numeric vector. Specifying the character width in the range of 0 to 1. Defaults is 0.9.
##' @param by_conservation a logical value. The most conserved regions have the brightest colors.
##' @param none_bg a logical value indicating whether background should be disaplayed. Defaults is FALSE.
##' @param posHighligthed A numeric vector of the position that need to be highlighted.
##' @param seq_name a logical value indicating whether seqence names should be displayed. Defaults is 'NULL' which indicates that the sequence name is displayed when 'font = null', but 'font = char' will not be displayed. If 'seq_name = TRUE' the sequence name will be displayed in any case. If 'seq_name = FALSE' the sequence name will not be displayed under any circumstances.
##' @param border a character string. The border color.
##' @param consensus_views a logical value that opeaning consensus views.
##' @param use_dot a logical value. Displays characters as dots instead of fading their color in the consensus view.
##' @param disagreement a logical value. Displays characters that disagreememt to consensus(excludes ambiguous disagreements).
##' @param ignore_gaps a logical value. When selected TRUE, gaps in column are treated as if that row didn't exist.
##' @param ref a character string. Specifying the reference sequence which should be one of input sequences when 'consensus_views' is TRUE.
##' @param position Position adjustment, either as a string, or the result of a call to a position adjustment function,
##' default is 'identity' meaning 'position_identity()'.
##' @param show.legend logical. Should this layer be included in the legends?
##' @param ... additional parameter
##' @return A list
##' @importFrom ggplot2 scale_fill_manual
##' @importFrom utils modifyList
##' @export
##' @author Guangchuang Yu, Lang Zhou
geom_msa <- function(data, font = "helvetical",
                     mapping = NULL,
                     color = "Chemistry_AA",
                     custom_color = NULL,
                     char_width = 0.9,
                     none_bg = FALSE,
                     by_conservation = FALSE,
                     posHighligthed = NULL,
                     seq_name = NULL,
                     border = NULL,
                     consensus_views = FALSE,
                     use_dot = FALSE,
                     disagreement = TRUE,
                     ignore_gaps = FALSE,
                     ref = NULL,
                     position = "identity",
                     show.legend = FALSE,
                     ... ) {

    data <- msa_data(data,
                     font = font,
                     color = color,
                     custom_color = custom_color,
                     char_width = char_width,
                     by_conservation = by_conservation,
                     consensus_views  = consensus_views,
                     use_dot = use_dot,
                     disagreement = disagreement,
                     ignore_gaps = ignore_gaps,
                     ref = ref)

    #legend work
    xx <- data[,c("character","color")] %>% unique()
    xx <- xx[!is.na(xx$color),]
    labs <- lapply(unique(xx$color) %>% seq_along, function(i) {
        cols <- unique(xx$color)[i]
        dup_char <- xx[xx$color == cols, "character"]
        lab <- paste0(dup_char, collapse = ",")
    }) %>% do.call("rbind",.) %>% as.vector()

    cols <- xx$color %>% unique()
    names(cols) <- cols
    sacle_tile_cols <- scale_fill_manual(values = cols, breaks = cols, labels = labs)


    bg_data <- data

    #work to ggtreeExtra
    if(is.null(mapping)) {
        mapping <- aes_(x = ~position, y = ~name, fill = ~I(color))
    }

    #'seq_name' work
    if  (!isTRUE(seq_name)) {
        if ('y' %in% colnames(data) | isFALSE(seq_name) ) {
            y <- as.numeric(bg_data$name)
            mapping <- modifyList(mapping, aes_(y = ~y))
        }
    }

    #'posHighligthed' work
    if (!is.null(posHighligthed)) {
        none_bg = TRUE
        bg_data <- bg_data[bg_data$position %in% posHighligthed,]
        bg_data$postion <- as.factor(bg_data$position)
        mapping <- modifyList(mapping, aes_(x = ~position, fill = ~color, width = 1))
    }

    #'border' work
    if(is.null(border)){
        ly_bg <- geom_tile(mapping = mapping, data = bg_data, color = 'grey', inherit.aes = FALSE, position = position, show.legend = show.legend)
    }else{
        ly_bg <- geom_tile(mapping = mapping, data = bg_data, color = border, inherit.aes = FALSE, position = position, show.legend = show.legend)
    }

    if (!all(c("yy", "order", "group") %in% colnames(data))) {
        return(list(ly_bg, sacle_tile_cols))
    }

    if ('y' %in% colnames(data)) {
        data$yy = data$yy - as.numeric(data$name) + data$y
    }

    label_mapping <- aes_(x = ~x, y = ~yy, group = ~group)

    # use_dot work
    if (consensus_views && !use_dot) {
        if(show.legend) {
            stop("legends catn't be shown in the consensus view!")
        }
        label_mapping <- modifyList(label_mapping, aes_(fill = ~I(font_color)))
    }
    ly_label <- geom_polygon(mapping = label_mapping, data = data, inherit.aes = FALSE, position = position)

    #'none_bg' work
    if (none_bg & is.null(posHighligthed)) {
        return(ly_label)
    }

    if(consensus_views) {
        return(list(ly_bg, ly_label))
    }else {
        return(list(ly_bg, ly_label, sacle_tile_cols))
    }

}

Try the ggmsa package in your browser

Any scripts or data that you put into this service are public.

ggmsa documentation built on Aug. 3, 2021, 9:06 a.m.