R/cons.R
In ggmsa: Plot Multiple Sequence Alignment using 'ggplot2'

Documented in tidy_color

##' cleaning the needless sequeces' color according to the consensus sequcence (only used in the consensus views).
##'
##' @param y a data frame, sequence alignment with specified color.
##' @param consensus the consensus sequence which can be called by get_consensus().
##' @param disagreement a logical value. Displays characters that disagreememt to consensus(excludes ambiguous disagreements).
##' @param ref a character string. Specifying the reference sequence which should be one of input sequences when 'consensus_views' is TRUE.
##' @keywords tidy_color
tidy_color <- function(y, consensus, disagreement, ref) {
    c <- lapply(unique(y$position), function(i) {
        msa_cloumn <- y[y$position == i, ]
        if(!is.null(ref)) {
            msa_cloumn <- msa_cloumn[!msa_cloumn$name == ref, ]
        }
        cons_char <- consensus[consensus$position == i, "character"] #Get consensus char.
        logic <- msa_cloumn$character == cons_char #Compare the characters of the current position(i) to the consensus char.
        #Cleaning colors according to the 'logic'.
        if(cons_char == "X") {
            msa_cloumn$color <- NA
        }
        if(disagreement){
            msa_cloumn[logic, "color"] <- NA
        }else{
            msa_cloumn[!logic, "color"] <- NA
        }
        msa_cloumn
    }) %>% do.call("rbind", .)
    return(c)
}

##' calling the consensus sequence.
##'
##' @param tidy sequence alignment with data frame, generated by tidy_msa().
##' @param ignore_gaps a logical value. When selected TRUE, gaps in column are treated as if that row didn't exist.
##' @param ref a character string. Specifying the reference sequence which should be one of input sequences when 'consensus_views' is TRUE.
##' @keywords get_consensus
get_consensus <- function(tidy, ignore_gaps = FALSE, ref = NULL) {
    if(!is.null(ref)) {
        if(ignore_gaps) {
            warning("The argument 'ignore_gaps' is invalid when 'ref' is specified!")
        }
        ref <- match.arg(ref, levels(tidy$name))
        cons <- tidy[tidy$name == ref,]
        return(cons)
    }
    cons <- lapply(unique(tidy$position), function(i) { #Iterate through each columns
        msa_cloumn <- tidy[tidy$position == i, ]
        cons <- data.frame(position = i)
        if(ignore_gaps) {
            msa_cloumn <- msa_cloumn[!msa_cloumn$character %in% "-",]
        }
        #Gets the highest frequency characters
        fre <- table(msa_cloumn$character) %>% data.frame
        max_element <- fre[fre[2] == max(fre[2]),]
        max_number <-  max_element %>% nrow
        if(max_number == 1) {
            cons$character <- max_element[1,1]
        }else {
            cons$character <- "X"
        }
        cons
        }) %>% do.call("rbind", .)

        cons$name = "Consensus"
        cons$character <- as.character(cons$character) #debug 'as.character'
        return(cons)
}


order_name <- function(name, order = NULL, consensus_views = FALSE, ref = NULL) {
    name_uni <- unique(name)
    if(is.null(ref)){
        #placed 'consensus' at the top
        name_expect <- name_uni[!name_uni %in% "Consensus"] %>% rev %>% as.character
        name <- factor(name, levels = c(name_expect, "Consensus"))
    }else {
        name_expect <- name_uni[!name_uni %in% ref] %>% rev %>% as.character
        name <- factor(name, levels = c(name_expect, ref))
    }

    # #adjust the msa order according to 'order'
    # if(!is.null(order)) {
    #     if(!length(name_uni) == length(order)) {
    #     stop("The 'order' length does not match the number of names")
    #   }
    #   name_levels <- levels(name)[order]
    #   name <- factor(name, levels = name_levels)
    # }
    return(name)
}