R/cons.R

Defines functions tidy_color

##' cleaning the needless sequences' color according to the 
##' consensus sequence (only used in the consensus views).
##'
##' @param y a data frame, sequence alignment with specified color.
##' @param consensus the consensus sequence which can be called by 
##' get_consensus().
##' @param disagreement a logical value. Displays characters that 
##' disagreement to consensus(excludes ambiguous disagreements).
##' @param ref a character string. Specifying the reference sequence
##'  which should be one of input sequences when 'consensus_views' is TRUE.
##' @keywords tidy_color
##' @noRd
tidy_color <- function(y, consensus, disagreement, ref) {
    c <- lapply(unique(y$position), function(i) {
        msa_cloumn <- y[y$position == i, ]
        if(!is.null(ref)) {
            if ('label' %in% names(msa_cloumn)) { ##work for ggtreeExtra
                msa_cloumn <- msa_cloumn[!msa_cloumn$label == ref, ]
            }else{
                msa_cloumn <- msa_cloumn[!msa_cloumn$name == ref, ]
                }
           
        }
        #Get consensus char.
        cons_char <- consensus[consensus$position == i, "character"] 
        
        #Compare the characters of the current position(i) 
        #to the consensus char.
        logic <- msa_cloumn$character == cons_char 
        #Cleaning colors according to the 'logic'.
        if(cons_char == "X") {
            msa_cloumn$color <- NA
        }
        if(disagreement){
            msa_cloumn[logic, "color"] <- NA
        }else{
            msa_cloumn[!logic, "color"] <- NA
        }
        msa_cloumn
    }) %>% do.call("rbind", .)
    return(c)
}

##' calling the consensus sequence.
##'
##' @param tidy sequence alignment with data frame, generated by tidy_msa().
##' @param ignore_gaps a logical value. When selected TRUE, gaps in 
##' column are treated as if that row didn't exist.
##' @param ref a character string. Specifying the reference sequence 
##' which should be one of input sequences when 'consensus_views' is TRUE.
##' @keywords get_consensus
##' @noRd
get_consensus <- function(tidy, ignore_gaps = FALSE, ref = NULL) {
    if(!is.null(ref)) {
        if(ignore_gaps) {
            warning("The argument 'ignore_gaps' is 
                    invalid when 'ref' is specified!")
        }
        if ('label' %in% names(tidy)) { ##work for ggtreeExtra
            ref <- match.arg(ref, levels(factor(tidy$label)))
            cons <- tidy[tidy$label == ref,]
        }else {
            ref <- match.arg(ref, levels(tidy$name))
            cons <- tidy[tidy$name == ref,]
        }
        return(cons)
    }
    #Iterate through each columns
    cons <- lapply(unique(tidy$position), function(i) { 
        msa_cloumn <- tidy[tidy$position == i, ]
        cons <- data.frame(position = i)
        if(ignore_gaps) {
            msa_cloumn <- msa_cloumn[!msa_cloumn$character %in% "-",]
        }
        #Gets the highest frequency characters
        fre <- table(msa_cloumn$character) %>% data.frame
        max_element <- fre[fre[2] == max(fre[2]),]
        max_number <-  max_element %>% nrow
        if(max_number == 1) {
            cons$character <- max_element[1,1]
        }else {
            cons$character <- "X"
        }
        cons
        }) %>% do.call("rbind", .)

        cons$name = "Consensus"
        cons$character <- as.character(cons$character) #debug 'as.character'
        return(cons)
}


order_name <- function(name, order = NULL, 
                       consensus_views = FALSE,
                       ref = NULL) {
    name_uni <- unique(name)
    if(is.null(ref)){
        #placed 'consensus' at the top
        name_expect <- name_uni[!name_uni %in% "Consensus"] %>%
            rev %>% 
            as.character
        name <- factor(name, levels = c(name_expect, "Consensus"))
    }else {
        name_expect <- name_uni[!name_uni %in% ref] %>%
            rev %>%
            as.character
        name <- factor(name, levels = c(name_expect, ref))
    }

    return(name)
}
YuLab-SMU/ggmsa documentation built on Aug. 26, 2022, 1:48 a.m.