pepliner: MS Data Visualization

# suppressPackageStartupMessages(library(tidyverse))
# suppressPackageStartupMessages(library(cowplot))
# suppressPackageStartupMessages(library(lazyeval))


covplot_row <- function(row,elementid){

    # Dummy variable height of each rectangle. Not very important since plots are resized afterwards according to number of rows.
    row$dummy_y <- 20

    #auxiliary function to plot each row of the covplots
    #the x-value for each row is always the length of the protein
    rect <- ggplot2::ggplot(data = row, ggplot2::aes(x=max(row$Length), y = dummy_y)) +
        #get rid of the grey grid in the background set by default in ggplot2
        cowplot::theme_cowplot() +
        #set font size
        ggplot2::theme(axis.text = ggplot2::element_text(size=8)) +
        #start off each plot as a blank element
        ggplot2::geom_blank() +
        #get rid of all axes, titles, ticks and labels
        ggplot2::theme(
            axis.text.y=ggplot2::element_blank(),
            axis.text.x=ggplot2::element_blank(),
            axis.title.x=ggplot2::element_blank(),
            axis.title.y=ggplot2::element_blank(),
            axis.ticks.y=ggplot2::element_blank(),
            axis.ticks.x=ggplot2::element_blank(),
            axis.line.y=ggplot2::element_blank(),
            axis.line.x=ggplot2::element_blank()) +
        #plot a grey rectangle that represents the full protein structure.
        ggplot2::geom_rect(ggplot2::aes(xmin=0, xmax=max(row$Length), ymin=0, ymax=20,fill=I('grey95'))) +
        #plot slightly darker vertical lines that represent intervals of 100 residues.
        ggplot2::geom_vline(xintercept=seq(0, max(row$Length), by=100),color='grey56') +
        #plot orange rectangles that represent the span of each peptide with respect to the full protein sequence, according to the Start and End columns.
        ggplot2::geom_rect(ggplot2::aes(xmin=row$Start, xmax=row$End, ymin=0, ymax=20,fill=I('#f46d43'))) +
        #Label each row according to the peptide being represented: plot a geom_text with the sequence of the peptide (head of the elementid column). Position them according to the length of the row.
        ggplot2::geom_text(ggplot2::aes(label=as.character(utils::head(dplyr::select_(row,elementid)[,1],1))),size=4, position = ggplot2::position_nudge(x=-0.5*(max(row$Length)),y=-10))#as.character(head(row$Peptide,1)))

    #generate a ggplot2 plot grob from the previous ggplot object
    rect <- ggplot2::ggplotGrob(rect)


    #remove unnecessary plot elements
    rect <- cowplot::gtable_remove_grobs(rect, c('title', 'xlab-b', 'ylab-l', 'axis-b','axis-l','spacer'))
    #print(rect)
    #compress unused plot space
    rect <- cowplot::gtable_squash_rows(rect, c(1, 2, 3, 4, 5, 7, 8, 9, 10))
    rect <- cowplot::gtable_squash_cols(rect, c(1, 2, 3, 5, 6, 7))
    return(rect)

    #facet_grid(Peptide ~ .)
}
#' Make a coverage plot for all the peptides retrieved for a single protein.
#'
#' @param input_data Data frame to which the coverage columns will be appended.
#' @param groupid Data frame column (factor) corresponding to the names of the groups of elements in the data.
#' @param group_name (Optional) If specified, make plots from only those rows whose ID matched this argument.
#' @param elementid Data frame column (factor) corresponding to the elements for each of which a row of the final plot will be produced.
#' @param sort_column (Optional) If specified, rows in the final plot will be sorted according to this (numeric) column.
#' @importFrom dplyr select_ group_by_ summarize n filter transmute select
## @import tidyr
#' @importFrom purrr map %>%
#' @importFrom ggplot2 ggplot aes aes_ theme geom_rect geom_vline geom_text geom_blank position_nudge element_blank ggplotGrob element_text xlab xlim
#' @importFrom cowplot gtable_remove_grobs gtable_squash_rows gtable_squash_cols plot_grid ggdraw theme_cowplot
#' @importFrom utils head
## @import lazyeval
#' @return ggplot item to be plotted using cowplot::ggdraw().
## @seealso \code{\link{nchar}} which this function wraps
#' @export
#' @examples
#' library(purrr)
#' test_data <- read.csv(system.file('extdata/msdata.csv',package='pepliner'))
#' sequences <- system.file('extdata/proteome.fasta',package='pepliner')
#' cov_columns(test_data,sequences,groupid='ID',elementid='Peptide') %>%
#' covplot('Peptide','ID','sp|P55036|PSMD4_HUMAN') %>%
#' cowplot::ggdraw()

#default sort column is Start
covplot <- function(input_data,elementid,groupid='',group_name='',sort_column='Start'){

    #test if there are elements that appear under different groups.
    uniqueness_test <- input_data %>% dplyr::group_by_(elementid, groupid) %>%
    dplyr::summarize(groupsize = n()) %>%
    dplyr::filter(groupsize > 1)
    if(nrow(uniqueness_test) > 0){
        cat(paste0("Warning, ", nrow(uniqueness_test), " elements are not unique to groups.\nEx. peptides match multiple proteins.\n"))
    }

    #pre-filter columns that pertain to the plot
    pre_data <- input_data[colnames(input_data)%in%c(groupid,elementid,'Start','End','Length')]

    #if there is a specified group name but no ID column, stop running and show error.
    if(groupid=='' & group_name!=''){
        stop('Group ID column required to filter by group.')
    }
    #if there is a specified group Id column and a specified group name, filter data that whose Id value is equal to group_name
    if(groupid!=''&group_name!=''){
        data_group <- pre_data[pre_data[colnames(pre_data)==groupid]==group_name,] #not elegant, but hey! {base}
    #otherwise, don't do anything
    }else{
        data_group <- pre_data
    }
    #unnecessary step to be purged. Pending.
    cov_data <- data_group

    #Feed peptides into cov_row function
    cov_data = droplevels(cov_data)

    #reorder elementid column by sort_column. Default: Start.
    cov_data[,colnames(cov_data)==elementid] <- stats::reorder(cov_data[,colnames(cov_data)==elementid],cov_data[,colnames(cov_data)==sort_column])
    #Feed peptides into covplot_row function

    cov_data %>% split(select_(cov_data,elementid)[,1]) %>% purrr::map(covplot_row,elementid=elementid) -> cov_list

    start_end <- dplyr::select(cov_data,c('Start','End'))
    start_end<- dplyr::transmute(start_end,Interval=paste0((as.character(Start)),':',as.character(End)))
    interval_list <- purrr::map(start_end$Interval,.f=function(x){eval(parse(text=x))})
    cov_rate <- Reduce(union,interval_list) %>% length()/max(cov_data$Length) %/% 100#plot(y=rep(1,360))


    xaxis_plot <- ggplot2::ggplot(data=cov_data, ggplot2::aes(x=max(cov_data$Length), y = 20)) +
        cowplot::theme_cowplot() +
        ggplot2::geom_blank() +
        ggplot2::xlim(-2,max(cov_data[cov_data[colnames(cov_data)==groupid]==group_name,]$Length)) +
        ggplot2::xlab(paste0('Coverage: ',cov_rate,'%')) +
        # #get rid of all axes, titles, ticks and labels
        ggplot2::theme(
            axis.text.y=ggplot2::element_blank(),
            axis.title.y=ggplot2::element_blank(),
            axis.ticks.y=ggplot2::element_blank(),
            axis.line.y=ggplot2::element_blank())

    xaxis_plot <- ggplot2::ggplotGrob(xaxis_plot)
    xaxis_plot <- cowplot::gtable_remove_grobs(xaxis_plot, c('title', 'axis-l','spacer','axis-r','axis-t','panel','xlab-t','subtitle','caption','ylab-l','ylab-r'))
    #xaxis_plot <- cowplot::gtable_squash_rows(xaxis_plot, c(1, 2, 3, 4, 5, 7, 8, 9, 10))
    xaxis_plot <- cowplot::gtable_squash_cols(xaxis_plot, c(1, 2, 3, 5, 6, 7, 8, 9, 10))
    xaxis_plot <- cowplot::gtable_squash_cols(xaxis_plot, c(1, 2, 3, 5, 6, 7,8,9,10))

    #Aggregate ggplot objects
    clusterplot <- cowplot::plot_grid(plotlist = cov_list, ncol=1, align = "v")
    cowplot::plot_grid(clusterplot,xaxis_plot,align='h',nrow=2,rel_heights = c(25,1))


}