ChromSCape: Analysis of single-cell epigenomics datasets with a Shiny App

Documented in col2hex colors_scExp get_color_dataframe_from_input gg_fill_hue plot_cluster_consensus_scExp plot_correlation_PCA_scExp plot_coverage_BigWig plot_differential_summary_scExp plot_differential_volcano_scExp plot_distribution_scExp plot_heatmap_scExp plot_inter_correlation_scExp plot_intra_correlation_scExp plot_most_contributing_features plot_percent_active_feature_scExp plot_pie_most_contributing_chr plot_reduced_dim_scExp plot_top_TF_scExp plot_violin_feature_scExp retrieve_top_bot_features_pca warning_plot_reduced_dim_scExp

## Authors : Pacôme Prompsy, Celine Vallot
##Title : Wrappers & function to create variety of plot
## to uncover heterogeneity in single cell Dataset

#' Plotting distribution of signal
#'
#' @param scExp A SingleCellExperiment Object
#' @param raw Use raw counts ?
#' @param log10 Transform using log10 ?
#' @param pseudo_counts Pseudo-count to add if using log10 
#' @param bins Number of bins in the histogram
#'
#' @return A ggplot histogram representing the distribution of count per cell
#' @export
#'
#' 
#' @importFrom  ggplot2 ggplot
#' @importFrom Matrix colSums
#' @importFrom SummarizedExperiment assayNames
#' 
#' @examples
#' data("scExp")
#' plot_distribution_scExp(scExp)
plot_distribution_scExp <- function(
    scExp, raw = TRUE, log10 = FALSE, pseudo_counts = 1, bins = 150)
{
    
    stopifnot(is(scExp, "SingleCellExperiment"), is.numeric(pseudo_counts))
    if (!raw %in% c(TRUE, FALSE) | !log10 %in% c(TRUE, FALSE)) 
        stop(paste0("ChromSCape::plot_distribution_scExp - raw and log10 must ",
                    "be true or false."))
    if (raw == FALSE && !("normcounts" %in%
                          SummarizedExperiment::assayNames(scExp))) 
        stop(paste0("ChromSCape::plot_distribution_scExp - If raw is false, ",
                    "normcounts must not be empty - run normalize_scExp first."))
    
    if (raw) 
        cell_cov_df = data.frame(
            "coverageByCell" = Matrix::colSums(counts(scExp)))
    else cell_cov_df = data.frame(
        "coverageByCell" = Matrix::colSums(normcounts(scExp)))
    
    if (log10) 
        cell_cov_df$coverageByCell = log10(
            cell_cov_df$coverageByCell + pseudo_counts)
    
    ggplot(cell_cov_df, aes(x = .data$coverageByCell)) + 
        geom_histogram(color = "black", fill = "steelblue", bins = bins) +
        labs(x = "read coverageByCell per cell") + 
        theme(panel.grid.major = element_blank(), panel.grid.minor = 
                  element_blank(), panel.background = element_blank(),
              axis.line = element_line(colour = "black"), 
              panel.border = element_rect(colour = "black", fill = NA))
}

#' Adding colors to cells & features
#'
#' @param scExp A SingleCellExperiment Object
#' @param annotCol Column names to color
#' @param color_by If specifying color_df, column names to color
#' @param color_df Color data.frame to specify which color for which condition
#'
#' @return A SingleCellExperiment with additionnal "color" columns in colData
#' @export
#'
#' @importFrom SingleCellExperiment colData
#' @importFrom SummarizedExperiment colData
#' 
#' @examples
#' data("scExp")
#' scExp = colors_scExp(scExp,annotCol = c("sample_id",
#' "total_counts"),
#'  color_by =  c("sample_id","total_counts"))
#' 
#' #Specific colors using a manually created data.frame :
#' color_df = data.frame(sample_id=unique(scExp$sample_id),
#'  sample_id_color=c("red","blue","green","yellow"))
#' scExp = colors_scExp(scExp,annotCol="sample_id",
#' color_by="sample_id",color_df=color_df)
#' 
colors_scExp <- function(
    scExp, annotCol = "sample_id", color_by = "sample_id", color_df = NULL)
{
    stopifnot(is(scExp, "SingleCellExperiment"),
              is.character(annotCol), is.character(color_by))
    annot = as.data.frame(SingleCellExperiment::colData(scExp))
    anocol <- annotToCol2(annotS = annot[, annotCol, drop = FALSE],
                          annotT = annot, 
                          plotLegend = FALSE, categCol = NULL)
    SummarizedExperiment::colData(
        scExp)[, paste0(annotCol, "_color")] = as.data.frame(anocol, 
                                                             stringsAsFactors = FALSE)[, annotCol]  # factor or not ?
    if (!is.null(color_df))
    {
        # add custom colors
        if (!color_by %in% colnames(color_df)) 
            stop(paste0("ChromSCape::colors_scExp - color_by must be present ",
                        "in colnames of color_df is not null."))
        
        SummarizedExperiment::colData(scExp)[, paste0(annotCol, "_color")] = 
            color_df[match(SingleCellExperiment::colData(scExp)[, color_by],
                           color_df[, color_by]), paste0(color_by, "_color"),
                     drop = FALSE]
    }
    
    # same colors for cell_cluster & cluster + 'feature'
    if(any(grepl("cluster_", annotCol))){
        SummarizedExperiment::colData(scExp)[, paste0("cell_cluster_color")] =
            SummarizedExperiment::colData(scExp)[, paste0("cluster_",mainExpName(scExp),"_color")]
    }
    if(any(grepl("cell_cluster", annotCol))){
        SummarizedExperiment::colData(scExp)[, paste0("cluster_",mainExpName(scExp),"_color")] =
            SummarizedExperiment::colData(scExp)[, "cell_cluster_color"]
    }
    
    
    return(scExp)
}

#' Get color dataframe from shiny::colorInput
#'
#' @param input Shiny input object
#' @param levels_selected Names of the features
#' @param color_by Which feature color to retrieve
#' @param input_id_prefix Prefix in front of the feature names
#'
#' @return A data.frame with the feature levels and the colors of each level of
#'   this feature.
#'
#' @importFrom tibble rownames_to_column
#'   
get_color_dataframe_from_input <- function(
    input, levels_selected, color_by = c("sample_id", "total_counts"),
    input_id_prefix = "color_")
{
    stopifnot(!is.null(input), is.character(levels_selected),
              is.character(color_by))
    
    color_list <- paste0(
        "list(", paste0(levels_selected, " = input$", input_id_prefix, 
                        levels_selected, collapse = ", "), ")")
    
    color_list <- eval(parse(text = color_list))
    # Transform into dataframe with right column names
    color_df = as.matrix(color_list) %>% as.data.frame(
        stringsAsFactors = FALSE) %>% tibble::rownames_to_column(color_by)
    color_df[, 2] = as.character(color_df[, 2])
    colnames(color_df)[2] = paste0(color_by, "_color")
    
    return(color_df)
}

# Wrapper for plotting PCA & TSNE & UMAP
#' Plot reduced dimensions (PCA, TSNE, UMAP)
#'
#' @param scExp A SingleCellExperiment Object
#' @param color_by Character of eature used for coloration. Can be cell 
#' metadata ('total_counts', 'sample_id', ...) or a gene name.
#' @param reduced_dim Reduced Dimension used for plotting
#' @param select_x Which variable to select for x axis
#' @param select_y Which variable to select for y axis
#' @param downsample Number of cells to downsample
#' @param transparency Alpha parameter, between 0 and 1
#' @param max_distanceToTSS The maximum distance to TSS to consider a gene 
#' linked to a region. Used only if "color_by" is a gene name.
#' @param size Size of the points.
#' @param annotate_clusters A logical indicating if clusters should be labelled.
#' The 'cell_cluster' column should be present in metadata.
#' @param min_quantile The lower threshold to remove outlier cells, 
#' as quantile of cell embeddings (between 0 and 0.5).
#' @param max_quantile The upper threshold to remove outlier cells, 
#' as quantile of cell embeddings (between 0.5 and 1).
#'  
#' @return A ggplot geom_point plot of reduced dimension 2D reprensentation 
#' @export
#'
#' @importFrom SingleCellExperiment colData reducedDim normcounts
#' @importFrom ggplot2 ggplot geom_point labs theme element_blank element_line
#' element_rect scale_color_gradientn geom_label aes
#' @importFrom dplyr group_by filter slice_min mutate 
#' @importFrom tidyr separate_rows
#' @importFrom tibble rownames_to_column
#' @importFrom colorRamps matlab.like
#' 
#' @examples
#' data("scExp")
#' plot_reduced_dim_scExp(scExp, color_by = "sample_id")
#' plot_reduced_dim_scExp(scExp, color_by = "total_counts")
#' plot_reduced_dim_scExp(scExp, reduced_dim = "UMAP")
#' plot_reduced_dim_scExp(scExp, color_by = "CD52",  reduced_dim = "UMAP")
#' 
plot_reduced_dim_scExp <- function(
    scExp, color_by = "sample_id", reduced_dim = c("PCA", "TSNE", "UMAP"),
    select_x = NULL, select_y = NULL, downsample = 5000, 
    transparency = 0.6,  size = 1, max_distanceToTSS = 1000,
    annotate_clusters = "cell_cluster" %in% colnames(colData(scExp)),
    min_quantile = 0.01, max_quantile = 0.99)
{
  if (ncol(scExp) > downsample) 
    scExp = scExp[, sample(ncol(scExp), downsample, replace = FALSE)]
  annot = SingleCellExperiment::colData(scExp)
  
  if (!color_by %in% colnames(annot)) {
    annot_feature = as.data.frame(SummarizedExperiment::rowRanges(scExp))
    annot_feature = annot_feature[grep(color_by, annot_feature$Gene),]
    if(nrow(annot_feature) == 0)
      stop("ChromSCape::plot_reduced_dim_scExp - The gene chosen, ", 
           color_by, ", is not closer than ", max_distanceToTSS, 
           " to any", "loci. Consider increasing max_distanceToTSS to take in account ", 
           "this gene.")
    annot_feature = annot_feature %>% tidyr::separate_rows(.data[["Gene"]], 
                                                           sep = ", ") %>% dplyr::group_by(.data[["Gene"]]) %>% 
      dplyr::slice_min(.data[["distanceToTSS"]], with_ties = TRUE)
    annot_feature = annot_feature %>% dplyr::filter(.data[["distanceToTSS"]] < 
                                                      max_distanceToTSS)
    if (!color_by %in% annot_feature$Gene) 
      stop("ChromSCape::plot_reduced_dim_scExp - The gene chosen, ", 
           color_by, ", is not closer than ", max_distanceToTSS, 
           " to any", "loci. Consider increasing max_distanceToTSS to take in account ", 
           "this gene.")
    counts = SingleCellExperiment::normcounts(scExp[annot_feature$ID[which(annot_feature$Gene == 
                                                                             color_by)], ])
    if (!is.null(counts) & nrow(counts) > 1) {
      counts = Matrix::colSums(counts)
    }
    annot[, color_by] = as.numeric(counts)
  } else if (!paste0(color_by, "_color") %in% colnames(annot)) {
    scExp = colors_scExp(scExp, annotCol = color_by)
    annot = SingleCellExperiment::colData(scExp)
  }
  plot_df = as.data.frame(cbind(SingleCellExperiment::reducedDim(scExp, 
                                                                 reduced_dim[1]), annot))
  if (is.null(select_x)) 
    select_x = colnames(plot_df)[1]
  if (is.null(select_y)) 
    select_y = colnames(plot_df)[2]
  plot_df = plot_df %>% dplyr::mutate(transparency = transparency) %>% 
    dplyr::mutate(transparency = base::I(.data[["transparency"]]))
  plot_df = plot_df %>% dplyr::filter(.data[[select_x]] > quantile(plot_df[, 
                                                                           select_x], min_quantile), .data[[select_x]] < quantile(plot_df[, 
                                                                                                                                          select_x], max_quantile))
  plot_df = plot_df %>% dplyr::filter(.data[[select_y]] > quantile(plot_df[, 
                                                                           select_y], min_quantile), .data[[select_y]] < quantile(plot_df[, 
                                                                                                                                          select_y], max_quantile))
  annot = annot[match(rownames(plot_df), rownames(annot)), 
  ]
  if (!color_by %in% colnames(SingleCellExperiment::colData(scExp)) | 
      is.numeric(annot[, color_by])) {
    plot_df = plot_df %>% dplyr::mutate(transparency = ifelse(annot[, 
                                                                    color_by] == min(annot[, color_by]), 0.15, 1)) %>% 
      dplyr::mutate(transparency = I(.data[["transparency"]]))
  }
  p <- ggplot(plot_df, aes_string(x = select_x, y = select_y)) + 
    geom_point(size = size, aes(alpha = plot_df[, "transparency"], 
                                color = annot[, color_by])) + labs(color = color_by) + 
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
          panel.background = element_blank(), axis.line = element_line(colour = "black"), 
          panel.border = element_rect(colour = "black", fill = NA))
  if (is.numeric(annot[, color_by])) {
    p <- p + scale_color_gradientn(colours = c("grey75", 
                                                       rev(viridis::inferno(100))))
  } else {
    cols = unique(as.character(annot[, paste0(color_by, "_color")]))
    names(cols) = unique(as.character(annot[, color_by]))
    p <- p + scale_color_manual(values = cols)
  }
  if (annotate_clusters) {
    centroids = as.data.frame(t(sapply(unique(annot$cell_cluster), 
                                       function(clust) {
                                         cells = annot$cell_id[which(annot$cell_cluster == 
                                                                       clust)]
                                         centroid = c(mean(plot_df[cells, select_x]), 
                                                      mean(plot_df[cells, select_y]))
                                         return(centroid)
                                       }))) %>% tibble::rownames_to_column("cluster")
    p <- p + geom_label(data = centroids, aes(x = V1, y = V2, 
                                              label = cluster), size = 4, label.padding = unit(0.15, 
                                                                                               "lines"), label.size = 0.2)
  }
  
  return(p)
}

#' A warning helper for plot_reduced_dim_scExp
#'
#' @param scExp A SingleCellExperiment Object
#' @param color_by Feature used for coloration
#' @param reduced_dim Reduced Dimension used for plotting
#' @param downsample Number of cells to downsample
#' @param transparency Alpha parameter, between 0 and 1
#' @param size Size of the points.
#' @param max_distanceToTSS Numeric. Maximum distance to a gene's TSS to consider
#' a region linked to a gene. 
#' @param annotate_clusters A logical indicating if clusters should be labelled.
#' The 'cell_cluster' column should be present in metadata.
#' @param min_quantile The lower threshold to remove outlier cells, 
#' as quantile of cell embeddings (between 0 and 0.5).
#' @param max_quantile The upper threshold to remove outlier cells, 
#' as quantile of cell embeddings (between 0.5 and 1).
#' 
#' @return Warning or errors if the inputs are not correct
#' 
warning_plot_reduced_dim_scExp <- function(scExp, color_by , reduced_dim,
                                           downsample,
                                           transparency, size, max_distanceToTSS,
                                           annotate_clusters,
                                           min_quantile, max_quantile){
    stopifnot(is(scExp, "SingleCellExperiment"), is.character(color_by),
              is.character(reduced_dim), is.numeric(downsample),
              is.numeric(transparency), is.numeric(size),
              is.logical(annotate_clusters), is.numeric(max_distanceToTSS),
              is.numeric(min_quantile), is.numeric(max_quantile))
    if (!reduced_dim[1] %in% SingleCellExperiment::reducedDimNames(scExp)) 
        stop(paste0("ChromSCape::plot_reduced_dim_scExp - ", reduced_dim[1],
                    " is not present in object, please run normalize_scExp ",
                    "first."))
    if (!color_by %in% colnames(SingleCellExperiment::colData(scExp))) {
        genes = unique(
            unlist(strsplit(SummarizedExperiment::rowRanges(scExp)$Gene,
                                split = ", ", fixed=TRUE)))
        if(!color_by %in% genes){
            stop("ChromSCape::plot_reduced_dim_scExp - color_by must be a ",
        "character vector present in colnames of colData(scExp) or be a gene ",
                        "name present in the rowRanges(scExp) after calling ",
                        "feature_annotation_scExp() function.")
        } 
    }
}

#' Plotting correlation of PCs with a variable of interest
#'
#' @param scExp A SingleCellExperiment Object
#' @param correlation_var A string specifying with which numeric variable from
#' colData of scExp to calculate and plot the correlation of each PC with. 
#' ('total_counts')
#' @param color_by A string specifying with which categorical variable to
#'  color the plot. ('NULL')
#' @param topPC An integer specifying the number of PCs to plot correlation
#' with 10
#'
#' @return A ggplot histogram representing the distribution of count per cell
#' @export
#'
#' 
#' @importFrom  ggplot2 ggplot
#' @importFrom Matrix colSums
#' @importFrom SingleCellExperiment reducedDimNames reducedDim colData
#' 
#' @examples
#' data("scExp")
#' plot_correlation_PCA_scExp(scExp, topPC = 25)
#' plot_correlation_PCA_scExp(scExp, color_by = "cell_cluster")
#' plot_correlation_PCA_scExp(scExp, color_by = "sample_id")
plot_correlation_PCA_scExp <- function(
    scExp, correlation_var = 'total_counts', color_by = NULL, topPC = 10)
{
    
    stopifnot(is(scExp, "SingleCellExperiment"), is.numeric(topPC))
    if (!("PCA" %in% SingleCellExperiment::reducedDimNames(scExp))) 
        stop(paste0("ChromSCape::plot_correlation_PCA_scExp -",
                    "'PCA' must not be empty - run reduce_dims_scExp() first."))
    
    if (!(correlation_var %in% colnames(SingleCellExperiment::colData(scExp))))
        stop(paste0("ChromSCape::plot_correlation_PCA_scExp - correlation_var ",
                    "is not present in scExp colData."))
    
    if (!is.null(color_by)  && !(color_by %in% colnames(SingleCellExperiment::colData(scExp))))
        stop(paste0("ChromSCape::plot_correlation_PCA_scExp - color_by ",
                    "is not present in scExp colData."))
    
    annot = as.data.frame(SingleCellExperiment::colData(scExp))
    
    if (!is.numeric(annot[,correlation_var]))
        stop(paste0("ChromSCape::plot_correlation_PCA_scExp - If raw is false, ",
                    "normcounts must not be empty - run normalize_scExp first."))

    pca = SingleCellExperiment::reducedDim(scExp,"PCA")
    

    if(!is.null(color_by)) {
        color_by_vector = unique(annot[,color_by])
        correlation = sapply(seq_along(color_by_vector), function(group) {
            pca. = pca[which(annot[,color_by] %in% color_by_vector[group]),]
            annot. = annot[which( annot[,color_by] %in% color_by_vector[group]),]
            cormat = apply(pca., 2, function(i){
                cor(i, annot.[,correlation_var], method = "pearson")
            })
        })
        df = as.data.frame(correlation)
        colnames(df) = color_by_vector
        df$Component = forcats::as_factor(rownames(df))
        
        df = df %>% tidyr::gather(-Component, key = "color_by_vector",
                                  value = "correlation")
        df = df[which(df$Component %in% df$Component[seq_len(topPC)]),]
        p = df %>% ggplot(aes(x = .data[["Component"]] ,
                                           y = .data[["correlation"]],
                                           fill = .data[["color_by_vector"]])) + 
        geom_bar(position = position_dodge2(), stat = "identity") + 
        xlab("") + ylab(paste0("Pearson's correlation with '", correlation_var,"'")) +
            labs(fill=color_by) + 
        theme(panel.grid.major = element_blank(), panel.grid.minor = 
                  element_blank(), panel.background = element_blank(),
              axis.line = element_line(colour = "black"), 
              panel.border = element_rect(colour = "black", fill = NA),
                  axis.text.x = element_text(angle = 90, vjust = 0.5)) 
    } else {
        color_by_vector = unique(annot[,color_by])
        correlation = apply(pca, 2, function(i){
            cor(i, annot[,correlation_var], method = "pearson")
        })
        df = as.data.frame(correlation)
        df$Component = forcats::as_factor(rownames(df))
        
        p = utils::head(df, topPC) %>% ggplot(aes(x = .data[["Component"]] , y = .data[["correlation"]])) + 
            geom_bar(color = "black", fill = "steelblue", stat = "identity") +
            xlab("") + ylab(paste0("Pearson's correlation with '", correlation_var,"'")) +
            theme(panel.grid.major = element_blank(), panel.grid.minor = 
                      element_blank(), panel.background = element_blank(),
                  axis.line = element_line(colour = "black"), 
                  panel.border = element_rect(colour = "black", fill = NA),
                  axis.text.x = element_text(angle = 90, vjust = 0.5))
    
        }
    return(p)
}

#' Plot Top/Bottom most contributing features to PCA
#'
#' @details 
#' If a gene TSS is within 10,000bp of the region, the name of the gene(s) will
#' be displayed instead of the region
#' 
#' @param scExp A SingleCellExperiment containing "PCA" in reducedDims and gene
#' annotation in rowRanges
#' @param component The name of the component of interest 
#' @param n_top_bot An integer number of top and bot regions to plot 
#'
#' @return A barplot of top and bottom features with the largest absolute
#' value in the component of interest
#' 
#' @importFrom SingleCellExperiment reducedDim normcounts
#' @importFrom SummarizedExperiment rowRanges
#'  
#' @export
#'
#' @examples
#' data(scExp)
#' plot_most_contributing_features(scExp, component = "Component_1")
plot_most_contributing_features <- function(scExp, component = "Component_1",
                                            n_top_bot = 10){
    
    top_bot  = retrieve_top_bot_features_pca(
        SingleCellExperiment::reducedDim(scExp,"PCA"),
        SingleCellExperiment::normcounts(scExp), component, n_top_bot)
    
    gene_info = SummarizedExperiment::rowRanges(scExp)
    gene_info = gene_info[match(rownames(top_bot), gene_info$ID)]
    Gene = substr(gene_info$Gene,start = 1, stop = 13)
    Gene = ifelse(nchar(Gene)>=13, paste0(Gene,"..."), Gene)
    top_bot$genes = ifelse(gene_info$distanceToTSS < 10000,
                           Gene,  gene_info$ID)
   
    top_bot$genes = factor(top_bot$genes, levels = unique(top_bot$genes))
    
    ggplot(top_bot) + geom_bar(aes(x = genes, y = .data[[component]]),
                               fill = c(rep("#52C461DF",n_top_bot),
                                        rep("red",n_top_bot)),
                               alpha = 0.7, stat="identity") + theme_classic() +
        theme(axis.text.x = element_text(angle=75, hjust = 1)) +
        xlab("") + ylab(paste0("Contribution of features to ", component))
}

#' Pie chart of top contribution of chromosomes in the 100 most contributing 
#' features to PCA
#'#' 
#' @param scExp A SingleCellExperiment containing "PCA" in reducedDims and gene
#' annotation in rowRanges
#' @param component The name of the component of interest 
#' @param n_top_bot An integer number of top and bot regions to plot (100)
#'
#' @return A pie chart showing the distribution of chromosomes in the top 
#' features with the largest absolute value in the component of interest
#' 
#' @importFrom SingleCellExperiment reducedDim normcounts
#' @importFrom SummarizedExperiment rowRanges
#' @importFrom graphics pie
#'  
#' @export
#'
#' @examples
#' data(scExp)
#'  plot_pie_most_contributing_chr(scExp, component = "Component_1")
plot_pie_most_contributing_chr <- function(scExp, component = "Component_1",
                                           n_top_bot = 100){
    
    top_bot  = retrieve_top_bot_features_pca(
        SingleCellExperiment::reducedDim(scExp,"PCA"),
        SingleCellExperiment::normcounts(scExp), component, n_top_bot,
        absolute = TRUE)
    
    chr_info = as.data.frame(SummarizedExperiment::rowRanges(scExp))
    chr_info = chr_info[match(rownames(top_bot), chr_info$ID),]
    chr_info$absolute_value = abs(top_bot[,component])
    distrib = chr_info %>% dplyr::group_by(seqnames) %>% 
        summarise(contribution = sum(absolute_value))
    distrib$contribution = 100 * distrib$contribution/sum(distrib$contribution)
    distrib = setNames(distrib$contribution,distrib$seqnames)
    distrib = sort(distrib, decreasing = TRUE)
    distrib[6] = sum(distrib[6:length(distrib)])
    names(distrib)[6] = "Others"
    distrib = distrib[1:6]
    distrib_pc = round(distrib, 1)
    names(distrib) = paste0(names(distrib), " - ",distrib_pc, " %")
    graphics::pie(distrib,clockwise = TRUE, radius = 1,
        col = c("#5DA5DA","#FAA43A","#60BD68","#F17CB0","#DECF3F","#4D4D4D"))
}

#' Retrieve Top and Bot most contributing features of PCA
#'
#' @param pca A matrix/data.frame of rotated data
#' @param counts the normalized counts used for PCA
#' @param component the componenent of interest
#' @param n_top_bot the number of top & bot features to take
#' @param absolute If TRUE, return the top features in absolute values instead.
#'
#' @return a data.frame of top bot contributing features in PCA 
#'
retrieve_top_bot_features_pca <- function(pca, counts, component, n_top_bot,
                                          absolute = FALSE){
    rotations = counts %*% as.matrix(pca)

    rotations = rotations[,component,drop=FALSE]
    top = as.data.frame(as.matrix(
        utils::head(rotations[order(rotations, decreasing = TRUE),, drop=FALSE],
             n_top_bot)))
    bot = as.data.frame(as.matrix(
        utils::tail(rotations[order(rotations, decreasing = TRUE),, drop=FALSE],
             n_top_bot)))
    top_bot = as.data.frame(rbind(top, bot))
    if(absolute) {
        rotations = abs(rotations)
        top_bot = as.data.frame(as.matrix(
            utils::head(rotations[order(rotations, decreasing = TRUE),, drop=FALSE],
             n_top_bot)))
    }
    return(top_bot)
}

#' Plot cell correlation heatmap with annotations
#'
#' @param scExp A SingleCellExperiment Object
#' @param name_hc Name of the hclust contained in the SingleCellExperiment
#'   object
#' @param corColors A palette of colors for the heatmap
#' @param color_by Which features to add as additional bands on top of plot
#' @param downsample Number of cells to downsample
#' @param hc_linkage A linkage method for hierarchical clustering. See
#'   \link[stats]{cor}. ('ward.D')
#'
#' @return A heatmap of cell to cell correlation, grouping cells by hierarchical
#'   clustering.
#' @export
#'
#' @importFrom SingleCellExperiment reducedDim reducedDimNames colData
#' @importFrom grDevices colorRampPalette
#'
#' @examples
#' data("scExp")
#' plot_heatmap_scExp(scExp)
#' 
plot_heatmap_scExp <- function(scExp, name_hc = "hc_cor", corColors = (
    grDevices::colorRampPalette(c("royalblue", "white", "indianred1")))(256),
    color_by = NULL, downsample = 1000, hc_linkage = "ward.D")
{
    fullCor = TRUE
    stopifnot(is(scExp, "SingleCellExperiment"))
    if (!"Cor" %in% SingleCellExperiment::reducedDimNames(scExp)){
      fullCor = FALSE
      if(!"cormat" %in% names(scExp@metadata)){
        stop(paste0("ChromSCape::plot_heatmap_scExp - No correlation, run ",
                    "correlation_and_hierarchical_clust_scExp before filtering."))
      }
    } 
        
    
    if (!name_hc %in% names(scExp@metadata)) 
        stop(paste0("ChromSCape::plot_heatmap_scExp - No dendrogram, run ",
                    "correlation_and_hierarchical_clust_scExp before filtering."))
    
    if (fullCor & length(scExp@metadata[[name_hc]]$order) != ncol(scExp)) 
        stop(paste0("ChromSCape::plot_heatmap_scExp - Dendrogram has different",
                    " number of cells than dataset."))
    if(fullCor & ncol(scExp) > downsample) {
        samp = sample(ncol(scExp),downsample, replace = FALSE)
        scExp = scExp[,samp]
        SingleCellExperiment::reducedDim(scExp, "Cor") = 
            SingleCellExperiment::reducedDim(scExp, "Cor")[,samp]
        cor_mat = as.matrix(SingleCellExperiment::reducedDim(scExp, "Cor"))
        hc_cor = stats::hclust(as_dist(1 - cor_mat), method = hc_linkage)
        hc_cor$labels = rep("",ncol(scExp))
        scExp@metadata[[name_hc]] = hc_cor
    }
    
    anocol = as.matrix(
        SingleCellExperiment::colData(
            scExp)[, grep("_color",
                          colnames(SingleCellExperiment::colData(scExp))), 
                   drop = FALSE])
    colnames(anocol) = gsub("_color","",colnames(anocol))
    
    if(!is.null(color_by)) {
        if(length(intersect(color_by,colnames(anocol)))>0) 
            anocol = anocol[,color_by,drop=FALSE]
    }
    
    if(fullCor){
      return(
        hclustAnnotHeatmapPlot(
          x = as.matrix(SingleCellExperiment::reducedDim(scExp, "Cor"))[
            scExp@metadata[[name_hc]]$order, 
            scExp@metadata[[name_hc]]$order], hc = scExp@metadata[[name_hc]],
          hmColors = corColors,
          anocol = anocol[scExp@metadata[[name_hc]]$order, ,drop=FALSE],
          xpos = c(0.15, 0.9, 0.164, 0.885),
          ypos = c(0.1, 0.5, 0.5, 0.6, 0.62, 0.95), dendro.cex = 0.04, 
          xlab.cex = 0.8, hmRowNames = FALSE)
      )
    } else{
      anocol = anocol[match(rownames(scExp@metadata$cormat), 
                            rownames(anocol)),]
      return(
        hclustAnnotHeatmapPlot(
          x = as.matrix(scExp@metadata$cormat)[
            scExp@metadata[[name_hc]]$order, 
            scExp@metadata[[name_hc]]$order],
          hc = scExp@metadata[[name_hc]],
          hmColors = corColors,
          anocol = anocol[scExp@metadata[[name_hc]]$order, ,drop=FALSE],
          xpos = c(0.15, 0.9, 0.164, 0.885),
          ypos = c(0.1, 0.5, 0.5, 0.6, 0.62, 0.95), dendro.cex = 0.04, 
          xlab.cex = 0.8, hmRowNames = FALSE)
      )
    }
      
    
}


#' Violin plot of intra-correlation distribution
#'
#' @param scExp_cf A SingleCellExperiment
#' @param by Color by sample_id or cell_cluster
#' @param jitter_by Add jitter points of another layer
#'  (cell_cluster or sample_id)
#' @param downsample Downsample for plotting
#'
#' @return A violin plot of intra-correlation
#' @export
#' @importFrom forcats fct_inorder
#' @examples 
#' data(scExp)
#' plot_intra_correlation_scExp(scExp)
plot_intra_correlation_scExp <- function(
    scExp_cf, by = c("sample_id", "cell_cluster")[1], jitter_by = NULL,
    downsample = 5000){
    
  fullCor = TRUE
  if (!"Cor" %in% SingleCellExperiment::reducedDimNames(scExp_cf)){
    fullCor = FALSE
    if(!"cormat" %in% names(scExp_cf@metadata)){
      stop(paste0("ChromSCape::plot_intra_correlation_scExp - No correlation, run ",
                  "correlation_and_hierarchical_clust_scExp before filtering."))
    }
  } 
  if(ncol(scExp_cf) > downsample) scExp_cf = scExp_cf[,sample(ncol(scExp_cf),
                                                              downsample,
                                                              replace = FALSE)]

  samp = intra_correlation_scExp(scExp_cf, by, fullCor)
  annot = SingleCellExperiment::colData(scExp_cf)
  if(!is.null(jitter_by)){
    if(jitter_by != by) samp[,jitter_by] = 
        annot[,jitter_by][match(rownames(samp),annot$cell_id)]
  }
  p <- ggplot(samp) + geom_violin(aes(x=.data[[by]], y=.data$intra_corr,
                                      fill=.data[[by]]), alpha=0.8) +
    theme_classic() +
    theme(axis.text.x = element_text(angle=90)) + 
    ylab(paste0("Intra-",by," correlation")) + xlab("")
  
  cols = unique(as.character(annot[,paste0(by,"_color")][
    match(rownames(samp),annot$cell_id)]))
  names(cols) = unique(as.character(annot[,by][match(
    rownames(samp),annot$cell_id)]))
  p <- p + scale_fill_manual(values = cols)
  
  if(!is.null(jitter_by)){
    if(grepl("counts", jitter_by)){
      p <- p + geom_jitter(
        aes(x=.data[[by]], y=.data$intra_corr,
            color = .data[[jitter_by]]),
        alpha = 0.45) +
        scale_color_gradientn(colours = matlab.like(100))
      
    } else{
      p <- p + geom_jitter(
        aes(x=.data[[by]], y=.data$intra_corr,
            color = forcats::fct_inorder(.data[[jitter_by]])),
        alpha = 0.45) +
        scale_color_manual(
          values = unique(annot[,paste0(jitter_by,"_color")][
            match(rownames(samp),annot$cell_id)]))
    }
  }
  return(p + theme(legend.title = element_text("")))
}

#' Violin plot of inter-correlation distribution between one or multiple groups
#' and one reference group
#'
#' @param scExp_cf A SingleCellExperiment
#' @param by Color by sample_id or cell_cluster
#' @param jitter_by Add jitter points of another layer
#'  (cell_cluster or sample_id)
#' @param reference_group Character containing the reference group name to 
#' calculate correlation from.
#' @param other_groups Character vector of the other groups for which to 
#' calculate correlation with the reference group.
#' @param downsample Downsample for plotting
#'
#' @return A violin plot of inter-correlation
#' @export
#'
#' @importFrom forcats fct_inorder
#' 
#' @examples
#' data(scExp)
#' plot_intra_correlation_scExp(scExp)
plot_inter_correlation_scExp <- function(
    scExp_cf, by = c("sample_id", "cell_cluster")[1], jitter_by = NULL,
    reference_group = unique(scExp_cf[[by]])[1],
    other_groups = unique(scExp_cf[[by]]),
    downsample = 5000){
  fullCor = TRUE
  if (!"Cor" %in% SingleCellExperiment::reducedDimNames(scExp_cf)){
    fullCor = FALSE
    if(!"cormat" %in% names(scExp_cf@metadata)){
      stop(paste0("ChromSCape::plot_intra_correlation_scExp - No correlation, run ",
                  "correlation_and_hierarchical_clust_scExp before filtering."))
    }
  } 
  if(ncol(scExp_cf) > downsample) scExp_cf =
      scExp_cf[,sample(ncol(scExp_cf),downsample,replace = FALSE)]
  
  samp = inter_correlation_scExp(scExp_cf, by, reference_group, other_groups, fullCor)
  annot = SingleCellExperiment::colData(scExp_cf)
  if(!is.null(jitter_by)){
    samp[,jitter_by] = 
      annot[,jitter_by][match(rownames(samp),annot$cell_id)]
  }
  p <- ggplot(samp) + geom_violin(aes(
    x=.data[[paste0(by,"_i")]], y=.data[["inter_corr"]],
    fill=.data[[paste0(by,"_i")]]), alpha=0.8) +
    theme_classic() +
    theme(axis.text.x = element_text(angle=90)) + 
    ylab(paste0("Inter-",by," correlation")) + xlab("")
  
  cols = unique(as.character(annot[,paste0(by,"_color")][
    match(rownames(samp),annot$cell_id)]))
  names(cols) = unique(as.character(annot[,by][match(
    rownames(samp),annot$cell_id)]))
  
  p <- p + scale_fill_manual(values = cols)
  
  if(!is.null(jitter_by)){
    if(grepl("counts_", jitter_by)){
      p <- p + geom_jitter(
        aes(x=.data[[paste0(by,"_i")]], y=.data$inter_corr,
            color = .data[[jitter_by]]),
        alpha = 0.45) +
        scale_color_gradientn(colours = matlab.like(100))
    } else{
      p <- p + geom_jitter(
        aes(x=.data[[paste0(by,"_i")]], y=.data$inter_corr,
            color = forcats::fct_inorder(
              .data[[jitter_by]])),
        alpha = 0.45) +
        scale_color_manual(
          values = unique(annot[,paste0(jitter_by,"_color")][
            match(rownames(samp),annot$cell_id)]))
    }
  }
  return(p + theme(legend.title = element_text("")))
}

#' Coverage plot 
#'
#' @param coverages A list containing sample coverage as GenomicRanges
#' @param label_color_list List of colors, list names are labels
#' @param peaks A GRanges object containing peaks location to plot (optional)
#' @param chrom Chromosome
#' @param start Start
#' @param end End
#' @param ref Genomic Reference
#'
#' @return A coverage plot annotated with genes
#' 
#' @importFrom ggrepel geom_text_repel
#' @importFrom gggenes geom_gene_arrow
#' @importFrom dplyr mutate filter
#' @importFrom gridExtra grid.arrange
#' @importFrom S4Vectors subjectHits 
#' @importFrom GenomicRanges GRanges findOverlaps score
#' @export
#'
#' @examples
#' data(scExp)
#' 
plot_coverage_BigWig <- function(
    coverages, label_color_list, peaks = NULL,
    chrom, start, end, ref = "hg38"){
    
    eval(parse(text = paste0("data(", ref, ".GeneTSS)")))
    genebed = eval(parse(text = paste0("", ref, ".GeneTSS")))
    genebed$strand2 = genebed$strand
    genebed$strand = "."
    colnames(genebed)[5:6] = c("score", "strand")
    
    bin_width = floor(mean(width(coverages[[1]])))
    
    #Select region of interest
    roi = GenomicRanges::GRanges(
        chrom, ranges = IRanges::IRanges(start, end))

    coverage_list = list()
    if(!is(coverages, "list")) {
        coverage_list[[1]] = coverages[S4Vectors::subjectHits(
            GenomicRanges::findOverlaps(roi, coverages)),]
    } else{
        for(i in seq_along(coverages)){
            coverage_list[[i]] = coverages[[i]][S4Vectors::subjectHits(
                GenomicRanges::findOverlaps(roi,coverages[[i]])),]
        }
    }
    
    if(sum(sapply(coverage_list, length)) >0){
        
        # Tile on region
        bins <- unlist(GenomicRanges::tile(roi, width = bin_width))
        
        for(i in seq_along(coverage_list)){
            bins. = bins
            bins.$score = 0
            matches = GenomicRanges::findOverlaps(coverage_list[[i]], bins., select = "first")
            bins.$score[matches] = coverage_list[[i]]$score
            coverage_list[[i]] = bins.
        }
        min_x = min(start(bins)) - 100 
        max_x = max(end(bins)) + 100
        max_y = round(max(sapply(coverage_list, function(tab) max(tab$score))),3)
        
        n = length(coverage_list)
        layout.matrix <- matrix(
            c(sort(rep(seq_len(n),4)), rep(n+1,8)), ncol = 1)
        peaks_roi=data.frame()
        if(!is.null(peaks)){
            # peaks_roi =   peaks[S4Vectors::subjectHits(
            #     GenomicRanges::findOverlaps(roi, peaks)),] 
            peaks_roi = S4Vectors::intersect(peaks, roi, ignore.strand = TRUE)
            peaks_roi = as.data.frame(peaks_roi)
            if(nrow(peaks_roi)>0) layout.matrix <- matrix(
                c(sort(rep(seq_len(n),3)), n+1,n+1,n+1,n+1,n+2,n+2,n+2,n+2), ncol = 1)
        }
        
        list_plot = list()    
        for(i in seq_along(coverage_list)){
            coverages_df_tmp = as.data.frame(coverage_list[[i]])
            list_plot[[i]] = coverages_df_tmp %>%
                ggplot(mapping = aes(x = start, y = score)) +
                geom_area(stat = "identity", fill = label_color_list[i]) + 
                geom_hline(yintercept = 0, size = 0.1) +
                ylab(label = names(label_color_list)[i]) +  ylim(c(0, max_y)) + 
                theme_classic() + 
                theme(panel.spacing.y = unit(x = 0.5, units = "line"),
                      axis.title.x = element_blank(),
                      axis.text.x = element_blank(),
                      axis.ticks.x = element_blank(),
                      axis.line.x = element_blank(),
                      axis.line.y = element_blank()) 
            
        }
        
        if(nrow(peaks_roi)>0) {
            
            peaks_roi$molecule = 
                rep(c(1.2,1.8),
                    length(peaks_roi$start))[1:length(peaks_roi$start)]
            
            list_plot[[length(list_plot) + 1 ]] = peaks_roi %>% 
                ggplot(aes(xmin = start, xmax = end, y = molecule)) +
                gggenes::geom_gene_arrow(colour ="#B82144" , fill = "#B82144",
                                         arrowhead_width = grid::unit(0, "mm"),
                                         arrowhead_height = grid::unit(0, "mm"),
                                         arrow_body_height = grid::unit(2, "mm"),
                                         show.legend = FALSE) +
                theme_classic() + ylim(c(1,2)) + 
                theme(panel.spacing.y = unit(x = 0.5, units = "line"),
                      axis.text.x = element_blank(),
                      axis.title.x = element_blank(),
                      axis.text.y = element_blank(),
                      axis.ticks.y = element_blank(),
                      axis.ticks.x = element_blank(),
                      axis.line.x = element_blank(),
                      axis.line.y = element_blank()) +
                ylab("Peaks")  + xlim(c(min_x, max_x))
        }
        
        genebed_tmp = genebed[which(genebed$chr==chrom &
                                        genebed$start > start  & 
                                        genebed$end < end),]
        
        if(nrow(genebed_tmp) > 0) { 
            genebed_tmp$orientation = ifelse(genebed_tmp$strand=="+", 1, -1)
            genebed_tmp$molecule = 
                rep(c(1,2.35),length(genebed_tmp$start))[1:length(genebed_tmp$start)]
        list_plot[[length(list_plot) + 1 ]] = genebed_tmp %>% 
            ggplot(aes(xmin = start, xmax = end, y = molecule, forward = orientation)) +
            gggenes::geom_gene_arrow(fill = "black", 
                                     arrowhead_width = grid::unit(2, "mm"),
                                     arrowhead_height = grid::unit(4, "mm"),
                                     arrow_body_height = grid::unit(1, "mm"),
                                     show.legend = FALSE) +
            ggrepel::geom_text_repel(data = genebed_tmp %>%
                                         dplyr::mutate(start = (.data[["start"]] + .data[["end"]])/2),
                                     aes(x = start, y = molecule, label = Gene),
                                     size = 4,   inherit.aes = FALSE,
                                     nudge_y = -0.1) + 
            theme_classic() + ylim(c(0, 2.5)) + 
            theme(panel.spacing.y = unit(x = 0.5, units = "line"),
                  axis.title.x = element_text(), axis.title = element_blank(),
                  axis.text.y = element_blank(), axis.ticks.y = element_blank(),
                  axis.line.y = element_blank()) + xlab(chrom) + xlim(c(min_x, max_x))
        } else{
            list_plot[[length(list_plot) + 1 ]] = NULL
        }
        print(gridExtra::grid.arrange(grobs = list_plot, layout_matrix = layout.matrix))
        
    } else{
        message("ChromSCape::plot_coverage_BigWig - No reads found within the required ranges.")
    }
}


#' Differential summary barplot
#'
#' @param scExp_cf A SingleCellExperiment object
#' @param qval.th Adjusted p-value threshold. (0.01)
#' @param logFC.th Fold change threshold. (1)
#' @param min.percent Minimum fraction of cells having the feature active to
#' consider it as significantly differential. (0.01)
#' 
#' @return A barplot summary of differential analysis
#' @export
#' @importFrom graphics barplot
#' @examples
#' data("scExp")
#' plot_differential_summary_scExp(scExp)
plot_differential_summary_scExp <- function(scExp_cf, qval.th = 0.01, 
                                            logFC.th = 1, min.percent = 0.01)
{
    
    stopifnot(is(scExp_cf, "SingleCellExperiment"))
    if(!any(grepl("logFC", colnames(SingleCellExperiment::rowData(scExp_cf)))) ) 
        stop(paste0("ChromSCape::differential_barplot_scExp - ",
                    "No DA, please run differential_analysis_scExp first."))
    
    summary = summary_DA(scExp_cf, qval.th = qval.th, 
                         logFC.th = logFC.th, min.percent = min.percent)
    myylim <- range(c(summary["over", ], -summary["under", ]))
    barplot(summary["over", ], col = "red", las = 1, ylim = myylim,
            main = "Differentially enriched regions", 
            ylab = "Number of regions", axes = FALSE)
    barplot(-summary["under", ], col = "forestgreen", ylim = myylim,
            add = TRUE, axes = FALSE, names.arg = "")
    z <- axis(2, pos = -10)
    axis(2, at = z, labels = abs(z), las = 1)
}

#' Volcano plot of differential features
#'
#' @param scExp_cf A SingleCellExperiment object
#' @param group A character indicating the group for which to plot the 
#' differential volcano plot. ("C1")
#' @param qval.th Adjusted p-value threshold. (0.01)
#' @param logFC.th Fold change threshold. (1)
#' @param min.percent Minimum fraction of cells having the feature active to
#' consider it as significantly differential. (0.01)
#' 
#' @return A volcano plot of differential analysis of a specific cluster
#' @export
#'
#' @examples
#' data("scExp")
#' plot_differential_volcano_scExp(scExp,"C1")
plot_differential_volcano_scExp <- function(
    scExp_cf, group = "C1", logFC.th = 1, qval.th = 0.01, 
    min.percent = 0.01)
{
    
    stopifnot(is(scExp_cf, "SingleCellExperiment"), is.character(group), 
              is.numeric(qval.th), is.numeric(logFC.th), is.numeric(min.percent))
    
  if(!any(grepl("logFC", colnames(SingleCellExperiment::rowData(scExp_cf)))) ) 
    stop(paste0("ChromSCape::plot_differential_volcano_scExp - ",
                "No DA, please run differential_analysis_scExp first."))
    
    if (!any(grepl(group, colnames(SingleCellExperiment::rowData(scExp_cf))))) 
        stop(paste0("ChromSCape::differential_volcano_plot_scExp - Chromatin ",
                    "group specified doesn't correspond to differential analysis, please ",
                    "rerun differential_analysis_scExp first with correct parameters."))
    
    res = SingleCellExperiment::rowData(scExp_cf)
    summary = summary_DA(scExp = scExp_cf, qval.th = qval.th,
                         logFC.th = logFC.th, min.percent = min.percent)
    
    mycol <- rep("black", nrow(res))
    mycol[which(res[, paste("qval", group, sep = ".")]
                <= qval.th & res[, 
                                 paste("logFC", group, sep = ".")] > logFC.th)] <- "red"
    mycol[which(res[, paste("qval", group, sep = ".")]
                <= qval.th & res[, 
                                 paste("logFC", group, sep = ".")] < -logFC.th)] <- "forestgreen"

    pch = rep(1, nrow(res))
    pch[which(res[, paste("logFC", group, sep = ".")] > 0 &  
                res[, paste("group_activation", group, sep = ".")] > 
                  min.percent)]  = 16
    pch[which(res[, paste("logFC", group, sep = ".")] < 0 &  
                res[, paste("reference_activation", group, sep = ".")] > 
                  min.percent)]  = 16
    
    opar <- par(no.readonly = TRUE)
    par(mar = c(12, 5, 5, 5))
    
    plot(res[, paste("logFC", group, sep = ".")],
         -log10(res[, paste("qval", group, sep = ".")]), col = mycol,
         cex = 1.3 , xlab = "log2-FC", 
         pch = pch,
         ylab = "-log10(adjusted p-value)", las = 1,
         main = paste(group, "\n",
                      summary["over", group],
                      "enriched,", summary["under", group], "depleted"))
    abline(v = logFC.th, lty = 2)
    abline(h = -log10(qval.th), lty = 2)
    abline(v = -logFC.th, lty = 2)
    
    legend(x = "bottom",
           inset = c(-1, -1.15),
           legend = c(paste0("Region activated in > ",
                             100 * min.percent, "% cells of ", group),
                      paste0("Region activated in < ",
                             100 * min.percent, "% cells of ", group),
                      paste0("Region activated in > ",
                             100 * min.percent, "% cells of ref."),
                      paste0("Region activated in < ",
                             100 * min.percent, "% cells of ref.")), 
           pch = c(16, 1), col = c("red", "red",
                                   "forestgreen", "forestgreen"), xpd = TRUE
    )
    par(opar)
}

#' gg_fill_hue
#'
#' @param n num hues
#'
#' @importFrom grDevices hcl
#' @return A color in HEX format 
gg_fill_hue <- function(n)
{
    hues = seq(15, 375, length = n + 1)
    grDevices::hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}


#' Col2Hex
#'
#' Transform character color to hexadecimal color code.
#'
#' @param cname Color name
#'
#' @return The HEX color code of a particular color
#' @importFrom grDevices col2rgb rgb
#'
col2hex <- function(cname)
{
    colMat <- grDevices::col2rgb(cname)
    grDevices::rgb(
        red = colMat[1, ]/255, green = colMat[2, ]/255, blue = colMat[3,]/255)
}

#' Plot cluster consensus
#'
#' Plot cluster consensus score for each k as a bargraph.
#'
#' @param scExp A SingleCellExperiment
#'
#' @return The consensus score for each cluster for each k as a barplot
#' @importFrom dplyr as_tibble
#' @importFrom ggplot2 ggplot geom_bar facet_grid scale_fill_manual 
#' element_blank theme_minimal theme position_dodge ylab
#' 
#' @export
#'
#' @examples
#' data("scExp")
#' 
#' plot_cluster_consensus_scExp(scExp)
#' 
plot_cluster_consensus_scExp <- function(scExp)
{
    stopifnot(is(scExp,"SingleCellExperiment"))
    if(!"icl" %in% names(scExp@metadata))
        stop(paste0("plot_cluster_consensus_scExp - please run ",
                    "consensus_clustering_scExp first."))
    
    if(!"consclust" %in% names(scExp@metadata))
        stop(paste0("plot_cluster_consensus_scExp - please run ",
                    "consensus_clustering_scExp first."))
    cc = dplyr::as_tibble(scExp@metadata$icl$clusterConsensus)
    colors = unique(as.character(scExp@metadata$consclust[[1]]))
    colors = c(colors,"#B55274")
    cc$k = factor(paste0("k=",cc$k), levels=unique(paste0("k=",cc$k)))
    cc$cluster =  factor(
        paste0("C",cc$cluster), levels = unique(paste0("C",cc$cluster)))
    p = cc %>% ggplot(aes(x=.data$cluster, y=.data$clusterConsensus,
                          fill=.data$cluster)) +
        geom_bar(stat = "identity", position=position_dodge(width=0.9)) +
        facet_grid(.~as.factor(k), scales="free_x", space="free") +
        theme_minimal() + theme(panel.grid = element_blank(),
                                axis.text.x = element_blank()) +
        scale_fill_manual(values=unique(colors)) + ylab("Consensus Score")
    return(p)
}


#' Barplot of top TFs from ChEA3 TF enrichment analysis
#'
#' @param scExp A SingleCellExperiment
#' @param group A character string specifying the differential group to display
#' the top TFs
#' @param set A character string specifying the set of genes in which the TF 
#' were enriched, either 'Differential', 'Enriched' or 'Depleted'. 
#' @param type A character string specifying the Y axis of the plot, either the
#' number of differential targets or the ChEA3 integrated mean score. E.g.
#' either "Score", "nTargets", "nTargets_over_TF" for the number of target genes
#' over the total number of genes targeted by the TF or "nTargets_over_genes" for
#' the  number of target genes over the number of genes in the gene set.
#' @param n_top An integer specifying the number of top TF to display
#'
#' @return A bar plot of top TFs from ChEA3 TF enrichment analysis
#' 
#' @importFrom forcats fct_reorder
#' 
#' @export
#' @examples 
#' data("scExp")
#' 
#' plot_top_TF_scExp(
#'  scExp,
#'  group = "C1",
#'   set = "Differential",
#'    type = "Score",
#'     n_top = 10)
#'     
#' plot_top_TF_scExp(
#'  scExp,
#'  group = "C1",
#'   set = "Enriched",
#'    type = "nTargets_over_genes",
#'     n_top = 20)
#'     
plot_top_TF_scExp <- function(
    scExp, group = unique(scExp$cell_cluster)[1], 
    set = c("Differential",'Enriched','Depleted')[1],
    type = c("Score", "nTargets", "nTargets_over_TF", "nTargets_over_genes")[1], n_top = 25){
  stopifnot(!is.null(scExp), type %in% c("nTargets", "Score", "nTargets_over_TF",
                                         "nTargets_over_genes"),
            set %in% c("Differential","Enriched","Depleted"))
  if (!"TF_enrichment" %in% names(scExp@metadata)){
      stop(paste0("ChromSCape::plot_top_TF - No TF enrichment, run ",
                  "enrich_TF_ChEA3_scExp before filtering."))
    }

  TF_enrichment = scExp@metadata$TF_enrichment[[group]][[set]]
  if(type == "nTargets"){
    p =  utils::head(TF_enrichment %>% dplyr::arrange(.data[["Score"]]), n_top) %>%
      dplyr::mutate(TF = forcats::fct_reorder(TF, .data[["Score"]])) %>%
      ggplot(aes(x=TF, y= .data[["nTargets"]])) + geom_bar(fill = "#009688", stat="identity") +
      theme_classic() + theme(axis.text.x = element_text(size = 15, angle=90),
                              axis.text.y = element_text(size = 14)) + xlab("") +
      ylab("Number of genes targeted by TF")
  } else if(type == "Score"){
    p = utils::head(TF_enrichment %>% dplyr::arrange(.data[["Score"]]), n_top) %>%
      dplyr::mutate(TF = forcats::fct_reorder(TF, .data[["Score"]])) %>%
      ggplot(aes(x=TF, y= .data[["Score"]])) + geom_bar(fill = "#009688", stat="identity") +
      theme_classic() + theme(axis.text.x = element_text(size = 15, angle=90),
                              axis.text.y = element_text(size = 14)) + xlab("") +
      ylab("ChEA3 Mean-Rank Score")
  } else if(type == "nTargets_over_TF"){
      df =  utils::head(TF_enrichment %>% dplyr::arrange(.data[["Score"]]), n_top) %>%
          dplyr::mutate(TF = forcats::fct_reorder(TF, .data[["Score"]])) %>%
          dplyr::mutate(Percentage_nTargets_totalTargetsTF = 100 *
                            round(.data[["nTargets"]] / .data[["totalTargetsTF"]],3))
      p = df %>%
          ggplot(aes(x=TF, y= .data[["Percentage_nTargets_totalTargetsTF"]])) + 
          geom_bar( fill = "#009688", stat="identity") +
          theme_classic() + theme(axis.text.x = element_text(size = 15, angle=90),
                                  axis.text.y = element_text(size = 14)) + xlab("") +
          ylab("nTargets over all TF targets (%)") + 
          ylim(c(0,max(10, max(df$Percentage_nTargets_totalTargetsTF))))
          
  } else if(type == "nTargets_over_genes"){
      df =  utils::head(TF_enrichment %>% dplyr::arrange(.data[["Score"]]), n_top) %>%
          dplyr::mutate(TF = forcats::fct_reorder(TF, .data[["Score"]])) %>%
          dplyr::mutate(Percentage_nTargets_totalGenes = 100 *
                            round(.data[["nTargets"]] / .data[["totalGenesInSet"]],3))
      p = df %>%
          ggplot(aes(x=TF, y= .data[["Percentage_nTargets_totalGenes"]])) + 
          geom_bar(fill = "#009688", stat="identity") +
          theme_classic() + theme(axis.text.x = element_text(size = 15, angle=90),
                                  axis.text.y = element_text(size = 14)) + xlab("") +
          ylab("nTargets over all genes in list (%)") + 
          ylim(c(0,max(30, max(df$Percentage_nTargets_totalGenes)))) 
  }
 
  return(p)
}


#' Violin plot of features 
#'
#' @param scExp A SingleCellExperiment
#' @param gene A character specifying the gene to plot 
#' @param by Color violin by cell_cluster or sample_id  ("cell_cluster")
#' @param max_distanceToTSS Numeric. Maximum distance to a gene's TSS to consider
#' a region linked to a gene. (1000)
#' @param downsample Downsample for plotting (5000)
#' 
#' @return A violin plot of intra-correlation
#' @export
#' @importFrom forcats fct_inorder
#' @examples 
#' data(scExp)
#' plot_violin_feature_scExp(scExp, "UBXN10")
#' 
plot_violin_feature_scExp <- function(
    scExp, gene, by = c( "cell_cluster", "sample_id")[1], downsample = 5000,
    max_distanceToTSS = 1000){
  
  if(ncol(scExp) > downsample) scExp = scExp[,sample(ncol(scExp),
                                                     downsample,
                                                     replace = FALSE)]
  
  annot = as.data.frame(SingleCellExperiment::colData(scExp))
  annot_feature = as.data.frame(SummarizedExperiment::rowRanges(scExp))
  if(nrow(annot_feature) == 0)
    stop("ChromSCape::plot_reduced_dim_scExp - The gene chosen, ", 
         gene, ", is not closer than ", max_distanceToTSS, 
         " to any", "loci. Consider increasing max_distanceToTSS to take in account ", 
         "this gene.")
  annot_feature =  annot_feature[grep(gene, annot_feature$Gene),]
  annot_feature = annot_feature %>% 
    tidyr::separate_rows(.data[["Gene"]], sep = ", ") %>% 
    dplyr::group_by(.data[["Gene"]]) %>%
    dplyr::slice_min(.data[["distanceToTSS"]])
  annot_feature = annot_feature %>%
    dplyr::filter( .data[["distanceToTSS"]] < max_distanceToTSS)
  
  if(!gene %in% annot_feature$Gene) stop(
    "ChromSCape::plot_reduced_dim_scExp - The gene chosen, ",
    gene, ", is not closer than ", max_distanceToTSS, " to any", 
    "loci. Consider increasing max_distanceToTSS to take in account ",
    "this gene.")
  
  counts = SingleCellExperiment::counts(scExp[annot_feature$ID[which(
    annot_feature$Gene == gene)],])
  
  if(!is.null(counts) & nrow(counts) > 1 ){
    counts = Matrix::colSums(counts)
  }
  annot[,gene] = as.numeric(counts)

  p <- ggplot(annot) + geom_violin(aes(x=.data[[by]], y= .data[[gene]],
                                      fill=.data[[by]]), alpha=0.8) + 
    geom_jitter(aes(x=.data[[by]], y= .data[[gene]]), height = 0.05, size = 0.3) +
    theme_classic() +
    theme(axis.text.x = element_text(angle=90)) + 
    ylab(paste0(gene)) + xlab("") 
  
  cols = unique(as.character(annot[,paste0(by,"_color")]))
  names(cols) = unique(as.character(annot[,by]))
  p <- p + scale_fill_manual(values = cols)
  
  return(p + theme(legend.title = element_text("")))
}

#' Barplot of the % of active cells for a given features 
#'
#' @param scExp A SingleCellExperiment
#' @param gene A character specifying the gene to plot 
#' @param by Color violin by cell_cluster or sample_id  ("cell_cluster")
#' @param max_distanceToTSS Numeric. Maximum distance to a gene's TSS to consider
#' a region linked to a gene. (1000)
#' @param highlight A specific group to highlight in a one vs all fashion
#' @param downsample Downsample for plotting (5000)
#' 
#' @return A violin plot of intra-correlation
#' @export
#' @importFrom forcats fct_inorder
#' @examples 
#' data(scExp)
#' plot_percent_active_feature_scExp(scExp, "UBXN10")
#' 
plot_percent_active_feature_scExp <- function(
    scExp, gene, by = c( "cell_cluster", "sample_id")[1], highlight = NULL,
    downsample = 5000, max_distanceToTSS = 1000){
    
    if(ncol(scExp) > downsample) scExp = scExp[,sample(ncol(scExp),
                                                       downsample,
                                                       replace = FALSE)]
    
    annot = as.data.frame(SingleCellExperiment::colData(scExp))
    annot_feature = as.data.frame(SummarizedExperiment::rowRanges(scExp))
    annot_feature =  annot_feature[grep(gene, annot_feature$Gene),]
    if(nrow(annot_feature) == 0)
      stop("ChromSCape::plot_reduced_dim_scExp - The gene chosen, ", 
           gene, ", is not closer than ", max_distanceToTSS, 
           " to any", "loci. Consider increasing max_distanceToTSS to take in account ", 
           "this gene.")
    annot_feature = annot_feature %>% 
        tidyr::separate_rows(.data[["Gene"]], sep = ", ") %>% 
        dplyr::group_by(.data[["Gene"]]) %>%
        dplyr::slice_min(.data[["distanceToTSS"]])
    annot_feature = annot_feature %>%
        dplyr::filter( .data[["distanceToTSS"]] < max_distanceToTSS)
    
    if(!gene %in% annot_feature$Gene) stop(
        "ChromSCape::plot_reduced_dim_scExp - The gene chosen, ",
        gene, ", is not closer than ", max_distanceToTSS, " to any", 
        "loci. Consider increasing max_distanceToTSS to take in account ",
        "this gene.")
    
    bin_counts = Matrix::Matrix((SingleCellExperiment::counts(scExp) > 0) + 0, sparse = TRUE)
    bin_counts = bin_counts[annot_feature$ID[which(annot_feature$Gene == gene)],]
    
    if(!is.null(bin_counts) & !is.null(dim(bin_counts))){
      bin_counts = Matrix::colSums(bin_counts)
    }
    annot[,gene] = as.numeric(bin_counts)
    
    cols = unique(as.character(annot[,paste0(by,"_color")]))
    names(cols) = unique(as.character(annot[,by]))
    
    if(!is.null(highlight)){
         df <- annot %>% mutate(group = ifelse(.data[[by]] == highlight, .data[[by]], "Other")) %>%
             group_by(group) %>% 
            summarise(percent_active = 100*sum(.data[[gene]]) / n())
         p <- df %>%
             ggplot() + geom_bar(aes(x=group, y= percent_active,
                                     fill=group),color ="black", stat = "identity", alpha=0.8) + 
             theme_classic() +
             theme(axis.text.x = element_text(angle=90)) + 
             ylab(paste0("% cell active - ",gene)) + xlab("") 
         cols = c(cols[highlight],"grey85")
         names(cols)[2] = "Other"
    } else{
        df <- annot %>% group_by(.data[[by]]) %>% 
            summarise(percent_active = 100*sum(.data[[gene]]) / n())
        p <- df %>%
            ggplot() + geom_bar(aes(x=.data[[by]], y= percent_active,
                                    fill=.data[[by]]),color ="black", stat = "identity", alpha=0.8) + 
            theme_classic() +
            theme(axis.text.x = element_text(angle=90)) + 
            ylab(paste0("% cell active - ",gene)) + xlab("") 
        
    }
    p <- p + scale_fill_manual(values = cols)
    
    
    
    return(p + theme(legend.title = element_text("")))
}
vallotlab/ChromSCape documentation built on Oct. 15, 2023, 1:47 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
vallotlab/ChromSCape
Analysis of single-cell epigenomics datasets with a Shiny App

R/plotting_functions.R
In vallotlab/ChromSCape: Analysis of single-cell epigenomics datasets with a Shiny App

R Package Documentation

Browse R Packages

We want your feedback!

vallotlab/ChromSCape Analysis of single-cell epigenomics datasets with a Shiny App

R/plotting_functions.R In vallotlab/ChromSCape: Analysis of single-cell epigenomics datasets with a Shiny App

R Package Documentation

Browse R Packages

We want your feedback!

vallotlab/ChromSCape
Analysis of single-cell epigenomics datasets with a Shiny App

R/plotting_functions.R
In vallotlab/ChromSCape: Analysis of single-cell epigenomics datasets with a Shiny App