R/fct_multi_cond.R

Defines functions getSignif_table getSignificantFunctions_multiCond subsetAnnot_multiCond getUniqueIntpairs_byCond getPieChart getUniqueDotplot getDistinctCouplets getRadar_df getBack2BackBarplot

Documented in getBack2BackBarplot getDistinctCouplets getPieChart getRadar_df getSignificantFunctions_multiCond getSignif_table getUniqueDotplot getUniqueIntpairs_byCond subsetAnnot_multiCond

#' Get back-to-back barplot for 2 conditions comparison
#'
#' @param tab_c1 barplot dataframe generated by getBarplotDF() for condition 1
#' @param tab_c2 barplot dataframe generated by getBarplotDF() for condition 1
#' @param lab_c1 label for condition 1
#' @param lab_c2 label for condition 2
#'
#' @return ggplot object
#' @importFrom tidyr pivot_longer
#' @importFrom dplyr bind_rows
getBack2BackBarplot <- function(tab_c1, tab_c2, 
                                lab_c1,
                                lab_c2){
    
    df <- merge(tab_c1, tab_c2, by = "cluster_names", all = TRUE)
    colnames(df) <- c("cluster_names", "n_paracrine_c1", "n_autocrine_c1", "n_paracrine_c2", "n_autocrine_c2")
    df[is.na(df)] <- 0
    
    difference_df <- data.frame(cluster_names = df$cluster_names)
    difference_df$tot_c1 <- df$n_paracrine_c1 + df$n_autocrine_c1
    difference_df$tot_c2 <- df$n_paracrine_c2 + df$n_autocrine_c2
    difference_df$diff_c1_c2 <- difference_df$tot_c1 - difference_df$tot_c2
    
    # From wide to long
    tab_c1 <- tidyr::pivot_longer(tab_c1,
                                  cols = c("n_paracrine","n_autocrine"), 
                                  names_to = "type", 
                                  names_prefix = "n_",
                                  values_to = "n_int")
    tab_c2 <- tidyr::pivot_longer(tab_c2, 
                                  cols = c("n_paracrine","n_autocrine"), 
                                  names_to = "type", 
                                  names_prefix = "n_",
                                  values_to = "n_int")
    # Add column for condition
    tab_c1$condition <- lab_c1
    tab_c2$condition <- lab_c2
    
    # Bind dfs
    barplot_df <- dplyr::bind_rows(tab_c1, tab_c2)
    
    cluster.colors <- scales::hue_pal(c = 80, l = 80)(
        length(unique(barplot_df$cluster_names)))
    
    g <- ggplot() +
        geom_bar(data = tab_c1,
                 aes(y = n_int, x = cluster_names,
                     fill = type,
                     color = cluster_names),
                 position="stack", stat="identity") +
        geom_bar(data = tab_c2,
                 aes(y = -n_int, x = cluster_names,
                     fill = type,
                     color = cluster_names),
                 position="stack", stat="identity") +
        geom_point(data = difference_df, aes(x = cluster_names, y = diff_c1_c2)) +
        geom_line(data = difference_df, aes(x = cluster_names, y = diff_c1_c2, group = 1), 
                  stat = "identity") +
        theme_minimal() +
        scale_fill_manual(values = c("#606060", "#C0C0C0")) + 
        scale_color_manual(values = cluster.colors) +
        theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), 
              text = element_text(size=20),
              strip.text = element_blank()) +
        guides(color = "none") + 
        labs(x = "Clusters", y = "# Interactions", 
             title = "Total number of interactions per cluster",
             subtitle = paste0(lab_c1, " (positive y-axis) vs ", lab_c2,
                               "  (negative y-axis)"))
    
    return(g)
    
}


#' #' Get radar plot of relative numbers of interactions for a certain cell type
#' #'
#' #' @param tab_c1 barplot dataframe from Viewpoint generated by getBarplotDF2() containing data for condition 1
#' #' @param tab_c2 barplot dataframe from Viewpoint generated by getBarplotDF2() containing data for condition 2
#' #' @param tab_c3 barplot dataframe from Viewpoint generated by getBarplotDF2() containing data for condition 3
#' #' @param lab_c1 label for condition 1
#' #' @param lab_c2 label for condition 2
#' #' @param lab_c3 label for condition 3
#' #' @param cell_name label of cell type of interest
#' #'
#' #' @return plot
#' #' @importFrom fmsb radarchart
#' #' @importFrom data.table transpose
#' getRadarPlot <- function(tab_c1, tab_c2, tab_c3,
#'                          lab_c1, lab_c2, lab_c3,
#'                          cell_name){
#'     if(is.null(tab_c3)){
#'         df <- merge(tab_c1, tab_c2, by = "Clusters", all = TRUE)
#'         colnames(df) <- c("Clusters", "nint_c1", "nint_c2")
#'     } else {
#'         df <- merge(tab_c1, tab_c2, by = "Clusters", all = TRUE)
#'         df <- merge(df, tab_c3, by = "Clusters", all = TRUE)
#'         colnames(df) <- c("Clusters", "nint_c1", "nint_c2", "nint_c3")
#'     }
#'     
#'     df[is.na(df)] <- 0
#'     
#'     cluster_names <- df$Clusters
#'     # add max and min
#'     max_nint <- max(df[, -1])
#'     df <- add_column(df, max_nint, .after = "Clusters")
#'     df <- add_column(df, "min_nint" = 0, .after = "max_nint")
#'     
#'     radar_df <- data.table::transpose(df[, -1])
#'     
#'     if(is.null(lab_c3)){
#'         rownames(radar_df) <- c("max", "min", lab_c1, lab_c2)
#'     } else {
#'         rownames(radar_df) <- c("max", "min", lab_c1, lab_c2, lab_c3)
#'     }
#'     
#'     colnames(radar_df) <- cluster_names
#'     
#'     color <- c("#438ECC", "#E97778", "#00BA38")
#'     
#'     fmsb::radarchart(
#'         radar_df, axistype = 1,
#'         # Customize the polygon
#'         pcol = color, 
#'         pfcol = scales::alpha(color, 0.5), plwd = 2, plty = 1,
#'         # Customize the grid
#'         cglcol = "grey", cglty = 1, cglwd = 0.8,
#'         # Customize the axis
#'         axislabcol = "grey30", 
#'         # Variable labels
#'         vlcex = 1.2, vlabels = colnames(radar_df),
#'         caxislabels = round(seq(from = 0, to = radar_df["max",1], length.out = 5)), 
#'         title = cell_name
#'     )
#'     legend(
#'         x = "bottomleft", legend = rownames(radar_df[-c(1,2),]), horiz = FALSE,
#'         bty = "n", pch = 20 , col = color,
#'         text.col = "black", cex = 1, pt.cex = 1.5
#'     )
#'     
#' }

#' Get radar df of relative numbers of interactions for a certain cell type
#'
#' @param tab_c1 barplot dataframe from Viewpoint generated by getBarplotDF2() containing data for condition 1
#' @param tab_c2 barplot dataframe from Viewpoint generated by getBarplotDF2() containing data for condition 2
#' @param tab_c3 barplot dataframe from Viewpoint generated by getBarplotDF2() containing data for condition 3
#' @param lab_c1 label for condition 1
#' @param lab_c2 label for condition 2
#' @param lab_c3 label for condition 3
#'
#' @return df to be then used with fmsb radarchart
#' @importFrom data.table transpose
getRadar_df <- function(tab_c1, tab_c2, tab_c3,
                         lab_c1, lab_c2, lab_c3){
    if(is.null(tab_c3)){
        df <- merge(tab_c1, tab_c2, by = "Clusters", all = TRUE)
        colnames(df) <- c("Clusters", "nint_c1", "nint_c2")
    } else {
        df <- merge(tab_c1, tab_c2, by = "Clusters", all = TRUE)
        df <- merge(df, tab_c3, by = "Clusters", all = TRUE)
        colnames(df) <- c("Clusters", "nint_c1", "nint_c2", "nint_c3")
    }
    
    df[is.na(df)] <- 0
    
    cluster_names <- df$Clusters
    # add max and min
    max_nint <- max(df[, -1])
    df <- add_column(df, max_nint, .after = "Clusters")
    df <- add_column(df, "min_nint" = 0, .after = "max_nint")
    
    radar_df <- data.table::transpose(df[, -1])
    
    if(is.null(lab_c3)){
        rownames(radar_df) <- c("max", "min", lab_c1, lab_c2)
    } else {
        rownames(radar_df) <- c("max", "min", lab_c1, lab_c2, lab_c3)
    }
    
    colnames(radar_df) <- cluster_names
    
    return(radar_df)
    
    
}

#' Get table of unique int-pairs/clust-pairs couplets
#'
#' @param data_cond1 filt.data() corresponding to chosen condition 1
#' @param data_cond2 filt.data() corresponding to chosen condition 2
#' @param data_cond3 filt.data() corresponding to chosen condition 3
#' @param lab_c1 data label for condition 1
#' @param lab_c2 data label for condition 2
#' @param lab_c3 data label for condition 3
#'
#' @return modified filt.data containing only unique couplets

getDistinctCouplets <- function(data_cond1, data_cond2, data_cond3 = NULL,
                                lab_c1, lab_c2, lab_c3 = NULL){
    # Add condition column
    data_cond1 <- data_cond1 %>%
        mutate(condition = lab_c1)
    data_cond2 <- data_cond2 %>%
        mutate(condition = lab_c2)
    if(!is.null(data_cond3)){
        data_cond3 <- data_cond3 %>%
            mutate(condition = lab_c3)
    }
    
    # Merge multiple conditions
    if(!is.null(data_cond3)){
        merged_data <- dplyr::bind_rows(data_cond1, data_cond2, data_cond3)
    } else{
        merged_data <- dplyr::bind_rows(data_cond1, data_cond2)
    }
    
    
    # Create column cluster_pair
    merged_data <- merged_data %>%
        tidyr::unite(col="cluster_pair", clustA:clustB, sep = "::", remove = FALSE)
    
    # Unique int-pairs/cluster-pairs
    distinct_pairs_clust <- merged_data %>%
        group_by(int_pair, cluster_pair) %>%
        mutate(n = n()) %>%
        filter(n == 1) 
    
    distinct_pairs_clust <- distinct_pairs_clust %>%
        arrange(int_pair) %>%
        select(-n)
    
    return(distinct_pairs_clust)
}

#' Plot dotplot containing only unique int-pair/cluster pairs with many conditions
#'
#' @param data_dotplot table with selected int_pairs for multiple conditions
#' @param clust.order how to order clusters
#' 
#' @return ggplot object

getUniqueDotplot <- function(data_dotplot, clust.order){
    
    data_dotplot$groups_x <- factor(data_dotplot$clustA, levels = clust.order)
    
    g <- ggplot(data_dotplot, aes(x = int_pair, y = cluster_pair)) +
        geom_point(aes(color = condition, size=4)) + 
        theme_minimal() +
        scale_color_manual(values = c("#438ECC", "#E97778", "#00BA38")) + 
        facet_grid(groups_x ~ ., scales = "free", space = "free") +
        theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1), 
              text = element_text(size=20),
              strip.text.y = element_blank(),
              strip.text.x = element_text(angle = 0)) +
        guides(size = "none") + 
        labs(x = "Int-pairs", y = "Cluster-pairs")
    return(g)
}


#' Get Pie Chart of unique couplets 
#'
#' @param data_dotplot same data used to generate dotplot
#' @importFrom dplyr desc
#' @return pie chart

getPieChart <- function(data_dotplot) {
    data <- data_dotplot %>%
        group_by(condition) %>%
        summarise(value = n())
    
    # Compute the position of labels
    data <- data %>% 
        arrange(dplyr::desc(condition)) %>%
        mutate(prop = value / sum(data$value) *100) %>%
        mutate(ypos = cumsum(prop)- 0.5*prop ) %>%
        mutate(perc = paste(round(prop, digits = 0), "%", sep = " "))
    
    # Basic pie chart
    g <- ggplot(data, aes(x="", y=prop, fill=condition)) +
        geom_bar(stat="identity", width=1, color="white") +
        coord_polar("y", start=0) +
        theme_void() + 
        geom_text(aes(y = ypos, label = perc), color = "white", size=8) +
        scale_fill_manual(values = c("#438ECC", "#E97778", "#00BA38")) +
        theme(text = element_text(size=20)) 
    return(g)
}


#' Get table of unique int-pairs by condition
#'
#' @param data_cond1 filt.data() corresponding to chosen condition 1
#' @param data_cond2 filt.data() corresponding to chosen condition 2
#' @param data_cond3 filt.data() corresponding to chosen condition 3
#' @param lab_c1 data label for condition 1
#' @param lab_c2 data label for condition 2
#' @param lab_c3 data label for condition 3
#'
#' @return modified merged filt.data containing only unique intpairs
#' 
getUniqueIntpairs_byCond <- function(data_cond1, data_cond2, data_cond3 = NULL,
                                     lab_c1, lab_c2, lab_c3 = NULL){
    # Get table of unique int-pairs per condition
    # Add condition column
    data_cond1 <- data_cond1 %>%
        mutate(condition = lab_c1)
    data_cond2 <- data_cond2 %>%
        mutate(condition = lab_c2)
    if(!is.null(data_cond3)){
        data_cond3 <- data_cond3 %>%
            mutate(condition = lab_c3)
    }
    
    # Merge multiple conditions
    if(!is.null(data_cond3)){
        merged_data <- dplyr::bind_rows(data_cond1, data_cond2, data_cond3)
    } else{
        merged_data <- dplyr::bind_rows(data_cond1, data_cond2)
    }
    
    
    # Unique int-pairs by condition
    unique_int_pairs <- merged_data %>%
        group_by(int_pair, condition) %>%
        summarise(n_int_pair = n()) %>%
        group_by(int_pair) %>%
        mutate(n_cond = n()) %>%
        filter(n_cond == 1)
    
    clust_info <- merged_data %>%
        group_by(int_pair, condition) %>%
        mutate(clust_pair = paste(clustA, clustB, sep = "::")) %>%
        summarise(clust_pairs = paste(clust_pair, collapse = ","))
    
    unique_int_pairs$clust_pairs <- clust_info$clust_pairs[clust_info$int_pair %in% unique_int_pairs$int_pair]
    
    
    return(unique_int_pairs)
}

#' Subset int-pair by function matrices to unique int-pairs by condition
#'
#' @param annot_cond1 binary matrix int-pair by functions for cond1
#' @param annot_cond2 binary matrix int-pair by functions for cond2
#' @param annot_cond3 binary matrix int-pair by functions for cond3
#' @param unique_intpairs table of unique int-pairs by condition
#' @param lab_c1 label cond1
#' @param lab_c2 label cond2
#' @param lab_c3 label cond3
#'
#' @return subset merged matrix

subsetAnnot_multiCond <- function(annot_cond1, annot_cond2, annot_cond3,
                                  unique_intpairs,
                                  lab_c1, lab_c2, lab_c3){
    
    sub_annot_cond1 <- annot_cond1[rownames(annot_cond1) %in% unique_intpairs$int_pair[unique_intpairs$condition == lab_c1],]
    sub_annot_cond2 <- annot_cond2[rownames(annot_cond2) %in% unique_intpairs$int_pair[unique_intpairs$condition == lab_c2],]
    
    #remove empty columns
    sub_annot_cond1 <- sub_annot_cond1[, colSums(sub_annot_cond1) != 0]
    sub_annot_cond2 <- sub_annot_cond2[, colSums(sub_annot_cond2) != 0]
    
    if(!is.null(annot_cond3)){
        sub_annot_cond3 <- annot_cond3[rownames(annot_cond3) %in% unique_intpairs$int_pair[unique_intpairs$condition == lab_c3],]
        sub_annot_cond3 <- sub_annot_cond3[, colSums(sub_annot_cond3) != 0]
    }
    
    
    if(!is.null(annot_cond3)){
        sub_annot <- data.table::rbindlist(list(data.table::as.data.table(sub_annot_cond1), 
                                                data.table::as.data.table(sub_annot_cond2),
                                                data.table::as.data.table(sub_annot_cond3)), 
                                           fill = TRUE,
                                           use.names = TRUE)
    } else{
        sub_annot <- data.table::rbindlist(list(data.table::as.data.table(sub_annot_cond1), 
                                                data.table::as.data.table(sub_annot_cond2)), 
                                           fill = TRUE,
                                           use.names = TRUE)
    }
    
    
    # replace NA with zeros
    sub_annot <- as.matrix(sub_annot)
    sub_annot[is.na(sub_annot)] <- 0
    if(!is.null(annot_cond3)){
        rownames(sub_annot) <- c(rownames(sub_annot_cond1), 
                                 rownames(sub_annot_cond2), 
                                 rownames(sub_annot_cond3))
    } else {
        rownames(sub_annot) <- c(rownames(sub_annot_cond1), 
                                 rownames(sub_annot_cond2))
    }
    
    
    return(sub_annot)
}




#' Get significance of functional terms related to unique int-pairs per condition
#'
#' @param sub_annot annotation matrix subset to unique int-pairs
#' @param unique_intpairs data.frame with unique int-pairs by condition
#'
#' @return data.frame with calculated pvalue of significance

getSignificantFunctions_multiCond <- function(sub_annot,
                                              unique_intpairs){
    permMat <- t(sub_annot)
    
    # vector of conditions
    condition_vec <- unique_intpairs$condition
    names(condition_vec) <- unique_intpairs$int_pair
    
    # subset to unique int-pairs that are actually annotated
    condition_vec <- condition_vec[unique_intpairs$int_pair %in% colnames(permMat)]

    hits_true <- getHitsf(permMat, condition_vec)
    hits_perm <- list()

    for(np in seq_len(999)){
        # shuffle cols of original matrix (int-pairs, assigned to modules)
        shufMat <- permMat[,sample(colnames(permMat), ncol(permMat),
                                   replace = FALSE)]
        colnames(shufMat) <- colnames(permMat)
        hits_perm[[np]] <- getHitsf(shufMat, condition_vec)
    }

    # calculate empirical pvalue
    emp_pvalue <- matrix(0, nrow = nrow(permMat),
                         ncol = length(unique(condition_vec)))
    rownames(emp_pvalue) <- rownames(permMat)
    colnames(emp_pvalue) <- unique(condition_vec)
    for(gM in seq_len(ncol(hits_true))){
        for(fM in seq_len(nrow(hits_true))){
            hits_gm_fm <- unlist(lapply(hits_perm, function(x) x[fM, gM]))
            emp_pvalue[fM,gM] <- (1 + sum(hits_gm_fm >= hits_true[fM,gM]))/1000
        }
    }

    pvalue_df <- cbind(emp_pvalue, functionalTerm = rownames(emp_pvalue))
    pvalue_df <- tidyr::gather(as.data.frame(pvalue_df),
                               key = "condition",
                               value = "p_value",
                               unique(condition_vec),
                               factor_key = FALSE)

    
    # Adding int_pairs to each functional term
    if(nrow(pvalue_df) > 0){
        for(r in seq_len(nrow(pvalue_df))){
            int_pairs_all <- rownames(sub_annot)[
                sub_annot[, pvalue_df$functionalTerm[r]] == 1]
            pvalue_df[r, "int_pair_list"] <- paste(
                intersect(int_pairs_all, names(condition_vec)[
                    condition_vec %in% pvalue_df$condition[r]]), collapse = ",")
        }


    }

    return(pvalue_df)
}


#' Wrapper for other functions to get significant table of func terms
#'
#' @param data_cond1 filt.data() corresponding to chosen condition 1
#' @param data_cond2 filt.data() corresponding to chosen condition 2
#' @param data_cond3 filt.data() corresponding to chosen condition 3
#' @param lab_c1 data label for condition 1
#' @param lab_c2 data label for condition 2
#' @param lab_c3 data label for condition 3
#' @param annot_cond1 binary matrix int-pair by functions for cond1
#' @param annot_cond2 binary matrix int-pair by functions for cond2
#' @param annot_cond3 binary matrix int-pair by functions for cond3
#' 
#' @return list containing pvalue_df and unique_intpairs df

getSignif_table <- function(data_cond1, data_cond2, data_cond3,
                            lab_c1, lab_c2, lab_c3,
                            annot_cond1, annot_cond2, annot_cond3){
    #table of unique int-pairs by condition
    unique_intpairs <- getUniqueIntpairs_byCond(data_cond1 = data_cond1,
                                                data_cond2 = data_cond2,
                                                data_cond3 = data_cond3,
                                                lab_c1 = lab_c1,
                                                lab_c2 = lab_c2,
                                                lab_c3 = lab_c3)
    # subset annotation matrices to only unique int-pairs and merge
    sub_annot <- subsetAnnot_multiCond(annot_cond1, annot_cond2, annot_cond3,
                                       unique_intpairs,
                                       lab_c1, lab_c2, lab_c3)
    
    pvalue_df <- getSignificantFunctions_multiCond(sub_annot,
                                                   unique_intpairs)
    
    unique_intpairs <- unique_intpairs %>%
        select(-c(n_int_pair, n_cond))
    
    return(list(pvalue_df = pvalue_df, unique_intpairs = unique_intpairs))
    
}
martaint/InterCellar documentation built on April 7, 2022, 11:44 a.m.