knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
library(ggplot2)
library(reshape2)
library(dplyr)
library(pheatmap)
library(limma)
library(DT)
library(tidyr)    
library(igraph)
library(grid)

library(plotly)


organism = "org.Hs.eg.db"
library(organism, character.only = TRUE)

library(EnsDb.Hsapiens.v79)
library(msigdbr)


library(clusterProfiler)
library(DOSE)
library(enrichplot)

library(data.table)
library(DT)
find_de_CCI <- function(data){

   data <- t(data)
   patient <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 1) ) 

    remove <- which(patient == "NA")
    if (length(remove) > 0 ){
       patient <-  patient[-c(remove)]
       data <- data[ ,  -c(remove)]
    }
    condition  <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 2))
    condition <- data.frame(condition = condition )


    design <- model.matrix(~condition, data = condition)
    fit <- lmFit(data, design)
    fit <- eBayes(fit)

    tT <- topTable(fit, n = Inf) 

    tT <- tT[ tT$P.Value < 0.1, ]

    celltype <- unlist(lapply ( strsplit( rownames(tT), split = "--", fixed=TRUE ), `[`, 1))

    celltype <-  sort(table(celltype), decreasing = TRUE) 
    celltype <- celltype[1:min(length(celltype), 3)]
    celltype <- data.frame(data.frame(celltype))

    number_celltype <- nrow( tT )


    return( result = list(number_celltype,  celltype))
}




find_de_pathway_specific <- function(data){

    celltype <- unlist( lapply(strsplit(colnames(data) ,split = "-", fixed=TRUE ) , `[`, 2) )
    df <- NULL
    for (thiscelltype in unique(celltype)){

      index <- which(celltype == thiscelltype)
      thisdata <- data[, index ]
      thisresult <- find_de_not_celltype_specific( thisdata )

      df <- rbind(df, data.frame( thiscelltype  , thisresult[[1]][1],  thisresult[[1]][1] / ncol(thisdata) ) )
    }
    colnames(df) <- c("celltype" , "num_sig_features" , "prop_sig_features")
    df <- df[order(df$prop_sig_features, decreasing = TRUE), ]
    df <- df[ 1: min(nrow(df), 3), ]

    return(df )
}

find_de_celltype_specific <- function(data){

    celltype <- unlist( lapply(strsplit(colnames(data) , split = "-", fixed=TRUE ) , `[`, 1) )
    df <- NULL
    for (thiscelltype in unique(celltype)){

      index <- which(celltype == thiscelltype)
      thisdata <- data[, index, drop=FALSE ]
      thisresult <- find_de_not_celltype_specific( thisdata )

      df <- rbind(df, data.frame( thiscelltype  , thisresult[[1]][1],  thisresult[[1]][1] / ncol(thisdata) ) )
    }
    colnames(df) <- c("celltype" , "num_sig_features" , "prop_sig_features")
    df <- df[order(df$prop_sig_features, decreasing = TRUE), ]
    df <- df[ 1: min(nrow(df), 3), ]

    return(df )
}








find_de_not_celltype_specific <- function(data){

   data <- t(data)
   patient <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 1) ) 

    remove <- which(patient == "NA")
    if (length(remove) > 0 ){
       patient <-  patient[-c(remove)]
       data <- data[ ,  -c(remove)]
    }
    condition  <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 2))
    condition <- data.frame(condition = condition )


    design <- model.matrix(~condition, data = condition)
    fit <- lmFit(data, design)
    fit <- eBayes(fit)

    tT <- topTable(fit, n = Inf) 

    tT <- tT[ tT$P.Value < 0.1, ]

    number_celltype <- nrow( tT )
    top_celltype <- rownames(tT)[1:min(number_celltype, 3)]

    return( result = list(number_celltype, top_celltype))
}




find_de_celltype_interaction <- function(data){

   data <- t(data)
   patient <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 1) ) 

    remove <- which(patient == "NA")
    if (length(remove) > 0 ){
       patient <-  patient[-c(remove)]
       data <- data[ ,  -c(remove)]
    }
    condition  <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 2))
    condition <- data.frame(condition = condition )


    design <- model.matrix(~condition, data = condition)
    fit <- lmFit(data, design)
    fit <- eBayes(fit)

    tT <- topTable(fit, n = Inf) 

    tT <- tT[ tT$P.Value < 0.1, ]

    celltype <- rownames(tT)

    celltype <-  sort(table(celltype), decreasing = TRUE) 
    celltype <- celltype[1:min(length(celltype), 3)]
    celltype <- data.frame(data.frame(celltype))

    number_celltype <- nrow( tT )


    return( result = list(number_celltype,  celltype))
}

This file runs association study using the given features and sample conditions and plots the key features from each feature category using a representative figure. The purpose is not to provide a comprehensive analysis in a single HTML but to help point directions for future investigation.

Overview of the association study result

Here we provide a brief overview of the association study result, including the number of features in each feature type, and the number of features that are significantly associated ( P-value < 0.1) with the conditions of the interest.

num_features <- lapply(scfeatures_result, dim)
num_features <- t( data.frame(  lapply(num_features, `[` , 2) )) 
colnames(num_features) <- "Number of features"
# get number of DE's in proportion_raw

df <- NULL

if ( "proportion_raw" %in% names( scfeatures_result ) ){
  data <- scfeatures_result$proportion_raw
  de_proportion_raw <- find_de_not_celltype_specific(data)

  de_proportion_raw <- data.frame("proportion_raw" ,
                                  de_proportion_raw[[1]][1],
                    paste0( unlist( de_proportion_raw[[2]]),  collapse=",  " ) )

  df <- rbindlist(list(df ,de_proportion_raw ), use.names=FALSE )

}


if ("proportion_logit" %in% names( scfeatures_result )){
  data <- scfeatures_result$proportion_logit
de_proportion_logit <- find_de_not_celltype_specific(data)
de_proportion_logit  <- data.frame("proportion_logit" , 
                                   de_proportion_logit [[1]][1],
                              paste0( unlist( de_proportion_logit [[2]]),   collapse=",  "  ) )
df <- rbindlist(list(df , de_proportion_logit ), use.names=FALSE )


}

if ("proportion_ratio" %in% names(scfeatures_result)){
  data <- scfeatures_result$proportion_ratio
  de_proportion_ratio <- find_de_CCI(data)
  de_proportion_ratio   <- data.frame("proportion_ratio" ,
                                de_proportion_ratio[[1]][1],
                                paste0( unlist( de_proportion_ratio[[2]]$celltype ),   collapse="\n"  ) )
  df <- rbindlist(list(df , de_proportion_ratio ), use.names=FALSE )
}



if ("gene_mean_celltype" %in% names(scfeatures_result)){
  data <- scfeatures_result$gene_mean_celltype
  de_gene_mean_celltype <- find_de_celltype_specific(data)
  de_gene_mean_celltype   <- data.frame("gene_mean_celltype" ,
                                 sum( de_gene_mean_celltype$num_sig_features),
                   paste0( unlist( de_gene_mean_celltype$celltype ),   collapse=",  "  ) )
  df <- rbindlist(list(df , de_gene_mean_celltype ), use.names=FALSE )

}


if ("gene_prop_celltype" %in% names(scfeatures_result)){
  data <- scfeatures_result$gene_prop_celltype
 de_gene_prop_celltype <- find_de_celltype_specific(data)
 de_gene_prop_celltype    <- data.frame("gene_prop_celltype" ,
                                 sum( de_gene_prop_celltype$num_sig_features),
                   paste0( unlist( de_gene_prop_celltype$celltype ),  collapse=",  "  ) )
 df <- rbindlist(list(df , de_gene_prop_celltype ), use.names=FALSE )

}


if ("gene_cor_celltype" %in% names(scfeatures_result)){
  data <- scfeatures_result$gene_cor_celltype
de_gene_cor_celltype <- find_de_celltype_specific(data)
de_gene_cor_celltype    <- data.frame("gene_cor_celltype" ,
                                 sum( de_gene_cor_celltype$num_sig_features),
                   paste0( unlist( de_gene_cor_celltype$celltype ),  collapse=",  "  ) )
df <- rbindlist(list(df , de_gene_cor_celltype), use.names=FALSE )



}


if ("pathway_gsva" %in% names(scfeatures_result)){

  data <- scfeatures_result$pathway_gsva
  de_pathway_gsva <-  find_de_pathway_specific(data)
  de_pathway_gsva  <- data.frame("pathway_gsva" ,
                                   sum( de_pathway_gsva$num_sig_features),
                     paste0( unlist( de_pathway_gsva$celltype ),  collapse=",  "  ) )
  df <- rbindlist(list(df , de_pathway_gsva  ), use.names=FALSE )


}


if ("pathway_mean" %in% names(scfeatures_result)){

  data <- scfeatures_result$pathway_mean
  de_pathway_mean <-  find_de_pathway_specific(data)
  de_pathway_mean  <- data.frame("pathway_mean" ,
                                   sum( de_pathway_mean$num_sig_features),
                     paste0( unlist( de_pathway_mean$celltype ),   collapse=",  " ) )
  df <- rbindlist(list(df , de_pathway_mean  ), use.names=FALSE )

}


if ("pathway_prop" %in% names(scfeatures_result)){

  data <- scfeatures_result$pathway_prop
  de_pathway_prop <-  find_de_pathway_specific(data)
  de_pathway_prop  <- data.frame("pathway_prop" ,
                                   sum(de_pathway_prop$num_sig_features),
                     paste0( unlist( de_pathway_prop$celltype ), collapse=",  "  ) )
  df <- rbindlist(list(df , de_pathway_prop ), use.names=FALSE )

}


if ("CCI" %in% names(scfeatures_result)){

  data <- scfeatures_result$CCI
  de_CCI <- find_de_CCI(data)
  de_CCI   <- data.frame("CCI" ,
                     de_CCI[[1]][1] ,
                    paste0( unlist( de_CCI[[2]]$celltype  ),  collapse=",  " ) )
  df <- rbindlist(list(df , de_CCI  ), use.names=FALSE )

} 


if ("gene_mean_bulk" %in% names(scfeatures_result)){

  data <- scfeatures_result$gene_mean_bulk
  de_gene_mean_bulk <-  find_de_not_celltype_specific(data)
  de_gene_mean_bulk    <- data.frame("gene_mean_bulk" ,
                       de_gene_mean_bulk[[1]][1] ,
                        "not applicable" )
  df <- rbindlist(list(df , de_gene_mean_bulk  ), use.names=FALSE )
} 

if ("gene_cor_bulk" %in% names(scfeatures_result)){
  data <- scfeatures_result$gene_cor_bulk
  de_gene_cor_bulk <-  find_de_not_celltype_specific(data)
  de_gene_cor_bulk    <- data.frame("gene_cor_bulk" ,
                      de_gene_cor_bulk[[1]][1] ,
                        "not applicable" )
  df <- rbindlist(list(df ,de_gene_cor_bulk), use.names=FALSE )
} 

if ("gene_prop_bulk" %in% names(scfeatures_result)){
  data <- scfeatures_result$gene_prop_bulk
  de_gene_prop_bulk <-  find_de_not_celltype_specific(data)
  de_gene_prop_bulk    <- data.frame("gene_prop_bulk" ,
                      de_gene_prop_bulk[[1]][1] ,
                        "not applicable" )
  df <- rbindlist(list(df , de_gene_prop_bulk ), use.names=FALSE )

} 



if ("L_stats" %in% names(scfeatures_result)){
  data <- scfeatures_result$L_stats
  de_L_stats <-  find_de_celltype_interaction(data)
  de_L_stats  <- data.frame("L_stats" ,
                     de_L_stats[[1]][1] ,   
                paste0( unlist( de_L_stats [[2]]$celltype  ),   collapse=",  "  )  )
  df <- rbindlist(list(df , de_L_stats ), use.names=FALSE )

} 



if ("morans_I" %in% names(scfeatures_result)){

  data <- scfeatures_result$morans_I
  de_morans_I <-  find_de_not_celltype_specific(data)
  de_morans_I    <- data.frame("morans_I" ,
                    de_morans_I[[1]][1] ,   "not applicable" )
  df <- rbindlist(list(df , de_morans_I), use.names=FALSE )

} 


if ("celltype_interaction" %in% names(scfeatures_result)){

  data <- scfeatures_result$celltype_interaction
  de_celltype_interaction <-  find_de_celltype_interaction(data)
  de_celltype_interaction    <- data.frame("celltype_interaction" ,
              de_celltype_interaction [[1]][1] ,
              paste0( unlist( de_celltype_interaction [[2]]$celltype  ),  collapse=",  "  ) )
  df <- rbindlist(list(df , de_celltype_interaction), use.names=FALSE )

} 

if ("nn_correlation" %in% names(scfeatures_result)){
  data <- scfeatures_result$nn_correlation
  de_nn_correlation <-  find_de_not_celltype_specific(data)
  de_nn_correlation  <- data.frame("nn_correlation" ,
                de_nn_correlation[[1]][1] ,  "not applicable")
  df <- rbindlist(list(df ,de_nn_correlation ), use.names=FALSE )

} 


num_features <- data.frame(num_features)

num_features$feature_type <- rownames(num_features)  
colnames(df) <- c("feature_type", "number of significant features", "top three important cell types")

df <- merge(df, num_features, by.x = "feature_type" , by.y = "feature_type")

df <- df[, c(1, 4, 2, 3)]
colnames(df)  <-  c("feature type", "total number of features",  "number of significant features", "top three important cell types")
datatable(df)
plot_barplot <- function(data , dodge=F  ){


  data$patient <- unlist( lapply( strsplit( rownames(data ), "_cond_"), `[`, 1))
  data$condition <- unlist( lapply( strsplit( rownames(data ), "_cond_"), `[`, 2))

  data <- as.data.frame( melt(data, id=c("patient", "condition")) )

  if(dodge){
   p <- ggplot(data , aes( x = patient , y = value , fill = variable) ) +   
    geom_bar(stat="identity"  ) + facet_wrap(~variable+condition, scale="free") + theme_minimal() + 
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) 

  } else{
  p <-   ggplot(data , aes( x = patient , y = value , fill = variable) ) +   
    geom_bar(stat="identity"   ) + facet_wrap(~condition, scale="free") + 
      theme_minimal() + 
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) 
  }


 return (p)



}
plot_boxplot <- function(data, num_feature = 3   ){

   data <- t(data)
   patient <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 1) ) 

    remove <- which(patient == "NA")
    if (length(remove) > 0 ){
       patient <-  patient[-c(remove)]
       data <- data[ ,  -c(remove)]
    }
    condition  <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 2))
    condition <- data.frame(condition = condition )


    design <- model.matrix(~condition, data = condition)
    fit <- lmFit(data, design)
    fit <- eBayes(fit)

    tT <- topTable(fit, n = Inf) 

    data <- melt(data)


    print(paste0( "up regulated in ", unique(condition$condition)[1]))
    top_gene <- tT[ tT$logFC > 0 , ]
    top_gene <- rownames(top_gene )
    top_gene  <- top_gene[1:min(num_feature, length(top_gene) ) ]

   if (length(top_gene ) == 0){

   }else {
      data_toplot <- data[data$Var1 %in%  top_gene, ]
      data_toplot$cond <-  unlist( lapply( strsplit(as.character(data_toplot$Var2), "_cond_"), `[`, 2))
      data_toplot$Var1 <- factor(data_toplot$Var1, levels =top_gene )

      p <- ggplot( data_toplot, aes( y = value, x = Var1 , colour = cond, text = Var2)) + geom_boxplot() +   geom_point(aes(fill = cond), size = 1, shape = 21, position = position_jitterdodge()) +  theme_minimal() + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))   

   }

   return (p ) 

}
plot_pca <- function(data, filename ){

  gse_pca <- prcomp( data)
  condition <- unlist( lapply( strsplit(rownames(data), "_cond_" ) , `[`, 2))

  df_toplot <- data.frame(condition , 
                          pc1 = gse_pca$x[,1], pc2 = gse_pca$x[,2]  )
  df_toplot$patient <- rownames(df_toplot)
  p <- ggplot(df_toplot, aes(x = pc1, y = pc2, color = condition , text = patient  )) + 
    geom_point(size = 4) + 
    theme_minimal() 

  return (p )

}

Cell type proportions {.tabset .tabset-fade .tabset-pills}

knitr::include_graphics( system.file("extdata/figure", "celltypeproportion_example_figures.png",   package = "scFeatures")  )
  1. Barplot shows the composition of cells types
  2. Boxplot shows the top cell types that differs between conditions
  3. PCA plot shows the separation of conditions based on the cell type proportion features

Composition barplot

if ("proportion_raw" %in% names(scfeatures_result)){

  data <- scfeatures_result$proportion_raw

  p <-  plot_barplot(data  ) 

  ggplotly(p)

} 

Boxplot of top features

if ("proportion_raw" %in% names(scfeatures_result)){
  data <- scfeatures_result$proportion_raw
  p <- plot_boxplot(data ) 
   ggplotly(p) |> layout(boxmode = "group") 
} 

PCA plot

if ("proportion_raw" %in% names(scfeatures_result)){
    data <- scfeatures_result$proportion_raw
    p <- plot_pca(data )  
    ggplotly(p) 
} 
plot_heatmap_bulk <- function(data , num_features = 20  ){



    patient <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 1) ) 
    remove <- which(patient == "NA")
    if (length(remove) > 0 ){
       patient <-  patient[-c(remove)]
       data <- data[ ,  -c(remove)]
    }
    condition  <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 2))
    condition <- data.frame(condition = condition )


    design <- model.matrix(~condition, data = condition)
    fit <- lmFit(data, design)
    fit <- eBayes(fit)

    suppressMessages ( tT <- topTable(fit, n = Inf) )
    gene_output <- tT

    # print ( DT::datatable(round(tT, 2)) )

    print(paste0( "up regulated in ", unique(condition$condition)[1]))
    top_gene <- tT[ tT$logFC > 0 , ]
    top_gene <- rownames(top_gene )[1:min( num_features, nrow(top_gene)) ]
    rownames( condition) <- colnames(data)

   if ( all(is.na(top_gene)) ){
          p <- "no significant genes"
    }else if (length(top_gene) == 1){
             pheatmap( data[top_gene, , drop=FALSE]  ,   
                  annotation = condition,
                   cluster_rows = FALSE  , color=colorRampPalette(c("navy", "white", "red"))(50) )
   }else{

        p <-  pheatmap( data[top_gene, ]  ,  annotation = condition,
              fontsize_row = 5 , fontsize_col = 5,
                scale = "row" , color=colorRampPalette(c("navy", "white", "red"))(50) )
   }

    return(list( gene_output = gene_output, p = p ) ) 

}



plot_go_bulk  <- function(gene_output ){


      try({
        print("up regulated")

        up <- rownames(gene_output[gene_output$logFC > 0, ])[1:200]

        gse <- enrichGO(gene = up , 
                   ont ="BP" ,  keyType = "SYMBOL" , 
                   minGSSize = 3,   maxGSSize = 800, 
                   pvalueCutoff =0.05,  pAdjustMethod =  "fdr",
                   OrgDb = organism  )

         dotplot <- dotplot(gse, showCategory=10,  font.size = 8 ) 


         gse <- enrichplot::pairwise_termsim(gse)


        emapplot <-  emapplot(gse, showCategory = 10,  
                         cex_label_category=0.7, cex_line = 0.7 ,   cex_category = 0.7) 



       treeplot <-  treeplot(gse,  fontsize = 3 , cex_category = 0.2, label_format = 5,
                        offset = 5)


      }) 

     return (list(  dotplot =   dotplot ,  emapplot =  emapplot ,   treeplot  =   treeplot  )) 

}

Cell type specific gene expressions {.tabset .tabset-fade .tabset-pills}

knitr::include_graphics( system.file("extdata/figure",  "celltypegene_example_figures.png" ,   package = "scFeatures")  )
  1. Heatmaps shows the top cell type specific gene expression features that differs between conditions
  2. MA plot shows the expression and log2 fold change of the cell type specific gene expression features
  3. Volcano plot shows the log2 fold change and P-values of the cell type specific gene expression features
  4. PCA plot shows the separation of conditions based on the cell type specific gene expression features
  5. Dot plot shows the pathway enrichment of the top cell type specific gene expression features that differs between conditions
  6. Enrichment map of the top cell type specific gene expression features that differs between conditions
  7. Functional grouping of the top cell type specific gene expression features that differs between conditions

Heatmap

if ("gene_mean_celltype" %in% names(scfeatures_result)){
  data <- scfeatures_result$gene_mean_celltype
  data <- t(data)


  celltype <- rownames(data)
  celltype <- unlist( lapply( strsplit(celltype,split = "-", fixed=TRUE  ), `[`, 1) )

  topcelltype <- strsplit( de_gene_mean_celltype[, 3] , ", ")[[1]][1]

  data <- data[ which(celltype == topcelltype ) , ]

  result <- plot_heatmap_bulk(data , num_features = 20 )

  gene_output <- result$gene_output

  result$p
}

MA plot

if ("gene_mean_celltype" %in% names(scfeatures_result)){

  gene_output$gene <- rownames(gene_output)

  g <- ggplot(gene_output, aes(x = AveExpr, y = logFC , text  = gene))+
      geom_point(aes(colour=-log10(P.Value)), alpha=1/3, size=1) +
      scale_colour_gradient(low="blue",high="red")+
      ylab("log2 fold change") + xlab("Average expression")+ theme_minimal()

  ggplotly(g)

}

Volcano plot

if ("gene_mean_celltype" %in% names(scfeatures_result)){


  p <- ggplot( gene_output , aes(logFC,-log10(P.Value) , text = gene ) )+
      geom_point(aes(colour=-log10(P.Value)), alpha=1/3, size=1) +
      scale_colour_gradient(low="blue",high="red")+
      xlab("log2 fold change") + ylab("-log10 p-value") + theme_minimal()


  ggplotly(p)


}

PCA plot

if ("gene_mean_celltype" %in% names(scfeatures_result)){

  p <- plot_pca(t( data) )

  ggplotly(p)

}

Dot plot

if ("gene_mean_celltype" %in% names(scfeatures_result)){

  rownames( gene_output  ) <- make.unique(unlist( lapply ( strsplit( rownames( gene_output  ) , split = "-", fixed=TRUE ), `[` , 2) ))
  result <- plot_go_bulk(gene_output )

  print(result$dotplot)

}

Enrichment map

if ("gene_mean_celltype" %in% names(scfeatures_result)){

  print(result$emapplot) 

}

Functional grouping

if ("gene_mean_celltype" %in% names(scfeatures_result)){

   print(result$treeplot) 
}

Cell type specific pathway expressions {.tabset .tabset-fade .tabset-pills}

knitr::include_graphics(  system.file("extdata/figure",  "pathway_example_figures.png" ,   package = "scFeatures")    )
  1. Heatmaps shows the top cell type specific pathway expression features that differs between conditions
  2. Boxplot shows the top cell type specific pathway expression features that differs between conditions
  3. PCA plot shows the separation of conditions based on the cell type specific pathway expression features

Heatmap

if ("pathway_mean" %in% names(scfeatures_result)){

  data <- scfeatures_result$pathway_mean
  data <- t(data)

  gene_output <- plot_heatmap_bulk(data )
  gene_output$p

} 

Boxplot

if ("pathway_mean" %in% names(scfeatures_result)){

  p <- plot_boxplot(t(data) )

  ggplotly(p)  |>layout(boxmode = "group") 

}

PCA plot

if ("pathway_mean" %in% names(scfeatures_result)){

  p <- plot_pca(t( data) )
  ggplotly(p)

}
plot_CCI <- function(data ){


    patient <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 1) ) 

    remove <- which(patient == "NA")
    if (length(remove) > 0 ){
       patient <-  patient[-c(remove)]
       data <- data[ , -c(remove)]
    }

    condition <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 2) ) 
    celltype <- unlist( lapply( strsplit( rownames(data), split = "--", fixed=TRUE ), `[`, 1) )

    thiscelltype <-  unique(celltype)[1]

    df <- NULL
    for (thiscelltype in unique(celltype)){
       data_thiscelltype <- data[which(celltype == thiscelltype), ]
       cond1_count <- data_thiscelltype[, which( condition == unique( condition)[1])]
       cond1_count <-  sum(cond1_count != 0)/ncol(cond1_count)

       cond2_count <-data_thiscelltype[ , which( condition == unique( condition)[2])]
       cond2_count <- sum( cond2_count !=0 )/ncol(cond2_count)

       temp <- data.frame(ligand  = strsplit( thiscelltype, split = "->", fixed=TRUE )[[1]][1],
                          receptor = strsplit( thiscelltype, split = "->", fixed=TRUE )[[1]][2],
                          diff = cond1_count - cond2_count )
       df <- rbind(df, temp)
    }

    df <- as.data.frame( tidyr::pivot_wider(df, names_from = receptor, values_from = diff) )

    rownames(df) <- df$ligand
    df <- df[, -1]




   setHook("grid.newpage", function() pushViewport(viewport(x=1,y=1,width=0.9, height=0.9, name="vp", just=c("right","top"))), action="prepend")
     pheatmap(  df , fontsize_row = 10 , fontsize_col = 10,  cluster_cols = FALSE,
                 cluster_rows = FALSE ,
                display_numbers = TRUE,  number_color = "black" )
  setHook("grid.newpage", NULL, "replace")
  grid::grid.text("sender (ligand)", y=-0.07, gp=gpar(fontsize=16))
  grid::grid.text("target (receptor)", x=-0.07, rot=90, gp=gpar(fontsize=16)) 



  return(df)

}    

plot_CCI_difference <- function(df){

   #  https://stackoverflow.com/questions/49171958/igraph-edge-width-and-color-positive-and-negative-correlation
    df_net <- round( as.matrix(df) , 2)
    g <- graph_from_adjacency_matrix(df_net, mode = "directed", weighted = TRUE)
    E(g)$color <- ifelse(E(g)$weight > 0,'coral1','cornflowerblue')  
    E(g)$label.color<- "black"




   plot(g,  
         edge.curved=0.3 , edge.arrow.size=0.4, vertex.label.dist=4 ,
         edge.width=as.integer(cut(abs(E(g)$weight), breaks = 5)) ,
        edge.label = E(g)$weight,

         vertex.color="azure",
         vertex.label.color="black"  ,
         vertex.label.family="Helvetica", edge.label.family="Helvetica")

}

Cell type specific cell-cell communications {.tabset .tabset-fade .tabset-pills}

knitr::include_graphics(  system.file("extdata/figure",  
                   "CCI_example_figures.png"  ,   package = "scFeatures")    )
  1. Heatmap shows the top cell cell interactions features that differs between conditions
  2. Heatmap shows the difference in the number of interactions between conditions

For each interacting cell type, the difference is calculated as:
$$ \frac{total\: number\: of\: non-zero\: interactions \:in \: condition 1\: patients}{number \:of\: condition 1\:patients} - \frac{total\: number\: of\: non-zero\: interactions \:in \: condition 2\: patients}{number \:of\: condition 2 \:patients} $$

  1. PCA plot shows the separation of conditions based on the cell type specific pathway expression features
  2. Network plot shows the difference in the number of interactions between conditions
  3. Boxplot shows the top cell cell interaction features that differs between conditions

Heatmap of top cell cell interactions

if ("CCI" %in% names(scfeatures_result)){

   data <- scfeatures_result$CCI
   data <- t(data)

  data <- data[, !colnames(data) == "NA"]

  result <- plot_heatmap_bulk(data,  num_feature = 20 )

  gene_output <- result$gene_output

  result$p
}

Heatmap of difference in number of interactions

if ("CCI" %in% names(scfeatures_result)){

  df <- plot_CCI(data )

}

PCA plot

if ("CCI" %in% names(scfeatures_result)){

  p <- plot_pca(t(data) )

  ggplotly(p)

}

Network plot

if ("CCI" %in% names(scfeatures_result)){

  plot_CCI_difference(df)

}

Boxplot

if ("CCI" %in% names(scfeatures_result)){

  p <- plot_boxplot(t(data))  

  ggplotly(p)  |>layout(boxmode = "group") 

}

Overall aggregated gene expressions {.tabset .tabset-fade .tabset-pills}

knitr::include_graphics(  system.file("extdata/figure",  
         "aggregatedgene_example_figures.png" ,   package = "scFeatures")    )
  1. Heatmaps shows the top aggregated gene expression features that differs between conditions
  2. MA plot shows the expression and log2 fold change of the aggregated gene expression features
  3. Volcano plot shows the log2 fold change and P-values of the aggregated gene expression features
  4. PCA plot shows the separation of conditions based on the aggregated gene expression features
  5. Dot plot shows the pathway enrichment of the top aggregated gene expression features that differs between conditions
  6. Enrichment map of the top aggregatedgene expression features that differs between conditions
  7. Functional grouping of the top aggregated gene expression features that differs between conditions

Heatmap

if ("gene_mean_bulk" %in% names(scfeatures_result)){

  data <- as.matrix ( scfeatures_result$gene_mean_bulk)
  data <- t(data)

  result <- plot_heatmap_bulk(data ,  num_features = 20)

  gene_output <- result$gene_output

  result$p
}

MA plot

if ("gene_mean_bulk" %in% names(scfeatures_result)){

  gene_output$gene <- rownames(gene_output)

  g <-  ggplot(gene_output, aes(x = AveExpr, y = logFC, text = gene))+
      geom_point(aes(colour=-log10(P.Value)), alpha=1/3, size=1) +
      scale_colour_gradient(low="blue",high="red")+
      ylab("log2 fold change") + xlab("Average expression")+ theme_minimal()

  ggplotly(g)

}

Volcano plot

if ("gene_mean_bulk" %in% names(scfeatures_result)){

  p <-  ggplot( gene_output , aes(logFC,-log10(P.Value) , text = gene) )+
      geom_point(aes(colour=-log10(P.Value)), alpha=1/3, size=1) +
      scale_colour_gradient(low="blue",high="red")+
      xlab("log2 fold change") + ylab("-log10 p-value") + theme_minimal()

  ggplotly(p)

}

PCA plot

if ("gene_mean_bulk" %in% names(scfeatures_result)){
  plot_pca( t(data)  )  
}

Dot plot

if ("gene_mean_bulk" %in% names(scfeatures_result)){
  result <- plot_go_bulk( gene_output  ) 

  print(result$dotplot)
}

Enrichment map

if ("gene_mean_bulk" %in% names(scfeatures_result)){
  print(result$emapplot)
}

Functional grouping

if ("gene_mean_bulk" %in% names(scfeatures_result)){
  print(result$treeplot)
}

Spatial metrics {.tabset .tabset-fade .tabset-pills}

knitr::include_graphics(  system.file("extdata/figure",  
       "spatial_example_figures.png",   package = "scFeatures")    )
  1. Heatmaps shows the top spatial features that differs between conditions
  2. Boxplot shows the top spatial features that differs between conditions
  3. PCA plot shows the separation of conditions based on the spatial features

Heatmap

Moran's I

if ("morans_I" %in% names(scfeatures_result)){ 
  data <- scfeatures_result$morans_I
  data <- t(data)

  gene_output <- plot_heatmap_bulk(data,    num_features = 20 ) 


}

Boxplot

if ("morans_I" %in% names(scfeatures_result)){ 
   p <- plot_boxplot( t(data)  )  
  ggplotly(p) |>layout(boxmode = "group") 
}

PCA plot

if ("morans_I" %in% names(scfeatures_result)){ 
  p <- plot_pca( t(data)  )  
  ggplotly(p)
}


SydneyBioX/scFeatures documentation built on Aug. 29, 2024, 10:37 a.m.