In SydneyBioX/scFeatures: scFeatures: Multi-view representations of single-cell and spatial data for disease outcome prediction

knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)

library(ggplot2)
library(reshape2)
library(dplyr)
library(pheatmap)
library(limma)
library(DT)
library(tidyr)    
library(igraph)
library(grid)

library(plotly)


organism = "org.Hs.eg.db"
library(organism, character.only = TRUE)

library(EnsDb.Hsapiens.v79)
library(msigdbr)


library(clusterProfiler)
library(DOSE)
library(enrichplot)

library(data.table)
library(DT)

find_de_CCI <- function(data){

   data <- t(data)
   patient <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 1) ) 

    remove <- which(patient == "NA")
    if (length(remove) > 0 ){
       patient <-  patient[-c(remove)]
       data <- data[ ,  -c(remove)]
    }
    condition  <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 2))
    condition <- data.frame(condition = condition )


    design <- model.matrix(~condition, data = condition)
    fit <- lmFit(data, design)
    fit <- eBayes(fit)

    tT <- topTable(fit, n = Inf) 

    tT <- tT[ tT$P.Value < 0.1, ]

    celltype <- unlist(lapply ( strsplit( rownames(tT), split = "--", fixed=TRUE ), `[`, 1))

    celltype <-  sort(table(celltype), decreasing = TRUE) 
    celltype <- celltype[1:min(length(celltype), 3)]
    celltype <- data.frame(data.frame(celltype))

    number_celltype <- nrow( tT )


    return( result = list(number_celltype,  celltype))
}




find_de_pathway_specific <- function(data){

    celltype <- unlist( lapply(strsplit(colnames(data) ,split = "-", fixed=TRUE ) , `[`, 2) )
    df <- NULL
    for (thiscelltype in unique(celltype)){

      index <- which(celltype == thiscelltype)
      thisdata <- data[, index ]
      thisresult <- find_de_not_celltype_specific( thisdata )

      df <- rbind(df, data.frame( thiscelltype  , thisresult[[1]][1],  thisresult[[1]][1] / ncol(thisdata) ) )
    }
    colnames(df) <- c("celltype" , "num_sig_features" , "prop_sig_features")
    df <- df[order(df$prop_sig_features, decreasing = TRUE), ]
    df <- df[ 1: min(nrow(df), 3), ]

    return(df )
}

find_de_celltype_specific <- function(data){

    celltype <- unlist( lapply(strsplit(colnames(data) , split = "-", fixed=TRUE ) , `[`, 1) )
    df <- NULL
    for (thiscelltype in unique(celltype)){

      index <- which(celltype == thiscelltype)
      thisdata <- data[, index, drop=FALSE ]
      thisresult <- find_de_not_celltype_specific( thisdata )

      df <- rbind(df, data.frame( thiscelltype  , thisresult[[1]][1],  thisresult[[1]][1] / ncol(thisdata) ) )
    }
    colnames(df) <- c("celltype" , "num_sig_features" , "prop_sig_features")
    df <- df[order(df$prop_sig_features, decreasing = TRUE), ]
    df <- df[ 1: min(nrow(df), 3), ]

    return(df )
}








find_de_not_celltype_specific <- function(data){

   data <- t(data)
   patient <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 1) ) 

    remove <- which(patient == "NA")
    if (length(remove) > 0 ){
       patient <-  patient[-c(remove)]
       data <- data[ ,  -c(remove)]
    }
    condition  <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 2))
    condition <- data.frame(condition = condition )


    design <- model.matrix(~condition, data = condition)
    fit <- lmFit(data, design)
    fit <- eBayes(fit)

    tT <- topTable(fit, n = Inf) 

    tT <- tT[ tT$P.Value < 0.1, ]

    number_celltype <- nrow( tT )
    top_celltype <- rownames(tT)[1:min(number_celltype, 3)]

    return( result = list(number_celltype, top_celltype))
}




find_de_celltype_interaction <- function(data){

   data <- t(data)
   patient <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 1) ) 

    remove <- which(patient == "NA")
    if (length(remove) > 0 ){
       patient <-  patient[-c(remove)]
       data <- data[ ,  -c(remove)]
    }
    condition  <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 2))
    condition <- data.frame(condition = condition )


    design <- model.matrix(~condition, data = condition)
    fit <- lmFit(data, design)
    fit <- eBayes(fit)

    tT <- topTable(fit, n = Inf) 

    tT <- tT[ tT$P.Value < 0.1, ]

    celltype <- rownames(tT)

    celltype <-  sort(table(celltype), decreasing = TRUE) 
    celltype <- celltype[1:min(length(celltype), 3)]
    celltype <- data.frame(data.frame(celltype))

    number_celltype <- nrow( tT )


    return( result = list(number_celltype,  celltype))
}

This file runs association study using the given features and sample conditions and plots the key features from each feature category using a representative figure. The purpose is not to provide a comprehensive analysis in a single HTML but to help point directions for future investigation.

Overview of the association study result

Here we provide a brief overview of the association study result, including the number of features in each feature type, and the number of features that are significantly associated ( P-value < 0.1) with the conditions of the interest.

num_features <- lapply(scfeatures_result, dim)
num_features <- t( data.frame(  lapply(num_features, `[` , 2) )) 
colnames(num_features) <- "Number of features"

# get number of DE's in proportion_raw

df <- NULL

if ( "proportion_raw" %in% names( scfeatures_result ) ){
  data <- scfeatures_result$proportion_raw
  de_proportion_raw <- find_de_not_celltype_specific(data)

  de_proportion_raw <- data.frame("proportion_raw" ,
                                  de_proportion_raw[[1]][1],
                    paste0( unlist( de_proportion_raw[[2]]),  collapse=",  " ) )

  df <- rbindlist(list(df ,de_proportion_raw ), use.names=FALSE )

}


if ("proportion_logit" %in% names( scfeatures_result )){
  data <- scfeatures_result$proportion_logit
de_proportion_logit <- find_de_not_celltype_specific(data)
de_proportion_logit  <- data.frame("proportion_logit" , 
                                   de_proportion_logit [[1]][1],
                              paste0( unlist( de_proportion_logit [[2]]),   collapse=",  "  ) )
df <- rbindlist(list(df , de_proportion_logit ), use.names=FALSE )


}

if ("proportion_ratio" %in% names(scfeatures_result)){
  data <- scfeatures_result$proportion_ratio
  de_proportion_ratio <- find_de_CCI(data)
  de_proportion_ratio   <- data.frame("proportion_ratio" ,
                                de_proportion_ratio[[1]][1],
                                paste0( unlist( de_proportion_ratio[[2]]$celltype ),   collapse="\n"  ) )
  df <- rbindlist(list(df , de_proportion_ratio ), use.names=FALSE )
}



if ("gene_mean_celltype" %in% names(scfeatures_result)){
  data <- scfeatures_result$gene_mean_celltype
  de_gene_mean_celltype <- find_de_celltype_specific(data)
  de_gene_mean_celltype   <- data.frame("gene_mean_celltype" ,
                                 sum( de_gene_mean_celltype$num_sig_features),
                   paste0( unlist( de_gene_mean_celltype$celltype ),   collapse=",  "  ) )
  df <- rbindlist(list(df , de_gene_mean_celltype ), use.names=FALSE )

}


if ("gene_prop_celltype" %in% names(scfeatures_result)){
  data <- scfeatures_result$gene_prop_celltype
 de_gene_prop_celltype <- find_de_celltype_specific(data)
 de_gene_prop_celltype    <- data.frame("gene_prop_celltype" ,
                                 sum( de_gene_prop_celltype$num_sig_features),
                   paste0( unlist( de_gene_prop_celltype$celltype ),  collapse=",  "  ) )
 df <- rbindlist(list(df , de_gene_prop_celltype ), use.names=FALSE )

}


if ("gene_cor_celltype" %in% names(scfeatures_result)){
  data <- scfeatures_result$gene_cor_celltype
de_gene_cor_celltype <- find_de_celltype_specific(data)
de_gene_cor_celltype    <- data.frame("gene_cor_celltype" ,
                                 sum( de_gene_cor_celltype$num_sig_features),
                   paste0( unlist( de_gene_cor_celltype$celltype ),  collapse=",  "  ) )
df <- rbindlist(list(df , de_gene_cor_celltype), use.names=FALSE )



}


if ("pathway_gsva" %in% names(scfeatures_result)){

  data <- scfeatures_result$pathway_gsva
  de_pathway_gsva <-  find_de_pathway_specific(data)
  de_pathway_gsva  <- data.frame("pathway_gsva" ,
                                   sum( de_pathway_gsva$num_sig_features),
                     paste0( unlist( de_pathway_gsva$celltype ),  collapse=",  "  ) )
  df <- rbindlist(list(df , de_pathway_gsva  ), use.names=FALSE )


}


if ("pathway_mean" %in% names(scfeatures_result)){

  data <- scfeatures_result$pathway_mean
  de_pathway_mean <-  find_de_pathway_specific(data)
  de_pathway_mean  <- data.frame("pathway_mean" ,
                                   sum( de_pathway_mean$num_sig_features),
                     paste0( unlist( de_pathway_mean$celltype ),   collapse=",  " ) )
  df <- rbindlist(list(df , de_pathway_mean  ), use.names=FALSE )

}


if ("pathway_prop" %in% names(scfeatures_result)){

  data <- scfeatures_result$pathway_prop
  de_pathway_prop <-  find_de_pathway_specific(data)
  de_pathway_prop  <- data.frame("pathway_prop" ,
                                   sum(de_pathway_prop$num_sig_features),
                     paste0( unlist( de_pathway_prop$celltype ), collapse=",  "  ) )
  df <- rbindlist(list(df , de_pathway_prop ), use.names=FALSE )

}


if ("CCI" %in% names(scfeatures_result)){

  data <- scfeatures_result$CCI
  de_CCI <- find_de_CCI(data)
  de_CCI   <- data.frame("CCI" ,
                     de_CCI[[1]][1] ,
                    paste0( unlist( de_CCI[[2]]$celltype  ),  collapse=",  " ) )
  df <- rbindlist(list(df , de_CCI  ), use.names=FALSE )

} 


if ("gene_mean_bulk" %in% names(scfeatures_result)){

  data <- scfeatures_result$gene_mean_bulk
  de_gene_mean_bulk <-  find_de_not_celltype_specific(data)
  de_gene_mean_bulk    <- data.frame("gene_mean_bulk" ,
                       de_gene_mean_bulk[[1]][1] ,
                        "not applicable" )
  df <- rbindlist(list(df , de_gene_mean_bulk  ), use.names=FALSE )
} 

if ("gene_cor_bulk" %in% names(scfeatures_result)){
  data <- scfeatures_result$gene_cor_bulk
  de_gene_cor_bulk <-  find_de_not_celltype_specific(data)
  de_gene_cor_bulk    <- data.frame("gene_cor_bulk" ,
                      de_gene_cor_bulk[[1]][1] ,
                        "not applicable" )
  df <- rbindlist(list(df ,de_gene_cor_bulk), use.names=FALSE )
} 

if ("gene_prop_bulk" %in% names(scfeatures_result)){
  data <- scfeatures_result$gene_prop_bulk
  de_gene_prop_bulk <-  find_de_not_celltype_specific(data)
  de_gene_prop_bulk    <- data.frame("gene_prop_bulk" ,
                      de_gene_prop_bulk[[1]][1] ,
                        "not applicable" )
  df <- rbindlist(list(df , de_gene_prop_bulk ), use.names=FALSE )

} 



if ("L_stats" %in% names(scfeatures_result)){
  data <- scfeatures_result$L_stats
  de_L_stats <-  find_de_celltype_interaction(data)
  de_L_stats  <- data.frame("L_stats" ,
                     de_L_stats[[1]][1] ,   
                paste0( unlist( de_L_stats [[2]]$celltype  ),   collapse=",  "  )  )
  df <- rbindlist(list(df , de_L_stats ), use.names=FALSE )

} 



if ("morans_I" %in% names(scfeatures_result)){

  data <- scfeatures_result$morans_I
  de_morans_I <-  find_de_not_celltype_specific(data)
  de_morans_I    <- data.frame("morans_I" ,
                    de_morans_I[[1]][1] ,   "not applicable" )
  df <- rbindlist(list(df , de_morans_I), use.names=FALSE )

} 


if ("celltype_interaction" %in% names(scfeatures_result)){

  data <- scfeatures_result$celltype_interaction
  de_celltype_interaction <-  find_de_celltype_interaction(data)
  de_celltype_interaction    <- data.frame("celltype_interaction" ,
              de_celltype_interaction [[1]][1] ,
              paste0( unlist( de_celltype_interaction [[2]]$celltype  ),  collapse=",  "  ) )
  df <- rbindlist(list(df , de_celltype_interaction), use.names=FALSE )

} 

if ("nn_correlation" %in% names(scfeatures_result)){
  data <- scfeatures_result$nn_correlation
  de_nn_correlation <-  find_de_not_celltype_specific(data)
  de_nn_correlation  <- data.frame("nn_correlation" ,
                de_nn_correlation[[1]][1] ,  "not applicable")
  df <- rbindlist(list(df ,de_nn_correlation ), use.names=FALSE )

} 


num_features <- data.frame(num_features)

num_features$feature_type <- rownames(num_features)  
colnames(df) <- c("feature_type", "number of significant features", "top three important cell types")

df <- merge(df, num_features, by.x = "feature_type" , by.y = "feature_type")

df <- df[, c(1, 4, 2, 3)]
colnames(df)  <-  c("feature type", "total number of features",  "number of significant features", "top three important cell types")
datatable(df)

plot_barplot <- function(data , dodge=F  ){


  data$patient <- unlist( lapply( strsplit( rownames(data ), "_cond_"), `[`, 1))
  data$condition <- unlist( lapply( strsplit( rownames(data ), "_cond_"), `[`, 2))

  data <- as.data.frame( melt(data, id=c("patient", "condition")) )

  if(dodge){
   p <- ggplot(data , aes( x = patient , y = value , fill = variable) ) +   
    geom_bar(stat="identity"  ) + facet_wrap(~variable+condition, scale="free") + theme_minimal() + 
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) 

  } else{
  p <-   ggplot(data , aes( x = patient , y = value , fill = variable) ) +   
    geom_bar(stat="identity"   ) + facet_wrap(~condition, scale="free") + 
      theme_minimal() + 
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) 
  }


 return (p)



}

plot_boxplot <- function(data, num_feature = 3   ){

   data <- t(data)
   patient <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 1) ) 

    remove <- which(patient == "NA")
    if (length(remove) > 0 ){
       patient <-  patient[-c(remove)]
       data <- data[ ,  -c(remove)]
    }
    condition  <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 2))
    condition <- data.frame(condition = condition )


    design <- model.matrix(~condition, data = condition)
    fit <- lmFit(data, design)
    fit <- eBayes(fit)

    tT <- topTable(fit, n = Inf) 

    data <- melt(data)


    print(paste0( "up regulated in ", unique(condition$condition)[1]))
    top_gene <- tT[ tT$logFC > 0 , ]
    top_gene <- rownames(top_gene )
    top_gene  <- top_gene[1:min(num_feature, length(top_gene) ) ]

   if (length(top_gene ) == 0){

   }else {
      data_toplot <- data[data$Var1 %in%  top_gene, ]
      data_toplot$cond <-  unlist( lapply( strsplit(as.character(data_toplot$Var2), "_cond_"), `[`, 2))
      data_toplot$Var1 <- factor(data_toplot$Var1, levels =top_gene )

      p <- ggplot( data_toplot, aes( y = value, x = Var1 , colour = cond, text = Var2)) + geom_boxplot() +   geom_point(aes(fill = cond), size = 1, shape = 21, position = position_jitterdodge()) +  theme_minimal() + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))   

   }

   return (p ) 

}

plot_pca <- function(data, filename ){

  gse_pca <- prcomp( data)
  condition <- unlist( lapply( strsplit(rownames(data), "_cond_" ) , `[`, 2))

  df_toplot <- data.frame(condition , 
                          pc1 = gse_pca$x[,1], pc2 = gse_pca$x[,2]  )
  df_toplot$patient <- rownames(df_toplot)
  p <- ggplot(df_toplot, aes(x = pc1, y = pc2, color = condition , text = patient  )) + 
    geom_point(size = 4) + 
    theme_minimal() 

  return (p )

}

Cell type proportions {.tabset .tabset-fade .tabset-pills}

knitr::include_graphics( system.file("extdata/figure", "celltypeproportion_example_figures.png",   package = "scFeatures")  )

Barplot shows the composition of cells types
Boxplot shows the top cell types that differs between conditions
PCA plot shows the separation of conditions based on the cell type proportion features

Composition barplot

if ("proportion_raw" %in% names(scfeatures_result)){

  data <- scfeatures_result$proportion_raw

  p <-  plot_barplot(data  ) 

  ggplotly(p)

}

Boxplot of top features

if ("proportion_raw" %in% names(scfeatures_result)){
  data <- scfeatures_result$proportion_raw
  p <- plot_boxplot(data ) 
   ggplotly(p) |> layout(boxmode = "group") 
}

PCA plot

if ("proportion_raw" %in% names(scfeatures_result)){
    data <- scfeatures_result$proportion_raw
    p <- plot_pca(data )  
    ggplotly(p) 
}

plot_heatmap_bulk <- function(data , num_features = 20  ){



    patient <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 1) ) 
    remove <- which(patient == "NA")
    if (length(remove) > 0 ){
       patient <-  patient[-c(remove)]
       data <- data[ ,  -c(remove)]
    }
    condition  <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 2))
    condition <- data.frame(condition = condition )


    design <- model.matrix(~condition, data = condition)
    fit <- lmFit(data, design)
    fit <- eBayes(fit)

    suppressMessages ( tT <- topTable(fit, n = Inf) )
    gene_output <- tT

    # print ( DT::datatable(round(tT, 2)) )

    print(paste0( "up regulated in ", unique(condition$condition)[1]))
    top_gene <- tT[ tT$logFC > 0 , ]
    top_gene <- rownames(top_gene )[1:min( num_features, nrow(top_gene)) ]
    rownames( condition) <- colnames(data)

   if ( all(is.na(top_gene)) ){
          p <- "no significant genes"
    }else if (length(top_gene) == 1){
             pheatmap( data[top_gene, , drop=FALSE]  ,   
                  annotation = condition,
                   cluster_rows = FALSE  , color=colorRampPalette(c("navy", "white", "red"))(50) )
   }else{

        p <-  pheatmap( data[top_gene, ]  ,  annotation = condition,
              fontsize_row = 5 , fontsize_col = 5,
                scale = "row" , color=colorRampPalette(c("navy", "white", "red"))(50) )
   }

    return(list( gene_output = gene_output, p = p ) ) 

}



plot_go_bulk  <- function(gene_output ){


      try({
        print("up regulated")

        up <- rownames(gene_output[gene_output$logFC > 0, ])[1:200]

        gse <- enrichGO(gene = up , 
                   ont ="BP" ,  keyType = "SYMBOL" , 
                   minGSSize = 3,   maxGSSize = 800, 
                   pvalueCutoff =0.05,  pAdjustMethod =  "fdr",
                   OrgDb = organism  )

         dotplot <- dotplot(gse, showCategory=10,  font.size = 8 ) 


         gse <- enrichplot::pairwise_termsim(gse)


        emapplot <-  emapplot(gse, showCategory = 10,  
                         cex_label_category=0.7, cex_line = 0.7 ,   cex_category = 0.7) 



       treeplot <-  treeplot(gse,  fontsize = 3 , cex_category = 0.2, label_format = 5,
                        offset = 5)


      }) 

     return (list(  dotplot =   dotplot ,  emapplot =  emapplot ,   treeplot  =   treeplot  )) 

}

Cell type specific gene expressions {.tabset .tabset-fade .tabset-pills}

knitr::include_graphics( system.file("extdata/figure",  "celltypegene_example_figures.png" ,   package = "scFeatures")  )

Heatmaps shows the top cell type specific gene expression features that differs between conditions
MA plot shows the expression and log2 fold change of the cell type specific gene expression features
Volcano plot shows the log2 fold change and P-values of the cell type specific gene expression features
PCA plot shows the separation of conditions based on the cell type specific gene expression features
Dot plot shows the pathway enrichment of the top cell type specific gene expression features that differs between conditions
Enrichment map of the top cell type specific gene expression features that differs between conditions
Functional grouping of the top cell type specific gene expression features that differs between conditions

Heatmap

if ("gene_mean_celltype" %in% names(scfeatures_result)){
  data <- scfeatures_result$gene_mean_celltype
  data <- t(data)


  celltype <- rownames(data)
  celltype <- unlist( lapply( strsplit(celltype,split = "-", fixed=TRUE  ), `[`, 1) )

  topcelltype <- strsplit( de_gene_mean_celltype[, 3] , ", ")[[1]][1]

  data <- data[ which(celltype == topcelltype ) , ]

  result <- plot_heatmap_bulk(data , num_features = 20 )

  gene_output <- result$gene_output

  result$p
}

MA plot

if ("gene_mean_celltype" %in% names(scfeatures_result)){

  gene_output$gene <- rownames(gene_output)

  g <- ggplot(gene_output, aes(x = AveExpr, y = logFC , text  = gene))+
      geom_point(aes(colour=-log10(P.Value)), alpha=1/3, size=1) +
      scale_colour_gradient(low="blue",high="red")+
      ylab("log2 fold change") + xlab("Average expression")+ theme_minimal()

  ggplotly(g)

}

Volcano plot

if ("gene_mean_celltype" %in% names(scfeatures_result)){


  p <- ggplot( gene_output , aes(logFC,-log10(P.Value) , text = gene ) )+
      geom_point(aes(colour=-log10(P.Value)), alpha=1/3, size=1) +
      scale_colour_gradient(low="blue",high="red")+
      xlab("log2 fold change") + ylab("-log10 p-value") + theme_minimal()


  ggplotly(p)


}

PCA plot

if ("gene_mean_celltype" %in% names(scfeatures_result)){

  p <- plot_pca(t( data) )

  ggplotly(p)

}

Dot plot

if ("gene_mean_celltype" %in% names(scfeatures_result)){

  rownames( gene_output  ) <- make.unique(unlist( lapply ( strsplit( rownames( gene_output  ) , split = "-", fixed=TRUE ), `[` , 2) ))
  result <- plot_go_bulk(gene_output )

  print(result$dotplot)

}

Enrichment map

if ("gene_mean_celltype" %in% names(scfeatures_result)){

  print(result$emapplot) 

}

Functional grouping

if ("gene_mean_celltype" %in% names(scfeatures_result)){

   print(result$treeplot) 
}

Cell type specific pathway expressions {.tabset .tabset-fade .tabset-pills}

knitr::include_graphics(  system.file("extdata/figure",  "pathway_example_figures.png" ,   package = "scFeatures")    )

Heatmaps shows the top cell type specific pathway expression features that differs between conditions
Boxplot shows the top cell type specific pathway expression features that differs between conditions
PCA plot shows the separation of conditions based on the cell type specific pathway expression features

Heatmap

if ("pathway_mean" %in% names(scfeatures_result)){

  data <- scfeatures_result$pathway_mean
  data <- t(data)

  gene_output <- plot_heatmap_bulk(data )
  gene_output$p

}

Boxplot

if ("pathway_mean" %in% names(scfeatures_result)){

  p <- plot_boxplot(t(data) )

  ggplotly(p)  |>layout(boxmode = "group") 

}

PCA plot

if ("pathway_mean" %in% names(scfeatures_result)){

  p <- plot_pca(t( data) )
  ggplotly(p)

}

plot_CCI <- function(data ){


    patient <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 1) ) 

    remove <- which(patient == "NA")
    if (length(remove) > 0 ){
       patient <-  patient[-c(remove)]
       data <- data[ , -c(remove)]
    }

    condition <- unlist( lapply( strsplit( colnames(data), "_cond_"), `[`, 2) ) 
    celltype <- unlist( lapply( strsplit( rownames(data), split = "--", fixed=TRUE ), `[`, 1) )

    thiscelltype <-  unique(celltype)[1]

    df <- NULL
    for (thiscelltype in unique(celltype)){
       data_thiscelltype <- data[which(celltype == thiscelltype), ]
       cond1_count <- data_thiscelltype[, which( condition == unique( condition)[1])]
       cond1_count <-  sum(cond1_count != 0)/ncol(cond1_count)

       cond2_count <-data_thiscelltype[ , which( condition == unique( condition)[2])]
       cond2_count <- sum( cond2_count !=0 )/ncol(cond2_count)

       temp <- data.frame(ligand  = strsplit( thiscelltype, split = "->", fixed=TRUE )[[1]][1],
                          receptor = strsplit( thiscelltype, split = "->", fixed=TRUE )[[1]][2],
                          diff = cond1_count - cond2_count )
       df <- rbind(df, temp)
    }

    df <- as.data.frame( tidyr::pivot_wider(df, names_from = receptor, values_from = diff) )

    rownames(df) <- df$ligand
    df <- df[, -1]




   setHook("grid.newpage", function() pushViewport(viewport(x=1,y=1,width=0.9, height=0.9, name="vp", just=c("right","top"))), action="prepend")
     pheatmap(  df , fontsize_row = 10 , fontsize_col = 10,  cluster_cols = FALSE,
                 cluster_rows = FALSE ,
                display_numbers = TRUE,  number_color = "black" )
  setHook("grid.newpage", NULL, "replace")
  grid::grid.text("sender (ligand)", y=-0.07, gp=gpar(fontsize=16))
  grid::grid.text("target (receptor)", x=-0.07, rot=90, gp=gpar(fontsize=16)) 



  return(df)

}    

plot_CCI_difference <- function(df){

   #  https://stackoverflow.com/questions/49171958/igraph-edge-width-and-color-positive-and-negative-correlation
    df_net <- round( as.matrix(df) , 2)
    g <- graph_from_adjacency_matrix(df_net, mode = "directed", weighted = TRUE)
    E(g)$color <- ifelse(E(g)$weight > 0,'coral1','cornflowerblue')  
    E(g)$label.color<- "black"




   plot(g,  
         edge.curved=0.3 , edge.arrow.size=0.4, vertex.label.dist=4 ,
         edge.width=as.integer(cut(abs(E(g)$weight), breaks = 5)) ,
        edge.label = E(g)$weight,

         vertex.color="azure",
         vertex.label.color="black"  ,
         vertex.label.family="Helvetica", edge.label.family="Helvetica")

}

Cell type specific cell-cell communications {.tabset .tabset-fade .tabset-pills}

knitr::include_graphics(  system.file("extdata/figure",  
                   "CCI_example_figures.png"  ,   package = "scFeatures")    )

Heatmap shows the top cell cell interactions features that differs between conditions
Heatmap shows the difference in the number of interactions between conditions

For each interacting cell type, the difference is calculated as:
$$ \frac{total\: number\: of\: non-zero\: interactions \:in \: condition 1\: patients}{number \:of\: condition 1\:patients} - \frac{total\: number\: of\: non-zero\: interactions \:in \: condition 2\: patients}{number \:of\: condition 2 \:patients} $$

PCA plot shows the separation of conditions based on the cell type specific pathway expression features
Network plot shows the difference in the number of interactions between conditions
Boxplot shows the top cell cell interaction features that differs between conditions

Heatmap of top cell cell interactions

if ("CCI" %in% names(scfeatures_result)){

   data <- scfeatures_result$CCI
   data <- t(data)

  data <- data[, !colnames(data) == "NA"]

  result <- plot_heatmap_bulk(data,  num_feature = 20 )

  gene_output <- result$gene_output

  result$p
}

Heatmap of difference in number of interactions

if ("CCI" %in% names(scfeatures_result)){

  df <- plot_CCI(data )

}

PCA plot

if ("CCI" %in% names(scfeatures_result)){

  p <- plot_pca(t(data) )

  ggplotly(p)

}

Network plot

if ("CCI" %in% names(scfeatures_result)){

  plot_CCI_difference(df)

}

Boxplot

if ("CCI" %in% names(scfeatures_result)){

  p <- plot_boxplot(t(data))  

  ggplotly(p)  |>layout(boxmode = "group") 

}

Overall aggregated gene expressions {.tabset .tabset-fade .tabset-pills}

knitr::include_graphics(  system.file("extdata/figure",  
         "aggregatedgene_example_figures.png" ,   package = "scFeatures")    )

Heatmaps shows the top aggregated gene expression features that differs between conditions
MA plot shows the expression and log2 fold change of the aggregated gene expression features
Volcano plot shows the log2 fold change and P-values of the aggregated gene expression features
PCA plot shows the separation of conditions based on the aggregated gene expression features
Dot plot shows the pathway enrichment of the top aggregated gene expression features that differs between conditions
Enrichment map of the top aggregatedgene expression features that differs between conditions
Functional grouping of the top aggregated gene expression features that differs between conditions

Heatmap

if ("gene_mean_bulk" %in% names(scfeatures_result)){

  data <- as.matrix ( scfeatures_result$gene_mean_bulk)
  data <- t(data)

  result <- plot_heatmap_bulk(data ,  num_features = 20)

  gene_output <- result$gene_output

  result$p
}

MA plot

if ("gene_mean_bulk" %in% names(scfeatures_result)){

  gene_output$gene <- rownames(gene_output)

  g <-  ggplot(gene_output, aes(x = AveExpr, y = logFC, text = gene))+
      geom_point(aes(colour=-log10(P.Value)), alpha=1/3, size=1) +
      scale_colour_gradient(low="blue",high="red")+
      ylab("log2 fold change") + xlab("Average expression")+ theme_minimal()

  ggplotly(g)

}

Volcano plot

if ("gene_mean_bulk" %in% names(scfeatures_result)){

  p <-  ggplot( gene_output , aes(logFC,-log10(P.Value) , text = gene) )+
      geom_point(aes(colour=-log10(P.Value)), alpha=1/3, size=1) +
      scale_colour_gradient(low="blue",high="red")+
      xlab("log2 fold change") + ylab("-log10 p-value") + theme_minimal()

  ggplotly(p)

}

PCA plot

if ("gene_mean_bulk" %in% names(scfeatures_result)){
  plot_pca( t(data)  )  
}

Dot plot

if ("gene_mean_bulk" %in% names(scfeatures_result)){
  result <- plot_go_bulk( gene_output  ) 

  print(result$dotplot)
}

Enrichment map

if ("gene_mean_bulk" %in% names(scfeatures_result)){
  print(result$emapplot)
}

Functional grouping

if ("gene_mean_bulk" %in% names(scfeatures_result)){
  print(result$treeplot)
}

Spatial metrics {.tabset .tabset-fade .tabset-pills}

knitr::include_graphics(  system.file("extdata/figure",  
       "spatial_example_figures.png",   package = "scFeatures")    )

Heatmaps shows the top spatial features that differs between conditions
Boxplot shows the top spatial features that differs between conditions
PCA plot shows the separation of conditions based on the spatial features

Heatmap

Moran's I

if ("morans_I" %in% names(scfeatures_result)){ 
  data <- scfeatures_result$morans_I
  data <- t(data)

  gene_output <- plot_heatmap_bulk(data,    num_features = 20 ) 


}

Boxplot

if ("morans_I" %in% names(scfeatures_result)){ 
   p <- plot_boxplot( t(data)  )  
  ggplotly(p) |>layout(boxmode = "group") 
}

PCA plot

if ("morans_I" %in% names(scfeatures_result)){ 
  p <- plot_pca( t(data)  )  
  ggplotly(p)
}

SydneyBioX/scFeatures documentation built on June 10, 2025, 5:35 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

SydneyBioX/scFeatures scFeatures: Multi-view representations of single-cell and spatial data for disease outcome prediction

In SydneyBioX/scFeatures: scFeatures: Multi-view representations of single-cell and spatial data for disease outcome prediction

Overview of the association study result

Cell type proportions {.tabset .tabset-fade .tabset-pills}

Composition barplot

Boxplot of top features

PCA plot

Cell type specific gene expressions {.tabset .tabset-fade .tabset-pills}

Heatmap

MA plot

Volcano plot

PCA plot

Dot plot

Enrichment map

Functional grouping

Cell type specific pathway expressions {.tabset .tabset-fade .tabset-pills}

Heatmap

Boxplot

PCA plot

Cell type specific cell-cell communications {.tabset .tabset-fade .tabset-pills}

Heatmap of top cell cell interactions

Heatmap of difference in number of interactions

PCA plot

Network plot

Boxplot

Overall aggregated gene expressions {.tabset .tabset-fade .tabset-pills}

Heatmap

MA plot

Volcano plot

PCA plot

Dot plot

Enrichment map

Functional grouping

Spatial metrics {.tabset .tabset-fade .tabset-pills}

Heatmap

Moran's I

Boxplot

PCA plot

R Package Documentation

Browse R Packages

We want your feedback!

SydneyBioX/scFeatures
scFeatures: Multi-view representations of single-cell and spatial data for disease outcome prediction