# this file contains the 17 functions that generate the 17 feature classes
#' Generate cell type proportion raw
#'
#' @description
#' This function calculates the proportions of cells belonging to each cell type.
#' The input data must contain `sample` and `celltype` metadata column.
#' The function supports scRNA-seq and spatial proteomics.
#' The function returns a dataframe with samples as rows and cell types as columns.
#'
#' @param data A list object containing `data` matrix and `celltype` and `sample` vector.
#' @param type The type of dataset, either "scrna", "spatial_t", or "spatial_p".
#' @param ncores Number of cores for parallel processing.
#'
#' @return a dataframe of samples x features.
#' The features are in the form of celltype a, celltype b, with the number
#' representing proportions.
#'
#' @examples
#' utils::data("example_scrnaseq" , package = "scFeatures")
#' data <- example_scrnaseq[1:50, 1:20]
#'
#' celltype <- data$celltype
#' sample <- data$sample
#' data <- data@assays$RNA@data
#'
#' alldata <- scFeatures:::formatData(data = data, celltype = celltype, sample = sample )
#'
#' feature_proportion_raw <- run_proportion_raw(
#' alldata,
#' type = "scrna", ncores = 1
#' )
#'
#' @importFrom gtools logit
#' @importFrom tidyr pivot_wider
#' @importFrom BiocParallel SerialParam bplapply
#'
#' @export
run_proportion_raw <- function(data, type = "scrna", ncores = 1) {
check_data(data, type)
if (type %in% c("scrna", "spatial_p")) {
if (ncores > 1) {
cli::cli_warn(
"{type} does not currently support parallel computation ",
"for feature `proportion_raw`"
)
}
X <- helper_proportion_raw(data, logit = FALSE)
} else if (type == "spatial_t") {
X <- helper_proportion_raw_st(data, logit = FALSE, ncores)
} else {
cli::cli_abort(c(
"Parameter {.var type} must be 'scrna', 'spatial_p' or 'spatial_t'",
"x" = "'{type}' is not a valid input data type"
))
}
X <- as.data.frame(X)
return(X)
}
#' Generate cell type proportions, with logit transformation
#'
#' @description
#' This function calculates the proportions of cells belonging to each cell type,
#' and applies a logit transformation to the proportions.
#' The input data must contain `sample` and `celltype` metadata column.
#' The function supports scRNA-seq and spatial proteomics.
#' The function returns a dataframe with samples as rows and cell types as columns.
#'
#' @param data A list object containing `data` matrix and `celltype` and `sample` vector.
#' @param type The type of dataset, either "scrna", "spatial_t", or "spatial_p".
#' @param ncores Number of cores for parallel processing.
#'
#' @return a dataframe of samples x features
#' The features are in the form of celltype a, celltype b, with the number
#' representing proportions.
#'
#' @examples
#' utils::data("example_scrnaseq" , package = "scFeatures")
#' data <- example_scrnaseq[1:50, 1:20]
#' celltype <- data$celltype
#' sample <- data$sample
#' data <- data@assays$RNA@data
#'
#' alldata <- scFeatures:::formatData(data = data, celltype = celltype, sample = sample )
#'
#' feature_proportion_logit <- run_proportion_logit(
#' alldata,
#' type = "scrna", ncores = 1
#' )
#'
#' @importFrom gtools logit
#' @importFrom tidyr pivot_wider
#' @importFrom BiocParallel SerialParam bplapply
#' @importFrom cli cli_abort cli_warn
#'
#' @export
run_proportion_logit <- function(data, type = "scrna", ncores = 1) {
check_data(data, type)
if (type %in% c("scrna", "spatial_p")) {
if (ncores > 1) {
cli::cli_warn(
"{type} does not currently support parallel computation ",
"for feature proportion_logit."
)
}
X <- helper_proportion_raw(data, logit = TRUE)
} else if (type == "spatial_t") {
X <- helper_proportion_raw_st(data, logit = TRUE, ncores)
} else {
cli::cli_abort(c(
"Parameter {.var type} must be 'scrna', 'spatial_p' or 'spatial_t'",
"x" = "'{type}' is not a valid input data type"
))
}
X <- as.data.frame(X)
return(X)
}
#' Generate cell type proportion ratio
#'
#' @description
#' This function calculates pairwise cell type proportion ratio for each sample.
#' and applies a logit transformation to the proportions.
#' The input data must contain `sample` and `celltype` metadata column.
#' The function supports scRNA-seq and spatial proteomics.
#' The function returns a dataframe with samples as rows and cell types as columns.
#'
#' @param data A list object containing `data` matrix and `celltype` and `sample` vector.
#' @param type The type of dataset, either "scrna", "spatial_t", or "spatial_p".
#' @param ncores Number of cores for parallel processing.
#'
#' @return a dataframe of samples x features.
#' The features are in the form of celltype a vs celltype b, celltype a vs celltype c,
#' with the number representing the ratio between the two cell types.
#'
#' @examples
#'
#' utils::data("example_scrnaseq" , package = "scFeatures")
#' data <- example_scrnaseq[1:50, 1:20]
#'
#' celltype <- data$celltype
#' sample <- data$sample
#' data <- data@assays$RNA@data
#'
#' alldata <- scFeatures:::formatData(data = data, celltype = celltype, sample = sample )
#'
#' feature_proportion_ratio <- run_proportion_ratio(
#' alldata,
#' type = "scrna", ncores = 1
#' )
#'
#' @importFrom tidyr pivot_wider
#' @importFrom BiocParallel SerialParam bplapply
#' @importFrom cli cli_abort
#'
#' @export
run_proportion_ratio <- function(data, type = "scrna", ncores = 1) {
check_data(data, type)
if (type %in% c("scrna", "spatial_p")) {
X <- helper_proportion_ratio(data, ncores)
} else if (type == "spatial_t") {
X <- helper_proportion_ratio_st(data, ncores)
} else {
cli::cli_abort(c(
"Parameter {.var type} must be 'scrna', 'spatial_p' or 'spatial_t'",
"x" = "'{type}' is not a valid input data type"
))
}
X <- as.data.frame(X)
return(X)
}
#' Generate cell type specific gene mean expression
#'
#' @description
#' This function computes the mean expression of a set of genes for
#' each cell type in the input data. The input data can be of three types:
#' 'scrna', 'spatial_p' or 'spatial_t'. If the genes parameter is not p
#' rovided by the user, the top variable genes will be selected based on
#' the num_top_gene parameter (defaults to 100).
#'
#' @param data A list object containing `data` matrix and `celltype` and `sample` vector.
#' @param type The type of dataset, either "scrna", "spatial_t", or "spatial_p".
#' @param genes Optional dataframe with 2 columns: 'marker' and 'celltype'.
#' The 'marker' column should contain the genes of interest (e.g. 'S100A11', 'CCL4'),
#' and the 'celltype' column should contain the celltype that the gene expression
#' is to be computed from (e.g. 'CD8', 'B cells').
#' If not provided, the top variable genes will be used based on the
#' num_top_gene parameter.
#' @param num_top_gene Number of top genes to use when genes is not provided.
#' Defaults to 100.
#' @param ncores Number of cores for parallel processing.
#'
#' @return a dataframe of samples x features.
#' The features are in the form of gene 1 celltype a, gene 2 celltype b ... etc,
#' with the number representing average gene expression of the given gene across
#' the cells of the the given celltype.
#'
#' @examples
#'
#' utils::data("example_scrnaseq" , package = "scFeatures")
#' data <- example_scrnaseq[1:200, 1:200]
#' celltype <- data$celltype
#' sample <- data$sample
#' data <- data@assays$RNA@data
#'
#' alldata <- scFeatures:::formatData(data = data, celltype = celltype, sample = sample )
#'
#' feature_gene_mean_celltype <- run_gene_mean_celltype(
#' alldata,
#' type = "scrna", num_top_gene = 100, ncores = 1
#' )
#'
#' @importFrom proxyC simil
#' @importFrom DelayedMatrixStats rowVars rowMeans2
#' @importFrom BiocParallel SerialParam bplapply
#' @importFrom DelayedArray DelayedArray
#'
#' @export
run_gene_mean_celltype <- function(data,
type = "scrna",
genes = NULL,
num_top_gene = NULL,
ncores = 1) {
# check_data(data, type)
if (type %in% c("scrna", "spatial_p")) {
X <- helper_gene_mean_celltype(data, genes, num_top_gene, ncores)
} else if (type == "spatial_t") {
X <- helper_gene_mean_celltype_st(data, genes, num_top_gene, ncores)
} else {
cli::cli_abort(c(
"Parameter {.var type} must be 'scrna', 'spatial_p' or 'spatial_t'",
"x" = "'{type}' is not a valid input data type"
))
}
X <- as.data.frame(X)
return(X)
}
# TODO: Several warnings below should probably be errors.
#' Generate cell type specific gene proportion expression
#'
#' This function computes the proportion of expression of a set of genes
#' for each cell type in the input data. The input data can be of three types:
#' 'scrna', 'spatial_p' or 'spatial_t'. If the genes parameter is not provided
#' by the user, the top variable genes will be selected based on the
#' num_top_gene parameter (defaults to 100).
#'
#' @param data A list object containing `data` matrix and `celltype` and `sample` vector.
#' @param type The type of dataset, either "scrna", "spatial_t", or "spatial_p".
#' @param genes Optional dataframe with 2 columns: 'marker' and 'celltype'.
#' The 'marker' column should contain the genes of interest (e.g. 'S100A11', 'CCL4'),
#' and the 'celltype' column should contain the celltype that the gene expression
#' is to be computed from (e.g. 'CD8', 'B cells').
#' If not provided, the top variable genes will be used based on the
#' num_top_gene parameter.
#' @param num_top_gene Number of top genes to use when genes is not provided.
#' Defaults to 100.
#' @param ncores Number of cores for parallel processing.
#'
#' @return a dataframe of samples x features.
#' The features are in the form of gene 1 celltype a, gene 2 celltype b ... etc,
#' with the number representing proportion of gene expression of the given gene across
#' the cells of the the given celltype.
#'
#' @examples
#'
#' utils::data("example_scrnaseq" , package = "scFeatures")
#' data <- example_scrnaseq[, 1:20]
#' celltype <- data$celltype
#' sample <- data$sample
#' data <- data@assays$RNA@data
#'
#' alldata <- scFeatures:::formatData(data = data, celltype = celltype, sample = sample )
#'
#' feature_gene_prop_celltype <- run_gene_prop_celltype(
#' alldata,
#' type = "scrna", num_top_gene = 100, ncores = 1
#' )
#'
#' @importFrom proxyC simil
#' @importFrom DelayedMatrixStats rowVars rowMeans2
#' @importFrom BiocParallel SerialParam bplapply
#' @importFrom DelayedArray DelayedArray
#'
#' @export
run_gene_prop_celltype <- function(data,
type = "scrna",
genes = NULL,
num_top_gene = NULL,
ncores = 1) {
check_data(data, type)
if (type %in% c("scrna", "spatial_p")) {
X <- helper_gene_prop_celltype(data, genes, num_top_gene, ncores)
} else if (type == "spatial_t") {
print( "`gene_prop_celltype` currently does not support spatial transcriptomics")
return(NULL)
} else {
cli::cli_abort(c(
"Parameter {.var type} must be 'scrna', 'spatial_p' or 'spatial_t'",
"x" = "'{type}' is not a valid input data type"
))
}
X <- as.data.frame(X)
return(X)
}
#' Generate cell type specific gene expression correlation
#'
#' This function computes the correlation of expression of a set of genes
#' for each cell type in the input data. The input data can be of three types:
#' 'scrna', 'spatial_p' or 'spatial_t'. If the genes parameter is not provided
#' by the user, the top variable genes will be selected based on the
#' num_top_gene parameter (defaults to 100).
#'
#' @param data A list object containing `data` matrix and `celltype` and `sample` vector.
#' @param type The type of dataset, either "scrna", "spatial_t", or "spatial_p".
#' @param genes Optional dataframe with 2 columns: 'marker' and 'celltype'.
#' The 'marker' column should contain the genes of interest (e.g. 'S100A11', 'CCL4'),
#' and the 'celltype' column should contain the celltype that the gene expression
#' is to be computed from (e.g. 'CD8', 'B cells').
#' If not provided, the top variable genes will be used based on the
#' num_top_gene parameter.
#' @param num_top_gene Number of top genes to use when genes is not provided.
#' Defaults to 5.
#' @param ncores Number of cores for parallel processing.
#'
#' @return a dataframe of samples x features.
#' The features are in the form of gene 1 vs gene 2 cell type a ,
#' gene 1 vs gene 3 cell type b ... etc, with the numbers representing the
#' correlation of the two given genes in the given cell type.
#'
#' @examples
#'
#' utils::data("example_scrnaseq" , package = "scFeatures")
#' data <- example_scrnaseq[1:50, 1:20]
#' celltype <- data$celltype
#' sample <- data$sample
#' data <- data@assays$RNA@data
#'
#' alldata <- scFeatures:::formatData(data = data, celltype = celltype, sample = sample )
#'
#' feature_gene_cor_celltype <- run_gene_cor_celltype(
#' alldata,
#' type = "scrna", num_top_gene = 5, ncores = 1
#' )
#'
#' @importFrom proxyC simil
#' @importFrom DelayedMatrixStats rowVars rowMeans2
#' @importFrom BiocParallel SerialParam bplapply
#' @importFrom DelayedArray DelayedArray
#'
#' @export
run_gene_cor_celltype <- function(data,
type = "scrna",
genes = NULL,
num_top_gene = NULL,
ncores = 1) {
check_data(data, type)
if (type %in% c("scrna", "spatial_p")) {
X <- helper_gene_cor_celltype(data, genes, num_top_gene, ncores)
} else if (type == "spatial_t") {
print( "`gene_cor_celltype` currently does not support spatial transcriptomics")
return(NULL)
} else {
cli::cli_abort(c(
"Parameter {.var type} must be 'scrna', 'spatial_p' or 'spatial_t'",
"x" = "'{type}' is not a valid input data type"
))
}
X <- as.data.frame(X)
return(X)
}
#' Generate pathway score using gene set enrichement analysis
#'
#' @description
#' This function calculates pathway scores for a given input
#' dataset and gene set using gene set enrichment analysis (GSVA).
#' It supports scRNA-seq, spatial proteomics and spatial transcriptomics.
#' It currently supports two pathway analysis methods: ssgsea and aucell.
#' By default, it uses the 50 hallmark gene sets from msigdb.
#' Alternatively, users can provide their own gene sets of interest
#' in a list format.
#'
#' @param data A list object containing `data` matrix and `celltype` and `sample` vector.
#' @param type The type of dataset, either "scrna", "spatial_t", or "spatial_p".
#' @param method Type of pathway analysis method, currently support `ssgsea`
#' and `aucell`
#' @param geneset By default (when the `geneset` argument is not specified),
#' we use the 50 hallmark gene set from msigdb.
#' The users can also provide their geneset of interest in a list format, with
#' each list entry containing a vector of the names of genes in a gene set.
#' eg, geneset <- list("pathway_a" = c("CAPN1", ...), "pathway_b" = c("PEX6"))
#' @param species Whether the species is "Homo sapiens" or "Mus musculus".
#' Default is "Homo sapiens".
#' @param subsample Whether to subsample, either TRUE or FALSE. For larger datasets
#' (eg, over 30,000 cells), the subsample function can be used to increase
#' speed.
#' @param ncores Number of cores for parallel processing.
#'
#' @return a dataframe of samples x features
#' The features are in the form of pathway 1 celltype a, pathway 2 celltype b ...
#' etc, with the number representing the gene set enrichment score of a given
#' pathway in cells from a given celltype.
#'
#' @examples
#'
#' utils::data("example_scrnaseq" , package = "scFeatures")
#' data <- example_scrnaseq[, 1:20]
#' celltype <- data$celltype
#' sample <- data$sample
#' data <- data@assays$RNA@data
#'
#' alldata <- scFeatures:::formatData(data = data, celltype = celltype, sample = sample )
#'
#' feature_pathway_gsva <- run_pathway_gsva(
#' alldata,
#' geneset = NULL, species = "Homo sapiens",
#' type = "scrna", subsample = FALSE, ncores = 1
#' )
#'
#' @importFrom msigdbr msigdbr
#' @importFrom GSVA gsva
#' @importFrom AUCell AUCell_buildRankings AUCell_calcAUC getAUC
#' @import EnsDb.Hsapiens.v79
#' @import EnsDb.Mmusculus.v79
#' @importFrom dplyr select
#' @importFrom reshape2 melt
#' @importFrom BiocParallel SerialParam bplapply
#' @importFrom DelayedArray DelayedArray
#' @importFrom DelayedMatrixStats colMeans2
#'
#' @export
run_pathway_gsva <- function(data, method = "ssgsea", geneset = NULL,
species = "Homo sapiens",
type = "scrna", subsample = TRUE, ncores = 1) {
check_data(data, type)
if (is.null(geneset)) {
geneset <- get_geneset(species = species)
}
if (subsample && ncol(data$data) > 90000) {
data$data <- data$data[, sample(seq_len(ncol(data$data)), 90000)]
}
if (type == "scrna") {
# TODO: if the user does not provide geneset, need to get the geneset from msigdb
capture.output( suppressMessages( X <- helper_pathway_gsva(
data,
method = method, geneset = geneset, ncores = ncores
) ))
} else if (type == "spatial_p") {
print( "Pathway GSVA currently does not support spatial proteomics" )
return(NULL)
} else if (type == "spatial_t") {
print( "Pathway GSVA currently does not support spatial transcriptiomics" )
return(NULL)
} else {
cli::cli_abort(c(
"Parameter {.var type} must be 'scrna', 'spatial_p' or 'spatial_t'",
"x" = "'{type}' is not a valid input data type"
))
}
X <- as.data.frame(X)
return(X)
}
#' Generate pathway score using expression level
#'
#' @description
#' This function calculates pathway scores for a given dataset and gene set
#' using gene expression levels. It supports scRNA-seq, spatial transcriptomics
#' and spatial proteomics and spatial transcriptomics).
#' By default, it uses the 50 hallmark gene sets from msigdb.
#' Alternatively, users can provide their own gene sets of interest in a list format.
#'
#' @param data A list object containing `data` matrix and `celltype` and `sample` vector.
#' @param type The type of dataset, either "scrna", "spatial_t", or "spatial_p".
#' @param geneset By default (when the `geneset` argument is not specified),
#' we use the 50 hallmark gene set from msigdb.
#' The users can also provide their geneset of interest in a list format, with
#' each list entry containing a vector of the names of genes in a gene set.
#' eg, geneset <- list("pathway_a" = c("CANS1", ...), "pathway_b" = c("PEX6"))
#' @param species Whether the species is "Homo sapiens" or "Mus musculus".
#' Default is "Homo sapiens".
#' @param ncores Number of cores for parallel processing.
#'
#' @return a dataframe of samples x features
#' The features are in the form of pathway 1 celltype a, pathway 2 celltype b ...
#' etc, with the number representing the averaged expression of a given pathway in
#' cells from a given celltype.
#'
#' @examples
#'
#' utils::data("example_scrnaseq" , package = "scFeatures")
#' data <- example_scrnaseq[1:500, 1:200]
#' celltype <- data$celltype
#' sample <- data$sample
#' data <- data@assays$RNA@data
#'
#' alldata <- scFeatures:::formatData(data = data, celltype = celltype, sample = sample )
#' feature_pathway_mean <- run_pathway_mean(
#' alldata ,
#' geneset = NULL, species = "Homo sapiens",
#' type = "scrna", ncores = 1
#' )
#'
#'
#' @importFrom msigdbr msigdbr
#' @importFrom GSVA gsva
#' @importFrom AUCell AUCell_buildRankings AUCell_calcAUC getAUC
#' @import EnsDb.Hsapiens.v79
#' @import EnsDb.Mmusculus.v79
#' @importFrom dplyr select
#' @importFrom reshape2 melt
#' @importFrom BiocParallel SerialParam bplapply
#' @importFrom DelayedArray DelayedArray
#' @importFrom DelayedMatrixStats colMeans2
#'
#' @export
run_pathway_mean <- function(data, geneset = NULL,
species = "Homo sapiens",
type = "scrna",
ncores = 1) {
check_data(data, type)
if (is.null(geneset)) {
geneset <- get_geneset(species = species)
}
if (type == "scrna") {
X <- helper_pathway_mean(data, geneset = geneset, ncores = ncores)
} else if (type == "spatial_p") {
print( "`pathway_mean` currently does not support spatial proteomics")
return(NULL)
} else if (type == "spatial_t") {
X <- helper_pathway_mean_st(data, geneset = geneset, ncores = ncores)
} else {
cli::cli_abort(c(
"Parameter {.var type} must be 'scrna', 'spatial_p' or 'spatial_t'",
"x" = "'{type}' is not a valid input data type"
))
}
X <- as.data.frame(X)
return(X)
}
#' Generate pathway score using proportion of expression
#'
#' @description
#' This function calculates pathway scores for a given input dataset and gene set
#' using the proportion of gene expression levels. It supports scRNA-seq, spatial transcriptomics
#' and spatial proteomics and spatial transcriptomics).
#' By default, it uses the 50 hallmark gene sets from msigdb.
#' Alternatively, users can provide their own gene sets of interest in a list format.
#'
#' @param data A list object containing `data` matrix and `celltype` and `sample` vector.
#' @param type The type of dataset, either "scrna", "spatial_t", or "spatial_p".
#' @param geneset By default (when the `geneset` argument is not specified),
#' we use the 50 hallmark gene set from msigdb.
#' The users can also provide their geneset of interest in a list format, with
#' each list entry containing a vector of the names of genes in a gene set.
#' eg, geneset <- list("pathway_a" = c("CANS1", ...), "pathway_b" = c("PEX6"))
#' @param species Whether the species is "Homo sapiens" or "Mus musculus".
#' Default is "Homo sapiens".
#' @param ncores Number of cores for parallel processing.
#'
#' @return a dataframe of samples x features
#' The features are in the form of pathway 1 celltype a, pathway 2 celltype b ...
#' etc, with the number representing the proportion of expression of a given pathway
#' in cells from a given celltype.
#'
#' @examples
#'
#' utils::data("example_scrnaseq" , package = "scFeatures")
#' data <- example_scrnaseq[1:100, 1:100]
#' celltype <- data$celltype
#' sample <- data$sample
#' data <- data@assays$RNA@data
#'
#' alldata <- scFeatures:::formatData(data = data, celltype = celltype, sample = sample )
#'
#' feature_pathway_prop <- run_pathway_prop(
#' alldata,
#' geneset = NULL, species = "Homo sapiens",
#' type = "scrna", ncores = 1
#' )
#'
#'
#' @importFrom msigdbr msigdbr
#' @importFrom GSVA gsva
#' @importFrom AUCell AUCell_buildRankings AUCell_calcAUC getAUC
#' @import EnsDb.Hsapiens.v79
#' @import EnsDb.Mmusculus.v79
#' @importFrom dplyr select
#' @importFrom reshape2 melt
#' @importFrom BiocParallel SerialParam bplapply
#' @importFrom DelayedArray DelayedArray
#' @importFrom DelayedMatrixStats colMeans2
#'
#' @export
run_pathway_prop <- function(data, geneset = NULL,
species = "Homo sapiens",
type = "scrna",
ncores = 1) {
check_data(data, type)
if (is.null(geneset)) {
geneset <- get_geneset(species = species)
}
if (type == "scrna") {
X <- helper_pathway_prop(data, geneset = geneset, ncores = ncores)
} else if (type == "spatial_p") {
print( "`pathway_prop` currently does not support spatial proteomics")
return(NULL)
} else if (type == "spatial_t") {
print( "`pathway_prop` currently does not support spatial transcriptomics")
return(NULL)
} else {
cli::cli_abort(c(
"Parameter {.var type} must be 'scrna', 'spatial_p' or 'spatial_t'",
"x" = "'{type}' is not a valid input data type"
))
}
X <- as.data.frame(X)
return(X)
}
#' Generate cell cell communication score
#'
#' @description
#' This function calculates the ligand receptor interaction score using SingleCellSignalR.
#' The output features are in the form of celltype a -> celltype b -- ligand 1 -> receptor 2 ,
#' which indicates the interaction between ligand 1 in celltype a and receptor 2 in celltype b.
#'
#' It supports scRNA-seq.
#'
#' @param data A list object containing `data` matrix and `celltype` and `sample` vector.
#' @param type input data type, either scrna, spatial_p, or spatial_t
#' @param ncores number of cores
#'
#' @return a matrix of samples x features
#' The features are in the form of ligand 1 receptor 2 celltype a, ligand 1 receptor 2 celltype b ...
#' etc, with the numbers representing cell-cell interaction probability.
#'
#' @examples
#'
#' utils::data("example_scrnaseq" , package = "scFeatures")
#' data <- example_scrnaseq[1:1000, 1:100]
#' celltype <- data$celltype
#' sample <- data$sample
#' data <- as.matrix(data@assays$RNA@data)
#'
#' alldata <- scFeatures:::formatData(data = data, celltype = celltype, sample = sample )
#' feature_CCI <- run_CCI(alldata, type = "scrna" , ncores = 1 )
#'
#' @import dplyr
#' @import DelayedArray
#' @import SingleCellSignalR
#'
#' @export
run_CCI <- function( data, type = "scrna" , ncores = 1 ){
check_data(data, type)
if ( type == "scrna" ) {
X <- helper_CCI(data, ncores = ncores )
}
if ( type == "spatial_p" ) {
print("This feature class currently does not support spatial proteomics")
return(NULL)
}
if ( type == "spatial_t" ) {
print("This feature class currently does not support spatial transcriptomics")
return(NULL)
}
return (X)
}
#' Generate overall aggregated mean expression
#'
#' @description
#' This function computes the mean expression of genes across samples. The user
#' can specify the genes of interest, or let the function use the top variable
#' genes by default. The function supports scRNA-seq, spatial proteomics,
#' and spatial transcriptomics.
#'
#' @param data A list object containing `data` matrix and `celltype` and `sample` vector.
#' @param type The type of dataset, either "scrna", "spatial_t", or "spatial_p".
#' @param genes Default to NULL, in which case the top variable genes will be
#' used. If provided by user, need to be in the format of a list containing the
#' genes of interest, eg, genes <- c(GZMA", "GZMK", "CCR7", "RPL38" )
#' @param num_top_gene Number of top variable genes to use when genes is not provided.
#' Defaults to 1500.
#' @param ncores Number of cores for parallel processing.
#'
#' @return a dataframe of samples x features
#' The features are in the form of gene 1, gene 2 ... etc, with the numbers representing
#' averaged gene expression across all cells.
#'
#' @examples
#'
#' utils::data("example_scrnaseq" , package = "scFeatures")
#' data <- example_scrnaseq
#' celltype <- data$celltype
#' sample <- data$sample
#' data <- data@assays$RNA@data
#'
#' alldata <- scFeatures:::formatData(data = data, celltype = celltype, sample = sample )
#' feature_gene_mean <- run_gene_mean(
#' alldata,
#' type = "scrna", num_top_gene = 150, ncores = 1
#' )
#'
#' @importFrom proxyC simil
#' @importFrom DelayedMatrixStats rowMeans2
#' @importFrom DelayedArray DelayedArray
#' @importFrom BiocParallel SerialParam bplapply
#' @importFrom reshape2 melt
#' @importFrom cli cli_abort
#'
#' @export
run_gene_mean <- function(data,
type = "scrna",
genes = NULL,
num_top_gene = NULL,
ncores = 1) {
check_data(data, type)
if (type %in% c("scrna", "spatial_p", "spatial_t")) {
X <- helper_gene_mean(data, genes, num_top_gene, ncores)
} else {
cli::cli_abort(c(
"Parameter {.var type} must be 'scrna', 'spatial_p' or 'spatial_t'",
"x" = "'{type}' is not a valid input data type"
))
}
X <- as.data.frame(X)
return(X)
}
#' Generate overall aggregated gene proportion expression
#'
#' @description
#' This function computes the proportion of gene expression across samples. The user
#' can specify the genes of interest, or let the function use the top variable
#' genes by default. The function supports scRNA-seq, spatial proteomics,
#' and spatial transcriptomics.
#'
#' @param data A list object containing `data` matrix and `celltype` and `sample` vector.
#' @param type The type of dataset, either "scrna", "spatial_t", or "spatial_p".
#' @param genes Default to NULL, in which case the top variable genes will be
#' used. If provided by user, need to be in the format of a list containing the
#' genes of interest, eg, genes <- c(GZMA", "GZMK", "CCR7", "RPL38" )
#' @param num_top_gene Number of top variable genes to use when genes is not provided.
#' Defaults to 1500.
#' @param ncores Number of cores for parallel processing.
#'
#' @return a dataframe of samples x features
#' The features are in the form of gene 1 vs gene 2, gene 1 vs gene 3 ... etc,
#' with the numbers representing correlation of gene expressions.
#'
#' @examples
#'
#' utils::data("example_scrnaseq" , package = "scFeatures")
#' data <- example_scrnaseq[1:50, 1:20]
#' celltype <- data$celltype
#' sample <- data$sample
#' data <- data@assays$RNA@data
#'
#' alldata <- scFeatures:::formatData(data = data, celltype = celltype, sample = sample )
#' feature_gene_prop <- run_gene_prop(alldata, type = "scrna", num_top_gene = 10, ncores = 1)
#'
#' @importFrom proxyC simil
#' @importFrom DelayedMatrixStats rowMeans2
#' @importFrom DelayedArray DelayedArray
#' @importFrom BiocParallel SerialParam bplapply
#' @importFrom reshape2 melt
#' @importFrom cli cli_abort
#'
#' @export
run_gene_prop <- function(data, type = "scrna", genes = NULL, num_top_gene = NULL, ncores = 1) {
check_data(data, type)
if (type %in% c("scrna", "spatial_p", "spatial_t")) {
X <- helper_gene_prop(data, genes, num_top_gene, ncores)
} else {
cli::cli_abort(c(
"Parameter {.var type} must be 'scrna', 'spatial_p' or 'spatial_t'",
"x" = "'{type}' is not a valid input data type"
))
}
X <- as.data.frame(X)
return(X)
}
#' Generate overall aggregated gene correlation
#'
#' @description
#' This function computes the correlation of gene expression across samples. The user
#' can specify the genes of interest, or let the function use the top variable
#' genes by default. The function supports scRNA-seq, spatial proteomics,
#' and spatial transcriptomics.
#'
#' @param data A list object containing `data` matrix and `celltype` and `sample` vector.
#' @param type The type of dataset, either "scrna", "spatial_t", or "spatial_p".
#' @param genes Default to NULL, in which case the top variable genes will be
#' used. If provided by user, need to be in the format of a list containing the
#' genes of interest, eg, genes <- c(GZMA", "GZMK", "CCR7", "RPL38" )
#' @param num_top_gene Number of top variable genes to use when genes is not provided.
#' Defaults to 5.
#' @param ncores Number of cores for parallel processing.
#'
#' @return a dataframe of samples x features
#' The features are in the form of gene 1, gene 2 ... etc, with the numbers representing
#' the proportion that the gene is expressed across all cells.
#'
#' @examples
#'
#' utils::data("example_scrnaseq" , package = "scFeatures")
#' data <- example_scrnaseq[1:100, 1:200]
#' celltype <- data$celltype
#' sample <- data$sample
#' data <- data@assays$RNA@data
#'
#' alldata <- scFeatures:::formatData(data = data, celltype = celltype, sample = sample )
#' feature_gene_cor <- run_gene_cor(
#' alldata, type = "scrna", num_top_gene = 5, ncores = 1
#' )
#'
#' @importFrom proxyC simil
#' @importFrom DelayedMatrixStats rowMeans2
#' @importFrom DelayedArray DelayedArray
#' @importFrom BiocParallel SerialParam bplapply
#' @importFrom reshape2 melt
#' @importFrom cli cli_abort
#'
#' @export
run_gene_cor <- function(data, type = "scrna", genes = NULL, num_top_gene = NULL, ncores = 1) {
check_data(data, type)
if (type %in% c("scrna", "spatial_p", "spatial_t")) {
X <- helper_gene_cor(data, genes, num_top_gene, ncores)
} else {
cli::cli_abort(c(
"Parameter {.var type} must be 'scrna', 'spatial_p' or 'spatial_t'",
"x" = "'{type}' is not a valid input data type"
))
}
X <- as.data.frame(X)
return(X)
}
#' Generate L stats
#'
#' @description
#' This function calculates L-statistics to measure spatial autocorrelation.
#' L value greater than zero indicates spatial attraction of the pair of proteins
#' whereas L value less than zero indicates spatial repulsion.
#' The function supports spatial proteomics and spatial transcriptomics.
#'
#' @param data A list object containing `data` matrix and `celltype` and `sample` vector.
#' @param type The type of dataset, either "scrna", "spatial_t", or "spatial_p".
#' @param ncores Number of cores for parallel processing.
#'
#' @return a dataframe of samples x features
#' The features are in the form of protein 1 vs protein 2, protein 1 vs protein 3 ...
#' etc, with the numbers representing the L values.
#'
#' @examples
#'
#' utils::data("example_scrnaseq" , package = "scFeatures")
#' celltype <- example_scrnaseq$celltype
#' data <- example_scrnaseq@assays$RNA@data
#' sample <- sample( c("patient1", "patient2", "patient3"), ncol(data) , replace= TRUE )
#' x <- sample(1:100, ncol(data) , replace = TRUE)
#' y <- sample(1:100, ncol(data) , replace = TRUE)
#' spatialCoords <- list( x , y)
#' alldata <- scFeatures:::formatData(data = data, sample = sample, celltype = celltype,
#' spatialCoords = spatialCoords )
#'
#' feature_L_function <- run_L_function(alldata, type = "spatial_p", ncores = 1)
#'
#' @importFrom spatstat.geom ppp pairdist
#' @importFrom BiocParallel SerialParam bplapply
#' @importFrom reshape2 melt
#' @importFrom ape Moran.I
#' @importFrom spatstat.explore nncorr
#'
#' @export
run_L_function <- function(data, type = "spatial_p", ncores = 1) {
check_data(data, type)
if (type == "scrna") {
print("`L_function` currently does not support 'scrna'")
return(NULL)
} else if (type == "spatial_p") {
X <- helper_L_stat_sp(data, ncores = ncores)
} else if (type == "spatial_t") {
X <- helper_L_stat_st(data, ncores = ncores)
} else {
cli::cli_abort(c(
"Parameter {.var type} must be 'scrna', 'spatial_p' or 'spatial_t'",
"x" = "'{type}' is not a valid input data type"
))
}
X <- as.data.frame(X)
return(X)
}
#' Generate cell type interaction
#'
#' @description
#' This function calculates the pairwise distance between cell types
#' for a sample by using the coordinates and cell types of the cells.
#' We find the nearest neighbours of each cell and the cell types of these neighbours.
#' These are considered as spatial interaction pairs. The cell type composition of the
#' spatial interaction pairs are used as features.
#' The function supports spatial proteomics and spatial transcriptomics.
#'
#' @param data A list object containing `data` matrix and `celltype` and `sample` vector.
#' @param type The type of dataset, either "scrna", "spatial_t", or "spatial_p".
#' @param ncores Number of cores for parallel processing.
#'
#' @return a dataframe of samples x features
#' The features are in the form of protein 1 vs protein 2, protein 1 vs protein 3 ...
#' etc, with the numbers representing the proportion of each interaction pairs in a
#' give sample.
#'
#' @examples
#'
#' utils::data("example_scrnaseq" , package = "scFeatures")
#' data <- example_scrnaseq[1:50, 1:20]
#' celltype <- data$celltype
#' data <- data@assays$RNA@data
#' sample <- sample( c("patient1", "patient2", "patient3"), ncol(data) , replace= TRUE )
#' x <- sample(1:100, ncol(data) , replace = TRUE)
#' y <- sample(1:100, ncol(data) , replace = TRUE)
#' spatialCoords <- list( x , y)
#' alldata <- scFeatures:::formatData(data = data, sample = sample, celltype = celltype,
#' spatialCoords = spatialCoords )
#'
#' feature_celltype_interaction <- run_celltype_interaction(
#' alldata, type = "spatial_p", ncores = 1
#' )
#'
#' @importFrom spatstat.geom ppp pairdist
#' @importFrom BiocParallel SerialParam bplapply
#' @importFrom reshape2 melt
#' @importFrom ape Moran.I
#' @importFrom spatstat.explore nncorr
#'
#' @export
run_celltype_interaction <- function(data, type = "spatial_p", ncores = 1) {
check_data(data, type)
if (type == "scrna") {
print( "`celltype_interaction` currently does not support 'scrna'")
return(NULL)
} else if (type == "spatial_p") {
X <- helper_celltype_interaction_sp(data, ncores = ncores)
} else if (type == "spatial_t") {
X <- helper_celltype_interaction_st(data,ncores = ncores)
}
X <- as.data.frame(X)
return(X)
}
#' Generate Moran's I
#'
#' @description
#' This function calculates Moran's I to measure spatial autocorrelation,
#' which an indicattion of how strongly the feature(ie, genes/proteins)
#' expression values in a sample cluster or disperse. A value closer to 1
#' indicates clustering of similar values and a value closer to -1 indicates
#' clustering of dissimilar values. A value of 0 indicates no particular clustering
#' structure, ie, the values are spatially distributed randomly.
#' The function supports spatial proteomics and spatial transcriptomics.
#'
#' @param data A list object containing `data` matrix and `celltype` and `sample` vector.
#' @param type The type of dataset, either "scrna", "spatial_t", or "spatial_p".
#' @param ncores Number of cores for parallel processing.
#'
#' @return a dataframe of samples x features
#' The features are in the form of protein 1, protein 2 ... etc, with the numbers
#' representing Moran's value.
#'
#' @examples
#'
#' utils::data("example_scrnaseq" , package = "scFeatures")
#' data <- example_scrnaseq[1:50, 1:20]
#' celltype <- data$celltype
#' data <- data@assays$RNA@data
#' sample <- sample( c("patient1", "patient2", "patient3"), ncol(data) , replace= TRUE )
#' x <- sample(1:100, ncol(data) , replace = TRUE)
#' y <- sample(1:100, ncol(data) , replace = TRUE)
#' spatialCoords <- list( x , y)
#' alldata <- scFeatures:::formatData(data = data, sample = sample, celltype = celltype,
#' spatialCoords = spatialCoords )
#'
#' feature_Morans_I <- run_Morans_I(alldata, type = "spatial_p", ncores = 1)
#'
#' @importFrom spatstat.geom ppp pairdist
#' @importFrom BiocParallel SerialParam bplapply
#' @importFrom reshape2 melt
#' @importFrom ape Moran.I
#' @importFrom spatstat.explore nncorr
#'
#' @export
run_Morans_I <- function(data, type = "spatial_p", ncores = 1) {
check_data(data, type)
if (type == "scrna") {
print( "`Morans_I` currently does not support 'scrna'")
return(NULL)
} else if (type %in% c("spatial_p", "spatial_t")) {
X <- helper_moran(data, num_top_gene = NULL,ncores = ncores)
} else {
cli::cli_abort(c(
"Parameter {.var type} must be 'scrna', 'spatial_p' or 'spatial_t'",
"x" = "'{type}' is not a valid input data type"
))
}
X <- as.data.frame(X)
return(X)
}
#' Generate nearest neighbour correlation
#'
#' @description
#' This function calculates the nearest neighbour correlation for each feature
#' (eg, proteins) in each sample. This is calculated by taking the correlation
#' between each cell and its nearest neighbours cell for a particular feature.
#' This function supports spatial proteomics, and spatial transcriptomics.
#'
#' @param data A list object containing `data` matrix and `celltype` and `sample` vector.
#' @param type The type of dataset, either "scrna", "spatial_t", or "spatial_p".
#' @param ncores Number of cores for parallel processing.
#' @param num_top_gene Number of top variable genes to use when genes is
#' not provided. Defaults to 1500.
#'
#' @return a dataframe of samples x features
#' The features are in the form of protein 1, protein 2 ... etc, with the numbers
#' representing Pearson's correlation.
#'
#' @examples
#'
#' utils::data("example_scrnaseq" , package = "scFeatures")
#' data <- example_scrnaseq[1:50, 1:20]
#' celltype <- data$celltype
#' data <- data@assays$RNA@data
#' sample <- sample( c("patient1", "patient2", "patient3"), ncol(data) , replace= TRUE )
#' x <- sample(1:100, ncol(data) , replace = TRUE)
#' y <- sample(1:100, ncol(data) , replace = TRUE)
#' spatialCoords <- list( x , y)
#' alldata <- scFeatures:::formatData(data = data, sample = sample, celltype = celltype,
#' spatialCoords = spatialCoords )
#' feature_nn_correlation <- run_nn_correlation(
#' alldata, type = "spatial_p", ncores = 1
#' )
#'
#' @importFrom spatstat.geom ppp pairdist
#' @importFrom BiocParallel SerialParam bplapply
#' @importFrom reshape2 melt
#' @importFrom ape Moran.I
#' @importFrom spatstat.explore nncorr
#'
#' @export
run_nn_correlation <- function(data, type = "spatial_p", num_top_gene = NULL, ncores = 1) {
check_data(data, type)
if (type == "scrna") {
print( "`nn_correlation` currently does not support 'scrna'" )
return(NULL)
} else if (type %in% c("spatial_p", "spatial_t")) {
X <- helper_nncorr_protein(data, num_top_gene, ncores = ncores)
} else {
cli::cli_abort(c(
"Parameter {.var type} must be 'scrna', 'spatial_p' or 'spatial_t'",
"x" = "'{type}' is not a valid input data type"
))
}
X <- as.data.frame(X)
return(X)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.