# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# pre-defined default global variables
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' Predefined data format in given fgenes
#'
#' @description
#' Given data on differentially expressed genes and their belonging clusters should be in the right format.
#' Some columns must be maintained, which will be further used in downstream analysis.
#'
#'
kmusthave.colnames <- c("cluster", "gene", "LogFC", "PVal") # NOT CHANGE
#' Predefined Action Mode & Color Usage for It
#'
#' \code{kpred.action.mode} is extracted from databases depicting mode of actions for functional usages.
#' expression is transcriptional regulation.
#'
#' @rdname pred-action-mode
#' @export
#'
kpred.action.mode <- c("activation", "inhibition", "binding", "catalysis", "reaction", "expression", "ptmod", "other")
#' Predefined Action Mode & Color Usage for It
#'
#' \code{kpred.color.mode} is aligned with \code{kpred.action.mode} one-to-one. In default setting,
#' the color used for \bold{action mode} in plotting will use colors listed in this variable.
#'
#' @rdname pred-action-mode
#' @export
#'
kpred.color.mode <- c("#D70051", "#00913A", "#1296D4", "#956134", "#F46D42", "#0A0AFF", "#762A83", "#B5B5B6")
#c("#FB8072", "#80B1D3", "#B3DE69", "#8DD3C7", "#FFFFB3", "#BEBADA", "#FDB462", "#FCCDE5")
#' Predefined Action Effect & Color Usage for It
#'
#' \code{kpred.action.effect} is extracted from databases depicting action effect of actions for functional usages.
#'
#' @rdname pred-action-effect
#' @export
#'
kpred.action.effect <- c("positive", "negative", "unspecified", "undirected")
#' Predefined Action Effect & Color Usage for It
#'
#' \code{kpred.color.effect} is aligned with \code{kpred.action.effect} one-to-one. In default setting,
#' the color used for \bold{action effect} in plotting will use colors listed in this variable.
#'
#' @rdname pred-action-effect
#' @export
#'
kpred.color.effect <- c("#FB8072", "#B3DE69", "#80B1D3", "#8DD3C7")
#' Predefined Action Effect & Color Usage for It
#'
#' \code{kpred.ext.action.effect} is the \bold{extended} format, which depicts action effect and direction of action together,
#' and extends action effect to 7 different types.
#'
#' @rdname pred-action-effect
#' @export
#'
kpred.ext.action.effect <- c(
"A---B", # #1, undirected, others are directed
"A-->B", # #2
"A<--B", # #3
"A--|B", # #4
"A|--B", # #5
"A--oB", # #6
"Ao--B" # #7
# "undefined for (> 7) and all other(< 0)"
)
#' Predefined Action Effect & Color Usage for It
#'
#' \code{kpred.color.ext.effect} is aligned with \code{kpred.ext.action.effect} one-to-one. In default setting,
#' the color used for \bold{extened action effect} in plotting will use colors listed in this variable.
#'
#' @rdname pred-action-effect
#' @export
#'
kpred.color.ext.effect <- c("#8DD3C7", "#FB8072", "#FF5740", "#B3DE69", "#81EF48", "#80B1D3", "#6B6AEA")
#
TgView.formula.onExprs.default <- function(
data.f,
data.b
) {
if (length(data.f) != length(data.b)) {
stop("Unexpected non-identical length data input!")
}
return(data.f * data.b)
}
#' Formula on Power for One Gene Pair
#'
#' @description
#' This is the default formula on calculating power for every gene pair, and the
#' sum value of LogFC for gene partners will be regarded as power.
#'
#' @param data.f vector. The LogFC values for one list of gene partners. LogFC values are
#' specified when creating \code{\link{InterCell}} object.
#' @param data.b vector. The LogFC values for another list of gene partners, which is
#' one-by-one matched to those in parameter \code{data.f}.
#'
TgView.formula.onLogFC.default <- function(
data.f,
data.b
) {
if (length(data.f) != length(data.b)) {
stop("Unexpected non-identical length data input!")
}
return(data.f + data.b)
}
#' Formula on Confidence for One Gene Pair
#'
#' @description
#' This is the default formula on calculating confidence for every gene pair, and the
#' multiplying value of PVal for gene partners will be regarded as confidence.
#'
#' @param data.f vector. The PVal values for one list of gene partners. PVal values are
#' specified when creating \code{\link{InterCell}} object.
#' @param data.b vector. The PVal values for another list of gene partners, which is
#' one-by-one matched to those in parameter \code{data.f}.
#' @param pval.log.max The number for replacing the infinite number when \code{log(PVal = 0)}.
#'
TgView.formula.onPVal.default <- function(
data.f,
data.b,
pval.log.max = 300
) {
# 'pval.log.max' is 300 in default setting.
# The use of 300 as default maximum, because e-303 are usual lowest limit when all log(values) are infinite.
default.max.replace <- pval.log.max
if (length(data.f) != length(data.b)) {
stop("Unexpected non-identical length data input!")
}
tmp.f <- abs(log10(data.f))
tmp.b <- abs(log10(data.b))
inds.tmp.f <- which(is.finite(tmp.f))
inds.tmp.b <- which(is.finite(tmp.b))
if (length(inds.tmp.f) == 0 && length(inds.tmp.b) == 0) {
tmp.f <- rep(default.max.replace, times = length(tmp.f))
tmp.b <- rep(default.max.replace, times = length(tmp.b))
} else {
max.f <- suppressWarnings(max(tmp.f[inds.tmp.f])) # if length(inds.tmp.f) == 0, max returns -Inf
max.f <- ifelse(is.infinite(max.f), default.max.replace, max.f)
max.b <- suppressWarnings(max(tmp.b[inds.tmp.b])) # if length(inds.tmp.b) == 0, max returns -Inf
max.b <- ifelse(is.infinite(max.b), default.max.replace, max.b)
tmp.f[which(is.infinite(tmp.f))] <- 10 * max.f
tmp.b[which(is.infinite(tmp.b))] <- 10 * max.b
}
return(tmp.f * tmp.b)
}
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# InterCell Object definitions
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' The Class for Database Storage in InterCellDB
#'
#' The InterCellDBPack class provides a standard storage format of all databases for running analysis.
#' This object is mostly created along with \code{\link{InterCell-class}}.
#' Users need to specify the species firstly. Then, user-defined subset of database could be
#' collected by function \code{\link{SelectDBSubset}}.
#'
#' @slot species InterCellDB only allows 'human' and 'mouse' species.
#' @slot genes.db The gene reference database, which is generated from NCBI gene site. It records all the authorized gene names.
#' @slot pairs.db The database records all gene pairs, which are transferred from protein interaction database - 'STRING'.
#' @slot actions.db The database records all action properties (including 'action.mode', 'action.effect') for gene pairs in slot:`pairs.db`.
#' @slot anno.location.db The database collects all subcellular location from COMPARTMENTS for genes in slot:`pairs.db`.
#' @slot anno.type.db The database collects all molecular function from Uniprot for genes in slot:`pairs.db`.
#' @slot go.ref.db The database collects all GO terms for genes in slot:`pairs.db`.
#' @slot accessory.db A list of accessory database, for example, the mapping list of raw keywords from Uniport.
#' @slot misc The preserved area for miscellaneous things. Currently no usage.
#'
#' @name InterCellDBPack-class
#' @rdname InterCellDBPack-class
#' @exportClass InterCellDBPack
#'
InterCellDBPack <- setClass(
Class = "InterCellDBPack",
slots = c(
species = "character",
genes.db = "list",
pairs.db = "data.frame",
actions.db = "data.frame",
anno.location.db = "data.frame",
anno.type.db = "data.frame",
go.ref.db = "data.frame",
accessory.db = "list",
misc = "list"
)
)
#' The InterCell Class
#'
#' The InterCell object is used to in all the analysis \pkg{InterCellDB} provided.
#' It incoporates the necessary databases, and stores every important intermediate result.
#'
#' @slot fgenes InterCellDB needs the differential expressed genes and their belonging clusters as input.
#' The given data need to have columns named 'gene', 'cluster', 'LogFC', 'PVal'. Those each means, 'gene': the
#' authorized gene names, 'cluster': cell cluster, 'LogFC': the log transformed fold change of genes, 'PVal': the
#' P val for regarding one gene as differentially expressed one.
#' The expression level of each gene is also needed when statistical test is required and it should be stored in column named 'Exprs'.
#' @slot database Database stored in \code{InterCellDBPack} object.
#' @slot formulae The default formulae to be used in analysis.
#' @slot inter.fullview This is used to store the result of network analysis.
#' @slot tg.itinfo This stores information about one intercellular communication.
#' @slot tg.action.pairs This is used to store the intermediate result of action properties and gene pairs.
#' @slot tg.veinfo This is used to store the intermediate result of one intercellular communication (for one interacting 2-cell group).
#' @slot tg.action.comp This is used to store the result of analyzing composition of action mode and action effect in one interacellular communication.
#' @slot tg.spgenes This is used to store the selected gene pairs and all their participating interacitons in one intercellular communication.
#' @slot tool.vars This stores several variables to be used embedded in program.
#' @slot misc The not important intermediate result will be put in this parameter as well as some
#' pre-defined settings.
#'
#' @name InterCell-class
#' @rdname InterCell-class
#' @exportClass InterCell
#'
InterCell <- setClass(
Class = "InterCell",
slots = c(
fgenes = "data.frame",
database = "InterCellDBPack",
formulae = "list",
inter.fullview = "list",
tg.itinfo = "list",
tg.action.pairs = "list",
tg.veinfo = "list",
tg.action.comp = "list",
tg.spgenes = "list",
#pred.action = "list", pred.action This stores the pre-defined action modes and action effects, and terms beyond definitions in this parameter will be regarded as invalid ones.
tool.vars = "list",
misc = "list"
)
)
# validation function for \code{InterCellDBPack-class}
validInterCellDBPackObject <- function(object) {
# check species
allowed.species <- c("human", "mouse")
if (length(object@species) != 1 || sum(allowed.species %in% object@species) == 0) {
return(paste("InterCellDB only supports species in human and mouse."))
}
TRUE
}
setValidity("InterCellDBPack", validInterCellDBPackObject)
# validation function for \code{InterCell-class}
# only the most un-modifiable slots should be checked,
# and all checked ones should set its initial value in the first time, or get errors
validInterCellObject <- function(object) {
# check pred.action
# if (length(object@pred.action$action.mode) == 0 || !all(object@pred.action$action.mode %in% kpred.action.mode)) {
# return(paste0("Using undefined action mode: ", paste0(setdiff(object@pred.action$action.mode, kpred.action.mode), collapse = ", ")))
# }
# if (length(object@pred.action$action.mode) != length(object@pred.action$color.mode)) {
# return(paste0("Using different length of 'action.mode' and its used color 'color.mode'!"))
# }
# if (length(object@pred.action$action.effect) == 0 || !all(object@pred.action$action.effect %in% kpred.action.effect)) {
# return(paste0("Using undefined action effect: ", paste0(setdiff(object@pred.action$action.effect, kpred.action.effect), collapse = ", ")))
# }
# if (length(object@pred.action$action.effect) != length(object@pred.action$color.effect)) {
# return(paste0("Using different length of 'action.effect' and its used color 'color.effect'!"))
# }
# check k***Split
if (length(object@tool.vars$gene.pair.split) != 1) {
return(paste0("Invalid symbol for splitting genes in gene pairs!"))
}
if (length(object@tool.vars$cluster.split) != 1) {
return(paste0("Invalid symbol for splitting clusters in cluster groups!"))
}
# check musthave colnames in given fgenes
if (!all(kmusthave.colnames %in% object@misc$musthave.colnames)) {
return(paste0("Detect manual modification on object internal variables. ",
"Program will be failed in unexpected situations. "))
}
validInterCellDBPackObject(object@database)
TRUE
}
setValidity("InterCell", validInterCellObject)
setMethod(
f = "initialize",
signature = c("InterCellDBPack"),
definition = function(.Object, ...) {
.Object <- methods::callNextMethod(.Object, ...)
.Object@accessory.db <- list(merge.type.list = Uniprot.key.map.list)
.Object@misc <- list(TAKEN = "nothing yet")
methods::validObject(.Object)
return(.Object)
}
)
# initialize \code{InterCell-class}
setMethod(
f = "initialize",
signature = c("InterCell"),
definition = function(.Object, ...) {
.Object <- methods::callNextMethod(.Object, ...)
.Object@formulae <- list(
TG.EXPRS = TgView.formula.onExprs.default,
TG.LOGFC = TgView.formula.onLogFC.default,
TG.PVAL = TgView.formula.onPVal.default)
#.Object@pred.action <- list(action.mode = kpred.action.mode, action.effect = kpred.action.effect)
#.Object@tool.vars <- list(gene.pair.split = "-~-", cluster.split = "~")
methods::validObject(.Object)
return(.Object)
}
)
setMethod(
f = "show",
signature = "InterCellDBPack",
definition = function(object) {
cat("Using '", object@species, "' database.\n", sep = "")
}
)
setMethod(
f = "show",
signature = "InterCell",
definition = function(object) {
cat("A InterCell object, with ", nrow(object@fgenes), " differentially expressed genes spanning ",
length(unique(object@fgenes$cluster)), " clusters.\n", sep = "")
# show the used database
show(object@database)
# show if intercellular analysis is processed
if (!is.null(object@tg.veinfo) && length(object@tg.veinfo) > 0) {
this.involved.clusters <- getOrigClusterNameTgVEInfo(object)
cat("Intercellular analysis performed between ", this.involved.clusters$cluster.name.A, " and ", this.involved.clusters$cluster.name.B)
} else {
cat("Intercellular analysis not processed yet.")
}
cat("\n")
}
)
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Class Creation Method
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' Create InterCellDBPack Object
#'
#' @description
#' This function is used to create \code{InterCellDBPack-class}.
#'
#' @inheritParams InsideParam.species
#'
#' @return A \code{InterCellDBPack} object.
#'
#' @import methods
#'
#' @export
#'
CreateDBPackObject <- function(
species
) {
# Check species, only support human and mouse yet
species <- CheckSpeciesValidity(species)
# set initial database
ret.obj <- NULL
if (species == "human") {
ret.obj <- new(
Class = "InterCellDBPack",
species = species,
genes.db = genes.human.ref.db,
pairs.db = pairs.human.db,
actions.db = actions.human.ref.db,
anno.location.db = anno.location.human.ref.db,
anno.type.db = anno.type.human.ref.db,
go.ref.db = go.human.ref.db
)
}
if (species == "mouse") {
ret.obj <- new(
Class = "InterCellDBPack",
species = species,
genes.db = genes.mouse.ref.db,
pairs.db = pairs.mouse.db,
actions.db = actions.mouse.ref.db,
anno.location.db = anno.location.mouse.ref.db,
anno.type.db = anno.type.mouse.ref.db,
go.ref.db = go.mouse.ref.db
)
}
# get database slimmed to useful data in analyzing only
genes.cols.reserve <- c("tax_id", "GeneID", "Symbol", "Synonyms", "Symbol_from_nomenclature_authority")
ret.obj@genes.db$gene.ncbi.db <- ret.obj@genes.db$gene.ncbi.db[, genes.cols.reserve]
anno.locs.cols.reserve <- c("GeneID", "Gene.name", "GO.ID.target", "GO.Term.target", "Source", "Evidence", "score")
ret.obj@anno.location.db <- ret.obj@anno.location.db[, anno.locs.cols.reserve]
anno.type.cols.reserve <- c("GeneID", "Gene.name", "Keyword.ID", "Keyword.Name")
ret.obj@anno.type.db <- ret.obj@anno.type.db[, anno.type.cols.reserve]
return(ret.obj)
}
#' Analyze interaction network in full view
#'
#' @description
#' This function analyzes count and power of interaction pairs among all given clusters.
#'
#' @param DEG.table A table on differentially expressed genes and their belonging cell clusters.
#' @inheritParams InsideParam.species
#' @param add.exprs It decides whether to add gene expression information.
#' @param exprs.data It gives the normalized count matrix as expression information.
#' @param force.write.exprs If there is column 'Exprs' in parameter \code{DEG.table}, users should
#' decide whether to overwrite the existed one (set TRUE) or not (set FALSE).
#' @param remap.genes This decides whether to use \pkg{InterCellDB} integrated gene database to
#' standardize all genes to get more perfectly matched with protein/gene pairs database. Default is \code{FALSE}.
#' @param cluster.split The letters used to split 2 cell clusters in one interaction. It can also be modified later by
#' using \code{setClusterSplit} if not decided yet.
#' @param gene.pair.split The letters used to split 2 gene partners in one gene pair. It can also be modified later by
#' using \code{setGenePairSplit} if not decided yet.
#'
#'
#' @details
#' The parameter \code{DEG.table} is recommended to be generated by \pkg{Seurat}. Other packages are also applicable, if
#' they could handle scRNA-seq data, do cell clustering and do calculation on cluster-specific differentially expressed genes. The input
#' format of \code{DEG.table} should be one data.frame with 4 required columns that are named 'cluster', 'gene', 'LogFC', 'PVal'.
#' \itemize{
#' \item{cluster}: the cell cluster.
#' \item{gene}: differentially expressed genes, which are grouped by their belonging clusters.
#' \item{LogFC}: the fold change of genes.
#' \item{PVal}: the P value for gene being calculated as differentially expressed gene.
#' }
#'
#' Gene expression data can be added when parameter \code{add.exprs} is set TRUE. It will be stored in column 'Exprs' and
#' only those genes given in parameter \code{DEG.table[, "gene"]} will be perserved.
#'
#' To represent one interaction, like interaction between 'Myeloid cell' and 'T cell',
#' it will be looked like 'Myeloid cell~T cell' if \code{cluster.split = "~"}.
#' To represent oen gene pair, like gene pair of IL6 and IL6R,
#' it will be looked like 'IL6-~-IL6R' if \code{gene.pair.split = "-~-"}.
#'
#' To avoid program failure, the letters appearing in gene names are not recommended for \code{gene.pair.split},
#' and the letters appearing in cluster names are not recommended for \code{cluster.split}.
#' The program will test for those situation but users should keep this in mind.
#'
#' @return A \code{InterCell} object.
#'
#' @importFrom methods new show callNextMethod validObject
#'
#' @export
#'
CreateInterCellObject <- function(
DEG.table,
species,
add.exprs = FALSE,
exprs.data = NULL, # could be expression matrix or seurat[["RNA"]]@data, etc
force.write.exprs = FALSE,
remap.genes = FALSE,
cluster.split = "~",
gene.pair.split = "#~#"
) {
# pre-check part
if (!all(kmusthave.colnames %in% colnames(DEG.table))) {
stop("Required columns are not given!\n",
paste("Column named ", paste0(setdiff(kmusthave.colnames, colnames(DEG.table)), collapse = ", "),
" are not included in given feature genes table.",
"Please use colnames(<var>)[<index>] <- '<name>' to set proper columns corresponding to those.",
"Contents represented by <> should be replaced by user definitions!")
)
}
# set database object
DBPack.obj <- CreateDBPackObject(species)
# add expression data when needed
tmp.musthave.new <- kmusthave.colnames
DEG.align.res <- NA
if (add.exprs == TRUE) {
DEG.table <- DataPrep.AddExprs(DEG.table, exprs.data, force.write.exprs)
tmp.musthave.new <- c(tmp.musthave.new, "Exprs")
if (remap.genes == TRUE) {
dummy.gdf <- data.frame(gene = rownames(exprs.data), ID = seq_len(nrow(exprs.data)), cluster = "IT", stringsAsFactors = FALSE)
DEG.align.res <- DataPrep.RemapClustersMarkers(dummy.gdf, species, final.dup.rm = FALSE)
colnames(dummy.gdf)[1] <- c("orig.gene")
dummy.gdf[, "mapped.gene"] <- DEG.align.res$result[order(DEG.align.res$result$ID), "gene"]
# use this to remap genes in every cluster
DEG.table$gene <- dummy.gdf[match(DEG.table$gene, dummy.gdf[, "orig.gene"]), "mapped.gene"]
}
} else {
# re-align input table with gene reference database in InterCellDB
if (remap.genes == TRUE) {
DEG.align.res <- DataPrep.RemapClustersMarkers(DEG.table, species)
DEG.table <- DEG.align.res$result
}
}
IT.InterCell.Obj <- new(
Class = "InterCell",
fgenes = DEG.table,
database = DBPack.obj,
#pred.action = list(action.mode = kpred.action.mode, action.effect = kpred.action.effect,
# color.mode = kpred.color.mode, color.effect = kpred.color.effect),
tool.vars = list(gene.pair.split = gene.pair.split, cluster.split = cluster.split),
misc = list(musthave.colnames = tmp.musthave.new, if.remap.genes = remap.genes)
)
# give the result of alignment in the misc
IT.InterCell.Obj@misc$input.align.result <- DEG.align.res
# set object
IT.InterCell.Obj <- setGenePairSplit(IT.InterCell.Obj, gene.pair.split)
IT.InterCell.Obj <- setClusterSplit(IT.InterCell.Obj, cluster.split)
return(IT.InterCell.Obj)
}
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Accessory Function
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' Using Reference Database
#'
#' The \code{setRefDatabase} function is to \bold{set} the reference database for \code{InterCell}.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname RefDatabase-InterCell
#' @export
#'
setGeneric(name = "setRefDatabase", def = function(object, ...) {
standardGeneric("setRefDatabase")
}
)
#' @param new.ref.database A new reference database.
#'
#' @examples
#' \dontrun{
#' setRefDatabase(object, some.new.ref.database)
#' }
#'
#' @rdname RefDatabase-InterCell
#'
setMethod(
f = "setRefDatabase",
signature = "InterCell",
definition = function(object, new.ref.database) {
if (class(new.ref.database) != "InterCellDBPack") {
stop("Given new reference database is not in right format.")
}
print("Change reference database.")
object@database <- new.ref.database
return(object)
}
)
#' Using Reference Database
#'
#' The \code{getRefDatabase} function is to \bold{get} the reference database for \code{InterCell}.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname RefDatabase-InterCell
#' @export
#'
setGeneric(name = "getRefDatabase", def = function(object, ...) {
standardGeneric("getRefDatabase")
}
)
#'
#' @examples
#' \dontrun{
#' getRefDatabase(object)
#' }
#'
#' @rdname RefDatabase-InterCell
#'
setMethod(
f = "getRefDatabase",
signature = "InterCell",
definition = function(object) {
if (class(object@database) != "InterCellDBPack") {
stop("Unexpected error: Reference database is invalid!")
}
return(object@database)
}
)
#' Using FullView Result
#'
#' The \code{setFullViewResult} function is to \bold{set} the result of network analysis.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname FullViewResult-InterCell
#' @export
#'
setGeneric(name = "setFullViewResult", def = function(object, ...) {
standardGeneric("setFullViewResult")
}
)
#' @param new.inter.fullview A new result of network analysis.
#'
#' @examples
#' \dontrun{
#' setFullViewResult(object, some.new.fullview.result)
#' }
#'
#' @rdname FullViewResult-InterCell
#'
setMethod(
f = "setFullViewResult",
signature = "InterCell",
definition = function(object, new.inter.fullview) {
if (!is.null(object@inter.fullview) && length(object@inter.fullview) != 0) {
warning("Overwrite existed result on network analysis. Former downstream result is cleaned.")
}
object@inter.fullview <- new.inter.fullview
object@tg.action.pairs <- list()
object@tg.action.comp <- list()
object@tg.itinfo <- list()
object@tg.veinfo <- list()
object@tg.spgenes <- list()
return(object)
}
)
#' Using FullView Result
#'
#' The \code{getFullViewResult} function is to \bold{get} the result of network analysis.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname FullViewResult-InterCell
#' @export
#'
setGeneric(name = "getFullViewResult", def = function(object, ...) {
standardGeneric("getFullViewResult")
}
)
#' @examples
#' \dontrun{
#' getFullViewResult(object)
#' }
#' @rdname FullViewResult-InterCell
#'
setMethod(
f = "getFullViewResult",
signature = "InterCell",
definition = function(object) {
if (is.null(object@inter.fullview) || length(object@inter.fullview) == 0) {
stop("Network analysis is not performed yet. Please use function `AnalyzeInterInFullView` to generate it.")
}
return(object@inter.fullview)
}
)
#' Using Target Plain Information
#'
#' The \code{setTgItInfo} function is to \bold{set} plain information about gene pairs,
#' which are directly fetched from full view analysis.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgItInfo-InterCell
#' @export
#'
setGeneric(name = "setTgItInfo", def = function(object, ...) {
standardGeneric("setTgItInfo")
}
)
#' @param new.itinfo A new plain information on gene pairs, which are directly fetched from full view analysis.
#'
#' @examples
#' \dontrun{
#' setTgItInfo(object, some.new.itinfo)
#' }
#'
#' @rdname TgItInfo-InterCell
#'
setMethod(
f = "setTgItInfo",
signature = "InterCell",
definition = function(object, new.itinfo) {
object@tg.itinfo <- new.itinfo
return(object)
}
)
#' Using Target Plain Information
#'
#' The \code{getTgItInfo} function is to \bold{get} plain information about gene pairs,
#' which are directly fetched from full view analysis.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgItInfo-InterCell
#' @export
#'
setGeneric(name = "getTgItInfo", def = function(object, ...) {
standardGeneric("getTgItInfo")
}
)
#' @examples
#' \dontrun{
#' getTgItInfo(object)
#' }
#' @rdname TgItInfo-InterCell
#'
setMethod(
f = "getTgItInfo",
signature = "InterCell",
definition = function(object) {
if (is.null(object@tg.itinfo) || length(object@tg.itinfo) == 0) {
stop("No plain information between specific 2 cells is fetched. Please use `FetchInterOI` to generate that. ")
}
return(object@tg.itinfo)
}
)
#' Using TargetView Action Pairs
#'
#' The \code{setTgActionPairs} function is to \bold{set} the result of intercellular analysis
#' on action properties for one interaction between 2 cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgActionPairs-InterCell
#' @export
#'
setGeneric(name = "setTgActionPairs", def = function(object, ...) {
standardGeneric("setTgActionPairs")
}
)
#' @param new.action.pairs A new result of intercellular analysis on action properties
#'
#' @examples
#' \dontrun{
#' setTgActionPairs(object, some.new.tg.action.pairs)
#' }
#'
#' @rdname FullViewResult-InterCell
#'
setMethod(
f = "setTgActionPairs",
signature = "InterCell",
definition = function(object, new.action.pairs) {
object@tg.action.pairs <- new.action.pairs
return(object)
}
)
#' Using TargetView Action Pairs
#'
#' The \code{getTgActionPairs} function is to \bold{get} the result of intercellular analysis
#' on action properties for one interaction between 2 cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgActionPairs-InterCell
#' @export
#'
setGeneric(name = "getTgActionPairs", def = function(object, ...) {
standardGeneric("getTgActionPairs")
}
)
#' @examples
#' \dontrun{
#' getTgActionPairs(object)
#' }
#' @rdname TgActionPairs-InterCell
#'
setMethod(
f = "getTgActionPairs",
signature = "InterCell",
definition = function(object) {
if (is.null(object@tg.action.pairs) || length(object@tg.action.pairs) == 0) {
stop("The action pairs for selected interaction is not generated. Please use `FetchInterOI` to generate that. ")
}
return(object@tg.action.pairs)
}
)
#' Using TargetView VEinfo
#'
#' The \code{setTgVEInfo} function is to \bold{set} the result of intercellular analysis
#' on detailed gene pairs (forming the interaction network) for one interaction between 2 cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgVEInfo-InterCell
#' @export
#'
setGeneric(name = "setTgVEInfo", def = function(object, ...) {
standardGeneric("setTgVEInfo")
}
)
#' @param new.veinfo A new result of intercellular analysis on detailed gene pairs.
#'
#' @examples
#' \dontrun{
#' setTgVEInfo(object, some.new.veinfo)
#' }
#'
#' @rdname TgVEInfo-InterCell
#'
setMethod(
f = "setTgVEInfo",
signature = "InterCell",
definition = function(object, new.veinfo) {
object@tg.veinfo <- new.veinfo
return(object)
}
)
#' Using TargetView VEinfo
#'
#' The \code{getTgVEInfo} function is to \bold{get} the result of intercellular analysis
#' on detailed gene pairs (forming the interaction network) for one interaction between 2 cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgVEInfo-InterCell
#' @export
#'
setGeneric(name = "getTgVEInfo", def = function(object, ...) {
standardGeneric("getTgVEInfo")
}
)
#' @examples
#' \dontrun{
#' getTgVEInfo(object)
#' }
#' @rdname TgVEInfo-InterCell
#'
setMethod(
f = "getTgVEInfo",
signature = "InterCell",
definition = function(object) {
if (is.null(object@tg.veinfo) || length(object@tg.veinfo) == 0) {
stop("No interaction between specific 2 cells is fetched. Please use `FetchInterOI` to generate that. ")
}
return(object@tg.veinfo)
}
)
#' Using TargetView Action Composition
#'
#' The \code{setTgActionComp} function is to \bold{set} the result of intercellular analysis
#' on composition of action mode and action effect for one interaction between 2 cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgActionComp-InterCell
#' @export
#'
setGeneric(name = "setTgActionComp", def = function(object, ...) {
standardGeneric("setTgActionComp")
}
)
#' @param new.action.comp A new result of intercellular analysis on composition of action mode and action effect.
#'
#' @examples
#' \dontrun{
#' setTgActionComp(object, some.new.action.comp)
#' }
#'
#' @rdname TgActionComp-InterCell
#'
setMethod(
f = "setTgActionComp",
signature = "InterCell",
definition = function(object, new.action.comp) {
object@tg.action.comp <- new.action.comp
return(object)
}
)
#' Using TargetView Action Composition
#'
#' The \code{getTgActionComp} function is to \bold{get} the result of intercellular analysis
#' on composition of action mode and action effect for one interaction between 2 cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgActionComp-InterCell
#' @export
#'
setGeneric(name = "getTgActionComp", def = function(object, ...) {
standardGeneric("getTgActionComp")
}
)
#' @examples
#' \dontrun{
#' getTgActionComp(object)
#' }
#' @rdname TgActionComp-InterCell
#'
setMethod(
f = "getTgActionComp",
signature = "InterCell",
definition = function(object) {
if (is.null(object@tg.action.comp) || length(object@tg.action.comp) == 0) {
stop("The analysis of composition of action properties is not performed. Please use `AnalyzeInterInAction` to generate that. ")
}
return(object@tg.action.comp)
}
)
#' Using TargetView Pair Specificity
#'
#' The \code{setTgSpGenes} function is to \bold{set} the result of intercellular analysis
#' on gene pair specificity for one interaction between 2 cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgSpGenes-InterCell
#' @export
#'
setGeneric(name = "setTgSpGenes", def = function(object, ...) {
standardGeneric("setTgSpGenes")
}
)
#' @param new.spgenes A new result of intercellular analysis on gene pair specificity.
#'
#' @examples
#' \dontrun{
#' setTgSpGenes(object, some.new.spgenes)
#' }
#'
#' @rdname TgSpGenes-InterCell
#'
setMethod(
f = "setTgSpGenes",
signature = "InterCell",
definition = function(object, new.spgenes) {
object@tg.spgenes <- new.spgenes
return(object)
}
)
#' Using TargetView Pair Specificity
#'
#' The \code{getTgSpGenes} function is to \bold{get} the result of intercellular analysis
#' on gene pair specificity for one interaction between 2 cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgSpGenes-InterCell
#' @export
#'
setGeneric(name = "getTgSpGenes", def = function(object, ...) {
standardGeneric("getTgSpGenes")
}
)
#' @examples
#' \dontrun{
#' getTgSpGenes(object)
#' }
#' @rdname TgSpGenes-InterCell
#'
setMethod(
f = "getTgSpGenes",
signature = "InterCell",
definition = function(object) {
if (is.null(object@tg.spgenes) || length(object@tg.spgenes) == 0) {
stop("Analysis of gene pair specificity is not performed. Please use `AnalyzeInterSpecificity` to generate that. ")
}
return(object@tg.spgenes)
}
)
#' Using Gene Pair Split
#'
#' The \code{setGenePairSplit} function is to \bold{set} the gene pair split,
#' which is either one charater or string, and used to split 2 gene partners in one gene pair.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname GenePairSplit-InterCell
#' @export
#'
setGeneric(name = "setGenePairSplit", def = function(object, ...) {
standardGeneric("setGenePairSplit")
}
)
#' @param new.gene.pair.split A new gene pair split, which is either one character or string.
#'
#' @examples
#' \dontrun{
#' setGenePairSplit(object, some.new.gene.pair.split)
#' }
#'
#' @rdname GenePairSplit-InterCell
#'
setMethod(
f = "setGenePairSplit",
signature = "InterCell",
definition = function(object, new.gene.pair.split) {
if (length(new.gene.pair.split) != 1) {
stop("Only one character or string is allowed, like '~' or '-~-'.")
}
# check if symbol are in fgenes
use.genes <- unique(object@fgenes$gene)
check.split.res <- strsplit(use.genes, split = new.gene.pair.split, fixed = TRUE)
if (length(unlist(check.split.res)) > length(use.genes)) {
stop("Used gene pair split symbol exists in gene names given in processing data (fgenes), and will be discarded.")
}
object@tool.vars$gene.pair.split <- new.gene.pair.split
return(object)
}
)
#' Using Gene Pair Split
#'
#' The \code{getGenePairSplit} function is to \bold{get} the gene pair split,
#' which is either one charater or string, and used to split 2 gene partners in one gene pair.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname GenePairSplit-InterCell
#' @export
#'
setGeneric(name = "getGenePairSplit", def = function(object) {
standardGeneric("getGenePairSplit")
}
)
#' @examples
#' \dontrun{
#' getGenePairSplit(object)
#' }
#' @rdname GenePairSplit-InterCell
#'
setMethod(
f = "getGenePairSplit",
signature = "InterCell",
definition = function(object) {
return(object@tool.vars$gene.pair.split)
}
)
#' Using Cluster Group Split
#'
#' The \code{setClusterSplit} function is to \bold{set} the cluster group split,
#' which is either one charater or string, and used to split 2 cell clusters in one interaction.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname ClusterSplit-InterCell
#' @export
#'
setGeneric(name = "setClusterSplit", def = function(object, ...) {
standardGeneric("setClusterSplit")
}
)
#' @param new.cluster.split A new cluster group split, which is either one character or string.
#'
#' @examples
#' \dontrun{
#' setClusterSplit(object, some.new.cluster.split)
#' }
#'
#' @rdname ClusterSplit-InterCell
#'
setMethod(
f = "setClusterSplit",
signature = "InterCell",
definition = function(object, new.cluster.split) {
if (length(new.cluster.split) != 1) {
stop("Only one character or string is allowed, like '~' or '~~'.")
}
# check if symbol are in cluster names
use.clusters <- unique(as.character(object@fgenes$cluster))
check.split.res <- strsplit(use.clusters, split = new.cluster.split, fixed = TRUE)
if (length(unlist(check.split.res)) > length(use.clusters)) {
stop("Used cluster group split symbol exists in cluster name given in processing data (fgenes), and will be discarded.")
}
object@tool.vars$cluster.split <- new.cluster.split
return(object)
}
)
#' Using Cluster Group Split
#'
#' The \code{getClusterSplit} function is to \bold{set} the cluster group split,
#' which is either one charater or string, and used to split 2 cell clusters in one interaction.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname ClusterSplit-InterCell
#' @export
#'
setGeneric(name = "getClusterSplit", def = function(object) {
standardGeneric("getClusterSplit")
}
)
#' @examples
#' \dontrun{
#' getClusterSplit(object)
#' }
#' @rdname ClusterSplit-InterCell
#'
setMethod(
f = "getClusterSplit",
signature = "InterCell",
definition = function(object) {
return(object@tool.vars$cluster.split)
}
)
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Database Selection Functions
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' Select Subset of Gene Pair Database
#'
#' This function is to select subset of gene pair databases, and options on evidence sources,
#' confidence level, action properties (mode and effect) will be used. The result from different evidence sources
#' will be \bold{union} in default settings. The result from action properties could be either intersection result or union.
#'
#' @param object A \code{InterCellDBPack} or \code{InterCell} object. \code{SelectDBSubset.default} uses
#' \code{InterCellDBPack} as input.
#' @param combined.score.range The combined score from all evidence sources, which works on the whole database.
#' @param use.exp It adds the data whose evidence is experimentally validatd.
#' @param exp.score.range It controls the score range when selecting subset by experimentally validatd evidence, i.e. when \code{use.exp = TRUE}.
#' The score should be 2 numbers within 1~1000.
#' @param use.know It adds the data whose evidence is pathway curated.
#' @param know.score.range It controls the score range when selecting subset by pathway curated evidence, i.e. when \code{use.know = TRUE}.
#' The score should be 2 numbers within 1~1000.
#' @param use.pred It adds the data whose evidence is predicted.
#' @param pred.score.range It controls the score range when selecting subset by predicted evidence, i.e. when \code{use.pred = TRUE}.
#' The score should be 2 numbers within 1~1000.
#' @param sel.physical It selects the subset of gene pairs whose corresponding protein pairs are physical associated. This parameter is
#' identical to set \code{sel.action.mode = "binding"} for now due to the database limitation. It may change in future.
#' @param sel.action.mode Selection by action mode. "ALL" means not use this to select subset.
#' Other options will be directly select gene pair in that action mode. Supported options are listed in \code{kpred.action.mode}.
#' @param sel.action.effect Selection by action effect. "ALL" means not use this to select subset. Other
#' options will be directly select gene pair in that action effect. Supported options are listed in \code{kpred.action.effect}.
#' @param sel.action.merge.option Either 'intersect' or 'union'. The option for merging the result from selection on action mode and action effect.
#' @param action.score.range It controls the score range when selecting subset from action databases.
#' The score should be 2 numbers within 1~1000.
#' @param slim.along.with.pairs This decides whether to select the corresponding subset of action pair database
#' after selecting subset of gene pair database.
#'
#' @details
#' The 3 evidence channels (\code{use.exp}, \code{use.know}, \code{use.pred}) are in identical priority.
#' For one gene pair, it could have scores from 3 evidence channels at the same time. In default setting, the
#' result of this function will be \bold{union} of results from 3 channels. If union is not satisfied, the function
#' \code{\link{MergeDBSubset}} will help.
#'
#' The score is within 1~1000. Score (>=700) would be consider as high confidence, and (>=900) is even higher confident.
#' Score (>=400) would be consider over medium confidence.
#' Score (<400) would be consider low confidence.
#'
#' @return A \code{InterCellDBPack} or \code{InterCell} object, which is the same as given parameter \code{object}.
#'
#' @rdname SelectDBSubset
#' @order 4
#'
#'
SelectDBSubset.default <- function(
object,
combined.score.range = c(1, 1000),
use.exp = TRUE,
exp.score.range = c(1, 1000),
use.know = TRUE,
know.score.range = c(1, 1000),
use.pred = TRUE,
pred.score.range = c(1, 1000),
sel.physical = FALSE,
sel.action.mode = "ALL",
sel.action.effect = "ALL",
sel.action.merge.option = "intersect",
action.score.range = c(1, 1000),
slim.along.with.pairs = TRUE
) {
# score standardize
inside.score.stdfunc <- function(score.range, score.name) {
score.range <- as.numeric(score.range)
if (length(score.range) != 2) {
stop(paste0("Given `", score.name, "` not in right format, should be like c(1, 1000)."))
}
if (score.range[1] < 1) {
score.range[1] <- 1
}
if (score.range[2] > 1000) {
score.range[2] <- 1000
}
return(score.range)
}
# check score range
combined.score.range <- inside.score.stdfunc(combined.score.range, "combined.score.range")
exp.score.range <- inside.score.stdfunc(exp.score.range, "exp.score.range")
know.score.range <- inside.score.stdfunc(know.score.range, "know.score.range")
pred.score.range <- inside.score.stdfunc(pred.score.range, "pred.score.range")
action.score.range <- inside.score.stdfunc(action.score.range, "action.score.range")
# check action options
allowed.action.options <- c("intersect", "union")
sel.action.merge.option <- CheckParamStd(sel.action.merge.option, allowed.action.options, opt.name = "`sel.action.merge.option`", stop.on.zero = TRUE)
# check action
if (sel.action.mode[1] != "ALL") {
not.valid.action.mode <- setdiff(sel.action.mode, kpred.action.mode)
if (length(not.valid.action.mode) > 0) {
warning("Given undefined action mode: ", paste0(not.valid.action.mode, collapse = ", ", ". "))
}
sel.action.mode <- intersect(sel.action.mode, kpred.action.mode)
if (length(sel.action.mode) == 0) {
stop("No valid action mode is selected!")
}
}
if (sel.action.effect[1] != "ALL") {
not.valid.action.effect <- setdiff(sel.action.effect, kpred.action.effect)
if (length(not.valid.action.effect) > 0) {
warning("Given undefined aciton effect: ", paste0(not.valid.action.effect, collapse = ", "), ". ")
}
sel.action.effect <- intersect(sel.action.effect, kpred.action.effect)
if (length(sel.action.effect) == 0) {
stop("No valid action effect is selected!")
}
}
this.pairs.db <- object@pairs.db
this.actions.db <- object@actions.db
# top-range subset
this.pairs.db <- subset(this.pairs.db, inter.Combined.Score >= combined.score.range[1] & inter.Combined.Score <= combined.score.range[2])
this.actions.db <- subset(this.actions.db, score >= action.score.range[1] & score <= action.score.range[2])
if (nrow(this.pairs.db) == 0 || nrow(this.actions.db) == 0) {
stop("Select too small range of score, and get no valid gene pairs!")
}
# select from scores
retDB.list <- list()
# get exp part
if (use.exp == TRUE) {
retDB.list <- c(retDB.list, list(intersect(which(this.pairs.db$inter.Experiments.Score >= exp.score.range[1]),
which(this.pairs.db$inter.Experiments.Score <= exp.score.range[2]))))
}
if (use.know == TRUE) {
retDB.list <- c(retDB.list, list(intersect(which(this.pairs.db$inter.Database.Score >= know.score.range[1]),
which(this.pairs.db$inter.Database.Score <= know.score.range[2]))))
}
if (use.pred == TRUE) {
retDB.list <- c(retDB.list, list(intersect(which(this.pairs.db$inter.Predicted.Score >= pred.score.range[1]),
which(this.pairs.db$inter.Predicted.Score <= pred.score.range[2]))))
}
# collect result from score selection
this.pairs.db <- this.pairs.db[Reduce(union, retDB.list), ]
#this.pairs.db <- DoPartUnique(this.pairs.db, match(c("inter.GeneName.A", "inter.GeneName.B"), colnames(this.pairs.db)))
## select from actions
use.action.pairs.list <- list()
use.cut.symbol <- "->-"
# in mode
if (sel.physical == TRUE) { # currently it's the same to set sel.action.mode => 'binding'
if (sel.action.mode[1] == "ALL") {
sel.action.mode <- "binding"
} else {
sel.action.mode <- unique(c(sel.action.mode, "binding"))
}
}
if (sel.action.mode[1] != "ALL") {
tmp.sel.mode.pairs.df <- FastAlignPairs(this.actions.db[which(this.actions.db$mode %in% sel.action.mode),
c("inter.GeneID.A", "inter.GeneID.B", "inter.GeneName.A", "inter.GeneName.B")], 4)
tmp.sel.mode.pairs <- paste(tmp.sel.mode.pairs.df[, "inter.GeneID.A"], tmp.sel.mode.pairs.df[, "inter.GeneID.B"], sep = use.cut.symbol)
use.action.pairs.list <- c(use.action.pairs.list, list(action.mode = tmp.sel.mode.pairs))
}
# in effect
if (sel.action.effect[1] != "ALL") {
# action
tmp.col.action <- tapply(seq_len(nrow(this.actions.db)), this.actions.db$action, function(x) { x })
# set list name right to be "non", "activation", "inhibition"
ind.non.action <- setdiff(seq_along(tmp.col.action), match(c("activation", "inhibition"), names(tmp.col.action)))
if (length(ind.non.action) != 1) {
stop("Database Failed by incorporating undefined action mode.")
}
names(tmp.col.action)[ind.non.action] <- "non"
# direction
tmp.col.direction <- tapply(seq_len(nrow(this.actions.db)), this.actions.db$is_directional, function(x) { x })
#tmp.col.a.act <- tapply(seq_len(nrow(this.actions.db)), this.actions.db$a_is_acting, function(x) { x })
tmp.ret.inds <- integer()
if ("positive" %in% sel.action.effect) {
tmp.ret.inds <- c(tmp.ret.inds, Reduce(intersect, list(tmp.col.action[["activation"]], tmp.col.direction[["t"]])))
}
if ("negative" %in% sel.action.effect) {
tmp.ret.inds <- c(tmp.ret.inds, Reduce(intersect, list(tmp.col.action[["inhibition"]], tmp.col.direction[["t"]])))
}
if ("unspecified" %in% sel.action.effect) {
tmp.ret.inds <- c(tmp.ret.inds, Reduce(intersect, list(tmp.col.action[["non"]], tmp.col.direction[["t"]])))
}
if ("undirected" %in% sel.action.effect) {
tmp.ret.inds <- c(tmp.ret.inds, Reduce(intersect, list(tmp.col.action[["non"]], tmp.col.direction[["f"]])))
}
tmp.sel.effect.pairs.df <- FastAlignPairs(this.actions.db[tmp.ret.inds,
c("inter.GeneID.A", "inter.GeneID.B", "inter.GeneName.A", "inter.GeneName.B")], 4)
tmp.sel.effect.pairs <- paste(tmp.sel.effect.pairs.df[, "inter.GeneID.A"], tmp.sel.effect.pairs.df[, "inter.GeneID.B"], sep = use.cut.symbol)
use.action.pairs.list <- c(use.action.pairs.list, list(action.effect = tmp.sel.effect.pairs))
}
# merge result
use.action.pairs <- character()
if (length(use.action.pairs.list) == 2 && all(c("action.mode", "action.effect") %in% names(use.action.pairs.list))) {
use.action.pairs <- switch(sel.action.merge.option,
"intersect" = {
intersect(use.action.pairs.list[["action.mode"]], use.action.pairs.list[["action.effect"]])
},
"union" = {
union(use.action.pairs.list[["action.mode"]], use.action.pairs.list[["action.effect"]])
},
stop("Undefined result merging option!")
)
} else {
use.action.pairs <- as.character(unlist(use.action.pairs.list))
}
use.action.pairs <- unique(use.action.pairs)
## check if selection is applied on actions.db, IF DO, slim the actions.db
use.action.to.limit.pairs <- character()
if (length(use.action.pairs) > 0 || !isTRUE(all.equal(action.score.range, c(1, 1000)))) {
# TO apply selected pairs from actions.db to limit pairs.db
if (!isTRUE(all.equal(action.score.range, c(1, 1000)))) {
tmp.actions.add.df <- FastAlignPairs(this.actions.db[, c("inter.GeneID.A", "inter.GeneID.B", "inter.GeneName.A", "inter.GeneName.B")], 4)
tmp.add.pairs <- paste(tmp.actions.add.df[, "inter.GeneID.A"], tmp.actions.add.df[, "inter.GeneID.B"], sep = use.cut.symbol)
if (length(use.action.pairs) > 0) {
use.action.to.limit.pairs <- intersect(tmp.add.pairs, use.action.pairs)
} else {
use.action.to.limit.pairs <- tmp.add.pairs
}
} else {
use.action.to.limit.pairs <- use.action.pairs
}
# collect actions result from actions selection
if (length(use.action.to.limit.pairs) > 0) {
# as actions.db slightly different, get conv and rev use.action.to.limit.pairs
tmp.use.action.to.limit.pairs.rev <- unique(unlist(lapply(strsplit(use.action.to.limit.pairs, split = use.cut.symbol), t.cut = use.cut.symbol, function(x, t.cut) {
paste(rev(x), collapse = t.cut)
})))
tmp.use.action.to.limit.pairs.rev <- c(tmp.use.action.to.limit.pairs.rev, use.action.to.limit.pairs)
tmp.cur.act.pairs <- paste(this.actions.db[, "inter.GeneID.A"], this.actions.db[, "inter.GeneID.B"], sep = use.cut.symbol)
this.actions.db <- this.actions.db[which(tmp.cur.act.pairs %in% tmp.use.action.to.limit.pairs.rev), ]
object@actions.db <- this.actions.db
}
}
# collect pairs result from actions selection
if (length(use.action.to.limit.pairs) > 0) {
this.pairs.db <- FastAlignPairs(this.pairs.db, 4)
tmp.cur.pairs <- paste(this.pairs.db[, "inter.GeneID.A"], this.pairs.db[, "inter.GeneID.B"], sep = use.cut.symbol)
this.pairs.db <- this.pairs.db[which(tmp.cur.pairs %in% use.action.to.limit.pairs), ]
}
## get pairs settled, and slim other accessory database
object@pairs.db <- this.pairs.db
if (slim.along.with.pairs == TRUE) {
use.cut.symbol.slim <- "->-"
# add direct way ones (conv)
tmp.sel.pairs <- paste(object@pairs.db[, "inter.GeneID.A"], object@pairs.db[, "inter.GeneID.B"], sep = use.cut.symbol.slim)
# add reverse direction ones (rev)
tmp.sel.pairs.conv <- c(tmp.sel.pairs, paste(object@pairs.db[, "inter.GeneID.B"], object@pairs.db[, "inter.GeneID.A"], sep = use.cut.symbol.slim))
# slim actions
tmp.sel.actions.p <- paste(object@actions.db[, "inter.GeneID.A"], object@actions.db[, "inter.GeneID.B"], sep = use.cut.symbol.slim)
object@actions.db <- object@actions.db[which(tmp.sel.actions.p %in% tmp.sel.pairs.conv), ]
}
return(object)
}
#' @param ... Parameters passed to function \code{SelectDBSubset.default}.
#'
#' @rdname SelectDBSubset
#' @order 1
#' @export
#'
setGeneric(name = "SelectDBSubset", def = function(object, ...) {
standardGeneric("SelectDBSubset")
}
)
#' @rdname SelectDBSubset
#' @order 2
#' @export
#'
setMethod(
f = "SelectDBSubset",
signature = "InterCellDBPack",
definition = function(object, ...) {
SelectDBSubset.default(object, ...)
}
)
#' @rdname SelectDBSubset
#' @order 3
#' @export
#'
setMethod(
f = "SelectDBSubset",
signature = "InterCell",
definition = function(object, ...) {
setRefDatabase(object, SelectDBSubset.default(object@database, ...))
}
)
#' Merge Database
#'
#' This function is to merge 2 databases to get user-desired database.
#'
#' @param db.object.1 A \code{InterCellDBPack} object to be merged.
#' @param db.object.2 Another \code{InterCellDBPack} object to be merged.
#' @param merge.option The supported options are 'intersect' and 'union'.
#'
#' @return A \code{InterCellDBPack} object.
#'
#' @export
#'
MergeDBSubset <- function(
db.object.1,
db.object.2,
merge.option = "intersect"
) {
if (class(db.object.1) != "InterCellDBPack" || class(db.object.2) != "InterCellDBPack") {
ret.mg <- character()
if (class(db.object.1) != "InterCellDBPack") ret.mg <- c(ret.mg, "db.object.1")
if (class(db.object.2) != "InterCellDBPack") ret.mg <- c(ret.mg, "db.object.2")
stop("Given database subset in parameter ", paste0(ret.mg, collapse = ", ") , " not in `InterCellDBPack-class`. ",
"Please use either `CreateDBPackObject` or `SelectDBSubset` to generate.")
}
if (length(merge.option) > 1) {
merge.option <- merge.option[1]
}
merge.option <- CheckParamStd(merge.option, c("intersect", "union"), "parameter `merge.option`", stop.on.zero = TRUE)
# before merging, check species
if (!identical(db.object.1@species, db.object.2@species)) {
stop("Merging database subset from different species ", paste(db.object.1@species, db.object.2@species, sep = ", "), ". ")
} else {
if (merge.option == "union") {
# genes.db
# $gene.ncbi.db
addon.ugene.ncbi.db <- setdiff(rownames(db.object.2@genes.db$gene.ncbi.db), rownames(db.object.1@genes.db$gene.ncbi.db))
db.object.1@genes.db$gene.ncbi.db <- rbind(db.object.1@genes.db$gene.ncbi.db, db.object.2@genes.db$gene.ncbi.db[which(rownames(db.object.2@genes.db$gene.ncbi.db) %in% addon.ugene.ncbi.db), ])
# $gene.synonyms.db
addon.ugene.synonyms.db <- setdiff(rownames(db.object.2@genes.db$gene.synonyms.db), rownames(db.object.1@genes.db$gene.synonyms.db))
db.object.1@genes.db$gene.synonyms.db <- rbind(db.object.1@genes.db$gene.synonyms.db, db.object.2@genes.db$gene.synonyms.db[which(rownames(db.object.2@genes.db$gene.synonyms.db) %in% addon.ugene.synonyms.db), ])
# $gene.dup.synonyms.db
db.object.1@genes.db$gene.dup.synonyms.db <- unique(rbind(db.object.1@genes.db$gene.dup.synonyms.db, db.object.2@genes.db$gene.dup.synonyms.db))
# pairs.db
addon.pairs.db <- setdiff(rownames(db.object.2@pairs.db), rownames(db.object.1@pairs.db))
db.object.1@pairs.db <- rbind(db.object.1@pairs.db, db.object.2@pairs.db[which(rownames(db.object.2@pairs.db) %in% addon.pairs.db), ])
# actions.db
addon.actions.db <- setdiff(rownames(db.object.2@actions.db), rownames(db.object.1@actions.db))
db.object.1@actions.db <- rbind(db.object.1@actions.db, db.object.2@actions.db[which(rownames(db.object.2@actions.db) %in% addon.actions.db), ])
# anno.location.db
addon.anno.location.db <- setdiff(rownames(db.object.2@anno.location.db), rownames(db.object.1@anno.location.db))
db.object.1@anno.location.db <- rbind(db.object.1@anno.location.db, db.object.2@anno.location.db[which(rownames(db.object.2@anno.location.db) %in% addon.anno.location.db), ])
# anno.type.db
addon.anno.type.db <- setdiff(rownames(db.object.2@anno.type.db), rownames(db.object.1@anno.type.db))
db.object.1@anno.type.db <- rbind(db.object.1@anno.type.db, db.object.2@anno.type.db[which(rownames(db.object.2@anno.type.db) %in% addon.anno.type.db), ])
# go.ref.db
addon.go.ref.db <- setdiff(rownames(db.object.2@go.ref.db), rownames(db.object.1@go.ref.db))
db.object.1@go.ref.db <- rbind(db.object.1@go.ref.db, db.object.2@go.ref.db[which(rownames(db.object.2@go.ref.db) %in% addon.go.ref.db), ])
}
if (merge.option == "intersect") {
# genes.db
# $gene.ncbi.db
addon.ugene.ncbi.db <- intersect(rownames(db.object.2@genes.db$gene.ncbi.db), rownames(db.object.1@genes.db$gene.ncbi.db))
db.object.1@genes.db$gene.ncbi.db <- db.object.2@genes.db$gene.ncbi.db[which(rownames(db.object.2@genes.db$gene.ncbi.db) %in% addon.ugene.ncbi.db), ]
# $gene.synonyms.db
addon.ugene.synonyms.db <- intersect(rownames(db.object.2@genes.db$gene.synonyms.db), rownames(db.object.1@genes.db$gene.synonyms.db))
db.object.1@genes.db$gene.synonyms.db <- db.object.2@genes.db$gene.synonyms.db[which(rownames(db.object.2@genes.db$gene.synonyms.db) %in% addon.ugene.synonyms.db), ]
# $gene.dup.synonyms.db
addon.ugene.dup.synonyms.db <- intersect(rownames(db.object.2@genes.db$gene.dup.synonyms.db), rownames(db.object.1@genes.db$gene.dup.synonyms.db))
db.object.1@genes.db$gene.dup.synonyms.db <- db.object.2@genes.db$gene.dup.synonyms.db[which(rownames(db.object.2@genes.db$gene.dup.synonyms.db) %in% addon.ugene.dup.synonyms.db), ]
# pairs.db
addon.pairs.db <- intersect(rownames(db.object.2@pairs.db), rownames(db.object.1@pairs.db))
db.object.1@pairs.db <- db.object.2@pairs.db[which(rownames(db.object.2@pairs.db) %in% addon.pairs.db), ]
# actions.db
addon.actions.db <- intersect(rownames(db.object.2@actions.db), rownames(db.object.1@actions.db))
db.object.1@actions.db <- db.object.2@actions.db[which(rownames(db.object.2@actions.db) %in% addon.actions.db), ]
# anno.location.db
addon.anno.location.db <- intersect(rownames(db.object.2@anno.location.db), rownames(db.object.1@anno.location.db))
db.object.1@anno.location.db <- db.object.2@anno.location.db[which(rownames(db.object.2@anno.location.db) %in% addon.anno.location.db), ]
# anno.type.db
addon.anno.type.db <- intersect(rownames(db.object.2@anno.type.db), rownames(db.object.1@anno.type.db))
db.object.1@anno.type.db <- db.object.2@anno.type.db[which(rownames(db.object.2@anno.type.db) %in% addon.anno.type.db), ]
# go.ref.db
addon.go.ref.db <- intersect(rownames(db.object.2@go.ref.db), rownames(db.object.1@go.ref.db))
db.object.1@go.ref.db <- db.object.2@go.ref.db[which(rownames(db.object.2@go.ref.db) %in% addon.go.ref.db), ]
}
}
return(db.object.1)
}
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Other Related Functions
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' Remap Gene Symbols to Their Authorized Genenames
#'
#' @description
#' Use species-specific reference database to remap genes to their authorized genenames.
#'
#' @param markers.all Data.frame. Feature genes that are generated from \code{Seurat::FindAllMarkers()} or
#' similar functions in other packages.
#' @inheritParams InsideParam.species
#' @param if.used.inside Logic. If used inside, some process will not run.
#' @param final.dup.rm Logic. If set TRUE, duplicated genes of each cluster in final mapping result will be
#' removed. If set FALSE, those genes will be applied with function \code{make.unique}.
#'
#' @export
#'
DataPrep.RemapClustersMarkers <- function(
markers.all,
species,
if.used.inside = FALSE,
final.dup.rm = TRUE
) {
# force character
markers.all$gene <- as.character(markers.all$gene)
markers.all$cluster <- as.character(markers.all$cluster)
## set each database
# Check species, only support human and mouse yet
genes.ref.db <- NULL
species <- CheckSpeciesValidity(species)
if (species == "human") {
genes.ref.db <- genes.human.ref.db
} else {
genes.ref.db <- genes.mouse.ref.db
}
entrez.db <- genes.ref.db$gene.ncbi.db
map.synonyms.db <- genes.ref.db$gene.synonyms.db
# split genes to already authorized ones and un-authorized ones
inds.raw.match <- which(markers.all$gene %in% entrez.db$Symbol_from_nomenclature_authority)
markers.raw.match <- markers.all[inds.raw.match, ]
markers.raw.unmatch <- markers.all[setdiff(seq_len(nrow(markers.all)), inds.raw.match), ]
proc.unmatch.genes <- unique(markers.raw.unmatch$gene)
## un-authorized genes have 2 ways to go:
# 1. cannot mapping from synonyms, keep still
# 2. mapping from synonyms
#
ret.d0.cannot.match <- ret.d1.map.to.diff <- ret.d2.diff.map.one <- ret.d3.map.to.exist <- character()
unmatch.map.results <- data.frame(unmatched = character(), match.res = character(), stringsAsFactors = FALSE)
if (nrow(markers.raw.unmatch) > 0) {
# 1> extract unmatch-able genes
ret.d0.cannot.match <- setdiff(proc.unmatch.genes, map.synonyms.db$Synonym.each)
# short warning
if (length(ret.d0.cannot.match) > 0 && if.used.inside == FALSE) {
print(paste0("Get ", length(ret.d0.cannot.match),
" genes cannot be mapped from synonyms.",
" See return value `$unmapping.genes`."))
}
# 2> mapping the rest
genes.poss.map <- setdiff(proc.unmatch.genes, ret.d0.cannot.match)
inds.map.possible <- which(map.synonyms.db$Synonym.each %in% genes.poss.map)
# check un-identical synonyms. One synonyms could map to different authorized genes
check.non.identical <- tapply(map.synonyms.db$Symbol_from_nomenclature_authority[inds.map.possible], map.synonyms.db$Synonym.each[inds.map.possible], function(x) {x}, simplify = FALSE)
inds.dups <- sapply(check.non.identical, function(x) { ifelse(length(x) > 1, TRUE, FALSE) })
# short warning
ret.d1.warning <- names(check.non.identical)[inds.dups]
if (length(ret.d1.warning) > 0 && if.used.inside == FALSE) {
print(paste0("Get ", length(ret.d1.warning),
" synonyms that cannot be mapped to unified authorized genes.",
" See return value `$one.synonym.map.to.some.genes`."))
}
# detailed hint
ret.d1.map.to.diff <- vapply(which(inds.dups == TRUE), all.non.identical = check.non.identical,
function(x, all.non.identical) {
this.gene <- names(all.non.identical)[x]
against.authorized.genes <- paste0("(", paste0(all.non.identical[[x]], collapse = ", "), ")")
paste(this.gene, "-->", against.authorized.genes)
},
FUN.VALUE = character(1), USE.NAMES = FALSE
)
# check if mapping result the same as other mapped genes
check.map.dups <- tapply(map.synonyms.db$Synonym.each[inds.map.possible],
map.synonyms.db$Symbol_from_nomenclature_authority[inds.map.possible],
function(x) {x}, simplify = FALSE)
inds.map.dups <- sapply(check.map.dups, function(x) { ifelse(length(x) > 1, TRUE, FALSE) })
# short warning
ret.d2.warning <- unique(as.character(unlist(lapply(which(inds.map.dups == TRUE), all.map.dups = check.map.dups,
function(x, all.map.dups) {
all.map.dups[[x]]
}
))))
if (length(ret.d2.warning) > 0 && if.used.inside == FALSE) {
print(paste0("Get ", length(ret.d2.warning),
" synonyms that get overlap mapping genes with at least one other.",
" See return value `$some.synonyms.map.to.one.gene`."))
}
# detailed hint
ret.d2.diff.map.one <- vapply(which(inds.map.dups == TRUE), all.map.dups = check.map.dups,
function(x, all.map.dups) {
this.tg <- names(all.map.dups)[x]
from.synonyms <- paste0("(", paste0(all.map.dups[[x]], collapse = ", "), ")")
paste(from.synonyms, "-->", this.tg)
},
FUN.VALUE = character(1), USE.NAMES = FALSE
)
# check if mapping to exist genes
check.map.to.exist <- intersect(markers.raw.match$gene, names(check.map.dups))
# short warning
ret.d3.warning <- as.character(unlist(lapply(check.map.to.exist, all.map.dups = check.map.dups,
function(x, all.map.dups) {
all.map.dups[[which(names(all.map.dups) == x)]]
}
)))
if (length(ret.d3.warning) > 0 && if.used.inside == FALSE) {
print(paste0("Get ", length(ret.d3.warning),
" synonyms that get mapping to existing authorized genes.",
" See return value `$synonyms.map.to.exist.gene`."))
}
# detailed hint
ret.d3.map.to.exist <- vapply(check.map.to.exist, all.map.dups = check.map.dups,
function(x, all.map.dups) {
this.to.map.genes <- all.map.dups[[which(names(all.map.dups) == x)]]
orig.to.map <- paste0("(", paste0(this.to.map.genes, collapse = ", "), ")")
paste(orig.to.map, "-->", x)
},
FUN.VALUE = character(1), USE.NAMES = FALSE
)
## get mapping result
# unmatch-able ones
unmatch.map.result.0 <- data.frame(unmatched = ret.d0.cannot.match, match.res = ret.d0.cannot.match, stringsAsFactors = FALSE)
# match-able ones, in default: the first matched gene name will be used
inds.map.matches <- match(genes.poss.map, map.synonyms.db$Synonym.each)
unmatch.map.result.1 <- data.frame(unmatched = map.synonyms.db$Synonym.each[inds.map.matches], match.res = map.synonyms.db$Symbol_from_nomenclature_authority[inds.map.matches], stringsAsFactors = FALSE)
# collect all
unmatch.map.results <- rbind(unmatch.map.result.0, unmatch.map.result.1)
}
# collect markers result after mapping
markers.raw.unmatch.dummy <- left_join(markers.raw.unmatch[, "gene", drop = FALSE],
unmatch.map.results, by = c("gene" = "unmatched"))
markers.raw.unmatch$gene <- markers.raw.unmatch.dummy$match.res
markers.all <- rbind(markers.raw.match, markers.raw.unmatch)
# after remapping, genes get to be duplicate with existing ones,
# re-check if mapping result has duplicate genes
if (final.dup.rm == TRUE) {
fcheck.result <- lapply(unique(markers.all$cluster), ref.markers = markers.all,
function(x, ref.markers) {
this.c.markers <- ref.markers[which(ref.markers$cluster == x), ]
this.c.len <- tapply(seq_along(this.c.markers$gene), this.c.markers$gene, length)
this.f.dup.genes <- names(this.c.len)[which(this.c.len > 1)]
# in default: remove the latter one in given data
this.c.markers <- DoPartUnique(this.c.markers, match(c("gene", "cluster"), colnames(this.c.markers)))
list(dup.genes = this.f.dup.genes, markers = this.c.markers)
})
names(fcheck.result) <- unique(markers.all$cluster)
} else {
fcheck.result <- lapply(unique(markers.all$cluster), ref.markers = markers.all,
function(x, ref.markers) {
this.c.markers <- ref.markers[which(ref.markers$cluster == x), ]
# in default: make unique of those genes
this.c.markers$gene <- make.unique(this.c.markers$gene)
list(dup.genes = character(), markers = this.c.markers)
})
names(fcheck.result) <- unique(markers.all$cluster)
}
# ret
ret.markers.all <- bind_rows(lapply(fcheck.result, function(x) { x$markers }))
# detailed hint
ret.dx.fcheck.dup <- lapply(seq_along(fcheck.result), all.fcheck.res = fcheck.result,
function(x, all.fcheck.res) {
this.cluster <- names(all.fcheck.res)[x]
this.dup.genes <- all.fcheck.res[[x]][["dup.genes"]]
ret.detailed <- NA
if (length(this.dup.genes) > 0) {
ret.detailed <- paste(this.cluster, "~", paste0(this.dup.genes, collapse = ", "))
}
list(detailed = ret.detailed, raw = unique(this.dup.genes))
}
)
un.fin.check <- as.character(sapply(ret.dx.fcheck.dup, function(x) { x$detailed }))
un.fin.check <- un.fin.check[which(!is.na(un.fin.check))]
if (length(un.fin.check) > 0) {
warning("There remain genes group by clusters to be checked manually. ",
paste0(un.fin.check, collapse = "; "),
". The program automatically remove duplicate ones in cluster scale.")
}
ret.d4.final.dup.genes <- lapply(ret.dx.fcheck.dup, function(x) { x$raw })
names(ret.d4.final.dup.genes) <- names(fcheck.result)
ret.d4.len <- vapply(ret.d4.final.dup.genes, FUN = length, FUN.VALUE = integer(1))
ret.d4.final.dup.genes <- ret.d4.final.dup.genes[which(ret.d4.len > 0)]
return(list(result = ret.markers.all,
unmapping.genes = ret.d0.cannot.match,
one.synonym.map.to.some.genes = ret.d1.map.to.diff,
some.synonyms.map.to.one.gene = ret.d2.diff.map.one,
synonyms.map.to.exist.gene = ret.d3.map.to.exist,
after.map.dup.genes = ret.d4.final.dup.genes))
}
#' Add Expression Data in InterCell Object
#'
#' This function adds expression data to InterCell Object. It is mostly internally used.
#'
#' @param fgenes feature gene table, stored in slot \code{fgenes} of \code{InterCell} object.
#' @param exprs.data normalized count matrix
#' @param force.overwrite If column 'Exprs' already exists, set this parameter \code{TRUE} to overwrite that.
#'
#' @export
#'
DataPrep.AddExprs <- function(
fgenes,
exprs.data,
force.overwrite = FALSE
) {
if (("Exprs" %in% colnames(fgenes)) && !force.overwrite) {
warning("Column 'Exprs' exists. If want to modify, use 'force.overwrite = TRUE' to overwrite existed one.")
return(fgenes)
}
# handle with format of exprs.data
if ("Seurat" %in% class(exprs.data)) {
stop("Please use <SeuratObj>[[<assay>]]@<data>, for example SeuratObj[['RNA']]@data, to give the expression data.")
#exprs.data <- AverageExpression(exprs.data, assays = "RNA", slot = "counts")[[1]]
#exprs.data <- as.matrix(exprs.data)
} else { # transform to matrix
if (is.null(ncol(exprs.data)) || ncol(exprs.data) == 0 ||
is.null(nrow(exprs.data)) || nrow(exprs.data) == 0) {
stop("Try add 'Exprs' but lack data. Please provided valid expression data.")
}
exprs.data <- as.matrix(exprs.data)
}
# give warning when duplicated genes are given
if (anyDuplicated(rownames(exprs.data))) {
warning("Duplicated gene names in expression data detected. Unexpected result may be generated.")
}
# check if cluster are matched with current data
if (!all(unique(fgenes$cluster) %in% colnames(exprs.data))) {
stop("Given data lacks some required clusters: ",
paste0(setdiff(unique(fgenes$cluster), colnames(exprs.data)), collapse = ", "),
"!")
}
# add expression data in every cluster
fgenes <- bind_rows(lapply(unique(fgenes$cluster),
fgenes = fgenes, exprs.data = exprs.data,
function(x, fgenes, exprs.data) {
tmp.exprs <- exprs.data[, which(colnames(exprs.data) == x), drop = FALSE]
tmp.exprs <- rowSums(tmp.exprs) / ncol(tmp.exprs)
fgenes <- subset(fgenes, cluster == x)
inds.exprs <- match(fgenes$gene, names(tmp.exprs))
fgenes[, "Exprs"] <- as.numeric(tmp.exprs[inds.exprs])
return(fgenes)
}
))
return(fgenes)
}
#' Generate Gene Pairs in Standard Format
#'
#' @description
#' This function generates gene pairs in standard format(in data frame),
#' and gets these pairs easier to be compared with others.
#'
#' @param VEinfos standard storage for one interaction.
#' @param musthave.colnames column names that must be included.
#'
#' @details
#' The standard format in this package is that gene pairs are maintained in data.frame, and the 2 genes
#' participated in each gene pair are recorded in columns named "inter.GeneName.A" and "inter.GeneName.B".
#'
#' @importFrom dplyr left_join
#'
DataPrep.GenStdGenePairs.from.VEinfos <- function(
VEinfos,
musthave.colnames
) {
vertices.infos <- VEinfos$vertices.infos
edges.infos <- VEinfos$edges.infos
# pre-check
if (!all(kmusthave.colnames %in% musthave.colnames)) {
stop("Provided column names are not matched with InterCellDB requirement!")
}
musthave.colnames <- setdiff(musthave.colnames, c("gene", "cluster"))
#
tmp.res <- left_join(edges.infos[, c("from", "to")], vertices.infos[, c(c("UID", "ClusterName", "GeneName"), musthave.colnames)], by = c("from" = "UID"))
colnames(tmp.res)[c(ncol(tmp.res) - (1+length(musthave.colnames)):0)] <- paste("inter", c(c("Cluster", "GeneName"), musthave.colnames), "A", sep = ".")
tmp.res <- left_join(tmp.res, vertices.infos[, c(c("UID", "ClusterName", "GeneName"), musthave.colnames)], by = c("to" = "UID"))
colnames(tmp.res)[c(ncol(tmp.res) - (1+length(musthave.colnames)):0)] <- paste("inter", c(c("Cluster", "GeneName"), musthave.colnames), "B", sep = ".")
# form std data.frame
align.colnames <- paste("inter", rep(c(c("GeneName"), musthave.colnames, c("Cluster")), each = 2), c("A", "B"), sep = ".")
tmp.res <- tmp.res[, match(align.colnames, colnames(tmp.res))]
# match cluster
# get conv ones
std.res.conv <- tmp.res[intersect(which(tmp.res$inter.Cluster.A == VEinfos$cluster.name.A), which(tmp.res$inter.Cluster.B == VEinfos$cluster.name.B)), ]
# get rev ones
std.res.rev <- tmp.res[intersect(which(tmp.res$inter.Cluster.A == VEinfos$cluster.name.B), which(tmp.res$inter.Cluster.B == VEinfos$cluster.name.A)), ]
std.res.rev <- std.res.rev[, ReverseOddEvenCols(length(align.colnames))] # reverse all paired columns
colnames(std.res.rev) <- colnames(std.res.conv)
# get the result
std.res.all <- rbind(std.res.conv, std.res.rev)
std.res.all <- DoPartUnique(std.res.all, 1:2)
return(std.res.all)
}
#' List cell clusters
#'
#' This function is to list all cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#'
#' @export
#'
ListAllClusters <- function(
object
) {
return(unique(object@fgenes$cluster))
}
#' List Options for Gene Selection
#'
#' This group of functions are served for giving options on selecting gene subset. It ranges from
#' subcellular locations, molecular functions and GO terms.
#'
#' @param object Allowed object should be in either class \code{InterCell} or class \code{InterCellDBPack}.
#' @param ... Parameters passed to corresponding function with suffix 'default', like use \code{ListAllGeneLocation}
#' and check parameters in \code{ListAllGeneLocation.default}.
#'
#' @return Character. The options.
#'
#' @name ListGeneSelectionProperty
#' @rdname ListGeneSelectionProperty
#'
#'
NULL
#' @rdname ListGeneSelectionProperty
#' @export
#'
ListAllGeneLocation.default <- function(
object
) {
return(unique(object@anno.location.db$GO.Term.target))
}
#' @rdname ListGeneSelectionProperty
#' @order 1
#' @export
#'
setGeneric(name = "ListAllGeneLocation", def = function(object, ...) {
standardGeneric("ListAllGeneLocation")
}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
setMethod(
f = "ListAllGeneLocation",
signature = "InterCellDBPack",
definition = function(object, ...) {
ListAllGeneLocation.default(object, ...)
}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
setMethod(
f = "ListAllGeneLocation",
signature = "InterCell",
definition = function(object, ...) {
ListAllGeneLocation.default(object@database, ...)
}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
ListAllGeneType.default <- function(
object
) {
return(unique(object@anno.type.db$Keyword.Name))
}
#' @rdname ListGeneSelectionProperty
#' @order 2
#' @export
#'
setGeneric(name = "ListAllGeneType", def = function(object, ...) {
standardGeneric("ListAllGeneType")
}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
setMethod(
f = "ListAllGeneType",
signature = "InterCellDBPack",
definition = function(object, ...) {
ListAllGeneType.default(object, ...)
}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
setMethod(
f = "ListAllGeneType",
signature = "InterCell",
definition = function(object, ...) {
ListAllGeneType.default(object@database, ...)
}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
ListAllGeneMergeType.default <- function(
object
) {
return(unique(object@accessory.db$merge.type.list[, "merged.molecular.function"]))
}
#' @rdname ListGeneSelectionProperty
#' @order 3
#' @export
#'
setGeneric(name = "ListAllGeneMergeType", def = function(object, ...) {
standardGeneric("ListAllGeneMergeType")
}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
setMethod(
f = "ListAllGeneMergeType",
signature = "InterCellDBPack",
definition = function(object, ...) {
ListAllGeneMergeType.default(object, ...)
}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
setMethod(
f = "ListAllGeneMergeType",
signature = "InterCell",
definition = function(object, ...) {
ListAllGeneMergeType.default(object@database, ...)
}
)
#' @param n.output It gives the first N GO terms in the order of AlphaBet.
#'
#' @rdname ListGeneSelectionProperty
#' @export
#'
ListAllGeneGOTerm.default <- function(
object,
n.output = +Inf
) {
go.terms <- unique(object@go.ref.db$GO_term)
go.terms <- go.terms[order(go.terms, decreasing = FALSE)]
if (n.output < 1) {
n.output <- 1
}
if (length(go.terms) > n.output) {
go.terms <- go.terms[seq_len(n.output)]
}
return(go.terms)
}
#' @rdname ListGeneSelectionProperty
#' @order 4
#' @export
#'
setGeneric(name = "ListAllGeneGOTerm", def = function(object, ...) {
standardGeneric("ListAllGeneGOTerm")
}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
setMethod(
f = "ListAllGeneGOTerm",
signature = "InterCellDBPack",
definition = function(object, ...) {
ListAllGeneGOTerm.default(object, ...)
}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
setMethod(
f = "ListAllGeneGOTerm",
signature = "InterCell",
definition = function(object, ...) {
ListAllGeneGOTerm.default(object@database, ...)
}
)
#' Replace Cluster name
#'
#' This is function is to change the names of cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param cluster.names.current The currently used name of cell clusters.
#' @param cluster.names.replace The new used name of cell clusters.
#'
#' @return A \code{InterCell} object.
#'
#' @examples
#' \dontrun{
#' ReplaceClusterName(object, "Macrophage", "Myeloid")
#' }
#'
#' @export
#'
ReplaceClusterName <- function(
object,
cluster.names.current,
cluster.names.replace
) {
markers.all <- object@fgenes
colnames.cluster <- "cluster"
# process
if ((colnames.cluster %in% colnames(markers.all)) == FALSE) {
stop("Selected column name defining clusters is not in the given data!")
}
if (length(cluster.names.current) != length(cluster.names.replace)) {
stop("The replaced names are of different length of the current used ones.")
}
reserve.oldnames.col <- reserve.oldnames.col.proto <- paste(colnames.cluster, "oldv", sep = ".")
for (try.i in 1:100) {
reserve.oldnames.col <- paste(reserve.oldnames.col.proto, as.character(try.i), sep = ".")
if ((reserve.oldnames.col %in% colnames(markers.all)) == FALSE) {
break
}
if (try.i == 100) {
stop("Cannot allocate proper colnames for old cluster names, program failed! Please check given data!")
}
}
markers.all[, reserve.oldnames.col] <- markers.all[, colnames.cluster]
tmp.fac <- factor(markers.all[, colnames.cluster])
lvl.tmp.fac <- levels(tmp.fac)
inds.match <- match(cluster.names.current, lvl.tmp.fac)
if (length(which(is.na(inds.match))) != 0) {
stop(paste0("Please give right current used cluster names! ",
"Wrong given ones are: ", paste0(cluster.names.current[which(is.na(inds.match))], collapse = ", "),
"."))
}
lvl.tmp.fac[inds.match] <- cluster.names.replace
levels(tmp.fac) <- lvl.tmp.fac
markers.all[, colnames.cluster] <- as.character(tmp.fac)
# return
object@fgenes <- markers.all
return(object)
}
#' Fetch Genes of Interest
#'
#' This function is to fetch genes of interest by selecting on subcellular locations,
#' molecular functions and GO terms.
#'
#' @param object A \code{InterCell} object or \code{InterCellDBPack} object. \code{FetchGeneOI.default}
#' gets \code{InterCellDBPack} as input.
#' @param sel.location Use subcellular location of gene product to select gene, and options are listed
#' in \code{\link{ListAllGeneLocation}}.
#' @param sel.location.score The score of corresponding subcellular location, range from 1 to 5. Consider \code{score = c(4, 5)}
#' as high and common usage.
#' @param sel.type Use molecular function of gene product to select gene, and options are listed in
#' \code{\link{ListAllGeneType}}.
#' @param sel.merge.type The merged types. Comparing to \code{sel.type}, it has less options, which are given
#' in \code{\link{ListAllGeneMergeType}}. See details for help.
#' @param ret.with.property It decides whether the return values are attached with gene properties(location, type, etc).
#' @param sel.go.terms Use GO terms to select gene, and supported options are listed in \code{\link{ListAllGeneGOTerm}}.
#' @param go.use.relative Decide if the go terms in the related GO term tree are used. See details for help.
#' @param go.relative.option Decide which relation to the selected GO term is used. Options are
#' 'ancestor', 'parents', 'offspring', 'children'. See details for help.
#'
#' @details
#' The parameter \code{sel.merge.type} is the summary for \code{sel.type}. The options in \code{sel.type} are originally
#' generated directly from Uniprot, which comprises over 100 types. For the convenience of usage, we summarize those types and gather
#' them to 16 merged types, which comprise the common used types: 'Receptor', 'Cytokine', 'Growth Factor', etc.
#'
#' GO terms have 3 basic words: 'cellular_component', 'molecular_function', 'biological_process', and all other GO terms are the offspring of
#' one of them. As a result, GO terms form 3 family tree. Use parameter \code{go.use.relative}, it can extend one given GO term to all its
#' related GO terms. There are 4 pre-defined options for selecting specific relative group, which are 'ancestor', 'parents', 'offspring', 'children'.
#' The 'parents' and 'children' are selecting the most close GO terms. For example, 'GO:0006955-immune response' is the nearest level above
#' 'GO:0002250-adaptive immune response', and in turn, 'adaptive immune response' is the children of 'immune response'.
#' The 'ancestor' and 'offspring' goes further than 'parents' and 'children'. The 'ancestor' iteratively searches for the 'parents'.
#' Conversely, the 'offspring' iteratively searches for the 'children'. For example, 'immune response' is the parents of 'adaptive immune response', and
#' 'adaptive immune response' is the parents of 'adaptive immune effector response'. Then, 'immune response' is the 'ancestor' of 'adaptive immune effector response'.
#' The 'offspring' goes the same way by propagating 'children'.
#'
#' @return Character, the selected genes. Or, a list, with gene property attached.
#'
#' @rdname FetchGeneOI
#' @order 4
#' @export
#'
FetchGeneOI.default <- function(
object,
sel.location = NULL,
sel.location.score = c(1:5),
sel.type = NULL,
sel.merge.type = NULL, # merged type only have 16 options. Prioritize upon sel.type
ret.with.property = TRUE,
# whether the selection should be carried in the result,
# for analysis usage it's TRUE, but only to get some genes, it should be false.
# GO terms are not included by this functions
sel.go.terms = NULL, # ID and Term are supported
go.use.relative = TRUE,
go.relative.option = "offspring"
) {
## input parameter process
# check location
if (!is.null(sel.location)) {
avb.opt.location <- unique(object@anno.location.db$GO.Term.target)
not.valid.location <- setdiff(sel.location, avb.opt.location)
if (length(not.valid.location) > 0) {
warning("Given undefined location: ", paste0(not.valid.location, collapse = ", ", ". "))
}
sel.location <- intersect(sel.location, avb.opt.location)
if (length(sel.location) == 0) {
sel.location <- NULL
}
}
if ((length(sel.location.score) > 1 && !is.numeric(sel.location.score)) ||
(length(sel.location.score) == 1 && !is.numeric(sel.location.score))) {
stop("Location score ranges from 1 to 5, and only those 5 integers are supported!")
}
if (!is.null(sel.location.score)) {
avb.location.score <- c(1:5)
not.valid.location.score <- setdiff(sel.location.score, avb.location.score)
if (length(not.valid.location.score) > 0) {
warning("Given invalid location score: ", paste0(not.valid.location.score, collapse = ", ", ". "))
}
sel.location.score <- intersect(sel.location.score, avb.location.score)
}
# check type
if (!is.null(sel.type)) {
avb.opt.type <- unique(object@anno.type.db$Keyword.Name)
not.valid.type <- setdiff(sel.type, avb.opt.type)
if (length(not.valid.type) > 0) {
warning("Given undefined type: ", paste0(not.valid.type, collapse = ", ", ". "))
}
sel.type <- intersect(sel.type, avb.opt.type)
if (length(sel.type) == 0) {
sel.type <- NULL
}
}
# check merged type (if given, then override type) and overwrite type
if (!is.null(sel.merge.type)) {
if (!is.null(sel.type)) {
warning("Select genes using both parameter 'sel.type' & 'sel.merge.type'. Only the options in 'sel.merge.type' are used!")
}
avb.opt.mg.type <- unique(object@accessory.db$merge.type.list$merged.molecular.function)
sel.merge.type <- CheckParamStd(sel.merge.type, avb.opt.mg.type, "merged type", stop.on.zero = FALSE)
if (length(sel.merge.type) == 0) {
sel.type <- NULL
} else { # overwrite sel.type
ref.db <- object@accessory.db$merge.type.list
sel.type <- ref.db[which(ref.db$merged.molecular.function %in% sel.merge.type), "orig.uniprot.keywords"]
}
}
## save all above limitations
property.saved <- list(
location = sel.location,
location.score = sel.location.score,
type = sel.type
)
# check GO terms
# - put in the tool function
## fetch genes
ret.gene.oi <- character()
inside.set.gene.oi <- function(gene.oi.res, new.get.genes) {
if (length(gene.oi.res) == 0) {
gene.oi.res <- new.get.genes
} else {
gene.oi.res <- intersect(gene.oi.res, new.get.genes)
}
return(gene.oi.res)
}
# use location
if (!is.null(sel.location)) {
gene.oi.from.locs <- object@anno.location.db[intersect(which(object@anno.location.db$GO.Term.target %in% sel.location),
which(object@anno.location.db$score %in% sel.location.score)), "Gene.name"]
ret.gene.oi <- inside.set.gene.oi(ret.gene.oi, gene.oi.from.locs)
}
if (!is.null(sel.type)) {
gene.oi.from.type <- object@anno.type.db[which(object@anno.type.db$Keyword.Name %in% sel.type), "Gene.name"]
ret.gene.oi <- inside.set.gene.oi(ret.gene.oi, gene.oi.from.type)
}
if (!is.null(sel.go.terms)) {
gene.oi.from.go <- Tool.FindGenesFromGO(sel.go.terms, object@genes.db, object@go.ref.db,
go.use.relative = go.use.relative, go.relative.option = go.relative.option)
gene.oi.from.go <- as.character(unlist(gene.oi.from.go))
ret.gene.oi <- inside.set.gene.oi(ret.gene.oi, gene.oi.from.go)
}
# get unique result
ret.gene.oi <- unique(ret.gene.oi)
print(paste("Fetch", length(ret.gene.oi), "genes of interest."))
if (ret.with.property == TRUE) {
return(list(genes = ret.gene.oi, property = property.saved))
} else {
return(ret.gene.oi)
}
}
#' @param ... Parameters passed to function \code{FetchGeneOI.default}.
#'
#' @rdname FetchGeneOI
#' @order 1
#' @export
#'
setGeneric(name = "FetchGeneOI", def = function(object, ...) {
standardGeneric("FetchGeneOI")
}
)
#' @rdname FetchGeneOI
#' @order 2
#' @export
#'
setMethod(
f = "FetchGeneOI",
signature = "InterCellDBPack",
definition = function(object, ...) {
return(FetchGeneOI.default(object, ...))
}
)
#' @rdname FetchGeneOI
#' @order 3
#' @export
#'
setMethod(
f = "FetchGeneOI",
signature = "InterCell",
definition = function(object, ...) {
return(FetchGeneOI.default(object@database, ...))
}
)
#' Merge Gene of Interest
#'
#' This function is to merge genes of interest fetched by \code{FetchGeneOI}.
#'
#' @param gene.oi.1 Genes of interest, generated by \code{FetchGeneOI}.
#' @param gene.oi.2 Genes of interest, generated by \code{FetchGeneOI}.
#' @param merge.option The supported options are 'intersect' and 'union'.
#'
#' @return Character, the selected genes. Or, a list, with gene property attached.
#'
#' @export
#'
MergeGeneOI <- function(
gene.oi.1,
gene.oi.2,
merge.option = "intersect"
) {
# check param
if (length(merge.option) > 1) {
merge.option <- merge.option[1]
}
merge.option <- CheckParamStd(merge.option, c("intersect", "union"), "parameter `merge.option`", stop.on.zero = TRUE)
#
if (!is.character(gene.oi.1) &&
!is.list(gene.oi.1) &&
(is.list(gene.oi.1) && !identical(names(gene.oi.1), c("genes", "property")))) {
stop("Wrong format of input `gene.oi.1`. Please use `FetchGeneOI` to get gene of interest!")
}
if (!is.character(gene.oi.2) &&
!is.list(gene.oi.2) &&
(is.list(gene.oi.2) && !identical(names(gene.oi.2), c("genes", "property")))) {
stop("Wrong format of input `gene.oi.2`. Please use `FetchGeneOI` to get gene of interest!")
}
gene.oi.ret <- NA
# s1: if 2 character merge
if (is.character(gene.oi.1) && is.character(gene.oi.2)) {
gene.oi.ret <- switch(merge.option,
"intersect" = intersect(gene.oi.1, gene.oi.2),
"union" = union(gene.oi.1, gene.oi.2))
}
# s2: either one is character
if (is.character(gene.oi.1) && is.list(gene.oi.2) ||
is.list(gene.oi.1) && is.character(gene.oi.2)) {
if (is.character(gene.oi.2)) {
tmp.1 <- gene.oi.1
gene.oi.1 <- gene.oi.2
gene.oi.2 <- tmp.1
}
gene.oi.ret.genes <- switch(merge.option,
"intersect" = intersect(gene.oi.1, gene.oi.2$genes),
"union" = union(gene.oi.1, gene.oi.2$genes))
gene.oi.ret.property <- switch(merge.option,
"intersect" = gene.oi.2$property,
"union" = NULL)
if (is.null(gene.oi.ret.property)) {
gene.oi.ret <- gene.oi.ret.genes
} else {
gene.oi.ret <- list(genes = gene.oi.ret.genes, property = gene.oi.ret.property)
}
}
# s3: both two are list
if (is.list(gene.oi.1) && is.list(gene.oi.2)) {
tmp.s3.func <- switch(merge.option,
"intersect" = intersect,
"union" = union)
gene.oi.ret.genes <- tmp.s3.func(gene.oi.1$genes, gene.oi.2$genes)
gene.oi.ret.property <- list(location = tmp.s3.func(gene.oi.1$property$location, gene.oi.2$property$location),
location.score = tmp.s3.func(gene.oi.1$property$location.score, gene.oi.2$property$location.score),
type = tmp.s3.func(gene.oi.1$property$type, gene.oi.2$property$type))
gene.oi.ret <- list(genes = gene.oi.ret.genes, property = gene.oi.ret.property)
}
return(gene.oi.ret)
}
#' Format User Given Gene Pairs
#'
#' This function makes 2-column table (each row represents one gene pair) to be
#' formatted to be used in this package.
#'
#' @inheritParams InsideObjectInterCell
#' @param gene.pairs.table 2-column table, and each column records one list of genes.
#' @param extend.reverse Decide whether to extend pairs with the reverse pairs. Default is \code{FALSE}.
#'
#' @details
#' The formatting process will remap the genes in 1st column from input to 'inter.*.A' columns in result.
#' The result of 2nd column from input will be put in 'inter.*.B' columns.
#'
#' Explanation on \code{extend.reverse}:
#' For example, given pair C3~C3ar1, if set \code{extend.reverse = TRUE}, then
#' both C3~C3ar1 and C3ar1~C3 will be generated in result.
#'
#' @return A list with \code{$result} storing the formatted gene pairs.
#'
#' @export
#'
FormatCustomGenePairs <- function(
object,
gene.pairs.table,
extend.reverse = FALSE
) {
species <- object@database@species
if.remap.genes <- object@misc$if.remap.genes
# check input
if (class(gene.pairs.table) != "data.frame") {
stop("Given gene pairs must be stored in table (R `data.frame` structure).")
}
if (ncol(gene.pairs.table) != 2) {
warning("Given table has more than 2 columns, only the first 2 columns will be used.")
}
# process
result.list <- list()
apx.list <- list()
for (i in 1:2) { # only use the first 2 columns
tmp.genes <- as.character(gene.pairs.table[, i])
# create dummy df to meet the requirement of musthave columns
dummy.fgenes <- data.frame(gene = tmp.genes,
cluster = seq_along(tmp.genes), # give one gene one cluster to make duplicate ones preserved, but program may get quite slow
LogFC = 1, PVal = 0,
num.id = seq_along(tmp.genes), stringsAsFactors = FALSE)
if (if.remap.genes == TRUE) {
dummy.remap.res <- suppressWarnings(DataPrep.RemapClustersMarkers(dummy.fgenes, species, if.used.inside = TRUE))
dummy.fgenes <- dummy.remap.res$result
}
dummy.fgenes <- dummy.fgenes[order(dummy.fgenes$num.id, decreasing = FALSE), ]
result.list <- c(result.list, list(dummy.fgenes$gene))
if (if.remap.genes == TRUE) {
apx.list <- c(apx.list, dummy.remap.res[setdiff(names(dummy.remap.res), "result")])
} else {
apx.list <- c(apx.list, list(NULL))
}
}
# get genename corresponding IDs
genes.ref.db <- switch(species,
"human" = genes.human.ref.db$gene.ncbi.db,
"mouse" = genes.mouse.ref.db$gene.ncbi.db)
result.id.A <- genes.ref.db[match(result.list[[1]], genes.ref.db$Symbol_from_nomenclature_authority), "GeneID"]
result.id.B <- genes.ref.db[match(result.list[[2]], genes.ref.db$Symbol_from_nomenclature_authority), "GeneID"]
result.IT <- data.frame(inter.GeneID.A = result.id.A,
inter.GeneID.B = result.id.B,
inter.GeneName.A = result.list[[1]],
inter.GeneName.B = result.list[[2]],
stringsAsFactors = FALSE)
if (extend.reverse == TRUE) {
tmp.rev <- result.IT[, ReverseOddEvenCols(4)]
colnames(tmp.rev) <- colnames(result.IT)
result.IT <- rbind(result.IT, tmp.rev)
}
# return
list(result = result.IT, match.status = apx.list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.