InterCellDB: Tool and Database for Analyzing Interactions

Documented in CreateDBPackObject CreateInterCellObject DataPrep.AddExprs DataPrep.GenStdGenePairs.from.VEinfos DataPrep.RemapClustersMarkers FetchGeneOI.default FormatCustomGenePairs ListAllClusters ListAllGeneGOTerm.default ListAllGeneLocation.default ListAllGeneMergeType.default ListAllGeneType.default MergeDBSubset MergeGeneOI ReplaceClusterName SelectDBSubset.default TgView.formula.onLogFC.default TgView.formula.onPVal.default

# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# pre-defined default global variables
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#' Predefined data format in given fgenes
#'
#' @description
#' Given data on differentially expressed genes and their belonging clusters should be in the right format.
#' Some columns must be maintained, which will be further used in downstream analysis.
#'
#'
kmusthave.colnames <- c("cluster", "gene", "LogFC", "PVal")  # NOT CHANGE

#' Predefined Action Mode & Color Usage for It
#'
#' \code{kpred.action.mode} is extracted from databases depicting mode of actions for functional usages.
#' expression is transcriptional regulation.
#'
#' @rdname pred-action-mode
#' @export
#'
kpred.action.mode <- c("activation", "inhibition", "binding", "catalysis", "reaction", "expression", "ptmod", "other")

#' Predefined Action Mode & Color Usage for It 
#'
#' \code{kpred.color.mode} is aligned with \code{kpred.action.mode} one-to-one. In default setting,
#' the color used for \bold{action mode} in plotting will use colors listed in this variable.
#'
#' @rdname pred-action-mode
#' @export
#'
kpred.color.mode <- c("#D70051", "#00913A", "#1296D4", "#956134", "#F46D42", "#0A0AFF", "#762A83", "#B5B5B6")
#c("#FB8072", "#80B1D3", "#B3DE69", "#8DD3C7", "#FFFFB3", "#BEBADA", "#FDB462", "#FCCDE5")

#' Predefined Action Effect & Color Usage for It
#'
#' \code{kpred.action.effect} is extracted from databases depicting action effect of actions for functional usages.
#'
#' @rdname pred-action-effect
#' @export
#'
kpred.action.effect <- c("positive", "negative", "unspecified", "undirected")

#' Predefined Action Effect & Color Usage for It
#'
#' \code{kpred.color.effect} is aligned with \code{kpred.action.effect} one-to-one. In default setting,
#' the color used for \bold{action effect} in plotting will use colors listed in this variable.
#'
#' @rdname pred-action-effect
#' @export
#'
kpred.color.effect <- c("#FB8072", "#B3DE69", "#80B1D3", "#8DD3C7")

#' Predefined Action Effect & Color Usage for It
#'
#' \code{kpred.ext.action.effect} is the \bold{extended} format, which depicts action effect and direction of action together, 
#' and extends action effect to 7 different types.
#'
#' @rdname pred-action-effect
#' @export
#'
kpred.ext.action.effect <- c(
	"A---B", # #1, undirected, others are directed
	"A-->B", # #2
	"A<--B", # #3
	"A--|B", # #4
	"A|--B", # #5
	"A--oB", # #6
	"Ao--B"  # #7
	# "undefined for (> 7) and all other(< 0)"
)

#' Predefined Action Effect & Color Usage for It
#'
#' \code{kpred.color.ext.effect} is aligned with \code{kpred.ext.action.effect} one-to-one. In default setting,
#' the color used for \bold{extened action effect} in plotting will use colors listed in this variable.
#'
#' @rdname pred-action-effect
#' @export
#'
kpred.color.ext.effect <- c("#8DD3C7", "#FB8072", "#FF5740", "#B3DE69", "#81EF48", "#80B1D3", "#6B6AEA")



#
TgView.formula.onExprs.default <- function(
	data.f,
	data.b
) {
	if (length(data.f) != length(data.b)) {
		stop("Unexpected non-identical length data input!")
	}
	return(data.f * data.b)
}



#' Formula on Power for One Gene Pair
#'
#' @description
#' This is the default formula on calculating power for every gene pair, and the 
#' sum value of LogFC for gene partners will be regarded as power.
#'
#' @param data.f vector. The LogFC values for one list of gene partners. LogFC values are 
#' specified when creating \code{\link{InterCell}} object. 
#' @param data.b vector. The LogFC values for another list of gene partners, which is 
#' one-by-one matched to those in parameter \code{data.f}.
#'
TgView.formula.onLogFC.default <- function(
	data.f, 
	data.b
) {
	if (length(data.f) != length(data.b)) {
		stop("Unexpected non-identical length data input!")
	}
	return(data.f + data.b)
}



#' Formula on Confidence for One Gene Pair
#'
#' @description
#' This is the default formula on calculating confidence for every gene pair, and the 
#' multiplying value of PVal for gene partners will be regarded as confidence. 
#'
#' @param data.f vector. The PVal values for one list of gene partners. PVal values are 
#' specified when creating \code{\link{InterCell}} object. 
#' @param data.b vector. The PVal values for another list of gene partners, which is 
#' one-by-one matched to those in parameter \code{data.f}.
#' @param pval.log.max The number for replacing the infinite number when \code{log(PVal = 0)}.
#'
TgView.formula.onPVal.default <- function(
	data.f, 
	data.b,
	pval.log.max = 300
) {
	# 'pval.log.max' is 300 in default setting.  
	# The use of 300 as default maximum, because e-303 are usual lowest limit when all log(values) are infinite.
	default.max.replace <- pval.log.max
	if (length(data.f) != length(data.b)) {
		stop("Unexpected non-identical length data input!")
	}
	tmp.f <- abs(log10(data.f))
	tmp.b <- abs(log10(data.b))
	inds.tmp.f <- which(is.finite(tmp.f))
	inds.tmp.b <- which(is.finite(tmp.b))
	if (length(inds.tmp.f) == 0 && length(inds.tmp.b) == 0) {
		tmp.f <- rep(default.max.replace, times = length(tmp.f))
		tmp.b <- rep(default.max.replace, times = length(tmp.b))
	} else {
		max.f <- suppressWarnings(max(tmp.f[inds.tmp.f]))  # if length(inds.tmp.f) == 0, max returns -Inf
		max.f	<- ifelse(is.infinite(max.f), default.max.replace, max.f)
		max.b <- suppressWarnings(max(tmp.b[inds.tmp.b]))  # if length(inds.tmp.b) == 0, max returns -Inf
		max.b	<- ifelse(is.infinite(max.b), default.max.replace, max.b)
		tmp.f[which(is.infinite(tmp.f))] <- 10 * max.f
		tmp.b[which(is.infinite(tmp.b))] <- 10 * max.b
	}
	return(tmp.f * tmp.b)
}



# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# InterCell Object definitions
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#' The Class for Database Storage in InterCellDB
#'
#' The InterCellDBPack class provides a standard storage format of all databases for running analysis. 
#' This object is mostly created along with \code{\link{InterCell-class}}. 
#' Users need to specify the species firstly. Then, user-defined subset of database could be 
#' collected by function \code{\link{SelectDBSubset}}.
#'
#' @slot species InterCellDB only allows 'human' and 'mouse' species.
#' @slot genes.db The gene reference database, which is generated from NCBI gene site. It records all the authorized gene names.
#' @slot pairs.db The database records all gene pairs, which are transferred from protein interaction database - 'STRING'.
#' @slot actions.db The database records all action properties (including 'action.mode', 'action.effect') for gene pairs in slot:`pairs.db`.
#' @slot anno.location.db The database collects all subcellular location from COMPARTMENTS for genes in slot:`pairs.db`.
#' @slot anno.type.db The database collects all molecular function from Uniprot for genes in slot:`pairs.db`.
#' @slot go.ref.db The database collects all GO terms for genes in slot:`pairs.db`.
#' @slot accessory.db A list of accessory database, for example, the mapping list of raw keywords from Uniport. 
#' @slot misc The preserved area for miscellaneous things. Currently no usage.
#'
#' @name InterCellDBPack-class
#' @rdname InterCellDBPack-class
#' @exportClass InterCellDBPack
#'
InterCellDBPack <- setClass(
	Class = "InterCellDBPack",
	slots = c(
		species = "character",
		genes.db = "list", 
		pairs.db = "data.frame",
		actions.db = "data.frame", 
		anno.location.db = "data.frame",
		anno.type.db = "data.frame",
		go.ref.db = "data.frame",
		accessory.db = "list",
		misc = "list"
	)
)



#' The InterCell Class
#'
#' The InterCell object is used to in all the analysis \pkg{InterCellDB} provided.
#' It incoporates the necessary databases, and stores every important intermediate result.
#'
#' @slot fgenes InterCellDB needs the differential expressed genes and their belonging clusters as input. 
#'  The given data need to have columns named 'gene', 'cluster', 'LogFC', 'PVal'. Those each means, 'gene': the 
#'  authorized gene names, 'cluster': cell cluster, 'LogFC': the log transformed fold change of genes, 'PVal': the 
#'  P val for regarding one gene as differentially expressed one. 
#'  The expression level of each gene is also needed when statistical test is required and it should be stored in column named 'Exprs'.
#' @slot database Database stored in \code{InterCellDBPack} object. 
#' @slot formulae The default formulae to be used in analysis. 
#' @slot inter.fullview This is used to store the result of network analysis. 
#' @slot tg.itinfo This stores information about one intercellular communication.
#' @slot tg.action.pairs This is used to store the intermediate result of action properties and gene pairs.  
#' @slot tg.veinfo This is used to store the intermediate result of one intercellular communication (for one interacting 2-cell group).
#' @slot tg.action.comp This is used to store the result of analyzing composition of action mode and action effect in one interacellular communication.
#' @slot tg.spgenes This is used to store the selected gene pairs and all their participating interacitons in one intercellular communication.
#' @slot tool.vars This stores several variables to be used embedded in program. 
#' @slot misc The not important intermediate result will be put in this parameter as well as some 
#' pre-defined settings. 
#'
#' @name InterCell-class
#' @rdname InterCell-class
#' @exportClass InterCell
#'
InterCell <- setClass(
	Class = "InterCell",
	slots = c(
		fgenes = "data.frame", 
		database = "InterCellDBPack", 
		formulae = "list", 
		inter.fullview = "list", 
		tg.itinfo = "list",
		tg.action.pairs = "list", 
		tg.veinfo = "list", 
		tg.action.comp = "list",
		tg.spgenes = "list", 
		#pred.action = "list", pred.action This stores the pre-defined action modes and action effects, and terms beyond definitions in this parameter will be regarded as invalid ones.
		tool.vars = "list", 
		misc = "list" 
	)
)



# validation function for \code{InterCellDBPack-class} 
validInterCellDBPackObject <- function(object) {
	# check species
	allowed.species <- c("human", "mouse")
	if (length(object@species) != 1 || sum(allowed.species %in% object@species) == 0) {
		return(paste("InterCellDB only supports species in human and mouse."))
	}
	TRUE
}
setValidity("InterCellDBPack", validInterCellDBPackObject)

# validation function for \code{InterCell-class}
# only the most un-modifiable slots should be checked, 
# and all checked ones should set its initial value in the first time, or get errors
validInterCellObject <- function(object) {
	# check pred.action
	# if (length(object@pred.action$action.mode) == 0 || !all(object@pred.action$action.mode %in% kpred.action.mode)) {
	# 	return(paste0("Using undefined action mode: ", paste0(setdiff(object@pred.action$action.mode, kpred.action.mode), collapse = ", ")))
	# }
	# if (length(object@pred.action$action.mode) != length(object@pred.action$color.mode)) {
	# 	return(paste0("Using different length of 'action.mode' and its used color 'color.mode'!"))
	# }
	# if (length(object@pred.action$action.effect) == 0 || !all(object@pred.action$action.effect %in% kpred.action.effect)) {
	# 	return(paste0("Using undefined action effect: ", paste0(setdiff(object@pred.action$action.effect, kpred.action.effect), collapse = ", ")))
	# }
	# if (length(object@pred.action$action.effect) != length(object@pred.action$color.effect)) {
	# 	return(paste0("Using different length of 'action.effect' and its used color 'color.effect'!"))
	# }

	# check k***Split
	if (length(object@tool.vars$gene.pair.split) != 1) {
		return(paste0("Invalid symbol for splitting genes in gene pairs!"))
	}
	if (length(object@tool.vars$cluster.split) != 1) {
		return(paste0("Invalid symbol for splitting clusters in cluster groups!"))
	}

	# check musthave colnames in given fgenes
	if (!all(kmusthave.colnames %in% object@misc$musthave.colnames)) {
		return(paste0("Detect manual modification on object internal variables. ", 
			"Program will be failed in unexpected situations. "))
	}

	validInterCellDBPackObject(object@database)
	TRUE
}
setValidity("InterCell", validInterCellObject)



setMethod(
	f = "initialize",
	signature = c("InterCellDBPack"),
	definition = function(.Object, ...) {
		.Object <- methods::callNextMethod(.Object, ...)
		.Object@accessory.db <- list(merge.type.list = Uniprot.key.map.list)
		.Object@misc <- list(TAKEN = "nothing yet")
		methods::validObject(.Object)
		return(.Object)
	}
)

# initialize \code{InterCell-class}
setMethod(
	f = "initialize",
	signature = c("InterCell"),
	definition = function(.Object, ...) {
		.Object <- methods::callNextMethod(.Object, ...)
		.Object@formulae <- list(
			TG.EXPRS = TgView.formula.onExprs.default,
			TG.LOGFC = TgView.formula.onLogFC.default,
			TG.PVAL = TgView.formula.onPVal.default)
		#.Object@pred.action <- list(action.mode = kpred.action.mode, action.effect = kpred.action.effect)
		#.Object@tool.vars <- list(gene.pair.split = "-~-", cluster.split = "~")
		methods::validObject(.Object)
		return(.Object)
	}
)



setMethod(
	f = "show",
	signature = "InterCellDBPack",
	definition = function(object) {
		cat("Using '", object@species, "' database.\n", sep = "")
	}
)

setMethod(
	f = "show",
	signature = "InterCell",
	definition = function(object) {
		cat("A InterCell object, with ", nrow(object@fgenes), " differentially expressed genes spanning ", 
			length(unique(object@fgenes$cluster)), " clusters.\n", sep = "")

		# show the used database
		show(object@database)

		# show if intercellular analysis is processed
		if (!is.null(object@tg.veinfo) && length(object@tg.veinfo) > 0) {
			this.involved.clusters <- getOrigClusterNameTgVEInfo(object)
			cat("Intercellular analysis performed between ", this.involved.clusters$cluster.name.A, " and ", this.involved.clusters$cluster.name.B)
		} else {
			cat("Intercellular analysis not processed yet.")
		}
		cat("\n")
	}
)


# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Class Creation Method
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#' Create InterCellDBPack Object
#'
#' @description
#' This function is used to create \code{InterCellDBPack-class}.
#'
#' @inheritParams InsideParam.species
#'
#' @return A \code{InterCellDBPack} object.
#'
#' @import methods
#'
#' @export
#'
CreateDBPackObject <- function(
	species
) {
	# Check species, only support human and mouse yet
	species <- CheckSpeciesValidity(species)
	# set initial database
	ret.obj <- NULL
	if (species == "human") {
		ret.obj <- new(
			Class = "InterCellDBPack",
			species = species, 
			genes.db = genes.human.ref.db,
			pairs.db = pairs.human.db,
			actions.db = actions.human.ref.db,
			anno.location.db = anno.location.human.ref.db,
			anno.type.db = anno.type.human.ref.db,
			go.ref.db = go.human.ref.db
		)
	}
	if (species == "mouse") {
		ret.obj <- new(
			Class = "InterCellDBPack",
			species = species, 
			genes.db = genes.mouse.ref.db,
			pairs.db = pairs.mouse.db,
			actions.db = actions.mouse.ref.db,
			anno.location.db = anno.location.mouse.ref.db,
			anno.type.db = anno.type.mouse.ref.db,
			go.ref.db = go.mouse.ref.db
		)
	}

	# get database slimmed to useful data in analyzing only
	genes.cols.reserve <- c("tax_id", "GeneID", "Symbol", "Synonyms", "Symbol_from_nomenclature_authority")
	ret.obj@genes.db$gene.ncbi.db <- ret.obj@genes.db$gene.ncbi.db[, genes.cols.reserve]
	anno.locs.cols.reserve <- c("GeneID", "Gene.name", "GO.ID.target", "GO.Term.target", "Source", "Evidence", "score")
	ret.obj@anno.location.db <- ret.obj@anno.location.db[, anno.locs.cols.reserve]
	anno.type.cols.reserve <- c("GeneID", "Gene.name", "Keyword.ID", "Keyword.Name")
	ret.obj@anno.type.db <- ret.obj@anno.type.db[, anno.type.cols.reserve]

	return(ret.obj)
}



#' Analyze interaction network in full view
#'
#' @description
#' This function analyzes count and power of interaction pairs among all given clusters.
#'
#' @param DEG.table A table on differentially expressed genes and their belonging cell clusters.
#' @inheritParams InsideParam.species
#' @param add.exprs It decides whether to add gene expression information.
#' @param exprs.data It gives the normalized count matrix as expression information.
#' @param force.write.exprs If there is column 'Exprs' in parameter \code{DEG.table}, users should 
#'  decide whether to overwrite the existed one (set TRUE) or not (set FALSE).
#' @param remap.genes This decides whether to use \pkg{InterCellDB} integrated gene database to
#' standardize all genes to get more perfectly matched with protein/gene pairs database. Default is \code{FALSE}.
#' @param cluster.split The letters used to split 2 cell clusters in one interaction. It can also be modified later by 
#' using \code{setClusterSplit} if not decided yet.
#' @param gene.pair.split The letters used to split 2 gene partners in one gene pair. It can also be modified later by 
#' using \code{setGenePairSplit} if not decided yet.
#'
#'
#' @details
#' The parameter \code{DEG.table} is recommended to be generated by \pkg{Seurat}. Other packages are also applicable, if
#' they could handle scRNA-seq data, do cell clustering and do calculation on cluster-specific differentially expressed genes. The input 
#' format of \code{DEG.table} should be one data.frame with 4 required columns that are named 'cluster', 'gene', 'LogFC', 'PVal'.
#' \itemize{
#'   \item{cluster}: the cell cluster.
#'   \item{gene}: differentially expressed genes, which are grouped by their belonging clusters.
#'   \item{LogFC}: the fold change of genes.
#'   \item{PVal}: the P value for gene being calculated as differentially expressed gene.
#' }
#' 
#' Gene expression data can be added when parameter \code{add.exprs} is set TRUE. It will be stored in column 'Exprs' and 
#' only those genes given in parameter \code{DEG.table[, "gene"]} will be perserved.
#'
#' To represent one interaction, like interaction between 'Myeloid cell' and 'T cell', 
#' it will be looked like 'Myeloid cell~T cell' if \code{cluster.split = "~"}.
#' To represent oen gene pair, like gene pair of IL6 and IL6R,
#' it will be looked like 'IL6-~-IL6R' if \code{gene.pair.split = "-~-"}.
#' 
#' To avoid program failure, the letters appearing in gene names are not recommended for \code{gene.pair.split},
#' and the letters appearing in cluster names are not recommended for \code{cluster.split}. 
#' The program will test for those situation but users should keep this in mind.
#'
#' @return A \code{InterCell} object.
#'
#' @importFrom methods new show callNextMethod validObject
#'
#' @export
#'
CreateInterCellObject <- function(
	DEG.table,
	species,
	add.exprs = FALSE,
	exprs.data = NULL,  # could be expression matrix or seurat[["RNA"]]@data, etc
	force.write.exprs = FALSE,
	remap.genes = FALSE,
	cluster.split = "~",
	gene.pair.split = "#~#"
) {
	# pre-check part
	if (!all(kmusthave.colnames %in% colnames(DEG.table))) {
		stop("Required columns are not given!\n",
			paste("Column named ", paste0(setdiff(kmusthave.colnames, colnames(DEG.table)), collapse = ", "), 
			" are not included in given feature genes table.",
			"Please use colnames(<var>)[<index>] <- '<name>' to set proper columns corresponding to those.",
			"Contents represented by <> should be replaced by user definitions!")
		)
	}

	# set database object
	DBPack.obj <- CreateDBPackObject(species)

	# add expression data when needed
	tmp.musthave.new <- kmusthave.colnames
	DEG.align.res <- NA
	if (add.exprs == TRUE) {
		DEG.table <- DataPrep.AddExprs(DEG.table, exprs.data, force.write.exprs)
		tmp.musthave.new <- c(tmp.musthave.new, "Exprs")
		if (remap.genes == TRUE) {
			dummy.gdf <- data.frame(gene = rownames(exprs.data), ID = seq_len(nrow(exprs.data)), cluster = "IT", stringsAsFactors = FALSE)
			DEG.align.res <- DataPrep.RemapClustersMarkers(dummy.gdf, species, final.dup.rm = FALSE)
			colnames(dummy.gdf)[1] <- c("orig.gene")
			dummy.gdf[, "mapped.gene"] <- DEG.align.res$result[order(DEG.align.res$result$ID), "gene"]
			# use this to remap genes in every cluster
			DEG.table$gene <- dummy.gdf[match(DEG.table$gene, dummy.gdf[, "orig.gene"]), "mapped.gene"]
		}
	} else {
		# re-align input table with gene reference database in InterCellDB
		if (remap.genes == TRUE) {
			DEG.align.res <- DataPrep.RemapClustersMarkers(DEG.table, species)
			DEG.table <- DEG.align.res$result
		}
	}

	
	IT.InterCell.Obj <- new(
		Class = "InterCell",
		fgenes = DEG.table,
		database = DBPack.obj,
		#pred.action = list(action.mode = kpred.action.mode, action.effect = kpred.action.effect,
		#	color.mode = kpred.color.mode, color.effect = kpred.color.effect),
		tool.vars = list(gene.pair.split = gene.pair.split, cluster.split = cluster.split), 
		misc = list(musthave.colnames = tmp.musthave.new, if.remap.genes = remap.genes)
	)
	# give the result of alignment in the misc
	IT.InterCell.Obj@misc$input.align.result <- DEG.align.res
	# set object
	IT.InterCell.Obj <- setGenePairSplit(IT.InterCell.Obj, gene.pair.split)
	IT.InterCell.Obj <- setClusterSplit(IT.InterCell.Obj, cluster.split)

	return(IT.InterCell.Obj)
}



# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Accessory Function
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#' Using Reference Database
#'
#' The \code{setRefDatabase} function is to \bold{set} the reference database for \code{InterCell}.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname RefDatabase-InterCell
#' @export
#'
setGeneric(name = "setRefDatabase", def = function(object, ...) {
	standardGeneric("setRefDatabase")
	}
)

#' @param new.ref.database A new reference database.
#'
#' @examples
#' \dontrun{
#'   setRefDatabase(object, some.new.ref.database)
#' }
#'
#' @rdname RefDatabase-InterCell
#'
setMethod(
	f = "setRefDatabase",
	signature = "InterCell",
	definition = function(object, new.ref.database) {
		if (class(new.ref.database) != "InterCellDBPack") {
			stop("Given new reference database is not in right format.")
		}
		print("Change reference database.")
		object@database <- new.ref.database
		return(object)
	}
)

#' Using Reference Database
#'
#' The \code{getRefDatabase} function is to \bold{get} the reference database for \code{InterCell}.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname RefDatabase-InterCell
#' @export
#'
setGeneric(name = "getRefDatabase", def = function(object, ...) {
	standardGeneric("getRefDatabase")
	}
)

#'
#' @examples
#' \dontrun{
#'   getRefDatabase(object)
#' }
#'
#' @rdname RefDatabase-InterCell
#'
setMethod(
	f = "getRefDatabase",
	signature = "InterCell",
	definition = function(object) {
		if (class(object@database) != "InterCellDBPack") {
			stop("Unexpected error: Reference database is invalid!")
		}
		return(object@database)
	}
)


#' Using FullView Result
#' 
#' The \code{setFullViewResult} function is to \bold{set} the result of network analysis.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname FullViewResult-InterCell
#' @export
#'
setGeneric(name = "setFullViewResult", def = function(object, ...) {
	standardGeneric("setFullViewResult")
	}
)

#' @param new.inter.fullview A new result of network analysis.
#'
#' @examples
#' \dontrun{
#'   setFullViewResult(object, some.new.fullview.result)
#' }
#'
#' @rdname FullViewResult-InterCell
#'
setMethod(
	f = "setFullViewResult",
	signature = "InterCell",
	definition = function(object, new.inter.fullview) {
		if (!is.null(object@inter.fullview) && length(object@inter.fullview) != 0) {
			warning("Overwrite existed result on network analysis. Former downstream result is cleaned.")
		}
		object@inter.fullview <- new.inter.fullview
		object@tg.action.pairs <- list()
		object@tg.action.comp <- list()
		object@tg.itinfo <- list()
		object@tg.veinfo <- list()
		object@tg.spgenes <- list()
		return(object)
	}
)

#' Using FullView Result
#'
#' The \code{getFullViewResult} function is to \bold{get} the result of network analysis.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname FullViewResult-InterCell
#' @export
#'
setGeneric(name = "getFullViewResult", def = function(object, ...) {
	standardGeneric("getFullViewResult")
	}
)

#' @examples
#' \dontrun{
#'   getFullViewResult(object)
#' }
#' @rdname FullViewResult-InterCell
#'
setMethod(
	f = "getFullViewResult",
	signature = "InterCell",
	definition = function(object) {
		if (is.null(object@inter.fullview) || length(object@inter.fullview) == 0) {
			stop("Network analysis is not performed yet. Please use function `AnalyzeInterInFullView` to generate it.")
		}
		return(object@inter.fullview)
	}
)

#' Using Target Plain Information
#' 
#' The \code{setTgItInfo} function is to \bold{set} plain information about gene pairs,
#' which are directly fetched from full view analysis.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgItInfo-InterCell
#' @export
#'
setGeneric(name = "setTgItInfo", def = function(object, ...) {
	standardGeneric("setTgItInfo")
	}
)

#' @param new.itinfo A new plain information on gene pairs, which are directly fetched from full view analysis.
#'
#' @examples
#' \dontrun{
#'   setTgItInfo(object, some.new.itinfo)
#' }
#'
#' @rdname TgItInfo-InterCell
#'
setMethod(
	f = "setTgItInfo",
	signature = "InterCell",
	definition = function(object, new.itinfo) {
		object@tg.itinfo <- new.itinfo
		return(object)
	}
)

#' Using Target Plain Information
#' 
#' The \code{getTgItInfo} function is to \bold{get} plain information about gene pairs,
#' which are directly fetched from full view analysis.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgItInfo-InterCell
#' @export
#'
setGeneric(name = "getTgItInfo", def = function(object, ...) {
	standardGeneric("getTgItInfo")
	}
)

#' @examples
#' \dontrun{
#'   getTgItInfo(object)
#' }
#' @rdname TgItInfo-InterCell
#'
setMethod(
	f = "getTgItInfo",
	signature = "InterCell",
	definition = function(object) {
		if (is.null(object@tg.itinfo) || length(object@tg.itinfo) == 0) {
			stop("No plain information between specific 2 cells is fetched. Please use `FetchInterOI` to generate that. ")
		}
		return(object@tg.itinfo)
	}
)

#' Using TargetView Action Pairs
#' 
#' The \code{setTgActionPairs} function is to \bold{set} the result of intercellular analysis 
#' on action properties for one interaction between 2 cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgActionPairs-InterCell
#' @export
#'
setGeneric(name = "setTgActionPairs", def = function(object, ...) {
	standardGeneric("setTgActionPairs")
	}
)

#' @param new.action.pairs A new result of intercellular analysis on action properties
#'
#' @examples
#' \dontrun{
#'   setTgActionPairs(object, some.new.tg.action.pairs)
#' }
#'
#' @rdname FullViewResult-InterCell
#'
setMethod(
	f = "setTgActionPairs",
	signature = "InterCell",
	definition = function(object, new.action.pairs) {
		object@tg.action.pairs <- new.action.pairs
		return(object)
	}
)

#' Using TargetView Action Pairs
#' 
#' The \code{getTgActionPairs} function is to \bold{get} the result of intercellular analysis 
#' on action properties for one interaction between 2 cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgActionPairs-InterCell
#' @export
#'
setGeneric(name = "getTgActionPairs", def = function(object, ...) {
	standardGeneric("getTgActionPairs")
	}
)

#' @examples
#' \dontrun{
#'   getTgActionPairs(object)
#' }
#' @rdname TgActionPairs-InterCell
#'
setMethod(
	f = "getTgActionPairs",
	signature = "InterCell",
	definition = function(object) {
		if (is.null(object@tg.action.pairs) || length(object@tg.action.pairs) == 0) {
			stop("The action pairs for selected interaction is not generated. Please use `FetchInterOI` to generate that. ")
		}
		return(object@tg.action.pairs)
	}
)

#' Using TargetView VEinfo
#' 
#' The \code{setTgVEInfo} function is to \bold{set} the result of intercellular analysis
#' on detailed gene pairs (forming the interaction network) for one interaction between 2 cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgVEInfo-InterCell
#' @export
#'
setGeneric(name = "setTgVEInfo", def = function(object, ...) {
	standardGeneric("setTgVEInfo")
	}
)

#' @param new.veinfo A new result of intercellular analysis on detailed gene pairs.
#'
#' @examples
#' \dontrun{
#'   setTgVEInfo(object, some.new.veinfo)
#' }
#'
#' @rdname TgVEInfo-InterCell
#'
setMethod(
	f = "setTgVEInfo",
	signature = "InterCell",
	definition = function(object, new.veinfo) {
		object@tg.veinfo <- new.veinfo
		return(object)
	}
)

#' Using TargetView VEinfo
#'
#' The \code{getTgVEInfo} function is to \bold{get} the result of intercellular analysis
#' on detailed gene pairs (forming the interaction network) for one interaction between 2 cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgVEInfo-InterCell
#' @export
#'
setGeneric(name = "getTgVEInfo", def = function(object, ...) {
	standardGeneric("getTgVEInfo")
	}
)

#' @examples
#' \dontrun{
#'   getTgVEInfo(object)
#' }
#' @rdname TgVEInfo-InterCell
#'
setMethod(
	f = "getTgVEInfo",
	signature = "InterCell",
	definition = function(object) {
		if (is.null(object@tg.veinfo) || length(object@tg.veinfo) == 0) {
			stop("No interaction between specific 2 cells is fetched. Please use `FetchInterOI` to generate that. ")
		}
		return(object@tg.veinfo)
	}
)


#' Using TargetView Action Composition
#' 
#' The \code{setTgActionComp} function is to \bold{set} the result of intercellular analysis
#' on composition of action mode and action effect for one interaction between 2 cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgActionComp-InterCell
#' @export
#'
setGeneric(name = "setTgActionComp", def = function(object, ...) {
	standardGeneric("setTgActionComp")
	}
)

#' @param new.action.comp A new result of intercellular analysis on composition of action mode and action effect.
#'
#' @examples
#' \dontrun{
#'   setTgActionComp(object, some.new.action.comp)
#' }
#'
#' @rdname TgActionComp-InterCell
#'
setMethod(
	f = "setTgActionComp",
	signature = "InterCell",
	definition = function(object, new.action.comp) {
		object@tg.action.comp <- new.action.comp
		return(object)
	}
)

#' Using TargetView Action Composition
#'
#' The \code{getTgActionComp} function is to \bold{get} the result of intercellular analysis
#' on composition of action mode and action effect for one interaction between 2 cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgActionComp-InterCell
#' @export
#'
setGeneric(name = "getTgActionComp", def = function(object, ...) {
	standardGeneric("getTgActionComp")
	}
)

#' @examples
#' \dontrun{
#'   getTgActionComp(object)
#' }
#' @rdname TgActionComp-InterCell
#'
setMethod(
	f = "getTgActionComp",
	signature = "InterCell",
	definition = function(object) {
		if (is.null(object@tg.action.comp) || length(object@tg.action.comp) == 0) {
			stop("The analysis of composition of action properties is not performed. Please use `AnalyzeInterInAction` to generate that. ")
		}
		return(object@tg.action.comp)
	}
)


#' Using TargetView Pair Specificity
#' 
#' The \code{setTgSpGenes} function is to \bold{set} the result of intercellular analysis
#' on gene pair specificity for one interaction between 2 cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgSpGenes-InterCell
#' @export
#'
setGeneric(name = "setTgSpGenes", def = function(object, ...) {
	standardGeneric("setTgSpGenes")
	}
)

#' @param new.spgenes A new result of intercellular analysis on gene pair specificity.
#'
#' @examples
#' \dontrun{
#'   setTgSpGenes(object, some.new.spgenes)
#' }
#'
#' @rdname TgSpGenes-InterCell
#'
setMethod(
	f = "setTgSpGenes",
	signature = "InterCell",
	definition = function(object, new.spgenes) {
		object@tg.spgenes <- new.spgenes
		return(object)
	}
)

#' Using TargetView Pair Specificity
#'
#' The \code{getTgSpGenes} function is to \bold{get} the result of intercellular analysis
#' on gene pair specificity for one interaction between 2 cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname TgSpGenes-InterCell
#' @export
#'
setGeneric(name = "getTgSpGenes", def = function(object, ...) {
	standardGeneric("getTgSpGenes")
	}
)

#' @examples
#' \dontrun{
#'   getTgSpGenes(object)
#' }
#' @rdname TgSpGenes-InterCell
#'
setMethod(
	f = "getTgSpGenes",
	signature = "InterCell",
	definition = function(object) {
		if (is.null(object@tg.spgenes) || length(object@tg.spgenes) == 0) {
			stop("Analysis of gene pair specificity is not performed. Please use `AnalyzeInterSpecificity` to generate that. ")
		}
		return(object@tg.spgenes)
	}
)


#' Using Gene Pair Split
#' 
#' The \code{setGenePairSplit} function is to \bold{set} the gene pair split, 
#' which is either one charater or string, and used to split 2 gene partners in one gene pair. 
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname GenePairSplit-InterCell
#' @export
#'
setGeneric(name = "setGenePairSplit", def = function(object, ...) {
		standardGeneric("setGenePairSplit")
	}
)

#' @param new.gene.pair.split A new gene pair split, which is either one character or string.
#'
#' @examples
#' \dontrun{
#'   setGenePairSplit(object, some.new.gene.pair.split)
#' }
#'
#' @rdname GenePairSplit-InterCell
#'
setMethod(
	f = "setGenePairSplit", 
	signature = "InterCell",
	definition = function(object, new.gene.pair.split) {
		if (length(new.gene.pair.split) != 1) {
			stop("Only one character or string is allowed, like '~' or '-~-'.")
		}
		# check if symbol are in fgenes
		use.genes <- unique(object@fgenes$gene)
		check.split.res <- strsplit(use.genes, split = new.gene.pair.split, fixed = TRUE)
		if (length(unlist(check.split.res)) > length(use.genes)) {
			stop("Used gene pair split symbol exists in gene names given in processing data (fgenes), and will be discarded.")
		}
		object@tool.vars$gene.pair.split <- new.gene.pair.split
		return(object)
	}
)


#' Using Gene Pair Split
#' 
#' The \code{getGenePairSplit} function is to \bold{get} the gene pair split, 
#' which is either one charater or string, and used to split 2 gene partners in one gene pair. 
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname GenePairSplit-InterCell
#' @export
#'
setGeneric(name = "getGenePairSplit", def = function(object) {
		standardGeneric("getGenePairSplit")
	}
)

#' @examples
#' \dontrun{
#'   getGenePairSplit(object)
#' }
#' @rdname GenePairSplit-InterCell
#'
setMethod(
	f = "getGenePairSplit", 
	signature = "InterCell",
	definition = function(object) {
		return(object@tool.vars$gene.pair.split)
	}
)

#' Using Cluster Group Split
#' 
#' The \code{setClusterSplit} function is to \bold{set} the cluster group split, 
#' which is either one charater or string, and used to split 2 cell clusters in one interaction.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname ClusterSplit-InterCell
#' @export
#'
setGeneric(name = "setClusterSplit", def = function(object, ...) {
		standardGeneric("setClusterSplit")
	}
)

#' @param new.cluster.split A new cluster group split, which is either one character or string.
#'
#' @examples
#' \dontrun{
#'   setClusterSplit(object, some.new.cluster.split)
#' }
#'
#' @rdname ClusterSplit-InterCell
#'
setMethod(
	f = "setClusterSplit", 
	signature = "InterCell",
	definition = function(object, new.cluster.split) {
		if (length(new.cluster.split) != 1) {
			stop("Only one character or string is allowed, like '~' or '~~'.")
		}
		# check if symbol are in cluster names
		use.clusters <- unique(as.character(object@fgenes$cluster))
		check.split.res <- strsplit(use.clusters, split = new.cluster.split, fixed = TRUE)
		if (length(unlist(check.split.res)) > length(use.clusters)) {
			stop("Used cluster group split symbol exists in cluster name given in processing data (fgenes), and will be discarded.")
		}
		object@tool.vars$cluster.split <- new.cluster.split
		return(object)
	}
)

#' Using Cluster Group Split
#'
#' The \code{getClusterSplit} function is to \bold{set} the cluster group split, 
#' which is either one charater or string, and used to split 2 cell clusters in one interaction.
#'
#' @inheritParams InsideObjectInterCell
#' @param ... Parameters passed to other methods.
#'
#' @rdname ClusterSplit-InterCell
#' @export
#'
setGeneric(name = "getClusterSplit", def = function(object) {
		standardGeneric("getClusterSplit")
	}
)

#' @examples
#' \dontrun{
#'   getClusterSplit(object)
#' }
#' @rdname ClusterSplit-InterCell
#'
setMethod(
	f = "getClusterSplit", 
	signature = "InterCell",
	definition = function(object) {
		return(object@tool.vars$cluster.split)
	}
)



# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Database Selection Functions
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


#' Select Subset of Gene Pair Database
#'
#' This function is to select subset of gene pair databases, and options on evidence sources, 
#' confidence level, action properties (mode and effect) will be used. The result from different evidence sources
#' will be \bold{union} in default settings. The result from action properties could be either intersection result or union.
#' 
#' @param object A \code{InterCellDBPack} or \code{InterCell} object. \code{SelectDBSubset.default} uses
#'  \code{InterCellDBPack} as input.
#' @param combined.score.range The combined score from all evidence sources, which works on the whole database.
#' @param use.exp It adds the data whose evidence is experimentally validatd. 
#' @param exp.score.range It controls the score range when selecting subset by experimentally validatd evidence, i.e. when \code{use.exp = TRUE}. 
#'  The score should be 2 numbers within 1~1000.
#' @param use.know It adds the data whose evidence is pathway curated. 
#' @param know.score.range It controls the score range when selecting subset by pathway curated evidence, i.e. when \code{use.know = TRUE}. 
#'  The score should be 2 numbers within 1~1000.
#' @param use.pred It adds the data whose evidence is predicted.
#' @param pred.score.range It controls the score range when selecting subset by predicted evidence, i.e. when \code{use.pred = TRUE}. 
#'  The score should be 2 numbers within 1~1000.
#' @param sel.physical It selects the subset of gene pairs whose corresponding protein pairs are physical associated. This parameter is 
#'  identical to set \code{sel.action.mode = "binding"} for now due to the database limitation. It may change in future. 
#' @param sel.action.mode Selection by action mode. "ALL" means not use this to select subset.
#'  Other options will be directly select gene pair in that action mode. Supported options are listed in \code{kpred.action.mode}.
#' @param sel.action.effect Selection by action effect. "ALL" means not use this to select subset. Other 
#'  options will be directly select gene pair in that action effect. Supported options are listed in \code{kpred.action.effect}.
#' @param sel.action.merge.option Either 'intersect' or 'union'. The option for merging the result from selection on action mode and action effect.
#' @param action.score.range It controls the score range when selecting subset from action databases. 
#'  The score should be 2 numbers within 1~1000.
#' @param slim.along.with.pairs This decides whether to select the corresponding subset of action pair database 
#'  after selecting subset of gene pair database.
#'
#' @details
#' The 3 evidence channels (\code{use.exp}, \code{use.know}, \code{use.pred}) are in identical priority. 
#' For one gene pair, it could have scores from 3 evidence channels at the same time. In default setting, the 
#' result of this function will be \bold{union} of results from 3 channels. If union is not satisfied, the function 
#' \code{\link{MergeDBSubset}} will help.
#'
#' The score is within 1~1000. Score (>=700) would be consider as high confidence, and (>=900) is even higher confident.
#' Score (>=400) would be consider over medium confidence.
#' Score (<400) would be consider low confidence.
#'
#' @return A \code{InterCellDBPack} or \code{InterCell} object, which is the same as given parameter \code{object}.
#'
#' @rdname SelectDBSubset
#' @order 4
#'
#'
SelectDBSubset.default <- function(
	object,
	combined.score.range = c(1, 1000),
	use.exp = TRUE,
	exp.score.range = c(1, 1000),
	use.know = TRUE,
	know.score.range = c(1, 1000),
	use.pred = TRUE,
	pred.score.range = c(1, 1000),
	sel.physical = FALSE,
	sel.action.mode = "ALL",
	sel.action.effect = "ALL",
	sel.action.merge.option = "intersect",
	action.score.range = c(1, 1000),
	slim.along.with.pairs = TRUE
) {
	# score standardize
	inside.score.stdfunc <- function(score.range, score.name) {
		score.range <- as.numeric(score.range)
		if (length(score.range) != 2) {
			stop(paste0("Given `", score.name, "` not in right format, should be like c(1, 1000)."))
		}
		if (score.range[1] < 1) {
			score.range[1] <- 1
		}
		if (score.range[2] > 1000) {
			score.range[2] <- 1000
		}
		return(score.range)
	}

	# check score range
	combined.score.range <- inside.score.stdfunc(combined.score.range, "combined.score.range")
	exp.score.range <- inside.score.stdfunc(exp.score.range, "exp.score.range")
	know.score.range <- inside.score.stdfunc(know.score.range, "know.score.range")
	pred.score.range <- inside.score.stdfunc(pred.score.range, "pred.score.range")
	action.score.range <- inside.score.stdfunc(action.score.range, "action.score.range")

	# check action options
	allowed.action.options <- c("intersect", "union")
	sel.action.merge.option <- CheckParamStd(sel.action.merge.option, allowed.action.options, opt.name = "`sel.action.merge.option`", stop.on.zero = TRUE)

	# check action 
	if (sel.action.mode[1] != "ALL") {
		not.valid.action.mode <- setdiff(sel.action.mode, kpred.action.mode)
		if (length(not.valid.action.mode) > 0) {
			warning("Given undefined action mode: ", paste0(not.valid.action.mode, collapse = ", ", ". "))
		}
		sel.action.mode <- intersect(sel.action.mode, kpred.action.mode)
		if (length(sel.action.mode) == 0) {
			stop("No valid action mode is selected!")
		}
	}
	if (sel.action.effect[1] != "ALL") {
		not.valid.action.effect <- setdiff(sel.action.effect, kpred.action.effect)
		if (length(not.valid.action.effect) > 0) {
			warning("Given undefined aciton effect: ", paste0(not.valid.action.effect, collapse = ", "), ". ")
		}
		sel.action.effect <- intersect(sel.action.effect, kpred.action.effect)
		if (length(sel.action.effect) == 0) {
			stop("No valid action effect is selected!")
		}
	}

	this.pairs.db <- object@pairs.db
	this.actions.db <- object@actions.db
	# top-range subset
	this.pairs.db <- subset(this.pairs.db, inter.Combined.Score >= combined.score.range[1] & inter.Combined.Score <= combined.score.range[2])
	this.actions.db <- subset(this.actions.db, score >= action.score.range[1] & score <= action.score.range[2])
	if (nrow(this.pairs.db) == 0 || nrow(this.actions.db) == 0) {
		stop("Select too small range of score, and get no valid gene pairs!")
	}

	# select from scores
	retDB.list <- list()
	# get exp part
	if (use.exp == TRUE) {
		retDB.list <- c(retDB.list, list(intersect(which(this.pairs.db$inter.Experiments.Score >= exp.score.range[1]),
			which(this.pairs.db$inter.Experiments.Score <= exp.score.range[2]))))
	}
	if (use.know == TRUE) {
		retDB.list <- c(retDB.list, list(intersect(which(this.pairs.db$inter.Database.Score >= know.score.range[1]),
			which(this.pairs.db$inter.Database.Score <= know.score.range[2]))))
	}
	if (use.pred == TRUE) {
		retDB.list <- c(retDB.list, list(intersect(which(this.pairs.db$inter.Predicted.Score >= pred.score.range[1]),
			which(this.pairs.db$inter.Predicted.Score <= pred.score.range[2]))))
	}
	# collect result from score selection
	this.pairs.db <- this.pairs.db[Reduce(union, retDB.list), ]
	#this.pairs.db <- DoPartUnique(this.pairs.db, match(c("inter.GeneName.A", "inter.GeneName.B"), colnames(this.pairs.db)))

	## select from actions
	use.action.pairs.list <- list()
	use.cut.symbol <- "->-"
	# in mode
	if (sel.physical == TRUE) {  # currently it's the same to set sel.action.mode => 'binding'
		if (sel.action.mode[1] == "ALL") {
			sel.action.mode <- "binding"
		} else {
			sel.action.mode <- unique(c(sel.action.mode, "binding"))
		}
	}
	if (sel.action.mode[1] != "ALL") {
		tmp.sel.mode.pairs.df <- FastAlignPairs(this.actions.db[which(this.actions.db$mode %in% sel.action.mode), 
			c("inter.GeneID.A", "inter.GeneID.B", "inter.GeneName.A", "inter.GeneName.B")], 4)
		tmp.sel.mode.pairs <- paste(tmp.sel.mode.pairs.df[, "inter.GeneID.A"], tmp.sel.mode.pairs.df[, "inter.GeneID.B"], sep = use.cut.symbol)
		use.action.pairs.list <- c(use.action.pairs.list, list(action.mode = tmp.sel.mode.pairs))
	}
	# in effect
	if (sel.action.effect[1] != "ALL") {
		# action
		tmp.col.action <- tapply(seq_len(nrow(this.actions.db)), this.actions.db$action, function(x) { x })
		# set list name right to be "non", "activation", "inhibition"
		ind.non.action <- setdiff(seq_along(tmp.col.action), match(c("activation", "inhibition"), names(tmp.col.action)))
		if (length(ind.non.action) != 1) {
			stop("Database Failed by incorporating undefined action mode.")
		}
		names(tmp.col.action)[ind.non.action] <- "non"
		# direction
		tmp.col.direction <- tapply(seq_len(nrow(this.actions.db)), this.actions.db$is_directional, function(x) { x })
		#tmp.col.a.act <- tapply(seq_len(nrow(this.actions.db)), this.actions.db$a_is_acting, function(x) { x })
		
		tmp.ret.inds <- integer()
		if ("positive" %in% sel.action.effect) {
			tmp.ret.inds <- c(tmp.ret.inds, Reduce(intersect, list(tmp.col.action[["activation"]], tmp.col.direction[["t"]])))
		}
		if ("negative" %in% sel.action.effect) {
			tmp.ret.inds <- c(tmp.ret.inds, Reduce(intersect, list(tmp.col.action[["inhibition"]], tmp.col.direction[["t"]])))	
		}
		if ("unspecified" %in% sel.action.effect) {
			tmp.ret.inds <- c(tmp.ret.inds, Reduce(intersect, list(tmp.col.action[["non"]], tmp.col.direction[["t"]])))
		}
		if ("undirected" %in% sel.action.effect) {
			tmp.ret.inds <- c(tmp.ret.inds, Reduce(intersect, list(tmp.col.action[["non"]], tmp.col.direction[["f"]])))	
		}
		tmp.sel.effect.pairs.df <- FastAlignPairs(this.actions.db[tmp.ret.inds, 
			c("inter.GeneID.A", "inter.GeneID.B", "inter.GeneName.A", "inter.GeneName.B")], 4)
		tmp.sel.effect.pairs <- paste(tmp.sel.effect.pairs.df[, "inter.GeneID.A"], tmp.sel.effect.pairs.df[, "inter.GeneID.B"], sep = use.cut.symbol)
		use.action.pairs.list <- c(use.action.pairs.list, list(action.effect = tmp.sel.effect.pairs))
	}
	# merge result
	use.action.pairs <- character()
	if (length(use.action.pairs.list) == 2 && all(c("action.mode", "action.effect") %in% names(use.action.pairs.list))) {
		use.action.pairs <- switch(sel.action.merge.option,
			"intersect" = {
				intersect(use.action.pairs.list[["action.mode"]], use.action.pairs.list[["action.effect"]])
			},
			"union" = {
				union(use.action.pairs.list[["action.mode"]], use.action.pairs.list[["action.effect"]])
			},
			stop("Undefined result merging option!")
		)
	} else {
		use.action.pairs <- as.character(unlist(use.action.pairs.list))
	}
	use.action.pairs <- unique(use.action.pairs)

	## check if selection is applied on actions.db, IF DO, slim the actions.db
	use.action.to.limit.pairs <- character()
	if (length(use.action.pairs) > 0 || !isTRUE(all.equal(action.score.range, c(1, 1000)))) {
		# TO apply selected pairs from actions.db to limit pairs.db
		if (!isTRUE(all.equal(action.score.range, c(1, 1000)))) {
			tmp.actions.add.df <- FastAlignPairs(this.actions.db[, c("inter.GeneID.A", "inter.GeneID.B", "inter.GeneName.A", "inter.GeneName.B")], 4)
			tmp.add.pairs <- paste(tmp.actions.add.df[, "inter.GeneID.A"], tmp.actions.add.df[, "inter.GeneID.B"], sep = use.cut.symbol)
			if (length(use.action.pairs) > 0) {
				use.action.to.limit.pairs <- intersect(tmp.add.pairs, use.action.pairs)
			} else {
				use.action.to.limit.pairs <- tmp.add.pairs
			}
		} else {
			use.action.to.limit.pairs <- use.action.pairs
		}

		# collect actions result from actions selection
		if (length(use.action.to.limit.pairs) > 0) {
			# as actions.db slightly different, get conv and rev use.action.to.limit.pairs
			tmp.use.action.to.limit.pairs.rev <- unique(unlist(lapply(strsplit(use.action.to.limit.pairs, split = use.cut.symbol), t.cut = use.cut.symbol, function(x, t.cut) {
				paste(rev(x), collapse = t.cut)
			})))
			tmp.use.action.to.limit.pairs.rev <- c(tmp.use.action.to.limit.pairs.rev, use.action.to.limit.pairs)
			tmp.cur.act.pairs <- paste(this.actions.db[, "inter.GeneID.A"], this.actions.db[, "inter.GeneID.B"], sep = use.cut.symbol)
			this.actions.db <- this.actions.db[which(tmp.cur.act.pairs %in% tmp.use.action.to.limit.pairs.rev), ]
			object@actions.db <- this.actions.db
		}
	}

	# collect pairs result from actions selection
	if (length(use.action.to.limit.pairs) > 0) {
		this.pairs.db <- FastAlignPairs(this.pairs.db, 4)
		tmp.cur.pairs <- paste(this.pairs.db[, "inter.GeneID.A"], this.pairs.db[, "inter.GeneID.B"], sep = use.cut.symbol)
		this.pairs.db <- this.pairs.db[which(tmp.cur.pairs %in% use.action.to.limit.pairs), ]
	}

	## get pairs settled, and slim other accessory database
	object@pairs.db <- this.pairs.db
	if (slim.along.with.pairs == TRUE) {
		use.cut.symbol.slim <- "->-"
		# add direct way ones (conv)
		tmp.sel.pairs <- paste(object@pairs.db[, "inter.GeneID.A"], object@pairs.db[, "inter.GeneID.B"], sep = use.cut.symbol.slim)
		# add reverse direction ones (rev)
		tmp.sel.pairs.conv <- c(tmp.sel.pairs, paste(object@pairs.db[, "inter.GeneID.B"], object@pairs.db[, "inter.GeneID.A"], sep = use.cut.symbol.slim))
		# slim actions
		tmp.sel.actions.p <- paste(object@actions.db[, "inter.GeneID.A"], object@actions.db[, "inter.GeneID.B"], sep = use.cut.symbol.slim)
		object@actions.db <- object@actions.db[which(tmp.sel.actions.p %in% tmp.sel.pairs.conv), ]
	}

	return(object)
}

#' @param ... Parameters passed to function \code{SelectDBSubset.default}.
#'
#' @rdname SelectDBSubset
#' @order 1
#' @export
#'
setGeneric(name = "SelectDBSubset", def = function(object, ...) {
	standardGeneric("SelectDBSubset")
	}
)

#' @rdname SelectDBSubset
#' @order 2
#' @export
#'
setMethod(
	f = "SelectDBSubset",
	signature = "InterCellDBPack",
	definition = function(object, ...) {
		SelectDBSubset.default(object, ...)
	}
)

#' @rdname SelectDBSubset
#' @order 3
#' @export
#'
setMethod(
	f = "SelectDBSubset",
	signature = "InterCell",
	definition = function(object, ...) {
		setRefDatabase(object, SelectDBSubset.default(object@database, ...))
	}
)


#' Merge Database
#'
#' This function is to merge 2 databases to get user-desired database.
#'
#' @param db.object.1 A \code{InterCellDBPack} object to be merged.
#' @param db.object.2 Another \code{InterCellDBPack} object to be merged.
#' @param merge.option The supported options are 'intersect' and 'union'.
#'
#' @return A \code{InterCellDBPack} object.
#'
#' @export
#'
MergeDBSubset <- function(
	db.object.1,
	db.object.2,
	merge.option = "intersect"
) {
	if (class(db.object.1) != "InterCellDBPack" || class(db.object.2) != "InterCellDBPack") {
		ret.mg <- character()
		if (class(db.object.1) != "InterCellDBPack") ret.mg <- c(ret.mg, "db.object.1")
		if (class(db.object.2) != "InterCellDBPack") ret.mg <- c(ret.mg, "db.object.2")
		stop("Given database subset in parameter ", paste0(ret.mg, collapse = ", ") , " not in `InterCellDBPack-class`. ",
			 "Please use either `CreateDBPackObject` or `SelectDBSubset` to generate.")
	}

	if (length(merge.option) > 1) {
		merge.option <- merge.option[1]
	}
	merge.option <- CheckParamStd(merge.option, c("intersect", "union"), "parameter `merge.option`", stop.on.zero = TRUE)

	# before merging, check species
	if (!identical(db.object.1@species, db.object.2@species)) {
		stop("Merging database subset from different species ", paste(db.object.1@species, db.object.2@species, sep = ", "), ". ")
	} else {
		if (merge.option == "union") {
			# genes.db 
			# $gene.ncbi.db
			addon.ugene.ncbi.db <- setdiff(rownames(db.object.2@genes.db$gene.ncbi.db), rownames(db.object.1@genes.db$gene.ncbi.db))
			db.object.1@genes.db$gene.ncbi.db <- rbind(db.object.1@genes.db$gene.ncbi.db, db.object.2@genes.db$gene.ncbi.db[which(rownames(db.object.2@genes.db$gene.ncbi.db) %in% addon.ugene.ncbi.db), ])
			# $gene.synonyms.db
			addon.ugene.synonyms.db <- setdiff(rownames(db.object.2@genes.db$gene.synonyms.db), rownames(db.object.1@genes.db$gene.synonyms.db))
			db.object.1@genes.db$gene.synonyms.db <- rbind(db.object.1@genes.db$gene.synonyms.db, db.object.2@genes.db$gene.synonyms.db[which(rownames(db.object.2@genes.db$gene.synonyms.db) %in% addon.ugene.synonyms.db), ])
			# $gene.dup.synonyms.db
			db.object.1@genes.db$gene.dup.synonyms.db <- unique(rbind(db.object.1@genes.db$gene.dup.synonyms.db, db.object.2@genes.db$gene.dup.synonyms.db))

			# pairs.db
			addon.pairs.db <- setdiff(rownames(db.object.2@pairs.db), rownames(db.object.1@pairs.db))
			db.object.1@pairs.db <- rbind(db.object.1@pairs.db, db.object.2@pairs.db[which(rownames(db.object.2@pairs.db) %in% addon.pairs.db), ])
			# actions.db
			addon.actions.db <- setdiff(rownames(db.object.2@actions.db), rownames(db.object.1@actions.db))
			db.object.1@actions.db <- rbind(db.object.1@actions.db, db.object.2@actions.db[which(rownames(db.object.2@actions.db) %in% addon.actions.db), ])
			# anno.location.db
			addon.anno.location.db <- setdiff(rownames(db.object.2@anno.location.db), rownames(db.object.1@anno.location.db))
			db.object.1@anno.location.db <- rbind(db.object.1@anno.location.db, db.object.2@anno.location.db[which(rownames(db.object.2@anno.location.db) %in% addon.anno.location.db), ])
			# anno.type.db 
			addon.anno.type.db <- setdiff(rownames(db.object.2@anno.type.db), rownames(db.object.1@anno.type.db))
			db.object.1@anno.type.db <- rbind(db.object.1@anno.type.db, db.object.2@anno.type.db[which(rownames(db.object.2@anno.type.db) %in% addon.anno.type.db), ])
			# go.ref.db 
			addon.go.ref.db <- setdiff(rownames(db.object.2@go.ref.db), rownames(db.object.1@go.ref.db))
			db.object.1@go.ref.db <- rbind(db.object.1@go.ref.db, db.object.2@go.ref.db[which(rownames(db.object.2@go.ref.db) %in% addon.go.ref.db), ])
		}
		if (merge.option == "intersect") {
			# genes.db 
			# $gene.ncbi.db
			addon.ugene.ncbi.db <- intersect(rownames(db.object.2@genes.db$gene.ncbi.db), rownames(db.object.1@genes.db$gene.ncbi.db))
			db.object.1@genes.db$gene.ncbi.db <- db.object.2@genes.db$gene.ncbi.db[which(rownames(db.object.2@genes.db$gene.ncbi.db) %in% addon.ugene.ncbi.db), ]
			# $gene.synonyms.db
			addon.ugene.synonyms.db <- intersect(rownames(db.object.2@genes.db$gene.synonyms.db), rownames(db.object.1@genes.db$gene.synonyms.db))
			db.object.1@genes.db$gene.synonyms.db <- db.object.2@genes.db$gene.synonyms.db[which(rownames(db.object.2@genes.db$gene.synonyms.db) %in% addon.ugene.synonyms.db), ]
			# $gene.dup.synonyms.db
			addon.ugene.dup.synonyms.db <- intersect(rownames(db.object.2@genes.db$gene.dup.synonyms.db), rownames(db.object.1@genes.db$gene.dup.synonyms.db))
			db.object.1@genes.db$gene.dup.synonyms.db <- db.object.2@genes.db$gene.dup.synonyms.db[which(rownames(db.object.2@genes.db$gene.dup.synonyms.db) %in% addon.ugene.dup.synonyms.db), ]
	
			# pairs.db
			addon.pairs.db <- intersect(rownames(db.object.2@pairs.db), rownames(db.object.1@pairs.db))
			db.object.1@pairs.db <- db.object.2@pairs.db[which(rownames(db.object.2@pairs.db) %in% addon.pairs.db), ]
			# actions.db
			addon.actions.db <- intersect(rownames(db.object.2@actions.db), rownames(db.object.1@actions.db))
			db.object.1@actions.db <- db.object.2@actions.db[which(rownames(db.object.2@actions.db) %in% addon.actions.db), ]
			# anno.location.db
			addon.anno.location.db <- intersect(rownames(db.object.2@anno.location.db), rownames(db.object.1@anno.location.db))
			db.object.1@anno.location.db <- db.object.2@anno.location.db[which(rownames(db.object.2@anno.location.db) %in% addon.anno.location.db), ]
			# anno.type.db 
			addon.anno.type.db <- intersect(rownames(db.object.2@anno.type.db), rownames(db.object.1@anno.type.db))
			db.object.1@anno.type.db <- db.object.2@anno.type.db[which(rownames(db.object.2@anno.type.db) %in% addon.anno.type.db), ]
			# go.ref.db 
			addon.go.ref.db <- intersect(rownames(db.object.2@go.ref.db), rownames(db.object.1@go.ref.db))
			db.object.1@go.ref.db <- db.object.2@go.ref.db[which(rownames(db.object.2@go.ref.db) %in% addon.go.ref.db), ]
		}
	}

	return(db.object.1)
}



# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Other Related Functions
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#' Remap Gene Symbols to Their Authorized Genenames
#'
#' @description
#' Use species-specific reference database to remap genes to their authorized genenames.
#'
#' @param markers.all Data.frame. Feature genes that are generated from \code{Seurat::FindAllMarkers()} or 
#' similar functions in other packages.
#' @inheritParams InsideParam.species
#' @param if.used.inside Logic. If used inside, some process will not run.
#' @param final.dup.rm Logic. If set TRUE, duplicated genes of each cluster in final mapping result will be 
#'  removed. If set FALSE, those genes will be applied with function \code{make.unique}.
#'
#' @export
#'
DataPrep.RemapClustersMarkers  <- function(
	markers.all,
	species,
	if.used.inside = FALSE,
	final.dup.rm = TRUE
) {
	# force character
	markers.all$gene <- as.character(markers.all$gene)
	markers.all$cluster <- as.character(markers.all$cluster)

	## set each database
	# Check species, only support human and mouse yet
	genes.ref.db <- NULL
	species <- CheckSpeciesValidity(species)
	if (species == "human") {
		genes.ref.db <- genes.human.ref.db
	} else {
		genes.ref.db <- genes.mouse.ref.db
	}
	
	entrez.db <- genes.ref.db$gene.ncbi.db
	map.synonyms.db <- genes.ref.db$gene.synonyms.db

	# split genes to already authorized ones and un-authorized ones
	inds.raw.match <- which(markers.all$gene %in% entrez.db$Symbol_from_nomenclature_authority)
	markers.raw.match   <- markers.all[inds.raw.match, ]
	markers.raw.unmatch <- markers.all[setdiff(seq_len(nrow(markers.all)), inds.raw.match), ]
	proc.unmatch.genes <- unique(markers.raw.unmatch$gene)
	
	## un-authorized genes have 2 ways to go:
	# 1. cannot mapping from synonyms, keep still
	# 2. mapping from synonyms
	#
	ret.d0.cannot.match <- ret.d1.map.to.diff <- ret.d2.diff.map.one <- ret.d3.map.to.exist <- character()
	unmatch.map.results <- data.frame(unmatched = character(), match.res = character(), stringsAsFactors = FALSE)
	if (nrow(markers.raw.unmatch) > 0) {
		# 1> extract unmatch-able genes
		ret.d0.cannot.match <- setdiff(proc.unmatch.genes, map.synonyms.db$Synonym.each)
		
		# short warning
		if (length(ret.d0.cannot.match) > 0 && if.used.inside == FALSE) {
			print(paste0("Get ", length(ret.d0.cannot.match), 
				" genes cannot be mapped from synonyms.",
				" See return value `$unmapping.genes`."))
		}

		# 2> mapping the rest
		genes.poss.map <- setdiff(proc.unmatch.genes, ret.d0.cannot.match)
		inds.map.possible <- which(map.synonyms.db$Synonym.each %in% genes.poss.map)
		
		# check un-identical synonyms. One synonyms could map to different authorized genes
		check.non.identical <- tapply(map.synonyms.db$Symbol_from_nomenclature_authority[inds.map.possible], map.synonyms.db$Synonym.each[inds.map.possible], function(x) {x}, simplify = FALSE)
		inds.dups <- sapply(check.non.identical, function(x) { ifelse(length(x) > 1, TRUE, FALSE) })
		# short warning
		ret.d1.warning <- names(check.non.identical)[inds.dups]
		if (length(ret.d1.warning) > 0 && if.used.inside == FALSE) {
			print(paste0("Get ", length(ret.d1.warning), 
				" synonyms that cannot be mapped to unified authorized genes.",
				" See return value `$one.synonym.map.to.some.genes`."))  
		}
		# detailed hint
		ret.d1.map.to.diff <- vapply(which(inds.dups == TRUE), all.non.identical = check.non.identical, 
			function(x, all.non.identical) {
				this.gene <- names(all.non.identical)[x]
				against.authorized.genes <- paste0("(", paste0(all.non.identical[[x]], collapse = ", "), ")")
				paste(this.gene, "-->", against.authorized.genes)
			},
			FUN.VALUE = character(1), USE.NAMES = FALSE
		)

		# check if mapping result the same as other mapped genes
		check.map.dups <- tapply(map.synonyms.db$Synonym.each[inds.map.possible], 
			map.synonyms.db$Symbol_from_nomenclature_authority[inds.map.possible], 
			function(x) {x}, simplify = FALSE)
		inds.map.dups <- sapply(check.map.dups, function(x) { ifelse(length(x) > 1, TRUE, FALSE) })
		# short warning
		ret.d2.warning <- unique(as.character(unlist(lapply(which(inds.map.dups == TRUE), all.map.dups = check.map.dups, 
			function(x, all.map.dups) {
				all.map.dups[[x]]
			}
		))))
		if (length(ret.d2.warning) > 0 && if.used.inside == FALSE) {
			print(paste0("Get ", length(ret.d2.warning), 
				" synonyms that get overlap mapping genes with at least one other.",
				" See return value `$some.synonyms.map.to.one.gene`."))  
		}
		# detailed hint
		ret.d2.diff.map.one <- vapply(which(inds.map.dups == TRUE), all.map.dups = check.map.dups,
			function(x, all.map.dups) {
				this.tg <- names(all.map.dups)[x]
				from.synonyms <- paste0("(", paste0(all.map.dups[[x]], collapse = ", "), ")")
				paste(from.synonyms, "-->", this.tg)
			},
			FUN.VALUE = character(1), USE.NAMES = FALSE
		)

		# check if mapping to exist genes
		check.map.to.exist <- intersect(markers.raw.match$gene, names(check.map.dups))
		# short warning
		ret.d3.warning <- as.character(unlist(lapply(check.map.to.exist, all.map.dups = check.map.dups, 
			function(x, all.map.dups) {
				all.map.dups[[which(names(all.map.dups) == x)]]
			}
		)))
		if (length(ret.d3.warning) > 0 && if.used.inside == FALSE) {
			print(paste0("Get ", length(ret.d3.warning), 
				" synonyms that get mapping to existing authorized genes.",
				" See return value `$synonyms.map.to.exist.gene`."))
		}
		# detailed hint
		ret.d3.map.to.exist <- vapply(check.map.to.exist, all.map.dups = check.map.dups, 
			function(x, all.map.dups) {
				this.to.map.genes <- all.map.dups[[which(names(all.map.dups) == x)]]
				orig.to.map <- paste0("(", paste0(this.to.map.genes, collapse = ", "), ")")
				paste(orig.to.map, "-->", x)
			},
			FUN.VALUE = character(1), USE.NAMES = FALSE
		)
		
		## get mapping result
		# unmatch-able ones
		unmatch.map.result.0 <- data.frame(unmatched = ret.d0.cannot.match, match.res = ret.d0.cannot.match, stringsAsFactors = FALSE)
		# match-able ones, in default: the first matched gene name will be used 
		inds.map.matches <- match(genes.poss.map, map.synonyms.db$Synonym.each)
		unmatch.map.result.1 <- data.frame(unmatched = map.synonyms.db$Synonym.each[inds.map.matches], match.res = map.synonyms.db$Symbol_from_nomenclature_authority[inds.map.matches], stringsAsFactors = FALSE)
		# collect all
		unmatch.map.results <- rbind(unmatch.map.result.0, unmatch.map.result.1)
	}
	
	# collect markers result after mapping
	markers.raw.unmatch.dummy <- left_join(markers.raw.unmatch[, "gene", drop = FALSE],
		unmatch.map.results, by = c("gene" = "unmatched"))
	markers.raw.unmatch$gene <- markers.raw.unmatch.dummy$match.res
	markers.all <- rbind(markers.raw.match, markers.raw.unmatch)

	# after remapping, genes get to be duplicate with existing ones, 
	# re-check if mapping result has duplicate genes
	if (final.dup.rm == TRUE) {
		fcheck.result <- lapply(unique(markers.all$cluster), ref.markers = markers.all,
			function(x, ref.markers) {
				this.c.markers <- ref.markers[which(ref.markers$cluster == x), ]
				this.c.len <- tapply(seq_along(this.c.markers$gene), this.c.markers$gene, length)
				this.f.dup.genes <- names(this.c.len)[which(this.c.len > 1)]
				# in default: remove the latter one in given data
				this.c.markers <- DoPartUnique(this.c.markers, match(c("gene", "cluster"), colnames(this.c.markers)))
				list(dup.genes = this.f.dup.genes, markers = this.c.markers)
			})
		names(fcheck.result) <- unique(markers.all$cluster)
	} else {
		fcheck.result <- lapply(unique(markers.all$cluster), ref.markers = markers.all,
			function(x, ref.markers) {
				this.c.markers <- ref.markers[which(ref.markers$cluster == x), ]
				# in default: make unique of those genes
				this.c.markers$gene <- make.unique(this.c.markers$gene)
				list(dup.genes = character(), markers = this.c.markers)
			})
		names(fcheck.result) <- unique(markers.all$cluster)
	}
	

	# ret 
	ret.markers.all <- bind_rows(lapply(fcheck.result, function(x) { x$markers }))
	# detailed hint
	ret.dx.fcheck.dup <- lapply(seq_along(fcheck.result), all.fcheck.res = fcheck.result,
		function(x, all.fcheck.res) {
			this.cluster <- names(all.fcheck.res)[x]
			this.dup.genes <- all.fcheck.res[[x]][["dup.genes"]]
			ret.detailed <- NA
			if (length(this.dup.genes) > 0) {
				ret.detailed <- paste(this.cluster, "~", paste0(this.dup.genes, collapse = ", "))
			}
			list(detailed = ret.detailed, raw = unique(this.dup.genes))
		}
	)
	un.fin.check <- as.character(sapply(ret.dx.fcheck.dup, function(x) { x$detailed }))
	un.fin.check <- un.fin.check[which(!is.na(un.fin.check))]
	if (length(un.fin.check) > 0) {
		warning("There remain genes group by clusters to be checked manually. ",
			paste0(un.fin.check, collapse = "; "),
			". The program automatically remove duplicate ones in cluster scale.")
	}
	ret.d4.final.dup.genes <- lapply(ret.dx.fcheck.dup, function(x) { x$raw })
	names(ret.d4.final.dup.genes) <- names(fcheck.result)
	ret.d4.len <- vapply(ret.d4.final.dup.genes, FUN = length, FUN.VALUE = integer(1))
	ret.d4.final.dup.genes <- ret.d4.final.dup.genes[which(ret.d4.len > 0)]

	return(list(result = ret.markers.all, 
		unmapping.genes = ret.d0.cannot.match, 
		one.synonym.map.to.some.genes = ret.d1.map.to.diff, 
		some.synonyms.map.to.one.gene = ret.d2.diff.map.one, 
		synonyms.map.to.exist.gene = ret.d3.map.to.exist,
		after.map.dup.genes = ret.d4.final.dup.genes))
}



#' Add Expression Data in InterCell Object
#'
#' This function adds expression data to InterCell Object. It is mostly internally used.
#'
#' @param fgenes feature gene table, stored in slot \code{fgenes} of \code{InterCell} object.
#' @param exprs.data normalized count matrix
#' @param force.overwrite If column 'Exprs' already exists, set this parameter \code{TRUE} to overwrite that.
#'
#' @export
#'
DataPrep.AddExprs <- function(
	fgenes,
	exprs.data,
	force.overwrite = FALSE
) {
	if (("Exprs" %in% colnames(fgenes)) && !force.overwrite) {
		warning("Column 'Exprs' exists. If want to modify, use 'force.overwrite = TRUE' to overwrite existed one.")
		return(fgenes)
	}

	# handle with format of exprs.data
	if ("Seurat" %in% class(exprs.data)) {
		stop("Please use <SeuratObj>[[<assay>]]@<data>, for example SeuratObj[['RNA']]@data, to give the expression data.")
		#exprs.data <- AverageExpression(exprs.data, assays = "RNA", slot = "counts")[[1]]
		#exprs.data <- as.matrix(exprs.data)
	} else {  # transform to matrix
		if (is.null(ncol(exprs.data)) || ncol(exprs.data) == 0 ||
			is.null(nrow(exprs.data)) || nrow(exprs.data) == 0) {
			stop("Try add 'Exprs' but lack data. Please provided valid expression data.")
		}
		exprs.data <- as.matrix(exprs.data)
	}

	# give warning when duplicated genes are given
	if (anyDuplicated(rownames(exprs.data))) {
		warning("Duplicated gene names in expression data detected. Unexpected result may be generated.")
	}

	# check if cluster are matched with current data
	if (!all(unique(fgenes$cluster) %in% colnames(exprs.data))) {
		stop("Given data lacks some required clusters: ", 
			paste0(setdiff(unique(fgenes$cluster), colnames(exprs.data)), collapse = ", "),
			"!")
	}

	# add expression data in every cluster
	fgenes <- bind_rows(lapply(unique(fgenes$cluster), 
		fgenes = fgenes, exprs.data = exprs.data,
		function(x, fgenes, exprs.data) {
			tmp.exprs <- exprs.data[, which(colnames(exprs.data) == x), drop = FALSE]
			tmp.exprs <- rowSums(tmp.exprs) / ncol(tmp.exprs)
			fgenes <- subset(fgenes, cluster == x)
			inds.exprs <- match(fgenes$gene, names(tmp.exprs))
			fgenes[, "Exprs"] <- as.numeric(tmp.exprs[inds.exprs])
			return(fgenes)
		}
	))

	return(fgenes)
}



#' Generate Gene Pairs in Standard Format
#' 
#' @description
#' This function generates gene pairs in standard format(in data frame), 
#' and gets these pairs easier to be compared with others.
#'
#' @param VEinfos standard storage for one interaction.
#' @param musthave.colnames column names that must be included.
#'
#' @details
#' The standard format in this package is that gene pairs are maintained in data.frame, and the 2 genes 
#' participated in each gene pair are recorded in columns named "inter.GeneName.A" and "inter.GeneName.B".
#'
#' @importFrom dplyr left_join
#'
DataPrep.GenStdGenePairs.from.VEinfos <- function(
  VEinfos,
  musthave.colnames
) {
	vertices.infos <- VEinfos$vertices.infos
	edges.infos <- VEinfos$edges.infos
	# pre-check
	if (!all(kmusthave.colnames %in% musthave.colnames)) {
		stop("Provided column names are not matched with InterCellDB requirement!")
	}
	musthave.colnames <- setdiff(musthave.colnames, c("gene", "cluster"))
	#
	tmp.res <- left_join(edges.infos[, c("from", "to")], vertices.infos[, c(c("UID", "ClusterName", "GeneName"), musthave.colnames)], by = c("from" = "UID"))
	colnames(tmp.res)[c(ncol(tmp.res) - (1+length(musthave.colnames)):0)] <- paste("inter", c(c("Cluster", "GeneName"), musthave.colnames), "A", sep = ".")
	tmp.res <- left_join(tmp.res, vertices.infos[, c(c("UID", "ClusterName", "GeneName"), musthave.colnames)], by = c("to" = "UID"))
	colnames(tmp.res)[c(ncol(tmp.res) - (1+length(musthave.colnames)):0)] <- paste("inter", c(c("Cluster", "GeneName"), musthave.colnames), "B", sep = ".")
	# form std data.frame
	align.colnames <- paste("inter", rep(c(c("GeneName"), musthave.colnames, c("Cluster")), each = 2), c("A", "B"), sep = ".")
	tmp.res <- tmp.res[, match(align.colnames, colnames(tmp.res))]

	# match cluster
	# get conv ones
	std.res.conv <- tmp.res[intersect(which(tmp.res$inter.Cluster.A == VEinfos$cluster.name.A), which(tmp.res$inter.Cluster.B == VEinfos$cluster.name.B)), ]
	# get rev ones
	std.res.rev <- tmp.res[intersect(which(tmp.res$inter.Cluster.A == VEinfos$cluster.name.B), which(tmp.res$inter.Cluster.B == VEinfos$cluster.name.A)), ]
	std.res.rev <- std.res.rev[, ReverseOddEvenCols(length(align.colnames))]  # reverse all paired columns
	colnames(std.res.rev) <- colnames(std.res.conv)
	# get the result
	std.res.all <- rbind(std.res.conv, std.res.rev)
	std.res.all <- DoPartUnique(std.res.all, 1:2)

	return(std.res.all)
}


#' List cell clusters
#'
#' This function is to list all cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#'
#' @export
#'
ListAllClusters <- function(
	object
) {
	return(unique(object@fgenes$cluster))
}



#' List Options for Gene Selection
#' 
#' This group of functions are served for giving options on selecting gene subset. It ranges from 
#' subcellular locations, molecular functions and GO terms.
#'
#' @param object Allowed object should be in either class \code{InterCell} or class \code{InterCellDBPack}.
#' @param ... Parameters passed to corresponding function with suffix 'default', like use \code{ListAllGeneLocation} 
#'  and check parameters in \code{ListAllGeneLocation.default}.
#'
#' @return Character. The options.
#'
#' @name ListGeneSelectionProperty
#' @rdname ListGeneSelectionProperty
#'
#'
NULL


#' @rdname ListGeneSelectionProperty
#' @export
#'
ListAllGeneLocation.default <- function(
	object
) {
	return(unique(object@anno.location.db$GO.Term.target))
}

#' @rdname ListGeneSelectionProperty
#' @order 1
#' @export
#'
setGeneric(name = "ListAllGeneLocation", def = function(object, ...) {
	standardGeneric("ListAllGeneLocation")
	}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
setMethod(
	f = "ListAllGeneLocation",
	signature = "InterCellDBPack",
	definition = function(object, ...) {
		ListAllGeneLocation.default(object, ...)
	}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
setMethod(
	f = "ListAllGeneLocation",
	signature = "InterCell",
	definition = function(object, ...) {
		ListAllGeneLocation.default(object@database, ...)
	}
)


#' @rdname ListGeneSelectionProperty
#' @export
#'
ListAllGeneType.default <- function(
	object
) {
	return(unique(object@anno.type.db$Keyword.Name))
}

#' @rdname ListGeneSelectionProperty
#' @order 2
#' @export
#'
setGeneric(name = "ListAllGeneType", def = function(object, ...) {
	standardGeneric("ListAllGeneType")
	}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
setMethod(
	f = "ListAllGeneType",
	signature = "InterCellDBPack",
	definition = function(object, ...) {
		ListAllGeneType.default(object, ...)
	}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
setMethod(
	f = "ListAllGeneType",
	signature = "InterCell",
	definition = function(object, ...) {
		ListAllGeneType.default(object@database, ...)
	}
)

#' @rdname ListGeneSelectionProperty
#' @export
#'
ListAllGeneMergeType.default <- function(
	object
) {
	return(unique(object@accessory.db$merge.type.list[, "merged.molecular.function"]))
}
#' @rdname ListGeneSelectionProperty
#' @order 3
#' @export
#'
setGeneric(name = "ListAllGeneMergeType", def = function(object, ...) {
	standardGeneric("ListAllGeneMergeType")
	}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
setMethod(
	f = "ListAllGeneMergeType",
	signature = "InterCellDBPack",
	definition = function(object, ...) {
		ListAllGeneMergeType.default(object, ...)
	}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
setMethod(
	f = "ListAllGeneMergeType",
	signature = "InterCell",
	definition = function(object, ...) {
		ListAllGeneMergeType.default(object@database, ...)
	}
)


#' @param n.output It gives the first N GO terms in the order of AlphaBet.
#'
#' @rdname ListGeneSelectionProperty
#' @export
#'
ListAllGeneGOTerm.default <- function(
	object,
	n.output = +Inf
) {
	go.terms <- unique(object@go.ref.db$GO_term)
	go.terms <- go.terms[order(go.terms, decreasing = FALSE)]
	if (n.output < 1) {
		n.output <- 1
	}
	if (length(go.terms) > n.output) {
		go.terms <- go.terms[seq_len(n.output)]
	}

	return(go.terms)
}

#' @rdname ListGeneSelectionProperty
#' @order 4
#' @export
#'
setGeneric(name = "ListAllGeneGOTerm", def = function(object, ...) {
	standardGeneric("ListAllGeneGOTerm")
	}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
setMethod(
	f = "ListAllGeneGOTerm",
	signature = "InterCellDBPack",
	definition = function(object, ...) {
		ListAllGeneGOTerm.default(object, ...)
	}
)
#' @rdname ListGeneSelectionProperty
#' @export
#'
setMethod(
	f = "ListAllGeneGOTerm",
	signature = "InterCell",
	definition = function(object, ...) {
		ListAllGeneGOTerm.default(object@database, ...)
	}
)



#' Replace Cluster name
#'
#' This is function is to change the names of cell clusters.
#'
#' @inheritParams InsideObjectInterCell
#' @param cluster.names.current The currently used name of cell clusters.
#' @param cluster.names.replace The new used name of cell clusters.
#'
#' @return A \code{InterCell} object.
#'
#' @examples
#' \dontrun{
#'   ReplaceClusterName(object, "Macrophage", "Myeloid")
#' }
#'
#' @export
#'
ReplaceClusterName <- function(
	object,
	cluster.names.current,
	cluster.names.replace
) {
	markers.all <- object@fgenes
	colnames.cluster <- "cluster"
	# process
	if ((colnames.cluster %in% colnames(markers.all)) == FALSE) {
		stop("Selected column name defining clusters is not in the given data!")
	}
	if (length(cluster.names.current) != length(cluster.names.replace)) {
		stop("The replaced names are of different length of the current used ones.")
	}
	reserve.oldnames.col <- reserve.oldnames.col.proto <- paste(colnames.cluster, "oldv", sep = ".")
	for (try.i in 1:100) {
		reserve.oldnames.col <- paste(reserve.oldnames.col.proto, as.character(try.i), sep = ".")
		if ((reserve.oldnames.col %in% colnames(markers.all)) == FALSE) {
			break
		}
		if (try.i == 100) {
			stop("Cannot allocate proper colnames for old cluster names, program failed! Please check given data!")
		}
	}
	markers.all[, reserve.oldnames.col] <- markers.all[, colnames.cluster]
	tmp.fac <- factor(markers.all[, colnames.cluster])
	lvl.tmp.fac <- levels(tmp.fac)
	inds.match <- match(cluster.names.current, lvl.tmp.fac)
	if (length(which(is.na(inds.match))) != 0) {
		stop(paste0("Please give right current used cluster names! ",
			"Wrong given ones are: ", paste0(cluster.names.current[which(is.na(inds.match))], collapse = ", "),
			"."))    
	}
	lvl.tmp.fac[inds.match] <- cluster.names.replace
	levels(tmp.fac) <- lvl.tmp.fac
	markers.all[, colnames.cluster] <- as.character(tmp.fac)
	# return
	object@fgenes <- markers.all
	return(object)
}


#' Fetch Genes of Interest
#'
#' This function is to fetch genes of interest by selecting on subcellular locations, 
#' molecular functions and GO terms.
#'
#' @param object A \code{InterCell} object or \code{InterCellDBPack} object. \code{FetchGeneOI.default}
#'  gets \code{InterCellDBPack} as input.
#' @param sel.location Use subcellular location of gene product to select gene, and options are listed 
#'  in \code{\link{ListAllGeneLocation}}.
#' @param sel.location.score The score of corresponding subcellular location, range from 1 to 5. Consider \code{score = c(4, 5)} 
#'  as high and common usage.
#' @param sel.type Use molecular function of gene product to select gene, and options are listed in 
#'  \code{\link{ListAllGeneType}}.
#' @param sel.merge.type The merged types. Comparing to \code{sel.type}, it has less options, which are given 
#'  in \code{\link{ListAllGeneMergeType}}. See details for help.
#' @param ret.with.property It decides whether the return values are attached with gene properties(location, type, etc).
#' @param sel.go.terms Use GO terms to select gene, and supported options are listed in \code{\link{ListAllGeneGOTerm}}.
#' @param go.use.relative Decide if the go terms in the related GO term tree are used. See details for help.
#' @param go.relative.option Decide which relation to the selected GO term is used. Options are 
#'  'ancestor', 'parents', 'offspring', 'children'. See details for help.
#'
#' @details
#' The parameter \code{sel.merge.type} is the summary for \code{sel.type}. The options in \code{sel.type} are originally 
#' generated directly from Uniprot, which comprises over 100 types. For the convenience of usage, we summarize those types and gather
#' them to 16 merged types, which comprise the common used types: 'Receptor', 'Cytokine', 'Growth Factor', etc.
#'
#' GO terms have 3 basic words: 'cellular_component', 'molecular_function', 'biological_process', and all other GO terms are the offspring of 
#' one of them. As a result, GO terms form 3 family tree. Use parameter \code{go.use.relative}, it can extend one given GO term to all its 
#' related GO terms. There are 4 pre-defined options for selecting specific relative group, which are 'ancestor', 'parents', 'offspring', 'children'.
#' The 'parents' and 'children' are selecting the most close GO terms. For example, 'GO:0006955-immune response' is the nearest level above 
#' 'GO:0002250-adaptive immune response', and in turn, 'adaptive immune response' is the children of 'immune response'. 
#' The 'ancestor' and 'offspring' goes further than 'parents' and 'children'. The 'ancestor' iteratively searches for the 'parents'. 
#' Conversely, the 'offspring' iteratively searches for the 'children'. For example, 'immune response' is the parents of 'adaptive immune response', and 
#' 'adaptive immune response' is the parents of 'adaptive immune effector response'. Then, 'immune response' is the 'ancestor' of 'adaptive immune effector response'.
#' The 'offspring' goes the same way by propagating 'children'. 
#'
#' @return Character, the selected genes. Or, a list, with gene property attached.
#'
#' @rdname FetchGeneOI
#' @order 4
#' @export
#'
FetchGeneOI.default <- function(
	object,
	sel.location = NULL,
	sel.location.score = c(1:5),
	sel.type = NULL,
	sel.merge.type = NULL,  # merged type only have 16 options. Prioritize upon sel.type
	ret.with.property = TRUE,  
	# whether the selection should be carried in the result, 
	# for analysis usage it's TRUE, but only to get some genes, it should be false.
	# GO terms are not included by this functions
	sel.go.terms = NULL,  # ID and Term are supported
	go.use.relative = TRUE, 
	go.relative.option = "offspring"
) {
	## input parameter process
	# check location
	if (!is.null(sel.location)) {
		avb.opt.location <- unique(object@anno.location.db$GO.Term.target)
		not.valid.location <- setdiff(sel.location, avb.opt.location)
		if (length(not.valid.location) > 0) {
			warning("Given undefined location: ", paste0(not.valid.location, collapse = ", ", ". "))
		}
		sel.location <- intersect(sel.location, avb.opt.location)
		if (length(sel.location) == 0) {
			sel.location <- NULL
		}
	}
	if ((length(sel.location.score) > 1 && !is.numeric(sel.location.score)) || 
		(length(sel.location.score) == 1 && !is.numeric(sel.location.score))) {
		stop("Location score ranges from 1 to 5, and only those 5 integers are supported!")
	}
	if (!is.null(sel.location.score)) {
		avb.location.score <- c(1:5)
		not.valid.location.score <- setdiff(sel.location.score, avb.location.score)
		if (length(not.valid.location.score) > 0) {
			warning("Given invalid location score: ", paste0(not.valid.location.score, collapse = ", ", ". "))
		}
		sel.location.score <- intersect(sel.location.score, avb.location.score)
	}

	# check type
	if (!is.null(sel.type)) {
		avb.opt.type <- unique(object@anno.type.db$Keyword.Name)
		not.valid.type <- setdiff(sel.type, avb.opt.type)
		if (length(not.valid.type) > 0) {
			warning("Given undefined type: ", paste0(not.valid.type, collapse = ", ", ". "))
		}
		sel.type <- intersect(sel.type, avb.opt.type)
		if (length(sel.type) == 0) {
			sel.type <- NULL
		}
	}
	# check merged type (if given, then override type) and overwrite type
	if (!is.null(sel.merge.type)) {
		if (!is.null(sel.type)) {
			warning("Select genes using both parameter 'sel.type' & 'sel.merge.type'. Only the options in 'sel.merge.type' are used!")
		}
		avb.opt.mg.type <- unique(object@accessory.db$merge.type.list$merged.molecular.function)
		sel.merge.type <- CheckParamStd(sel.merge.type, avb.opt.mg.type, "merged type", stop.on.zero = FALSE)
		if (length(sel.merge.type) == 0) {
			sel.type <- NULL
		} else {  # overwrite sel.type
			ref.db <- object@accessory.db$merge.type.list
			sel.type <- ref.db[which(ref.db$merged.molecular.function %in% sel.merge.type), "orig.uniprot.keywords"]
		}
	}

	## save all above limitations
	property.saved <- list(
		location = sel.location,
		location.score = sel.location.score,
		type = sel.type
	)

	# check GO terms
	# - put in the tool function

	## fetch genes
	ret.gene.oi <- character()
	inside.set.gene.oi <- function(gene.oi.res, new.get.genes) {
		if (length(gene.oi.res) == 0) {
			gene.oi.res <- new.get.genes
		} else {
			gene.oi.res <- intersect(gene.oi.res, new.get.genes)
		}
		return(gene.oi.res)
	}
	# use location
	if (!is.null(sel.location)) {
		gene.oi.from.locs <- object@anno.location.db[intersect(which(object@anno.location.db$GO.Term.target %in% sel.location), 
			which(object@anno.location.db$score %in% sel.location.score)), "Gene.name"]
		ret.gene.oi <- inside.set.gene.oi(ret.gene.oi, gene.oi.from.locs)
	}
	if (!is.null(sel.type)) {
		gene.oi.from.type <- object@anno.type.db[which(object@anno.type.db$Keyword.Name %in% sel.type), "Gene.name"]
		ret.gene.oi <- inside.set.gene.oi(ret.gene.oi, gene.oi.from.type)
	}
	if (!is.null(sel.go.terms)) {
		gene.oi.from.go <- Tool.FindGenesFromGO(sel.go.terms, object@genes.db, object@go.ref.db, 
			go.use.relative = go.use.relative, go.relative.option = go.relative.option)
		gene.oi.from.go <- as.character(unlist(gene.oi.from.go))
		ret.gene.oi <- inside.set.gene.oi(ret.gene.oi, gene.oi.from.go)
	}
	# get unique result
	ret.gene.oi <- unique(ret.gene.oi)

	print(paste("Fetch", length(ret.gene.oi), "genes of interest."))
	if (ret.with.property == TRUE) {
		return(list(genes = ret.gene.oi, property = property.saved))	
	} else {
		return(ret.gene.oi)
	}
}


#' @param ... Parameters passed to function \code{FetchGeneOI.default}.
#'
#' @rdname FetchGeneOI
#' @order 1
#' @export 
#'
setGeneric(name = "FetchGeneOI", def = function(object, ...) {
		standardGeneric("FetchGeneOI")
	}
)

#' @rdname FetchGeneOI
#' @order 2
#' @export 
#'
setMethod(
	f = "FetchGeneOI", 
	signature = "InterCellDBPack",
	definition = function(object, ...) {
		return(FetchGeneOI.default(object, ...))
	}
)

#' @rdname FetchGeneOI
#' @order 3
#' @export 
#'
setMethod(
	f = "FetchGeneOI", 
	signature = "InterCell",
	definition = function(object, ...) {
		return(FetchGeneOI.default(object@database, ...))
	}
)



#' Merge Gene of Interest
#'
#' This function is to merge genes of interest fetched by \code{FetchGeneOI}.
#'
#' @param gene.oi.1 Genes of interest, generated by \code{FetchGeneOI}.
#' @param gene.oi.2 Genes of interest, generated by \code{FetchGeneOI}.
#' @param merge.option The supported options are 'intersect' and 'union'.
#'
#' @return Character, the selected genes. Or, a list, with gene property attached.
#'
#' @export
#'
MergeGeneOI <- function(
	gene.oi.1,
	gene.oi.2,
	merge.option = "intersect"
) {
	# check param
	if (length(merge.option) > 1) {
		merge.option <- merge.option[1]
	}
	merge.option <- CheckParamStd(merge.option, c("intersect", "union"), "parameter `merge.option`", stop.on.zero = TRUE)
	#
	if (!is.character(gene.oi.1) && 
		!is.list(gene.oi.1) && 
		(is.list(gene.oi.1) && !identical(names(gene.oi.1), c("genes", "property")))) {
		stop("Wrong format of input `gene.oi.1`. Please use `FetchGeneOI` to get gene of interest!")
	}
	if (!is.character(gene.oi.2) && 
		!is.list(gene.oi.2) && 
		(is.list(gene.oi.2) && !identical(names(gene.oi.2), c("genes", "property")))) {
		stop("Wrong format of input `gene.oi.2`. Please use `FetchGeneOI` to get gene of interest!")
	}

	gene.oi.ret <- NA
	# s1: if 2 character merge
	if (is.character(gene.oi.1) && is.character(gene.oi.2)) {
		gene.oi.ret <- switch(merge.option,
			"intersect" = intersect(gene.oi.1, gene.oi.2),
			"union" = union(gene.oi.1, gene.oi.2))
	}

	# s2: either one is character
	if (is.character(gene.oi.1) && is.list(gene.oi.2) ||
		is.list(gene.oi.1) && is.character(gene.oi.2)) {
		if (is.character(gene.oi.2)) {
			tmp.1 <- gene.oi.1
			gene.oi.1 <- gene.oi.2
			gene.oi.2 <- tmp.1
		}
		gene.oi.ret.genes <- switch(merge.option,
			"intersect" = intersect(gene.oi.1, gene.oi.2$genes),
			"union" = union(gene.oi.1, gene.oi.2$genes))
		gene.oi.ret.property <- switch(merge.option,
			"intersect" = gene.oi.2$property,
			"union" = NULL)
		if (is.null(gene.oi.ret.property)) {
			gene.oi.ret <- gene.oi.ret.genes
		} else {
			gene.oi.ret <- list(genes = gene.oi.ret.genes, property = gene.oi.ret.property)
		}
	}

	# s3: both two are list
	if (is.list(gene.oi.1) && is.list(gene.oi.2)) {
		tmp.s3.func <- switch(merge.option,
			"intersect" = intersect,
			"union" = union)

		gene.oi.ret.genes <- tmp.s3.func(gene.oi.1$genes, gene.oi.2$genes)
		gene.oi.ret.property <- list(location = tmp.s3.func(gene.oi.1$property$location, gene.oi.2$property$location),
					location.score = tmp.s3.func(gene.oi.1$property$location.score, gene.oi.2$property$location.score),
					type = tmp.s3.func(gene.oi.1$property$type, gene.oi.2$property$type))
		gene.oi.ret <- list(genes = gene.oi.ret.genes, property = gene.oi.ret.property)
	}

	return(gene.oi.ret)
}




#' Format User Given Gene Pairs
#' 
#' This function makes 2-column table (each row represents one gene pair) to be 
#' formatted to be used in this package.
#'
#' @inheritParams InsideObjectInterCell
#' @param gene.pairs.table 2-column table, and each column records one list of genes.
#' @param extend.reverse Decide whether to extend pairs with the reverse pairs. Default is \code{FALSE}.
#'
#' @details
#' The formatting process will remap the genes in 1st column from input to 'inter.*.A' columns in result.
#' The result of 2nd column from input will be put in 'inter.*.B' columns.
#'
#' Explanation on \code{extend.reverse}:
#' For example, given pair C3~C3ar1, if set \code{extend.reverse = TRUE}, then 
#' both C3~C3ar1 and C3ar1~C3 will be generated in result.
#'
#' @return A list with \code{$result} storing the formatted gene pairs.
#'
#' @export
#'
FormatCustomGenePairs <- function(
	object,
	gene.pairs.table,
	extend.reverse = FALSE
) {
	species <- object@database@species
	if.remap.genes <- object@misc$if.remap.genes

	# check input
	if (class(gene.pairs.table) != "data.frame") {
		stop("Given gene pairs must be stored in table (R `data.frame` structure).")
	}
	if (ncol(gene.pairs.table) != 2) {
		warning("Given table has more than 2 columns, only the first 2 columns will be used.")
	}

	# process
	result.list <- list()
	apx.list <- list()
	for (i in 1:2) {  # only use the first 2 columns
		tmp.genes <- as.character(gene.pairs.table[, i])
		# create dummy df to meet the requirement of musthave columns
		dummy.fgenes <- data.frame(gene = tmp.genes, 
			cluster = seq_along(tmp.genes),  # give one gene one cluster to make duplicate ones preserved, but program may get quite slow
			LogFC = 1, PVal = 0, 
			num.id = seq_along(tmp.genes), stringsAsFactors = FALSE)
		if (if.remap.genes == TRUE) {
			dummy.remap.res <- suppressWarnings(DataPrep.RemapClustersMarkers(dummy.fgenes, species, if.used.inside = TRUE))
			dummy.fgenes <- dummy.remap.res$result
		}
		dummy.fgenes <- dummy.fgenes[order(dummy.fgenes$num.id, decreasing = FALSE), ]
		result.list <- c(result.list, list(dummy.fgenes$gene))
		if (if.remap.genes == TRUE) {
			apx.list <- c(apx.list, dummy.remap.res[setdiff(names(dummy.remap.res), "result")])
		} else {
			apx.list <- c(apx.list, list(NULL))
		}
	}

	# get genename corresponding IDs
	genes.ref.db <- switch(species, 
		"human" = genes.human.ref.db$gene.ncbi.db,
		"mouse" = genes.mouse.ref.db$gene.ncbi.db)
	result.id.A <- genes.ref.db[match(result.list[[1]], genes.ref.db$Symbol_from_nomenclature_authority), "GeneID"]
	result.id.B <- genes.ref.db[match(result.list[[2]], genes.ref.db$Symbol_from_nomenclature_authority), "GeneID"]

	result.IT <- data.frame(inter.GeneID.A = result.id.A,
		inter.GeneID.B = result.id.B,
		inter.GeneName.A = result.list[[1]],
		inter.GeneName.B = result.list[[2]],
		stringsAsFactors = FALSE)

	if (extend.reverse == TRUE) {
		tmp.rev <- result.IT[, ReverseOddEvenCols(4)]
		colnames(tmp.rev) <- colnames(result.IT)
		result.IT <- rbind(result.IT, tmp.rev)
	}

	# return
	list(result = result.IT, match.status = apx.list)
}
ZJUDBlab/InterCellDB documentation built on March 19, 2023, 4:56 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
ZJUDBlab/InterCellDB
Tool and Database for Analyzing Interactions

R/InterCellDBclass.R
In ZJUDBlab/InterCellDB: Tool and Database for Analyzing Interactions

R Package Documentation

Browse R Packages

We want your feedback!

ZJUDBlab/InterCellDB Tool and Database for Analyzing Interactions

R/InterCellDBclass.R In ZJUDBlab/InterCellDB: Tool and Database for Analyzing Interactions

R Package Documentation

Browse R Packages

We want your feedback!

ZJUDBlab/InterCellDB
Tool and Database for Analyzing Interactions

R/InterCellDBclass.R
In ZJUDBlab/InterCellDB: Tool and Database for Analyzing Interactions