Nothing
# CellScore.R
# From CS_v1.4.R
#' CellScore evaluates the identity of cells undergoing cell type transition
#'
#' This function will calculate the CellScore (summary score) for a cell that
#' is undergoing a transition in cell identity from a starting cell type to a
#' target cell type.
#'
#' @param eset an ExpressionSet containing data matrices of normalized
#' expression data, present/absent calls, a gene annotation data frame and a
#' phenotype data frame.
#' @param cell.change a data frame containing three columns, one for the
#' start (donor) test and target cell type. Each row of the data.
#' frame describes one transition from the start to a target cell type.
#' @param scores.onoff a data.frame of OnOff Scores for all samples in the
#' expression matrix as generated by the function OnOff().
#' @param scores.cosine a numeric matrix of cosine similarity between
#' general groups, subgroups and individual samples as calculated by the
#' function CosineSimScore().
#' @return The function returns a data frame with 29 columns and M*N rows,
#' where M is the number of unqiue start and target cell types pairs listed in
#' the cell.change argument, while N is the number of all samples in the
#' input dataset eset. The columns include sample phenotype features and all
#' score (components), including the on/off score, cosine similarity and
#' CellScore.
#' @keywords cellscore cosine similarity
#' @seealso \code{\link[CellScore]{CosineSimScore}, \link[CellScore]{OnOff}} for
#' detials on specfic score calculations, and
#' \code{\link[hgu133plus2CellScore]{hgu133plus2CellScore}} for details on the
#' specific expressionSet object that shoud be provided as an input.
#' @export
#' @importClassesFrom Biobase ExpressionSet
#' @importMethodsFrom Biobase pData
#' @examples
#' ## Load the expression set for the standard cell types
#' library(Biobase)
#' library(hgu133plus2CellScore) # eset.std
#'
#' ## Locate the external data files in the CellScore package
#' rdata.path <- system.file("extdata", "eset48.RData", package = "CellScore")
#' tsvdata.path <- system.file("extdata", "cell_change_test.tsv",
#' package = "CellScore")
#'
#' if (file.exists(rdata.path) && file.exists(tsvdata.path)) {
#'
#' ## Load the expression set with normalized expressions of 48 test samples
#' load(rdata.path)
#'
#' ## Import the cell change info for the loaded test samples
#' cell.change <- read.delim(file= tsvdata.path, sep="\t",
#' header=TRUE, stringsAsFactors=FALSE)
#'
#' ## Combine the standards and the test data
#' eset <- combine(eset.std, eset48)
#'
#' ## Generate cosine similarity for the combined data
#' ## NOTE: May take 1-2 minutes on the full eset object
#' ## so we subset it for 4 cell types
#' pdata <- pData(eset)
#' sel.samples <- pdata$general_cell_type %in% c("ESC", "EC", "FIB", "KER")
#' eset.sub <- eset[, sel.samples]
#' cs <- CosineSimScore(eset.sub, cell.change, iqr.cutoff=0.1)
#'
#' ## Generate the on/off scores for the combined data
#' individ.OnOff <- OnOff(eset.sub, cell.change, out.put="individual")
#'
#' ## Generate the CellScore values for all samples
#' cellscore <- CellScore(eset.sub, cell.change, individ.OnOff$scores,
#' cs$cosine.samples)
#' }
CellScore <- function(eset, cell.change, scores.onoff, scores.cosine) {
## PSEUDOCODE
## Start with individ.OnOff and add cosine similarities as columns to it.
## o transpose the cs$cosine.samples so samples are in rows
## Does the subgroup have to be in the row name? this is bothersome.
## This is a remnant of CellScore_Cosine_V3.R, filtered ynorm consolidated
## matrix.
## o get rid of the subgroup so that the column names can be used
## later in the process without manipulation.
############################################################################
## PART 00. Check function arguments
############################################################################
fun.main <- as.character(match.call()[[1]])
.stopIfNotExpressionSet(eset, "eset", fun.main)
.stopIfNotDataFrame(cell.change, "cell.change", fun.main)
.stopIfNotDataFrame(scores.onoff, "scores.onoff", fun.main)
.stopIfNotSymetricMatrix0to1(scores.cosine, "scores.cosine", fun.main)
############################################################################
## PART 0. Filter samples according to the phenotype
############################################################################
## o get rid of samples with is.na(category)
## o there may be samples with NA categories as an easy way to exclude
## them from analysis without changing the composition of the input every
## time
## o also exclude any rows with NA values in "general_cell_type":
## this should not be NA
pdata <- pData(eset)
sel <- !is.na(pdata$category) & !is.na(pdata$general_cell_type)
## DO 'major group' comparisons only
sel.start <- cell.change$start %in% pdata[sel, "general_cell_type"]
sel.target <- cell.change$target %in% pdata[sel, "general_cell_type"]
sel.unique <- !duplicated(paste(cell.change$start,
cell.change$target,
sep="_"))
score.comparisons <- cell.change[sel.start & sel.target & sel.unique, ]
rownames(score.comparisons) <- seq_len(nrow(score.comparisons))
############################################################################
## PART I. Merge metrics into a nice big table
############################################################################
## A. Rearrange tables
## 1. standard types and test samples
## a. standards
target.group <- unique(unlist(score.comparisons[, -2]))
## b. ALL samples with valid labels
test.samples <- rownames(pdata[sel, ])
## 2. extract the cosine similarity for all GSM samples (rows) wrt
## each target group (columns)
ind.row <- match(test.samples, rownames(scores.cosine))
cols <- colnames(scores.cosine)
temp.list <- lapply(target.group,
function(group){
## select the column of the target group AND
## the test samples in rows
scores.cosine[ind.row, match(group, cols )]
})
temp.cosine <- do.call("cbind",
c(data.frame(test.samples, stringsAsFactors=FALSE),
temp.list))
colnames(temp.cosine) <- c("test.samples", paste0("cosine.", target.group))
## B. Merge cosine scores to on/off scores
## Use the composite id ("test" and "test.samples") for merging; We can
## have many rows per sample, because each sample may have scores in more
## than one transition
big.table <- merge(scores.onoff, temp.cosine, by.x="test",
by.y="test.samples", all.x=TRUE)
## Rename "test" column to "composite.ID"
colnames(big.table)[which(colnames(big.table) == "test")] <- "composite.ID"
## final size of table has to be number of rows in scores.onoff
stopifnot(nrow(big.table) == nrow(scores.onoff))
############################################################################
## PART II. CellScore
############################################################################
## A. THE CellScore = target-like - donor-like
## o target-like= fraction of target genes + cosine.sim compared to target
## o donor-like = fraction of donor genes + cosine.sim compared to donor
## Loop through each row in big.table
## o actually there are no unique rownames in this case;
## just make sure the row order is preserved
## o a truly unique rowname could be derived from sample_id, start,
## and target (for transition).
mat <- matrix(NA, nrow=nrow(big.table), ncol=7,
dimnames = list(c(1:nrow(big.table)),
c("fraction.target", "cosine.target",
"fraction.donor", "cosine.donor",
"target.like", "donor.like",
"CellScore")))
for (id in seq_len(nrow(big.table))) {
for (cell in c("target", "donor")){
col.fr <- paste0("fraction.", cell)
col.cos <- paste0("cosine.", cell)
mat[id, col.fr] <- .fractionScore(cell, big.table[id, ])
mat[id, col.cos] <- .cosineScore(cell, big.table[id, ])
mat[id, paste0(cell, ".like")] <- mat[id, col.fr] + mat[id, col.cos]
}
mat[id, "CellScore"] <- mat[id, "target.like"] - mat[id, "donor.like"]
}
## B. Final output
selected <- c("composite.ID","experiment_id","sample_id","platform_id",
"cell_type","disease_status","category",
"general_cell_type","donor_tissue","sub_cell_type1",
"transition_induction_method","donor_cell_body_location",
"start","target","markers.start","markers.target",
"start.mkrs.in.test", "target.mkrs.in.test",
"loss.start.mkrs","gain.target.mkrs", "OnOffScore")
data.frame(big.table[, intersect(selected, colnames(big.table))], mat,
index=c(1:nrow(big.table)), stringsAsFactors=FALSE)
}
## fractionScore
##
## Local function that calculates the onoffscore
.fractionScore <- function(type, instance){
switch(type,
donor = 1 - instance[, "loss.start.mkrs"],
target = instance[, "gain.target.mkrs"])
}
## cosineScore
##
## Local function that gets the cosine similarity score
.cosineScore <- function(type, instance){
x = instance[, paste0("cosine.",
instance[, ifelse(type == "donor", "start", type)])]
as.numeric(as.character(x))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.