## Using probabilities of marker expression, assign scores for each class
## in one or more dichotomies of interest via Naive Bayes
##
## by Artem Sokolov
#' Identifies a marker with the highest probability of expression
#' @param P Posterior probabilities, as generated by GMMreshape()
#' @param cid Column containing cell IDs
#' @param vm (optional) vector of markers to limit the argmax to
#' @param pthr probability threshold above which a cell is considered to express the marker
#' @return The original dataframe P augmented with the dominant marker column
#' @importFrom magrittr %>%
#' @export
findDominant <- function( P, cid, vm, pthr = 0.65 )
{
P1 <- P %>% dplyr::mutate( tmpID = 1:(dplyr::n()) )
if( missing(vm) )
vm <- dplyr::select(P, -{{cid}}) %>% colnames
vm <- c(vm, "None")
P1 %>%
dplyr::mutate( None = pthr ) %>%
dplyr::select( tmpID, all_of(vm) ) %>%
tidyr::gather( Dominant, Value, vm ) %>%
dplyr::group_by( tmpID ) %>% dplyr::slice(which.max(Value)) %>%
dplyr::ungroup() %>% dplyr::arrange( tmpID ) %>%
dplyr::inner_join( P1, by="tmpID" ) %>%
dplyr::select( -tmpID, -Value ) %>%
dplyr::select( {{cid}}, Dominant, everything() )
}
#' Combines probabilities of expression in individual markers to call cell states
#' according to a predefined marker -> cell state mapping
#'
#' @param P Posterior probabilities, as generated by GMMreshape()
#' @param cid Column containing cell IDs
#' @param M A data frame with columns Marker and State, defining the mapping between the two
#' @param fcomb A function for combining individual probabilities. Use "gmean" for geometric mean
#' and "hmean" for harmonic mean. (Default: "hmean")
#' @param pthr Probability threshold that must be exceeded for a cell state to be selected
#' @return A data frame of probabilities for each cell/state pair, along with final calls
#' @importFrom magrittr %>%
#' @export
callStates <- function(P, cid, M, fcomb="hmean", pthr=0.5 )
{
## Argument verification
if( !all(c("Marker", "State") %in% colnames(M)) )
stop( "Marker -> cell state map must contain columns \"Marker\" and \"State\"" )
## Determine the scoring function for combining probabilities
if( is.function(fcomb) )
fscore <- fcomb
else if( is.character(fcomb) && fcomb == "gmean" )
fscore <- function(x) {exp(mean(log(x)))}
else if( is.character(fcomb) && fcomb == "hmean" )
fscore <- function(x) {length(x) / sum( 1/x )}
else
stop( "fcomb must be a function, \"gmean\", or \"hmean\"" )
## Isolate relevant markers
S <- P %>% dplyr::select( CellID = {{cid}}, dplyr::all_of(M$Marker) )
## Fill out implicit negatives
M <- M %>% dplyr::mutate( Expressed = "Yes" ) %>%
tidyr::pivot_wider( names_from = Marker, values_from = Expressed, values_fill = "No" ) %>%
tidyr::pivot_longer( -State, names_to = "Marker", values_to = "Expressed" ) %>%
dplyr::group_by( State, Expressed ) %>%
dplyr::summarize( dplyr::across(Marker, list), .groups="drop" ) %>%
tidyr::pivot_wider( names_from = Expressed, values_from = Marker )
## Compute scores for each cellID/state pair
S <- S %>% tidyr::pivot_longer( -CellID, names_to="Marker", values_to="Value" ) %>%
dplyr::group_by( CellID ) %>%
dplyr::summarize( Expression = list(rlang::set_names(Value, Marker)), .groups="drop" ) %>%
tidyr::expand_grid( M ) %>%
dplyr::rowwise() %>%
dplyr::mutate(Contrib = list(c(Expression[Yes], 1-Expression[No])),
Prob = fscore(Contrib)) %>%
dplyr::ungroup() %>% dplyr::select( CellID, State, Prob )
## Make the final call via argmax
G <- S %>% dplyr::group_by( CellID ) %>%
dplyr::slice( which.max(Prob) ) %>%
dplyr::ungroup() %>%
dplyr::mutate( State = ifelse(Prob < pthr, "Other", State) ) %>%
dplyr::select( CellID, State )
## Reshape to cell-by-state matrix of probabilities and combine with final calls
F <- S %>% tidyr::pivot_wider( names_from = State, values_from = Prob ) %>%
dplyr::inner_join( G, ., by="CellID" ) %>%
dplyr::rename( !!rlang::sym(cid) := CellID )
F
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.