R/cell_types.R

Defines functions parse_metacell_types parse_cell_type_colors update_cell_type_colors update_metacell_types

Documented in update_cell_type_colors update_metacell_types

#' Update cell type assignment for each metacell
#'
#' Change the cell type assignments for each metacell to the ones listed at \code{metacell_types_file}.
#'
#' This is usually done after a first iteration of annotation using the "Annotate" tab in the MCView annotation, which can export a valid \code{metacell_types_file}.
#' The file should have a column named "metacell" with the metacell ids and another
#' column named "cell_type" or "cluster" with the cell type assignment.
#'
#' Note that the exported file from the __MCView__ app contains additional fields
#' which will be ignored in this function.
#'
#' Under the hood - MCView updates a file named "metacell_types.tsv" under \code{project/cache/dataset}, which can also be edited manually.
#'
#' If the file contains an additional 'color' field, the cell type colors would be updated as well.
#'
#' @param project path to the project directory
#' @param dataset name for the dataset, e.g. "PBMC"
#' @param metacell_types_file path to a tabular file (csv,tsv) with cell type assignement for
#' each metacell. The file should have a column named "metacell" with the metacell ids and another
#' column named "cell_type" or "cluster" with the cell type assignment. Metacell ids that do
#' not exists in the data would be ignored.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' update_metacell_types("PBMC", "PBMC163k", "raw/metacell-clusters.csv")
#' }
#'
#' @export
update_metacell_types <- function(project, dataset, metacell_types_file) {
    init_project_dir(project)
    verify_app_cache(project)

    prev_metacell_types <- load_shiny_data("metacell_types", dataset, project_cache_dir(project))
    prev_metacell_types <- prev_metacell_types %>%
        mutate(metacell = as.character(metacell))

    raw_df <- fread(metacell_types_file)
    metacell_types <- parse_metacell_types(raw_df)

    metacell_types <- prev_metacell_types %>%
        select(-cell_type) %>%
        left_join(metacell_types %>% select(metacell, cell_type), by = "metacell")

    serialize_shiny_data(metacell_types, "metacell_types", dataset = dataset, cache_dir = project_cache_dir(project), flat = TRUE)

    cli_alert_success("Succesfully changed metacell cell type assignments")

    if (has_name(raw_df, "color")) {
        cli_alert_info("File has a field named 'color', updating also cell type colors.")
        update_cell_type_colors(project, dataset, raw_df)
    }
}


#' Update color assignment for each cell type
#'
#' Change the color assignments for each cell type to the ones listed at \code{cell_type_colors_file}.
#'
#' This is usually done after a first iteration of annotation using the "Annotate" tab in the MCView annotation, which can
#' export a valid \code{cell_type_colors_file}.
#'
#' The file should have a column named "cell_type" or "cluster" with the cell types and another column named "color" with the color assignment.
#' Note that the exported file from the __MCView__ app contains additional fields which will be
#' ignored in this function.
#'
#' Under the hood - MCView updates a file named "cell_type_colors.tsv" under \code{project/cache/dataset}, which can also be edited manually.
#'
#' @param project path to the project directory
#' @param dataset name for the dataset, e.g. "PBMC"
#' @param cell_type_colors_file path to a tabular file (csv,tsv) with color assignement for
#' each cell type. The file should have a column named "cell_type" or "cluster" with the
#' cell types and another column named "color" with the color assignment. Cell types that do not
#' exist in the metacell types would be ignored, so if you changed the names of cell types you would have to also
#' update the metacell types (using \code{update_metacell_types}).
#' The function also accepts output of the 'export' button from the application annotation page.
#' If this parameter is missing, MCView would use the \code{chameleon} package to assign a color for each cell type.
#'
#'
#' @examples
#' \dontrun{
#' update_metacell_types("PBMC", "PBMC163k", "raw/cluster-colors.csv")
#' }
#'
#' @export
update_cell_type_colors <- function(project, dataset, cell_type_colors_file) {
    init_project_dir(project)
    verify_app_cache(project)

    cell_type_colors <- parse_cell_type_colors(cell_type_colors_file)

    serialize_shiny_data(cell_type_colors, "cell_type_colors", dataset = dataset, cache_dir = project_cache_dir(project), flat = TRUE)

    cli_alert_success("Succesfully changed cell type color assignments")
}

parse_cell_type_colors <- function(cell_type_colors) {
    if (is.character(cell_type_colors)) {
        file <- cell_type_colors
        cell_type_colors <- fread(cell_type_colors) %>% as_tibble()
    } else {
        file <- "cell_type_colors"
    }


    if (!has_name(cell_type_colors, "cell_type") && !has_name(cell_type_colors, "cluster")) {
        cli_abort("{.field {file}} should have a column named {.field cell_type} or {.field cluster}")
    }

    if (!has_name(cell_type_colors, "color")) {
        cli_abort("{.field {file}} should have a column named {.field color}")
    }

    if (rlang::has_name(cell_type_colors, "cluster")) {
        cell_type_colors <- cell_type_colors %>% rename(cell_type = cluster)
    }

    cell_type_colors <- cell_type_colors %>%
        filter(!is.na(cell_type), !is.na(color)) %>%
        filter(cell_type != "(Missing)") %>%
        distinct(cell_type, .keep_all = TRUE)

    if (!has_name(cell_type_colors, "order")) {
        cell_type_colors <- cell_type_colors %>% mutate(order = 1:n())
    }

    cell_type_colors <- cell_type_colors %>%
        distinct(cell_type, color, order) %>%
        select(cell_type, color, order)

    n_colors <- cell_type_colors %>%
        count(cell_type) %>%
        pull(n)

    if (any(n_colors > 1)) {
        cli_abort("Some cell types appear more than once with different colors.")
    }

    return(cell_type_colors)
}


parse_metacell_types <- function(metacell_types, metacells = NULL) {
    if (is.character(metacell_types)) {
        metacell_types <- fread(metacell_types) %>% as_tibble()
    }

    if (!has_name(metacell_types, "metacell")) {
        cli_abort("{.field {file}} should have a column named {.field metacell}")
    }

    if (!has_name(metacell_types, "cell_type") && !has_name(metacell_types, "cluster")) {
        cli_abort("{.field {file}} should have a column named {.field cell_type} or {.field cluster}")
    }

    if (rlang::has_name(metacell_types, "cluster")) {
        metacell_types <- metacell_types %>% rename(cell_type = cluster)
    }

    metacell_types <- metacell_types %>%
        select(any_of(c("metacell", "cell_type", "age", "mc_age")))


    if ("age" %in% colnames(metacell_types)) {
        metacell_types <- metacell_types %>%
            rename(mc_age = age)
    }

    metacell_types <- metacell_types %>%
        mutate(metacell = as.character(metacell))

    if (!is.null(metacells)) {
        unknown_metacells <- metacell_types$metacell[!(metacell_types$metacell %in% metacells)]
        if (length(unknown_metacells) > 0) {
            mcs <- paste(unknown_metacells, collapse = ", ")
            cli_abort("Metacell types contains metacells that are missing from the data: {.field {mcs}}")
        }

        missing_metacells <- metacells[!(metacells %in% metacell_types$metacell)]
        if (length(missing_metacells) > 0) {
            mcs <- paste(missing_metacells, collapse = ", ")
            cli_warn("Some metacells are missing from metacell types: {.field {mcs}}")
        }
    }

    return(metacell_types)
}
tanaylab/MCView documentation built on June 1, 2025, 8:08 p.m.