R/main_class.R

Defines functions name_signatures drop_na_variants subset_bagel_by_annotation subset_bagel_by_counts get_variants get_sample_names get_sample_annotations add_sample_annotations init_sample_annotations set_sample_annotations subset_variants_by_samples

Documented in add_sample_annotations get_sample_annotations get_sample_names get_variants init_sample_annotations name_signatures set_sample_annotations subset_bagel_by_annotation subset_bagel_by_counts subset_variants_by_samples

# Count Tables object/methods -------------------------------

#' Object containing the count table matrices, their names and descriptions
#' that we generated by provided and by user functions. These are used to
#' discover and infer signatures and exposures.
#'
#' @slot name A name that describes the type of table (e.g. "SBS96")
#' @slot count_table An array of counts with samples as the columns and motifs
#' as the rows
#' @slot annotation A data.frame of annotations with three columns used for
#' plotting: motif, mutation, and context
#' @slot features Original features used to generate the count_table
#' @slot type The mutation type of each feature, in case we need to plot or
#' model they differently
#' @slot color_variable The variable used for plotting colors, selected from
#' the annotation slot
#' @slot color_mapping The mapping of the annotations chosen by color_variable
#' to color values for plotting
#' @slot description A summary table of the result objects in result_list
#' a list of lists. The nested lists created combined (rbind) tables, and the
#' tables at the first list level are modelled independantly. Combined tables
#' must be named.
#' list("tableA", comboTable = list("tableC", "tableD"))
#' @importFrom S4Vectors Rle
#' @export
setClass("count_table", slots = c(name = "character",
                                  count_table = "array",
                                  annotation = "data.frame",
                                  features = "data.frame",
                                  type = "Rle",
                                  color_variable = "character",
                                  color_mapping = "character",
                                  description = "character"))

#setMethod("show", "Count_Tables",
#          function(object)cat("Count_Tables Object containing: ",
#                              "\n**Count Tables: \n",
#                              apply(cbind(do.call("rbind", lapply(
#                                object@table_list, dim)), "\n"), 1, paste),
#                              "\n**Names: \n",
#                              paste(unlist(object@table_name), "\n", sep = ""),
#                              "\n**Descriptions: \n",
#                              paste(unlist(object@description), "\n", sep = ""))
#)

# Primary bagel object/methods -------------------------------

#' The primary object for BAGEL that contains all variants, samples annotations
#' and tables
#'
#' @slot variants \code{data.table} of variants
#' @slot count_tables Summary table with per-sample unnormalized motif counts
#' @slot sample_annotations Sample-level annotations (e.g. age, sex, primary)
#' @export
#' @import data.table BSgenome
setClass("bagel", slots = c(variants = "data.table",
                                 count_tables = "list",
                                 sample_annotations = "data.table"),
         prototype = list(variants = data.table::data.table(),
                   count_tables = list(),
                   sample_annotations = data.table::data.table()))

# setMethod("show", "bagel",
#           function(object)cat(cat("BAGEL Object containing \n**Variants: \n"),
#                               if (!all(is.na(object@variants))) {
#                                 cat(methods::show(object@variants))
#                                 }else{
#                                   cat("Empty")
#                                     },
#                               cat("\n**Count_Tables Object containing: \n"),
#                               if (length(object@count_tables@table_name) > 0) {
#                                 cat("\n**Count Tables: \n",
#                                     apply(cbind(do.call("rbind", lapply(
#                                       object@count_tables@table_list, dim)),
#                                       "\n"), 1, paste),
#                                     "\n**Names: \n", paste(
#                                         unlist(object@count_tables@table_name),
#                                         "\n", sep = ""), "\n**Descriptions: \n",
#                                     paste(unlist(
#                                       object@count_tables@description), "\n",
#                                       sep = ""))
#                                 }else{
#                                   cat("Empty")
#                                   },
#                               cat("\n**Sample Level Annotations: \n"),
#                               if (!all(is.na(object@sample_annotations))) {
#                                 cat(methods::show(object@sample_annotations))
#                               }else{
#                                 cat("Empty")
#                               })
# )

# Variant-Level object/methods -------------------------------

#' Return sample from bagel object
#'
#' @param bay Bagel object containing samples
#' @param sample_name Sample name to subset by
#' @return Returns sample dataframe subset to a single sample
#' @examples
#' bay <- readRDS(system.file("testdata", "bagel.rds", package = "BAGEL"))
#' subset_variants_by_samples(bay, "public_LUAD_TCGA-97-7938.vcf")
#' @export
subset_variants_by_samples <- function(bay, sample_name) {
  return(bay@variants[which(bay@variants$sample == sample_name),
                      ])
}

# Sample-Level object/methods -------------------------------

#' Set sample level annotations for bagel object
#'
#' @param bay Bagel object we input sample into
#' @param annotations Sample DataFrame
#' @return Sets sample_annotations slot {no return}
#' @examples
#' bay <- readRDS(system.file("testdata", "bagel_sbs96.rds", package = "BAGEL"))
#' sample_annotations <- read.table(system.file("testdata",
#' "sample_annotations.txt", package = "BAGEL"), sep = "\t", header=TRUE)
#' set_sample_annotations(bay, data.table::as.data.table(sample_annotations))
#' @export
set_sample_annotations <- function(bay, annotations) {
  eval.parent(substitute(bay@sample_annotations <- annotations))
}

#' Initialize sample annotation data.table with sample names from variants
#'
#' @param bay Bagel object we input sample into
#' @return Sets sample_annotations slot {no return}
#' @examples
#' bay <- readRDS(system.file("testdata", "bagel_sbs96.rds", package = "BAGEL"))
#' init_sample_annotations(bay)
#' bay
#' @export
init_sample_annotations <- function(bay) {
  #samples <- unique(tools::file_path_sans_ext(
  #  bay@variants$Tumor_Sample_Barcode))
  samples <- unique(bay@variants$sample)
  sample_dt <- data.table::data.table(Samples = samples)
  eval.parent(substitute(bay@sample_annotations <- sample_dt))
}

#' Adds sample annotation to bagel object with available samples
#'
#' @param bay Bagel object we input sample into
#' @param annotations table of sample-level annotations to add
#' @param sample_column name of sample name column
#' @param columns_to_add which annotation columns to add, defaults to all
#' @return Sets sample_annotations slot {no return}
#' @examples
#' bay <- readRDS(system.file("testdata", "bagel_sbs96.rds", package = "BAGEL"))
#' init_sample_annotations(bay)
#' sample_annotations <- read.table(system.file("testdata",
#' "sample_annotations.txt", package = "BAGEL"), sep = "\t", header=TRUE)
#' add_sample_annotations(bay = bay, annotations = sample_annotations,
#' sample_column = "Sample_Names", columns_to_add = "Tumor_Subtypes")
#' bay
#' @export
add_sample_annotations <- function(bay, annotations, sample_column =
                                     "Sample_ID", columns_to_add =
                                     colnames(annotations)) {
  bay_annotations <- get_sample_annotations(bay)
  if (all(is.na(bay_annotations))) {
    stop(strwrap(prefix = " ", initial = "", "Please run init_sample_annotations
                 on this bagel before adding sample annotations."))
  }
  if (!sample_column %in% colnames(annotations)) {
    stop(strwrap(prefix = " ", initial = "", "User-defined sample_column is
                 not in input annotations, please check and rerun."))
  }
  if (!all(bay_annotations$Samples %in%
          annotations[, sample_column])) {
    stop(strwrap(prefix = " ", initial = "", "Some samples are missing
                 annotations, please check input annotations and rerun."))
  }
  if (!all(columns_to_add %in% colnames(annotations))) {
    stop(strwrap(prefix = " ", initial = "", paste("Some user-defined
                                                   columns_to_add are not in
                                                   the input annotations, (",
                 toString(columns_to_add[which(!columns_to_add %in%
                                        colnames(annotations))]),
                 ") please check and rerun.", sep = "")))
  }
  matches <- match(bay_annotations$Samples,
                  annotations[, sample_column])
  bay_annotations <- cbind(bay_annotations, annotations[matches, columns_to_add,
                                                       drop = FALSE])
  eval.parent(substitute(bay@sample_annotations <- bay_annotations))
}

#' Return sample annotation from bagel object
#'
#' @param bay Bagel object we input sample into
#' @return Sets sample_annotations slot {no return}
#' @examples
#' bay <- readRDS(system.file("testdata", "bagel.rds", package = "BAGEL"))
#' init_sample_annotations(bay)
#' get_sample_annotations(bay)
#' @export
get_sample_annotations <- function(bay) {
  return(bay@sample_annotations)
}

#' Return samples names for bagel object
#'
#' @param bay Bagel object containing samples
#' @return Returns names of samples in bagel object
#' @examples
#' bay <- readRDS(system.file("testdata", "bagel.rds", package = "BAGEL"))
#' get_sample_names(bay)
#' @export
get_sample_names <- function(bay) {
  return(unique(bay@variants$sample))
}

#' Return variants for bagel object
#'
#' @param bay Bagel object containing variants
#' @return Returnsvariants in bagel object
#' @examples
#' bay <- readRDS(system.file("testdata", "bagel.rds", package = "BAGEL"))
#' get_variants(bay)
#' @export
get_variants <- function(bay) {
  return(bay@variants)
}

#' Creates a new bagel subsetted to only samples with enough variants
#'
#' @param bay Input bagel
#' @param table_name Name of table used for subsetting
#' @param num_counts Minimum sum count value to drop samples
#' @return Returns a new bagel object with sample annotations, count tables,
#' and variants subsetted to only contains samples with the specified minimum
#' number of counts (column sums) in the specified table
#' @examples
#' bay <- readRDS(system.file("testdata", "bagel_sbs96.rds", package = "BAGEL"))
#' subset_bagel_by_counts(bay, "SBS96", 20)
#' @export
subset_bagel_by_counts <- function(bay, table_name, num_counts) {
  tab <- .extract_count_table(bay, table_name)
  min_samples <- colnames(tab)[which(colSums(tab) >= num_counts)]

  bay@count_tables <- subset_count_tables(bay, min_samples)

  #Subset variants
  bay@variants <- bay@variants[which(bay@variants$Tumor_Sample_Barcode %in%
                                      min_samples), ]

  #Subset sample annotations
  if (nrow(bay@sample_annotations) != 0) {
    bay@sample_annotations <- bay@sample_annotations[which(
      bay@sample_annotations$Samples %in% min_samples), ]
  }
  return(bay)
}

#' Creates a new bagel subsetted to only one value of a sample annotation
#'
#' @param bay Input bagel
#' @param annot_col Annotation class to use for subsetting
#' @param annot_names Annotational value to subset to
#' @return Returns a new bagel object with sample annotations, count tables,
#' and variants subsetted to only contains samples of the specified annotation
#' type
#' @examples
#' bay <- readRDS(system.file("testdata", "bagel_sbs96.rds", package = "BAGEL"))
#' sample_annotations <- read.table(system.file("testdata",
#' "sample_annotations.txt", package = "BAGEL"), sep = "\t", header=TRUE)
#' init_sample_annotations(bay)
#' add_sample_annotations(bay, sample_annotations, sample_column =
#' "Sample_Names", columns_to_add = "Tumor_Subtypes")
#' subset_bagel_by_annotation(bay, "Tumor_Subtypes", "Lung")
#' @export
subset_bagel_by_annotation <- function(bay, annot_col, annot_names) {
  if (!all(annot_col %in% colnames(bay@sample_annotations))) {
    stop(paste(annot_col, " not found in annotation columns, please review.",
               sep = ""))
  }
  annotation_index <- which(bay@sample_annotations[[which(colnames(
    bay@sample_annotations) %in% annot_col)]] %in% annot_names)
  if (length(annotation_index) == 0) {
    stop(paste(annot_names, " not present in ", annot_col,
               " column, please review.", sep = "", collapse = TRUE))
  }
  bay@sample_annotations <- bay@sample_annotations[annotation_index, ]
  annotation_samples <- bay@sample_annotations$"Samples"
  bay@count_tables <- subset_count_tables(bay, annotation_samples)
  bay@variants <- bay@variants[which(bay@variants$Tumor_Sample_Barcode %in%
                                      annotation_samples), ]
  return(bay)
}

drop_na_variants <- function(variants, annot_col) {
  if (!annot_col %in% colnames(variants)) {
    stop(paste(annot_col, " not found in annotation columns, please review.",
               sep = ""))
  }
  if (length(which(variants[[annot_col]] == "NA")) == 0) {
    return(variants)
  } else {
    return(variants[-which(variants[[annot_col]] == "NA"), ])
  }
}

# Result object/methods -------------------------------

#' Object containing deconvolved/predicted signatures, sample weights, and
#' the bagel object the result was generated from
#'
#' @slot signatures A matrix of signatures by mutational motifs
#' @slot exposures A matrix of samples by signature weights
#' @slot tables A character vector of table names used to make the result
#' @slot type Describes how the signatures/weights were generated
#' @slot bagel The bagel object the results were generated from
#' @slot log_lik Posterior likelihood of the result (LDA only)
#' @slot perplexity Metric of goodness of model fit
#' @slot umap List of umap data.frames for plotting and analysis
#' @export
setClass("Result", representation(signatures = "matrix", exposures = "matrix",
                                  tables = "character",
                                  type = "character", bagel = "bagel",
                                  log_lik = "numeric", perplexity = "numeric",
                                  umap = "list"))

#' Return sample from bagel object
#'
#' @param result Result object containing signatures and weights
#' @param name_vector Vector of user-defined signature names
#' @return Result object with user-defined signatures names
#' @examples
#' result <- readRDS(system.file("testdata", "res.rds", package = "BAGEL"))
#' name_signatures(result, c("smoking", "apobec", "unknown"))
#' @export
name_signatures <- function(result, name_vector) {
  num_sigs <- length(colnames(result@signatures))
  if (length(name_vector) != num_sigs) {
    stop(paste("Please provide a full list of signatures names (length = ",
               num_sigs, ")", sep = ""))
  }
  eval.parent(substitute(colnames(result@signatures) <- name_vector))
  eval.parent(substitute(rownames(result@exposures) <- name_vector))
}

# Result Grid object/methods -------------------------------

#' Object containing the result objects generated from the combination of
#' annotations and a range of k values
#'
#' @slot grid_params The parameters the result grid was created using
#' @slot result_list A list of result objects with different parameters
#' @slot grid_table A summary table of the result objects in result_list
#' @export
setClass("Result_Grid", representation(grid_params = "data.table",
                                       result_list = "list",
                                       grid_table = "data.table"))
campbio/BAGEL documentation built on Oct. 6, 2020, 3:59 a.m.