# R/data.R

# Useful data ---------------------------------------------------------

#' Common human PBMC marker genes
#'
#' A tibble with one row per marker gene and three character columns:
#'
#' * `label`: short code for the cell type
#' * `descrip`: description of the cell type
#' * `gene_name`: name of the marker gene
#'
#' A cell type with more than one marker gene appears on several rows.
#'
#' @examples
#' pbmc_marker_genes
#'
#' @export
# https://github.com/hesselberthlab/scrunchy/issues/47
pbmc_marker_genes <- tibble::tibble(
  label = c(
    "T4", "MC14", "MC14", "B", "T8", "MC3A",
    "MC3A", "NK", "NK", "DC", "DC", "MK"
  ),
  descrip = c(
    "CD4 T cells",
    "CD14+ Monocytes",
    "CD14+ Monocytes",
    "B cells",
    "CD8 T cells",
    "FCGR3A+ Monocytes",
    "FCGR3A+ Monocytes",
    "NK cells",
    "NK cells",
    "Dendritic Cells",
    "Dendritic Cells",
    "Megakaryocytes"
  ),
  gene_name = c(
    "IL7R", "CD14", "LYZ", "MS4A1", "CD8A", "FCGR3A",
    "MS4A7", "GNLY", "NKG7", "FCER1A", "CST3", "PPBP"
  )
)

#' Human gene identifiers
#'
#' Gene identifiers used by [`calc_cell_cycle()`].
#'
#' @examples
#' human_gene_ids
#'
#' @source Generated by `data-raw/human-gene-ids.R`
# The bare string below names the dataset that the roxygen comments above
# document; the object itself is not defined in this part of the file.
"human_gene_ids"

# Data sets -----------------------------------------------------------

#' An example FunctionalSingleCellExperiment
#'
#' Contains sample data for a functional single-cell experiment.
#'
#' It holds two `SingleCellExperiment` objects:
#'
#' 1. `rnaseq`: a `SingleCellExperiment` object containing a single-cell mRNA
#'    sequencing (10x Genomics V2 3 prime) cell/gene counts matrix in the
#'    `counts` slot. Log-normalized counts are in the `logcounts` slot.
#'
#' 2. `haircut`: a `SingleCellExperiment` object containing a Haircut
#'    experiment using DNA repair oligos. Counts are in the `counts` slot, and
#'    centered log-ratio normalized counts are in the `logcounts` slot.
#'
#' Haircut repair substrates include:
#'
#' * a "normal" DNA oligo (`Normal_`)
#' * a ribonucleotide (rG) at position 45 (`Ribo_`)
#' * an abasic site at position 45 (`Abasic_`)
#' * a uracil:adenosine base-pair, with uracil at position 45 (`Uracil_`)
#' * a uracil:guanosine base-pair, with uracil at position 45 (`GU_`)
#'
#' Dimensionality reduction calculations are in the `rnaseq` experiment,
#' including `PCA`, `UMAP`, and `TSNE` slots. In addition, clusters determined
#' by [`cluster_kmeans()`] and [`cluster_leiden()`] are available as `colData`.
#'
#' @examples
#' fsce_small
#'
#' # Individual experiments
#' fsce_small[["rnaseq"]]
#'
#' fsce_small[["haircut"]]
#'
#' # Gene and activity names (first 5 items)
#' rownames(fsce_small[["rnaseq"]])[1:5]
#'
#' rownames(fsce_small[["haircut"]])[1:5]
#'
#' # Subset with `[` using `[features, cell_ids, experiments]`
#' features <- c("Uracil_45", "TP53")
#' cell_ids <- c("TGCGGGTGTAGAGTGC", "CTACGTCCACCACGTG")
#'
#' fsce_small[features, cell_ids, ]
#'
#' # Names of the dimensionality reduction results
#' SingleCellExperiment::reducedDimNames(fsce_small[["rnaseq"]])
#'
#' # UMAP results (first 5 rows)
#' SingleCellExperiment::reducedDim(fsce_small[["rnaseq"]], "UMAP")[1:5, ]
#'
#' # Per-cell column data, including the k-means and Leiden cluster IDs
#' SingleCellExperiment::colData(fsce_small[["rnaseq"]])
#'
#' @source Generated by `data-raw/fsce_small.R`.
"fsce_small"

#' Tidied tibble of data from `fsce_small`
#'
#' Used for function examples.
#'
#' Variables include:
#'
#' * Cell identifiers (`cell_id`)
#' * UMAP dimensions (`UMAP1` and `UMAP2`)
#' * k-means clusters (`k_cluster`)
#' * Leiden clusters (`leiden_cluster`)
#' * Top ten most variable activities (`Uracil_45`, `riboG_44`, etc.)
#'
#' @examples
#' fsce_tidy
#'
#' @source Generated by `data-raw/fsce_tidy.R`.
"fsce_tidy"


#' Cell index map for 10x Genomics V3 chemistry
#'
#' A tibble containing two columns: source (`src`) and destination (`dst`).
#'
#' @details It may be possible to reduce this data by half because the index
#' pairs are stored twice in the map (ID1 -> ID2 and ID2 -> ID1). Would need to
#' know which codes are mRNA and which are feature.
#'
#' Contains a map of mRNA (i.e., oligo-dT) and feature ("CS1" and "CS2")
#' barcodes for 10x GEMs. Each pair is present twice in the tibble, once
#' as mRNA -> feature and another as feature -> mRNA.
#'
#' @source 3M-february-2018.txt.gz file from cellranger 3.0.2 source
# "barcode_map_10x_v3"
# hesselberthlab/scrunchy documentation built on Nov. 11, 2019, 2:29 p.m.