# R/data.R

#' Original metadata from The Cancer Genome Atlas (TCGA).
#'
#' A dataset containing standardized biological phenotype and experimental
#' condition information such as tissue type, age, and diagnosis for 11284
#' samples within the TCGA project.
#'
#' @format A data frame with 11284 rows and 68 variables.
#'
#' @source \url{https://jhubiostatistics.shinyapps.io/recount/}
#'
#' @examples
#' \dontrun{
#'  tcga_meta_original
#' }
"tcga_meta_original"

# ==============================================================================

# NOTE(review): the description below says 11284 samples while @format says
# 2147 rows -- confirm which count applies to the cleaned dataset.
#' Clean and standardized metadata obtained by processing the original
#' metadata `tcga_meta_original`.
#'
#' A dataset containing standardized biological phenotype and experimental
#' condition information such as tissue type, age, and diagnosis for 11284
#' samples within the TCGA project.
#'
#' @format A data frame with 2147 rows and 6 variables:
#' \describe{
#'   \item{cgc_case_primary_therapy_outcome_success}{Treatment outcome}
#'   \item{cgc_case_primary_site}{Tissue type}
#'   \item{cgc_sample_tissue_source_site}{Site/location where cancer sample
#'   was retrieved}
#'   \item{cgc_case_histological_diagnosis}{Precise cancer diagnosis}
#'   \item{cgc_case_gender}{Gender of subject}
#'   \item{cgc_case_age_at_diagnosis}{Age when subject was diagnosed
#'   with cancer}
#' }
#'
#' @source \url{https://jhubiostatistics.shinyapps.io/recount/}
#'
#' @examples
#' \dontrun{
#'  tcga_meta_clean
#' }
"tcga_meta_clean"

# ==============================================================================

#' An example dataset of the most clinically relevant TCGA metadata variables
#' to subset. This format should be used for the `variable_subset` parameter
#' of `standardize_metadata()`.
#'
#' @format A character vector of metadata variable names.
#' \describe{
#'   \item{cgc_case_primary_therapy_outcome_success}{Treatment outcome}
#'   \item{cgc_case_primary_site}{Tissue type}
#'   \item{cgc_sample_tissue_source_site}{Site/location where cancer sample
#'   was retrieved}
#'   \item{cgc_case_histological_diagnosis}{Precise cancer diagnosis}
#'   \item{cgc_case_gender}{Gender of subject}
#'   \item{cgc_case_race}{Race of subject}
#'   \item{cgc_case_age_at_diagnosis}{Age when subject was diagnosed
#'   with cancer}
#' }
#'
#' @source Jedid Ahn
#'
#' @examples
#' \dontrun{
#'  tcga_variable_subset
#' }
"tcga_variable_subset"

# ==============================================================================

#' An example dataset specifying the type of each metadata variable listed in
#' `tcga_variable_subset`. There are 3 types: categorical, numeric, and ordinal.
#'
#' @format A named character vector, where the name is the variable and the
#' corresponding value is the type.
#' \describe{
#'   \item{cgc_case_primary_therapy_outcome_success}{categorical}
#'   \item{cgc_case_primary_site}{categorical}
#'   \item{cgc_sample_tissue_source_site}{categorical}
#'   \item{cgc_case_histological_diagnosis}{categorical}
#'   \item{cgc_case_gender}{categorical}
#'   \item{cgc_case_race}{categorical}
#'   \item{cgc_case_age_at_diagnosis}{numeric}
#' }
#'
#' @source Jedid Ahn
#'
#' @examples
#' \dontrun{
#'  tcga_variable_type_vec
#' }
"tcga_variable_type_vec"

# [END]
# ahnjedid/MetaConIdentifier documentation built on Dec. 18, 2021, 11:26 p.m.