library(dplyr)
# Get the TCGA BRCA RNASeq2 data; clinical = TRUE also requests the clinical
# table alongside it.
brca <- TCGA2STAT::getTCGA(
  disease = "BRCA",
  data.type = "RNASeq2",
  clinical = TRUE
)

# Subset to patients with paired RNA sequence data: the column names of the
# matched "normal" table are used as the patient identifiers.
paired_data <- TCGA2STAT::TumorNormalMatch(brca$dat)
patients <- colnames(paired_data$normal)

# Isolate the clinical rows for those patients. drop = FALSE keeps the
# two-dimensional shape even if only a single row matches.
clin_dat <- brca$clinical[
  rownames(brca$clinical) %in% patients, ,
  drop = FALSE
]
# Clean the clinical tables ---------------------------------------------------
# NOTES
#   1. Unnecessary vars: Composite Element REF, daystolastknownalive
#   2. Numeric vars: yearstobirth, daystodeath, daystolastfollowup,
#      numberoflymphnodes
#   3. Almost everything else is a factor variable

#' Tidy a TCGA2STAT clinical table
#'
#' Converts the table to a tibble (row names become the `patient` column),
#' drops the two unneeded variables, and coerces the remaining columns to
#' numeric or factor as listed in the notes above. The same cleaning is
#' applied to both the paired-only and the full clinical tables, so it lives
#' in one place.
#'
#' @param clinical A clinical table with patient ids as row names and the
#'   TCGA clinical variables as columns.
#' @return A tibble with one row per row of `clinical`.
clean_clinical <- function(clinical) {
  numeric_vars <- c(
    "yearstobirth",
    "daystodeath",
    "daystolastfollowup",
    "numberoflymphnodes"
  )
  factor_vars <- c(
    "tumortissuesite",
    "pathologicstage",
    "pathologyTstage",
    "pathologyNstage",
    "pathologyMstage",
    "gender",
    "radiationtherapy",
    "histologicaltype",
    "race",
    "ethnicity",
    "dateofinitialpathologicdiagnosis"
  )

  clinical %>%
    as_tibble(rownames = "patient") %>%
    select(-c(`Composite Element REF`, daystolastknownalive)) %>%
    mutate(across(all_of(numeric_vars), as.numeric)) %>%
    # Labels are assigned to the sorted unique values of vitalstatus, so the
    # smaller value becomes "Alive" and the larger "Dead".
    mutate(vitalstatus = factor(vitalstatus, labels = c("Alive", "Dead"))) %>%
    mutate(across(all_of(factor_vars), factor))
}

# Clinical data restricted to the patients with paired samples
clinical_data <- clean_clinical(clin_dat)

# Make the merged data sets ---------------------------------------------------
# Drop rows with any missing value, remove the status and OS columns, and
# rename `bcr` to `patient` so it matches the key of the clinical tables.
dat <- brca$merged.dat %>%
  tidyr::drop_na() %>%
  select(-status, -OS) %>%
  rename(patient = bcr)

# Clinical data for every patient in the clinical table
all_clinical_data <- clean_clinical(brca$clinical)
# Attach the clinical variables to the merged data; the inner join keeps only
# patients that appear in both tables.
brca_full <- inner_join(x = all_clinical_data, y = dat, by = "patient")

# Rank the gene columns by their median value, largest first.
gene_cols <- brca_full %>%
  select(`A1BG`:`tAKR`)
# vapply() iterates over the columns directly; apply() would first coerce the
# entire data frame to a matrix copy.
gene_medians <- vapply(gene_cols, median, numeric(1))
gene_median_sort <- sort(gene_medians, decreasing = TRUE)

# Smaller data sets: the clinical columns (patient through ethnicity) plus the
# 10 / 100 genes with the highest medians. all_of() marks the gene names as an
# external character vector rather than column expressions.
brca10 <- brca_full %>%
  select(patient:ethnicity, all_of(names(head(gene_median_sort, 10))))
brca100 <- brca_full %>%
  select(patient:ethnicity, all_of(names(head(gene_median_sort, 100))))

# Save the data sets with usethis, replacing any existing copies.
usethis::use_data(
  brca_full,
  brca10,
  brca100,
  paired_data,
  clinical_data,
  overwrite = TRUE
)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.