# Global chunk options: show the code, never cache results, and keep
# warnings and messages out of the rendered document.
knitr::opts_chunk$set(
  echo = TRUE,
  cache = FALSE,
  warning = FALSE,
  message = FALSE
)

library("tidyverse") # install.packages("tidyverse")
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# BiocManager::install("annotate")
library("annotate") 
library("hugene10sttranscriptcluster.db") # BiocManager::install("hugene10sttranscriptcluster.db")

Steps to reproduce the asthma datasets

# Load the raw asthma objects into the current environment.
# NOTE(review): the code below uses genDat, demo, cells, metExp, metabolites,
# gene.module and metabolite.module without defining them, so they are
# presumably provided by this file -- confirm.
load(
  here::here(
    "inst", "extdata", "dataCleaning", "asthma", "data",
    "asthmaDatasets.RDATA"
  )
)

## Summarise Affymetrix probe sets to gene symbols: probes without a gene
## symbol are dropped and probes sharing a symbol are averaged.
## NOTE(review): the last column of genDat is excluded here; presumably it is
## not an expression column -- confirm.
eset0 <- genDat[, -ncol(genDat)] %>%
  mutate(genSym = getSYMBOL(rownames(.), "hugene10sttranscriptcluster")) %>%
  filter(!is.na(genSym)) %>%
  group_by(genSym) %>%
  # Pass the function directly; funs() is deprecated since dplyr 0.8.0.
  summarise_all(mean)
## Convert to a numeric matrix keyed by gene symbol. The first column of the
## summary is the grouping column (genSym), so it is removed from the values
## and used as the row names instead.
eset <- as.matrix(eset0[, -1])
rownames(eset) <- as.character(eset0$genSym)

## Add a subject identifier to demo: the first two "."-separated pieces of
## each row name, pasted together without a separator.
## vapply() guarantees a character vector (even for zero rows), unlike sapply().
demo$Subj <- vapply(
  strsplit(rownames(demo), "\\."),
  function(parts) paste0(parts[1], parts[2]),
  character(1)
)
## Convert time and response to factors with an explicit level order
## (the first level is the reference); values outside the listed levels
## become NA.
demo$Time <- factor(demo$Time, c("pre", "post"))
demo$Response <- factor(demo$Response, c("ER", "DR"))

## Bundle every asthma data type into a single named list.
## NOTE(review): mrna and met are transposed, presumably so that rows line up
## with the rows of demo -- confirm.
asthma <- list(
  demo    = demo,
  cc      = cells,
  mrna    = t(eset),
  met     = t(metExp),
  metAnn  = metabolites,
  mrnaMod = gene.module,
  metMod  = metabolite.module
)

Data summary

Study design

# Cross-tabulate the number of rows in demo by time point and response group.
table(
  Time = asthma$demo$Time,
  Response = asthma$demo$Response
)

Number of variables of each data type

# Data types whose number of variables (columns) is plotted.
dataTypes <- c("cc", "mrna", "met", "mrnaMod", "metMod")
data.frame(
  dataTypes = dataTypes,
  # Count columns only for the selected elements; vapply() enforces exactly
  # one integer per data type instead of relying on sapply() simplification.
  size = vapply(asthma[dataTypes], ncol, integer(1))
) %>%
  # Order the bars from the largest to the smallest data type.
  ggplot(aes(x = reorder(dataTypes, -size), y = size)) +
  geom_col() +
  # Log scale because the sizes are drawn on very different magnitudes.
  scale_y_log10() +
  ylab("Number of variables") +
  xlab("dataset") +
  theme_classic()

Save package data

# Save the assembled `asthma` list as package data; overwrite = TRUE replaces
# any previously saved copy. The object is passed by bare name on purpose.
usethis::use_data(asthma, overwrite = TRUE)


singha53/omicsCentralDatasets documentation built on March 7, 2020, 2:02 a.m.