## knitr defaults for every chunk: echo the code, do not cache results,
## and keep warnings and messages out of the rendered output
knitr::opts_chunk$set(
  echo = TRUE,
  cache = FALSE,
  warning = FALSE,
  message = FALSE
)

library(tidyverse); library(knitr); library(GEOquery);

Steps to reproduce the COVID-19 case study data

1) Download demographics using the GEOquery R package

## download the GEO series and list the entries it contains
g <- getGEO(GEO = "GSE147507")
names(g)
## keep the entry that belongs to the series matrix file
e1 <- g[["GSE147507_series_matrix.txt.gz"]]

## assay characteristics
phenoData <- pData(e1)
dim(phenoData)
## sample IDs: the text before the first "_" of each description, converted
## to syntactically valid names; vapply() guarantees one string per sample
sample_ids <- vapply(
  strsplit(as.character(phenoData$description), "_"),
  function(parts) parts[[1]],
  character(1)
)
rownames(phenoData) <- make.names(sample_ids)

## transcriptomics: tab-separated raw read counts, first column = row names
counts_path <- here::here(
  "inst", "extdata", "covid19", "GSE147507_RawReadCounts.tsv"
)
mrna <- as.matrix(read.delim(counts_path, row.names = 1))
colnames(mrna) <- gsub("_", ".", colnames(mrna))

## check if both datasets are in the same order
## identical() also catches a length mismatch, which `==` would recycle over;
## the result is printed and then enforced so that misaligned annotations and
## counts can never be packaged together
samples_aligned <- identical(rownames(phenoData), colnames(mrna))
samples_aligned
stopifnot(samples_aligned)

## keep three annotation columns under short names and derive `group`
## as "<time>_<treatment>"
pheno_cols <- c(
  cell = "cell type:ch1",
  time = "time point:ch1",
  treatment = "treatment:ch1"
)
demo <- phenoData[, pheno_cols] %>%
  setNames(names(pheno_cols)) %>%
  mutate(group = paste(time, treatment, sep = "_"))

Remove genes with zero variance, and keep only genes that have a count of at least 6 in every sample

Number of genes before filtering

## number of rows (genes) in the count matrix prior to any filtering
nrow(mrna)

Dimensions (samples x genes) after filtering

## minimum read count a gene must reach in every sample to be retained
min_count <- 6

## drop genes whose counts do not vary across samples (sd of 0 or undefined)
gene_sd <- apply(mrna, 1, sd)
mrna <- mrna[!is.na(gene_sd) & gene_sd != 0, , drop = FALSE]

## drop genes with a missing or too-low count in any sample
## Selecting rows with a logical mask keeps the same genes as blanking low
## counts to NA and calling na.omit(), but leaves the counts untouched and
## does not attach an `na.action` attribute to the matrix that gets saved.
low_or_missing <- is.na(mrna) | mrna < min_count
mrna <- mrna[rowSums(low_or_missing) == 0, , drop = FALSE]

## transpose to samples (rows) x genes (columns) and report the final size
mrna <- t(mrna)
dim(mrna)

Save package data

## save covid19 datasets
## bundle the sample annotations (demo) and the filtered counts (mrna)
## into one named list and store it as package data, replacing any old copy
covid19 <- list(demo = demo, mrna = mrna)

usethis::use_data(covid19, overwrite = TRUE)

Session Info

## print the R version, platform and package versions used for this run
sessionInfo()


Jasondd66/covid_19bioMarkers documentation built on Dec. 18, 2021, 12:33 a.m.