library(XML)
library(ogbox)
library(affy)
library(dplyr)
library(magrittr)
library(XLConnect)
# download stanley C cohort data
downloadData = FALSE
downloadGemma = TRUE
dir.create('data-raw/StanleyC/cels',recursive=TRUE, showWarnings = FALSE)
if(downloadData){
# autogenerated password that was sent to me in plaintext form. They obviously dont care enough to be actually secure
# so why should I. Replace with your
system('wget --user OganM --password rMa3JbF4 https://www.stanleygenomics.org/stanley/standard/study_zips/study2_raw.zip -O data-raw/StanleyC/stanleyC.zip')
system('wget --user OganM --password rMa3JbF4 https://www.stanleygenomics.org/stanley/standard/studyPatients.jsp?study_id=2 -O data-raw/StanleyC/metadata.html')
system('unzip data-raw/StanleyC/stanleyC.zip -d data-raw/StanleyC/cels')
}
if(downloadGemma){
ogbox::getGemmaAnnot('GPL570','data-raw/GemmaAnnots/GPL96',annotType='noParents',overwrite=TRUE)
}
StanleyCMeta = readHTMLTable('data-raw/StanleyC/metadata.html')[[2]]
system('wget --user OganM --password rMa3JbF4 https://www.stanleygenomics.org/stanley/standard/studyQC.jsp?study_id=2 -O data-raw/StanleyC/qc.html')
StanleyCQC = readHTMLTable('data-raw/StanleyC/qc.html')[[1]]
StanleyCQC %<>% filter(Outlier != 1)
StanleyCMeta = StanleyCMeta[StanleyCMeta$Filename %in% StanleyCQC$Filename,]
cels= celFiles('data-raw/StanleyC/cels/',full.names=TRUE)
cels = cels[grepl(regexMerge(StanleyCMeta$Filename),cels)]
cels = cels[match(StanleyCMeta$Filename, basename(gsub('\\s.*$','',cels)))]
batches = cels%>% sapply(celfileDate)
cels = cels[!batches %in% names(which(table(batches)==1))]
StanleyCMeta = StanleyCMeta[!batches %in% names(which(table(batches)==1)),]
batches = batches[!batches %in% names(which(table(batches)==1))]
affy = ReadAffy(filenames = cels)
affy = affy::rma(affy)
affy = gemmaAnnot(affy, 'data-raw/GemmaAnnots/GPL96')
# remove samples of wrong sex
genders = bioGender(affy)
cels = cels[genders == StanleyCMeta$Sex]
batches = batches[genders== StanleyCMeta$Sex]
StanleyCMeta = StanleyCMeta[genders == StanleyCMeta$Sex,]
# batch correction is not possible due to covariate=batch confound
# list[gene,exp] = affy %>% sepExpr()
data.frame(batches,StanleyCMeta$Profile) %>% table
# exp = ComBat(exp,batch =batches,mod = model.matrix(~Profile,StanleyCMeta))
# affy = cbind(gene,exp)
list[gene,exp] = sepExpr(affy)
affy %<>% mostVariable(threshold= exp %>% unlist %>% median,
threshFun=median)
list[gene,exp] = sepExpr(affy)
colnames(exp) = gsub('\\s.*$','',cn(exp))
rownames(exp) = gene$Gene.Symbol
StanleyCExp = exp
StanleyCExp = StanleyCExp[match(StanleyCMeta$Filename, cn(StanleyCExp))]
patients = StanleyCMeta$`Stanley ID` %>% unique
devtools::use_data(StanleyCExp,overwrite=TRUE)
devtools::use_data(StanleyCMeta,overwrite=TRUE)
#'KDM5D' %in% rn(StanleyCExp)
#StanleyCExp['KDM5D',] %>% unlist %>% plot(col= StanleyCMeta$Sex %>% toColor %$% cols)
# Uranova analysis data-------------------
download.file('http://sncid.stanleyresearch.org/RawData/1999_03%20URANOVA.zip',destfile = 'data-raw/StanleyC/UranovaRawData.zip')
unzip('data-raw/StanleyC/UranovaRawData.zip',exdir = 'data-raw/StanleyC/')
rawUranova = loadWorkbook('data-raw/StanleyC/1999_03 URANOVA.xls')
rawUranova = readWorksheet(rawUranova, sheet = 'data', header = FALSE)
rawUranova = rawUranova[-(1:5),]
rawUranova %<>% filter(Col1 %in% c("Code", "Area 9", "Layer VI"))
rawUranova$Col1 <- as.character(rawUranova$Col1)
rawUranova$Col1[seq(4, 40, 4)] <- "WM"
rawUranova$Col1[seq(3, 39, 4)] <- "GMse"
rawUranova$Col1[seq(2, 38, 4)] <- "GM"
rawUranova <- data.frame(StanleyID = rawUranova %>% filter(Col1 == "Code") %>%
select(-Col1, -Col2) %>% as.list() %>%
unlist %>%
as.character(),
GM = rawUranova %>% filter(Col1 == "GM") %>%
select(-Col1, -Col2) %>%
as.list() %>%
unlist %>%
as.character %>%
as.numeric(),
GMse = rawUranova %>% filter(Col1 == "GMse") %>%
select(-Col1, -Col2) %>%
as.list() %>%
unlist %>%
as.character %>%
as.numeric(),
WM = rawUranova %>% filter(Col1 == "WM") %>% select(-Col1, -Col2) %>% as.list() %>% unlist %>% as.character %>% as.numeric())
use_data(rawUranova,overwrite = TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.