## Script to fetch HGNC symbols from GEO for every GPL listed in column 1 of
## platform.csv.bz2.
## ---- Packages ----
library(GEOquery)      # getGEO(), Table()
library(org.Hs.eg.db)  # org.Hs.egSYMBOL
library(HGNChelper)    # checkGeneSymbols()

## ---- Inputs ----
## Table of platforms to process; the main loop takes the GPL id from column 1.
con <- bzfile("platform.csv.bz2")
all.platforms <- read.csv(con, as.is = TRUE)

## Reference gene symbols from org.Hs.egSYMBOL, kept as a plain character
## vector without names.
hgnc.reference <- unname(as.character(org.Hs.egSYMBOL))

## ---- Output directories ----
## raw.platform.dir is handed to getGEO() as its destdir; hgnc.vec.dir receives
## one saved symbol vector per platform.
raw.platform.dir <- "platforms"
hgnc.vec.dir <- "hgnc.vecs"
dir.create(raw.platform.dir)
dir.create(hgnc.vec.dir)

## ---- Resume bookkeeping ----
## Without a compressed archive of earlier results, start a fresh results file
## that holds only the header row. With one, load it so the main loop can skip
## every platform it already lists.
if (!file.exists("gpls_already_tested.csv.bz2")) {
  write.table(
    matrix(
      c("platform", "colname", "frac.hgnc", "nrow", "valid.frac",
        "valid.after.hgnchelper.frac", "distribution", "submission_date"),
      nrow = 1
    ),
    file = "gpls_already_tested.csv",
    row.names = FALSE,
    col.names = FALSE,
    sep = ","
  )
} else {
  con2 <- bzfile("gpls_already_tested.csv.bz2")
  gpls.already.tested <- read.csv(con2, header = TRUE)
}
## Main loop. For every platform in column 1 of all.platforms that is not
## already listed in gpls.already.tested:
##   1. download the platform with getGEO() and extract its annotation table;
##   2. score each column by the fraction of its unique values that occur in
##      hgnc.reference and pick the best-scoring column;
##   3. if that fraction is > 0, validate the column with HGNChelper and save
##      it (prefixed with distribution and submission date) under hgnc.vec.dir;
##   4. append one summary row to gpls_already_tested.csv.
## Platforms whose download or table extraction fails are skipped without a
## summary row.

## Return one header field of a GPL object as a single string, or NA when the
## field is absent. A NULL field would otherwise vanish inside c(), leaving the
## summary row short and shifting the metadata slots of the saved vector.
gpl.header.field <- function(gpldat, field) {
  value <- gpldat@header[[field]]
  if (length(value) == 0) {
    return(NA_character_)
  }
  ## NOTE(review): both fields are expected to hold one value; several values
  ## are collapsed so the row still has a fixed number of columns.
  paste(value, collapse = "; ")
}

## Platforms recorded by an earlier run; empty when no archive was loaded.
tested.gpls <- if (exists("gpls.already.tested")) {
  gpls.already.tested$platform
} else {
  character(0)
}

## seq_len() makes the loop a no-op for an empty platform table (1:0 would
## iterate over 1 and 0).
for (i in seq_len(nrow(all.platforms))) {
  gpl <- all.platforms[i, 1]
  if (gpl %in% tested.gpls) next

  gpldat <- try(getGEO(gpl, destdir = raw.platform.dir))
  if (inherits(gpldat, "try-error")) next
  gpltable <- try(Table(gpldat))
  if (inherits(gpltable, "try-error")) next

  ## Per-column fraction of unique values that are reference symbols. Columns
  ## are scored one at a time so the table is never coerced to a single matrix.
  hgnc.frac <- vapply(gpltable, function(x) {
    values <- unique(x)
    sum(values %in% hgnc.reference) / length(values)
  }, numeric(1))

  ## which.max() returns integer(0) when no fraction is defined (a table with
  ## no rows gives 0/0 = NaN in every column); report that case as NA rather
  ## than letting an NA condition abort the whole run.
  best.col <- which.max(hgnc.frac)
  has.best <- length(best.col) == 1
  best.colname <- if (has.best) colnames(gpltable)[best.col] else NA
  best.frac <- if (has.best) hgnc.frac[[best.col]] else NA

  distribution <- gpl.header.field(gpldat, "distribution")
  submission.date <- gpl.header.field(gpldat, "submission_date")
  valid.frac <- NA
  after.HGNChelper.valid.frac <- NA

  if (has.best && best.frac > 0) {
    hgnc.vec <- unique(as.character(gpltable[[best.col]]))
    hgnc.vec <- gsub("[ ].+", "", hgnc.vec) ## get rid of anything after a space
    ## Convert to ASCII, dropping characters that cannot be converted.
    HGNChelper.output <- checkGeneSymbols(
      iconv(hgnc.vec, "latin1", "ASCII", sub = "")
    )
    ## Count the symbols before the metadata is prepended, so the progress
    ## message reports symbols only.
    n.symbols <- length(hgnc.vec)
    valid.frac <- sum(HGNChelper.output$Approved) / n.symbols
    after.HGNChelper.valid.frac <-
      sum(!is.na(HGNChelper.output$Suggested.Symbol)) / n.symbols

    ## Saved layout: distribution, submission date, then the symbols.
    hgnc.vec <- c(distribution, submission.date, hgnc.vec)
    hgnc.vec.file <- file.path(hgnc.vec.dir, paste0(gpl, "_hgnc.vec.RData"))
    save(hgnc.vec, file = hgnc.vec.file, compress = "bzip2")
    print(paste(i, gpl, n.symbols, "HGNC symbols found and saved."))
  } else {
    print(paste(i, gpl, ": No HGNC symbols."))
  }

  ## One row per tested platform, in the column order of the results header.
  info.for.file <- t(c(
    gpl, best.colname, best.frac, nrow(gpltable),
    valid.frac, after.HGNChelper.valid.frac,
    distribution, submission.date
  ))
  write.table(info.for.file, file = "gpls_already_tested.csv", append = TRUE,
              row.names = FALSE, col.names = FALSE, sep = ",")
}
## (Web-page residue, not part of the script; commented out so the file parses.)
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.