# vec<- c("A_23_P111206", "A_23_P126540", "A_23_P145965", "A_23_P146233", "A_23_P17065", "A_23_P206120", "A_23_P259071",
# "A_23_P310460", "A_23_P321307", "A_23_P326963", "A_23_P362719", "A_23_P56494", "A_23_P79398", "A_23_P85082",
# "A_23_P8640", "A_23_P86470" , "A_23_P92672", "A_23_P99442" , "A_24_P230116", "A_24_P251866", "A_24_P257224",
# "A_24_P3016" , "A_24_P63019" , "A_24_P686247", "A_24_P917668", "A_24_P96505" , "A_32_P113190", "A_32_P142521",
# "A_32_P170879", "A_32_P17743")
# file<-"~/Dropbox (Kean Lab)/AWS/Scott/Rproj/SampleData/GSE23924/GSE23924_RAW/GPL6480.soft"
# file.exists(file)
# Translate probe IDs into gene symbols using a GEO platform annotation file.
#
# Arguments:
#   vec      character vector of probe IDs to annotate.
#   file     path to the annotation file; it is parsed with loadGeoFile().
#   filetype retained for backward compatibility; the annotation is always
#            parsed by loadGeoFile(), so this value does not change the result.
#
# Returns a character vector the same length as `vec`. Each element is the
# GENE_SYMBOL of the probe when one is available, otherwise the probe ID itself
# (probes absent from the annotation, or annotated with an empty symbol).
annotateProbes <- function(vec, file = NULL, filetype = "csv") {
  if (length(file) == 0) {
    stop("No File input", call. = FALSE)
  }
  if (!is.character(vec)) {
    stop("Not correct input", call. = FALSE)
  }

  annt <- loadGeoFile(file)
  if (!"GENE_SYMBOL" %in% colnames(annt)) {
    stop("Annotation file has no GENE_SYMBOL column", call. = FALSE)
  }

  # match() yields NA for probes missing from the annotation instead of failing
  # with "subscript out of bounds" as direct row-name indexing would.
  firstpass <- annt[match(vec, rownames(annt)), "GENE_SYMBOL"]

  output <- vec
  # Only overwrite probes that actually have a non-empty symbol.
  has_symbol <- !is.na(firstpass) & firstpass != ""
  output[has_symbol] <- firstpass[has_symbol]
  output
}
# Annotate an expression table with gene symbols from the master annotation
# file and optionally collapse probes to one row per gene.
#
# The annotation is read from
#   <rootpath>/Bioinformatics Resources/Rhesus Annotation/MasterAnnotation.csv
# where `rootpath` is a variable that must already exist in the calling
# environment chain. The file must have `ID` (probe) and `Symbol` columns.
#
# Arguments:
#   obj1           matrix or data frame of expression values; row names are
#                  probe IDs.
#   gene.multiples TRUE keeps every probe and makes repeated symbols unique
#                  with a "#<n>" suffix; FALSE collapses probes per symbol with
#                  WGCNA::collapseRows().
#   delete.NA      TRUE drops probes without a symbol; FALSE keeps them, named
#                  by their probe ID.
#   method         passed to WGCNA::collapseRows() (only used when
#                  gene.multiples is FALSE).
#
# Returns a data frame when gene.multiples is TRUE, otherwise the
# `datETcollapsed` element returned by collapseRows().
#
# Note: annotated probes are filtered with complete.cases() on the whole table,
# so an annotated probe with an NA expression value is dropped as well.
masterannotate <- function(obj1, gene.multiples = FALSE, delete.NA = TRUE,
                           method = "maxRowVariance") {
  probe_ids <- rownames(obj1)
  if (is.null(probe_ids)) {
    stop("`obj1` must carry probe IDs as row names", call. = FALSE)
  }

  annotation_path <- paste0(
    rootpath,
    "/Bioinformatics Resources/Rhesus Annotation/MasterAnnotation.csv"
  )
  annotation <- read.csv(annotation_path, header = TRUE,
                         colClasses = "character")
  if (!all(c("ID", "Symbol") %in% colnames(annotation))) {
    stop("Master annotation file must contain `ID` and `Symbol` columns",
         call. = FALSE)
  }

  # Exact lookup: indexing a data frame by row name matches partially, which
  # could hand a probe the symbol of a different, longer probe ID.
  annotated <- as.data.frame(obj1, stringsAsFactors = FALSE)
  annotated$Symbol <- annotation$Symbol[match(probe_ids, annotation$ID)]

  with_symbol <- annotated[complete.cases(annotated), , drop = FALSE]
  without_symbol <- annotated[is.na(annotated$Symbol), , drop = FALSE]

  # No collapsing: keep every probe, disambiguating repeated symbols.
  if (gene.multiples) {
    rownames(with_symbol) <- make.unique(with_symbol$Symbol, sep = "#")
    with_symbol$Symbol <- NULL
    if (delete.NA) {
      return(with_symbol)
    }
    without_symbol$Symbol <- NULL
    return(rbind(with_symbol, without_symbol))
  }

  # Collapsing: unannotated probes, when kept, form their own group named by
  # the probe ID.
  if (delete.NA) {
    selected <- with_symbol
  } else {
    without_symbol$Symbol <- rownames(without_symbol)
    selected <- rbind(with_symbol, without_symbol)
  }
  groups <- selected$Symbol
  selected$Symbol <- NULL

  # WGCNA is only needed on this path; fail loudly if it is unavailable.
  if (!requireNamespace("WGCNA", quietly = TRUE)) {
    stop("Package 'WGCNA' is required to collapse probes per gene",
         call. = FALSE)
  }
  collapsed <- WGCNA::collapseRows(datET = selected, rowGroup = groups,
                                   rowID = rownames(selected), method = method)
  # Guard against a non-list return so the failure is reported clearly.
  if (!is.list(collapsed)) {
    stop("collapseRows() did not return a result list", call. = FALSE)
  }
  collapsed$datETcollapsed
}
# Annotate an expression table with gene symbols from a user-supplied CSV
# annotation file and optionally collapse probes to one row per gene.
#
# Arguments:
#   obj1           matrix or data frame of expression values; row names are
#                  probe IDs.
#   file           path to the annotation CSV (lines starting with "#" are
#                  skipped as comments).
#   gene.multiples TRUE keeps every probe and makes repeated symbols unique
#                  with a "#<n>" suffix; FALSE collapses probes per symbol with
#                  WGCNA::collapseRows().
#   delete.NA      TRUE drops probes without a symbol; FALSE keeps them, named
#                  by their probe ID.
#   method         passed to WGCNA::collapseRows() (only used when
#                  gene.multiples is FALSE).
#   genecode       name of the annotation column holding gene symbols.
#   probecode      name of the annotation column holding probe IDs.
#
# Returns a data frame when gene.multiples is TRUE, otherwise the
# `datETcollapsed` element returned by collapseRows().
#
# Note: annotated probes are filtered with complete.cases() on the whole table,
# so an annotated probe with an NA expression value is dropped as well.
annotatePerFile <- function(obj1, file = NULL, gene.multiples = FALSE,
                            delete.NA = TRUE, method = "maxRowVariance",
                            genecode = "Symbol", probecode = "ID") {
  if (is.null(file)) {
    stop("`file` must be the path to an annotation CSV", call. = FALSE)
  }
  probe_ids <- rownames(obj1)
  if (is.null(probe_ids)) {
    stop("`obj1` must carry probe IDs as row names", call. = FALSE)
  }

  annotation <- read.csv(file, header = TRUE, colClasses = "character",
                         comment.char = "#")
  absent <- setdiff(c(genecode, probecode), colnames(annotation))
  if (length(absent) > 0) {
    stop("Annotation file is missing column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  # Exact lookup: indexing a data frame by row name matches partially, which
  # could hand a probe the symbol of a different, longer probe ID.
  annotated <- as.data.frame(obj1, stringsAsFactors = FALSE)
  annotated$Symbol <-
    annotation[[genecode]][match(probe_ids, annotation[[probecode]])]

  with_symbol <- annotated[complete.cases(annotated), , drop = FALSE]
  without_symbol <- annotated[is.na(annotated$Symbol), , drop = FALSE]

  # No collapsing: keep every probe, disambiguating repeated symbols.
  if (gene.multiples) {
    rownames(with_symbol) <- make.unique(with_symbol$Symbol, sep = "#")
    with_symbol$Symbol <- NULL
    if (delete.NA) {
      return(with_symbol)
    }
    without_symbol$Symbol <- NULL
    return(rbind(with_symbol, without_symbol))
  }

  # Collapsing: unannotated probes, when kept, form their own group named by
  # the probe ID.
  if (delete.NA) {
    selected <- with_symbol
  } else {
    without_symbol$Symbol <- rownames(without_symbol)
    selected <- rbind(with_symbol, without_symbol)
  }
  groups <- selected$Symbol
  selected$Symbol <- NULL

  # WGCNA is only needed on this path; fail loudly if it is unavailable.
  if (!requireNamespace("WGCNA", quietly = TRUE)) {
    stop("Package 'WGCNA' is required to collapse probes per gene",
         call. = FALSE)
  }
  collapsed <- WGCNA::collapseRows(datET = selected, rowGroup = groups,
                                   rowID = rownames(selected), method = method)
  # Guard against a non-list return so the failure is reported clearly.
  if (!is.list(collapsed)) {
    stop("collapseRows() did not return a result list", call. = FALSE)
  }
  collapsed$datETcollapsed
}
# Annotate an expression matrix or ExpressionSet with gene symbols from a CSV
# annotation file (read with data.table::fread) and optionally collapse probes
# to one row per gene.
#
# Arguments:
#   obj1           matrix/data frame of expression values with probe IDs as row
#                  names, or an ExpressionSet (its exprs() matrix is used).
#   file           path to the comma-separated annotation file.
#   gene.multiples TRUE keeps every probe and makes repeated symbols unique
#                  with a "#<n>" suffix; FALSE collapses probes per symbol with
#                  WGCNA::collapseRows().
#   delete.NA      TRUE drops probes absent from the annotation; FALSE keeps
#                  them, named by their probe ID.
#   method         passed to WGCNA::collapseRows() (only used when
#                  gene.multiples is FALSE).
#   genecode       name of the annotation column holding gene symbols.
#   probecode      name of the annotation column holding probe IDs.
#   notfound       placeholder the annotation file uses for "no symbol"; such
#                  entries are labelled with their probe ID, so they are kept
#                  regardless of delete.NA.
#
# Returns:
#   gene.multiples = TRUE  -> a data frame (also for ExpressionSet input).
#   gene.multiples = FALSE -> the collapsed expression table; for
#                             ExpressionSet input, a new ExpressionSet carrying
#                             the original pData.
#
# Note: annotated probes are filtered with complete.cases() on the whole table,
# so an annotated probe with an NA expression value is dropped as well.
annotatePerFileNEW <- function(obj1, file = NULL, gene.multiples = FALSE,
                               delete.NA = TRUE, method = "maxRowVariance",
                               genecode = "Symbol", probecode = "ID",
                               notfound = "---") {
  if (is.null(file)) {
    stop("`file` must be the path to an annotation CSV", call. = FALSE)
  }
  if (!requireNamespace("data.table", quietly = TRUE)) {
    stop("Package 'data.table' is required to read the annotation file",
         call. = FALSE)
  }

  # inherits() is safe for matrices, whose class() has length two; comparing
  # class() with `==` inside if() fails on such input in current R.
  is_eset <- inherits(obj1, "ExpressionSet")
  if (is_eset) {
    eset_pre <- obj1
    obj1 <- Biobase::exprs(obj1)
  }

  probe_ids <- rownames(obj1)
  if (is.null(probe_ids)) {
    stop("`obj1` must carry probe IDs as row names", call. = FALSE)
  }

  annotation <- data.table::fread(file, header = TRUE,
                                  colClasses = "character", sep = ",")
  absent <- setdiff(c(genecode, probecode), colnames(annotation))
  if (length(absent) > 0) {
    stop("Annotation file is missing column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }
  gene_symbols <- annotation[[genecode]]
  annotation_ids <- annotation[[probecode]]

  # Replace the "not found" placeholder with the probe ID BEFORE looking up the
  # probes; doing it afterwards leaves the lookup result untouched.
  placeholder <- !is.na(gene_symbols) & gene_symbols == notfound
  gene_symbols[placeholder] <- annotation_ids[placeholder]

  annotated <- as.data.frame(obj1, stringsAsFactors = FALSE)
  annotated$Symbol <- gene_symbols[match(probe_ids, annotation_ids)]

  with_symbol <- annotated[complete.cases(annotated), , drop = FALSE]
  without_symbol <- annotated[is.na(annotated$Symbol), , drop = FALSE]

  # No collapsing: keep every probe, disambiguating repeated symbols.
  if (gene.multiples) {
    rownames(with_symbol) <- make.unique(with_symbol$Symbol, sep = "#")
    with_symbol$Symbol <- NULL
    if (delete.NA) {
      return(with_symbol)
    }
    without_symbol$Symbol <- NULL
    return(rbind(with_symbol, without_symbol))
  }

  # Collapsing: unannotated probes, when kept, form their own group named by
  # the probe ID.
  if (delete.NA) {
    selected <- with_symbol
  } else {
    without_symbol$Symbol <- rownames(without_symbol)
    selected <- rbind(with_symbol, without_symbol)
  }
  groups <- selected$Symbol
  selected$Symbol <- NULL

  # WGCNA is only needed on this path; fail loudly if it is unavailable.
  if (!requireNamespace("WGCNA", quietly = TRUE)) {
    stop("Package 'WGCNA' is required to collapse probes per gene",
         call. = FALSE)
  }
  collapsed <- WGCNA::collapseRows(datET = selected, rowGroup = groups,
                                   rowID = rownames(selected), method = method)
  # Guard against a non-list return so the failure is reported clearly.
  if (!is.list(collapsed)) {
    stop("collapseRows() did not return a result list", call. = FALSE)
  }

  if (!is_eset) {
    return(collapsed$datETcollapsed)
  }

  # Rebuild an ExpressionSet around the collapsed values and carry over the
  # sample annotation of the input.
  eset_post <- Biobase::ExpressionSet(assayData = collapsed$datETcollapsed)
  Biobase::phenoData(eset_post) <-
    Biobase::AnnotatedDataFrame(Biobase::pData(eset_pre))
  eset_post
}
# Read a comma-separated annotation file for inspection.
#
# Every column is read as character and the first line is taken as the header.
# Returns the table produced by fread().
screenAnnotationFile <- function(file) {
  require(data.table)
  fread(file, sep = ",", colClasses = "character", header = TRUE)
}
# Parse the tab-delimited table embedded in a GEO text file.
#
# Only lines containing a tab are kept. The first such line supplies the column
# names and the first field of every line supplies the row names.
#
# Arguments:
#   geoFilename path to the file to read.
#
# Returns a character matrix of the table body (header row and ID column
# removed, kept as dimnames). It is always a matrix, even when only one row or
# one column remains. All entries are strings, never numeric; a trailing empty
# field is stored as the literal string "NA".
loadGeoFile <- function(geoFilename) {
  lines <- readLines(geoFilename)
  lines <- lines[grepl("\t", lines, fixed = TRUE)]
  if (length(lines) == 0) {
    stop("No tab-delimited lines found in ", geoFilename, call. = FALSE)
  }

  # strsplit() drops a trailing empty field, so give it a value first to keep
  # every row at the same width.
  lines <- sub("\t$", "\tNA", lines)
  fields <- strsplit(lines, "\t", fixed = TRUE)

  # A rectangular table needs the same number of fields on every line.
  n_fields <- lengths(fields)
  if (any(n_fields != n_fields[1])) {
    stop("Tab-delimited lines in ", geoFilename,
         " do not all have the same number of fields", call. = FALSE)
  }

  cells <- do.call(rbind, fields)
  dimnames(cells) <- list(cells[, 1], cells[1, ])

  # Drop the header row and ID column; drop = FALSE keeps the matrix shape.
  cells[-1, -1, drop = FALSE]
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.