Writing out identifier lists for other organisms (built from the ID-mapping tab files downloaded from the UniProt FTP site).
library(data.table)

# Directory that receives the "<organism>.id.txt" lists.
path <- "/Users/nickgiangreco/GitHub/Proteomic_Correlation_Shiny/DepLab/inst/extdata/ID_lists/"

# Input ID-mapping files, keyed by the organism label used in the output
# file name ("mouse" -> "mouse.id.txt", ...).
idmapping_files <- c(
  mouse = "~/Downloads/MOUSE_10090_idmapping.dat",
  rat   = "~/Downloads/RAT_10116_idmapping.dat",
  worm  = "~/Downloads/CAEEL_6239_idmapping.dat"
)

for (organism in names(idmapping_files)) {
  # The input is read as a header-less, tab-separated table; its three
  # columns are named uni / id / name below.
  tab <- fread(idmapping_files[[organism]], sep = "\t", header = FALSE)
  setnames(tab, c("uni", "id", "name"))

  # Keep only the "Gene_Name" rows and, of those, the first and third column.
  # Selecting by name avoids relying on how `DT[, c(1, 3)]` is interpreted.
  subsubtab <- tab[id == "Gene_Name", .(uni, name)]

  # Plain two-column, tab-separated output without header, row names or quotes.
  write.table(
    subsubtab,
    paste0(path, organism, ".id.txt"),
    quote = FALSE,
    row.names = FALSE,
    col.names = FALSE,
    sep = "\t"
  )
}
Tabulating the distribution of the first character of the IDs for each organism.
# Directory holding the "<organism>.id.txt" lists.
path <- "/Users/nickgiangreco/GitHub/Proteomic_Correlation_Shiny/DepLab/inst/extdata/ID_lists/"

# Read "<organism>.id.txt" from `path` and count how often each character
# occurs as the first character of the entries in its first column.
first_char_table <- function(organism) {
  ids <- read.table(paste0(path, organism, ".id.txt"), sep = "\t")
  fchar <- substr(ids[, 1], start = 1, stop = 1)
  table(fchar)
}

for (organism in c("human", "yeast", "mouse", "rat", "worm")) {
  # Label each table so the five outputs can be told apart; print() is
  # required because values are not auto-printed inside a loop.
  cat("\n", organism, "\n", sep = "")
  print(first_char_table(organism))
}
Implementing support for other organisms in the cleaning function (`cleaning_MQ`).
#' Filter a MaxQuant protein table by its `Protein.IDs` column
#'
#' The filters are applied one after another to the same data frame, so
#' combining several of them may clash: once the table has been reduced by one
#' filter, a later filter can only match what is left.
#'
#' @param mq.df Data frame with a `Protein.IDs` column.
#' @param remove.contaminants If `TRUE`, rows whose `Protein.IDs` contain
#'   "CON" are removed.
#' @param remove.decoys If `TRUE`, rows whose `Protein.IDs` contain "REV" are
#'   removed.
#' @param poi `NULL` (no organism filter) or a single string. `"yeast"` keeps
#'   rows whose `Protein.IDs` start with "Y" or "Q"; any other value keeps rows
#'   whose `Protein.IDs` contain a UniProt-style accession number.
#' @param spikeIn `NULL`, `"trypsin"`, or a character vector of IDs to extract.
#'   Must not be combined with `poi`. For `"trypsin"`, rows matching P00761 that
#'   also contain "CON" are kept. Other IDs are first validated with
#'   `check_nomenclature()` (defined elsewhere).
#' @return The filtered data frame. A warning is raised if the input is empty,
#'   if no filter is requested, or if no row survives the filtering.
cleaning_MQ <- function(mq.df, remove.contaminants = TRUE, remove.decoys = TRUE,
                        poi = NULL, spikeIn = NULL) {
  if (nrow(mq.df) == 0) {
    warning("The input to cleaning_MQ is empty.")
  }

  mq.out <- mq.df

  if (!remove.contaminants && !remove.decoys && is.null(poi) && is.null(spikeIn)) {
    warning("Note that none of the offered filtering options is set. The in-going data frame should be the same as the out-going one.")
  }

  if (remove.contaminants) {
    mq.out <- subset(mq.out, !grepl("CON", Protein.IDs))
  }

  if (remove.decoys) {
    mq.out <- subset(mq.out, !grepl("REV", Protein.IDs))
  }

  if (!is.null(poi)) {
    if (poi == "yeast") {
      mq.out <- subset(mq.out, grepl("^[YQ]+", Protein.IDs))
    } else {
      # the massive regex in the middle is from TrEMBL
      # (http://www.uniprot.org/help/accession_numbers)
      mq.out <- subset(
        mq.out,
        grepl("([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})", Protein.IDs)
      )
    }
  }

  # extracting spiked-in proteins
  if (!is.null(spikeIn)) {
    if (!is.null(poi)) {
      stop("The option to retrieve spike-in entries is not compatible with retrieving yeast or human entries. Set `poi = NULL`.")
    }

    if (remove.contaminants) {
      # The trypsin branch below requires "CON" in the ID, which the
      # contaminant filter above has already removed.
      warning(paste0(
        "Extracting the results for spike-ins while at the same time removing contaminants will probably not yield the desired results (if anything). \n",
        "Recommended settings: remove.contaminants = FALSE, remove.decoys = TRUE, poi = NULL"
      ))
    }

    if (all(spikeIn == "trypsin")) {
      mq.out <- subset(
        mq.out,
        grepl("P00761$|P00761[^a-zA-Z0-9]", Protein.IDs) & grepl("CON", Protein.IDs)
      )
    } else {
      ID.check <- check_nomenclature(spikeIn)
      if (!all(ID.check)) {
        stop("The ID(s) you supplied to `spikeIn =` do(es) not meet the UniProt or yeast gene nomenclature criteria.")
      }
      # Match each ID either at the end of the string or followed by a
      # non-alphanumeric character, so an ID is not matched as a mere prefix
      # of a longer one.
      reg.1 <- paste0(spikeIn, "$", collapse = "|")
      reg.2 <- paste0(spikeIn, "[^a-zA-Z0-9]", collapse = "|")
      reg.combi <- paste(reg.1, reg.2, sep = "|")
      mq.out <- subset(mq.out, grepl(reg.combi, Protein.IDs))
    }
  }

  # done cleaning
  if (nrow(mq.out) == 0) {
    warning("None of the entries in the MaxQuant output survived the cleaning. Check that you selected the correct organism for the data that you uploaded.")
  }

  mq.out
}

mq.h <- reading_MQ(system.file("extdata", "test_data", "proteinGroups_human.txt", package = "DepLab"))
mq.h.clean <- cleaning_MQ(mq.h, remove.contaminants = TRUE, remove.decoys = TRUE, poi = "human")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.