# Not to be run -- only to understand the cleaning function.
#
# This is the body of the MaxQuant cleaning routine (its messages refer to it
# as `cleaning_MQ`) unrolled into a flat script. It reads a data frame `mq.df`
# that must already exist in the calling environment and filters it on its
# `Protein.IDs` column; the filtered result ends up in `mq.out`.
# The four settings below stand in for the function's arguments.
remove.contaminants <- TRUE
remove.decoys <- TRUE
poi <- NULL
spikeIn <- NULL

# Currently, this function will subset the data.frame more and more, thus
# multiple filtering options may clash. E.g., if the data.frame is already
# filtered to only contain trypsin-related entries, it will most likely not
# find anything related to a Uniprot search for non-trypsin proteins.
if (nrow(mq.df) == 0) {
  warning("The input to cleaning_MQ is empty.")
}

mq.out <- mq.df

if (!remove.contaminants && !remove.decoys && is.null(poi) && is.null(spikeIn)) {
  warning("Note that none of the offered filtering options is set. The in-going data frame should be the same as the out-going one.")
}

# Drop every row whose protein ID contains "CON".
# NOTE(review): the pattern is unanchored, so any ID containing the letters
# "CON" is removed as well -- confirm this is intended.
if (remove.contaminants) {
  mq.out <- subset(mq.out, !grepl("CON", mq.out$Protein.IDs))
}

# Drop every row whose protein ID contains "REV" (same caveat as above).
if (remove.decoys) {
  mq.out <- subset(mq.out, !grepl("REV", mq.out$Protein.IDs))
}

# Keep only the proteins of interest for the organism of the experiment.
if (!is.null(poi)) {
  if (poi == "yeast") {
    mq.out <- subset(mq.out, grepl("^[YQ]+", Protein.IDs))
  } else if (poi == "human") {
    # the massive regex in the middle is from TrEMBL (http://www.uniprot.org/help/accession_numbers)
    mq.out <- subset(mq.out, grepl("([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})", Protein.IDs))
  } else {
    stop("If you would like to retrieve the proteins of interest for the organism for which the experiment was done, specify one of the available options: ‘yeast’ or ‘human’.")
  }
}

# extracting spiked-in proteins
if (!is.null(spikeIn)) {
  if (!is.null(poi)) {
    stop("The option to retrieve spike-in entries is not compatible with retrieving yeast or human entries. Set `poi = NULL`.")
  }

  # Contaminant rows were already removed above when remove.contaminants is
  # TRUE, so the trypsin branch below (which requires "CON") would find nothing.
  if (remove.contaminants) {
    warning("Extracting the results for spike-ins while at the same time removing contaminants will probably not yield the desired results (if anything). 
Recommended settings: remove.contaminants = FALSE, remove.decoys = TRUE, poi = NULL")
  }

  if (all(spikeIn == "trypsin")) {
    # Trypsin: accession P00761, and the row must also carry "CON".
    mq.out <- subset(
      mq.out,
      grepl("P00761$|P00761[^a-zA-Z0-9]", Protein.IDs) & grepl("CON", Protein.IDs)
    )
  } else {
    # `check_nomenclature` is defined elsewhere; it is used here as a
    # per-ID logical validity check.
    ID.check <- check_nomenclature(spikeIn)
    if (!all(ID.check)) {
      stop("The ID(s) you supplied to `spikeIn =` do(es) not meet the UniProt or yeast gene nomenclature criteria.")
    }
    # Match each ID either at the end of the string or followed by a
    # non-alphanumeric character, so that an ID does not match as a prefix of
    # a longer one.
    reg.1 <- paste(paste0(spikeIn, "$"), collapse = "|")
    reg.2 <- paste(paste0(spikeIn, "[^a-zA-Z0-9]"), collapse = "|")
    reg.combi <- paste(reg.1, reg.2, sep = "|")
    mq.out <- subset(mq.out, grepl(reg.combi, Protein.IDs))
  }
}

# done cleaning
if (nrow(mq.out) == 0) {
  warning("None of the entries in the MaxQuant output survived the cleaning. Check that you selected the correct organism for the data that you uploaded.")
}

dim(mq.out)

#code for cleaning data
#code for reading:

# Read a tab-separated MaxQuant output file and report its dimensions.
#
# filename: path to the tab-delimited file; the first row is used as header.
# Returns the result of dim() on the parsed table (rows, columns).
# NOTE(review): only the dimensions are returned, not the data frame itself;
# that suits this walkthrough but confirm before reusing the function.
reading_MQ <- function(filename) {
  mq.in <- read.table(
    filename,
    header = TRUE,
    sep = "\t",
    strip.white = TRUE,
    fill = TRUE,
    stringsAsFactors = FALSE,
    comment.char = "" # important to also capture cases with a # in the Fasta header
  )
  dim(mq.in)
}
# Scratch script: load the first data object shipped in a local checkout of
# the DepLabData repository, then attach the installed package and open the
# help page for the `DN_trial1` data set.
path <- "/Users/nickgiangreco/GitHub/DepLabData/data/"

# Take the first file in the data directory (alphabetical order, as returned
# by list.files).
objfile <- list.files(path = path)[1]
if (is.na(objfile)) {
  # Without this guard load() would be called on a path ending in "NA".
  stop("No data files found in ", path, call. = FALSE)
}

# `path` already ends in "/", so plain concatenation yields a valid file path.
load(paste0(path, objfile))
dim(DN_trial1)

# NOTE(review): devtools appears to be needed only for the commented-out
# install() call below.
require(devtools)
#DepLabData package location in local
#install( "/Users/nickgiangreco/GitHub/DepLabData" )
library(DepLabData)
?DN_trial1
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.