Nothing
#############################################################
# Peptide rollup functions (reference based) used in DAnTE #
# #
# By Ashoka D. Polpitiya #
#############################################################
RRollup.dialog <- list(
label = "\nCrosstab Rollup. Select a Data Crosstab with associated Row Metadata and the field to roll up.\n",
show.progress=TRUE,
Data.dataframeItem="T_Data", label="Data Source (must be log transformed)",
tooltip = "This method assumes that the data is in log scale.",
signal = c("default", "get.dataset.row.metadata.fields", "rollup.field"),
rollup.field.choiceItem = NULL, label = "Choose Field Containing Proteins",
minPresence.numericItem=50, label = "Minimum presence % of at least one peptide",
tooltip="Peptides with less values present than this percentage are dropped.",
Mode.radiobuttonItem=c(value="median", "mean"), labels=c("Median", "Mean"), label="Mode", BREAK=T,
minOverlap.integerItem=3, label="Minimum dataset presence",
tooltip = "Within a group of peptides for a specific protein, the ones that do not overlap well (controlled by this value) are not scaled but they are kept to calculate the final protein abundance.",
oneHitWonders.trueFalseItem=FALSE, label="Include One Hit Wonders",
tooltip = "Protein with only one observed peptide will be included in the final list of proteins. The rationale behind this is that if a particular protein may have only one peptide but it may be quite abundant and present throughout giving some strong confidence on the presence of the protein.",
reportCount.trueFalseItem=TRUE, label="Report Count Table",
tooltip = "Include a table containing peptide counts per protein in the output",
gpvalue.numericItem=0.05, label="p-value Cutoff for Grubbs' Test",
gminPCount.integerItem=5, label="Minimum Number of Peptides required for Grubbs' Test",
tooltip = "Grubb's test for outliers is performed for peptide groups in each dataset. This value controls the minimum number required to perform the test." ,
center.trueFalseItem=FALSE, label="Mean Center Proteins to Zero Mean"
)
CountRollup <- function(MSdat1, ProtInfo){
myTfrm <- function(x) {
rownames(x) <- x[,1]
x[,-1,drop=F]
}
library(reshape)
print("Reshaping data..."); flush.console()
MSdat <- cbind(ProtInfo, MSdat1)
print("Melting data..."); flush.console()
cc.melt <- melt(MSdat, id.vars = c("Reference", "Mass Tag ID"), variable_name = "Job")
print("Data melt done. Casting..."); flush.console()
MSpivot <- cast(cc.melt, Reference~Job, function(x) sum(!is.na(x)))
print("Data cast done."); flush.console()
MSpivot <- myTfrm(MSpivot)
MSpivot <- cbind(Total = MSpivot[,1], MSpivot[,colnames(MSdat1),drop=F])
return(MSpivot)
}
RRollup <- function(Data, rollup.field, minPresence=50, Mode="median",
minOverlap=3, oneHitWonders=TRUE, reportCount=FALSE, outfolder="C:/",
plotflag=FALSE, gpvalue=0.05, gminPCount=5, center=TRUE,
progressbar=NULL, progresslabel=NULL
)
#
# Ashoka Polpitiya - July 2007
#
{
# from metadata
ProtInfo <- get.ProtInfo(Data)
if(!rollup.field%in%colnames(ProtInfo)) stop("Rollup field not found in peptide metadata table")
data_attributes <- attributes(Data)
ProtInfo <- unique(ProtInfo)
MassTags <- ProtInfo[,1]
protIPI <- ProtInfo[,rollup.field]
minCountPerP <- 2
minPresence <- minPresence/100
uProtCounts <- table(protIPI)
sigIPI <- names(uProtCounts[uProtCounts >= minCountPerP])
if(!length(sigIPI)) stop(paste("No proteins were seen with more than", minCountPerP, "peptides"))
restIPI <- names(uProtCounts[(uProtCounts < minCountPerP) & (uProtCounts > 0)])
#print(restIPI)
threshold <- round(dim(Data)[2] * minPresence)
scaledData <- rep(numeric(0),dim(Data)[2])
orData <- rep(numeric(0),dim(Data)[2])
protData <- rep(numeric(0),dim(Data)[2]+1)
singlePepProtData <- rep(numeric(0),dim(Data)[2]+1)
# Check that all mass tags are matched
if(!any(row.names(Data)%in%MassTags)) stop("ProtInfo first column doesn't match any row names of data")
if(!all(row.names(Data)%in%MassTags)) stop("ProtInfo first column doesn't match all row names of data")
proteinNames <- "empty"
oneHitProtNames <- "empty"
k = 1
Nprots <- length(sigIPI)
for (prot in 1:length(sigIPI))
{
if(prot%%10==0) {
if(!missing(progressbar)) progressbar$setFraction(prot/Nprots)
if(!missing(progresslabel)) progresslabel$setText(paste("Status:", prot, "/", Nprots))
}
#print(prot); flush.console()
pidx <- which(sigIPI[prot]==protIPI)
data_idx <- is.element(row.names(Data),MassTags[pidx])
currProtData <- Data[data_idx,,drop=F]
if (is.matrix(currProtData) || is.data.frame(currProtData))
if (dim(currProtData)[1] > 1)
{
xPresenceCount <- rowSums(!is.na(currProtData), na.rm=TRUE)
if (max(xPresenceCount, na.rm=TRUE) >= threshold)
{
pData <- protein.rollup1(currProtData,minOverlap=minOverlap,
Mode=Mode, minPs=gminPCount, pvalue=gpvalue, center=center)
scaledData <- rbind(scaledData,pData$sData)
orData <- rbind(orData,pData$orData)
protData <- rbind(protData,pData$pData)
proteinNames[k] <- sigIPI[prot]
if (plotflag)
{
outfile = paste(outfolder,k,".png",sep="")
plotCurrProt.RefRup(currProtData, pData, file=outfile,IPI=sigIPI[prot])
}
k = k + 1
}
}
}
rownames(protData) <- proteinNames
scaledData <- remove.duplicates(scaledData)
orData <- remove.duplicates(orData)
if (oneHitWonders)
{
k = 1
for (prot in 1:length(restIPI))
{
pidx <- which(restIPI[prot]==protIPI)
data_idx <- is.element(row.names(Data),MassTags[pidx[1]])
currProtData <- Data[data_idx,,drop=F]
xPresenceCount <- sum(!is.na(currProtData))
if (xPresenceCount >= threshold)
{
#singlePepProt <- c(PepCount=1,currProtData)
singlePepProtData <- rbind(singlePepProtData,currProtData)
oneHitProtNames[k] <- restIPI[prot]
k = k + 1
}
}
rownames(singlePepProtData) <- oneHitProtNames
if (center)
{
singlePepProtData1 <- singlePepProtData[,-1]
scaledSinglePepProtData <- t(scale(t(singlePepProtData1),center=T,scale=F))
singlePepProtData <- cbind(singlePepProtData[,1],scaledSinglePepProtData)
}
outProtData <- rbind(protData,singlePepProtData)
}
else
outProtData <- protData
attr(outProtData, "Column_Metadata") <- attr(scaledData, "Column_Metadata") <- attr(Data, "Column_Metadata")
attr(scaledData, "Row_Metadata") <- attr(Data, "Row_Metadata")
attr(outProtData, "Row_Metadata") <- c(table=attr(Data, "Row_Metadata")[["table"]], key = rollup.field)
# out <- list(ScaledData=scaledData, OriginalData=orData, ProteinData=outProtData)
out <- list(Scaled=scaledData, RolledUp=outProtData)
if(reportCount) {
# uProtCounts <<- uProtCounts
# outProtData <<- outProtData
countArr <- cbind(Count=uProtCounts[names(uProtCounts)%in%rownames(outProtData)])
# print(dim(countArr))
attr(countArr, "Row_Metadata") <- attr(outProtData, "Row_Metadata")
out <- list(Scaled=scaledData, RolledUp=outProtData, Count = countArr)
}
# out <<- out
return(out)
}
protein.rollup2 <- function(xx, Nmax=5, ...){
if(nrow(xx) > Nmax)
xx <- xx[order(apply(xx, 1, median, na.rm=T), decreasing=T)[1:Nmax],,drop=F]
f_exp <- factor(rep(1:ncol(xx), nrow(xx)))
f_pep <- factor(rep(1:nrow(xx), each=ncol(xx)))
fit2 <- rlm(Y ~ f_pep + f_exp - 1, contrasts = list(f_pep = "contr.sum", f_exp = "contr.sum"))
ff1 <- fit2$coef[nrow(xx)+1:(ncol(xx)-1)]
median(fit2$coef[1:nrow(xx)]) + c(ff1, -sum(ff1))
}
#------------------------------------------------------------------
protein.rollup1 <- function(currSel, minOverlap=3, Mode="median",
minPs=5, pvalue=0.05, center=TRUE)
{
pepCounts <- rowSums(!is.na(currSel)) # presence of a peptide sample-wise
pepMaxCounts <- which(pepCounts == max(pepCounts)) # maximally present ones
if (length(pepMaxCounts) > 1)
{
possibleRefs <- currSel[pepMaxCounts,] # multiple refrence peptides?
totalAbundances <- rowSums(possibleRefs, na.rm=TRUE) # pick the one with highest overall abundance
refs <- which(totalAbundances == max(totalAbundances,na.rm=TRUE))
reference <- pepMaxCounts[refs[1]]
}
else
{
reference <- pepMaxCounts[1]
}
currSelAdj <- t(t(currSel) - as.vector(t(currSel[reference,]))) #get ratios
overlapCount <- rowSums(!is.na(currSelAdj)) # count the number of non-missing
overlapMedians <- apply(currSelAdj,1,median,na.rm=T) # median ratios for each peptide
overlapMedians[which(overlapCount < minOverlap)] <- 0 # get rid of medians for sparse peptides
currSel <- currSel - overlapMedians # adjust the originals with median ratios
orCurrSel <- remove.outliers(currSel, minPs=minPs, pvalue=pvalue)
if (center)
{
scaled <- t(scale(t(currSel),center=T,scale=F))
scaled.or <- t(scale(t(orCurrSel),center=T,scale=F))
}
else
{
scaled <- currSel
scaled.or <- orCurrSel
}
if (Mode=="median")
proteinVal <- apply(scaled.or, 2, median, na.rm=T) # rollup to proteins as medians
else
proteinVal <- apply(scaled.or, 2, mean, na.rm=T)
#proteinVal <- c(PepCount=dim(scaled.or)[1],proteinVal) # append the column with peptide counts
out <- list(sData=scaled, orData=scaled.or, pData=proteinVal)
return(out)
}
#-----------------------------------------------------------------------------
remove.outliers <- function(Data, minPs=5, pvalue=0.05)
# internal function used by normalize.proteins()
# Calculates the protein abundances after removing outliers
# Depends on package "outliers"
{
library(outliers)
ColNames = colnames(Data)
xPeptideCount <- colSums(!is.na(Data))
for (i in 1:dim(Data)[2])
{
if (xPeptideCount[i] >= minPs)
{
repeat
{
grubbs <- grubbs.test(Data[,i]) # Grubb's test
if ( (grubbs$p.value < pvalue) && (!is.nan(grubbs$statistic[2])) &&
(grubbs$statistic[2] != 0) ) # pass the p-value cutoff
{
Data[,i] <- rm.outlier.1(Data[,i],fill=TRUE,median=TRUE)
# fill the outlier with the median value
}
else { break }
}
}
}
return(Data)
}
##############################################################################
plotCurrProt.RefRup <- function(currData,pdata,file="deleteme.png",
bkground="transparent",
IPI="IPI:IPI00009793.1")
# pdata : output from protein rollup (scale.proteins)
{
#require(Cairo)
#CairoPNG(filename=file,width=IMGwidth,height=IMGheight,pointsize=FNTsize,bg=bkground,res=600)
#png(filename=file,width=1024,height=768,pointsize=12,bg=bkground,
# res=600)
tryCatch(
{
scaledData <- pdata$sData
protData <- pdata$pData
orData <- pdata$orData
par(mfrow=c(3,1))
matplot(t(currData),type="b",main=IPI,ylab="Raw Data")
matplot(t(scaledData),type="b",ylab="Scaled Data")
matplot(t(orData),type="b",ylab="Scaled and Outlier removed",
xlab=paste(dim(currData)[1],"Peptides",sep=" "))
lines(protData[-1],type="l",lwd=2)
},
interrupt = function(ex)
{
cat("An interrupt was detected.\n");
print(ex);
},
error = function(ex)
{
cat("An error was detected.\n");
print(ex);
},
finally =
{
cat("Releasing tempfile...");
dev.off()
cat("done\n");
}) # tryCatch()
}
#------------------------------------------------------------------
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.