Nothing
IADJ_pvals = function(mm, treatment, protein_group=2) {
# IADJ_pvals = function(mm, treatment, plot_hist=FALSE, plot_volc=FALSE, p=0.05){
# computes adjusted pvalues for imputed data
# here we assume that p-value distribution for the Null peptides
# (non-differentially expressed) are uniformly distributed. So we will adjust all
# p-values to assure that the tail of the distribution is uniformly distributed.
# For most label-free data this will be a correct assumption. But if this
# assumption is not valid for a dataset this adjusted computation shold nto be used.
#
# Input:
# input: An m x n (peptides x (n+2 samples)) matrix of expression data
# where first column contains peptide identifier and second column
# contains protein identifier
# treatment: vector indicating the treatment group of each sample ie [1 1 1 1 2 2 2 2...]
# plot_hist: T/F plot histogram of pvalues
# plot_vol: T/F volcano plot of pvalues
# p: level of significance for volcano plot
#
# Output: list of:
# protein_effect: matrix of effect sizes and p-values of differential
# expression for each protein. Null hypothesis:
# no group difference/differential expression
# peptide_effect: as above for all individual peptides.
#
# data<- input[,-(1:2)]
# rownames(data) <- input[,1]
# prot.info <- data.frame(Pep=input[,1], Prot=input[,2])
# treatment = as.factor(treatment)
prot.info <- get.ProtInfo(mm) # pepID, prID - not as in our text file data frame? here a mat
row.key <- colnames(prot.info)[1]
prot.info <- cbind(prot.info[,1], prot.info[,protein_group])
colnames(prot.info) <- c(row.key, protein_group)
formula.y_temp = as.formula(paste("y_temp ~", treatment))
formula.1pep = as.formula(paste("Y ~", treatment))
contrasts.1pep = list(f.peptide="contr.sum")
contrasts.Npep = list(.DanteR_block_name="contr.sum", f.peptide="contr.sum")
names(contrasts.Npep)[2] <- names(contrasts.Npep)[1] <- treatment
formula.Npep = as.formula(paste("Y ~ .DanteR_block_name +", treatment))
treatment.name <- treatment
treatment <- get.factors(mm)[,treatment,drop=F]
treatment.vector <- treatment[,1]
# print(treatment.vector)
# treatment factors
n.treatment <- length(treatment)
n.u.treatment <- length(unique(treatment))
# Match to protein
all.proteins <- unique(prot.info[,2])
betahat_prot <- array(NA, c(length(all.proteins), n.u.treatment-1))
betahat_pep <- NULL
pval_prot_imp <- array(NA, c(length(all.proteins), 501))
pval_pep_imp <- NULL
# Pre-fill the return array with names
names.df <- data.frame(Y = rnorm(length(treatment)), treatment)
fit1 <- lm(formula.1pep, data=names.df)
nv <- DanteR:::names.vector(fit1, 1)
prot.eff <- array(NA, c(length(all.proteins), length(nv)))
rownames(prot.eff) <- all.proteins
colnames(prot.eff) <- names(nv)
# print(prot.eff)
pep.eff <- array(NA, c(nrow(mm), length(nv)))
rownames(pep.eff) <- rownames(mm)
colnames(pep.eff) <- names(nv)
for (k in 1:length(all.proteins)) {
prot <- all.proteins[k]
pmid.matches <- prot.info[prot.info[,2]==prot,1]
# idx.prot <- which(rownames(data) %in% pmid.matches)
idx.prot <- which(rownames(mm) %in% pmid.matches)
# y_raw <- rbind(data[idx.prot,])
y_raw <- mm[idx.prot,,drop=F]
y <- as.vector(t(y_raw))
#rownames(y_raw) <- rownames(mm)[idx.prot]
n.peptide = nrow(y_raw)
if (nrow(y_raw) == 0) next
peptide <-rep(rep(1:n.peptide, each=n.treatment))
#f.treatment <- factor(rep(treatment, n.peptide))
f.peptide <- factor(peptide)
# adjusted pvalues for protein level
#stop("what")
imp <- protlevel_pvals(y_raw, treatment.vector)
pval_prot_imp[k,] <- imp$pval
# browser()
# calculate betahat for each peptide
beta_pep <- array(NA, n.peptide)
for (i in 1:n.peptide){
y_temp = as.numeric(y_raw[i,])
my.df = data.frame(y_temp=y_temp, treatment)
refit = lm(formula.y_temp, data = my.df)#, contrasts=contr.sum)
beta_pep[i] = coef(refit)[2]
nv <- DanteR:::names.vector(refit)
pep.eff[rownames(pep.eff)%in%rownames(y_raw)[i], names(nv)] <- nv
}
betahat_pep = c(betahat_pep, beta_pep)
# calculate betahat for each protein
if (n.peptide == 1){
my.df = data.frame(Y=y, treatment.vector)
colnames(my.df)[2] <- treatment.name
refit_prot <- lm(formula.1pep, data=my.df)#, contrasts=contrasts.1pep)
# refit_prot <- lm(y ~f.treatment, contrasts=list(f.treatment="contr.sum"))
} else {
my.df = data.frame(Y=y, .DanteR_block_name=f.peptide, treatment.vector)
colnames(my.df)[3] <- treatment.name
refit_prot <- lm(formula.Npep, data=my.df)#, contrasts=contrasts.Npep)
# refit_prot <- lm(y ~f.peptide+f.treatment,contrasts = list(f.treatment="contr.sum", f.peptide="contr.sum"))
}
nv <- DanteR:::names.vector(refit_prot)
# print(nv)
# print(prot.eff)
prot.eff[k,names(nv)] <- nv
betahat_prot[k,] <- coef(refit)[2]
} #end for each protein
# adjusted pvalues for protein level
#pval_prot_imp[is.na(pval_prot_imp)] <- 0
pval_prot_imp[is.na(pval_prot_imp)] <- NA # set to 0
prot_calc <- select_pvals(pval_prot_imp)
pval_prot <- array(prot_calc$select_pvals, c(length(all.proteins), 1))
rownames(pval_prot) <- all.proteins
# adjsuted pvalues fot peptide level
pval_pep_imp <- array(NA, c(nrow(mm), 501))
for (j in 1:nrow(mm)){
temp <- array(mm[j,], c(1, length(mm[j,])))
pepimp <- protlevel_pvals(temp, treatment.vector)
pval_pep_imp[j,] <- pepimp$pvals
}
# I assume pval will be NA if we were unable to estimate the grp difference
# ahouls not be an issue in imputed data
pval_pep_imp[is.na(pval_pep_imp)] <- 0
pep_calc <- select_pvals(pval_pep_imp)
pval_pep <- array(pep_calc$select_pvals, c(nrow(mm), 1))
rownames(pval_pep) <- rownames(mm)
ADJfactor <- as.matrix(pep_calc$ADJfactor)
attr(pep.eff, "Row_Metadata") <- c(key = colnames(prot.info)[1], table = attr(mm, "Row_Metadata")[["table"]])
attr(prot.eff, "Row_Metadata") <- c(key = colnames(prot.info)[2], table = attr(mm, "Row_Metadata")[["table"]])
return(list(peptide_effects = pep.eff,
protein_effects = prot.eff,
ADJfactor=ADJfactor))
# return(list(pval_pep=pval_pep, pval_prot=pval_prot,
# prot_betahat=betahat_prot, pep_betahat=betahat_pep,
# ADJfactor=ADJfactor))
#
}
######################################################################
# helper functions below
######################################################################
protlevel_pvals = function(pr, treatment){
# function [pvals, betahat, prnames] = urpvals_imp(dname, grps, nheadercols, nheaderrows)
# compute p-values for imputed proteins using adjusted (fudged) likelihood ratio statistics.
#
# Ours is model-based imputation as described in: "A Statistical Framework for Protein
# Quantification in Botton-Up MS-Based Proteomcs", Bioinformatics 2009.
#
# INPUT: pr - one protein with all peptides passed in. npep x nsamp (?)
# grps - vector of group assignments for each sample: [1 1 1 1 1 2 2 2 2 2 3 3 3 3 3]
#
# OUTPUT: pvals - vector of adjusted p-values (501 values) for thsi protein. From here
# the values will need to be selected to fit the null distribution.
#
ugrps = unique(treatment)
ng = length(ugrps)
fudgeFs = seq(1, 10, length=501) #this makes 500 factors, no other significance in 0.018
lf = length(fudgeFs)
nsamps = ncol(pr)
npep = nrow(pr)
allY <- c(as.matrix(t(pr)))
if (npep >=2){
grps2 <- factor(rep(treatment, npep))
peps <-factor(rep(rep(1:npep, each=nsamps)))
X2 <- model.matrix(~peps, contrasts = list(peps="contr.sum") )
X <- model.matrix(~grps2 + peps, contrasts = list(grps2="contr.sum", peps="contr.sum") )
fit_null <- lm(allY~ X2 -1)
fit_unrestricted <- lm(allY~ X -1)
results <- anova(fit_null, fit_unrestricted)
ff <- results[5][2,]
} else{
grps2 <- factor(rep(treatment, npep))
X <- model.matrix(~grps2, contrasts=list(grps2="contr.sum"))
X2 <- array(1, c(nrow(X),1))
fit_null <- lm(allY~ X2 -1)
fit_unrestricted <- lm(allY~ X -1)
results <- anova(fit_null, fit_unrestricted)
ff <- results[5][2,]
}
#DFtr = ncol(X)- ncol(X2) # tr - tretment group
#DFres = nrow(X2) - ncol(X) # sum(ngrps(varPreserve)) + 1
# TT - updated by YK 4-5-11
DFtr = ncol(X)- ncol(X2) # tr - tretment group
# the following lines incorrect as disregarded NAs...
# DFres = nrow(X2) - ncol(X) # sum(ngrps(varPreserve)) + 1
DFres = fit_unrestricted$df.residual
# OR equivalent: # DFres = sum(!is.na(allY)) - ncol(X)
pvals <- array(NA, lf)
# adjust the test statistic by [1, 1.18, ... 10] factors
for (j in 1:lf){
pvals[j] = 1-pf(ff/fudgeFs[j],DFtr,DFres)
}
rv = list(pvals=pvals)
return(rv)
}
#########################################################
select_pvals = function(pvals_impute){
# choose adjust pvalues using regression
# input:
# pvals_impute- an m x length(fudge factor) matrix of adjusted pvalues
#
# output:
# select_pval- column of pvalues corresponding to the smallest fudge
# factor that produced a positive slope
# k - fudge factor position
#
fudgeFs = seq(1, 10, by=0.018) #this makes 500 factors, no other significance in 0.018
intervals = array(NA, c(501, 6))
N_FUDGE = 501
for (k in 1:N_FUDGE){ #% 501 fudge factors for likelihood test statistic
intervals[k,1] = sum(pvals_impute[,k] >= 0.7 & pvals_impute[,k] <0.75) #[.7 .75]
intervals[k,2] = sum(pvals_impute[,k] >= 0.75 & pvals_impute[,k] <0.8) #[.75 .8]
intervals[k,3] = sum(pvals_impute[,k] >= 0.8 & pvals_impute[,k] <0.85) #[.8 .85]
intervals[k,4] = sum(pvals_impute[,k] >= 0.85 & pvals_impute[,k] <0.9) #[.85 .9]
intervals[k,5] = sum(pvals_impute[,k] >= 0.9 & pvals_impute[,k] <0.95) #[.9 .95]
intervals[k,6] = sum(pvals_impute[,k] >= 0.95 & pvals_impute[,k] <=1) #[.95 1]
#count_pvals <<- (pvals_impute[,k] >= 0.7 & pvals_impute[,k] <0.75)
# print("k: ")
# print(k)
# print("intervals:")
# print(intervals[1:6,])
#Sys.sleep(4)
# compute regression line between [1 2 3 4 5 6...] & counts
# select the min fudge factor for which slope is positive
counts = factor(c(1:6))
X = model.matrix(~counts, contrasts=list(counts="contr.sum"))
y <- as.vector(t(intervals[k,]))
betas = drop(solve(t(X) %*% X) %*% t(X) %*% y)
effects = X%*%betas
m = (effects[1] - effects[6]) / (-5)
if (m > 0){
select_pvals = pvals_impute[,k]
select_fudgeF = fudgeFs[k]
# select_fundgeF = k
break
}
# if we do not get a pos slope in 501 iteration, then use the lagest
# adjustment factor we got
if (m <= 0){
select_pvals = pvals_impute[,k]
select_fudgeF = fudgeFs[k]
}
}
return(list(select_pvals=select_pvals, ADJfactor=select_fudgeF))
}
##########################################################################
# dialog for DanteR
##########################################################################
IADJ_pvals.dialog = list(title='Compute p-values adjusted for imputation',
mm.dataframeItem=NULL, label='Data matrix',
signal = list("default", "get.dataset.factors", "treatment", user.data=list(include.primary=FALSE)),
signal = list("default", "get.dataset.row.metadata.fields", "protein_group", user.data=list(include.primary=FALSE)),
treatment.choiceItem=NULL, label='Factors',
protein_group.choiceItem = NULL, label = "Field Containing Proteins"
)
##signal = c("default", "toggle.sensitive", "plot_hist"),
# plot_hist.trueFalseItem = TRUE, label='Plot p-value histogram',
#
# # signal = c("default", "toggle.sensitive", "plot_volc"),
# plot_volc.trueFalseItem = TRUE, label='Plot volcano Plots',
#
# signal = c("default", "toggle.sensitive", "p"),
# p.numericItem=0.05, label='Significance level in volcano plots'
# )
CensorANOVA.dialog <- list(
title = "Model Based Impute/Filter/ANOVA",
label = "Filtering, imputation and ANOVA probability calculation with rescaling",
m.dataframeItem=NULL, label='Data matrix',
signal = c("default", "get.dataset.factors", "treatment"),
signal = c("default", "get.dataset.row.metadata.fields", "protein_group"),
treatment.choiceItem=NULL, label='Factors',
protein_group.choiceItem = NULL, label = "Field Containing Proteins",
BREAK=TRUE,
compute_pi.trueFalseItem=FALSE, label='Manually Estimate PI',
tooltip = "If this is checked, the user can set the estimated random missingness probability",
signal = c("default", "toggle.sensitive", "my.pi"),
my.pi.numericItem="0.05", label='PI', indent=10,
estimatePiOnly.trueFalseItem = FALSE, label = "Estimate Pi Only", tooltip = "Users should check this value before proceeding with filtering and imputation",
signal = c("default", function(item, userChoice) {
userChoice$setSensitive(TRUE)
if(get.value(item)) userChoice$setSensitive(FALSE)
}, "userChoice"),
userChoice.radiobuttonItem = c("Filter Only", value="Filter, Impute, ANOVA"), label ="Choose Task", tooltip = "Choose to do filtering, then imputation and adjust p-values", indent=10
)
CensorANOVA <- function(m, treatment, protein_group, compute_pi=TRUE,
my.pi = 0.05, topNPep=1E6, estimatePiOnly = FALSE,
userChoice = "Filter, Impute, Report P-values"){
if (userChoice == "Filter Only"){
doFilter=TRUE; doImpute=FALSE; doAdjustPvals=FALSE;
} else {
doFilter=TRUE; doImpute=TRUE; doAdjustPvals=TRUE;
}
protein_effects <- peptide_effects <- imputed <- filtered <- NULL
output_values <- array(NA, c(3,1))
colnames(output_values) <- deparse(substitute(m))
rownames(output_values) <- c("Pi_from_filter", "Pi_from_impute", "P_Adjfactor")
if(doFilter){
rv_filter <- MBfilter(m, treatment, protein_group=protein_group,
my.pi=my.pi, compute_pi=compute_pi, topNPep=topNPep, compute_pi_only=estimatePiOnly)
m <- filtered <- rv_filter$y_filtered
output_values[1,1] <- rv_filter$pi[1,1]
}
if(estimatePiOnly){
return(list(pi=rv_filter$pi))
}
if(doImpute){
rv_impute <- MBimpute(m, treatment, protein_group=protein_group, my.pi=my.pi, compute_pi=compute_pi)
m <- imputed <- rv_impute$y_imputed
output_values[2,1] <- rv_impute$pi[1,1]
}
if(doAdjustPvals){
rv_adjusted_pvals <- IADJ_pvals(m, treatment, protein_group=protein_group)
peptide_effects <- rv_adjusted_pvals$peptide_effects
protein_effects <- rv_adjusted_pvals$protein_effects
output_values[3,1] <- rv_adjusted_pvals$ADJfactor[1,1]
}
rv <- list(
filtered = filtered,
imputed = imputed,
peptide_effects = peptide_effects,
protein_effects = protein_effects,
output_values = output_values
)
return(rv)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.