#!/usr/bin/env Rscript
# title: tidyProt
# author: twab
# description: examine variance attributable to major experimental covariates
## ---- prepare the env
library(SwipProteomics)
# imports
suppressPackageStartupMessages({
library(dplyr)
library(ggplot2)
library(data.table)
library(doParallel)
})
## ---- inputs
data(swip_tmt)
tidy_prot <- swip_tmt
prot_col <- "Protein" # protein identifier column
sort_by <- "BioFraction" # how to arrange the plot
## ---- prepare to do work
# register parallel backend
doParallel::registerDoParallel(parallel::detectCores() - 1)
# loop through all proteins, fit the model with all
# experimental covariates modeled as random effects
fx <- log2(Intensity) ~ (1|Mixture) + (1|Genotype) + (1|BioFraction)
# NOTE: QC data were already removed!
# all prots
proteins <- unique(tidy_prot[[prot_col]])
## ---- loop to do work
# NOTE: this can take a couple minutes
pve_list <- foreach(prot = proteins) %dopar% {
lmer_control <- lme4::lmerControl(check.conv.singular="ignore")
fm <- lme4::lmer(fx, tidy_prot %>% subset(Protein == prot),
control = lmer_control)
vp <- getVariance(fm)
pve <- vp/sum(vp)
pve_df <- as.data.table(t(pve)) %>% mutate(Protein = prot) %>% select(-Fixed)
return(pve_df)
} #EOL
# collect results
prot_pve <- do.call(rbind, pve_list) %>%
reshape2::melt(id.var="Protein",
variable.name = "Parameter",
value.name = "Variance")
# summary
prot_pve %>% group_by(Parameter) %>%
summarize(Min=min(Variance),
Median=median(Variance),
Max=max(Variance), .groups="drop") %>%
knitr::kable()
## ---- generate a plot
df <- prot_pve
# set the order
sort_by <- "BioFraction"
xpos <- df %>% filter(Parameter == sort_by) %>%
arrange(desc(Variance)) %>% select(Protein) %>% unlist(use.names=FALSE)
df$xpos <- match(df$Protein,xpos)
# generate the plot
plot <- ggplot(df)
plot <- plot + aes(x=xpos)
plot <- plot + aes(y=Variance)
plot <- plot + aes(group=Parameter)
plot <- plot + aes(stat=Variance)
plot <- plot + aes(fill=Parameter)
plot <- plot + geom_area()
plot <- plot + xlab("Protein")
plot <- plot + ylab("Percentage of Variance")
plot <- plot + theme(axis.line.x=element_line())
plot <- plot + theme(axis.line.y=element_line())
plot <- plot + theme(panel.background = element_blank())
plot <- plot + theme(axis.text.x = element_text(color="black",size=11))
plot <- plot + theme(axis.text.x = element_text(angle=0,hjust=1,family="Arial"))
plot <- plot + theme(axis.text.y = element_text(color="black",size=11))
plot <- plot + theme(axis.text.y = element_text(angle=0,hjust=1,family="Arial"))
plot <- plot + theme(panel.border = element_rect(colour = "black", fill=NA))
plot <- plot + scale_x_continuous(expand=c(0,0))
plot <- plot + scale_y_continuous(expand=c(0,0))
## --- save the plot
# NOTE: warnings probs bc plot is really huge and gets saved w/o labels
#ggsave("protein-varpart.pdf", plot)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.