tutorials/plot-protein-varpart.R

#!/usr/bin/env Rscript

# title: tidyProt
# author: twab
# description: examine variance attributable to major experimental covariates

## ---- prepare the env

library(SwipProteomics)

# imports
suppressPackageStartupMessages({
  library(dplyr)
  library(ggplot2)
  library(data.table)
  library(doParallel)
})


## ---- inputs

data(swip_tmt)

tidy_prot <- swip_tmt

prot_col <- "Protein" # protein identifier column
sort_by <- "BioFraction" # how to arrange the plot


## ---- prepare to do work

# register parallel backend
doParallel::registerDoParallel(parallel::detectCores() - 1)

# loop through all proteins, fit the model with all
# experimental covariates modeled as random effects
fx <- log2(Intensity) ~ (1|Mixture) + (1|Genotype) + (1|BioFraction)

# NOTE: QC data were already removed!

# all prots
proteins <- unique(tidy_prot[[prot_col]])


## ---- loop to do work

# NOTE: this can take a couple minutes

pve_list <- foreach(prot = proteins) %dopar% {
  lmer_control <- lme4::lmerControl(check.conv.singular="ignore")
  fm <- lme4::lmer(fx, tidy_prot %>% subset(Protein == prot), 
		   control = lmer_control)
  vp <- getVariance(fm)
  pve <- vp/sum(vp)
  pve_df <- as.data.table(t(pve)) %>% mutate(Protein = prot) %>% select(-Fixed)
  return(pve_df)
} #EOL


# collect results
prot_pve <- do.call(rbind, pve_list) %>%
	reshape2::melt(id.var="Protein", 
		       variable.name = "Parameter", 
		       value.name = "Variance")

# summary
prot_pve %>% group_by(Parameter) %>%
	summarize(Min=min(Variance), 
		  Median=median(Variance), 
		  Max=max(Variance), .groups="drop") %>%
	knitr::kable()


## ---- generate a plot

df <- prot_pve

# set the order
sort_by <- "BioFraction"
xpos <- df %>% filter(Parameter == sort_by) %>%
	arrange(desc(Variance)) %>% select(Protein) %>% unlist(use.names=FALSE)
df$xpos <- match(df$Protein,xpos)

# generate the plot
plot <- ggplot(df)
plot <- plot + aes(x=xpos)
plot <- plot + aes(y=Variance)
plot <- plot + aes(group=Parameter)
plot <- plot + aes(stat=Variance)
plot <- plot + aes(fill=Parameter)
plot <- plot + geom_area()
plot <- plot + xlab("Protein")
plot <- plot + ylab("Percentage of Variance")
plot <- plot + theme(axis.line.x=element_line())
plot <- plot + theme(axis.line.y=element_line())
plot <- plot + theme(panel.background = element_blank())
plot <- plot + theme(axis.text.x = element_text(color="black",size=11))
plot <- plot + theme(axis.text.x = element_text(angle=0,hjust=1,family="Arial"))
plot <- plot + theme(axis.text.y = element_text(color="black",size=11))
plot <- plot + theme(axis.text.y = element_text(angle=0,hjust=1,family="Arial"))
plot <- plot + theme(panel.border = element_rect(colour = "black", fill=NA))
plot <- plot + scale_x_continuous(expand=c(0,0))
plot <- plot + scale_y_continuous(expand=c(0,0))

## --- save the plot

# NOTE: warnings probs bc plot is really huge and gets saved w/o labels
#ggsave("protein-varpart.pdf", plot)
twesleyb/tidyProt documentation built on Feb. 4, 2021, 4:42 p.m.