inst/doc/mass_comparison.R

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(dev = "png", dev.args = list(type = "cairo-png"))

options(digits=3)

library(tidyr)
library(dplyr)

calculate_mass <- function(dat){
  
  proton_mass <- 1.00727647
  
  dat %>%
    mutate(exp_mass = Center*z - z*proton_mass) %>%
    select(-Center, -z, -Modification, -Fragment) %>%
    group_by(Sequence, Start, End, MHP, MaxUptake, State, Exposure, Protein, File) %>%
    summarize(avg_exp_mass = weighted.mean(exp_mass, Inten, na.rm = TRUE)) %>%
    ungroup(.) %>% 
    group_by(Sequence, Start, End, MHP, MaxUptake, State, Exposure, Protein) %>%
    summarize(mass = mean(avg_exp_mass, na.rm = TRUE),
              err_mass = coalesce(sd(avg_exp_mass, na.rm = TRUE)/sqrt(sum(!is.na(avg_exp_mass))), 0),
              num = (sum(!is.na(avg_exp_mass)))) %>%
    ungroup(.) %>%
    arrange(Start, End, Start, Exposure) %>%
    as.data.frame()
  
}

calculate_mass_no_inten <- function(dat){
  
  proton_mass <- 1.00727647
  
  dat %>%
    mutate(exp_mass = Center*z - z*proton_mass) %>%
    select(-Center, -z, -Modification, -Fragment) %>%
    group_by(Sequence, Start, End, MHP, MaxUptake, State, Exposure, Protein, File) %>%
    summarize(avg_exp_mass = mean(exp_mass, na.rm = TRUE)) %>%
    ungroup(.) %>% 
    group_by(Sequence, Start, End, MHP, MaxUptake, State, Exposure, Protein) %>%
    summarize(mass = mean(avg_exp_mass, na.rm = TRUE),
              err_mass = coalesce(sd(avg_exp_mass, na.rm = TRUE)/sqrt(sum(!is.na(avg_exp_mass))), 0)) %>%
    ungroup(.) %>%
    arrange(Start, End, Start, Exposure) %>%
    as.data.frame()
  
}


## -----------------------------------------------------------------------------
library(HaDeX)
dat <- read_hdx(system.file(package = "HaDeX", "HaDeX/data/KD_180110_CD160_HVEM.csv"))

## ----eval=FALSE---------------------------------------------------------------
#  avg_exp_mass = weighted.mean(exp_mass, Inten, na.rm = TRUE)

## ----include=FALSE------------------------------------------------------------
dat_no_weight <- calculate_mass_no_inten(dat) %>%
  arrange(Start, End, State, Exposure) %>%
  mutate(source = "no_weight") %>%
  select(Sequence, Start, End, State, Exposure, mass, err_mass, source)

dat_weight <- calculate_mass(dat) %>%
  arrange(Start, End, State, Exposure) %>%
  mutate(source = "weight") %>%
  select(Sequence, Start, End, State, Exposure, mass, err_mass, source)

tmp <- bind_rows(dat_no_weight, dat_weight) %>%
  gather(type, value, -Sequence, -Start, -End, -source, -State, -Exposure) %>%
  spread(source, value) %>%
  mutate(diff = (weight-no_weight))

## ----echo=FALSE---------------------------------------------------------------
tmp %>%
  filter(type == "mass") %>%
  select(-type) %>%
  filter(Sequence == "ARSQKSGIRLQGHF")

## ----echo=FALSE---------------------------------------------------------------
tmp %>%
  filter(type == "err_mass") %>%
  select(-type) %>%
  filter(Sequence == "ARSQKSGIRLQGHF")

## ----echo=FALSE---------------------------------------------------------------
mean(tmp[ tmp[["type"]] == "mass" , "diff"])

## ----echo=FALSE---------------------------------------------------------------
mean(tmp[ tmp[["type"]] == "err_mass" , "diff"])

## ----message=FALSE, warning=FALSE, echo=FALSE---------------------------------
library(gridExtra)
library(ggplot2)

p1 <- ggplot(filter(tmp, type == "mass"), aes(diff)) +
  geom_histogram() +
  labs(title = "Differences between mass",
       x = "Difference")

p2 <- ggplot(filter(tmp, type == "err_mass"), aes(diff)) +
  geom_histogram() +
  labs(title = "Differences between uncertainties of mass",
       x = "Difference")

grid.arrange(p1, p2, ncol=2)

Try the HaDeX package in your browser

Any scripts or data that you put into this service are public.

HaDeX documentation built on Aug. 12, 2021, 5:20 p.m.