Intro_To_net4pg.R
In net4pg: Handle Ambiguity of Protein Identifications from Shotgun Proteomics

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", fig.width = 10, fig.height = 10)

## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()

## ----message = FALSE, include = FALSE-----------------------------------------
library(net4pg)
library(igraph)
library(ggplot2)

## -----------------------------------------------------------------------------
incM_filename <- system.file("extdata"
                        , "incM_example"
                        , package = "net4pg"
                        , mustWork = TRUE)
rownames_filename <- system.file("extdata"
                        , "peptideIDs_incM_example"
                        , package = "net4pg"
                        , mustWork = TRUE)
colnames_filename <- system.file("extdata"
                        , "proteinIDs_incM_example"
                        , package = "net4pg"
                        , mustWork = TRUE)
incM <- read_inc_matrix(incM_filename = incM_filename
                , colnames_filename = colnames_filename
                , rownames_filename = rownames_filename)

## -----------------------------------------------------------------------------
dim(incM)

## -----------------------------------------------------------------------------
incM_reduced <- reduce_inc_matrix(incM)
dim(incM_reduced) # check the size of the reduced incidence matrix

## -----------------------------------------------------------------------------
adjM <- get_adj_matrix(incM_reduced)
dim(adjM) # check the size of the adjacency matrix:

## -----------------------------------------------------------------------------
multProteinCC <- get_cc(adjM)

## -----------------------------------------------------------------------------
cc.multProteins <- multProteinCC$ccs
length(cc.multProteins)

## -----------------------------------------------------------------------------
# Calculate CCs size and percentage of single- vs multi-protein CCs
CCstatsOut <- cc_stats(incM = incM
                       , cc.proteins = cc.multProteins
                       , reducedIncM = TRUE)

# Number of single-protein CCs:
CCstatsOut$N_singleProtCC

# Number of multi-protein CCs
CCstatsOut$N_multiProtCC

# Total number of CCs
totCCs <- CCstatsOut$N_singleProtCC + CCstatsOut$N_multiProtCC
totCCs

# Percentage of single-protein CCs:
PercSingleP <- round(CCstatsOut$N_singleProtCC / totCCs * 100, digits = 2)
PercSingleP

# View table of CC size distribution
CCstatsOut$NproteinsDistribution

# Plot CC size distribution
plot(factor(CCstatsOut$NproteinsDistribution$N_proteins
       , levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", ">10"))
     , as.numeric(as.vector(CCstatsOut$NproteinsDistribution$N_CC))
     , type = "s"
     , xlab = "N_proteins"
     , ylab = "N_CCs")

## -----------------------------------------------------------------------------
peptideStatsOut <- peptide_stats(incM = incM)

# Number of shared peptides
peptideStatsOut$nbShared

# Number of specific peptides
peptideStatsOut$nbSpecific

# Percentage of specific peptides
peptideStatsOut$percSpecific

## -----------------------------------------------------------------------------
cc.peptides.incM <- cc_composition(cc.multProteins, incM = incM)

## ---- fig.height=7------------------------------------------------------------
# Generate the bipartite graph
prot <- "ENSP261"
subgraphCC <- plot_cc(prot = prot
        , cc.proteins = cc.multProteins
        , cc.subincM = cc.peptides.incM$cc.subincM
        , incM = incM
        , tagProt = "ENSP"
        , tagContam = "Contam")
# Plot it
plot.igraph(subgraphCC$g
            , layout = layout_as_bipartite
            , edge.width = 1
            ,  edge.arrow.width = 0.3
            , vertex.size = 35
            , edge.arrow.size = 0.5
            , vertex.size2 = 35
            , vertex.label.cex = 1
            , asp = 0.25
            , margin = -0.1) +
title(paste0("Protein ", prot, " in CC#", subgraphCC$cc_id), line = -1)

## -----------------------------------------------------------------------------
incM_filename <- system.file("extdata"
                        , "incM_example"
                        , package = "net4pg"
                        , mustWork = TRUE)
rownames_filename <- system.file("extdata"
                        , "peptideIDs_incM_example"
                        , package = "net4pg"
                        , mustWork = TRUE)
colnames_filename <- system.file("extdata"
                        , "proteinIDs_incM_example"
                        , package = "net4pg"
                        , mustWork = TRUE)
incM <- read_inc_matrix(incM_filename = incM_filename
                , colnames_filename = colnames_filename
                , rownames_filename = rownames_filename)

## -----------------------------------------------------------------------------
dim(incM)

## -----------------------------------------------------------------------------
# Read input file names
exprTranscriptsFile <- system.file("extdata"
                        , "expressed_transcripts.txt"
                        , package = "net4pg"
                        , mustWork = TRUE)
protein2transcriptFile <- system.file("extdata"
                        , "protein_to_transcript"
                        , package = "net4pg"
                        , mustWork = TRUE)

# Perform filtering
incM_filt <- transcriptome_filter(incM
                            , exprTranscriptsFile = exprTranscriptsFile
                            , proteinToTranscriptFile = protein2transcriptFile
                            , tagContam = "Contam"
                            , remove = "sharedOnly")

# Check size after transcriptome-informed filtering
dim(incM_filt)

## -----------------------------------------------------------------------------
# Reduce incidence matrix size to accelerate downstream computation
incM_filt_reduced <- reduce_inc_matrix(incM_filt)

# Calculate the adjacency matrix describing protein-to-protein connections
adjM_filt <- get_adj_matrix(incM_filt_reduced)

# Generate a graph of protein-to-protein connections by shared peptides and
# calculate its CCs (i.e., sets of proteins connected by shared peptides
multProteinCC_filt <- get_cc(adjM_filt)

# Extract the list of vectors enumerating protein members in each CC 
cc.multProteins_filt <- multProteinCC_filt$ccs

# Calculate CCs size and % of single- vs multi-protein CCs obtained after
# transcriptome-informed filtering
CCstatsOut_filt <- cc_stats(incM = incM_filt
                                , cc.proteins = multProteinCC_filt$ccs
                                , reducedIncM = TRUE)

# Number of single-protein CCs
CCstatsOut_filt$N_singleProtCC

# Number of multi-protein CCs
CCstatsOut_filt$N_multiProtCC

# Total number of CCs
totCCs_filt <- CCstatsOut_filt$N_singleProtCC + CCstatsOut_filt$N_multiProtCC
totCCs_filt

# Percentage of single-protein CCs
PercSingleP_filt <- round(CCstatsOut_filt$N_singleProtCC / totCCs_filt * 100
                          , digits = 2)

# View table of CC size distribution
CCstatsOut_filt$NproteinsDistribution

# Plot CC size distribution
plot(factor(CCstatsOut_filt$NproteinsDistribution$N_proteins
       , levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", ">10"))
     , as.numeric(as.vector(CCstatsOut_filt$NproteinsDistribution$N_CC))
     , type = "s"
     , xlab = "N_proteins"
     , ylab = "N_CCs")

## -----------------------------------------------------------------------------
comp <- as.data.frame(cbind(as.character(as.vector(c("before_filter"
                                                  , "after_filter")))
                         , as.numeric(as.vector(c(PercSingleP
                                                  , PercSingleP_filt)))))
colnames(comp) <- c("Filter", "Perc_SingleP")

ggplot(data = comp
       , aes(x = as.factor(Filter), y = as.numeric(as.vector(Perc_SingleP)))) +
      geom_bar(stat = "identity") +
      theme_classic() +
      xlab("") +
      ylab("% single-prot CCs") +
      ylim(0, 100) +
      coord_flip() +
      geom_text(aes(label = as.numeric(as.vector(Perc_SingleP)))
                , hjust = 1.5, color = "white", size = 4)

## -----------------------------------------------------------------------------
old.par <- par(no.readonly = TRUE) # save default par values

ymax_before <- as.numeric(as.vector(CCstatsOut$NproteinsDistribution$N_CC))
ymax_after <- as.numeric(as.vector(CCstatsOut_filt$NproteinsDistribution$N_CC))

ymax <- max(max(ymax_before), max(ymax_after))

par(mfrow = c(1, 2))
plot(factor(CCstatsOut$NproteinsDistribution$N_proteins
        , levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", ">10"))
      , as.numeric(as.vector(CCstatsOut$NproteinsDistribution$N_CC))
      , type = "s"
      , xlab = "N_proteins"
      , ylab = "N_CCs"
      , ylim = c(0, ymax)
      , main = "before filtering")
plot(factor(CCstatsOut_filt$NproteinsDistribution$N_proteins
      , levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", ">10"))
      , as.numeric(as.vector(CCstatsOut_filt$NproteinsDistribution$N_CC))
      , type = "s"
      , xlab = "N_proteins"
      , ylab = "N_CCs"
      , ylim = c(0, ymax)
      , main = "after filtering")

par(old.par) # restore default par values

## -----------------------------------------------------------------------------
peptideStatsOut_filt <- peptide_stats(incM = incM_filt)

## -----------------------------------------------------------------------------
comp <- as.data.frame(cbind(
                      as.character(as.vector(c("before_filter"
                                            , "after_filter")))
                    , as.numeric(as.vector(c(peptideStatsOut$nbShared
                                        , peptideStatsOut_filt$nbShared)))))
colnames(comp) <- c("Filter", "Perc_sharedPeptides")

ggplot(data = comp
    , aes(x = as.factor(Filter)
          , y = as.numeric(as.vector(Perc_sharedPeptides)))) +
      geom_bar(stat = "identity") +
      theme_classic() +
      xlab("") +
      ylab("% shared peptides") +
      ylim(0, 100) +
      coord_flip() +
      geom_text(aes(label = as.numeric(as.vector(Perc_sharedPeptides)))
                , hjust = 1.5, color = "white", size = 4)

## ---- fig.height=14-----------------------------------------------------------
# Extract peptides and peptide-to-protein mappings for each CC after filtering
cc.peptides.incM_filt <- cc_composition(cc.multProteins_filt
                                            , incM = incM_filt)

# Generate a bipartite graph of the CC which contains the protein of interest,
# before and after transcriptome-informed filtering.
prot <- "ENSP261"
subgraphCC_beforeFilter <- plot_cc(prot = prot
                      , cc.proteins = cc.multProteins
                      , cc.subincM = cc.peptides.incM$cc.subincM
                      , incM = incM
                      , tagProt = "ENSP"
                      , tagContam = "Contam")

subgraphCC_afterFilter <- plot_cc(prot = prot
                     , cc.proteins = cc.multProteins_filt
                     , cc.subincM = cc.peptides.incM_filt$cc.subincM
                     , incM = incM_filt
                     , tagProt = "ENSP"
                     , tagContam = "Contam")

# Plot
old.par <- par(no.readonly = TRUE) # save default par values

par(mfrow = c(2, 1))
plot.igraph(subgraphCC_beforeFilter$g
            , layout = layout_as_bipartite
            , edge.width = 1
            , edge.arrow.width = 0.3
            , vertex.size = 35
            , edge.arrow.size = 0.5
            , vertex.size2 = 35
            , vertex.label.cex = 1
            , asp = 0.45
            , margin = -0.1) +
title(paste0("Protein "
             , prot
             , " in CC #"
             , subgraphCC_beforeFilter$cc_id
             , " before filtering")
      , line = -1)
plot.igraph(subgraphCC_afterFilter$g
            , layout = layout_as_bipartite
            , edge.width = 1
            , edge.arrow.width = 0.3
            , vertex.size = 35
            , edge.arrow.size = 0.5
            , vertex.size2 = 35
            , vertex.label.cex = 1
            , asp = 0.45
            , margin = -0.1) +
title(paste0("Protein "
             , prot, " in CC #"
             , subgraphCC_beforeFilter$cc_id
             , " after filtering")
      , line = -1)

par(old.par) # restore default par values

## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
net4pg documentation built on Sept. 7, 2022, 5:06 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
net4pg
Handle Ambiguity of Protein Identifications from Shotgun Proteomics

inst/doc/Intro_To_net4pg.R
In net4pg: Handle Ambiguity of Protein Identifications from Shotgun Proteomics

Try the net4pg package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

net4pg Handle Ambiguity of Protein Identifications from Shotgun Proteomics

inst/doc/Intro_To_net4pg.R In net4pg: Handle Ambiguity of Protein Identifications from Shotgun Proteomics

Try the net4pg package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

net4pg
Handle Ambiguity of Protein Identifications from Shotgun Proteomics

inst/doc/Intro_To_net4pg.R
In net4pg: Handle Ambiguity of Protein Identifications from Shotgun Proteomics