Nothing
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", fig.width = 10, fig.height = 10)
## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()
## ----message = FALSE, include = FALSE-----------------------------------------
library(net4pg)
library(igraph)
library(ggplot2)
## -----------------------------------------------------------------------------
incM_filename <- system.file("extdata"
, "incM_example"
, package = "net4pg"
, mustWork = TRUE)
rownames_filename <- system.file("extdata"
, "peptideIDs_incM_example"
, package = "net4pg"
, mustWork = TRUE)
colnames_filename <- system.file("extdata"
, "proteinIDs_incM_example"
, package = "net4pg"
, mustWork = TRUE)
incM <- read_inc_matrix(incM_filename = incM_filename
, colnames_filename = colnames_filename
, rownames_filename = rownames_filename)
## -----------------------------------------------------------------------------
dim(incM)
## -----------------------------------------------------------------------------
incM_reduced <- reduce_inc_matrix(incM)
dim(incM_reduced) # check the size of the reduced incidence matrix
## -----------------------------------------------------------------------------
adjM <- get_adj_matrix(incM_reduced)
dim(adjM) # check the size of the adjacency matrix:
## -----------------------------------------------------------------------------
multProteinCC <- get_cc(adjM)
## -----------------------------------------------------------------------------
cc.multProteins <- multProteinCC$ccs
length(cc.multProteins)
## -----------------------------------------------------------------------------
# Calculate CCs size and percentage of single- vs multi-protein CCs
CCstatsOut <- cc_stats(incM = incM
, cc.proteins = cc.multProteins
, reducedIncM = TRUE)
# Number of single-protein CCs:
CCstatsOut$N_singleProtCC
# Number of multi-protein CCs
CCstatsOut$N_multiProtCC
# Total number of CCs
totCCs <- CCstatsOut$N_singleProtCC + CCstatsOut$N_multiProtCC
totCCs
# Percentage of single-protein CCs:
PercSingleP <- round(CCstatsOut$N_singleProtCC / totCCs * 100, digits = 2)
PercSingleP
# View table of CC size distribution
CCstatsOut$NproteinsDistribution
# Plot CC size distribution
plot(factor(CCstatsOut$NproteinsDistribution$N_proteins
, levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", ">10"))
, as.numeric(as.vector(CCstatsOut$NproteinsDistribution$N_CC))
, type = "s"
, xlab = "N_proteins"
, ylab = "N_CCs")
## -----------------------------------------------------------------------------
peptideStatsOut <- peptide_stats(incM = incM)
# Number of shared peptides
peptideStatsOut$nbShared
# Number of specific peptides
peptideStatsOut$nbSpecific
# Percentage of specific peptides
peptideStatsOut$percSpecific
## -----------------------------------------------------------------------------
cc.peptides.incM <- cc_composition(cc.multProteins, incM = incM)
## ---- fig.height=7------------------------------------------------------------
# Generate the bipartite graph
prot <- "ENSP261"
subgraphCC <- plot_cc(prot = prot
, cc.proteins = cc.multProteins
, cc.subincM = cc.peptides.incM$cc.subincM
, incM = incM
, tagProt = "ENSP"
, tagContam = "Contam")
# Plot it
plot.igraph(subgraphCC$g
, layout = layout_as_bipartite
, edge.width = 1
, edge.arrow.width = 0.3
, vertex.size = 35
, edge.arrow.size = 0.5
, vertex.size2 = 35
, vertex.label.cex = 1
, asp = 0.25
, margin = -0.1) +
title(paste0("Protein ", prot, " in CC#", subgraphCC$cc_id), line = -1)
## -----------------------------------------------------------------------------
incM_filename <- system.file("extdata"
, "incM_example"
, package = "net4pg"
, mustWork = TRUE)
rownames_filename <- system.file("extdata"
, "peptideIDs_incM_example"
, package = "net4pg"
, mustWork = TRUE)
colnames_filename <- system.file("extdata"
, "proteinIDs_incM_example"
, package = "net4pg"
, mustWork = TRUE)
incM <- read_inc_matrix(incM_filename = incM_filename
, colnames_filename = colnames_filename
, rownames_filename = rownames_filename)
## -----------------------------------------------------------------------------
dim(incM)
## -----------------------------------------------------------------------------
# Read input file names
exprTranscriptsFile <- system.file("extdata"
, "expressed_transcripts.txt"
, package = "net4pg"
, mustWork = TRUE)
protein2transcriptFile <- system.file("extdata"
, "protein_to_transcript"
, package = "net4pg"
, mustWork = TRUE)
# Perform filtering
incM_filt <- transcriptome_filter(incM
, exprTranscriptsFile = exprTranscriptsFile
, proteinToTranscriptFile = protein2transcriptFile
, tagContam = "Contam"
, remove = "sharedOnly")
# Check size after transcriptome-informed filtering
dim(incM_filt)
## -----------------------------------------------------------------------------
# Reduce incidence matrix size to accelerate downstream computation
incM_filt_reduced <- reduce_inc_matrix(incM_filt)
# Calculate the adjacency matrix describing protein-to-protein connections
adjM_filt <- get_adj_matrix(incM_filt_reduced)
# Generate a graph of protein-to-protein connections by shared peptides and
# calculate its CCs (i.e., sets of proteins connected by shared peptides
multProteinCC_filt <- get_cc(adjM_filt)
# Extract the list of vectors enumerating protein members in each CC
cc.multProteins_filt <- multProteinCC_filt$ccs
# Calculate CCs size and % of single- vs multi-protein CCs obtained after
# transcriptome-informed filtering
CCstatsOut_filt <- cc_stats(incM = incM_filt
, cc.proteins = multProteinCC_filt$ccs
, reducedIncM = TRUE)
# Number of single-protein CCs
CCstatsOut_filt$N_singleProtCC
# Number of multi-protein CCs
CCstatsOut_filt$N_multiProtCC
# Total number of CCs
totCCs_filt <- CCstatsOut_filt$N_singleProtCC + CCstatsOut_filt$N_multiProtCC
totCCs_filt
# Percentage of single-protein CCs
PercSingleP_filt <- round(CCstatsOut_filt$N_singleProtCC / totCCs_filt * 100
, digits = 2)
# View table of CC size distribution
CCstatsOut_filt$NproteinsDistribution
# Plot CC size distribution
plot(factor(CCstatsOut_filt$NproteinsDistribution$N_proteins
, levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", ">10"))
, as.numeric(as.vector(CCstatsOut_filt$NproteinsDistribution$N_CC))
, type = "s"
, xlab = "N_proteins"
, ylab = "N_CCs")
## -----------------------------------------------------------------------------
comp <- as.data.frame(cbind(as.character(as.vector(c("before_filter"
, "after_filter")))
, as.numeric(as.vector(c(PercSingleP
, PercSingleP_filt)))))
colnames(comp) <- c("Filter", "Perc_SingleP")
ggplot(data = comp
, aes(x = as.factor(Filter), y = as.numeric(as.vector(Perc_SingleP)))) +
geom_bar(stat = "identity") +
theme_classic() +
xlab("") +
ylab("% single-prot CCs") +
ylim(0, 100) +
coord_flip() +
geom_text(aes(label = as.numeric(as.vector(Perc_SingleP)))
, hjust = 1.5, color = "white", size = 4)
## -----------------------------------------------------------------------------
old.par <- par(no.readonly = TRUE) # save default par values
ymax_before <- as.numeric(as.vector(CCstatsOut$NproteinsDistribution$N_CC))
ymax_after <- as.numeric(as.vector(CCstatsOut_filt$NproteinsDistribution$N_CC))
ymax <- max(max(ymax_before), max(ymax_after))
par(mfrow = c(1, 2))
plot(factor(CCstatsOut$NproteinsDistribution$N_proteins
, levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", ">10"))
, as.numeric(as.vector(CCstatsOut$NproteinsDistribution$N_CC))
, type = "s"
, xlab = "N_proteins"
, ylab = "N_CCs"
, ylim = c(0, ymax)
, main = "before filtering")
plot(factor(CCstatsOut_filt$NproteinsDistribution$N_proteins
, levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", ">10"))
, as.numeric(as.vector(CCstatsOut_filt$NproteinsDistribution$N_CC))
, type = "s"
, xlab = "N_proteins"
, ylab = "N_CCs"
, ylim = c(0, ymax)
, main = "after filtering")
par(old.par) # restore default par values
## -----------------------------------------------------------------------------
peptideStatsOut_filt <- peptide_stats(incM = incM_filt)
## -----------------------------------------------------------------------------
comp <- as.data.frame(cbind(
as.character(as.vector(c("before_filter"
, "after_filter")))
, as.numeric(as.vector(c(peptideStatsOut$nbShared
, peptideStatsOut_filt$nbShared)))))
colnames(comp) <- c("Filter", "Perc_sharedPeptides")
ggplot(data = comp
, aes(x = as.factor(Filter)
, y = as.numeric(as.vector(Perc_sharedPeptides)))) +
geom_bar(stat = "identity") +
theme_classic() +
xlab("") +
ylab("% shared peptides") +
ylim(0, 100) +
coord_flip() +
geom_text(aes(label = as.numeric(as.vector(Perc_sharedPeptides)))
, hjust = 1.5, color = "white", size = 4)
## ---- fig.height=14-----------------------------------------------------------
# Extract peptides and peptide-to-protein mappings for each CC after filtering
cc.peptides.incM_filt <- cc_composition(cc.multProteins_filt
, incM = incM_filt)
# Generate a bipartite graph of the CC which contains the protein of interest,
# before and after transcriptome-informed filtering.
prot <- "ENSP261"
subgraphCC_beforeFilter <- plot_cc(prot = prot
, cc.proteins = cc.multProteins
, cc.subincM = cc.peptides.incM$cc.subincM
, incM = incM
, tagProt = "ENSP"
, tagContam = "Contam")
subgraphCC_afterFilter <- plot_cc(prot = prot
, cc.proteins = cc.multProteins_filt
, cc.subincM = cc.peptides.incM_filt$cc.subincM
, incM = incM_filt
, tagProt = "ENSP"
, tagContam = "Contam")
# Plot
old.par <- par(no.readonly = TRUE) # save default par values
par(mfrow = c(2, 1))
plot.igraph(subgraphCC_beforeFilter$g
, layout = layout_as_bipartite
, edge.width = 1
, edge.arrow.width = 0.3
, vertex.size = 35
, edge.arrow.size = 0.5
, vertex.size2 = 35
, vertex.label.cex = 1
, asp = 0.45
, margin = -0.1) +
title(paste0("Protein "
, prot
, " in CC #"
, subgraphCC_beforeFilter$cc_id
, " before filtering")
, line = -1)
plot.igraph(subgraphCC_afterFilter$g
, layout = layout_as_bipartite
, edge.width = 1
, edge.arrow.width = 0.3
, vertex.size = 35
, edge.arrow.size = 0.5
, vertex.size2 = 35
, vertex.label.cex = 1
, asp = 0.45
, margin = -0.1) +
title(paste0("Protein "
, prot, " in CC #"
, subgraphCC_beforeFilter$cc_id
, " after filtering")
, line = -1)
par(old.par) # restore default par values
## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.