Nothing
#' Plot protein numbers
#'
#' \code{plot_numbers} generates a barplot
#' of the number of identified proteins per sample.
#'
#' @param se SummarizedExperiment,
#' Data object for which to plot protein numbers
#' (output from \code{\link{make_se}()} or \code{\link{make_se_parse}()}).
#' @param plot Logical(1),
#' If \code{TRUE} (default) the barplot is produced.
#' Otherwise (if \code{FALSE}), the data which the
#' barplot is based on are returned.
#' @return Barplot of the number of identified proteins per sample
#' (generated by \code{\link[ggplot2]{ggplot}})
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter and plot numbers
#' filt <- filter_missval(se, thr = 0)
#' plot_numbers(filt)
#' @export
plot_numbers <- function(se, plot = TRUE) {
# Show error if input is not the required classes
assertthat::assert_that(inherits(se, "SummarizedExperiment"),
is.logical(plot),
length(plot) == 1)
# Make a binary long data.frame (1 = valid value, 0 = missing value)
df <- assay(se) %>%
data.frame() %>%
rownames_to_column() %>%
gather(ID, bin, -rowname) %>%
mutate(bin = ifelse(is.na(bin), 0, 1))
# Summarize the number of proteins identified
# per sample and generate a barplot
stat <- df %>%
group_by(ID) %>%
summarize(n = n(), sum = sum(bin)) %>%
left_join(., data.frame(colData(se)), by = "ID")
p <- ggplot(stat, aes(x = ID, y = sum, fill = condition)) +
geom_col() +
geom_hline(yintercept = unique(stat$n)) +
labs(title = "Proteins per sample", x = "",
y = "Number of proteins") +
theme_DEP2()
if(plot) {
return(p)
} else {
df <- as.data.frame(stat)
colnames(df)[seq_len(3)] <- c("sample", "total_proteins", "proteins_in_sample")
return(df)
}
}
#' Plot protein overlap between samples
#'
#' \code{plot_frequency} generates a barplot
#' of the protein overlap between samples
#'
#' @param se SummarizedExperiment,
#' Data object for which to plot observation frequency.
#' @param plot Logical(1),
#' If \code{TRUE} (default) the barplot is produced.
#' Otherwise (if \code{FALSE}), the data which the
#' barplot is based on are returned.
#' @return Barplot of overlap of protein identifications
#' between samples (generated by \code{\link[ggplot2]{ggplot}})
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter and plot frequency
#' filt <- filter_missval(se, thr = 0)
#' plot_frequency(filt)
#' @export
plot_frequency <- function(se, plot = TRUE) {
# Show error if input is not the required classes
assertthat::assert_that(inherits(se, "SummarizedExperiment"),
is.logical(plot),
length(plot) == 1)
# Make a binary long data.frame (1 = valid value, 0 = missing value)
df <- assay(se) %>%
data.frame() %>%
rownames_to_column() %>%
gather(ID, bin, -rowname) %>%
mutate(bin = ifelse(is.na(bin), 0, 1))
# Identify the number of experiments a protein was observed
stat <- df %>%
group_by(rowname) %>%
summarize(sum = sum(bin))
# Get the frequency of the number of experiments proteins
# were observerd and plot these numbers
table <- table(stat$sum) %>% data.frame()
p <- ggplot(table, aes(x = Var1, y = Freq, fill = Var1)) +
geom_col() +
scale_fill_grey(start = 0.8, end = 0.2) +
labs(title = "Protein identifications overlap",
x = "Identified in number of samples",
y = "Number of proteins") +
theme_DEP2() +
theme(legend.position="none")
if(plot) {
return(p)
} else {
df <- as.data.frame(table)
colnames(df) <- c("samples", "proteins")
return(df)
}
}
#' Plot protein coverage
#'
#' \code{plot_coverage} generates a barplot
#' of the protein coverage in all samples.
#'
#' @param se SummarizedExperiment,
#' Data object for which to plot observation frequency.
#' @param plot Logical(1),
#' If \code{TRUE} (default) the barplot is produced.
#' Otherwise (if \code{FALSE}), the data which the
#' barplot is based on are returned.
#' @return Barplot of protein coverage in samples
#' (generated by \code{\link[ggplot2]{ggplot}})
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter and plot coverage
#' filt <- filter_missval(se, thr = 0)
#' plot_coverage(filt)
#' @export
plot_coverage <- function(se, plot = TRUE) {
# Show error if input is not the required classes
assertthat::assert_that(inherits(se, "SummarizedExperiment"),
is.logical(plot),
length(plot) == 1)
# Make a binary long data.frame (1 = valid value, 0 = missing value)
df <- assay(se) %>%
data.frame() %>%
rownames_to_column() %>%
gather(ID, bin, -rowname) %>%
mutate(bin = ifelse(is.na(bin), 0, 1))
# Identify the number of experiments a protein was observed
stat <- df %>%
group_by(rowname) %>%
summarize(sum = sum(bin))
# Get the frequency of the number of experiments proteins were observerd
# and plot the cumulative sum of these numbers
table <- table(stat$sum) %>%
data.frame()
p <- ggplot(table, aes(x = "all", y = Freq, fill = Var1)) +
geom_col(col = "white") +
scale_fill_grey(start = 0.8, end = 0.2) +
labs(title = "Protein coverage",
x = "",
y = "Number of proteins",
fill = "Samples") +
theme_DEP1()
if(plot) {
return(p)
} else {
df <- as.data.frame(table)
colnames(df) <- c("samples", "proteins")
return(df)
}
}
#' Plot frequency of significant conditions per protein
#'
#' \code{plot_cond_freq} generates a histogram of the number of significant conditions per protein.
#'
#' @param dep SummarizedExperiment,
#' Data object for which differentially enriched proteins are annotated
#' (output from \code{\link{test_diff}()} and \code{\link{add_rejections}()}).
#' @param plot Logical(1),
#' If \code{TRUE} (default) the histogram is produced.
#' Otherwise (if \code{FALSE}), the data which the
#' histogram is based on are returned.
#' @return A histogram (generated by \code{\link[ggplot2]{ggplot}})
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#' imputed <- impute(norm, fun = "MinProb", q = 0.01)
#'
#' # Test for differentially expressed proteins
#' diff <- test_diff(imputed, "control", "Ctrl")
#' dep <- add_rejections(diff, alpha = 0.05, lfc = 1)
#'
#' # Plot frequency of significant conditions
#' plot_cond_freq(dep)
#' @export
plot_cond_freq <- function(dep, plot = TRUE) {
# Show error if inputs are not the required classes
assertthat::assert_that(inherits(dep, "SummarizedExperiment"),
is.logical(plot),
length(plot) == 1)
# Check for significance columns
row_data <- rowData(dep, use.names = FALSE)
if(length(grep("_significant", colnames(row_data))) < 1) {
stop("'[contrast]_significant' columns are not present in '",
deparse(substitute(dep)),
"'\nRun add_rejections() to obtain the required columns",
call. = FALSE)
}
# Check for significant column
if(!"significant" %in% colnames(row_data)) {
stop("'significant' column is not present in '",
deparse(substitute(dep)),
"'\nRun add_rejections() to obtain the required column",
call. = FALSE)
}
# Filter for significant proteins
significant <- dep[row_data$significant, ]
# Get significant columns and count significant conditions per protein
row_data <- rowData(significant, use.names = FALSE) %>% data.frame()
cols <- grep("_significant", colnames(row_data))
df <- row_data %>%
select(name, ID, cols) %>%
gather(condition, significant, -c(name, ID)) %>%
mutate(val = ifelse(significant, 1, 0))
stat <- df %>%
group_by(name) %>%
summarize(sum = sum(val), n = n())
# Get a counts table
table <- table(stat$sum) %>% data.frame()
# Plot the count table as a bar graph
p <- ggplot(table, aes(x = Var1, y = Freq, fill = Var1)) +
geom_col() +
labs(x = "Number of significant conditions",
y = "Number of proteins",
title = "Frequency of significant conditions per protein") +
DEP::theme_DEP1() +
theme(legend.position = "none")
if(plot) {
return(p)
} else {
df <- as.data.frame(table)
colnames(df) <- c("conditions", "proteins")
return(df)
}
}
#' Plot conditions overlap
#'
#' \code{plot_cond_overlap} generates a histogram of
#' the number of proteins per condition or overlapping conditions.
#'
#' @param dep SummarizedExperiment,
#' Data object for which differentially enriched proteins are annotated
#' (output from \code{\link{test_diff}()} and \code{\link{add_rejections}()}).
#' @param plot Logical(1),
#' If \code{TRUE} (default) the barplot is produced.
#' Otherwise (if \code{FALSE}), the data which the
#' barplot is based on are returned.
#' @return A histogram (generated by \code{\link[ggplot2]{ggplot}})
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#' imputed <- impute(norm, fun = "MinProb", q = 0.01)
#'
#' # Test for differentially expressed proteins
#' diff <- test_diff(imputed, "control", "Ctrl")
#' dep <- add_rejections(diff, alpha = 0.05, lfc = 1)
#'
#' # Plot condition overlap
#' plot_cond_overlap(dep)
#' @export
plot_cond_overlap <- function(dep, plot = TRUE) {
# Show error if inputs are not the required classes
assertthat::assert_that(inherits(dep, "SummarizedExperiment"),
is.logical(plot),
length(plot) == 1)
# Check for significance columns
row_data <- rowData(dep, use.names = FALSE)
if(length(grep("_significant", colnames(row_data))) < 1) {
stop("'[contrast]_significant' columns are not present in '",
deparse(substitute(dep)),
"'\nRun add_rejections() to obtain the required columns",
call. = FALSE)
}
# Check for significant column
if(!"significant" %in% colnames(row_data)) {
stop("'significant' column is not present in '",
deparse(substitute(dep)),
"'\nRun add_rejections() to obtain the required column",
call. = FALSE)
}
# Filter for significant proteins
significant <- dep[row_data$significant, ]
# Get significant columns
row_data <- rowData(significant, use.names = FALSE) %>% data.frame()
cols <- grep("_significant", colnames(row_data))
colnames(row_data)[cols] <- gsub("_significant", "", colnames(row_data)[cols])
# Rename column names
row_data_renamed <- row_data
colnames(row_data_renamed)[cols] <- LETTERS[seq(to = length(cols))]
legend <- data.frame(symbol = colnames(row_data_renamed)[cols],
contrast = colnames(row_data)[cols])
# Get co-occurence matrix
df <- select(row_data_renamed, name, ID, cols)
counts <- table(df[,3:(length(cols)+2)]) %>%
as.data.frame() %>%
filter(Freq > 0)
# Parse condition names
mat <- counts[,seq_len(ncol(counts)-1)] == TRUE
counts$conditions <-
apply(mat, 1, function(x) {
paste0(colnames(counts[,seq_len(ncol(counts)-1)])[x], collapse = " ")
})
# Sort on number of conditions
counts$n_con <- apply(mat, 1, function(x) length(which(x)))
counts <- counts %>%
arrange(n_con, conditions)
counts$conditions <- parse_factor(counts$conditions, levels = counts$conditions)
if(nrow(counts) <= 40 ) {
labelsize = 12
} else {
labelsize = 12 / (nrow(counts) / 40)
}
# Plot conditions overlap histogram
p1 <- ggplot(counts, aes(x = conditions, y = Freq)) +
geom_col(fill = "black") +
labs(title = "Overlap between conditions",
x = "Conditions",
y = "Number of Proteins") +
theme_DEP2() +
theme(axis.text=element_text(size = labelsize))
# Legend table
ttheme <- gridExtra::ttheme_minimal(
core=list(fg_params=list(hjust=0, x=0.1)))
p2 <- gridExtra::tableGrob(legend, theme = ttheme, rows = NULL, cols = NULL)
if(!plot) {
df <- counts %>%
select(conditions, Freq, n_con)
colnames(df) <- c("conditions", "proteins", "number_of_conditions")
list <- list(counts = df, legend = legend)
return(list)
} else {
gridExtra::grid.arrange(p1, p2, ncol = 2, widths = c(0.8, 0.2))
}
}
#' Plot frequency of significant conditions per protein
#' and the overlap in proteins between conditions
#'
#' \code{plot_cond} generates a histogram of
#' the number of proteins per condition and stacks for overlapping conditions.
#'
#' @param dep SummarizedExperiment,
#' Data object for which differentially enriched proteins are annotated
#' (output from \code{\link{test_diff}()} and \code{\link{add_rejections}()}).
#' @param plot Logical(1),
#' If \code{TRUE} (default) the barplot is produced.
#' Otherwise (if \code{FALSE}), the data which the
#' barplot is based on are returned.
#' @return A histogram (generated by \code{\link[ggplot2]{ggplot}})
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#' imputed <- impute(norm, fun = "MinProb", q = 0.01)
#'
#' # Test for differentially expressed proteins
#' diff <- test_diff(imputed, "control", "Ctrl")
#' dep <- add_rejections(diff, alpha = 0.05, lfc = 1)
#'
#' # Plot histogram with overlaps
#' plot_cond(dep)
#' @export
plot_cond <- function(dep, plot = TRUE) {
# Show error if inputs are not the required classes
assertthat::assert_that(inherits(dep, "SummarizedExperiment"),
is.logical(plot),
length(plot) == 1)
# Check for significance columns
row_data <- rowData(dep, use.names = FALSE)
if(length(grep("_significant", colnames(row_data))) < 1) {
stop("'[contrast]_significant' columns are not present in '",
deparse(substitute(dep)),
"'\nRun add_rejections() to obtain the required columns",
call. = FALSE)
}
# Check for significant column
if(!"significant" %in% colnames(row_data)) {
stop("'significant' column is not present in '",
deparse(substitute(dep)),
"'\nRun add_rejections() to obtain the required column",
call. = FALSE)
}
# Filter for significant proteins
significant <- dep[row_data$significant, ]
# Get significant columns
row_data <- rowData(significant, use.names = FALSE) %>% data.frame()
cols <- grep("_significant", colnames(row_data))
colnames(row_data)[cols] <- gsub("_significant", "", colnames(row_data)[cols])
# Rename column names
row_data_renamed <- row_data
colnames(row_data_renamed)[cols] <- LETTERS[seq(to = length(cols))]
legend <- data.frame(symbol = colnames(row_data_renamed)[cols], names = colnames(row_data)[cols])
# Get co-occurence matrix
df <- select(row_data_renamed, name, ID, cols)
counts <- table(df[,3:(length(cols)+2)]) %>%
as.data.frame() %>%
filter(Freq > 0)
# Parse condition names
mat <- counts[,seq_len(ncol(counts)-1)] == TRUE
counts$conditions <-
apply(mat, 1, function(x) {
paste0(colnames(counts[,seq_len(ncol(counts)-1)])[x], collapse = " ")
})
# Sort on number of conditions
counts$n_con <- apply(mat, 1, function(x) length(which(x)))
counts <- counts %>%
arrange(n_con, conditions)
counts$conditions <- parse_factor(counts$conditions, levels = counts$conditions)
counts <- mutate(counts, ID = paste0(conditions, ": ", Freq))
if(nrow(counts) <= 40 ) {
labelsize = 12
} else {
labelsize = 12 / (nrow(counts) / 40)
}
# Plot conditions overlap histogram
p1 <- ggplot(counts, aes(x = n_con, y = Freq, fill = conditions)) +
geom_col() +
geom_text(aes(label = ID), size = 3, hjust = 0.5,
position = position_stack(vjust = 0.5)) +
labs(x = "Number of significant conditions",
y = "Number of proteins",
title = "Overlap between conditions") +
DEP::theme_DEP1()
# Legend table
ttheme <- gridExtra::ttheme_minimal(
core=list(fg_params=list(hjust=0, x=0.1)))
p2 <- gridExtra::tableGrob(legend, theme = ttheme, rows = NULL, cols = NULL)
if(!plot) {
df <- counts %>%
select(conditions, Freq, n_con)
colnames(df) <- c("conditions", "proteins", "number_of_conditions")
list <- list(counts = df, legend = legend)
return(list)
} else{
gridExtra::grid.arrange(p1, p2, ncol = 2, widths = c(0.8, 0.2))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.