Nothing
# Normalize intensities ----------------------------------------------------
#' Normalize intensity data
#' @author Chathurani Ranathunge
#' @description This function normalizes data using a user-specified
#' normalization method.
#' @import limma
#'
#' @param df An \code{imp_df} object with missing values imputed using
#' \code{impute_na} or a \code{raw_df} object containing missing values.
#' @param method Name of the normalization method to use. Choices are
#' \code{"none", "scale", "quantile" or "cyclicloess."}
#' Default is \code{"quantile."}
#'
#' @details \itemize{\item\code{normalize_data} is a wrapper function around
#' the \code{\link[limma]{normalizeBetweenArrays}} function from the
#' \code{limma} package. \item This function normalizes
#' intensity values to achieve consistency among samples.
#' \item It assumes that the intensities in the
#' data frame have been log-transformed, therefore, it is important to make sure
#' that \code{create_df} was run with \code{log_tr = TRUE}(default) when
#' creating the \code{raw_df} object.}
#'
#' @return A \code{norm_df} object, which is a data frame of
#' normalized protein intensities.
#'
#' @seealso \itemize{\item \code{create_df}
#' \item \code{impute_na}
#' \item See \code{\link[limma]{normalizeBetweenArrays}} in the R package
#' \code{limma} for more information on the different normalization methods
#' available.}
#'
#'
#' @examples
#' ## Generate a raw_df object with default settings. No technical replicates.
#' raw_df <- create_df(
#' prot_groups = "https://raw.githubusercontent.com/caranathunge/promor_example_data/main/pg1.txt",
#' exp_design = "https://raw.githubusercontent.com/caranathunge/promor_example_data/main/ed1.txt"
#' )
#'
#' ## Impute missing values in the data frame using the default minProb
#' ## method prioir to normalization.
#' imp_df <- impute_na(raw_df)
#'
#' ## Normalize the imp_df object using the default quantile method
#' norm_df1 <- normalize_data(imp_df)
#'
#' ## Use the cyclicloess method
#' norm_df2 <- normalize_data(imp_df, method = "cyclicloess")
#'
#' ## Normalize data in the raw_df object prior to imputation.
#' norm_df3 <- normalize_data(raw_df)
#'
#' @export
normalize_data <- function(df,
method = "quantile") {
norm_df <- limma::normalizeBetweenArrays(df,
method = method
)
return(norm_df)
}
# Visualize normalization effects -----------------------------------------
#' Visualize the effect of normalization
#' @author Chathurani Ranathunge
#' @description This function visualizes the impact of normalization on
#' the data
#' @importFrom reshape2 melt
#' @import ggplot2
#' @import viridis
#' @param original A \code{raw_df} object (output of \code{\link{create_df}})
#' containing missing values, or an \code{imp_df} object after
#' imputing the missing values with \code{impute_na}.
#' @param normalized A \code{norm_df} object after normalizing the data frame
#' provided as \code{original} using \code{normalize_data}.
#' @param type Type of plot to generate. Choices are "box" or "density." Default
#' is \code{"box."}
#' @param text_size Text size for plot labels, axis labels etc. Default is
#' \code{10}.
#' @param palette Viridis color palette option for plots. Default is
#' \code{"viridis"}. See
#' \code{\link[viridis:viridis]{viridis}}
#' for available options.
#' @param save Logical. If \code{TRUE} saves a copy of the plot in the
#' directory provided in \code{file_path}.
#' @param file_path A string containing the directory path to save the file.
#' @param file_name File name to save the plot.
#' Default is \code{"Norm_plot."}
#' @param file_type File type to save the plot.
#' Default is \code{"pdf"}.
#' @param plot_width Width of the plot. Default is \code{10}.
#' @param plot_height Height of the plot. Default is \code{7}.
#' @param dpi Plot resolution. Default is \code{80}.
#'
#' @details Given two data frames, one with data prior to normalization
#' (\code{original}), and the other, after normalization (\code{normalized}),
#' \code{norm_plot} generates side-by-side plots to visualize the effect of
#' normalization on the protein intensity data.
#'
#' @seealso \itemize{\item \code{\link{normalize_data}}
#' \item \code{create_df}
#' \item \code{impute_na}
#' }
#'
#' @return A \code{ggplot2} plot object.
#'
#' @examples
#' \donttest{
#' ## Generate a raw_df object with default settings. No technical replicates.
#' raw_df <- create_df(
#' prot_groups = "https://raw.githubusercontent.com/caranathunge/promor_example_data/main/pg1.txt",
#' exp_design = "https://raw.githubusercontent.com/caranathunge/promor_example_data/main/ed1.txt"
#' )
#'
#' ## Impute missing values in the data frame using the default minProb
#' ## method.
#' imp_df <- impute_na(raw_df)
#'
#' ## Normalize the imp_df object using the default quantile method
#' norm_df <- normalize_data(imp_df)
#'
#' ## Visualize normalization using box plots
#' norm_plot(original = imp_df, normalized = norm_df)
#'
#' ## Visualize normalization using density plots
#' norm_plot(imp_df, norm_df, type = "density")
#' }
#'
#' @export
norm_plot <- function(original,
normalized,
type = "box",
text_size = 10,
palette = "viridis",
save = FALSE,
file_path = NULL,
file_name = "Norm_plot",
file_type = "pdf",
dpi = 80,
plot_width = 10,
plot_height = 7) {
# Set global variables to null
intensity <- value <- group <- NULL
# Pre-prossesing data for plotting
normalized <- as.matrix(normalized)
norm1 <- reshape2::melt(normalized, na.rm = FALSE)
norm1$normstage <- "After normalization"
original <- as.matrix(original)
orig1 <- reshape2::melt(original, na.rm = FALSE)
orig1$normstage <- "Before normalization"
# combine the two data sets
plot_data <- rbind(orig1, norm1)
colnames(plot_data) <- c("prot", "sample", "intensity", "normstage")
plot_data$group <- factor(sapply(
strsplit(
as.character(plot_data[, "sample"]),
"_"
),
getElement, 1
))
plot_data$normstage <- factor(plot_data$normstage,
levels = c(
"Before normalization",
"After normalization"
)
)
# Make density plots
if (type == "density") {
n_plot <- ggplot2::ggplot(
plot_data,
ggplot2::aes(
x = intensity,
color = sample
)
) +
ggplot2::geom_density(lwd = text_size * 0.02) +
ggplot2::xlab("") +
ggplot2::ylab("") +
viridis::scale_color_viridis(
discrete = TRUE,
option = palette,
begin = 0.3,
end = 0.7,
direction = -1
) +
promor_facet_theme() +
ggplot2::theme(
legend.position = "none",
axis.text = element_text(size = text_size * 0.7)
) +
ggplot2::facet_wrap(~normstage)
# Default: Boxplots
} else {
n_plot <- ggplot2::ggplot(
plot_data,
ggplot2::aes(
x = stats::reorder(
sample,
group
),
y = intensity,
fill = group
)
) +
geom_boxplot(
color = "grey30",
alpha = 0.7,
outlier.shape = 1,
outlier.stroke = 0.1,
outlier.size = text_size * 0.04,
outlier.color = "grey30",
lwd = text_size * 0.02
) +
ggplot2::coord_flip() +
ggplot2::facet_wrap(~normstage) +
ggplot2::xlab("") +
ggplot2::ylab("") +
viridis::scale_fill_viridis(
discrete = TRUE,
option = palette,
begin = 0.3,
end = 0.7
) +
promor_facet_theme() +
ggplot2::theme(
legend.position = "bottom",
axis.text = element_text(size = text_size * 0.7)
)
}
# Set temporary file_path if not specified
if (is.null(file_path)) {
file_path <- tempdir()
}
if (save == TRUE) {
ggsave(paste0(file_path, "/", file_name, ".", file_type),
n_plot,
dpi = dpi,
width = plot_width,
height = plot_height
)
return(n_plot)
} else {
return(n_plot)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.