#' Calculate fold change
#'
#' Calculates the fold changes between samples for a given set of comparisons.
#' \code{plotFC} plots these fold changes as a bar graph.
#'
#'
#' @param tidydf Tidy dataframe of RFI values and their corresponding sample
#' and phenotype information. There should NOT be any technical replicates
#' (samples with the same sample name).
#' @param comparisons Dataframe with 2 columns and n rows, where n is the
#' number of desired comparisons. The fold change to be calculated will be
#' column 1 divided by column 2, for each comparison/row. For \code{plotFC}
#' only the rows of the comparisons dataframe that you wish to plot should
#' be given.
#' @param logdata Single logical indicating: \code{calcFC} - whether the fold
#' change should be logged (base 2), \code{plotFC} - whether the fold change
#' has been logged.
#' @param RFI_col Name of column containing RFI values, as string.
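#' @param samples Single logical indicating whether the comparison is of
#' samples (\code{TRUE}, names in \code{comparisons} are matched against the
#' 'X1' column) or ABs (\code{FALSE}, names are matched against the 'AB'
#' column).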
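#' @param ABnames Optional argument. Dataframe with exactly two columns,
#' 'Antibody.Name' and 'Ab.No.', used to merge full antibody names to the
#' output dataframe. Only used when \code{samples} is \code{TRUE}.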
#' @param fc_df Dataframe of fold changes generated by \code{calcFC}.
#' @param normalised Single logical indicating whether the data have been
#' normalised.
#'
#'
#' @importFrom assertthat assert_that
#' @importFrom magrittr %>%
#' @importFrom rlang !!
#' @import ggplot2
#'
#'
#' @describeIn calcFC Generates a dataframe of fold changes for each
#' comparison, with columns of corresponding names of samples compared and
#' their phenotype description.
#' @export
calcFC <- function(tidydf, comparisons, logdata = FALSE, RFI_col = "RFI",
                   samples = TRUE, ABnames) {
  # Purpose: compute, for every row of 'comparisons', the fold change
  # (column 1 over column 2) across all ABs (samples = TRUE) or across all
  # samples (samples = FALSE), and return the result in long format.
  #
  # Returns a data.frame with columns 'comp1', 'comp2', 'FoldChange' and
  # either 'AB' (plus 'Condition', and 'Antibody.Name' when 'ABnames' is
  # given) or 'Sample'.

  # Convert tibbles up front: tibble `[` never drops to a vector, which
  # would otherwise break the checks and lookups below.
  if (inherits(tidydf, "tbl_df")) {
    tidydf <- as.data.frame(tidydf)
  }
  # Accept data.frames, tibbles and character matrices alike.
  comparisons <- as.data.frame(comparisons, stringsAsFactors = FALSE)

  # check inputs
  assert_that(is.character(RFI_col), length(RFI_col) == 1,
              msg = "Check 'RFI_col' is a single string")
  assert_that(
    all(c("X1", RFI_col, "AB") %in% colnames(tidydf)),
    msg = "Check 'tidydf' has columns 'X1', 'AB' and your input for 'RFI_col'")
  assert_that(is.logical(logdata), length(logdata) == 1,
              msg = "Check 'logdata' is single logical")
  assert_that(is.logical(samples), length(samples) == 1, !is.na(samples),
              msg = "Check 'samples' is a single logical")
  assert_that(ncol(comparisons) >= 2,
              msg = "Check 'comparisons' df has 2 columns")

  # Names used for lookups; coerced so factor columns compare by label.
  comp1 <- as.character(comparisons[[1]])
  comp2 <- as.character(comparisons[[2]])
  num_comparisons <- nrow(comparisons)

  if (samples) {
    assert_that(all(c(comp1, comp2) %in% tidydf$X1),
                msg = "Check 'comparisons' df uses sample names")
    # 'Condition' is read further down when comparing samples.
    assert_that("Condition" %in% colnames(tidydf),
                msg = "Check 'tidydf' has a 'Condition' column")
  } else {
    assert_that(all(c(comp1, comp2) %in% tidydf$AB),
                msg = "Check 'comparisons' df uses AB names")
  }
  if (!missing(ABnames)) {
    assert_that(
      all(c("Antibody.Name", "Ab.No.") %in% colnames(ABnames)),
      ncol(ABnames) == 2,
      msg = paste0("Ensure columns 'Antibody.Name' and 'Ab.No.' exist in the\n",
                   "'ABnames' dataframe"))
  }
  if (logdata) {
    # Test the values directly rather than counting subsetted rows, so the
    # check works for any data.frame flavour and ignores missing values.
    assert_that(!any(tidydf[[RFI_col]] < 0, na.rm = TRUE),
                msg = "Cannot take log of negative RFI values")
  }

  # convert to wide format: one row per sample (X1), one column per AB
  wide_df <- tidydf %>%
    dplyr::select(X1, !!(as.name(RFI_col)), AB) %>%
    tidyr::spread(value = !!(as.name(RFI_col)), key = AB)

  # convert to matrix; drop = FALSE keeps the column name when there is
  # only a single AB
  numeric_mat <- as.matrix(wide_df[, -1, drop = FALSE])
  rownames(numeric_mat) <- as.character(wide_df[[1]])
  # transpose so the compared entities (ABs) are in rows
  if (!samples) {
    numeric_mat <- t(numeric_mat)
  }
  if (logdata) {
    numeric_mat <- log2(numeric_mat)
  }

  # calculate fold changes for all comparisons at once by indexing rows by
  # name; this also yields a 0-row result when there are no comparisons
  numerators <- numeric_mat[comp1, , drop = FALSE]
  denominators <- numeric_mat[comp2, , drop = FALSE]
  if (logdata) {
    # a ratio becomes a difference on the log scale
    fc_mat <- numerators - denominators
  } else {
    fc_mat <- numerators / denominators
  }
  rownames(fc_mat) <- NULL

  # convert back to df, with ABs or samples as column names
  fc_df <- as.data.frame(fc_mat)
  colnames(fc_df) <- colnames(numeric_mat)
  num_value_cols <- ncol(numeric_mat)

  # add conditions column if comparing samples: the condition of the first
  # row of 'tidydf' matching the second (denominator) sample
  if (samples) {
    fc_df$Condition <- as.character(
      tidydf[["Condition"]][match(comp2, as.character(tidydf$X1))])
  }
  # add comparison columns
  fc_df$comp1 <- comparisons[[1]]
  fc_df$comp2 <- comparisons[[2]]

  # change to long form; the value columns are the leading columns
  if (samples) {
    fc_df <- fc_df %>%
      tidyr::gather(1:num_value_cols, key = "AB", value = "FoldChange")
  } else {
    fc_df <- fc_df %>%
      tidyr::gather(1:num_value_cols, key = "Sample", value = "FoldChange")
  }

  # attach full antibody names (only meaningful when rows are per AB)
  if (samples && !missing(ABnames)) {
    fc_df <- merge(fc_df, ABnames, by.x = "AB", by.y = "Ab.No.",
                   all.x = TRUE, all.y = FALSE)
  }
  fc_df
}
#' @describeIn calcFC Creates faceted barplots of fold changes of the
#' desired comparisons for each antibody/sample, in the current graphics
#' device.
#' @export
plotFC <- function(fc_df, comparisons, logdata = FALSE, normalised = FALSE) {
  # Purpose: draw horizontal bar plots of the fold changes in 'fc_df' for
  # the requested 'comparisons', faceted by antibody or by sample.
  # Returns the ggplot object.

  # Accept data.frames, tibbles and character matrices alike.
  comparisons <- as.data.frame(comparisons, stringsAsFactors = FALSE)

  # check inputs
  fc_cols <- colnames(fc_df)
  has_ab <- "AB" %in% fc_cols
  has_sample <- "Sample" %in% fc_cols
  # Every required column must be present, and exactly one of AB/Sample.
  assert_that(all(c("comp1", "comp2", "FoldChange") %in% fc_cols),
              xor(has_ab, has_sample),
              msg = paste0(
                "'fc_df' should have one of each of the following columns:\n",
                "'comp1','comp2' & 'FoldChange'. It should also\n",
                "have EITHER a 'AB' or 'Sample' column"))
  assert_that(ncol(comparisons) >= 2,
              msg = "Check 'comparisons' df has 2 columns")
  assert_that(is.character(comparisons[[1]]),
              is.character(comparisons[[2]]),
              msg = paste0("Check that both columns in the 'comparisons'\n",
                           "data frame is of string type"))
  assert_that(is.logical(logdata), length(logdata) == 1,
              msg = "Check 'logdata' is a single logical")
  assert_that(is.logical(normalised), length(normalised) == 1,
              msg = "Check 'normalised' is a single logical")

  # create y axis title
  y_label <- "Fold Change"
  if (logdata) {
    y_label <- paste("Log2", y_label)
  }
  if (normalised) {
    y_label <- paste("Normalised", y_label)
  }

  # labels of the comparisons to keep, e.g. "A vs B"
  wanted_labels <- paste(comparisons[[1]], "vs", comparisons[[2]])

  # main plot
  gg <- fc_df %>%
    dplyr::mutate(Comp = paste(comp1, "vs", comp2)) %>%
    dplyr::filter(Comp %in% !!wanted_labels) %>%
    ggplot(aes(y = FoldChange, x = Comp)) +
    geom_bar(stat = "identity", position = "dodge") +
    coord_flip() +
    labs(title = "Fold change for each comparison", x = "Comparison",
         y = y_label) +
    theme(plot.title = element_text(hjust = 0.5))

  # facet per antibody or per sample; 'Antibody.Name' only exists when the
  # fold changes were merged with full antibody names, so fall back to 'AB'
  if (has_ab) {
    if ("Antibody.Name" %in% fc_cols) {
      gg <- gg +
        facet_wrap(. ~ Antibody.Name, ncol = 4)
    } else {
      gg <- gg +
        facet_wrap(. ~ AB, ncol = 4)
    }
  } else {
    gg <- gg +
      facet_wrap(. ~ Sample, ncol = 4)
  }
  gg
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.