exec/bsf_rnaseq_process_cuffdiff.R

#!/usr/bin/env Rscript
#
# Copyright 2013 - 2022 Michael K. Schuster
#
# Biomedical Sequencing Facility (BSF), part of the genomics core facility of
# the Research Center for Molecular Medicine (CeMM) of the Austrian Academy of
# Sciences and the Medical University of Vienna (MUW).
#
#
# This file is part of BSF R.
#
# BSF R is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# BSF R is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with BSF R.  If not, see <http://www.gnu.org/licenses/>.

# Description -------------------------------------------------------------


# BSF R script to post-processes Cuffdiff output utilising the cummeRbund
# package. This script creates a cummeRbund SQLite database, writes a set of QC
# plots in PDF and PNG format, splits the large and unwieldy differential data
# tables into pairwise comparisons and creates symbolic links to the original
# Cuffdiff differential tables.

# Option Parsing ----------------------------------------------------------


suppressPackageStartupMessages(expr = library(package = "optparse"))

argument_list <-
  optparse::parse_args(object = optparse::OptionParser(
    option_list = list(
      optparse::make_option(
        opt_str = "--verbose",
        action = "store_true",
        default = TRUE,
        help = "Print extra output [default]",
        type = "logical"
      ),
      optparse::make_option(
        opt_str = "--quiet",
        action = "store_false",
        default = FALSE,
        dest = "verbose",
        help = "Print little output",
        type = "logical"
      ),
      optparse::make_option(
        opt_str = "--comparison-name",
        dest = "comparison_name",
        help = "Comparison name",
        type = "character"
      ),
      optparse::make_option(
        opt_str = "--gtf-assembly",
        default = NULL,
        dest = "gtf_assembly",
        help = "GTF file specifying an assembled and merged transcriptome",
        type = "character"
      ),
      optparse::make_option(
        opt_str = "--gtf-reference",
        default = NULL,
        dest = "gtf_reference",
        help = "GTF file specifying a reference transcriptome",
        type = "character"
      ),
      optparse::make_option(
        opt_str = "--genome-version",
        default = NULL,
        dest = "genome_version",
        help = "Genome version",
        type = "character"
      ),
      optparse::make_option(
        opt_str = "--genome-directory",
        default = ".",
        dest = "genome_directory",
        help = "Genome directory path [.]",
        type = "character"
      ),
      optparse::make_option(
        opt_str = "--output-directory",
        default = ".",
        dest = "output_directory",
        help = "Output directory path [.]",
        type = "character"
      ),
      optparse::make_option(
        opt_str = "--plot-width",
        default = 7.0,
        dest = "plot_width",
        help = "Plot width in inches [7.0]",
        type = "double"
      ),
      optparse::make_option(
        opt_str = "--plot-height",
        default = 7.0,
        dest = "plot_height",
        help = "Plot height in inches [7.0]",
        type = "double"
      )
    )
  ))

if (is.null(x = argument_list$comparison_name)) {
  stop("Missing --comparison_name option")
}

if (is.null(x = argument_list$gtf_assembly)) {
  stop("Missing --gtf_assembly option")
}

if (is.null(x = argument_list$gtf_reference)) {
  stop("Missing --gtf_reference option")
}

if (is.null(x = argument_list$genome_version)) {
  stop("Missing --genome_version option")
}

# Library Import ----------------------------------------------------------


# CRAN r-lib
suppressPackageStartupMessages(expr = library(package = "sessioninfo"))
# CRAN Tidyverse
suppressPackageStartupMessages(expr = library(package = "ggplot2"))
# Bioconductor
suppressPackageStartupMessages(expr = library(package = "BiocVersion"))
suppressPackageStartupMessages(expr = library(package = "Biostrings"))
suppressPackageStartupMessages(expr = library(package = "cummeRbund"))
suppressPackageStartupMessages(expr = library(package = "rtracklayer"))

#' Convert character vector into a comma-separated value (CSV).
#' Apply unique() and sort(), before collapsing a character vector
#' into a single element, comma-separated value.
#'
#' @param x: Character vector
#' @return: Single element character vector of comma-sepatated values

character_to_csv <- function(x) {
  return(paste(sort(x = base::unique(x = x)), collapse = ","))
}

# Save plots in the following formats.

graphics_formats <- c("pdf" = "pdf", "png" = "png")

# Define Cuffdiff and output directory names relative to the
# working directory.

cuffdiff_directory <-
  file.path(
    argument_list$genome_directory,
    paste("rnaseq", "cuffdiff", argument_list$comparison_name, sep = "_")
  )

prefix <-
  paste("rnaseq",
        "process",
        "cuffdiff",
        argument_list$comparison_name,
        sep = "_")

# Create a new sub-directory for plots if it does not exist.

output_directory <-
  file.path(argument_list$output_directory, prefix)
if (!file.exists(output_directory)) {
  dir.create(path = output_directory,
             showWarnings = TRUE,
             recursive = FALSE)
}

# Create a cummeRbund database --------------------------------------------


# Read and index Cuffdiff output and create a CuffSet object.
# Load also the GTF file assembled by Cuffmerge, which fulfills foreign
# key constraints between the "features", "genes" and "isoforms" SQL tables.
# The CuffSet object has slots "genes", "isoforms", "TSS" and "CDS" that each
# are instances of the CuffData class. By default, CuffData accessor methods
# applied to a CuffSet class will operate on the "genes" slot.

message("Creating or loading a cummeRbund database")
cuff_set <-
  cummeRbund::readCufflinks(
    dir = cuffdiff_directory,
    gtfFile = argument_list$gtf_assembly,
    genome = argument_list$genome_version
  )

# Process Cuffdiff run information ----------------------------------------


frame_path <-
  file.path(output_directory, paste0(prefix, "_run_information.tsv"))
if (file.exists(frame_path) && file.info(frame_path)$size > 0L) {
  message("Skipping a run information table")
} else {
  message("Creating a run information table")

  utils::write.table(
    x = cummeRbund::runInfo(object = cuff_set),
    file = frame_path,
    quote = FALSE,
    sep = "\t",
    row.names = FALSE,
    col.names = TRUE
  )
}
rm(frame_path)

# Process Cuffdiff sample information -------------------------------------


samples_frame <- cummeRbund::samples(object = cuff_set)
# Get the number of samples.
sample_number <- base::nrow(x = samples_frame)
# Write the samples_frame table.
frame_path <-
  file.path(output_directory, paste0(prefix, "_samples.tsv"))
if (file.exists(frame_path) && file.info(frame_path)$size > 0L) {
  message("Skipping a sample table")
} else {
  message("Creating a sample table")

  utils::write.table(
    x = samples_frame,
    file = frame_path,
    quote = FALSE,
    sep = "\t",
    row.names = FALSE,
    col.names = TRUE
  )
}
rm(frame_path)
# Define an array of all possible pairwise sample comparisons or sample pairs.
sample_pairs <- combn(x = samples_frame$sample_name, m = 2L)
# Write a table of sample pair information by transposing the sample pairs
# array. This table allows the Python web code to link in pairwise plots.
frame_path <-
  file.path(output_directory, paste0(prefix, "_sample_pairs.tsv"))
if (file.exists(frame_path) && file.info(frame_path)$size > 0L) {
  message("Skipping a sample pairs table")
} else {
  message("Creating a sample pairs table")

  utils::write.table(
    x = aperm(a = sample_pairs),
    file = frame_path,
    quote = FALSE,
    sep = "\t",
    row.names = FALSE,
    col.names = TRUE
  )
}
rm(frame_path, samples_frame)

# Process Cuffdiff replicate information ----------------------------------


replicates_frame <- cummeRbund::replicates(object = cuff_set)
# Get the number of replicates.
replicate_number <- base::nrow(x = replicates_frame)
# Some plots require replicates. Check whether at least one row has a
# replicate column value greater than 0.
have_replicates <-
  (base::nrow(x = replicates_frame[replicates_frame$replicate > 0L,]) > 0L)

# Write the replicates frame.
frame_path <-
  file.path(output_directory, paste0(prefix, "_replicates.tsv"))
if (file.exists(frame_path) && file.info(frame_path)$size > 0L) {
  message("Skipping a replicate table")
} else {
  message("Creating a replicate table")

  utils::write.table(
    x = replicates_frame,
    file = frame_path,
    quote = FALSE,
    sep = "\t",
    row.names = FALSE,
    col.names = TRUE
  )
}
rm(frame_path)

# Plot the log10(internal_scale) of the replicates_frame to
# visualise outliers.
plot_paths <-
  file.path(output_directory, paste(
    paste(prefix, "replicate", "scale", sep = "_"),
    graphics_formats,
    sep = "."
  ))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a library scale plot on replicates")
} else {
  message("Creating a library scale plot on replicates")
  ggplot_object <- ggplot2::ggplot(data = replicates_frame)
  ggplot_object <-
    ggplot_object + ggplot2::geom_point(mapping = ggplot2::aes(x = .data$rep_name, y = log10(.data$internal_scale)))
  ggplot_object <-
    ggplot_object + ggplot2::labs(x = "replicate", y = "log10(internal_scale)")
  ggplot_object <-
    ggplot_object + ggplot2::theme(
      axis.text.x = ggplot2::element_text(
        size = ggplot2::rel(x = 0.8),
        hjust = 0.0,
        vjust = 0.5,
        angle = 90.0
      )
    )
  plot_width <-
    argument_list$plot_width + (ceiling(x = replicate_number / 24L) - 1L) * argument_list$plot_width * 0.25
  for (plot_path in plot_paths) {
    ggplot2::ggsave(
      filename = plot_path,
      plot = ggplot_object,
      width = plot_width,
      height = argument_list$plot_height,
      limitsize = FALSE
    )
  }
  rm(plot_path, plot_width, ggplot_object)
}
rm(plot_paths, replicates_frame)

# Starting QC plotting ----------------------------------------------------


message("Starting QC plotting")

# Dispersion Plot on Genes ------------------------------------------------


plot_paths <-
  file.path(output_directory, paste(
    paste(prefix, "genes", "dispersion", sep = "_"),
    graphics_formats,
    sep = "."
  ))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a Dispersion Plot on Genes")
} else {
  message("Creating a Dispersion Plot on Genes")
  ggplot_object <-
    cummeRbund::dispersionPlot(object = cummeRbund::genes(object = cuff_set))
  for (plot_path in plot_paths) {
    ggplot2::ggsave(
      filename = plot_path,
      plot = ggplot_object,
      width = argument_list$plot_width,
      height = argument_list$plot_height,
      limitsize = FALSE
    )
  }
  rm(plot_path, ggplot_object)
}
rm(plot_paths)

# Dispersion Plot on Isoforms ---------------------------------------------


plot_paths <-
  file.path(output_directory, paste(
    paste(prefix, "isoforms", "dispersion", sep = "_"),
    graphics_formats,
    sep = "."
  ))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a Dispersion Plot on Isoforms")
} else {
  message("Creating a Dispersion Plot on Isoforms")
  base::tryCatch(
    expr = {
      ggplot_object <-
        cummeRbund::dispersionPlot(object = cummeRbund::isoforms(object = cuff_set))
      for (plot_path in plot_paths) {
        ggplot2::ggsave(
          filename = plot_path,
          plot = ggplot_object,
          width = argument_list$plot_width,
          height = argument_list$plot_height,
          limitsize = FALSE
        )
      }
      rm(plot_path, ggplot_object)
    },
    error = function(cond) {
      message("Dispersion Plot on Isoforms failed with message:")
      message(cond, appendLF = TRUE)
      return(NULL)
    }
  )
}
rm(plot_paths)

# Squared Coefficient of Variation (SCV) Plot on Genes --------------------


plot_paths <-
  file.path(output_directory, paste(paste(prefix, "genes", "scv", sep = "_"), graphics_formats, sep = "."))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a SCV Plot on Genes")
} else {
  # The plot requires replicates.
  if (have_replicates) {
    message("Creating a SCV Plot on Genes")
    ggplot_object <-
      cummeRbund::fpkmSCVPlot(object = cummeRbund::genes(object = cuff_set))
    for (plot_path in plot_paths) {
      ggplot2::ggsave(
        filename = plot_path,
        plot = ggplot_object,
        width = argument_list$plot_width,
        height = argument_list$plot_height,
        limitsize = FALSE
      )
    }
    rm(plot_path, ggplot_object)
  } else {
    message("Skipping a SCV Plot on Genes in lack of replicates")
  }
}
rm(plot_paths)

# Squared Coefficient of Variation (SCV) Plot on Isoforms -----------------


plot_paths <-
  file.path(output_directory, paste(
    paste(prefix, "isoforms", "scv", sep = "_"),
    graphics_formats,
    sep = "."
  ))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a SCV Plot on Isoforms")
} else {
  # The plot requires replicates.
  if (have_replicates) {
    message("Creating a SCV Plot on Isoforms")
    ggplot_object <-
      cummeRbund::fpkmSCVPlot(object = cummeRbund::isoforms(object = cuff_set))
    for (plot_path in plot_paths) {
      ggplot2::ggsave(
        filename = plot_path,
        plot = ggplot_object,
        width = argument_list$plot_width,
        height = argument_list$plot_height,
        limitsize = FALSE
      )
    }
    rm(plot_path, ggplot_object)
  } else {
    message("Skipping a SCV Plot on Isoforms in lack of replicates")
  }
}
rm(plot_paths)

# Density Plot on Genes without replicates --------------------------------


plot_paths <-
  file.path(output_directory, paste(
    paste(prefix, "genes", "density", "wo", "replicates", sep = "_"),
    graphics_formats,
    sep = "."
  ))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a Density Plot on Genes without replicates")
} else {
  message("Creating a Density Plot on Genes without replicates")
  ggplot_object <-
    cummeRbund::csDensity(object = cummeRbund::genes(object = cuff_set),
                          replicates = FALSE)
  for (plot_path in plot_paths) {
    ggplot2::ggsave(
      filename = plot_path,
      plot = ggplot_object,
      width = argument_list$plot_width,
      height = argument_list$plot_height,
      limitsize = FALSE
    )
  }
  rm(plot_path, ggplot_object)
}
rm(plot_paths)

# Density Plot on Genes with replicates -----------------------------------


plot_paths <-
  file.path(output_directory, paste(
    paste(prefix, "genes", "density", "w", "replicates", sep = "_"),
    graphics_formats,
    sep = "."
  ))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a Density Plot on Genes with replicates")
} else {
  message("Creating a Density Plot on Genes with replicates")
  ggplot_object <-
    cummeRbund::csDensity(object = cummeRbund::genes(object = cuff_set),
                          replicates = TRUE)
  for (plot_path in plot_paths) {
    ggplot2::ggsave(
      filename = plot_path,
      plot = ggplot_object,
      width = argument_list$plot_width,
      height = argument_list$plot_height,
      limitsize = FALSE
    )
  }
  rm(plot_path, ggplot_object)
}
rm(plot_paths)

# Density Plot on Isoforms without replicates -----------------------------


plot_paths <-
  file.path(output_directory, paste(
    paste(prefix, "isoforms", "density", "wo", "replicates", sep = "_"),
    graphics_formats,
    sep = "."
  ))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a Density Plot on Isoforms without replicates")
} else {
  message("Creating a Density Plot on Isoforms without replicates")
  ggplot_object <-
    cummeRbund::csDensity(object = cummeRbund::isoforms(object = cuff_set),
                          replicates = FALSE)
  for (plot_path in plot_paths) {
    ggplot2::ggsave(
      filename = plot_path,
      plot = ggplot_object,
      width = argument_list$plot_width,
      height = argument_list$plot_height,
      limitsize = FALSE
    )
  }
  rm(plot_path, ggplot_object)
}
rm(plot_paths)

# Density Plot on Isoforms with replicates --------------------------------


plot_paths <-
  file.path(output_directory, paste(
    paste(prefix, "isoforms", "density", "w", "replicates", sep = "_"),
    graphics_formats,
    sep = "."
  ))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a Density Plot on Isoforms with replicates")
} else {
  message("Creating a Density Plot on Isoforms with replicates")
  ggplot_object <-
    cummeRbund::csDensity(object = cummeRbund::isoforms(object = cuff_set),
                          replicates = TRUE)
  for (plot_path in plot_paths) {
    ggplot2::ggsave(
      filename = plot_path,
      plot = ggplot_object,
      width = argument_list$plot_width,
      height = argument_list$plot_height,
      limitsize = FALSE
    )
  }
  rm(plot_path, ggplot_object)
}
rm(plot_paths)

# Box Plot on Genes with replicates ---------------------------------------


plot_paths <-
  file.path(output_directory, paste(
    paste(prefix, "genes", "box", "w", "replicates", sep = "_"),
    graphics_formats,
    sep = "."
  ))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a Box Plot on Genes with replicates")
} else {
  message("Creating a Box Plot on Genes with replicates")
  # By default, the csBoxplot function adds a pseudocount of 1e-04 for log10
  # transformed fpkms. In case of a large number of missing values in
  # shallowly sequenced samples, this affects the plot.
  # Hence reproduce the plot here.
  # ggplot_object <-
  #   cummeRbund::csBoxplot(object = cummeRbund::genes(object = cuff_set),
  #                         replicates = TRUE)
  genes_fpkm_frame <-
    cummeRbund::repFpkm(object = cummeRbund::genes(object = cuff_set))
  # Rename the "rep_name" column into "condition".
  base::names(x = genes_fpkm_frame)[base::names(x = genes_fpkm_frame) == "rep_name"] <-
    "condition"
  ggplot_object <- ggplot2::ggplot(data = genes_fpkm_frame)
  ggplot_object <-
    ggplot_object + ggplot2::geom_boxplot(
      mapping = ggplot2::aes(
        x = .data$condition,
        y = log10(.data$fpkm),
        fill = .data$condition
      ),
      size = 0.3,
      alpha = I(1 / 3)
    )
  ggplot_object <-
    ggplot_object + ggplot2::labs(x = "condition", y = "log10(FPKM)", fill = "condition")
  ggplot_object <-
    ggplot_object + ggplot2::theme(
      axis.text.x = ggplot2::element_text(
        size = ggplot2::rel(x = 0.8),
        hjust = 0.0,
        vjust = 0.5,
        angle = 90.0
      ),
      legend.text = ggplot2::element_text(size = ggplot2::rel(x = 0.8))
    )
  ggplot_object <-
    ggplot_object + ggplot2::scale_fill_hue(l = 50, h.start = 200)
  # Arrange a maximum of 24 replicates in each guide column.
  # ggplot_object <-
  #   ggplot_object + ggplot2::guides(fill = ggplot2::guide_legend(nrow = 24))
  # Use the base plot_width and add a quarter of the width for each additional
  # guide legend column.
  plot_width <-
    argument_list$plot_width + (ceiling(x = replicate_number / 24L) - 1L) * argument_list$plot_width * 0.25
  for (plot_path in plot_paths) {
    ggplot2::ggsave(
      filename = plot_path,
      plot = ggplot_object,
      width = plot_width,
      height = argument_list$plot_height,
      limitsize = FALSE
    )
  }
  rm(plot_path, plot_width, ggplot_object, genes_fpkm_frame)
}
rm(plot_paths)

# Box Plot on Genes without replicates ------------------------------------


plot_paths <-
  file.path(output_directory, paste(
    paste(prefix, "genes", "box", "wo", "replicates", sep = "_"),
    graphics_formats,
    sep = "."
  ))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a Box Plot on Genes without replicates")
} else {
  message("Creating a Box Plot on Genes without replicates")
  # ggplot_object <-
  #   cummeRbund::csBoxplot(object = cummeRbund::genes(object = cuff_set),
  #                         replicates = FALSE)
  genes_fpkm_frame <-
    cummeRbund::fpkm(object = cummeRbund::genes(object = cuff_set))
  # Rename the "sample_name" column into "condition".
  base::names(x = genes_fpkm_frame)[base::names(x = genes_fpkm_frame) == "sample_name"] <-
    "condition"
  ggplot_object <- ggplot2::ggplot(data = genes_fpkm_frame)
  ggplot_object <-
    ggplot_object + ggplot2::geom_boxplot(
      mapping = ggplot2::aes(
        x = .data$condition,
        y = log10(.data$fpkm),
        fill = .data$condition
      ),
      size = 0.3,
      alpha = I(1 / 3)
    )
  ggplot_object <-
    ggplot_object + ggplot2::labs(x = "condition", y = "log10(FPKM)", fill = "condition")
  ggplot_object <-
    ggplot_object + ggplot2::theme(
      axis.text.x = ggplot2::element_text(
        size = ggplot2::rel(x = 0.8),
        hjust = 0.0,
        vjust = 0.5,
        angle = 90.0
      ),
      legend.text = ggplot2::element_text(size = ggplot2::rel(x = 0.8))
    )
  ggplot_object <-
    ggplot_object + ggplot2::scale_fill_hue(l = 50, h.start = 200)
  # Arrange a maximum of 24 samples in each guide column.
  # ggplot_object <-
  #   ggplot_object + ggplot2::guides(fill = ggplot2::guide_legend(nrow = 24))
  # Use the base plot_witdh and add a quarter of the width for each additional
  # guide legend column.
  plot_width <-
    argument_list$plot_width + (ceiling(x = sample_number / 24L) - 1L) * argument_list$plot_width * 0.25
  for (plot_path in plot_paths) {
    ggplot2::ggsave(
      filename = plot_path,
      plot = ggplot_object,
      width = plot_width,
      height = argument_list$plot_height,
      limitsize = FALSE
    )
  }
  rm(plot_path, plot_width, ggplot_object, genes_fpkm_frame)
}
rm(plot_paths)

# Box Plot on Isoforms with replicates ------------------------------------


plot_paths <-
  file.path(output_directory, paste(
    paste(prefix, "isoforms", "box", "w", "replicates", sep = "_"),
    graphics_formats,
    sep = "."
  ))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a Box Plot on Isoforms with replicates")
} else {
  message("Creating a Box Plot on Isoforms with replicates")
  # By default, the csBoxplot function adds a pseudocount of 1e-04 for log10
  # transformed fpkms. In case of a large number of missing values in
  # shallowly sequenced samples, this affects the plot.
  # Hence reproduce the plot here.
  # ggplot_object <-
  #   cummeRbund::csBoxplot(object = cummeRbund::isoforms(object = cuff_set),
  #                         replicates = TRUE)
  isoforms_fpkm_frame <-
    cummeRbund::repFpkm(object = cummeRbund::isoforms(object = cuff_set))
  # Rename the "rep_name" column into "condition".
  base::names(x = isoforms_fpkm_frame)[base::names(x = isoforms_fpkm_frame) == "rep_name"] <-
    "condition"
  ggplot_object <- ggplot2::ggplot(data = isoforms_fpkm_frame)
  ggplot_object <-
    ggplot_object + ggplot2::geom_boxplot(
      mapping = ggplot2::aes(
        x = .data$condition,
        y = log10(.data$fpkm),
        fill = .data$condition
      ),
      size = 0.3,
      alpha = I(1 / 3)
    )
  ggplot_object <-
    ggplot_object + ggplot2::labs(x = "condition", y = "log10(FPKM)", fill = "condition")
  ggplot_object <-
    ggplot_object + ggplot2::theme(
      axis.text.x = ggplot2::element_text(
        size = ggplot2::rel(x = 0.8),
        hjust = 0.0,
        vjust = 0.5,
        angle = 90.0
      ),
      legend.text = ggplot2::element_text(size = ggplot2::rel(x = 0.8))
    )
  ggplot_object <-
    ggplot_object + ggplot2::scale_fill_hue(l = 50, h.start = 200)
  # Arrange a maximum of 24 replicates in each guide column.
  # ggplot_object <-
  #   ggplot_object + ggplot2::guides(fill = ggplot2::guide_legend(nrow = 24))
  # Use the base plot_witdh and add a quarter of the width for each additional
  # guide legend column.
  plot_width <-
    argument_list$plot_width + (ceiling(x = replicate_number / 24L) - 1L) * argument_list$plot_width * 0.25
  for (plot_path in plot_paths) {
    ggplot2::ggsave(
      filename = plot_path,
      plot = ggplot_object,
      width = plot_width,
      height = argument_list$plot_height,
      limitsize = FALSE
    )
  }
  rm(plot_path, plot_width, ggplot_object, isoforms_fpkm_frame)
}
rm(plot_paths)

# Box Plot on Isoforms without replicates ---------------------------------


plot_paths <-
  file.path(output_directory, paste(
    paste(prefix, "isoforms", "box", "wo", "replicates", sep = "_"),
    graphics_formats,
    sep = "."
  ))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a Box Plot on Isoforms without replicates")
} else {
  message("Creating a Box Plot on Isoforms without replicates")
  # ggplot_object <-
  #   cummeRbund::csBoxplot(object = cummeRbund::isoforms(object = cuff_set),
  #                         replicates = FALSE)
  isoforms_fpkm_frame <-
    cummeRbund::fpkm(object = cummeRbund::isoforms(object = cuff_set))
  # Rename the "sample_name" column into "condition".
  base::names(x = isoforms_fpkm_frame)[base::names(x = isoforms_fpkm_frame) == "sample_name"] <-
    "condition"
  ggplot_object <- ggplot2::ggplot(data = isoforms_fpkm_frame)
  ggplot_object <-
    ggplot_object + ggplot2::geom_boxplot(
      mapping = ggplot2::aes(
        x = .data$condition,
        y = log10(.data$fpkm),
        fill = .data$condition
      ),
      size = 0.3,
      alpha = I(1 / 3)
    )
  ggplot_object <-
    ggplot_object + ggplot2::labs(x = "condition", y = "log10(FPKM)", fill = "condition")
  ggplot_object <-
    ggplot_object + ggplot2::theme(
      axis.text.x = ggplot2::element_text(
        size = ggplot2::rel(x = 0.8),
        hjust = 0.0,
        vjust = 0.5,
        angle = 90.0
      ),
      legend.text = ggplot2::element_text(size = ggplot2::rel(x = 0.8))
    )
  ggplot_object <-
    ggplot_object + ggplot2::scale_fill_hue(l = 50, h.start = 200)
  # Arrange a maximum of 24 replicates in each guide column.
  # ggplot_object <-
  #   ggplot_object + ggplot2::guides(fill = ggplot2::guide_legend(nrow = 24))
  # Use the base plot_witdh and add a quarter of the with for each additional
  # guide legend column.
  plot_width <-
    argument_list$plot_width + (ceiling(x = sample_number / 24L) - 1L) * argument_list$plot_width * 0.25
  for (plot_path in plot_paths) {
    ggplot2::ggsave(
      filename = plot_path,
      plot = ggplot_object,
      width = plot_width,
      height = argument_list$plot_height,
      limitsize = FALSE
    )
  }
  rm(plot_path, plot_width, ggplot_object, isoforms_fpkm_frame)
}
rm(plot_paths)

# Scatter Matrix Plot on Genes --------------------------------------------


# Only include the plot for less than or equal to 20 samples.
if (sample_number <= 20L) {
  plot_paths <-
    file.path(output_directory, paste(
      paste(prefix, "genes", "scatter", "matrix", sep = "_"),
      graphics_formats,
      sep = "."
    ))
  if (all(file.exists(plot_paths)) &&
      all(file.info(plot_paths)$size > 0L)) {
    message("Skipping a Scatter Matrix Plot on Genes")
  } else {
    message("Creating a Scatter Matrix Plot on Genes")
    ggplot_object <-
      cummeRbund::csScatterMatrix(object = cummeRbund::genes(object = cuff_set))
    for (plot_path in plot_paths) {
      ggplot2::ggsave(
        filename = plot_path,
        plot = ggplot_object,
        width = argument_list$plot_width,
        height = argument_list$plot_height,
        limitsize = FALSE
      )
    }
    rm(plot_path, ggplot_object)
  }
  rm(plot_paths)
}

# Scatter Matrix Plot on Isoforms -----------------------------------------


if (sample_number <= 20L) {
  plot_paths <-
    file.path(output_directory, paste(
      paste(prefix, "isoforms", "scatter", "matrix", sep = "_"),
      graphics_formats,
      sep = "."
    ))
  if (all(file.exists(plot_paths)) &&
      all(file.info(plot_paths)$size > 0L)) {
    message("Skipping a Scatter Matrix Plot on Isoforms")
  } else {
    message("Creating a Scatter Matrix Plot on Isoforms")
    ggplot_object <-
      cummeRbund::csScatterMatrix(object = cummeRbund::isoforms(object = cuff_set))
    for (plot_path in plot_paths) {
      ggplot2::ggsave(
        filename = plot_path,
        plot = ggplot_object,
        width = argument_list$plot_width,
        height = argument_list$plot_height,
        limitsize = FALSE
      )
    }
    rm(plot_path, ggplot_object)
  }
  rm(plot_paths)
}

# Scatter Plot on Genes for each sample pair ------------------------------


for (i in seq_along(along.with = sample_pairs[1L,])) {
  plot_paths <-
    file.path(output_directory, paste(
      paste(
        prefix,
        sample_pairs[1L, i],
        sample_pairs[2L, i],
        "genes",
        "scatter",
        sep = "_"
      ),
      graphics_formats,
      sep = "."
    ))
  if (all(file.exists(plot_paths)) &&
      all(file.info(plot_paths)$size > 0L)) {
    message("Skipping a Scatter Plot on Genes for ",
            sample_pairs[1L, i],
            " versus ",
            sample_pairs[2L, i])
  } else {
    message("Creating a Scatter Plot on Genes for ",
            sample_pairs[1L, i],
            " versus ",
            sample_pairs[2L, i])

    # Unfortunately, the standard cummeRbund csScatter() function
    # does not allow colouring by significance.
    # ggplot_object <-
    #   cummeRbund::csScatter(
    #     object = cummeRbund::genes(object = cuff_set),
    #     x = sample_pairs[1L, i],
    #     y = sample_pairs[2L, i],
    #     colorByStatus = TRUE
    #   )

    # Re-implement scatter plots here.
    #
    # Retrieve a data frame with differntial expression data for genes, but
    # remove the first "gene_id" column, which is duplicated in the second
    # column as a consequence of a SQL table join between the "genes" and
    # "geneExpDiffData" tables.
    genes_diff_data_frame <-
      cummeRbund::diffData(
        object = cummeRbund::genes(object = cuff_set),
        x = sample_pairs[1L, i],
        y = sample_pairs[2L, i],
        features = FALSE
      )[,-1L]
    ggplot_object <-
      ggplot2::ggplot(data = genes_diff_data_frame,
                      mapping = ggplot2::aes(x = .data$value_1, y = .data$value_2))
    ggplot_object <- ggplot_object + ggplot2::theme_light()
    ggplot_object <-
      ggplot_object + ggplot2::labs(x = sample_pairs[1L, i], y = sample_pairs[2L, i])
    if (TRUE) {
      # Plot the non-significant genes with ggplot2::geom_hex(),
      # which performs much better with typical numbers of genes.
      ggplot_object <-
        ggplot_object + ggplot2::geom_hex(
          data = genes_diff_data_frame[genes_diff_data_frame$significant == "no",],
          alpha = I(1 / 3),
          show.legend = TRUE,
          bins = 50
        )
      # Manually set scale colours.
      ggplot_object <-
        ggplot_object +
        ggplot2::scale_fill_continuous(low = "#e6f0ff", high = "#0066ff")
      # Plot the significant genes with ggplot2::geom_point() in red.
      ggplot_object <-
        ggplot_object + ggplot2::geom_point(
          data = genes_diff_data_frame[genes_diff_data_frame$significant == "yes",],
          colour = "red",
          size = 1.2,
          alpha = I(1 / 3)
        )
    } else {
      # Plot significant and non-significant genes with ggplot2::geom_point().
      ggplot_object <-
        ggplot_object + ggplot2::geom_point(
          mapping = ggplot2::aes(colour = .data$significant),
          size = 1.2,
          alpha = I(1 / 3)
        )
      # Manually set scale colours.
      ggplot_object <-
        ggplot_object + ggplot2::scale_colour_manual(values = c("black", "red"))
    }
    ggplot_object <-
      ggplot_object + ggplot2::geom_abline(intercept = 0,
                                           slope = 1,
                                           linetype = 2)
    ggplot_object <-
      ggplot_object + ggplot2::geom_rug(size = 0.8, alpha = 0.01)
    # ggplot_object <-
    #   ggplot_object + ggplot2::stat_smooth(method = "lm", fill = "blue", alpha = 0.2)
    ggplot_object <-
      ggplot_object + ggplot2::scale_y_log10() + ggplot2::scale_x_log10()

    # Annotate the plot with the (Pearson) correlation coefficient and the
    # number of significantly up and downregulated genes.
    # For defining the data range for label placement,
    # eliminate rows with value 0.0.
    range_value_1 <-
      range(genes_diff_data_frame[genes_diff_data_frame$value_1 > 0.0,]$value_1)
    range_value_2 <-
      range(genes_diff_data_frame[genes_diff_data_frame$value_2 > 0.0,]$value_2)

    # Calculate the (Pearson) correlation coefficient and place it on the plot
    # in the lower right corner at 95% x and 5% y.
    ggplot_object <-
      ggplot_object + ggplot2::annotate(
        geom = "text",
        x = 10L ^ (log10(x = range_value_1[1L]) + (
          log10(x = range_value_1[2L]) - log10(x = range_value_1[1L])
        ) * 0.95),
        y = 10L ^ (log10(x = range_value_2[1L]) + (
          log10(x = range_value_2[2L]) - log10(x = range_value_2[1L])
        ) * 0.05),
        label = paste0("r = ", round(
          x = cor(x = genes_diff_data_frame$value_1, y = genes_diff_data_frame$value_2),
          digits = 3L
        ))
      )

    # Calculate the numbers of up and down regulated genes and place them
    # on the plot in the upper left corner at 5%x and 95% y.
    ggplot_object <-
      ggplot_object + ggplot2::annotate(
        geom = "text",
        x = 10L ^ (log10(x = range_value_1[1L]) + (
          log10(x = range_value_1[2L]) - log10(x = range_value_1[1L])
        ) * 0.05),
        y = 10L ^ (log10(x = range_value_2[1L]) + (
          log10(x = range_value_2[2L]) - log10(x = range_value_2[1L])
        ) * 0.95),
        label = paste0(
          "Up: ",
          base::nrow(x = genes_diff_data_frame[genes_diff_data_frame$log2_fold_change > 0.0 &
                                                 genes_diff_data_frame$significant == "yes",]),
          "\n",
          "Down: ",
          base::nrow(x = genes_diff_data_frame[genes_diff_data_frame$log2_fold_change < 0.0 &
                                                 genes_diff_data_frame$significant == "yes",])
        ),
        colour = "red",
        hjust = 0.0
      )

    rm(range_value_1, range_value_2)

    for (plot_path in plot_paths) {
      ggplot2::ggsave(
        filename = plot_path,
        plot = ggplot_object,
        width = argument_list$plot_width,
        height = argument_list$plot_height,
        limitsize = FALSE
      )
    }
    rm(plot_path, ggplot_object, genes_diff_data_frame)
  }
  rm(plot_paths)
}

# Dendrogram Plot on Genes ------------------------------------------------


# The csDendro function returns a dendrogram object that cannot be saved with
# the ggplot2::ggsave() function.
plot_paths <-
  file.path(output_directory, paste(
    paste(prefix, "genes", "dendrogram", sep = "_"),
    graphics_formats,
    sep = "."
  ))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a Dendrogram Plot on Genes")
} else {
  message("Creating a Dendrogram Plot on Genes")

  grDevices::pdf(
    file = plot_paths[1L],
    width = argument_list$plot_width,
    height = argument_list$plot_height
  )
  cummeRbund::csDendro(object = cummeRbund::genes(object = cuff_set))
  base::invisible(x = grDevices::dev.off())

  grDevices::png(
    filename = plot_paths[2L],
    width = argument_list$plot_width,
    height = argument_list$plot_height,
    units = "in",
    res = 300L
  )
  cummeRbund::csDendro(object = cummeRbund::genes(object = cuff_set))
  base::invisible(x = grDevices::dev.off())
}
rm(plot_paths)

# MA Plot on Genes for each sample pair based on FPKM values --------------


for (i in seq_along(along.with = sample_pairs[1L,])) {
  plot_paths <-
    file.path(output_directory, paste(
      paste(prefix, sample_pairs[1L, i], sample_pairs[2L, i], "genes", "ma", sep = "_"),
      graphics_formats,
      sep = "."
    ))
  if (all(file.exists(plot_paths)) &&
      all(file.info(plot_paths)$size > 0L)) {
    message("Skipping a MAplot on Genes for ",
            sample_pairs[1L, i],
            " versus ",
            sample_pairs[2L, i])
  } else {
    message("Creating a MAplot on Genes for ",
            sample_pairs[1L, i],
            " versus ",
            sample_pairs[2L, i])
    ggplot_object <-
      cummeRbund::MAplot(
        object = cummeRbund::genes(object = cuff_set),
        x = sample_pairs[1L, i],
        y = sample_pairs[2L, i]
      )
    for (plot_path in plot_paths) {
      ggplot2::ggsave(
        filename = plot_path,
        plot = ggplot_object,
        width = argument_list$plot_width,
        height = argument_list$plot_height,
        limitsize = FALSE
      )
    }
    rm(plot_path, ggplot_object)
  }
  rm(plot_paths)
}

# TODO: Create a MAplot on genes for each sample pair based on count data.

# Volcano Matrix Plot on Genes --------------------------------------------


plot_paths <-
  file.path(output_directory, paste(
    paste(prefix, "genes", "volcano", "matrix", sep = "_"),
    graphics_formats,
    sep = "."
  ))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a Volcano Matrix Plot on Genes")
} else {
  message("Creating a Volcano Matrix Plot on Genes")
  ggplot_object <-
    cummeRbund::csVolcanoMatrix(object = cummeRbund::genes(object = cuff_set))
  for (plot_path in plot_paths) {
    ggplot2::ggsave(
      filename = plot_path,
      plot = ggplot_object,
      width = argument_list$plot_width,
      height = argument_list$plot_height,
      limitsize = FALSE
    )
  }
  rm(plot_path, ggplot_object)
}
rm(plot_paths)

# Volcano Plot on Genes for each sample pair ------------------------------


for (i in seq_along(along.with = sample_pairs[1L,])) {
  plot_paths <-
    file.path(output_directory,
              paste(
                paste(
                  prefix,
                  sample_pairs[1L, i],
                  sample_pairs[2L, i],
                  "genes",
                  "volcano",
                  sep = "_"
                ),
                graphics_formats,
                sep = "."
              ))
  if (all(file.exists(plot_paths)) &&
      all(file.info(plot_paths)$size > 0L)) {
    message("Skipping a Volcano Plot on Genes for ",
            sample_pairs[1L, i],
            " versus ",
            sample_pairs[2L, i])
  } else {
    message("Creating a Volcano Plot on Genes for ",
            sample_pairs[1L, i],
            " versus ",
            sample_pairs[2L, i])
    # FIXME: The definition of the generic function "csVolcano" does not
    # include the "alpha" and "showSignificant" options, although the function
    # definition contains them. It does not seem that the option defaults are
    # used.
    # In R/methods-CuffData.R:
    # .volcano <-
    #   function(object, x, y, alpha = 0.05, showSignificant = TRUE,
    #            features = FALSE, xlimits = c(-20, 20), ...)
    # setMethod("csVolcano", signature(object = "CuffData"), .volcano)
    # In R/AllGenerics.R:
    # setGeneric("csVolcano",
    #            function(object, x, y, features = F, ...) standardGeneric("csVolcano"))
    ggplot_object <-
      cummeRbund::csVolcano(
        object = cummeRbund::genes(object = cuff_set),
        x = sample_pairs[1L, i],
        y = sample_pairs[2L, i],
        alpha = 0.05,
        showSignificant = TRUE
      )
    for (plot_path in plot_paths) {
      ggplot2::ggsave(
        filename = plot_path,
        plot = ggplot_object,
        width = argument_list$plot_width,
        height = argument_list$plot_height,
        limitsize = FALSE
      )
    }
    rm(plot_path, ggplot_object)
  }
  rm(plot_paths)
}

# Multidimensional Scaling (MDS) Plot on Genes ----------------------------


# Plot only, if the CuffData object contains more than two replicates.
if (replicate_number > 2L) {
  plot_paths <-
    file.path(output_directory, paste(
      paste(prefix, "genes", "mds", sep = "_"),
      graphics_formats,
      sep = "."
    ))
  if (all(file.exists(plot_paths)) &&
      all(file.info(plot_paths)$size > 0L)) {
    message("Skipping a Multidimensional Scaling Plot on Genes")
  } else {
    # if (have_replicates) {
    message("Creating a Multidimensional Scaling Plot on Genes")
    # Nothing ever is simple. If the set has too many replicates, the
    # standard cummeRbund MDSplot() falls down.
    if (replicate_number <= 24L) {
      ggplot_object <-
        cummeRbund::MDSplot(object = cummeRbund::genes(object = cuff_set),
                            replicates = TRUE)
      for (plot_path in plot_paths) {
        ggplot2::ggsave(
          filename = plot_path,
          plot = ggplot_object,
          width = argument_list$plot_width,
          height = argument_list$plot_height,
          limitsize = FALSE
        )
      }
      rm(plot_path, ggplot_object)
    } else {
      # The standard MDSplot has too many replicates.
      gene_rep_fit <-
        stats::cmdscale(
          d = cummeRbund::JSdist(mat = cummeRbund::makeprobs(
            a = cummeRbund::repFpkmMatrix(object = cummeRbund::genes(object = cuff_set))
          )),
          eig = TRUE,
          k = 2
        )
      gene_rep_res <-
        data.frame(
          "names" = base::rownames(x = gene_rep_fit$points),
          "M1" = gene_rep_fit$points[, 1L],
          "M2" = gene_rep_fit$points[, 2L],
          stringsAsFactors = FALSE
        )
      ggplot_object <- ggplot2::ggplot(data = gene_rep_res)
      ggplot_object <-
        ggplot_object + ggplot2::theme_bw()  # Add theme black and white.
      ggplot_object <-
        ggplot_object + ggplot2::geom_point(mapping = ggplot2::aes(
          x = .data$M1,
          y = .data$M2,
          colour = .data$names
        ))  # Draw points in any case.
      if (replicate_number <= 24L) {
        # Only add text for a sensible number of replicates i.e. less than or
        # equal to 24.
        ggplot_object <-
          ggplot_object + ggplot2::geom_text(
            mapping = ggplot2::aes(
              x = .data$M1,
              y = .data$M2,
              label = .data$names,
              colour = .data$names
            ),
            size = 4
          )
      }
      # Arrange a maximum of 24 replicates in each guide column.
      ggplot_object <-
        ggplot_object + ggplot2::guides(colour = ggplot2::guide_legend(nrow = 24))
      # Use the base plot_witdh and add a quarter of the width for each
      # additional guide legend column.
      plot_width <-
        argument_list$plot_width + (ceiling(x = replicate_number / 24L) - 1L) * argument_list$plot_width * 0.25
      for (plot_path in plot_paths) {
        ggplot2::ggsave(
          filename = plot_path,
          plot = ggplot_object,
          width = plot_width,
          height = argument_list$plot_height,
          limitsize = FALSE
        )
      }
      rm(plot_path,
         plot_width,
         ggplot_object,
         gene_rep_res,
         gene_rep_fit)
    }
  }
  rm(plot_paths)
} else {
  message("Skipping Multidimensional Scaling Plot on genes in lack of sufficient replicates")
}

# Principal Component Analysis (PCA) Plot on Genes ------------------------


# TODO: Add also other principal components or even better,
# use plots of the PCA package?
plot_paths <-
  file.path(output_directory, paste(paste(prefix, "genes", "pca", sep = "_"), graphics_formats, sep = "."))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a Principal Component Analysis Plot (PCA) on Genes")
} else {
  message("Creating a Principal Component Analysis Plot (PCA) on Genes")
  ggplot_object <-
    cummeRbund::PCAplot(
      object = cummeRbund::genes(object = cuff_set),
      x = "PC1",
      y = "PC2",
      replicates = TRUE
    )
  for (plot_path in plot_paths) {
    ggplot2::ggsave(
      filename = plot_path,
      plot = ggplot_object,
      width = argument_list$plot_width,
      height = argument_list$plot_height,
      limitsize = FALSE
    )
  }
  rm(plot_path, ggplot_object)
}
rm(plot_paths)

# Finishing QC plotting ---------------------------------------------------


message("Finishing QC plotting")

# Starting data table splitting -------------------------------------------


# TODO: Process gene and isoform sets and lists. Feature-level information can
# be accessed directly from a CuffData object using the fpkm, repFpkm, count,
# diffData, or annotation methods.

# Split the large and unwieldy differential data tables into
# pairwise comparisons, but read or assemble gene and isoform
# annotation first.

# Gene and Isoform Annotation ---------------------------------------------


genes_annotation_frame <- NULL
isoforms_annotation_frame <- NULL
frame_path_genes <-
  file.path(output_directory, paste0(prefix, "_genes_annotation.tsv"))
frame_path_isoforms <-
  file.path(output_directory,
            paste0(prefix, "_isoforms_annotation.tsv"))
if (file.exists(frame_path_genes) &&
    file.info(frame_path_genes)$size > 0L) {
  message("Reading gene annotation from file")
  genes_annotation_frame <-
    utils::read.table(file = frame_path_genes,
                      header = TRUE,
                      sep = "\t")
  isoforms_annotation_frame <-
    utils::read.table(file = frame_path_isoforms,
                      header = TRUE,
                      sep = "\t")
} else {
  # Unfortunately, cummeRbund does not seem to offer a convenient way to
  # correlate Cufflinks (XLOC) gene identifiers with the original
  # Ensembl (ENSG) gene identifiers.
  # Thus, the following steps are neccessary:
  #
  # 1. Read the reference GTF to correlate Ensembl (ENSG) gene and
  # (ENST) transcript identifiers, as well as official gene symbols.
  # Read only features of type "transcript".
  message("Reading reference transcriptome annotation")
  reference_granges <- rtracklayer::import(
    con = argument_list$gtf_reference,
    format = "gtf",
    genome = argument_list$genome_version,
    feature.type = "transcript"
  )
  # Selecting via S4Vectors::mcols()[] returns a S4Vectors::DataFrame object.
  reference_frame <- base::unique.data.frame(
    x = data.frame(
      "ensembl_gene_id" = S4Vectors::mcols(x = reference_granges)$gene_id,
      "ensembl_transcript_id" = S4Vectors::mcols(x = reference_granges)$transcript_id,
      stringsAsFactors = TRUE
    )
  )
  rm(reference_granges)

  # 2. Read the assembly GTF, which specifies Cufflinks (XLOC) gene and
  # Cufflinks (TCONS) transcript identifiers, but does no longer provide
  # information about Ensembl (ENSG) gene identifiers. Reference transcriptome
  # loci may have been merged or split by Cuffmerge.
  # The Cuffmerge GTF file has only features of type "exon".
  message("Reading assembled transcriptome annotation")
  assembly_granges <- rtracklayer::import(
    con = argument_list$gtf_assembly,
    format = "gtf",
    genome = argument_list$genome_version,
    feature.type = "exon"
  )
  if ("nearest_ref" %in% S4Vectors::colnames(x = S4Vectors::mcols(x = assembly_granges))) {
    # If a "nearest_ref" variable is defined, the GTF is a Cuffmerge assembly.
    #
    # Example: gene_id "XLOC_000001"; transcript_id "TCONS_00000001"; exon_number "1";
    #          gene_name "DDX11L1"; oId "CUFF.1.2"; nearest_ref "ENST00000450305";
    #          class_code "="; tss_id "TSS1";
    #
    # Selecting via S4Vectors::mcols()[] returns a S4Vectors::DataFrame object.
    assembly_frame <- base::unique.data.frame(
      x = data.frame(
        "gene_id" = S4Vectors::mcols(x = assembly_granges)$gene_id,
        "transcript_id" = S4Vectors::mcols(x = assembly_granges)$transcript_id,
        "gene_name" = S4Vectors::mcols(x = assembly_granges)$gene_name,
        "ensembl_transcript_id" = S4Vectors::mcols(x = assembly_granges)$nearest_ref,
        stringsAsFactors = TRUE
      )
    )

    # 3. Join the reference and assembly data frames to correlate
    # Cufflinks (XLOC) gene identifiers with Ensembl (ENSG) gene identifiers
    # via Ensembl (ENST) transcript identifiers.
    message("Merging reference and assembled transcriptome annotation")
    ensembl_frame <-
      base::merge.data.frame(
        x = assembly_frame,
        y = reference_frame,
        by = "ensembl_transcript_id",
        # Keep all XLOC entries.
        all.x = TRUE
      )
    rm(reference_frame, assembly_frame)

    # 4. Create a new, normalised Ensembl annotation data frame that correlates
    # each Cufflinks (XLOC) gene identifier with comma-separated lists of one or
    # more Ensembl (ENSG) gene and Ensembl (ENST) transcript identifiers.
    message("Aggregating Ensembl annotation by gene_id")
    aggregate_frame <-
      base::aggregate.data.frame(
        x = ensembl_frame,
        by = list("gene_id" = ensembl_frame$gene_id),
        FUN = paste
      )
    rm(ensembl_frame)
    # The aggregate frame consists of list objects of character vectors
    # with one or more elements aggregated in each group.
    # For each character vector, the character_to_csv() function finds unique
    # elements and sorts them, before collapsing them into a single,
    # comma-separated value.
    message("Collapsing aggregated Ensembl annotation")
    ensembl_annotation_frame <-
      data.frame(
        "gene_id" = unlist(x = lapply(
          X = aggregate_frame$gene_id, FUN = character_to_csv
        )),
        "transcript_ids" = unlist(
          x = lapply(X = aggregate_frame$transcript_id, FUN = character_to_csv)
        ),
        # "gene_names" = unlist(x = lapply(X = aggregate_frame$gene_name, FUN = character_to_csv)),
        "ensembl_gene_ids" = unlist(
          x = lapply(X = aggregate_frame$ensembl_gene_id, FUN = character_to_csv)
        ),
        "ensembl_transcript_ids" = unlist(
          x = lapply(X = aggregate_frame$ensembl_transcript_id, FUN = character_to_csv)
        ),
        stringsAsFactors = FALSE
      )
    rm(aggregate_frame)
  } else {
    # If a "nearest_ref" variable is missing,
    # the GTF is the original reference without de-novo transcript assembly.
    #
    # Example: gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2";
    #          exon_number "1"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene";
    #          havana_gene "OTTHUMG00000000961"; havana_gene_version "2";
    #          transcript_name "DDX11L1-002"; transcript_source "havana"; transcript_biotype "processed_transcript";
    #          havana_transcript "OTTHUMT00000362751"; havana_transcript_version "1";
    #          exon_id "ENSE00002234944"; exon_version "1";
    #          tag "basic"; transcript_support_level "1";
    #
    # 3. Joining reference and assembly frames is not required.
    # 4. Create a new, normalised Ensembl annotation data frame.
    ensembl_annotation_frame <- data.frame(
      "gene_id" = reference_frame$ensembl_gene_id,
      "transcript_ids" = reference_frame$ensembl_transcript_id,
      "ensembl_gene_ids" = reference_frame$ensembl_gene_id,
      "ensembl_transcript_ids" = reference_frame$ensembl_transcript_id,
      stringsAsFactors = FALSE
    )
    rm(reference_frame)
  }
  rm(assembly_granges)

  # 5. Create a gene annotation frame, ready for enriching
  # cummeRbund gene information, by merging the "gene_id", "gene_short_name"
  # and "locus" variables of the gene annotation frame with the "transcript_ids",
  # "ensembl_gene_ids" and "ensembl_transcript_ids" of the
  # Ensembl annotation frame.
  message("Creating gene annotation information")
  cufflinks_annotation_frame <-
    cummeRbund::annotation(object = cummeRbund::genes(object = cuff_set))
  genes_annotation_frame <- base::merge.data.frame(
    # Merge with a simplified cummeRbund gene annotation data frame, since
    # variables "class_code", "nearest_ref_id", "length" and "coverage" seem
    # empty by design. Remove hidden exon information by finding unique
    # rows only.
    x = base::unique.data.frame(x = cufflinks_annotation_frame[, c("gene_id", "gene_short_name", "locus"), drop = FALSE]),
    # Merge with the Ensembl annotation data frame that correlates gene (XLOC)
    # identifiers with comma-separated lists of Ensembl Gene and Transcript
    # identifiers.
    y = ensembl_annotation_frame,
    # Merge on gene (XLOC) identifiers, but also keep all rows of the
    # cufflinks_annotation_frame, or else, locus information would be lost.
    by = "gene_id",
    all.x = TRUE
  )
  rm(ensembl_annotation_frame, cufflinks_annotation_frame)

  # 6. Create an isoform annotation data frame, ready for enriching
  # cummeRbund isoform information.
  # Simplify the cummeRbund isoform annotation data frame,
  # since variables "CDS_id" and "coverage" seem empty by design.
  # Remove hidden exon information by finding unique rows only.

  message("Creating isoform annotation information")
  cufflinks_annotation_frame <-
    cummeRbund::annotation(object = cummeRbund::isoforms(object = cuff_set))
  isoforms_annotation_frame <-
    base::unique.data.frame(x = cufflinks_annotation_frame[, c(
      "isoform_id",
      "gene_id",
      "gene_short_name",
      "TSS_group_id",
      "class_code",
      "nearest_ref_id",
      "locus",
      "length"
    ), drop = FALSE])
  rm(cufflinks_annotation_frame)

  utils::write.table(
    x = genes_annotation_frame,
    file = frame_path_genes,
    sep = "\t",
    row.names = FALSE
  )

  utils::write.table(
    x = isoforms_annotation_frame,
    file = frame_path_isoforms,
    sep = "\t",
    row.names = FALSE
  )
}
rm(frame_path_genes, frame_path_isoforms)

# Update the cummeRbund SQLite database -----------------------------------


# Push the aggregated Ensembl gene annotation back into the SQLite database.
if ("ensembl_gene_ids" %in% base::names(x = cummeRbund::annotation(object = cummeRbund::genes(object = cuff_set)))) {
  message("Skipping Ensembl annotation for the SQLite database")
} else {
  message("Creating Ensembl annotation for the SQLite database")
  cummeRbund::addFeatures(
    object = cuff_set,
    features = data.frame(
      "gene_id" = genes_annotation_frame$gene_id,
      "ensembl_gene_ids" = genes_annotation_frame$ensembl_gene_ids,
      "ensembl_transcript_ids" = genes_annotation_frame$ensembl_transcript_ids,
      stringsAsFactors = FALSE
    ),
    level = "genes"
  )
}

# Differential Genes per sample pair --------------------------------------


# Create an annotated differential genes data frame for each sample pair.
for (i in seq_along(along.with = sample_pairs[1L,])) {
  frame_path <-
    file.path(
      output_directory,
      paste(prefix, sample_pairs[1L, i], sample_pairs[2L, i], "genes_diff.tsv", sep = "_")
    )
  if (file.exists(frame_path) && file.info(frame_path)$size > 0L) {
    message(
      "Skipping a differential data frame on Genes for ",
      sample_pairs[1L, i],
      " versus ",
      sample_pairs[2L, i]
    )
  } else {
    message(
      "Creating a differential data frame on Genes for ",
      sample_pairs[1L, i],
      " versus ",
      sample_pairs[2L, i]
    )
    # The cummeRbund::diffData() function allows automatic merging with feature
    # annotation, but that includes some empty columns. For cleaner result
    # tables, merge with the smaller genes_annotation_frame established above.
    #
    # Retrieve a data frame with differential expression data for genes, but
    # remove the first "gene_id" column, which is duplicated in the second
    # column as a consequence of a SQL table join between the "genes" and
    # "geneExpDiffData" tables.
    genes_diff_data_frame <-
      cummeRbund::diffData(
        object = cummeRbund::genes(object = cuff_set),
        x = sample_pairs[1L, i],
        y = sample_pairs[2L, i],
        features = FALSE
      )[,-1L]
    # Calculate ranks for the effect size (log2_fold_change), absolute level
    # and statistical significance (q_value).
    genes_diff_data_frame$rank_log2_fold_change <-
      rank(
        x = -abs(x = genes_diff_data_frame$log2_fold_change),
        ties.method = c("min")
      )
    genes_diff_data_frame$rank_value <-
      rank(
        x = -abs(x = genes_diff_data_frame$value_2 - genes_diff_data_frame$value_1),
        ties.method = c("min")
      )
    genes_diff_data_frame$rank_q_value <-
      rank(x = genes_diff_data_frame$q_value, ties.method = c("min"))
    # Calculate the rank sum for the three ranks.
    # genes_diff_data_frame$rank_sum <-
    #   genes_diff_data_frame$rank_log2_fold_change + genes_diff_data_frame$rank_value +
    #   genes_diff_data_frame$rank_q_value
    # Calculate the maximum rank of value and q-value, as
    # rank_log2_fold_change is dominated by +/- infinity resulting from genes
    # measured only once.
    genes_diff_data_frame$max_rank <-
      pmax(genes_diff_data_frame$rank_value,
           genes_diff_data_frame$rank_q_value)

    utils::write.table(
      x = base::merge.data.frame(
        x = genes_annotation_frame,
        y = genes_diff_data_frame,
        by = "gene_id",
        all = TRUE,
        sort = TRUE
      ),
      file = frame_path,
      quote = FALSE,
      sep = "\t",
      row.names = FALSE,
      col.names = TRUE
    )
    rm(genes_diff_data_frame)
  }
  rm(frame_path)
}

# Differential Isoforms per sample pair -----------------------------------


# Create an annotated differential isoforms data frame for each sample pair.
for (i in seq_along(along.with = sample_pairs[1L,])) {
  frame_path <-
    file.path(
      output_directory,
      paste(
        prefix,
        sample_pairs[1L, i],
        sample_pairs[2L, i],
        "isoforms_diff.tsv",
        sep = "_"
      )
    )
  if (file.exists(frame_path) && file.info(frame_path)$size > 0L) {
    message(
      "Skipping a differential data frame on Isoforms for ",
      sample_pairs[1L, i],
      " versus ",
      sample_pairs[2L, i]
    )
  } else {
    message(
      "Creating a differential data frame on Isoforms for ",
      sample_pairs[1L, i],
      " versus ",
      sample_pairs[2L, i]
    )
    # The cummeRbund::diffData() function allows automatic merging with feature
    # annotation, but that includes some empty columns. For cleaner result
    # tables, merge with the smaller isoforms_annotation_frame established above.
    #
    # Retrieve a data frame with differential expression data for genes, but
    # remove the first "isoform_id" column, which is duplicated in the second
    # column as a consequence of a SQL table join between the "isoforms" and
    # "isoformExpDiffData" tables.
    isoforms_diff_data_frame <-
      cummeRbund::diffData(
        object = cummeRbund::isoforms(object = cuff_set),
        x = sample_pairs[1L, i],
        y = sample_pairs[2L, i],
        features = FALSE
      )[,-1L]
    # Calculate ranks for the effect size (log2_fold_change), absolute level
    # and statistical significance (q_value).
    isoforms_diff_data_frame$rank_log2_fold_change <-
      rank(
        x = -abs(x = isoforms_diff_data_frame$log2_fold_change),
        ties.method = c("min")
      )
    isoforms_diff_data_frame$rank_value <-
      rank(
        x = -abs(x = isoforms_diff_data_frame$value_2 - isoforms_diff_data_frame$value_1),
        ties.method = c("min")
      )
    isoforms_diff_data_frame$rank_q_value <-
      rank(x = isoforms_diff_data_frame$q_value, ties.method = c("min"))
    # Calculate the rank sum for the three ranks.
    # isoforms_diff_data_frame$rank_sum <-
    #   isoforms_diff_data_frame$rank_log2_fold_change + isoforms_diff_data_frame$rank_value +
    #   isoforms_diff_data_frame$rank_q_value
    # Calculate the maximum rank of value and q-value, as rank_log2_fold_change
    # is dominated by +/- infinity resulting from genes measured only once.
    isoforms_diff_data_frame$max_rank <-
      pmax(isoforms_diff_data_frame$rank_value,
           isoforms_diff_data_frame$rank_q_value)

    utils::write.table(
      x = base::merge.data.frame(
        x = isoforms_annotation_frame,
        y = isoforms_diff_data_frame,
        by = "isoform_id",
        all = TRUE,
        sort = TRUE
      ),
      file = frame_path,
      quote = FALSE,
      sep = "\t",
      row.names = FALSE,
      col.names = TRUE
    )
    rm(isoforms_diff_data_frame)
  }
  rm(frame_path)
}

# Aggregate status information --------------------------------------------


# Aggregate the "status" variable of differential data frames to inform about
# the number of NOTEST, HIDATA, LOWDATA and FAIL states.
# It is silly to redo this outside of the loops splitting the results, but if
# files above were partially written, the status frame would not be complete.

# Status frame to aggregate the test status columns in pairwise comparisons of genes.
frame_path <-
  file.path(output_directory,
            paste0(prefix, "_genes_status.tsv"))
if (file.exists(frame_path) && file.info(frame_path)$size > 0L) {
  message("Skipping aggregated status information on Genes")
} else {
  message("Creating aggregated status information on Genes")
  status_frame <-
    data.frame(
      # The sample_pairs character matrix contains pairs in columns,
      # so that the apply function needs to process by columns (i.e. MARGIN = 2L).
      "PAIR" = apply(
        X = sample_pairs,
        MARGIN = 2L,
        FUN = function(x) {
          paste(x, collapse = "__")
        }
      ),
      "OK" = integer(length = length(x = sample_pairs[1L,])),
      "NOTEST" = integer(length = length(x = sample_pairs[1L,])),
      "HIDATA" = integer(length = length(x = sample_pairs[1L,])),
      "LOWDATA" = integer(length = length(x = sample_pairs[1L,])),
      "FAIL" = integer(length = length(x = sample_pairs[1L,])),
      "SUM" = integer(length = length(x = sample_pairs[1L,])),
      row.names = apply(
        X = sample_pairs,
        MARGIN = 2L,
        FUN = function(x) {
          paste(x, collapse = "__")
        }
      )
    )

  for (i in seq_along(along.with = sample_pairs[1L,])) {
    genes_diff_data_frame <-
      cummeRbund::diffData(
        object = cummeRbund::genes(object = cuff_set),
        x = sample_pairs[1L, i],
        y = sample_pairs[2L, i],
        features = FALSE
      )[,-1L]
    # Aggregate the test Status column.
    status_integer <- table(genes_diff_data_frame$status)
    for (status in base::names(x = status_frame)) {
      if (!is.na(x = status_integer[status])) {
        status_frame[i, status] <- status_integer[status]
      }
    }
    rm(status, status_integer, genes_diff_data_frame)
    status_frame$SUM[i] <- sum(status_frame[i, 2L:6L])
  }
  utils::write.table(
    x = status_frame,
    file = frame_path,
    row.names = FALSE,
    sep = "\t"
  )
  rm(status_frame)
}
rm(frame_path)

# Status frame to aggregate the test status columns in pairwise comparisons of isoforms.
frame_path <-
  file.path(output_directory,
            paste0(prefix, "_isoforms_status.tsv"))
if (file.exists(frame_path) && file.info(frame_path)$size > 0L) {
  message("Skipping aggregated status information on Isoforms")
} else {
  message("Creating aggregated status information on Isoforms")
  status_frame <-
    data.frame(
      # The sample_pairs character matrix contains pairs in columns,
      # so that the apply function needs to process by columns (i.e. MARGIN = 2L).
      "PAIR" = apply(
        X = sample_pairs,
        MARGIN = 2L,
        FUN = function(x) {
          paste(x, collapse = "__")
        }
      ),
      "OK" = integer(length = length(x = sample_pairs[1L,])),
      "NOTEST" = integer(length = length(x = sample_pairs[1L,])),
      "HIDATA" = integer(length = length(x = sample_pairs[1L,])),
      "LOWDATA" = integer(length = length(x = sample_pairs[1L,])),
      "FAIL" = integer(length = length(x = sample_pairs[1L,])),
      "SUM" = integer(length = length(x = sample_pairs[1L,])),
      row.names = apply(
        X = sample_pairs,
        MARGIN = 2L,
        FUN = function(x) {
          paste(x, collapse = "__")
        }
      )
    )

  for (i in seq_along(along.with = sample_pairs[1L,])) {
    isoforms_diff_data_frame <-
      cummeRbund::diffData(
        object = cummeRbund::isoforms(object = cuff_set),
        x = sample_pairs[1L, i],
        y = sample_pairs[2L, i],
        features = FALSE
      )[,-1L]
    # Aggregate the test Status column.
    status_integer <- table(isoforms_diff_data_frame$status)
    for (status in base::names(x = status_frame)) {
      if (!is.na(x = status_integer[status])) {
        status_frame[i, status] <- status_integer[status]
      }
    }
    rm(status, status_integer, isoforms_diff_data_frame)
    status_frame$SUM[i] <- sum(status_frame[i, 2L:6L])
  }

  utils::write.table(
    x = status_frame,
    file = frame_path,
    row.names = FALSE,
    sep = "\t"
  )
  rm(status_frame)
}
rm(frame_path)

# Matrix of FPKM values per replicate on Genes ----------------------------


frame_path <-
  file.path(output_directory,
            paste0(prefix, "_genes_fpkm_replicates.tsv"))
if (file.exists(frame_path) && file.info(frame_path)$size > 0L) {
  message("Skipping a matrix of FPKM values per replicates on genes")
} else {
  message("Creating a matrix of FPKM values per replicates on genes")
  genes_fpkm_frame <-
    cummeRbund::repFpkmMatrix(object = cummeRbund::genes(object = cuff_set))

  utils::write.table(
    x = base::merge.data.frame(
      x = genes_annotation_frame,
      y = genes_fpkm_frame,
      by.x = "gene_id",
      by.y = "row.names",
      all = TRUE,
      sort = TRUE
    ),
    file = frame_path,
    quote = FALSE,
    sep = "\t",
    row.names = FALSE,
    col.names = TRUE
  )
  rm(genes_fpkm_frame)
}
rm(frame_path)

# Matrix of count values per replicate on genes.

frame_path <-
  file.path(output_directory,
            paste0(prefix, "_genes_counts_replicates.tsv"))
if (file.exists(frame_path) && file.info(frame_path)$size > 0L) {
  message("Skipping a matrix of count values per replicates on genes")
} else {
  message("Creating a matrix of count values per replicates on genes")
  genes_count_frame <-
    cummeRbund::repCountMatrix(object = cummeRbund::genes(object = cuff_set))

  utils::write.table(
    x = base::merge.data.frame(
      x = genes_annotation_frame,
      y = genes_count_frame,
      by.x = "gene_id",
      by.y = "row.names",
      all = TRUE,
      sort = TRUE
    ),
    file = frame_path,
    quote = FALSE,
    sep = "\t",
    row.names = FALSE,
    col.names = TRUE
  )
  rm(genes_count_frame)
}
rm(frame_path)

# Matrix of FPKM values per replicate on Isoforms -------------------------


frame_path <-
  file.path(output_directory,
            paste0(prefix, "_isoforms_fpkm_replicates.tsv"))
if (file.exists(frame_path) && file.info(frame_path)$size > 0L) {
  message("Skipping a matrix of FPKM per replicates on isoforms")
} else {
  message("Creating a matrix of FPKM per replicates on isoforms")
  isoforms_fpkm_frame <-
    cummeRbund::repFpkmMatrix(object = cummeRbund::isoforms(object = cuff_set))

  utils::write.table(
    x = base::merge.data.frame(
      x = isoforms_annotation_frame,
      y = isoforms_fpkm_frame,
      by.x = "isoform_id",
      by.y = "row.names",
      all = TRUE,
      sort = TRUE
    ),
    file = frame_path,
    quote = FALSE,
    sep = "\t",
    row.names = FALSE,
    col.names = TRUE
  )
  rm(isoforms_fpkm_frame)
}
rm(frame_path)

# Matrix of count values per replicate on Isoforms ------------------------


frame_path <-
  file.path(output_directory,
            paste0(prefix, "_isoforms_counts_replicates.tsv"))
if (file.exists(frame_path) && file.info(frame_path)$size > 0L) {
  message("Skipping a matrix of count values per replicates on isoforms")
} else {
  message("Creating a matrix of count values per replicates on isoforms")
  isoforms_count_frame <-
    cummeRbund::repCountMatrix(object = cummeRbund::isoforms(object = cuff_set))

  utils::write.table(
    x = base::merge.data.frame(
      x = isoforms_annotation_frame,
      y = isoforms_count_frame,
      by.x = "isoform_id",
      by.y = "row.names",
      all = TRUE,
      sort = TRUE
    ),
    file = frame_path,
    quote = FALSE,
    sep = "\t",
    row.names = FALSE,
    col.names = TRUE
  )
  rm(isoforms_count_frame)
}
rm(frame_path)

# Significance Matrix on Genes --------------------------------------------


plot_paths <-
  file.path(output_directory, paste(
    paste(prefix, "genes", "significance", "matrix", sep = "_"),
    graphics_formats,
    sep = "."
  ))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a significance matrix plot on Genes")
} else {
  message("Creating a significance matrix plot on Genes")
  ggplot_object <- sigMatrix(object = cuff_set, level = "genes")
  for (plot_path in plot_paths) {
    ggplot2::ggsave(
      filename = plot_path,
      plot = ggplot_object,
      width = argument_list$plot_width,
      height = argument_list$plot_height,
      limitsize = FALSE
    )
  }
  rm(plot_path, ggplot_object)
}
rm(plot_paths)

# Significance Matrix on Isoforms -----------------------------------------


plot_paths <-
  file.path(output_directory, paste(
    paste(prefix, "isoforms", "significance", "matrix", sep = "_"),
    graphics_formats,
    sep = "."
  ))
if (all(file.exists(plot_paths)) &&
    all(file.info(plot_paths)$size > 0L)) {
  message("Skipping a significance matrix plot on Isoforms")
} else {
  message("Creating a significance matrix plot on Isoforms")
  ggplot_object <- sigMatrix(object = cuff_set, level = "isoforms")
  for (plot_path in plot_paths) {
    ggplot2::ggsave(
      filename = plot_path,
      plot = ggplot_object,
      width = argument_list$plot_width,
      height = argument_list$plot_height,
      limitsize = FALSE
    )
  }
  rm(plot_path, ggplot_object)
}
rm(plot_paths)

rm(genes_annotation_frame, isoforms_annotation_frame)

# Get a CuffFeatureSet or CuffGeneSet of significant genes.

# TODO: Comment-out for the moment, as the data is currently not used.
# significant_gene_ids <-
#   cummeRbund::getSig(object = cuff_set, level = "genes")
# significant_genes <-
#   cummeRbund::getGenes(object = cuff_set, geneIdList = significant_gene_ids)

# The csHeatmap plot does not seem to be a sensible option for larger sets
# of significant genes.
# for (i in seq_along(along.with = sample_pairs[1L,])) {

# significant_genes_diff <-
# }

# Starting symbolic linking -----------------------------------------------


# Finally, create comparison-specific relative symbolic links for cuffdiff
# results in the rnaseq_process_cuffdiff_* directory to avoid identical file
# names between comparisons.

message("Starting symbolic linking to cuffdiff results")

link_path <-
  file.path(output_directory, paste0(prefix, "_cds_exp_diff.tsv"))
if (!file.exists(link_path)) {
  if (!file.symlink(from = file.path("..", cuffdiff_directory, "cds_exp.diff"),
                    to = link_path)) {
    warning("Encountered an error linking the cds_exp.diff file.")
  }
}
rm(link_path)

link_path <-
  file.path(output_directory, paste0(prefix, "_genes_exp_diff.tsv"))
if (!file.exists(link_path)) {
  if (!file.symlink(from = file.path("..", cuffdiff_directory, "gene_exp.diff"),
                    to = link_path)) {
    warning("Encountered an error linking the gene_exp.diff file.")
  }
}
rm(link_path)

link_path <-
  file.path(output_directory, paste0(prefix, "_isoforms_exp_diff.tsv"))
if (!file.exists(link_path)) {
  if (!file.symlink(from = file.path("..", cuffdiff_directory, "isoform_exp.diff"),
                    to = link_path)) {
    warning("Encountered an error linking the isoform_exp.diff file.")
  }
}
rm(link_path)

link_path <-
  file.path(output_directory, paste0(prefix, "_promoters_diff.tsv"))
if (!file.exists(link_path)) {
  if (!file.symlink(from = file.path("..", cuffdiff_directory, "promoters.diff"),
                    to = link_path)) {
    warning("Encountered an error linking the promoters.diff file.")
  }
}
rm(link_path)

link_path <-
  file.path(output_directory, paste0(prefix, "_splicing_diff.tsv"))
if (!file.exists(link_path)) {
  if (!file.symlink(from = file.path("..", cuffdiff_directory, "splicing.diff"),
                    to = link_path)) {
    warning("Encountered an error linking the splicing.diff file.")
  }
}
rm(link_path)

link_path <-
  file.path(output_directory, paste0(prefix, "_tss_group_exp_diff.tsv"))
if (!file.exists(link_path)) {
  if (!file.symlink(from = file.path("..", cuffdiff_directory, "tss_group_exp.diff"),
                    to = link_path)) {
    warning("Encountered an error linking the tss_group_exp.diff file.")
  }
}
rm(link_path)

# Finishing symbolic linking ----------------------------------------------


message("Finishing symbolic linking to cuffdiff results")

rm(
  cuff_set,
  output_directory,
  cuffdiff_directory,
  sample_pairs,
  sample_number,
  replicate_number,
  have_replicates,
  prefix,
  i,
  graphics_formats,
  argument_list,
  character_to_csv
)

message("All done")

# Finally, print all objects that have not been removed from the environment.
if (length(x = ls())) {
  print(x = ls())
}

print(x = sessioninfo::session_info())
mkschuster/bsfR documentation built on Aug. 15, 2023, 5:01 a.m.