knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(isoformic)

Motivation

This is a highly opinionated example of how to perform RNA-Seq pre-processing and quantification focusing on estimation o transcript abundance.

For this workflow we are going to use the {condathis} R package. This package allow you to run command line tools like Salmon [@salmon].

All those tools can be run from the command line directly.

Note for MacOS users

As of 2024-06-09, Bioconda does not support the Arm64 architecture of the Apple Silicon CPUs.

One way of bypassing that is using arguments platform = "osx-64" for the condathis::create_env(), to create the environment leveraging Rosetta 2 support. This option can also be added to the CLI command adding --platform osx-64 to the conda create command.

Note for Windows users

As of 2024-06-09 Bioconda does not support Windows native installations. Therefore this vignette can only be run, using a VM or container. The best approach would be running under the Windows Subsystem for Linux, if you have it available.

Install Salmon with {condathis}

# pak::pkg_install("local::~/projects/condathis")
if (isFALSE(rlang::is_installed("condathis"))) {
  install.packages("condathis", repos = c("https://luciorq.r-universe.dev", getOption("repos")))
}
library("condathis")
if (!condathis::env_exists(env_name = "salmon-env")) {
  if (fs::dir_exists(fs::path(condathis::get_install_dir(), "envs", "salmon-env"))) {
    fs::dir_delete(fs::path(condathis::get_install_dir(), "envs", "salmon-env"))
  }
}
# Workaround for ARM CPU based MacOS

# TODO: Also check if rosetta 2 is enabled.

if (isTRUE(condathis::get_sys_arch() %in% "Darwin-arm64")) {
  platform_var <- "osx-64"
} else {
  platform_var <- NULL
}

condathis::create_env(
  packages = "salmon==1.10.3",
  env_name = "salmon-env",
  # method = "native",
  platform = platform_var
)

Check Salmon version:

condathis::run(
  "salmon", "--version",
  env_name = "salmon-env"
)
salmon 1.10.3

This is equivalent to running in the CLI.

# if MacOS with arm64 CPU add `--platform osx-64`
conda create -n salmon-env \
  -c bioconda -c conda-forge -c nodefaults \
  salmon;

Running Salmon with {condathis}

Salmon index

For the CLI command.

Salmon index command:

salmon index \
  --transcripts <TRANSCRIPTME_FASTA> \
  --index <SALMON_INDEX_DIR> \
  --kmerLen 15 \
  --threads 4 \
  --keepDuplicates;
base_dir <- fs::path_temp("isoformic")
reference_version <- "46"
download_reference(
  version = reference_version,
  reference = "gencode",
  file_type = "fasta",
  organism = "human",
  output_path = base_dir
)

Using {condathis}

txome_fasta_path <- fs::path(base_dir, paste0("gencode.v", reference_version, ".transcripts.fa.gz"))
salmon_index_path <- fs::path(base_dir, paste0("salmon_index_gencode_v", reference_version))
if (!fs::dir_exists(salmon_index_path)) {
  fs::dir_create(salmon_index_path)
}

if (requireNamespace("parallelly", quietly = TRUE)) {
  num_threads <- min(parallelly::availableCores(), 4)
} else {
  num_threads <- 1
}

condathis::run(
  "salmon", "index",
  "--transcripts", txome_fasta_path,
  "--index", salmon_index_path,
  "--kmerLen", 15,
  "--threads", num_threads,
  "--keepDuplicates",
  env_name = "salmon-env"
)

Salmon Quant

Salmon quant command:

salmon \
  quant \
  --libType A \
  --index <SALMON_INDEX_PATH> \
  --mates1 <FASTQ_R1> \
  --mates2 <FASTQ_R2> \
  --output <SALMON_OUTPUT_PATH> \
  --threads {threads} \
  --softclip \
  --softclipOverhangs \
  --disableChainingHeuristic \
  --dumpEq \
  --dumpEqWeights \
  --posBias \
  --seqBias \
  --gcBias \
  --useVBOpt \
  --rangeFactorizationBins 8 \
  --thinningFactor 100 \
  --validateMappings \
  --writeMappings={output.quant_dir}/txome_align.sam \
  --minScoreFraction 0.65 \
  --numGibbsSamples 100 2>&1 | tee -a {log};

Using {condathis}

salmon_quant_path <- fs::path(base_dir, "salmon_quant")
if (!fs::dir_exists(salmon_quant_path)) {
  fs::dir_create(salmon_quant_path)
}
reads_path <- c(
  fs::path("data-raw/sample_R1.fastq.gz"),
  fs::path("data-raw/sample_R2.fastq.gz")
)

condathis::run(
  "salmon", "quant",
  "--libType", "A",
  "--index", salmon_index_path,
  "--mates1", reads_path[1],
  "--mates2", reads_path[2],
  "--output", salmon_quant_path,
  "--numGibbsSamples", 20,
  "--posBias",
  "--seqBias",
  "--gcBias",
  "--threads", num_threads,
  "--softclip",
  "--softclipOverhangs",
  "--disableChainingHeuristic",
  "--dumpEq",
  "--useVBOpt",
  "--validateMappings",
  "--minAssignedFrags", 1,
  "--minScoreFraction", "0.65",
  env_name = "salmon-env"
)

Session Information

sessioninfo::session_info()

References



luciorq/txomics documentation built on Dec. 23, 2024, 10:36 a.m.