knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(isoformic)
This is a highly opinionated example of how to perform RNA-Seq pre-processing and quantification focusing on estimation o transcript abundance.
For this workflow we are going to use the {condathis}
R package.
This package allow you to run command line tools like Salmon [@salmon].
All those tools can be run from the command line directly.
As of 2024-06-09, Bioconda does not support the Arm64 architecture of the Apple Silicon CPUs.
One way of bypassing that is using arguments platform = "osx-64"
for the condathis::create_env()
, to create the environment leveraging Rosetta 2 support. This option can also be added to the CLI command adding --platform osx-64
to the conda create
command.
As of 2024-06-09 Bioconda does not support Windows native installations. Therefore this vignette can only be run, using a VM or container. The best approach would be running under the Windows Subsystem for Linux, if you have it available.
{condathis}
# pak::pkg_install("local::~/projects/condathis") if (isFALSE(rlang::is_installed("condathis"))) { install.packages("condathis", repos = c("https://luciorq.r-universe.dev", getOption("repos"))) }
library("condathis") if (!condathis::env_exists(env_name = "salmon-env")) { if (fs::dir_exists(fs::path(condathis::get_install_dir(), "envs", "salmon-env"))) { fs::dir_delete(fs::path(condathis::get_install_dir(), "envs", "salmon-env")) } }
# Workaround for ARM CPU based MacOS # TODO: Also check if rosetta 2 is enabled. if (isTRUE(condathis::get_sys_arch() %in% "Darwin-arm64")) { platform_var <- "osx-64" } else { platform_var <- NULL } condathis::create_env( packages = "salmon==1.10.3", env_name = "salmon-env", # method = "native", platform = platform_var )
Check Salmon version:
condathis::run( "salmon", "--version", env_name = "salmon-env" )
salmon 1.10.3
This is equivalent to running in the CLI.
# if MacOS with arm64 CPU add `--platform osx-64` conda create -n salmon-env \ -c bioconda -c conda-forge -c nodefaults \ salmon;
{condathis}
For the CLI command.
Salmon index command:
salmon index \ --transcripts <TRANSCRIPTME_FASTA> \ --index <SALMON_INDEX_DIR> \ --kmerLen 15 \ --threads 4 \ --keepDuplicates;
base_dir <- fs::path_temp("isoformic") reference_version <- "46" download_reference( version = reference_version, reference = "gencode", file_type = "fasta", organism = "human", output_path = base_dir )
Using {condathis}
txome_fasta_path <- fs::path(base_dir, paste0("gencode.v", reference_version, ".transcripts.fa.gz")) salmon_index_path <- fs::path(base_dir, paste0("salmon_index_gencode_v", reference_version)) if (!fs::dir_exists(salmon_index_path)) { fs::dir_create(salmon_index_path) } if (requireNamespace("parallelly", quietly = TRUE)) { num_threads <- min(parallelly::availableCores(), 4) } else { num_threads <- 1 } condathis::run( "salmon", "index", "--transcripts", txome_fasta_path, "--index", salmon_index_path, "--kmerLen", 15, "--threads", num_threads, "--keepDuplicates", env_name = "salmon-env" )
Salmon quant command:
salmon \ quant \ --libType A \ --index <SALMON_INDEX_PATH> \ --mates1 <FASTQ_R1> \ --mates2 <FASTQ_R2> \ --output <SALMON_OUTPUT_PATH> \ --threads {threads} \ --softclip \ --softclipOverhangs \ --disableChainingHeuristic \ --dumpEq \ --dumpEqWeights \ --posBias \ --seqBias \ --gcBias \ --useVBOpt \ --rangeFactorizationBins 8 \ --thinningFactor 100 \ --validateMappings \ --writeMappings={output.quant_dir}/txome_align.sam \ --minScoreFraction 0.65 \ --numGibbsSamples 100 2>&1 | tee -a {log};
Using {condathis}
salmon_quant_path <- fs::path(base_dir, "salmon_quant") if (!fs::dir_exists(salmon_quant_path)) { fs::dir_create(salmon_quant_path) } reads_path <- c( fs::path("data-raw/sample_R1.fastq.gz"), fs::path("data-raw/sample_R2.fastq.gz") ) condathis::run( "salmon", "quant", "--libType", "A", "--index", salmon_index_path, "--mates1", reads_path[1], "--mates2", reads_path[2], "--output", salmon_quant_path, "--numGibbsSamples", 20, "--posBias", "--seqBias", "--gcBias", "--threads", num_threads, "--softclip", "--softclipOverhangs", "--disableChainingHeuristic", "--dumpEq", "--useVBOpt", "--validateMappings", "--minAssignedFrags", 1, "--minScoreFraction", "0.65", env_name = "salmon-env" )
sessioninfo::session_info()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.