inst/doc/AntibodyForests_vignette.R

## ----setup, echo = FALSE, eval = TRUE, include = FALSE------------------------
# Set global options
knitr::opts_chunk$set(eval = TRUE, include = TRUE, echo = TRUE, message = FALSE, warning = FALSE, error = FALSE, dpi = 72)
# Define custom hooks for handling errors, warnings, and messages during knitting
knitr::knit_hooks$set(
   error = function(x, options){
     paste('\n\n<div class="alert alert-danger">',
           gsub('##', '\n', gsub('^##\ Error', '**Error**', x)),
           '</div>', sep = '\n')},
   warning = function(x, options){
     paste('\n\n<div class="alert alert-warning">',
           gsub('##', '\n', gsub('^##\ Warning:', '**Warning**', x)),
           '</div>', sep = '\n')},
   message = function(x, options){
     paste('\n\n<div class="alert alert-info">',
           gsub('##', '\n', x),
           '</div>', sep = '\n')})

#Load AntibodyForests functions
devtools::load_all(".")

## ----eval = FALSE-------------------------------------------------------------
# #Install from CRAN
# install.packages("Platypus")
# install.packages("AntibodyForests")

## ----eval = FALSE-------------------------------------------------------------
# #Load the libraries
# library(Platypus)
# library(AntibodyForests)

## ----installation, echo = FALSE, eval = FALSE---------------------------------
# # List of CRAN packages required to run all functions
# CRAN_packages <- c("Platypus", "alakazam", "ape", "base",
#                    "base64enc", "BiocManager", "dplyr",
#                    "grDevices", "gtools", "igraph",
#                    "jsonlite", "knitr", "parallel",
#                    "phangorn", "scales", "stats",
#                    "stringdist", "stringr", "utils")
# 
# # List of Bioconductor packages required to run all functions
# Bioconductor_packages <- c("Biostrings", "msa")
# 
# # Iterate through packages
# for(package in c(CRAN_packages, Bioconductor_packages)){
#   # Check if the current packages is already installed
#   if(!requireNamespace(package, quietly = TRUE)){
#     # Install CRAN packages if not installed
#     if(package %in% CRAN_packages){install.packages(package)}
#     # Install Bioconductor packages if not installed
#     if(package %in% Bioconductor_packages){BiocManager::install(package)}
#   }
# }
# 

## ----quick-start1, eval = FALSE-----------------------------------------------
# # Import 10x Genomics output files into VDJ dataframe and only keep cells with one VDJ and one VJ transcript
# # For each clone, trim germline sequence
# VDJ_OVA <- Platypus::VDJ_build(VDJ.directory = "10x_output/VDJ/",
#                      remove.divergent.cells = TRUE,
#                      complete.cells.only = TRUE,
#                      trim.germlines = TRUE)
# 
# # Build lineage trees for all clones present in the VDJ dataframe with the default algorithm
# AntibodyForests_OVA_default <- Af_build(VDJ = VDJ_OVA,
#                                           construction.method = "phylo.network.default")

## ----quick-start2, eval = F---------------------------------------------------
# # Plot one of the lineage trees, constructed with the `phylo.network.default' method
# Af_plot_tree(AntibodyForests_object = AntibodyForests_OVA_default,
#                      sample = "S1",
#                      clonotype = "clonotype3")

## ----quick-start3, eval = F---------------------------------------------------
# # Cluster the trees in the repertoire based on the euclidean distance between various metrics of each tree
# out <- Af_compare_within_repertoires(input = AntibodyForests_OVA_default,
#                                      min.nodes = 5,
#                                      distance.method = "euclidean",
#                                      distance.metrics = c("mean.depth", "sackin.index", "spectral.density"),
#                                      clustering.method = "mediods",
#                                      visualization.methods = "PCA",
#                                      plot.label = T)
# 
# # Analyze the difference between the clusters based on the calculated sackin.index
# plots <- Af_cluster_metrics(input = AntibodyForests_OVA_default,
#                    clusters = out$clustering,
#                    metrics = c("spectral.density"),
#                    min.nodes = 5,
#                    significance = T)
# 

## ----quick-start4, eval=FALSE-------------------------------------------------
# # Plot the PCA colored on the assigned clusters
# out$plots$PCA_clusters

## ----quick-start5, eval=FALSE-------------------------------------------------
# # Plot the different number of modalities between the clusters
# plots$modalities

## ----vdj_build-example-3-code, eval = FALSE-----------------------------------
# # Read in data, keep only complete, non-divergent cells and trim germline sequences
# VDJ_OVA <- Platypus::VDJ_build(VDJ.directory = "10x_output/VDJ/",
#                      remove.divergent.cells = TRUE,
#                      complete.cells.only = TRUE,
#                      trim.germlines = TRUE,
#                      fill.germline.CDR3 = TRUE)

## ----vdj_build-example-3-message, echo = FALSE, results = "asis"--------------
# Print number of filtered cells/barcodes 
warning("Please be aware of the number of cells excluded, when 'remove.divergent.cells' or 'complete.cells.only' is set to TRUE:\n")
print(knitr::kable(data.frame(divergent.cells = c(360, 295, 79, 183 ,526), 
                              incomplete.cells = c(708, 503, 387, 380, 1113),
                              row.names = c("S1", "S2", "S3", "S4", "S5"))))

## ----vdj_build-example-3-df, echo = FALSE-------------------------------------
# Load the example VDJdataframe
load("objects/VDJ_OVA_dummy_3.RData")
knitr::kable(head(VDJ_OVA_dummy_3[,c("sample_id", "barcode", "isotype", "clonotype_id", "clonotype_frequency", "VDJ_vgene", "VDJ_jgene")], 10))

## ----AntibodyForests-example-default, eval = FALSE----------------------------
# # Infer lineage trees for all clones using the 'phylo.network.default' construction method and using all default parameter settings
# AntibodyForests_OVA_default <- Af_build(VDJ = VDJ_OVA,
#                                                sequence.columns = c("VDJ_sequence_nt_trimmed", "VJ_sequence_nt_trimmed"),
#                                                germline.columns = c("VDJ_germline_nt_trimmed", "VJ_germline_nt_trimmed"),
#                                                concatenate.sequences = FALSE,
#                                                node.features = "isotype",
#                                                construction.method = "phylo.network.default",
#                                                string.dist.metric = "lv",
#                                                resolve.ties = c("max.expansion", "close.germline.dist", "close.germline.edges", "random"),
#                                                parallel = TRUE)

## ----AntibodyForests-example-ML, eval = FALSE---------------------------------
# # Infer lineage trees for all clones using the 'phylo.tree.ml' construction method and using all default parameter settings
# AntibodyForests_OVA_ml <- Af_build(VDJ = VDJ_OVA,
#                                           sequence.columns = c("VDJ_sequence_nt_trimmed", "VJ_sequence_nt_trimmed"),
#                                           germline.columns = c("VDJ_germline_nt_trimmed", "VJ_germline_nt_trimmed"),
#                                           concatenate.sequences = FALSE,
#                                           construction.method = "phylo.tree.ml",
#                                           dna.model = "all",
#                                           remove.internal.nodes = "connect.to.parent",
#                                           parallel = TRUE)

## ----AntibodyForests-example-IgPhyML, eval = FALSE----------------------------
# # Infer lineage trees for all clones using the 'phylo.tree.IgPhyML' construction method and using all default parameter settings
# AntibodyForests_OVA_IgPhyML <- Af_build(VDJ = VDJ_OVA,
#                                                sequence.columns = "VDJ_sequence_nt_trimmed",
#                                                germline.columns = "VDJ_germline_nt_trimmed",
#                                                construction.method = "phylo.tree.IgPhyML",
#                                                IgPhyML.output.file = "data/OVA_scRNA-seq_data/VDJ/airr_rearrangement_igphyml-pass.tab",
#                                                remove.internal.nodes = "connect.to.parent",
#                                                parallel = TRUE)

## ----AntibodyForests_plot_ML_with_IN_example, eval = FALSE--------------------
# # Plot lineage tree, constructed with the `phylo.tree.ml' method, without internal nodes
# Af_plot_tree(AntibodyForests_object = AntibodyForests_OVA_ml,
#                      sample = "S1",
#                      clonotype = "clonotype8",
#                      show.inner.nodes = TRUE,
#                      label.by = "name",
#                      color.by = "isotype",
#                      node.size = "expansion",
#                      main.title = "Lineage tree - ML",
#                      sub.title = "OVA - S1 - clonotype8")

## ----AntibodyForests_plot_ML_without_IN_example, eval = FALSE-----------------
# # Plot lineage tree, constructed with the `phylo.tree.ml' method, without internal nodes
# Af_plot_tree(AntibodyForests_object = AntibodyForests_OVA_ml,
#                      sample = "S1",
#                      clonotype = "clonotype8",
#                      show.inner.nodes = FALSE,
#                      label.by = "name",
#                      color.by = "isotype",
#                      node.size = "expansion",
#                      main.title = "Lineage tree - ML",
#                      sub.title = "OVA - S1 - clonotype8")

## ----Af_sync_nodes, eval= FALSE-----------------------------------------------
# # Synchronize the nodes of the different AntibodyForests objects
# AntibodyForests_OVA_IgPhyML <- Af_sync_nodes(reference = AntibodyForests_OVA_default,
#                                                     subject = AntibodyForests_OVA_IgPhyML)
# AntibodyForests_OVA_mst <- Af_sync_nodes(reference = AntibodyForests_OVA_default,
#                                 subject = AntibodyForests_OVA_mst)
# AntibodyForests_OVA_nj <- Af_sync_nodes(reference = AntibodyForests_OVA_default,
#                                 subject = AntibodyForests_OVA_nj)
# AntibodyForests_OVA_mp <- Af_sync_nodes(reference = AntibodyForests_OVA_default,
#                                 subject = AntibodyForests_OVA_mp)
# AntibodyForests_OVA_ml <- Af_sync_nodes(reference = AntibodyForests_OVA_default,
#                                 subject = AntibodyForests_OVA_ml)

## ----compare_methods_example2, eval = FALSE-----------------------------------
# # Calculate the euclidean distance between different trees based on the GBLD
# out <- Af_compare_methods(input = list("Default" = AntibodyForests_OVA_default,
#                                        "MST" = AntibodyForests_OVA_mst,
#                                        "NJ" = AntibodyForests_OVA_nj,
#                                        "MP" = AntibodyForests_OVA_mp,
#                                        "ML" = AntibodyForests_OVA_ml,
#                                        "IgPhyML" = AntibodyForests_OVA_IgPhyML),
#                           min.nodes = 10,
#                           distance.method = "GBLD",
#                           visualization.methods = "heatmap",
#                           include.average = T)
# # Plot the average heatmap
# out$average$Heatmap

## ----metric_dataframe1, eval = FALSE------------------------------------------
# # Calculate the metric dataframe for the AntibodyForests object
# metric_df <- Af_metrics(input = AntibodyForests_OVA_default,
#                         min.nodes = 10,
#                         metrics = c("mean.depth", "sackin.index", "spectral.density"))
# 

## ----metric_dataframe2--------------------------------------------------------
load("objects/metric_df.RData")

## ----metric_dataframe3, echo=FALSE--------------------------------------------
# Print the dataframe
DT::datatable(data = metric_df,
              rownames = TRUE,
              options = list(scrollX = TRUE))

## ----compare_distance_isotype, eval=FALSE-------------------------------------
# # Cluster the trees in the repertoire based on the euclidean distance between various metrics of each tree
# Af_distance_boxplot(AntibodyForests_object = AntibodyForests_OVA_default,
#                     node.feature = "isotype",
#                     distance = "node.depth",
#                     min.nodes = 8,
#                     groups = c("IgG1", "IgA"),
#                     text.size = 16,
#                     unconnected = T,
#                     significance = T)

## ----compare_within_repertoire_example1, eval = FALSE-------------------------
# # Cluster the trees in the repertoire based on the euclidean distance between various metrics of each tree
# out <- Af_compare_within_repertoires(input = AntibodyForests_OVA_default,
#                                      min.nodes = 8,
#                                      distance.method = "euclidean",
#                                      distance.metrics = c("mean.depth", "sackin.index", "spectral.density"),
#                                      clustering.method = "mediods",
#                                      visualization.methods = "PCA")
# # Plot the PCA colored on the assigned clusters
# out$plots$PCA_clusters

## ----cluster_metrics_example1, eval = FALSE-----------------------------------
# # Analyze the difference between the clusters based on the calculated sackin.index
# plots <- Af_cluster_metrics(input = AntibodyForests_OVA_default,
#                    clusters = out$clustering,
#                    metrics = c("sackin.index", "spectral.density"),
#                    min.nodes = 8,
#                    significance = T)
# 
# plots$sackin.index
# plots$modalities

## ----cluster_node_features_example1, eval = FALSE-----------------------------
# # Analyze the difference between the clusters based on the calculated sackin.index
# plots <- Af_cluster_node_features(input = AntibodyForests_OVA_default,
#                    clusters = out$clustering,
#                    features = "isotype",
#                    fill = "max",
#                    significance = T)
# 
# plots$isotype

## ----compare_within_repertoire_example2, eval = FALSE-------------------------
# # Cluster the trees in the repertoire based on the Jensen-Shannon divergence between the Spectral Density profiles
# out <- Af_compare_within_repertoires(input = AntibodyForests_OVA_default,
#                                      min.nodes = 8,
#                                      distance.method = "jensen-shannon",
#                                      clustering.method = "mediods",
#                                      visualization.methods = "heatmap")
# # Plot the heatmap
# out$plots$heatmap_clusters

## ----cluster_metrics_example2, eval=FALSE-------------------------------------
# # Analyze the difference between the clusters
# plots <- Af_cluster_metrics(input = AntibodyForests_OVA_default,
#                    clusters = out$clustering,
#                    metrics = "spectral.density",
#                    min.nodes = 8,
#                    significance = T)
# 
# plots$spectral.asymmetry
# plots$modalities

## ----compare_across_repertoire_example1, eval=FALSE---------------------------
# #Calculate the difference between the two groups of repertoires
# boxplots <- Af_compare_across_repertoires(list("Blood" = AntibodyForests_example_blood,
#                                                "Lymph Nodes" = AntibodyForests_example_LN),
#                                           metrics = c("betweenness", "mean.depth"), significance = T)
# 
# #Plot the boxplots
# boxplots$betweenness
# boxplots$mean.depth

## ----add_pseudolikelihood_example, eval=FALSE---------------------------------
# # Add pseudolikelihood as node feature to the AntibodyForests object
# AntibodyForests_OVA_default <- Af_add_node_feature(AntibodyForests_object = AntibodyForests_OVA_default,
#                                                                 feature.df = VDJ_OVA,
#                                                                 feature.names = "pseudolikelihood")

## ----plot_pseudolikelihood_example1, eval=FALSE-------------------------------
# # Plot pseudolikelihood against distance to the germline
# Af_distance_scatterplot(AntibodyForests_object = AntibodyForests_OVA_default,
#                                      node.features = "pseudolikelihood",
#                                      distance = "edge.length",
#                                      min.nodes = 5,
#                                      correlation = "pearson",
#                                      color.by = "sample")

## ----get_sequences_probability_example, eval=FALSE----------------------------
# #Create a dataframe of the sequences with a sequence ID containing the sample, clonotype, and node number
# df <- Af_get_sequences(AntibodyForests_object = AntibodyForests_OVA_default,
#                                                sequence.name = "VDJ_sequence_aa_trimmed",
#                                                min.edges = 1)
# #Save the data frame as CSV to serve as input for the PLM-pipeline
# write.csv(df, file = "/directory/PLM_input.csv", row.names = FALSE)

## ----probability_matrix_example, eval=FALSE-----------------------------------
# #Create a PLM dataframe
# PLM_dataframe <- Af_PLM_dataframe(AntibodyForests_object = AntibodyForests_OVA_default,
#                                                sequence.name = "VDJ_sequence_aa_trimmed",
#                                                path_to_probabilities = "/directory/ProbabilityMatrix")

## ----load_plm_dataframe, echo=FALSE-------------------------------------------
load("objects/PLM_dataframe.RData")

## ----vdj_plm-df, echo = FALSE-------------------------------------------------
knitr::kable(PLM_dataframe)

## ----plot_likelihood_example1, eval = FALSE-----------------------------------
# # Plot the substitution rank
# Af_plot_PLM(PLM_dataframe = PLM_dataframe,
#                          values = "substitution_rank",
#                          group_by = "sample_id")

## ----load_structure_data, echo=FALSE------------------------------------------
load("objects/VDJ_ova_structure.RData") #vdj
load("objects/VDJ_structure_AbAg.RData") #vdj_structure_AbAg
load("objects/AntibodyForests_OVA_structure_AbAg.RData") #af_structure_AbAg

## ----structure_example2, eval = F---------------------------------------------
# #Biophysical properties of the heavy and light chain sequence of the antibodies are calculated
# vdj_structure_AbAg <- VDJ_3d_properties(VDJ = vdj,
#                                    pdb.dir = "~/path/PDBS_superimposed/",
#                                    file.df = files,
#                                    properties = c("charge",
#                                                   "3di_germline",
#                                                   "hydrophobicity",
#                                                   "RMSD_germline",
#                                                   "pKa_shift",
#                                                   "free_energy",
#                                                   "pLDDT"),
#                                    chain = "whole.complex",
#                                    sequence.region = "binding.residues",
#                                    propka.dir = "~/path/Propka_output/",
#                                    germline.pdb = "~/path/PDBS_superimposed/germline_5_model_0.pdb",
#                                    foldseek.dir = "~/path/3di_sequences/")
# 
# #Add the biophysical properties to the AntibodyForests object
# af_structure_AbAg <- Af_add_node_feature(AntibodyForests_object = af,
#                                                  feature.df = vdj_structure_AbAg,
#                                                  feature.names = c("average_charge",
#                                                                    "3di_germline",
#                                                                    "average_hydrophobicity",
#                                                                    "RMSD_germline",
#                                                                    "average_pKa_shift",
#                                                                    "free_energy",
#                                                                    "average_pLDDT"))

## ----structure_example4_plot, eval = T, fig.width=5, fig.height=5-------------
Af_distance_scatterplot(af_structure_AbAg,
                        node.features = "free_energy",
                        correlation = "pearson",
                        color.by = "average_pLDDT",
                        color.by.numeric = T,
                        geom_smooth.method = "lm")

## ----plot_structure_example1, eval = F----------------------------------------
# rmsd <- Af_edge_RMSD(af,
#              VDJ = vdj,
#              pdb.dir = "~/path/PDBS_superimposed/",
#              file.df = files,
#              sequence.region = "full.sequence",
#              chain = "HC+LC")
# rmsd$plot

## ----write-AIRR-rearrangement, eval = FALSE-----------------------------------
# # Append the IgBLAST annotations and alignment to the VDJ dataframe
# VDJ_OVA <- VDJ_import_IgBLAST_annotations(VDJ = VDJ_OVA,
#                                           VDJ.directory = "../data/OVA_scRNA-seq_data/VDJ/",
#                                           method = "append")
# 
# # Write the VDJ dataframe into an AIRR-formatted TSV file
# VDJ_to_AIRR(VDJ = VDJ_OVA,
#             output.file = "../data/OVA_scRNA-seq_data/VDJ/airr_rearrangement.tsv")

## ----bulk_example, eval = FALSE-----------------------------------------------
# # Integrate bulk RNA-seq data into the VDJ dataframe, if multiple clonotypes match the bulk sequence, the bulk transcript is added to all clonotypes (tie.resolvement = "all)
# VDJ_OVA <- VDJ_integrate_bulk(sc.VDJ = VDJ_OVA,
#                    bulk.tsv = "path/to/bulk/OVA_bulkRNA.tsv",
#                    bulk.tsv.sequence.column="VDJ_sequence_nt_raw",
#                    bulk.tsv.sample.column="sample_id",
#                    bulk.tsv.barcode.column="barcode",
#                    bulk.tsv.isotype.column="isotype",
#                    organism="mouse",
#                    igblast.dir="/path/to/anaconda3/envs/igphyml/share/igblast",
#                    tie.resolvement = "all")
# 
# #Create the lineage tree of the integrated VDJ dataframe
# af <- Af_build(VDJ = VDJ_OVA,
#                sequence.columns = "VDJ_sequence_aa_trimmed",
#                germline.columns = "VDJ_germline_aa_trimmed",
#                construction.method = "phylo.network.default",
#                node.features = c("dataset"))
# 
# #Plot lineage tree
# Af_plot_tree(af,
#              sample = "S1",
#              clonotype = "clonotype3",
#              color.by = "dataset",
#              label.by = "none",
#              main.title = "OVA S1",
#              sub.title = "clonotype3")

Try the AntibodyForests package in your browser

Any scripts or data that you put into this service are public.

AntibodyForests documentation built on Aug. 8, 2025, 6:49 p.m.