Nothing
## ----setup, echo = FALSE, eval = TRUE, include = FALSE------------------------
# Set global options
knitr::opts_chunk$set(eval = TRUE, include = TRUE, echo = TRUE, message = FALSE, warning = FALSE, error = FALSE, dpi = 72)
# Define custom hooks for handling errors, warnings, and messages during knitting
knitr::knit_hooks$set(
error = function(x, options){
paste('\n\n<div class="alert alert-danger">',
gsub('##', '\n', gsub('^##\ Error', '**Error**', x)),
'</div>', sep = '\n')},
warning = function(x, options){
paste('\n\n<div class="alert alert-warning">',
gsub('##', '\n', gsub('^##\ Warning:', '**Warning**', x)),
'</div>', sep = '\n')},
message = function(x, options){
paste('\n\n<div class="alert alert-info">',
gsub('##', '\n', x),
'</div>', sep = '\n')})
#Load AntibodyForests functions
devtools::load_all(".")
## ----eval = FALSE-------------------------------------------------------------
# #Install from CRAN
# install.packages("Platypus")
# install.packages("AntibodyForests")
## ----eval = FALSE-------------------------------------------------------------
# #Load the libraries
# library(Platypus)
# library(AntibodyForests)
## ----installation, echo = FALSE, eval = FALSE---------------------------------
# # List of CRAN packages required to run all functions
# CRAN_packages <- c("Platypus", "alakazam", "ape", "base",
# "base64enc", "BiocManager", "dplyr",
# "grDevices", "gtools", "igraph",
# "jsonlite", "knitr", "parallel",
# "phangorn", "scales", "stats",
# "stringdist", "stringr", "utils")
#
# # List of Bioconductor packages required to run all functions
# Bioconductor_packages <- c("Biostrings", "msa")
#
# # Iterate through packages
# for(package in c(CRAN_packages, Bioconductor_packages)){
# # Check if the current packages is already installed
# if(!requireNamespace(package, quietly = TRUE)){
# # Install CRAN packages if not installed
# if(package %in% CRAN_packages){install.packages(package)}
# # Install Bioconductor packages if not installed
# if(package %in% Bioconductor_packages){BiocManager::install(package)}
# }
# }
#
## ----quick-start1, eval = FALSE-----------------------------------------------
# # Import 10x Genomics output files into VDJ dataframe and only keep cells with one VDJ and one VJ transcript
# # For each clone, trim germline sequence and replace CDR3 region in germline with most-frequently observed CDR3 sequence
# VDJ_OVA <- Platypus::VDJ_build(VDJ.directory = "10x_output/VDJ/",
# remove.divergent.cells = TRUE,
# complete.cells.only = TRUE,
# trim.germlines = TRUE)
#
# # Build lineage trees for all clones present in the VDJ dataframe with the default algorithm
# AntibodyForests_OVA_default <- Af_build(VDJ = VDJ_OVA,
# construction.method = "phylo.network.default")
## ----quick-start2, eval = F---------------------------------------------------
# # Plot one of the lineage trees, constructed with the `phylo.network.default' method
# Af_plot_tree(AntibodyForests_object = AntibodyForests_OVA_default,
# sample = "S1",
# clonotype = "clonotype2")
## ----quick-start3, eval = F---------------------------------------------------
# # Cluster the trees in the repertoire based on the euclidean distance between various metrics of each tree
# out <- Af_compare_within_repertoires(input = AntibodyForests_OVA_default,
# min.nodes = 5,
# distance.method = "euclidean",
# distance.metrics = c("mean.depth", "sackin.index", "spectral.density"),
# clustering.method = "mediods",
# visualization.methods = "PCA",
# plot.label = T)
#
# # Analyze the difference between the clusters based on the calculated sackin.index
# plots <- Af_cluster_metrics(input = AntibodyForests_OVA_default,
# clusters = out$clustering,
# metrics = c("spectral.density"),
# min.nodes = 5,
# significance = T)
#
## ----quick-start4, eval=FALSE-------------------------------------------------
# # Plot the PCA colored on the assigned clusters
# out$plots$PCA_clusters
## ----quick-start5, eval=FALSE-------------------------------------------------
# # Plot the different number of modalities between the clusters
# plots$modalities
## ----vdj_build-example-3-code, eval = FALSE-----------------------------------
# # Read in data, keep only complete, non-divergent cells and trim germline sequences
# VDJ_OVA <- Platypus::VDJ_build(VDJ.directory = "10x_output/VDJ/",
# remove.divergent.cells = TRUE,
# complete.cells.only = TRUE,
# trim.germlines = TRUE,
# fill.germline.CDR3 = TRUE)
## ----vdj_build-example-3-message, echo = FALSE, results = "asis"--------------
# Print number of filtered cells/barcodes
warning("Please be aware of the number of cells excluded, when 'remove.divergent.cells' or 'complete.cells.only' is set to TRUE:\n")
print(knitr::kable(data.frame(divergent.cells = c(360, 295, 79, 183 ,526),
incomplete.cells = c(708, 503, 387, 380, 1113),
row.names = c("S1", "S2", "S3", "S4", "S5"))))
## ----vdj_build-example-3-df, echo = FALSE-------------------------------------
# Load the example VDJdataframe
load("objects/VDJ_OVA_dummy_3.RData")
knitr::kable(head(VDJ_OVA_dummy_3[,c("sample_id", "barcode", "isotype", "clonotype_id", "clonotype_frequency", "VDJ_vgene", "VDJ_jgene")], 10))
## ----AntibodyForests-example-default, eval = FALSE----------------------------
# # Infer lineage trees for all clones using the 'phylo.network.default' construction method and using all default parameter settings
# AntibodyForests_OVA_default <- Af_build(VDJ = VDJ_OVA,
# sequence.columns = c("VDJ_sequence_nt_trimmed", "VJ_sequence_nt_trimmed"),
# germline.columns = c("VDJ_germline_nt_trimmed", "VJ_germline_nt_trimmed"),
# concatenate.sequences = FALSE,
# node.features = "isotype",
# construction.method = "phylo.network.default",
# string.dist.metric = "lv",
# resolve.ties = c("max.expansion", "close.germline.dist", "close.germline.edges", "random"),
# parallel = TRUE)
## ----AntibodyForests-example-ML, eval = FALSE---------------------------------
# # Infer lineage trees for all clones using the 'phylo.tree.ml' construction method and using all default parameter settings
# AntibodyForests_OVA_ml <- Af_build(VDJ = VDJ_OVA,
# sequence.columns = c("VDJ_sequence_nt_trimmed", "VJ_sequence_nt_trimmed"),
# germline.columns = c("VDJ_germline_nt_trimmed", "VJ_germline_nt_trimmed"),
# concatenate.sequences = FALSE,
# construction.method = "phylo.tree.ml",
# dna.model = "all",
# remove.internal.nodes = "connect.to.parent",
# parallel = TRUE)
## ----AntibodyForests-example-IgPhyML, eval = FALSE----------------------------
# # Infer lineage trees for all clones using the 'phylo.tree.IgPhyML' construction method and using all default parameter settings
# AntibodyForests_OVA_IgPhyML <- Af_build(VDJ = VDJ_OVA,
# sequence.columns = "VDJ_sequence_nt_trimmed",
# germline.columns = "VDJ_germline_nt_trimmed",
# construction.method = "phylo.tree.IgPhyML",
# IgPhyML.output.file = "data/OVA_scRNA-seq_data/VDJ/airr_rearrangement_igphyml-pass.tab",
# remove.internal.nodes = "connect.to.parent",
# parallel = TRUE)
## ----AntibodyForests_plot_ML_with_IN_example, eval = FALSE--------------------
# # Plot lineage tree, constructed with the `phylo.tree.ml' method, without internal nodes
# Af_plot_tree(AntibodyForests_object = AntibodyForests_OVA_ml,
# sample = "S1",
# clonotype = "clonotype8",
# show.inner.nodes = TRUE,
# label.by = "name",
# color.by = "isotype",
# node.size = "expansion",
# main.title = "Lineage tree - ML",
# sub.title = "OVA - S1 - clonotype8")
## ----AntibodyForests_plot_ML_without_IN_example, eval = FALSE-----------------
# # Plot lineage tree, constructed with the `phylo.tree.ml' method, without internal nodes
# Af_plot_tree(AntibodyForests_object = AntibodyForests_OVA_ml,
# sample = "S1",
# clonotype = "clonotype8",
# show.inner.nodes = FALSE,
# label.by = "name",
# color.by = "isotype",
# node.size = "expansion",
# main.title = "Lineage tree - ML",
# sub.title = "OVA - S1 - clonotype8")
## ----Af_sync_nodes, eval= FALSE-----------------------------------------------
# # Synchronize the nodes of the different AntibodyForests objects
# AntibodyForests_OVA_IgPhyML <- Af_sync_nodes(reference = AntibodyForests_OVA_default,
# subject = AntibodyForests_OVA_IgPhyML)
# AntibodyForests_OVA_mst <- Af_sync_nodes(reference = AntibodyForests_OVA_default,
# subject = AntibodyForests_OVA_mst)
# AntibodyForests_OVA_nj <- Af_sync_nodes(reference = AntibodyForests_OVA_default,
# subject = AntibodyForests_OVA_nj)
# AntibodyForests_OVA_mp <- Af_sync_nodes(reference = AntibodyForests_OVA_default,
# subject = AntibodyForests_OVA_mp)
# AntibodyForests_OVA_ml <- Af_sync_nodes(reference = AntibodyForests_OVA_default,
# subject = AntibodyForests_OVA_ml)
## ----compare_methods_example2, eval = FALSE-----------------------------------
# # Calculate the euclidean distance between different trees based on the GBLD
# out <- Af_compare_methods(input = list("Default" = AntibodyForests_OVA_default,
# "MST" = AntibodyForests_OVA_mst,
# "NJ" = AntibodyForests_OVA_nj,
# "MP" = AntibodyForests_OVA_mp,
# "ML" = AntibodyForests_OVA_ml,
# "IgPhyML" = AntibodyForests_OVA_IgPhyML),
# min.nodes = 10,
# distance.method = "GBLD",
# visualization.methods = "heatmap",
# include.average = T)
# # Plot the average heatmap
# out$average$Heatmap
## ----metric_dataframe1, eval = FALSE------------------------------------------
# # Calculate the metric dataframe for the AntibodyForests object
# metric_df <- Af_metrics(input = AntibodyForests_OVA_default,
# min.nodes = 10,
# metrics = c("mean.depth", "sackin.index", "spectral.density"))
#
## ----metric_dataframe2--------------------------------------------------------
load("objects/metric_df.RData")
## ----metric_dataframe3, echo=FALSE--------------------------------------------
# Print the dataframe
DT::datatable(data = metric_df,
rownames = TRUE,
options = list(scrollX = TRUE))
## ----compare_distance_isotype, eval=FALSE-------------------------------------
# # Cluster the trees in the repertoire based on the euclidean distance between various metrics of each tree
# Af_distance_boxplot(AntibodyForests_object = AntibodyForests_OVA_default,
# node.feature = "isotype",
# distance = "node.depth",
# min.nodes = 8,
# groups = c("IgG1", "IgA"),
# text.size = 16,
# unconnected = T,
# significance = T)
## ----compare_within_repertoire_example1, eval = FALSE-------------------------
# # Cluster the trees in the repertoire based on the euclidean distance between various metrics of each tree
# out <- Af_compare_within_repertoires(input = AntibodyForests_OVA_default,
# min.nodes = 8,
# distance.method = "euclidean",
# distance.metrics = c("mean.depth", "sackin.index", "spectral.density"),
# clustering.method = "mediods",
# visualization.methods = "PCA")
# # Plot the PCA colored on the assigned clusters
# out$plots$PCA_clusters
## ----cluster_metrics_example1, eval = FALSE-----------------------------------
# # Analyze the difference between the clusters based on the calculated sackin.index
# plots <- Af_cluster_metrics(input = AntibodyForests_OVA_default,
# clusters = out$clustering,
# metrics = c("sackin.index", "spectral.density"),
# min.nodes = 8,
# significance = T)
#
# plots$sackin.index
# plots$modalities
## ----cluster_node_features_example1, eval = FALSE-----------------------------
# # Analyze the difference between the clusters based on the calculated sackin.index
# plots <- Af_cluster_node_features(input = AntibodyForests_OVA_default,
# clusters = out$clustering,
# features = "isotype",
# fill = "max",
# significance = T)
#
# plots$isotype
## ----compare_within_repertoire_example2, eval = FALSE-------------------------
# # Cluster the trees in the repertoire based on the Jensen-Shannon divergence between the Spectral Density profiles
# out <- Af_compare_within_repertoires(input = AntibodyForests_OVA_default,
# min.nodes = 8,
# distance.method = "jensen-shannon",
# clustering.method = "mediods",
# visualization.methods = "heatmap")
# # Plot the heatmap
# out$plots$heatmap_clusters
## ----cluster_metrics_example2, eval=FALSE-------------------------------------
# # Analyze the difference between the clusters based on the calculated sackin.index
# plots <- Af_cluster_metrics(input = AntibodyForests_OVA_default,
# clusters = out$clustering,
# metrics = "spectral.density",
# min.nodes = 8,
# significance = T)
#
# plots$spectral.asymmetry
# plots$modalities
## ----compare_across_repertoire_example1, eval=FALSE---------------------------
# #Calculate the difference between the two groups of repertoires
# boxplots <- Af_compare_across_repertoires(list("Blood" = AntibodyForests_example_blood,
# "Lymph Nodes" = AntibodyForests_example_LN),
# metrics = c("betweenness", "mean.depth"), significance = T)
#
# #Plot the boxplots
# boxplots$betweenness
# boxplots$mean.depth
## ----add_pseudolikelihood_example, eval=FALSE---------------------------------
# # Add pseudolikelihood as node feature to the AntibodyForests object
# AntibodyForests_OVA_default <- Af_add_node_feature(AntibodyForests_object = AntibodyForests_OVA_default,
# feature.df = VDJ_OVA,
# feature.names = "pseudolikelihood")
## ----plot_pseudolikelihood_example1, eval=FALSE-------------------------------
# # Plot pseudolikelihood against distance to the germline
# Af_distance_scatterplot(AntibodyForests_object = AntibodyForests_OVA_default,
# node.features = "pseudolikelihood",
# distance = "edge.length",
# min.nodes = 5,
# correlation = "pearson",
# color.by = "sample")
## ----get_sequences_probability_example, eval=FALSE----------------------------
# #Create a dataframe of the sequences with a sequence ID containing the sample, clonotype, and node number
# df <- Af_get_sequences(AntibodyForests_object = AntibodyForests_OVA_default,
# sequence.name = "VDJ_sequence_aa_trimmed",
# min.edges = 1)
# #Save the data frame as CSV to serve as input for the PLM-pipeline
# write.csv(df, file = "/directory/PLM_input.csv", row.names = FALSE)
## ----probability_matrix_example, eval=FALSE-----------------------------------
# #Create a PLM dataframe
# PLM_dataframe <- Af_PLM_dataframe(AntibodyForests_object = AntibodyForests_OVA_default,
# sequence.name = "VDJ_sequence_aa_trimmed",
# path_to_probabilities = "/directory/ProbabilityMatrix")
## ----load_plm_dataframe, echo=FALSE-------------------------------------------
load("objects/PLM_dataframe.RData")
## ----vdj_plm-df, echo = FALSE-------------------------------------------------
knitr::kable(PLM_dataframe)
## ----plot_likelihood_example1, eval = FALSE-----------------------------------
# # Plot the substitution rank
# Af_plot_PLM(PLM_dataframe = PLM_dataframe,
# values = "substitution_rank",
# group_by = "sample_id")
## ----load_structure_data, echo=FALSE------------------------------------------
load("objects/VDJ_ova_structure.RData") #vdj
load("objects/VDJ_structure_AbAg.RData") #vdj_structure_AbAg
load("objects/AntibodyForests_OVA_structure_AbAg.RData") #af_structure_AbAg
## ----structure_example2, eval = F---------------------------------------------
# #Biophysical properties of the heavy and light chain sequence of the antibodies are calculated
# vdj_structure_AbAg <- VDJ_3d_properties(VDJ = vdj,
# pdb.dir = "~/path/PDBS_superimposed/",
# file.df = files,
# properties = c("charge",
# "3di_germline",
# "hydrophobicity",
# "RMSD_germline",
# "pKa_shift",
# "free_energy",
# "pLDDT"),
# chain = "whole.complex",
# sequence.region = "binding.residues",
# propka.dir = "~/path/Propka_output/",
# germline.pdb = "~/path/PDBS_superimposed/germline_5_model_0.pdb",
# foldseek.dir = "~/path/3di_sequences/")
#
# #Add the biophysical properties to the AntibodyForests object
# af_structure_AbAg <- Af_add_node_feature(AntibodyForests_object = af,
# feature.df = vdj_structure_AbAg,
# feature.names = c("average_charge",
# "3di_germline",
# "average_hydrophobicity",
# "RMSD_germline",
# "average_pKa_shift",
# "free_energy",
# "average_pLDDT"))
## ----structure_example4_plot, eval = T, fig.width=5, fig.height=5-------------
Af_distance_scatterplot(af_structure_AbAg,
node.features = "free_energy",
correlation = "pearson",
color.by = "average_pLDDT",
color.by.numeric = T,
geom_smooth.method = "lm")
## ----plot_structure_example1, eval = F----------------------------------------
# rmsd <- Af_edge_RMSD(af,
# VDJ = vdj,
# pdb.dir = "~/path/PDBS_superimposed/",
# file.df = files,
# sequence.region = "full.sequence",
# chain = "HC+LC")
# rmsd$plot
## ----write-AIRR-rearrangement, eval = FALSE-----------------------------------
# # Append the IgBLAST annotations and alignment to the VDJ dataframe
# VDJ_OVA <- VDJ_import_IgBLAST_annotations(VDJ = VDJ_OVA,
# VDJ.directory = "../data/OVA_scRNA-seq_data/VDJ/",
# method = "append")
#
# # Write the VDJ dataframe into an AIRR-formatted TSV file
# VDJ_to_AIRR(VDJ = VDJ_OVA,
# output.file = "../data/OVA_scRNA-seq_data/VDJ/airr_rearrangement.tsv")
## ----bulk_example, eval = FALSE-----------------------------------------------
# # Integrate bulk RNA-seq data into the VDJ dataframe, if multiple clonotypes match the bulk sequence, the bulk transcript is added to all clonotypes (tie.resolvement = "all)
# VDJ_OVA <- VDJ_integrate_bulk(sc.VDJ = VDJ_OVA,
# bulk.tsv = "path/to/bulk/OVA_bulkRNA.tsv",
# bulk.tsv.sequence.column="VDJ_sequence_nt_raw",
# bulk.tsv.sample.column="sample_id",
# bulk.tsv.barcode.column="barcode",
# bulk.tsv.isotype.column="isotype",
# organism="mouse",
# igblast.dir="/path/to/anaconda3/envs/igphyml/share/igblast",
# tie.resolvement = "all")
#
# #Create the lineage tree of the integrated VDJ dataframe
# af <- Af_build(VDJ = VDJ_OVA,
# sequence.columns = "VDJ_sequence_aa_trimmed",
# germline.columns = "VDJ_germline_aa_trimmed",
# construction.method = "phylo.network.default",
# node.features = c("dataset"))
#
# #Plot lineage tree
# Af_plot_tree(af,
# sample = "S1",
# clonotype = "clonotype3",
# color.by = "dataset",
# label.by = "none",
# main.title = "OVA S1",
# sub.title = "clonotype3")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.