Installation

Required dependencies

inferCNV uses the R packages r CRANpkg("ape"), r Biocpkg("BiocGenerics"), r CRANpkg("binhf"), r CRANpkg("caTools"), r CRANpkg("coda"), r CRANpkg("coin"), r CRANpkg("dplyr"), r CRANpkg("doParallel"), r Biocpkg("edgeR"), r CRANpkg("fastcluster"), r CRANpkg("fitdistrplus"), r CRANpkg("foreach"), r CRANpkg("futile.logger"), r CRANpkg("future"), r CRANpkg("gplots"), r CRANpkg("ggplot2"), r CRANpkg("HiddenMarkov"), r CRANpkg("reshape"), r CRANpkg("rjags"), r CRANpkg("RColorBrewer"), r Biocpkg("SingleCellExperiment"), r Biocpkg("SummarizedExperiment") and imports functions from the archived r CRANpkg("GMD").

Optional extension

If you want to use the interactive heatmap visualization, please check the add-on R package r Githubpkg("broadinstitute/inferCNV_NGCHM") after installing the packages r CRANpkg("tibble"), r Githubpkg("bmbroom/tsvio") and r Githubpkg("bmbroom/NGCHMR"). To install the optional packages, type the following in an R command window:

# tibble is the CRAN prerequisite of the optional interactive-heatmap add-on
install.packages("tibble")

# devtools supplies install_github(), used for the GitHub-hosted packages below
install.packages("devtools")
devtools::install_github("bmbroom/tsvio")
# NGCHMR is pulled from its "stable" ref rather than the default branch
devtools::install_github("bmbroom/NGCHMR", ref="stable")
# the inferCNV add-on itself, installed after tsvio and NGCHMR
devtools::install_github("broadinstitute/inferCNV_NGCHM")

And download the NGCHM Java application by typing the following in a regular shell:

```{bash, eval = FALSE}
wget http://tcga.ngchm.net/NGCHM/ShaidyMapGen.jar
```

```r
# Show chunk source in the rendered document and attach inferCNV,
# which the remaining chunks call without a namespace prefix.
knitr::opts_chunk$set(echo = TRUE)
library(infercnv)
```

Running InferCNV

Create the InferCNV Object

Reading in the raw counts matrix and meta data, populating the infercnv object

# Populate the infercnv object from the example raw counts matrix, the
# tab-delimited cell annotations, and the gene ordering file.
# ref_group_names lists the two annotation groups passed as reference groups.
infercnv_obj <- CreateInfercnvObject(
  raw_counts_matrix = "../inst/extdata/oligodendroglioma_expression_downsampled.counts.matrix.gz",
  annotations_file = "../inst/extdata/oligodendroglioma_annotations_downsampled.txt",
  delim = "\t",
  gene_order_file = "../inst/extdata/gencode_downsampled.EXAMPLE_ONLY_DONT_REUSE.txt",
  ref_group_names = c(
    "Microglia/Macrophage",
    "Oligodendrocytes (non-malignant)"
  )
)

Running the full default analysis

# Run the full default analysis in one call; results go to ../example_output.
# cutoff: 1 works well for Smart-seq2, 0.1 works well for 10x Genomics.
infercnv_obj_default <- infercnv::run(
  infercnv_obj,
  cutoff = 1,
  out_dir = "../example_output",
  cluster_by_groups = TRUE,
  plot_steps = FALSE,
  denoise = TRUE,
  HMM = TRUE,
  no_prelim_plot = TRUE,
  png_res = 60,
  num_threads = 2,
  BayesMaxPNormal = 0
)

Basic output from running inferCNV.

# Embed infercnv.png from the example output directory in the rendered document
knitr::include_graphics("../example_output/infercnv.png")

HMM predictions

# Embed the HMM prediction image from the example output directory
knitr::include_graphics("../example_output/infercnv.13_HMM_predHMMi6.hmm_mode-samples.repr_intensities.png")

Filtering genes

Removing those genes that are very lowly expressed or present in very few cells

# Drop lowly expressed genes: keep those above the minimum mean expression
cutoff <- 1
infercnv_obj <- require_above_min_mean_expr_cutoff(infercnv_obj, cutoff)

# Drop genes that are present in too few cells
min_cells_per_gene <- 3
infercnv_obj <- require_above_min_cells_ref(
  infercnv_obj,
  min_cells_per_gene = min_cells_per_gene
)

# Keep a copy of the filtered object for safe keeping
infercnv_orig_filtered <- infercnv_obj

Normalize each cell's counts for sequencing depth

# Normalize each cell's counts for sequencing depth; the object is replaced by the result
infercnv_obj <- infercnv::normalize_counts_by_seq_depth(infercnv_obj)

Log transform the normalized counts:

# Log-transform the normalized counts (presumably log2(x + 1), going by the function name)
infercnv_obj <- log2xplus1(infercnv_obj)

Apply maximum bounds to the expression data to reduce outlier effects

# Use the mean absolute value of the average bounds as the cap, then bound
# the expression data with it to reduce outlier effects.
threshold <- mean(abs(get_average_bounds(infercnv_obj)))
infercnv_obj <- apply_max_threshold_bounds(
  infercnv_obj,
  threshold = threshold
)

Perform smoothing across chromosomes

# Smooth expression along each chromosome
infercnv_obj <- smooth_by_chromosome(
  infercnv_obj,
  window_length = 101,
  smooth_ends = TRUE
)

# Re-center each cell using the median method
infercnv_obj <- center_cell_expr_across_chromosome(
  infercnv_obj,
  method = "median"
)

# Write the heatmap for this step, then embed the resulting PNG
plot_cnv(
  infercnv_obj,
  out_dir = "../example_output/",
  output_filename = "infercnv.chr_smoothed",
  x.range = "auto",
  title = "chr smoothed and cells re-centered",
  png_res = 60,
  color_safe_pal = FALSE
)
knitr::include_graphics("../example_output/infercnv.chr_smoothed.png")

Subtract the reference values from observations, now have log(fold change) values

# Subtract the reference expression from the observations
infercnv_obj <- subtract_ref_expr_from_obs(
  infercnv_obj,
  inv_log = TRUE
)

# Write the heatmap for this step, then embed the resulting PNG
plot_cnv(
  infercnv_obj,
  out_dir = "../example_output/",
  output_filename = "infercnv.ref_subtracted",
  x.range = "auto",
  title = "ref subtracted",
  png_res = 60,
  color_safe_pal = FALSE
)
knitr::include_graphics("../example_output/infercnv.ref_subtracted.png")

Invert log values

Converting the log(FC) values to regular fold change values, centered at 1 (no fold change)

This is important because we want (1/2)x to be symmetrical to 1.5x, representing loss/gain of one chromosome region.

# Convert the log(FC) values back to regular fold-change values
infercnv_obj <- invert_log2(infercnv_obj)

Removing noise

# Remove noise, passing an sd amplifier of 1.5
infercnv_obj <- clear_noise_via_ref_mean_sd(
  infercnv_obj,
  sd_amplifier = 1.5
)

# Write the heatmap for this step (color scale centered at 1), then embed it
plot_cnv(
  infercnv_obj,
  out_dir = "../example_output/",
  output_filename = "infercnv.denoised",
  x.range = "auto",
  x.center = 1,
  title = "denoised",
  png_res = 60,
  color_safe_pal = FALSE
)
knitr::include_graphics("../example_output/infercnv.denoised.png")

Remove outlier data points

This generally improves on the visualization

# Remove outlier data points; this generally improves the visualization
infercnv_obj <- remove_outliers_norm(infercnv_obj)

Additional Information

Online Documentation

For additional explanations on files, usage, and a tutorial please visit the wiki.

TrinityCTAT

This tool is a part of the TrinityCTAT toolkit focused on leveraging the use of RNA-Seq to better understand cancer transcriptomes. To find out more please visit TrinityCTAT

Applications

This methodology was used in:

Anoop P. Patel et al. Single-cell RNA-seq highlights intratumoral heterogeneity in primary glioblastoma. Science. 2014 Jun 20;344(6190):1396-1401

Tirosh I et al. Dissecting the multicellular ecosystem of metastatic melanoma by single-cell RNA-seq. Science. 2016 Apr 8;352(6282):189-96

Session info

# Record the R version and attached packages used to build this document
sessionInfo()


broadinstitute/infercnv documentation built on April 26, 2024, 4:11 a.m.