knitr::opts_chunk$set(collapse = T, comment = "#>")
options(tibble.print_min = 4L, tibble.print_max = 4L)
knitr::opts_knit$set(warning = FALSE, error = TRUE)
library(infercna)
library(magrittr)
set.seed(1014)

Prepare your data

Set your genome with useGenome()

useGenome('hg19')
retrieveGenome()

Load example data with useData()

m = useData(mgh125)
dim(m)
range(m)
lengths(refCells)

Infer CNAs and plot a heatmap

Infer cells' CNA profiles with infercna()

args(infercna)
cna = infercna(m = m, refCells = refCells, n = 5000, noise = 0.1, isLog = TRUE, verbose = FALSE)
cnaM = cna[, !colnames(cna) %in% unlist(refCells)]

Plot heatmap with cnaPlot()

args(cnaPlot)
obj = cnaPlot(cna = cnaM,
              order.cells = TRUE,
              subtitle = 'Copy-Number Aberrations in a patient with Glioblastoma')
names(obj)

obj$data

obj$p

Ordering cells in cnaPlot()

In cases where you can see the subclones in the heatmap but don't yet have the subclone assignments to direct the cell clustering, you can instead give a set of chromosome and chromosome arms that you would like the clustering to use. In the example above, we probably want the cells belonging to the presumed subclones (chromosome 2, chromosome arm 4p, etc) to be grouped together. We can specify this using the order.with argument in infercna::cnaPlot:

obj = cnaPlot(cna = cnaM,
              orderCells = TRUE,
              order.with = c(2, "4p", 7, 10, 12, 13, 15),
              subtitle = 'Copy-Number Aberrations in a patient with Glioblastoma')

obj$p

Find malignant cells

In the above section, we filtered out the infercna::refCells that we know are normal prior to looking at the CNA heatmap. In reality, our reference normal cells will only comprise a subset of all normal cells in the data, and we will need to identify the remaining ones to exclude them from downstream analyses concerning the malignant cells only.

infercna defines two parameters, infercna::cnaCor and infercna::cnaSignal that quantify the extent of copy-number aberrations in individual cells and thus help to separate the malignant and non-malignant subsets.

Compute cnaSignal() and cnaCor()

cnaSignal()

CNA signal reflects the overall extent of CNAs. It's defined as the mean of the squares of CNA values across the genome and should therefore accentuate genome-wide differences in CNA profiles between malignant and non-malignant cells.

args(cnaSignal)

cnaCor()

CNA corrleation refers to the correlation between the CNA profile of each cell and the average CNA profile of all cells. Best results are seen when correlating cells to the average CNA profile of cells from the corresponding tumour and, if possible, excluding from the average those already classified as non-malignant.

args(cnaCor)

Visualise CNA Signal and Correlation with cnaScatterPlot()

Plotting cnaCor against cnaSignal for all cells is a good first approximation to the malignant and non-malignant groups. infercna::cnaScatterPlot simply integrates these two function calls and their respective parameters into one function, and plots the results.

args(cnaScatterPlot)

Let's quickly go over the note-worthy arguments:

cnaScatterPlot(cna = cna,
               signal.threshold = NULL,
               main = 'Default')
cnaScatterPlot(cna = cna,
               signal.threshold = 0.9,
               main = 'threshold: 0.9')
cnaScatterPlot(cna = cna, 
               signal.threshold = 0.9, 
               main = 'signal threshold = 0.9')
cnaScatterPlot(cna = cna, 
               threshold = 0.9, 
               main = 'signal and cor threshold: 0.9')
cnaScatterPlot(cna = cna,
               threshold = 0.9, 
               samples = 'MGH125', 
               excludeFromAvg = unlist(refCells), 
               main = "threshold: 0.9, samples:'MGH125', excludeFromAvg", 
               group = unlist(refCells))

threshold finds top genes with cnaHotspotGenes()

When the <threshold> argument is set (to a value between 0 and 1) as above, infercna::cnaHotspotGenes() is called, and does two things:

  1. Compute CNA signal values for each gene

  2. Return genes with values of CNA signal in the top nth quantile

    • where n is the value in threshold
args(cnaHotspotGenes)

# Number of genes in top 50%
length(cnaHotspotGenes(cna = cnaM, threshold = .5))

# Number of genes in top 10%
length(cnaHotspotGenes(cna = cnaM, threshold = .9))

Find malignant cells with findMalignant()

infercna::findMalignant() provides a first attempt at identifying malignant (and non-malignant) cells in the data. It fits bimodal distributions to the cells' cnaSignal() and cnaCor() values, respectively. If two modes are found for each parameter, these are cross-checked and -- if compatible --, joined and returned. If any of these steps fail, the return value is FALSE.

args(findMalignant)
Modes = findMalignant(cna, signal.threshold = .9, samples = 'MGH125')
lengths(Modes)

scrabble::jaccard(Modes, refCells)

names(Modes) = c('nonmalignant', 'malignant')

Find genetic subclones

args(findClones)

cnaCancer = cna[, Modes$malignant]
L = Map(filterGenes, value = c(2, '4p', 15), attribute = c('chr', 'arm', 'chr'), MoreArgs = list(cnaCancer))
out = sapply(L, fitBimodal, assign = T)
lengths(out)
out[[3]] = fitBimodal(L[[3]], assign = T, bySampling = T, nsamp = 300)
lengths(out)
sapply(out, lengths)
expandToClones(out, greaterThan = 3) %>% lengths %>% as.matrix

Additional featuers

Split, Filter and Order vectors or matrices by Chromosome (Arms)

genesByChr = splitGenes(rownames(m), by = 'chr')
lengths(genesByChr)

datByChr = splitGenes(m, by = 'arm')
names(datByChr)
filterGenes(m, '4p', 'arm') %>% nrow

filterGenes(m, '4p', 'arm', out = TRUE) %>% nrow

filterGenes(m, 2, 'chr') %>% nrow

filterGenes(m, c(2, '4p'), c('chr', 'arm')) %>% nrow
# you can provide a matrix or ch
all(orderGenes(rownames(m)) == rownames(orderGenes(m)))

Find genes with genesOn()

chr7 = genesOn(7, 'chr')

# First 5 genes on chromosome 7:
head(chr7, 5)

'EGFR' %in% chr7


jlaffy/infercna documentation built on Jan. 26, 2024, 11:24 p.m.