#' Cluster samples based on SNVs.
#'
#' Perform hierarchical clustering of samples based on their somatic SNVs.
#'
#' Each VCF file should contain called somatic SNVs. Use
#' \code{exclude.filtered = TRUE} (the default) to only use SNVs with 'PASS'
#' or '.' in the FILTER column.
#'
#' The variant identifiers of every file are cross-tabulated against the
#' sample names, and the resulting SNV-by-sample count table is clustered
#' with \code{hclust(dist(t(dat)))}, i.e. using the default settings of
#' \code{dist} and \code{hclust}. The dendrogram is plotted as a side effect.
#' A sample without any SNVs is kept in the table as an all-zero column so
#' that every input file is represented in the tree.
#'
#' @param vcf.files A character vector containing the paths to the VCF files to
#'   process. At least two files are required.
#' @param genome A character string identifying the genome; it is passed on to
#'   \code{read.vcf}.
#' @param sample.names A character vector of sample names corresponding to
#'   each VCF file. By default the file names of the VCFs are used as sample
#'   names. Must contain one unique name per VCF file.
#' @param ... Additional parameters passed to \code{read.vcf}. These can be
#'   used to apply additional filtering of the SNVs, see the documentation of
#'   \code{read.vcf} for further details.
#' @return Invisibly returns an object of class \code{hclust} that describes
#'   the tree produced by the clustering process.
#' @author Jonathan Ellis <jonathan.j.ellis@@gmail.com>
#' @examples
#' vcf.files <- c(
#'   system.file('extdata', 'LC1_TUMOUR_A.vcf', package = 'GenomicVis'),
#'   system.file('extdata', 'LC1_TUMOUR_B.vcf', package = 'GenomicVis'),
#'   system.file('extdata', 'LC2_TUMOUR_A.vcf', package = 'GenomicVis'),
#'   system.file('extdata', 'LC2_TUMOUR_B.vcf', package = 'GenomicVis'),
#'   system.file('extdata', 'LC3_TUMOUR_A.vcf', package = 'GenomicVis'),
#'   system.file('extdata', 'LC3_TUMOUR_B.vcf', package = 'GenomicVis'))
#' sample.names <- tools::file_path_sans_ext(basename(vcf.files))
#' snv.clustering(vcf.files, genome = 'hg19', sample.names = sample.names)
#' @import VariantAnnotation
#' @export
snv.clustering <- function(vcf.files, genome, sample.names = NULL, ...) {
  if (is.null(sample.names)) {
    sample.names <- basename(vcf.files)
  }
  sample.names <- as.character(sample.names)

  # Fail early with a clear message instead of an obscure error from
  # table() or hclust() further down.
  if (length(vcf.files) < 2L) {
    stop("at least two VCF files are required for clustering", call. = FALSE)
  }
  if (length(sample.names) != length(vcf.files)) {
    stop(
      "'sample.names' must have one entry per VCF file (got ",
      length(sample.names), " names for ", length(vcf.files), " files)",
      call. = FALSE
    )
  }
  if (anyDuplicated(sample.names) > 0L) {
    # Duplicated names would silently merge distinct samples in the table.
    stop(
      "'sample.names' must be unique; duplicated: ",
      paste(unique(sample.names[duplicated(sample.names)]), collapse = ", "),
      call. = FALSE
    )
  }

  # One character vector of variant identifiers per file, in input order.
  # rownames() is used rather than names(rowData()) because on current
  # Bioconductor releases rowData() returns the per-variant metadata table,
  # whose names() are column names, not variant identifiers.
  # NOTE(review): assumes read.vcf() returns a VariantAnnotation VCF object
  # -- confirm against its definition.
  x <- lapply(vcf.files, function(f) rownames(read.vcf(f, genome, ...)))

  # Label every SNV with the sample it came from, by position rather than
  # by name lookup. Explicit factor levels keep samples that have no SNVs.
  a <- factor(rep(sample.names, lengths(x)), levels = sample.names)
  b <- unlist(x, use.names = FALSE)

  # Rows are SNVs, columns are samples; cluster the samples.
  dat <- table(b, a)
  cl <- hclust(dist(t(dat)))
  plot(cl, hang = -1, xlab = '', ylab = '', sub = '', main = '', axes = FALSE)
  invisible(cl)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.