#' Run FACETS
#'
#' Runs FACETS on an input count file, with specified parameter settings.
#'
#' @param read_counts Read counts object, generated by `snp-pileup`.
#' @param cval Segmentation parameter, higher values for higher sensitivity.
#' @param dipLogR Manual dipLogR value, if left empty `facets` finds the most likely sample baseline.
#' @param ndepth Minimum depth in normal to retain SNP, see `facets` help.
#' @param snp_nbhd Minimum basepair distance for SNPs, see `facets` help.
#' @param min_nhet Minimum number of heterozygous SNPs on segment required for clustering, see `facets` help.
#' @param genome Genome build.
#' @param seed Seed value for random number generation, set to enable full reproducibility.
#'
#' @return A list object containing the following items. See \href{www.github.com/mskcc/facets}{FACETS documentation} for more details:
#' \itemize{
#' \item{\code{snps}:} {SNPs used for copy-number segmentation, \code{het==1} for heterozygous loci.}
#' \item{\code{segs}:} {Inferred copy-number segmentation.}
#' \item{\code{purity}:} {Inferred sample purity, i.e. fraction of tumor cells of the total cellular population.}
#' \item{\code{ploidy}:} {Inferred sample ploidy.}
#' \item{\code{dipLogR}:} {Inferred dipLogR, the sample-specific baseline corresponding to the diploid state.}
#' \item{\code{alBalLogR}:} {Alternative dipLogR value(s) at which a balanced solution was found.}
#' \item{\code{flags}:} {Warning flags from the naïve segmentation algorithm.}
#' \item{\code{em_flags}:} {Warning flags from the expectation-maximization segmentation algorithm.}
#' \item{\code{loglik}:} {Log-likelihood value of the fitted model.}
#' }
#'
#' @examples
#' \dontrun{
#' library(pctGCdata)
#' run_facets(test_read_counts, cval = 500, genome = 'hg38')
#' }
#'
#' @import facets
#' @import pctGCdata
#' @export run_facets
run_facets = function(read_counts,
cval = 100,
dipLogR = NULL,
ndepth = 35,
snp_nbhd = 250,
min_nhet = 15,
genome = c('hg18', 'hg19', 'hg38', 'mm9', 'mm10'),
seed = 100,
facets_lib_path = '') {
if ( facets_lib_path != '' ) {
library(facets, lib.loc = facets_lib_path)
} else {
library(facets)
}
print(paste0('loaded facets version: ', packageVersion('facets')))
# Check input
missing_cols = setdiff(c('Chromosome', 'Position', 'NOR.DP', 'TUM.DP', 'NOR.RD', 'TUM.RD'), names(read_counts))
if (length(missing_cols) > 0) {
stop(paste0('Input missing column(s)', paste(missing_cols, collapse = ', '), '.'), call. = FALSE)
}
set.seed(seed)
genome = match.arg(genome)
# Run FACETS algorithm
dat = facets::preProcSample(read_counts, ndepth = ndepth, het.thresh = 0.25, snp.nbhd = snp_nbhd, cval = 25,
gbuild = genome, hetscale = TRUE, unmatched = FALSE, ndepthmax = 1000)
out = facets::procSample(dat, cval = cval, min.nhet = min_nhet, dipLogR = dipLogR)
fit = facets::emcncf(out)
# Fix bad NAs
fit$cncf = cbind(fit$cncf, cf = out$out$cf, tcn = out$out$tcn, lcn = out$out$lcn)
fit$cncf$lcn[fit$cncf$tcn == 1] = 0
fit$cncf$lcn.em[fit$cncf$tcn.em == 1] = 0
# Generate output
list(
snps = out$jointseg,
segs = fit$cncf,
purity = as.numeric(fit$purity),
ploidy = as.numeric(fit$ploidy),
dipLogR = out$dipLogR,
alBalLogR = out$alBalLogR,
flags = out$flags,
em_flags = fit$emflags,
loglik = fit$loglik
)
}
## load facets output from .Rds file or legacy run of .Rdata file
#' @export load_facets_output
load_facets_output <- function(rds_rdata_file) {
if (grepl('rds$', rds_rdata_file, ignore.case = T)) {
readRDS(rds_rdata_file)
}
else {
load(rds_rdata_file)
list(
snps = out$jointseg,
segs = fit$cncf,
purity = as.numeric(fit$purity),
ploidy = as.numeric(fit$ploidy),
dipLogR = out$dipLogR,
alBalLogR = out$alBalLogR,
flags = out$flags,
em_flags = fit$emflags,
loglik = fit$loglik
)
}
}
#' @export create_legacy_output
create_legacy_output <- function(facets_output, directory, sample_id, counts_file, sel_run_type, run_details) {
sample_id = ifelse(sel_run_type == '', sample_id, paste0(sample_id, '_', sel_run_type))
output_prefix = paste0(directory, '/', sample_id)
### create cncf.txt
facets_output$segs %>%
mutate(ID=sample_id,
loc.start = start,
loc.end = end) %>%
select(ID, chrom, loc.start, loc.end, seg, num.mark, nhet, cnlr.median, mafR,
segclust, cnlr.median.clust, mafR.clust, cf, tcn, lcn,
cf.em, tcn.em, lcn.em) %>%
write.table(file=paste0(output_prefix, '.cncf.txt'), quote=F, row.names=F, sep='\t')
### create .Rdata
out =
list(
jointseg = facets_output$snps,
out = facets_output$segs %>%
select(chrom, seg, num.mark, nhet, cnlr.median, mafR,
segclust, cnlr.median.clust, mafR.clust, cf, tcn, lcn),
nX=23,
chromlevels = c(1:22, "X"),
dipLogR = facets_output$dipLogR,
alBalLogR = facets_output$alBalLogR,
IGV = NULL
)
fit =
list(
loglik = facets_output$loglik,
purity = facets_output$purity,
ploidy = facets_output$ploidy,
dipLogR = facets_output$dipLogR,
seglen = -1,
cncf = facets_output$segs,
emflags = facets_output$em_flags
)
save(out, fit, file=paste0(output_prefix, ".Rdata"), compress=T)
### create .CNCF.png
system(paste0('mv ', output_prefix, '.png ', output_prefix, '.CNCF.png'))
#### create .out file
purity_cval = cval = NA
if(nrow(run_details) == 2) {
cval = (run_details %>% filter(run_type == 'hisens'))$cval[1]
purity_cval = (run_details %>% filter(run_type == 'purity'))$cval[1]
} else {
cval = run_details$cval[1]
purity = run_details$purity[1]
}
run = run_details %>% filter(run_type == sel_run_type) %>% head(n=1)
out_txt = "# INPUT PARAMETERS GIVEN\n"
out_txt = paste0(out_txt, '# TAG = ', sample_id, '\n')
out_txt = paste0(out_txt, '# Facets version = ', as.character(packageVersion('facets')), '\n')
out_txt = paste0(out_txt, '# Input = ', basename(as.character(run$input_file)),'\n')
out_txt = paste0(out_txt, '# Seed = ', run$seed, '\n')
out_txt = paste0(out_txt, '# snp.nbhd = ', run$snp_nbhd, ' \n')
out_txt = paste0(out_txt, '# ndepth = ', run$ndepth,' \n')
out_txt = paste0(out_txt, '# purity_cval = ', purity_cval,'\n')
out_txt = paste0(out_txt, '# unmatched = FALSE \n')
out_txt = paste0(out_txt, '# cval = ', cval,'\n')
out_txt = paste0(out_txt, '# min.nhet = ', run$min_nhet,'\n')
out_txt = paste0(out_txt, '# genome = hg19\n')
out_txt = paste0(out_txt, '# tumor_id = \n')
out_txt = paste0(out_txt, '\n# LOADED MODULE INFO\n')
out_txt = paste0(out_txt, '# Facets version = ', as.character(packageVersion('facets')) , '\n\n')
out_txt = paste0(out_txt, '\n# FACETS OUTPUT\n')
out_txt = paste0(out_txt, '# Purity = ', run$purity, '\n')
out_txt = paste0(out_txt, '# Ploidy = ', run$ploidy,' \n')
out_txt = paste0(out_txt, '# dipLogR = ', run$dipLogR,'\n')
out_txt = paste0(out_txt, '# dipt = -1\n')
out_txt = paste0(out_txt, '# loglik = \n')
out_txt = paste0(out_txt, '# output flags\n')
out_txt = paste0(out_txt, '# ', run$flags,'\n\n')
cat(out_txt, file=paste0(directory, '/', sample_id, '.out'))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.