#
# Author: lubo
# Created: Mar 28, 2018
###############################################################################
#'Integration with 'sGAINS' tool.
#'
#' This function is called by sGAINS tools to perform the final step in preparation
#' of results: phylogenetic analysis of single-cell genomes represented by their
#' copy-number profiles
#'
#' @param scgv_dir directory where the results of the analysis should be stored
#' @param case_name name of the case to be used for storing results of the
#' analysis
#' @param varbin_dir directory where output of 'varbin' step of sGAINS (the
#' binning scheme) is located
#' @param varbin_suffix common suffix for files produced by 'varbin' step of
#' sGAINS
#' @param bins_boundaries file name for binning scheme to use in the analysis
#' @param cytoband file name for a cytoband coordinate table for the version of
#' the genome being used
#' @param badbins a file name for a table of bad bins (bins with outlying read
#' counts) for the specified binning scheme
#' @param nsim number of simulations to run for calculating simulated FDR
#' distribution
#' @param sharemin a feature is considered 'widely shared' by leaves of a tree
# node if present in sharemin fraction of leaves
#' @export
sgains_pipeline <- function(
scgv_dir, case_name,
varbin_dir, varbin_suffix,
bins_boundaries_filename,
cytoband,
badbins=NULL,
nsim=150,
sharemin=0.85,
fdrthres=-2,
nshare=3,
climbtoshare=3) {
if(!file.exists(scgv_dir)) {
dir.create(scgv_dir)
}
filenames <- case_filenames(scgv_dir, case_name)
gc_df <-load_table(bins_boundaries_filename)
gc_df$chrom.numeric <- chrom_numeric(gc_df$bin.chrom)
if(is.null(badbins)) {
cyto_data <- read.table(cytoband, header=F,as.is=T)
dropareas <- calc_centroareas(cyto_data)
badbins <- calc_regions2bins(gc_df, dropareas)
}
gc_df <- gc_df[-badbins, ]
varbin_files <- varbin_input_files(varbin_dir, varbin_suffix)
cells <- varbin_files$cells
res <- segment_varbin_files(varbin_files, gc_df, badbins)
uber_seg <- res$seg
uber_ratio <- res$ratio
cells <- uber_cells(uber_seg, skip=3)$cells
save_table(filenames$cells, data.frame(cell=cells))
save_table(filenames$seg, uber_seg)
save_table(filenames$ratio, uber_ratio)
pins <- calc_pinmat(gc_df, uber_seg, dropareas=dropareas)
pinmat_df <- pins$pinmat
pins_df <- pins$pins
save_table(filenames$featuremat, pinmat_df)
save_table(filenames$features, pins_df)
fisher <- sim_fisher_wrapper(
pinmat_df, pins_df, njobs=30, nsim=nsim, nsweep=10)
true_pv <- fisher$true
sim_pv <- fisher$sim
mfdr <- fisher_fdr(true_pv, sim_pv, cells)
mdist <- fisher_dist(true_pv, cells)
hc <- hclust_tree(pinmat_df, mfdr, mdist)
tree_df <- tree_py(mdist, method='average')
hc <- find_clones(hc,
fdrthres=fdrthres, sharemin=sharemin,
nshare=nshare, climbtoshare=climbtoshare)
subclones <- find_subclones(hc, pinmat_df, pins_df,
nsim=nsim,
fdrthres=fdrthres, sharemin=sharemin,
baseshare=nshare, climbtoshare=climbtoshare)
save_table(filenames$tree, tree_df)
save_table(filenames$clone, subclones)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.