
Defines functions getSubcloneProfiles removeEmptyClusters printSDbw getMajorMinorCN allelicRatioBasedCN logRbasedCN correctIntegerCN mergeSegsByCol outputTitanSegments outputModelParameters outputTitanResults decodeLOH decoupleMegaVar sdbw.density computeSDbwIndex computeBIC setupClonalParameters correctReadDepth setGenomeStyle getPositionOverlap getOverlap excludeGarbageState removeCentromereSegs extendSegments removeCentromere filterData extractAlleleReadCounts loadAlleleCounts loadDefaultParameters

Documented in computeSDbwIndex correctIntegerCN correctReadDepth filterData getPositionOverlap loadAlleleCounts loadDefaultParameters outputModelParameters outputTitanResults outputTitanSegments setGenomeStyle

#' author: Gavin Ha 
#' 		Dana-Farber Cancer Institute
#'		Broad Institute
#' contact: <gavinha@gmail.com> or <gavinha@broadinstitute.org>
#' date:	  June 26, 2018

#' Build the default TITAN parameter set.
#'
#' @param copyNumber Maximum total copy number to model; must be within 3..8.
#' @param numberClonalClusters Number of clonal clusters, forwarded to
#'   setupClonalParameters() as Z.
#' @param skew Value added to the theoretical allelic ratios (rt) and to rn.
#' @param hetBaselineSkew Optional offset; when supplied, the heterozygous
#'   states use an allelic ratio of hetBaselineSkew + 0.5.
#' @param alleleEmissionModel Either "binomial" or "Gaussian".
#' @param symmetric Deprecated. FALSE is reported and then treated as TRUE.
#' @param data Optional data object with ref, tumDepth and logR elements used
#'   to derive data-driven initial values.
#' @return A list with elements genotypeParams, ploidyParams, normalParams and
#'   cellPrevParams.
loadDefaultParameters <- function(copyNumber = 5, numberClonalClusters = 1, 
    skew = 0, hetBaselineSkew = NULL, alleleEmissionModel = "binomial", symmetric = TRUE, data = NULL) {
    if (copyNumber < 3 || copyNumber > 8) {
        stop("loadDefaultParameters: Fewer than 3 or more than 8 copies are 
             being specified. Please use minimum 3 or maximum 8 'copyNumber'.")
    }
    if (!alleleEmissionModel %in% c("binomial", "Gaussian")){
      stop("loadDefaultParameters: alleleEmissionModel must be either \"binomial\" or \"Gaussian\".")
    }
    if (!symmetric){
      message("loadDefaultParameters: symmetric=FALSE is deprecated; using symmetric=TRUE.")
      ## actually apply the fallback the message announces; otherwise rt, ZS,
      ## ct and hetState below would never be defined
      symmetric <- TRUE
    }
    ## Data without allelic skew rn is theoretical
    ## normal reference allelic ratio initialize to
    ## theoretical values
    rn <- 0.5
    if (symmetric) {
        rt <- c(rn, 1, 1, 1/2, 1, 2/3, 1, 3/4, 2/4, 
            1, 4/5, 3/5, 1, 5/6, 4/6, 3/6, 1, 6/7, 
            5/7, 4/7, 1, 7/8, 6/8, 5/8, 4/8)
        rt <- rt + skew
        rt[rt > 1] <- 1
        rt[rt < (rn + skew)] <- rn + skew
        ## shift heterozygous states to account for noise 
        ##   when using symmetric = TRUE
        if (!is.null(hetBaselineSkew)){
            hetARshift <- hetBaselineSkew + 0.5
        }else if (!is.null(data)){
            hetARshift <- median(data$ref / data$tumDepth, na.rm = TRUE)
        }else{
            hetARshift <- 0.55
        }
        hetState <- c(4, 9, 16, 25)
        rt[hetState] <- hetARshift
        ZS <- 0:24
        ct <- c(0, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 
            6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8)
    } #else {
      #  rt = c(rn, 1, 1e-05, 1, 1/2, 1e-05, 1, 2/3, 
      #      1/3, 1e-05, 1, 3/4, 2/4, 1/4, 1e-05, 1, 
      #      4/5, 3/5, 2/5, 1/5, 1e-05, 1, 5/6, 4/6, 
      #      3/6, 2/6, 1/6, 1e-05, 1, 6/7, 5/7, 4/7, 
      #      3/7, 2/7, 1/7, 1e-05, 1, 7/8, 6/8, 5/8, 
      #      4/8, 3/8, 2/8, 1/8, 1e-05)
      #  rt = rt + skew
      #  rt[rt > 1] <- 1
      #  rt[rt < 0] <- 1e-05
      #  ZS = c(0, 1, 1, 2, 3, 2, 4, 5, 5, 4, 6, 7, 
      #      8, 7, 6, 9, 10, 11, 11, 10, 9, 12, 13, 
      #      14, 15, 14, 13, 12, 16, 17, 18, 19, 19, 
      #      18, 17, 16, 20, 21, 22, 23, 24, 23, 22, 
      #      21, 20)
      #  ct = c(0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 
      #      4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 
      #      6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 
      #      8, 8, 8, 8, 8, 8, 8)
      #  highStates <- c(1,16:length(rt))
      #  hetState <- c(5, 13, 25, 41)
      #}
    ZS[hetState[1]] <- -1
    rn <- rn + skew
    ## keep only the states whose total copy number is within the requested
    ## maximum; rt, ZS and ct are subset in parallel
    ind <- ct <= copyNumber
    hetState <- hetState[hetState <= sum(ind)]
    rt <- rt[ind]
    ZS <- ZS[ind]
    ct <- ct[ind]  #reassign rt and ZS based on specified copy number
    K <- length(rt)
    ## Dirichlet hyperparameter for initial state
    ## distribution, kappaG
    kappaGHyper_base <- 100
    if (length(data$ref) > 0 && length(data$logR) > 0){
      corRho_0 <- cor(data$ref / data$tumDepth, data$logR, use = "pairwise.complete.obs")
    }else{
      corRho_0 <- NULL
    }
    var_base <- 1/20 #var(data$logR, na.rm = TRUE)
    var0_base <- 1/20 #var(data$ref / data$tumDepth, na.rm = TRUE)
    if (!is.null(data)){
      #alphaK <- 1 / (var(data$logR, na.rm = TRUE) / sqrt(K))
      #betaK <- alphaK * var(data$logR, na.rm = TRUE)
      #alphaR <- 1 / (var(data$ref / data$tumDepth, na.rm = TRUE) / sqrt(K))
      #betaR <- alphaK * var(data$ref / data$tumDepth, na.rm = TRUE)
      betaK <- 25 
      ## na.rm = TRUE so missing logR values do not turn alphaK into NA
      ## (same treatment as the allelic-ratio variance below)
      alphaK <- betaK / var(data$logR, na.rm = TRUE) 
      betaR <- 25 
      alphaR <- betaR / var(data$ref / data$tumDepth, na.rm = TRUE)
    }else{
      alphaK <- 10000   
      betaK <- 25
      alphaR <- 10000
      betaR <- 25
    }
    ## Gather all genotype related parameters into a list
    genotypeParams <- vector("list", 0)
    genotypeParams$rt <- rt
    genotypeParams$rn <- rn
    genotypeParams$ZS <- ZS
    genotypeParams$ct <- ct
    ## VARIANCE for Gaussian to model copy number, var
    genotypeParams$corRho_0 <- corRho_0
    genotypeParams$var_0 <- rep(var_base, K) 
    #genotypeParams$var_0[ct %in% c(2, 4, 8)] <- var_base / 10
    genotypeParams$alphaKHyper <- rep(alphaK, K)
    genotypeParams$alphaKHyper[ct >= 5] <- alphaK  #AMP(11-15),HLAMP(16-21) states
    genotypeParams$betaKHyper <- rep(betaK, K)
    genotypeParams$alleleEmissionModel <- alleleEmissionModel
    ## VARIANCE for Gaussian to model allelic fraction, varR
    genotypeParams$varR_0 <- rep(var0_base, K) 
    #genotypeParams$varR_0[hetState] <- var0_base / 10
    genotypeParams$alphaRHyper <- rep(alphaR, K)
    genotypeParams$betaRHyper <- rep(betaR, K)
    genotypeParams$kappaGHyper <- rep(kappaGHyper_base, K) + 1
    genotypeParams$kappaGHyper[hetState] <- kappaGHyper_base * 5
    genotypeParams$kappaGHyper[ct == 0] <- kappaGHyper_base / 50
    genotypeParams$piG_0 <- estimateDirichletParamsMap(genotypeParams$kappaGHyper)  #add the outlier state
    genotypeParams$outlierVar <- 10000
    genotypeParams$symmetric <- symmetric
    ## NORMAL, n
    normalParams <- vector("list", 0)
    normalParams$n_0 <- 0.5
    normalParams$alphaNHyper <- 2
    normalParams$betaNHyper <- 2
    #rm(list = c("rt", "rn", "ZS", "ct", "var_0", "kappaGHyper", "skew"))
    ## PLOIDY, phi
    ploidyParams <- vector("list", 0)
    ploidyParams$phi_0 <- 2
    ploidyParams$alphaPHyper <- 20
    ploidyParams$betaPHyper <- 42
    ## CELLULAR PREVALENCE, s
    sParams <- setupClonalParameters(Z = numberClonalClusters)
    sParams$piZ_0 <- estimateDirichletParamsMap(sParams$kappaZHyper)
    # return
    output <- vector("list", 0)
    output$genotypeParams <- genotypeParams
    output$ploidyParams <- ploidyParams
    output$normalParams <- normalParams
    output$cellPrevParams <- sParams
    return(output)
}

#' Load tumour allele read counts from a file or data.frame.
#'
#' Columns are addressed by position: 1 = chromosome, 2 = position,
#' 4 = reference count, 6 = non-reference count.
#'
#' @param inCounts Path to a delimited counts file, or a data.frame with the
#'   same column layout.
#' @param symmetric If TRUE, `ref` is max(reference count, non-reference
#'   count) per position; otherwise it is the reference count.
#' @param genomeStyle Chromosome naming style passed to setGenomeStyle().
#' @param sep,header Passed to read.delim() when inCounts is a file name.
#' @return A data.table with columns chr, posn, ref, refOriginal, nonRef and
#'   tumDepth, sorted by chromosome and position.
loadAlleleCounts <- function(inCounts, symmetric = TRUE, 
			genomeStyle = "NCBI", sep = "\t", header = TRUE) {
  if (is.character(inCounts)){
    message("titan: Loading data ", inCounts)
    data <- read.delim(inCounts, header = header, stringsAsFactors = FALSE, 
                       sep = sep)
    ## position and both count columns must have been parsed as integers
    if (typeof(data[,2])!="integer" || typeof(data[,4])!="integer" || 
        typeof(data[,6])!="integer"){
      stop("loadAlleleCounts: Input counts file format does not 
        		match required specifications.")
    }
  }else if (is.data.frame(inCounts)){  #inCounts is a data.frame
    data <- inCounts
  }else{
    stop("loadAlleleCounts: Must provide a filename or data.frame 
    		to inCounts")
  }
  ## use GenomeInfoDb
  # convert to desired genomeStyle and only include autosomes, sex chromosomes
  data[, 1] <- setGenomeStyle(data[, 1], genomeStyle)
  ## sort chromosomes
  indChr <- orderSeqlevels(as.character(data[, 1]), X.is.sexchrom = TRUE)
  data <- data[indChr, ]
  ## sort positions within each chr
  for (x in unique(data[, 1])){
    ind <- which(data[, 1] == x)
    data[ind, ] <- data[ind[sort(data[ind, 2], index.return = TRUE)$ix], ]
  }
  refOriginal <- as.numeric(data[, 4])
  nonRef <- as.numeric(data[, 6])
  tumDepth <- refOriginal + nonRef
  if (symmetric) {
    ref <- pmax(refOriginal, nonRef)
  } else {
    ref <- refOriginal
  }
  return(data.table(chr = as.character(data[, 1]), posn = data[, 2], ref = ref, 
      refOriginal = refOriginal, nonRef = nonRef, 
      tumDepth = tumDepth))
}

#' Extract reference / non-reference read counts at given positions.
#'
#' @param bamFile Path to the tumour BAM file.
#' @param bamIndex Path to the BAM index.
#' @param positions Path to a VCF/BCF file of positions (opened with BcfFile).
#' @param outputFilename Optional path; when not NULL the count table is also
#'   written there as tab-delimited text.
#' @param pileupParam A PileupParam object forwarded to pileup().
#' @return data.frame with columns chr, position, ref, refCount, Nref and
#'   NrefCount, restricted to chromosomes 1-22, X and Y.
extractAlleleReadCounts <- function(bamFile, bamIndex, 
			positions, outputFilename = NULL, 
			pileupParam = PileupParam()){

  ## read in vcf file of het positions
  vcf <- BcfFile(positions)
  vcfPosns <- scanBcf(vcf)

  ## setup the positions of interest to generate the pileup for
  targetRanges <- GRanges(as.character(vcfPosns$CHROM), 
                          IRanges(vcfPosns$POS, width = 1))
  ## setup addition BAM filters, such as excluding duplicate reads
  sbp <- ScanBamParam(flag = scanBamFlag(isDuplicate = FALSE), 
                      which = targetRanges)

  ## generate pileup using function (Rsamtools >= 1.17.11)
  ## this step can take a while
  tumbamObj <- BamFile(bamFile, index = bamIndex)
  counts <- pileup(tumbamObj, scanBamParam = sbp,  pileupParam = pileupParam)

  ## set of command to manipulate the "counts" data.frame output
  ##     by pileup() such that multiple nucleotides are in a single
  ##     row rather than in multiple rows.
  countsMerge <- xtabs(count ~ which_label + nucleotide, counts)
  label <- do.call(rbind, strsplit(rownames(countsMerge), ":"))
  posn <- do.call(rbind, strsplit(label[, 2],"-"))
  countsMerge <- cbind(data.frame(chr = label[, 1]), 
                       position = posn[, 1], countsMerge[,1:7])

  ## this block of code is used to match up the reference and 
  ##   non-reference nucleotide when assigning read counts
  ##   final output data.frame is "countMat"
  ## setup output data.frame
  countMat <- data.frame(chr = vcfPosns$CHROM, 
                         position = as.numeric(vcfPosns$POS), 
                         ref = vcfPosns$REF, refCount = 0, 
                         Nref = vcfPosns$ALT, NrefCount = 0, 
                         stringsAsFactors = FALSE)

  ## match rows with vcf positions of interest
  countMat <- merge(countMat, countsMerge, by = c("chr","position"), 
                    sort = FALSE, stringsAsFactors = FALSE)

  ## assign the flattened table of nucleotide counts to ref, Nref
  ## note that non-reference (Nref) allele is sum of other bases
  ##    that is not matching the ref.
  NT <- c("A", "T", "C", "G")
  for (n in seq_along(NT)){
    indRef <- countMat$ref == NT[n]
    countMat[indRef, "refCount"] <- countMat[indRef, NT[n]]
    countMat[indRef, "NrefCount"] <- rowSums(countMat[indRef, NT[-n]])
  }

  ## remove "chr" string from chromosome
  countMat$chr <- gsub("chr","",countMat$chr)
  ## only use autosomes and sex chrs
  countMat <- countMat[countMat$chr %in% c(as.character(1:22),"X","Y"),]
  ## only use first 6 columns for TitanCNA
  countMat <- countMat[,1:6]

  if (!is.null(outputFilename)){
    ## output text file will have the same format required by TitanCNA
    message("extractAlleleReadCounts: writing to ", outputFilename)
    write.table(countMat, file = outputFilename, row.names = FALSE, 
                col.names = TRUE, quote = FALSE, sep = "\t")
  }
  return(countMat)
}

## Filter positions by tumour depth, mappability, an optional position
## whitelist, centromere overlap and chromosome.
##
## data:          object with chr, posn, refOriginal, tumDepth and logR
##                columns (rows are positions)
## chrs:          chromosomes to keep; NULL keeps all and skips the
##                chromosome filter entirely
## minDepth/maxDepth: inclusive bounds on data$tumDepth
## positionList:  optional 2-column table (chr, posn) of positions to keep
## map:           optional numeric vector of mappability scores, one per row
## mapThres:      minimum mappability score to keep a position
## centromeres:   optional table whose first 3 columns are chr, start, end
## centromere.flankLength: extra bases removed on each side of a centromere
##
## Returns the filtered data object. Positions with NA logR are dropped.
filterData <- function(data, chrs = NULL, minDepth = 10, 
    maxDepth = 200, positionList = NULL, map = NULL, 
    mapThres = 0.9, centromeres = NULL, centromere.flankLength = 0) {
    genomeStyle <- seqlevelsStyle(data$chr)[1]
    if (!is.null(map)) {
        keepMap <- map >= mapThres
    } else {
        ## no mappability given: keep everything
        keepMap <- !logical(length = length(data$refOriginal))
    }
    if (!is.null(positionList)) {
        chrPosnList <- paste(positionList[, 1], positionList[, 
            2], sep = ":")  #chr:posn
        chrPosnData <- paste(data$chr, data$posn, sep = ":")
        keepPosn <- is.element(chrPosnData, chrPosnList)
    } else {
        keepPosn <- !logical(length = length(data$chr))
    }
    keepTumDepth <- data$tumDepth <= maxDepth & data$tumDepth >= minDepth
    cI <- keepTumDepth & !is.na(data$logR) & 
        keepMap & keepPosn
    data <- data[which(cI), ]

    ## remove centromere SNPs ##
    if (!is.null(centromeres)){
        colnames(centromeres)[1:3] <- c("Chr", "Start", "End") 
        centromeres$Chr <- setGenomeStyle(centromeres$Chr, genomeStyle = genomeStyle[1])
        data <- removeCentromere(data, centromeres, flankLength = centromere.flankLength)
    }
    if (is.null(chrs)){
      keepChrs <- logical(length = length(data$chr))
    }else{
      keepChrs <- is.element(data$chr, chrs)
      data <- data[keepChrs, ]
      ## drop chromosomes left with fewer than 2 positions
      ## NOTE(review): placed inside this branch to match the source
      ## indentation -- confirm against the upstream file
      message("Removed Chrs: ", names(which(table(data$chr) < 2)))
      data <- data[data$chr %in% names(which(table(data$chr) > 1)), ]
    }
    return(data)
}

## Remove positions that fall inside (flanked) centromere regions.
## input: 
# 1) data object output by loadAlleleCounts(); must have chr and posn columns
# 2) data.frame containing coordinates of centromeres with columns
#    Chr, Start, End (further columns are ignored)
# 3) flankLength: number of bases added on both sides of each centromere
## output: data with the centromeric rows removed
removeCentromere <- function(data, centromere, flankLength = 0){
  keepInd <- !logical(length = length(data$chr))
  ## seq_len() so that an empty centromere table removes nothing
  for (c in seq_len(nrow(centromere))){
    ind <- which((data$chr == centromere[c, "Chr"]) &
                 (data$posn >= (centromere[c, "Start"] - flankLength)) &
                 (data$posn <= (centromere[c, "End"] + flankLength)))
    keepInd[ind] <- FALSE
  }
  message("Removed ", sum(!keepInd), " centromeric positions")
  ## remove positions in all elements of list
  data <- data[which(keepInd), ]
  return(data)
}

## Extend segment boundaries so that adjacent segments on a chromosome meet
## halfway between the original (SNP-based) end and next start.
##
## segs:              data.table with at least Chromosome, Start and End
## removeCentromeres: if TRUE, clip/split segments with removeCentromereSegs()
## centromeres:       centromere table; required when removeCentromeres=TRUE
## extendToTelomeres: if TRUE, add column Start.telo (see note below)
## seqInfo:           Seqinfo object; required when extendToTelomeres=TRUE
## chrs, genomeStyle: forwarded to removeCentromereSegs()
##
## Returns a new data.table; the original coordinates are preserved in
## Start.snp / End.snp. The input is copied, not modified.
extendSegments <- function(segs, removeCentromeres = FALSE, centromeres = NULL,
	extendToTelomeres = FALSE, seqInfo = NULL, chrs = c(1:22, "X", "Y"), genomeStyle = "NCBI"){
  newSegs <- copy(segs)
  ## per chromosome: split each gap between consecutive segments in half
  newStartStop <- newSegs[, {totalLen = c(Start[-1], NA) - End
          extLen = round(totalLen / 2)
          End.ext = c(End[-.N] + round(extLen)[-.N], End[.N])
          Start.ext = c(Start[1], End.ext[-.N] + 1)
          list(Start=Start.ext, End=End.ext) 
          }, by=Chromosome]
  newSegs[, Start.snp := Start]
  newSegs[, End.snp := End]
  newSegs[, Start := newStartStop[, Start]]
  newSegs[, End := newStartStop[, End]]
  ## place Start.snp / End.snp directly after End
  stopColInd <- which(names(newSegs) == "End")
  setcolorder(newSegs, c(names(newSegs)[1:stopColInd], "Start.snp", "End.snp", names(newSegs)[(stopColInd+1):(ncol(newSegs)-2)]))
  if (removeCentromeres){
    if (is.null(centromeres)){
      stop("If removeCentromeres=TRUE, must provide centromeres data.table object.")
    }
    message("Removing centromeres from segments.")
    newSegs <- removeCentromereSegs(newSegs, centromeres, chrs = chrs, genomeStyle = genomeStyle)
  }
  if (extendToTelomeres){
    if (is.null(seqInfo)){
      stop("If extendToTelomeres=TRUE, must provide SeqInfo object with chromosome lengths.")
    }
    message("Extending segments to telomeres.")
    ## Start.telo holds the End coordinates with the last segment of each
    ## chromosome pushed out to the chromosome length (column name kept for
    ## backward compatibility).
    ## NOTE(review): chromosome lengths are looked up by group index (.GRP),
    ## which presumes the chromosomes appear in seqInfo order -- confirm.
    newSegs[, Start.telo := { endCoord = End
        endCoord[.N] = seqlengths(seqInfo)[seqnames(seqInfo)[.GRP]]
        endCoord
        }, by=Chromosome]
  }
  return(newSegs)
}

## Clip, drop or split segments so that none overlaps a centromere.
##
## segs:        data.table with Chromosome, Start and End columns
## centromeres: table whose first three columns are chromosome, start, end
## chrs:        chromosome names used to order the output
## genomeStyle: naming style that chrs is mapped to via mapSeqlevels()
##
## Returns a copy of segs, re-ordered by Chromosome and Start, with
## Chromosome converted to a factor whose levels are the mapped chrs.
removeCentromereSegs <- function(segs, centromeres, chrs = c(1:22, "X", "Y"), genomeStyle = "NCBI"){	
  #seqlevelsStyle(chrs) <- genomeStyle
  chrs <- mapSeqlevels(chrs, genomeStyle, drop = FALSE)[1, ]
  segs <- copy(segs)
  for (i in seq_len(nrow(centromeres))){
    x <- as.data.frame(centromeres[i,])
    names(x)[1:3] <- c("Chr","Start","End")
    chrInd <- which(segs[, Chromosome == x$Chr])
    ## start is in centromere ##
    ind <- which(segs[chrInd, Start >= x$Start & Start <= x$End])
    if (length(ind)){
      #message("Chr:", i, "Start in centromere.")
      # move start to end of centromere
      segs[chrInd[ind], Start := x$End + 1]
      #segs[chrInd[ind], Length.snp. := NA]
    }
    ## end is in centromere ##
    ind <- which(segs[chrInd, End >= x$Start & End <= x$End])
    if (length(ind)){
      #message("Chr:", i, "Stop in centromere.")
      # move end to start of centromere
      segs[chrInd[ind], End := x$Start - 1]
      #segs[chrInd[ind], Length.snp. := NA]
    }
    ## both start and end in centromere ##
    ind <- which(segs[chrInd, Start >= x$Start & End <= x$End])
    if (length(ind)){
      #message("Chr:", i, "Seg in centromere.")
      # remove segment #
      segs <- segs[-chrInd[ind]]
      # rows were deleted, so the row indices for this chromosome are
      # stale; recompute them before the next lookup
      chrInd <- which(segs[, Chromosome == x$Chr])
    }
    ## segment spans centromere ##
    ind <- which(segs[chrInd, Start <= x$Start & End >= x$End])
    if (length(ind)){
      #message("Chr:", i, "Seg span centromere.")
      # break into 2 segments #
      newRegion1 <- copy(segs[chrInd[ind]])
      #newRegion1[, Length.snp. := NA]
      newRegion2 <- copy(newRegion1)
      newRegion1[, End := x$Start - 1] #left segment before centromere
      newRegion2[, Start := x$End + 1] #right segment after centromere
      # remove old segment and add in 2 new ones
      segs <- segs[-chrInd[ind]]
      segs <- rbind(segs, newRegion1, newRegion2)
    }
  }
  ## re-order the segments ##
  segs[, Chromosome := factor(Chromosome, levels = chrs)]
  segs <- segs[do.call(order, segs[, c("Chromosome", "Start")])]
  return(segs)
}

## Drop the first (garbage/outlier) entry from every per-state parameter.
##
## params: list of parameters
## K:      number of states including the garbage state
##
## Every element of params whose length equals K is replaced by its
## elements 2..K; all other elements are returned unchanged.
excludeGarbageState <- function(params, K) {
    newParams <- params
    ## seq_along() so that an empty list is returned as-is instead of failing
    for (i in seq_along(newParams)) {
        if (length(newParams[[i]]) == K) {
            newParams[[i]] <- newParams[[i]][2:K]
        }
    }
    return(newParams)
}

## For each range in x, look up a column value from the overlapping range
## in y.
##
## x, y:        objects coercible to GRanges (x must support nrow())
## type:        overlap type passed to findOverlaps(); "any" or "within"
## colToReturn: name of the metadata column of y to return
## method:      how to resolve an x range hit by several y ranges; only
##              "common" (take the y range with the widest overlap) exists
##
## Returns a vector with one entry per row of x; NA where nothing overlaps.
getOverlap <- function(x, y, type = "within", colToReturn = "Copy_Number", method = "common"){
  if (!type %in% c("any", "within")){
    stop("getOverlap type must be \'any\' or \'within\'.")
  }
  cn <- rep(NA, nrow(x))
  x <- as(x, "GRanges")
  y <- as(y, "GRanges")
  hits <- findOverlaps(query = x, subject = y, type = type)
  cn[queryHits(hits)] <- values(y)[subjectHits(hits), colToReturn]
  # find genes split by segment #
  runs <- rle(queryHits(hits))
  splitInd <- which(runs$lengths > 1)
  if (length(splitInd) > 0){ # take the larger overlap
    if (method == "common"){
      for (i in seq_along(splitInd)){
        hitInd <- which(queryHits(hits)==runs$values[splitInd[i]])
        ind <- which.max(width(IRanges::overlapsRanges(query = ranges(x), subject = ranges(y), hits = hits[hitInd])))
        cn[unique(queryHits(hits)[hitInd])] <- values(y)[subjectHits(hits)[hitInd][ind], colToReturn]
      }
    }else{
      stop("Method other than 'common' is not yet supported.")
    }
  }
  return(cn)
}

## Look up the value of the range in dataVal that overlaps each position.
##
## chr, posn: parallel vectors giving chromosome and coordinate per position
## dataVal:   table coercible to GRanges; its 4th column is the value
##            to return (it is renamed to "logR" internally)
##
## Returns a vector with one entry per position; NA where no range overlaps.
getPositionOverlap <- function(chr, posn, dataVal) {
  # use GRanges to perform overlap
  colnames(dataVal)[4] <- "logR"
  dataGR <- as(dataVal, "GRanges")
  ## load chr/posn as data.frame first to use proper chr ordering by factors/levels
  chrDF <- data.frame(seqnames=chr, start=posn, end=posn)
  chrDF$seqnames <- factor(chrDF$seqnames, levels = unique(chr))
  chrGR <- as(chrDF, "GRanges")
  hits <- GenomicRanges::findOverlaps(query = chrGR, subject = dataGR)
  ## create full dataval list ##
  hitVal <- rep(NA, length.out = length(chr))
  hitVal[from(hits)] <- dataGR$logR[to(hits)]
  return(hitVal)
}

## Convert chromosome names to a naming style and optionally drop names
## that are not part of the species' standard chromosome set.
##
## x:              vector of chromosome names
## genomeStyle:    target style understood by mapSeqlevels(), e.g. "NCBI"
## species:        species passed to extractSeqlevelsByGroup()
## filterExtraChr: if TRUE, names outside group "all" for this species and
##                 style are removed, so the result can be shorter than x
##
## Returns the converted (and possibly filtered) chromosome names.
setGenomeStyle <- function(x, genomeStyle = "NCBI", species = "Homo_sapiens", 
  filterExtraChr = TRUE){
  #chrs <- genomeStyles(species)[c("NCBI","UCSC")]
  ## only remap when the detected style differs from the requested one
  if (!genomeStyle %in% seqlevelsStyle(as.character(x))[1]){
    x <- suppressWarnings(mapSeqlevels(as.character(x), 
            genomeStyle, drop = FALSE)[1,])
  }
  if (filterExtraChr){
    autoSexMChr <- extractSeqlevelsByGroup(species = species, 
            style = genomeStyle, group = "all")
    x <- x[x %in% autoSexMChr]
  }
  return(x)
}

## Correct tumour and normal read depth with correctReadcount() using the
## GC and mappability tracks, then normalise tumour by normal.
##
## tumWig, normWig, gcWig, mapWig: WIG files read with wigToGRanges()
## genomeStyle:      chromosome naming style applied to all four tracks
## targetedSequence: optional table (chr, start, end) of targeted regions;
##                   only bins overlapping these regions are analysed
##
## Returns a data.frame with columns chr, start, end and logR, where logR is
## the corrected tumour copy value minus the corrected normal copy value.
correctReadDepth <- function(tumWig, normWig, gcWig, mapWig, 
	genomeStyle = "NCBI", targetedSequence = NULL) {
  message("Reading GC and mappability files")
  gc <- wigToGRanges(gcWig)
  map <- wigToGRanges(mapWig)
  message("Loading tumour file:", tumWig)
  tumour_reads <- wigToGRanges(tumWig)
  message("Loading normal file:", normWig)
  normal_reads <- wigToGRanges(normWig)
  ### set the genomeStyle: NCBI or UCSC
  seqlevelsStyle(gc) <- genomeStyle
  seqlevelsStyle(map) <- genomeStyle
  seqlevelsStyle(tumour_reads) <- genomeStyle
  seqlevelsStyle(normal_reads) <- genomeStyle

  ### make sure tumour wig and gc/map wigs have same
  ### chromosomes
  gc <- gc[seqnames(gc) %in% seqnames(tumour_reads)]
  map <- map[seqnames(map) %in% seqnames(tumour_reads)]
  samplesize <- 50000
  ### for targeted sequencing (e.g.  exome capture),
  ### ignore bins with 0 for both tumour and normal
  ### targetedSequence = table of targeted regions to consider;
  ### 3 columns: chr, start, end
  if (!is.null(targetedSequence)) {
    message("Analyzing targeted regions...")
    targetIR <- GRanges(ranges = IRanges(start = targetedSequence[, 2], 
                end = targetedSequence[, 3]), seqnames = targetedSequence[, 1])
    ## NOTE(review): this converts names(targetIR), not its seqlevels --
    ## confirm that this is the intended target of the style conversion
    names(targetIR) <- setGenomeStyle(names(targetIR), genomeStyle)
    hits <- findOverlaps(query = tumour_reads, subject = targetIR)
    keepInd <- unique(queryHits(hits))    
    tumour_reads <- tumour_reads[keepInd, ]
    normal_reads <- normal_reads[keepInd, ]
    gc <- gc[keepInd, ]
    map <- map[keepInd, ]
    ## sample at most 10% of the retained bins; NROW() rather than nrow()
    ## because nrow() is NULL for a GRanges, which silently left the
    ## sample size at its default
    samplesize <- min(ceiling(NROW(tumour_reads) * 
        0.1), samplesize)
  }
  ### add GC and Map data to GRanges objects ###
  tumour_reads$gc <- gc$value
  tumour_reads$map <- map$value
  colnames(values(tumour_reads)) <- c("reads", "gc", "map")
  normal_reads$gc <- gc$value
  normal_reads$map <- map$value
  colnames(values(normal_reads)) <- c("reads", "gc", "map")
  message("Correcting Tumour")
  tumour_copy <- correctReadcount(tumour_reads, samplesize = samplesize)
  message("Correcting Normal")
  normal_copy <- correctReadcount(normal_reads, samplesize = samplesize)
  message("Normalizing Tumour by Normal")
  tumour_copy$copy <- tumour_copy$copy - normal_copy$copy
  ## cbind() of mixed types yields a character matrix, hence the explicit
  ## conversion back to numeric below
  temp <- cbind(chr = as.character(seqnames(tumour_copy)), 
      start = start(tumour_copy), end = end(tumour_copy), 
      logR = tumour_copy$copy)
  temp <- as.data.frame(temp, stringsAsFactors = FALSE)
  mode(temp$start) <- "numeric"
  mode(temp$end) <- "numeric"
  mode(temp$logR) <- "numeric"
  return(temp)
}

## Initialise the parameters of the clonal clusters.
##
## Z:              number of clonal clusters
## sPriorStrength: value used for every element of alphaSHyper and betaSHyper
##
## Returns a list with
##   s_0         initial values (1:Z)/10, with the first cluster set to 0.001
##   alphaSHyper rep(sPriorStrength, Z)
##   betaSHyper  rep(sPriorStrength, Z)
##   kappaZHyper rep(2, Z)
setupClonalParameters <- function(Z, sPriorStrength = 2) {
    alphaSHyper <- rep(sPriorStrength, Z)
    betaSHyper <- rep(sPriorStrength, Z)
    kappaZHyper <- rep(1, Z) + 1
    # use naive initialization
    s_0 <- (1:Z)/10
    #s_0 <- seq(0,1-1/Z,1/Z)
    ## first cluster should be clonally dominant (use
    ## 0.001) ##
    s_0[1] <- 0.001
    output <- vector("list", 0)
    output$s_0 <- s_0
    output$alphaSHyper <- alphaSHyper
    output$betaSHyper <- betaSHyper
    output$kappaZHyper <- kappaZHyper
    return(output)
}

## Bayesian information criterion: -2 * maxLoglik + M * log(N).
##
## maxLoglik: maximised log-likelihood
## M:         multiplier of the log(N) penalty term
## N:         value whose natural log scales the penalty
computeBIC <- function(maxLoglik, M, N) {
    bic <- -2 * maxLoglik + (M * log(N))
    return(bic)
}

## Compute the S_Dbw cluster validity index for TITAN results.
##
## x:                Titan results table from 'outputTitanResults()'
##                   (data.table syntax is used on it)
## centroid.method:  "median" or "mean"; forwarded to sdbw.density()
## data.type:        column to evaluate: "LogRatio", "AllelicRatio" or
##                   "HaplotypeRatio"
## use.corrected.cn: use column Corrected_Copy_Number when it is present
## S_Dbw.method:     "Halkidi" or "Tong"
## symmetric:        selects the flat state given to positions without one
##                   (4 if TRUE, 5 if FALSE) for allelic/haplotype ratios
##
## Returns list(S_DbwIndex, dens.bw, scat) with S_DbwIndex = scat + dens.bw.
##
## References:
## Halkidi and Vazirgiannis (2001). Clustering Validity Assessment:
##   Finding the Optimal Partition of a Data Set
## Tong and Tan (2009) Cluster validity based on the improved S_Dbw index
computeSDbwIndex <- function(x, centroid.method = "median", data.type = "LogRatio", 
						use.corrected.cn = TRUE,  
						S_Dbw.method = "Halkidi", symmetric = TRUE) {
    if (!data.type %in% c("LogRatio", "AllelicRatio", "HaplotypeRatio")){
        stop("computeSDbwIndex: data.type must be either 'LogRatio' or 'AllelicRatio'")
    }
    if (!S_Dbw.method %in% c("Halkidi", "Tong")){
        stop("computeSDbwIndex: S_Dbw.method must be either 'Halkidi' or 'Tong'")
    }
    if (use.corrected.cn && "Corrected_Copy_Number" %in% names(x)){
        cn.colname <- "Corrected_Copy_Number"
        state.colName <- "TITANstate"
    }else{
        cn.colname <- "CopyNumber"
        state.colName <- "TITANstate"
    }
    ## flatten copynumber-clonalclusters to single vector
    if (data.type=="LogRatio"){
        cn <- x[, get(cn.colname)] + 1
        cn[cn == 3] <- NA  ## remove all CN=2 positions
        flatState <- (x[, ClonalCluster] - 1) * (max(cn, na.rm = TRUE)) + cn
        flatState[is.na(flatState)] <- 3 ### assign all the CN=2 positions to cluster 3
        CNdata <- scale(x[, get(data.type)])
        x <- as.matrix(cbind(as.numeric(flatState), CNdata))
    }else if (data.type=="AllelicRatio" | data.type=="HaplotypeRatio"){
        st <- x[, get(state.colName)] + 1
        st[x[, which(get(state.colName) == "HET")]] <- NA
        flatState <- (x[, ClonalCluster] - 1) * (max(st, na.rm = TRUE)) + st
        if (symmetric){
            flatState[is.na(flatState)] <- 4
        }else{
            flatState[is.na(flatState)] <- 5
        }
        ## for allelic ratios, compute the symmetric allelic ratio
        ARdata <- x[, pmax(get(data.type), 1 - get(data.type))]
        ARdata <- scale(ARdata)
        x <- as.matrix(cbind(as.numeric(flatState), ARdata))
    }
    ## from here on x is a 2-column matrix: flat cluster id, scaled data
    clust <- sort(unique(x[, 1]))
    K <- length(clust)
    N <- nrow(x)
    ## find average standard deviation and scatter (compactness)
    stdev <- rep(NA, K)
    scat.Ci <- rep(NA, K)
    for (i in 1:K) {
        ind.i <- x[, 1] == clust[i]
        ni <- sum(ind.i)
        stdev[i] <- var(x[ind.i, 2], na.rm = TRUE)
        ## compute scatter based on variances within objects
        ## of a cluster (compactness)
        var.Ci <- var(x[ind.i, 2], na.rm = TRUE)
        var.D <- var(x[, 2], na.rm = TRUE)
        if (S_Dbw.method == "Halkidi"){
            scat.Ci[i] <- var.Ci/var.D
        }else if (S_Dbw.method == "Tong"){
            scat.Ci[i] <- ((N - ni) / N) * (var.Ci/var.D)
        }
    }
    avgStdev <- sqrt(sum(stdev, na.rm = TRUE))/K
    ## compute density between clusters (separation)
    sumDensityDiff <- matrix(NA, nrow = K, ncol = K)
    for (i in 1:K) {
        # cat('Calculating S_Dbw for cluster # ',clust[i],'\n')
        ind.i <- x[, 1] == clust[i]
        ni <- sum(ind.i)
        xci <- x[ind.i, 2]

        #density of Ci
        sumDiff.xci <- sdbw.density(xci, avgStdev, method = S_Dbw.method, 
                            centroid.method = centroid.method)
        for (j in 1:K) {
            ## a cluster is not compared with itself; the diagonal stays NA
            if (i == j) {
                next
            }
            ind.j <- x[, 1] == clust[j]
            nj <- sum(ind.j)
            xcj <- x[ind.j, 2]

            #density of Cj
            sumDiff.xcj <- sdbw.density(xcj, avgStdev, method = S_Dbw.method, 
                            centroid.method = centroid.method)
            ## union and midpoint of both clusters
            x.ci.cj <- union(xci, xcj)
            ci <- median(xci, na.rm = TRUE)  #centroid of cluster Ci
            cj <- median(xcj, na.rm = TRUE)  #centroid of cluster Cj
            nij <- ni + nj
            stdCiCj <- (sd(xci) + sd(xcj)) / 2
            if (S_Dbw.method == "Halkidi"){
                cij <- (ci + cj)/2
                #cij <- median(x.ci.cj, na.rm = TRUE)
            }else if (S_Dbw.method == "Tong"){
                lambda <- 0.7
                cij <- lambda * ((nj * ci + ni * cj) / nij) + (1 - lambda) * 
                    (sumDiff.xci * ci + sumDiff.xcj * cj) /
                    (sumDiff.xci + sumDiff.xcj) 
            }
            #density of union of both clusters using special centroid
            sumDiff.xci.xcj <- sdbw.density(x.ci.cj, avgStdev, stDev = stdCiCj, 
                            method = S_Dbw.method, centroid = cij, 
                            centroid.method = centroid.method)            
            maxDiff <- max(sumDiff.xci, sumDiff.xcj)
            ## avoid division by zero when neither cluster has any density
            if (maxDiff == 0) {
                maxDiff <- 0.1
            }
            sumDensityDiff[i, j] <- sumDiff.xci.xcj/maxDiff
        }
    }
    if (S_Dbw.method == "Halkidi"){
        scat <- sum(scat.Ci, na.rm = TRUE)/(K)
    }else if (S_Dbw.method == "Tong"){
        scat <- sum(scat.Ci, na.rm = TRUE)/(K - 1)
    }
    dens.bw <- sum(sumDensityDiff, na.rm = TRUE)/(K * (K - 1))
    S_DbwIndex <- scat + dens.bw
    # return(S_DbwIndex)
    return(list(S_DbwIndex = S_DbwIndex, dens.bw = dens.bw, scat = scat))
}

## Count the points of x that lie within a neighbourhood of a centroid.
##
## x:               numeric vector of data points
## avgStdev:        neighbourhood radius used by method "Halkidi"
## stDev:           standard deviation used by method "Tong"; when NULL it
##                  is computed from x
## method:          "Halkidi" (radius avgStdev) or "Tong" (radius
##                  1.96 * stDev / sqrt(length(x)))
## centroid:        centre of the neighbourhood; when NULL it is the median
##                  or mean of x according to centroid.method
## centroid.method: "median" or "mean"
##
## Returns the number of non-NA points within the radius (inclusive).
sdbw.density <- function(x, avgStdev, stDev = NULL, method = "Halkidi", 
					centroid = NULL, centroid.method = "median"){
    if (is.null(centroid)){
        if (centroid.method == "median") {
            centroid <- median(x, na.rm = TRUE)  #centroid of cluster Cj
        } else if (centroid.method == "mean") {
            centroid <- mean(x, na.rm = TRUE)  #centroid of cluster Cj
        }
    }
    #density of Ci
    if (method == "Halkidi"){
        sumDiff <- sum(abs(x - centroid) <= avgStdev, na.rm = TRUE) 
    }else if (method == "Tong"){
        if (is.null(stDev)){
            stDev <- sd(x, na.rm = TRUE)
        }
        conf.int <- 1.96 * (stDev / sqrt(length(x)))
        sumDiff <- sum(abs(x - centroid) <= conf.int, na.rm = TRUE)  
    }
    return(sumDiff)
}

# Split a sequence of joint ("mega-variable") states into a per-cluster
# unit state and a cluster index.
#
# G = sequence of states for the mega-variable
# K = number of unit states per cluster, excluding the outlier state
# useOutlierState = TRUE if the state space includes an outlier state
#
# precondition: if the outlier state is included it must be at G=1,
# otherwise HOMD is G=1
#
# returns list(G, Z): G is the unit state in 1..K (0 for outlier
# positions), Z is the cluster index ceiling(G/K) (NA for outlier positions)
decoupleMegaVar <- function(G, K, useOutlierState = FALSE) {
    if (useOutlierState) {
        G <- G - 1  #do this to make OUT=0 and HOMD=1
        G[G == 0] <- NA  #assign NA to OUT states
    }
    newG <- G%%K
    newG[newG == 0] <- K  # a remainder of 0 is the last unit state of a cluster
    newG[is.na(newG)] <- 0
    newZ <- ceiling(G/K)
    output <- vector("list", 0)
    output$G <- newG
    output$Z <- newZ
    return(output)
}

# Translate numeric genotype states into call labels and copy numbers.
#
# G = numeric vector of genotype states (HOMD = 0)
# symmetric = selects the symmetric (states 0..24) or the asymmetric
#   (states 0..45) state-to-call table
#
# pre-condition: outlier state is -1 if included
#
# returns list(G, CN): G is a character vector of calls (HOMD, DLOH, NLOH,
# ALOH, HET, GAIN, ASCNA, BCNA, UBCNA, OUT, or the string "NA" for states
# in none of the tables); CN is the total copy number (NA if unassigned)
decodeLOH <- function(G, symmetric = TRUE) {
    numPosn <- length(G)
    Z <- rep("NA", numPosn)
    CN <- rep(NA, numPosn)
    if (symmetric) {
        DLOH <- G == 1
        NLOH <- G == 2
        ALOH <- G == 4 | G == 6 | G == 9 | G == 12 | 
            G == 16 | G == 20
        HET <- G == 3
        GAIN <- G == 5
        ASCNA <- G == 7 | G == 10 | G == 13 | G == 
            17 | G == 21
        BCNA <- G == 8 | G == 15 | G == 24
        UBCNA <- G == 11 | G == 14 | G == 18 | G == 
            19 | G == 22 | G == 23
    } else {
        DLOH <- G == 1 | G == 2
        NLOH <- G == 3 | G == 5
        ALOH <- G == 6 | G == 9 | G == 10 | G == 14 | 
            G == 15 | G == 20 | G == 22 | G == 28 | 
            G == 29 | G == 36 | G == 37 | G == 45
        HET <- G == 4
        GAIN <- G == 7 | G == 8
        ASCNA <- G == 11 | G == 13 | G == 16 | G == 
            19 | G == 23 | G == 27 | G == 30 | G == 
            35 | G == 38 | G == 44
        BCNA <- G == 12 | G == 25 | G == 41
        UBCNA <- G == 17 | G == 18 | G == 24 | G == 
            26 | G == 31 | G == 32 | G == 33 | G == 
            34 | G == 39 | G == 40 | G == 42 | G == 
            43
    }
    HOMD <- G == 0
    OUT <- G == -1
    Z[HOMD] <- "HOMD"
    Z[DLOH] <- "DLOH"
    Z[NLOH] <- "NLOH"
    Z[ALOH] <- "ALOH"
    Z[HET] <- "HET"
    Z[GAIN] <- "GAIN"
    Z[ASCNA] <- "ASCNA"
    Z[BCNA] <- "BCNA"
    Z[UBCNA] <- "UBCNA"
    Z[OUT] <- "OUT"
    if (symmetric) {
        CN[HOMD] <- 0
        CN[DLOH] <- 1
        CN[G >= 2 & G <= 3] <- 2
        CN[G >= 4 & G <= 5] <- 3
        CN[G >= 6 & G <= 8] <- 4
        CN[G >= 9 & G <= 11] <- 5
        CN[G >= 12 & G <= 15] <- 6
        CN[G >= 16 & G <= 19] <- 7
        CN[G >= 20 & G <= 24] <- 8
    } else {
        CN[HOMD] <- 0
        CN[DLOH] <- 1
        CN[G >= 3 & G <= 5] <- 2
        CN[G >= 6 & G <= 9] <- 3
        CN[G >= 10 & G <= 14] <- 4
        CN[G >= 15 & G <= 20] <- 5
        CN[G >= 21 & G <= 28] <- 6
        CN[G >= 29 & G <= 36] <- 7
        CN[G >= 37 & G <= 45] <- 8
    }
    output <- vector("list", 0)
    output$G <- Z
    output$CN <- CN
    return(output)
}

## Build the per-position TITAN results table from the decoded state path.
##
## Arguments:
##   data             list of per-position vectors. Elements read here: chr,
##                    posn, refOriginal, tumDepth, logR and, when
##                    is.haplotypeData = TRUE, also tumDepthOriginal, ref,
##                    HaplotypeRatio and phaseSet.
##   convergeParams   trained parameter list. Must contain useOutlierState and
##                    symmetric; s and var are read for the dimensions, rhoG
##                    and rhoZ when posteriorProbs = TRUE.
##   optimalPath      state path, split into genotype state and clonal cluster
##                    by decoupleMegaVar().
##   filename         if not NULL, the final table is written there
##                    (tab-separated).
##   is.haplotypeData add the haplotype-based columns instead of the plain
##                    Depth/AllelicRatio pair.
##   posteriorProbs   append the rounded rhoZ and rhoG posteriors as columns.
##   subcloneProfiles append getSubcloneProfiles() columns (only done when
##                    there are at most 2 clusters).
##   correctResults   pass the table through removeEmptyClusters();
##                    proportionThreshold, proportionThresholdClonal,
##                    recomputeLogLik and verbose are forwarded to it.
##   rerunViterbi     accepted for backward compatibility; not used here.
##
## Returns list(results, corrResults, convergeParams). With
## correctResults = TRUE, `results` is the table as it was before correction
## (without subclone/posterior columns), `corrResults` the corrected table and
## `convergeParams` the corrected parameters; otherwise `corrResults` is NULL.
outputTitanResults <- function(data, convergeParams, 
    optimalPath, filename = NULL, is.haplotypeData = FALSE, posteriorProbs = FALSE, 
    subcloneProfiles = TRUE, correctResults = TRUE, proportionThreshold = 0.05, 
    proportionThresholdClonal = 0.05, recomputeLogLik = TRUE, rerunViterbi = FALSE, verbose = TRUE) {
  # both settings are required to decode the state path
  if (length(convergeParams$useOutlierState) == 0) {
    stop("convergeParams does not contain element: useOutlierState.")
  }
  useOutlierState <- convergeParams$useOutlierState
  if (length(convergeParams$symmetric) == 0) {
    stop("convergeParams does not contain element: symmetric.")
  }

  numClust <- dim(convergeParams$s)[1]
  K <- dim(convergeParams$var)[1]
  if (useOutlierState) {
    K <- K - 1  # one row of var is dropped from the state count in this mode
  }
  Z <- dim(convergeParams$s)[1]
  i <- dim(convergeParams$s)[2]  # iteration of training to use (last iteration)

  partGZ <- decoupleMegaVar(optimalPath, K, useOutlierState)
  G <- partGZ$G - 1  # shift genotype states down by one (original note: HOMD = 0)

  # clusters are reported in increasing order of s
  sortS <- sort(convergeParams$s[, i], decreasing = FALSE, 
                index.return = TRUE)
  s <- sortS$x
  Zclust <- partGZ$Z          # cluster membership of each analysed position
  Zclust <- sortS$ix[Zclust]  # reassign sorted cluster membership

  Gdecode <- decodeLOH(G, symmetric = convergeParams$symmetric)
  Gcalls <- Gdecode$G
  CN <- Gdecode$CN
  Zclust[Gcalls == "HET" & CN == 2] <- NA  # diploid HET positions do not have clusters

  # per-position s value; reported below as CellularPrevalence = 1 - Sout
  Sout <- rep(NA, length(Zclust))
  hasClust <- Zclust > 0 & !is.na(Zclust)
  Sout[hasClust] <- s[Zclust[hasClust]]
  Sout[Zclust == 0] <- 0

  clonalHeaderStr <- sprintf("pClust%d", seq_len(Z))

  outmat <- data.table(Chr = data$chr, Position = data$posn, RefCount = data$refOriginal)
  if (is.haplotypeData){
    outmat <- cbind(outmat, NRefCount = data$tumDepthOriginal - data$refOriginal, 
      Depth = data$tumDepthOriginal, 
      AllelicRatio = data$refOriginal/data$tumDepthOriginal,
      HaplotypeCount = data$ref,
      HaplotypeDepth = data$tumDepth,
      HaplotypeRatio = data$HaplotypeRatio,
      PhaseSet = data$phaseSet)
  }else{
    outmat <- cbind(outmat, Depth = data$tumDepth, 
      AllelicRatio = data$refOriginal/data$tumDepth)
  }
  # logR is stored as a natural log; report it in log2
  outmat <- cbind(outmat, LogRatio = log2(exp(data$logR)), 
      CopyNumber = CN, TITANstate = G, TITANcall = Gcalls, 
      ClonalCluster = Zclust, CellularPrevalence = 1 - Sout)

  ## filter results to remove empty clusters or set normal contamination to 1.0 if few events 
  if (correctResults){
    if (verbose)
      message("outputTitanResults: Correcting results...")
    corrResults <- removeEmptyClusters(data, convergeParams, outmat, 
                                       proportionThreshold = proportionThreshold, 
                                       proportionThresholdClonal = proportionThresholdClonal, 
                                       recomputeLogLik = recomputeLogLik, verbose = verbose)
    outmatOriginal <- outmat
    outmat <- corrResults$results
    convergeParams <- corrResults$convergeParams
    corrResults <- NULL
  }

  if (subcloneProfiles && numClust <= 2){
    outmat <- cbind(outmat, getSubcloneProfiles(outmat))
  }else{
    message("outputTitanResults: More than 2 clusters or subclone profiles not requested.")
  }

  if (posteriorProbs) {
    rhoG <- t(convergeParams$rhoG)
    rhoZ <- t(convergeParams$rhoZ)
    # NOTE(review): sortS$ix and clonalHeaderStr describe the clusters as they
    # were before any correction above; if clusters were removed they may no
    # longer match rhoZ -- confirm.
    rhoZ <- rhoZ[, sortS$ix, drop = FALSE]
    colnames(rhoZ) <- clonalHeaderStr
    outmat <- cbind(outmat, format(round(rhoZ, 4), 
        nsmall = 4, scientific = FALSE), format(round(rhoG, 4), 
        nsmall = 4, scientific = FALSE))
  }

  if (!is.null(filename)) {
    message("titan: Writing results to ", filename)
    write.table(format(outmat, digits = 2, scientific = FALSE), file = filename, col.names = TRUE, 
        row.names = FALSE, quote = FALSE, sep = "\t")
  }

  if (correctResults){
    corrmat <- outmat
    outmat <- outmatOriginal
  }else{
    corrmat <- NULL
  }
  return(list(results = outmat, corrResults = corrmat, convergeParams = convergeParams))
}

## Write the estimated model parameters and the S_Dbw validity index to a
## text file, one "label:<TAB>values" line per quantity.
##
## Arguments:
##   convergeParams       trained parameter list; n, phi, s, muR, muC, var,
##                        loglik, symmetric and
##                        genotypeParams$alleleEmissionModel are read (varR
##                        too when the emission model is "Gaussian"). The last
##                        iteration (last column of s) is reported.
##   results              results table passed on to computeSDbwIndex(); when
##                        it has a HaplotypeRatio column that column is used
##                        in place of AllelicRatio.
##   filename             output file; opened with mode "w+".
##   S_Dbw.scale          weight of dens.bw in the combined index.
##   S_Dbw.method         forwarded to computeSDbwIndex().
##   S_Dbw.useCorrectedCN forwarded to computeSDbwIndex() as use.corrected.cn.
##
## Returns list(dens.bw, scat, S_Dbw) for the sum of the log-ratio and
## allelic-ratio indices, with S_Dbw = S_Dbw.scale * dens.bw + scat.
outputModelParameters <- function(convergeParams, results, filename, 
		S_Dbw.scale = 1, S_Dbw.method = "Tong", S_Dbw.useCorrectedCN = TRUE) {
  message("titan: Saving parameters to ", filename)
  Z <- dim(convergeParams$s)[1]
  i <- dim(convergeParams$s)[2]  # iteration of training to use (last iteration)
  sortS <- sort(convergeParams$s[, i], decreasing = FALSE, 
                index.return = TRUE)
  s <- sortS$x

  fc <- file(filename, "w+")
  # release the connection even if one of the steps below fails
  on.exit(close(fc), add = TRUE)

  # every entry of the file is written the same way
  writeEntry <- function(txt) {
    write.table(txt, file = fc, col.names = FALSE, 
        row.names = FALSE, quote = FALSE, sep = "", append = TRUE)
  }
  # numbers -> 4 significant digits, space-free, joined by single spaces
  fmtValues <- function(x, digits = 4) {
    paste(gsub(" ", "", signif(x, digits = digits)), collapse = " ")
  }

  writeEntry(paste0("Normal contamination estimate:\t", signif(convergeParams$n[i], digits = 4)))
  writeEntry(paste0("Average tumour ploidy estimate:\t", signif(convergeParams$phi[i], digits = 4)))
  writeEntry(paste0("Clonal cluster cellular prevalence Z=", Z , ":\t", fmtValues(1 - s)))

  if ("HaplotypeRatio" %in% names(results)){
    ratioColName <- "HaplotypeRatio"
  }else{
    ratioColName <- "AllelicRatio"
  }

  for (j in seq_len(Z)) {
    writeEntry(paste0(ratioColName, " ", convergeParams$genotypeParams$alleleEmissionModel, 
                      " means for clonal cluster Z=", j, ":\t", 
                      fmtValues(convergeParams$muR[, j, i])))
    # muC is stored as a natural log; report it in log2
    writeEntry(paste0("logRatio Gaussian means for clonal cluster Z=", j, ":\t",
                      fmtValues(log2(exp(convergeParams$muC[, j, i])))))
  }

  if (convergeParams$genotypeParams$alleleEmissionModel == "Gaussian"){
    writeEntry(paste0(ratioColName, " Gaussian variance:\t", 
                      fmtValues(convergeParams$varR[, i])))
  }
  writeEntry(paste0("logRatio Gaussian variance:\t", fmtValues(convergeParams$var[, i])))
  writeEntry(paste0("Number of iterations:\t", length(convergeParams$phi)))
  writeEntry(paste0("Log likelihood:\t", fmtValues(convergeParams$loglik[i], digits = 6)))

  # compute SDbw_index for the log ratio and for the allelic/haplotype ratio
  sdbw.LR <- computeSDbwIndex(results, centroid.method = "median", 
                              data.type = "LogRatio", use.corrected.cn = S_Dbw.useCorrectedCN, 
                              S_Dbw.method = S_Dbw.method,
                              symmetric = convergeParams$symmetric)
  sdbw.AR <- computeSDbwIndex(results, centroid.method = "median", 
                              data.type = ratioColName, use.corrected.cn = S_Dbw.useCorrectedCN,
                              S_Dbw.method = S_Dbw.method,
                              symmetric = convergeParams$symmetric)
  ## element-wise addition -> returns list
  ## add the values for allelicRatio and logRatio
  sdbw <- mapply('+', sdbw.LR, sdbw.AR, SIMPLIFY = FALSE)

  ## print out each index and the combined S_Dbw ##
  printSDbw(sdbw.LR, fc, S_Dbw.scale, "LogRatio")
  printSDbw(sdbw.AR, fc, S_Dbw.scale, ratioColName)
  printSDbw(sdbw, fc, S_Dbw.scale, "Both")

  return(list(dens.bw = sdbw$dens.bw, scat = sdbw$scat, 
              S_Dbw = S_Dbw.scale * sdbw$dens.bw + sdbw$scat))
}

## Collapse per-position TITAN results into segments: consecutive positions on
## the same chromosome that share a TITANstate form one segment.
##
## Arguments:
##   results        per-position table with columns Chr, Position, TITANstate,
##                  TITANcall, CopyNumber, AllelicRatio, LogRatio,
##                  ClonalCluster, CellularPrevalence and, optionally,
##                  HaplotypeRatio. Rows are walked in their given order, with
##                  each chromosome's rows expected to be contiguous.
##   id             sample name written to the Sample column.
##   convergeParams only convergeParams$symmetric is read (passed to
##                  getMajorMinorCN()).
##   filename       if not NULL, the segment table is written there with
##                  fwrite() (tab-separated).
##   igvfilename    if not NULL, a reduced table (sample, chr, start, end,
##                  num.snps, median.logR) is written there.
##
## Returns the segment data.table.
outputTitanSegments <- function(results, id, convergeParams, filename = NULL, igvfilename = NULL){
  # run-length encode the state sequence separately for every chromosome, so
  # that a run never continues across a chromosome boundary
  chrRuns <- lapply(unique(results$Chr), function(x){
    rle(results$TITANstate[results$Chr == x])
  })
  rleLengths <- unlist(lapply(chrRuns, function(r) r$lengths), use.names = FALSE)
  rleValues <- unlist(lapply(chrRuns, function(r) r$values), use.names = FALSE)
  numSegs <- length(rleLengths)

  # convert allelic ratio to symmetric ratios #
  results$AllelicRatio <- pmax(results$AllelicRatio, 1-results$AllelicRatio)
  hasHaplotypeRatio <- !is.null(results$HaplotypeRatio)
  if (hasHaplotypeRatio){
    results$HaplotypeRatio <- pmax(results$HaplotypeRatio, 1-results$HaplotypeRatio)
  }

  # typed, empty template; expanded to numSegs NA rows below
  segs <- data.table(Sample = character(), Chromosome = character(), Start_Position.bp. = integer(), 
                     End_Position.bp. = integer(), Length.snp. = integer(), Median_Ratio = numeric())
  # add HaplotypeRatio column if also present in results object
  if (hasHaplotypeRatio){
    segs <- cbind(segs, Median_HaplotypeRatio = numeric())
  }
  segs <- cbind(segs, Median_logR = numeric(), TITAN_state = integer(),
                TITAN_call = character(), Copy_Number = integer(), MinorCN = integer(), MajorCN = integer(),
                Clonal_Cluster = integer(), Cellular_Prevalence = numeric())[seq_len(numSegs)]
  segs[, Sample := id]

  prevInd <- 0
  for (j in seq_len(numSegs)){
    start <- prevInd + 1
    end <- prevInd + rleLengths[j]
    segDF <- results[start:end, ]
    prevInd <- end
    numR <- nrow(segDF)
    segs[j, "Chromosome"] <- as.character(segDF[1, "Chr"])
    segs[j, "Start_Position.bp."] <- segDF[1, "Position"]
    segs[j, "TITAN_state"] <- rleValues[j]
    segs[j, "TITAN_call"] <- segDF[1, "TITANcall"]
    segs[j, "Copy_Number"] <- segDF[1, "CopyNumber"]
    segs[j, "Median_Ratio"] <- round(median(segDF$AllelicRatio, na.rm = TRUE), digits = 6)
    segs[j, "Median_logR"] <- round(median(segDF$LogRatio, na.rm = TRUE), digits = 6)
    # getMajorMinorCN() returns the smaller allele count in $majorCN and the
    # larger one in $minorCN, hence the crossed assignment
    alleleCN <- getMajorMinorCN(rleValues[j], convergeParams$symmetric)
    segs[j, "MinorCN"] <- alleleCN$majorCN
    segs[j, "MajorCN"] <- alleleCN$minorCN
    segs[j, "Clonal_Cluster"] <- segDF[1, "ClonalCluster"]
    segs[j, "Cellular_Prevalence"] <- segDF[1, "CellularPrevalence"]
    if (!is.null(segDF$HaplotypeRatio)){
      segs[j, "Median_HaplotypeRatio"] <- round(median(segDF$HaplotypeRatio, na.rm = TRUE), digits = 6)
    }
    if (segDF[1, "Chr"] == segDF[numR, "Chr"]){
      segs[j, "End_Position.bp."] <- segDF[numR, "Position"]
      segs[j, "Length.snp."] <- numR
    }else{ # segDF contains 2 different chromosomes
      # not expected because runs are computed per chromosome; the end
      # position and SNP count of such a segment are left as NA
      warning("outputTitanSegments: segment ", j, " spans more than one chromosome.")
    }
  }

  if (!is.null(filename)){
    # write out detailed segment file #
    fwrite(segs, file = filename, sep = "\t")
  }
  # write out IGV seg file #
  if (!is.null(igvfilename)){
    igv <- segs[, c("Sample", "Chromosome", "Start_Position.bp.", 
                    "End_Position.bp.", "Length.snp.", "Median_logR")]
    colnames(igv) <- c("sample", "chr", "start", "end", "num.snps", "median.logR")
    fwrite(igv, file = igvfilename, sep = "\t")
  }
  return(segs)
}

## merge segments based on same values in given column
## Merge neighbouring segments of the same chromosome that share the same
## value in `colToMerge`.
##
## Arguments:
##   segs        segment data.table; columns Chromosome, End, Length.snp.,
##               Median_Ratio, Median_logR and `colToMerge` are used. Rows are
##               walked in their given order, with each chromosome's rows
##               expected to be contiguous.
##   colToMerge  name of the column whose runs of equal values are merged.
##   centromeres if not NULL, passed with the merged segments to
##               removeCentromereSegs().
##
## Each merged segment starts as a copy of the first segment of its run; End
## is taken from the last segment of the run, Length.snp. is the sum over the
## run, and Median_Ratio / Median_logR are the medians of the per-segment
## medians (rounded to 6 digits). Returns the merged data.table.
mergeSegsByCol <- function(segs, colToMerge = "Copy_Number", centromeres = NULL){
  # run-length encode per chromosome so runs never cross a chromosome boundary;
  # the chromosome column is addressed by its full name rather than through
  # partial matching of `$Chr`
  chrRuns <- lapply(unique(segs[["Chromosome"]]), function(x){
    ind <- segs[["Chromosome"]] == x
    rle(segs[ind, get(colToMerge)])
  })
  rleLengths <- unlist(lapply(chrRuns, function(r) r$lengths), use.names = FALSE)
  rleValues <- unlist(lapply(chrRuns, function(r) r$values), use.names = FALSE)
  numSegs <- length(rleLengths)

  # first row of every run, taken in one step instead of growing the table
  # with rbind() inside the loop
  runStarts <- cumsum(c(1, rleLengths))[seq_len(numSegs)]
  newSegs <- segs[runStarts]

  prevInd <- 0
  for (j in seq_len(numSegs)){
    start <- prevInd + 1
    end <- prevInd + rleLengths[j]
    segDF <- segs[start:end, ]
    prevInd <- end
    numR <- nrow(segDF)
    newSegs[j, (colToMerge) := rleValues[j]]
    newSegs[j, Median_Ratio := round(median(segDF$Median_Ratio, na.rm = TRUE), digits = 6)]
    newSegs[j, Median_logR := round(median(segDF$Median_logR, na.rm = TRUE), digits = 6)]
    if (segDF[1, "Chromosome"] == segDF[numR, "Chromosome"]){
      newSegs[j, End := segDF[numR, End]]
      newSegs[j, Length.snp. := sum(segDF$Length.snp.)]
    }else{ # segDF contains 2 different chromosomes
      # not expected because runs are computed per chromosome; End and
      # Length.snp. keep the values of the run's first segment
      warning("mergeSegsByCol: merged segment ", j, " spans more than one chromosome.")
    }
  }

  if (!is.null(centromeres)){
    message("Removing centromeres from segments.")
    newSegs <- removeCentromereSegs(newSegs, centromeres)
  }
  return(newSegs)
}

## Recompute integer CN for high-level amplifications ##
## compute logR-corrected copy number ##
## Arguments:
##   cn       per-position data.table (columns used: Chr, Position or
##            Start/End, LogRatio, CellularPrevalence, CopyNumber, TITANcall,
##            and AllelicRatio or HaplotypeRatio).
##   segs     segment data.table (columns used: Chromosome, Start_Position.bp.
##            and End_Position.bp. unless Start exists, Median_logR,
##            Median_Ratio or Median_HaplotypeRatio, Cellular_Prevalence,
##            Copy_Number, TITAN_call, MajorCN, MinorCN).
##   purity, ploidy             estimates used by logRbasedCN() and
##                              allelicRatioBasedCN().
##   maxCNtoCorrect.autosomes   copy number at or above which the integer call
##                              is replaced by the rounded logR-based value;
##                              default: largest Copy_Number on the autosomes.
##   maxCNtoCorrect.X           same threshold for chrX; only used to label
##                              calls when gender is not "male".
##   correctHOMD                also re-derive copy number where it is 0.
##   minPurityToCorrect         no copy number is changed below this purity.
##   gender                     "male" treats chrX as single-copy (cn = 1).
##   chrs                       chromosomes to process.
##
## The new columns (logR_Copy_Number, Corrected_logR, Corrected_Ratio,
## Corrected_Copy_Number, Corrected_Call, and for segs Corrected_MajorCN /
## Corrected_MinorCN) are added with `:=` directly on `cn` and `segs`, so the
## caller's data.tables are updated by reference as well.
##
## Returns list(cn, segs) holding copies of the two updated tables.
correctIntegerCN <- function(cn, segs, purity, ploidy, maxCNtoCorrect.autosomes = NULL, 
		maxCNtoCorrect.X = NULL, correctHOMD = TRUE, minPurityToCorrect = 0.2, gender = "male", chrs = c(1:22, "X")){
  # call labels indexed by copy number + 1; the chrX table is used for males
  # and is shifted by one copy
  callNames <- c("HOMD","HETD","NEUT","GAIN","AMP","HLAMP", rep("HLAMP", 1000))
  callNames.chrX <- c("HETD","NEUT","GAIN","AMP","HLAMP", rep("HLAMP", 1000))

  # genomic ranges of bins and segments, built on copies so that the helper
  # columns are not added to the inputs
  cn.tmp <- copy(cn)
  segs.tmp <- copy(segs)
  if (is.null(cn.tmp[["Start"]])){
    cn.tmp[, c("Start", "End") := list(Position, Position)]
  }
  if (is.null(segs.tmp[["Start"]])){
    segs.tmp[, c("start", "end") := list(Start_Position.bp., End_Position.bp.)]
  }
  cn.gr <- as(cn.tmp, "GRanges")
  segs.gr <- as(segs.tmp, "GRanges")
  rm(cn.tmp, segs.tmp)

  ## determine if Median_HaplotypeRatio (segs) and HaplotypeRatio (cn) columns exists (i.e. 10X analysis)
  segs.allelicRatioColName <- "Median_Ratio"
  if ("Median_HaplotypeRatio" %in% names(segs)){
    segs.allelicRatioColName <- "Median_HaplotypeRatio"
  }
  cn.allelicRatioColName <- "AllelicRatio"
  if ("HaplotypeRatio" %in% names(cn)){
    cn.allelicRatioColName <- "HaplotypeRatio"
  }

  ## set up chromosome style
  autosomeStr <- grep("X|Y", chrs, value=TRUE, invert=TRUE)
  chrXStr <- grep("X", chrs, value=TRUE)
  hasChrX <- length(chrXStr) > 0
  if (is.null(maxCNtoCorrect.autosomes)){
    maxCNtoCorrect.autosomes <- segs[Chromosome %in% autosomeStr, max(Copy_Number, na.rm=TRUE)]
  }
  if (is.null(maxCNtoCorrect.X) && gender == "female" && hasChrX){
    maxCNtoCorrect.X <- segs[Chromosome == chrXStr, max(Copy_Number, na.rm=TRUE)]
  }

  ## correct log ratio and compute corrected CN
  segs[Chromosome %in% chrs, logR_Copy_Number := logRbasedCN(Median_logR, purity, ploidy, Cellular_Prevalence, cn=2)]
  segs[Chromosome %in% chrs, Corrected_logR := log2(logR_Copy_Number / ploidy)]
  cn[Chr %in% chrs, logR_Copy_Number := logRbasedCN(LogRatio, purity, ploidy, CellularPrevalence, cn=2)]
  cn[Chr %in% chrs, Corrected_logR := log2(logR_Copy_Number / ploidy)]
  ## correct allelic ratio and compute corrected major/minor CN (exclude chrX for males since no allelic CN)
  segs[Chromosome %in% chrs, Corrected_Ratio := allelicRatioBasedCN(get(segs.allelicRatioColName), logR_Copy_Number, purity, Cellular_Prevalence, rn=0.5, cn=2)]
  cn[Chr %in% chrs, Corrected_Ratio := allelicRatioBasedCN(get(cn.allelicRatioColName), logR_Copy_Number, purity, CellularPrevalence, rn=0.5, cn=2)]
  if (gender == "male" && hasChrX){ ## analyze chrX separately
    segs[Chromosome == chrXStr, logR_Copy_Number := logRbasedCN(Median_logR, purity, ploidy, Cellular_Prevalence, cn=1)]
    segs[Chromosome == chrXStr, Corrected_logR := log2(logR_Copy_Number / (ploidy / 2))]
    cn[Chr == chrXStr, logR_Copy_Number := logRbasedCN(LogRatio, purity, ploidy, CellularPrevalence, cn=1)]
    cn[Chr == chrXStr, Corrected_logR := log2(logR_Copy_Number / (ploidy / 2))]
    segs[Chromosome == chrXStr, Corrected_Ratio := NA]
    cn[Chr == chrXStr, Corrected_Ratio := NA]
  }

  ####### assign copy number to use - Corrected_Copy_Number #######
  # 1. initialize to same TITAN calls for autosomes - no change in copy number at this point
  segs[, Corrected_Copy_Number := as.integer(Copy_Number)]
  segs[, Corrected_Call := TITAN_call]
  segs[, Corrected_MajorCN := as.integer(MajorCN)]
  segs[, Corrected_MinorCN := as.integer(MinorCN)]
  cn[, Corrected_Copy_Number := as.integer(CopyNumber)]
  cn[, Corrected_Call := TITANcall]

  if (purity >= minPurityToCorrect){
    # 2. TITAN calls adjusted for >= maxCNtoCorrect.autosomes copies - HLAMP e.g. 8 max copies)
    ind.seg.maxCN <- segs[Chromosome %in% chrs & Copy_Number >= maxCNtoCorrect.autosomes, which = TRUE]
    segs[ind.seg.maxCN, Corrected_Copy_Number := as.integer(round(logR_Copy_Number))]
    ind.cn.maxCN <- cn[Chr %in% chrs & CopyNumber >= maxCNtoCorrect.autosomes, which = TRUE]
    cn[ind.cn.maxCN, Corrected_Copy_Number := as.integer(round(logR_Copy_Number))]

    # 3. TITAN calls adjust for HOMD
    ind.seg.homd <- NULL
    ind.cn.homd <- NULL
    if (correctHOMD){
      ind.seg.homd <- segs[Chromosome %in% chrs & Copy_Number == 0, which = TRUE]
      segs[ind.seg.homd, Corrected_Copy_Number := as.integer(round(logR_Copy_Number))]
      ind.cn.homd <- cn[Chr %in% chrs & CopyNumber == 0, which = TRUE]
      cn[ind.cn.homd, Corrected_Copy_Number := as.integer(round(logR_Copy_Number))]
    }

    # 4. Add corrected calls for bins with CopyNumber = NA (ie. not included in TITAN analysis)
    ind.cn.naBins <- cn[Chr %in% chrs & is.na(CopyNumber), which = TRUE]
    cn[ind.cn.naBins, Corrected_Copy_Number := as.integer(round(logR_Copy_Number))]

    # 5. Adjust chrX copy number if purity is sufficiently high
    # males - all data points in chrX will be corrected
    # females - chrX is not treated separately here (original note: already
    #           handled in Step 2)
    ind.seg.chrX <- NULL
    ind.cn.chrX <- NULL
    if (gender == "male" && hasChrX){
      ind.seg.chrX <- segs[Chromosome == chrXStr, which = TRUE]
      segs[ind.seg.chrX, Corrected_Copy_Number := as.integer(round(logR_Copy_Number))]
      ind.cn.chrX <- cn[Chr == chrXStr, which = TRUE]
      cn[ind.cn.chrX, Corrected_Copy_Number := as.integer(round(logR_Copy_Number))]
    }

    # 6. Adjust copy number for inconsistent logR and copy number prediction (e.g. opposite copy number direction)
    # mostly affects outliers, which are short or single point segments
    # since chrX for males have all data corrected, it will by default not be included in this anyway
    # chrX for females are treated as regular diploid chromosomes here
    ind.seg.oppCNA <- segs[((round(logR_Copy_Number) < ploidy & Corrected_Copy_Number > ploidy) | 
                            (round(logR_Copy_Number) > ploidy & Corrected_Copy_Number < ploidy)) &
                           (abs(round(logR_Copy_Number) - Corrected_Copy_Number) > 2), which = TRUE]
    segs[ind.seg.oppCNA, Corrected_Copy_Number := as.integer(round(logR_Copy_Number))]
    # correct bins overlapping adjusted segs
    if (length(ind.seg.oppCNA) > 0){
      hits <- findOverlaps(query = cn.gr, subject = segs.gr[ind.seg.oppCNA])
      cn[queryHits(hits), Corrected_Copy_Number := segs[ind.seg.oppCNA][subjectHits(hits), Corrected_Copy_Number]]
    }

    # assign copy number for Corrected_MajorCN, Corrected_MinorCN (for corrected segments only)
    ind.seg <- unique(c(ind.seg.maxCN, ind.seg.homd, ind.seg.chrX, ind.seg.oppCNA))
    segs[ind.seg, Corrected_MajorCN := as.integer(round(Corrected_Ratio * Corrected_Copy_Number))]
    segs[ind.seg, Corrected_MinorCN := as.integer(round((1 - Corrected_Ratio) * Corrected_Copy_Number))]
  }

  ## assign copy number call (string) based on Corrected_Copy_Number
  ## for autosomes
  segs[, Corrected_Call := callNames[Corrected_Copy_Number + 1]]
  cn[, Corrected_Call := callNames[Corrected_Copy_Number + 1]]
  # for chrX
  if (gender == "male" && hasChrX){
    segs[Chromosome == chrXStr, Corrected_Call := callNames.chrX[Corrected_Copy_Number + 1]]
    cn[Chr == chrXStr, Corrected_Call := callNames.chrX[Corrected_Copy_Number + 1]]
  }else if (hasChrX && !is.null(maxCNtoCorrect.X)){ # female
    # nothing can be selected without a chrX name or a threshold, so the
    # labelling is skipped in that case
    segs[Chromosome == chrXStr & Copy_Number >= maxCNtoCorrect.X, Corrected_Call := "HLAMP"]
    cn[Chr == chrXStr & CopyNumber >= maxCNtoCorrect.X, Corrected_Call := "HLAMP"]
  }

  return(list(cn = copy(cn), segs = copy(segs)))
}

## compute copy number using corrected log ratio ##
## Arguments:
##   x        log2 ratio(s).
##   purity   tumour purity.
##   ploidyT  tumour ploidy.
##   cellPrev cellular prevalence, scalar or vector parallel to x; NA entries
##            are treated as 1.
##   cn       baseline copy number of the region (2 by default; callers in
##            this file pass 1 for male chrX).
##
## Computes
##   ct = (2^x * (cn * (1 - purity) + purity * ploidyT * cn / 2)
##         - cn * (1 - purity) - cn * purity * (1 - cellPrev))
##        / (purity * cellPrev)
## and returns it with a lower bound of 1/2^6 applied element-wise.
logRbasedCN <- function(x, purity, ploidyT, cellPrev=NA, cn = 2){
  if (length(cellPrev) == 1 && is.na(cellPrev)){
    cellPrev <- 1
  }else{ # cellPrev is a vector
    cellPrev[is.na(cellPrev)] <- 1
  }
  ct <- (2^x 
    * (cn * (1 - purity) + purity * ploidyT * (cn / 2)) 
    - (cn * (1 - purity)) 
    - (cn * purity * (1 - cellPrev))) 
  ct <- ct / (purity * cellPrev)
  # element-wise floor; unlike sapply(ct, max, ...) this stays a numeric
  # vector for zero-length input
  ct <- pmax(ct, 1/2^6)
  return(ct)
}

## Compute the tumour allelic ratio implied by an observed allelic ratio.
##
## Arguments:
##   x        observed allelic ratio(s).
##   ct       tumour copy number(s), e.g. from logRbasedCN().
##   purity   tumour purity.
##   cellPrev cellular prevalence, scalar or vector parallel to x; NA entries
##            are treated as 1.
##   rn       allelic ratio of the non-tumour component (0.5 by default).
##   cn       copy number of the non-tumour component (2 by default).
##
## With totalAlleles = (1 - purity) * cn + purity * (1 - cellPrev) * cn
##                     + purity * cellPrev * ct, computes
##   rt = (x * totalAlleles
##         - ((1 - purity) * rn * cn + purity * (1 - cellPrev) * rn * cn))
##        / (purity * cellPrev * ct)
## and returns it clamped to [0, 1] element-wise.
allelicRatioBasedCN <- function(x, ct, purity, cellPrev=NA, rn = 0.5, cn = 2){
  if (length(cellPrev) == 1 && is.na(cellPrev)){
    cellPrev <- 1
  }else{ # cellPrev is a vector
    cellPrev[is.na(cellPrev)] <- 1
  }
  totalAlleles <- ((1 - purity) * cn) + (purity * (1 - cellPrev)) * cn + (purity * cellPrev * ct)
  rt <- (x * totalAlleles - (((1 - purity) * rn) * cn + (purity * (1 - cellPrev) * rn * cn))) / (purity * cellPrev * ct)
  # element-wise clamp; unlike sapply(rt, min/max, ...) this stays a numeric
  # vector for zero-length input
  rt <- pmin(pmax(rt, 0), 1)
  return(rt)
}

## Map a TITAN genotype state (0..24) to its pair of allele copy numbers.
##
## States enumerate total copy numbers 0 to 8; within one total the states run
## from the most unbalanced split to the balanced one, e.g. states 6, 7, 8 are
## (0,4), (1,3), (2,2).
##
## NOTE: the element names are kept as callers expect them -- `majorCN` holds
## the SMALLER allele count and `minorCN` the LARGER one (state 1 gives
## majorCN = 0, minorCN = 1).
##
## Arguments:
##   state     state number; a vector is accepted and mapped element-wise.
##   symmetric only the symmetric state space is supported; for FALSE both
##             elements are NA.
##
## Returns list(majorCN, minorCN); NA for states outside 0..24 or NA input.
getMajorMinorCN <- function(state, symmetric = TRUE){
  # lookup tables, entry k belongs to state k - 1
  smallerCount <- c(0,  0,  0, 1,  0, 1,  0, 1, 2,  0, 1, 2,
                    0, 1, 2, 3,  0, 1, 2, 3,  0, 1, 2, 3, 4)
  largerCount  <- c(0,  1,  2, 1,  3, 2,  4, 3, 2,  5, 4, 3,
                    6, 5, 4, 3,  7, 6, 5, 4,  8, 7, 6, 5, 4)
  majorCN <- NA
  minorCN <- NA
  if (symmetric){
    # match() yields NA for unknown or missing states, which indexes to NA
    idx <- match(state, seq_along(smallerCount) - 1)
    majorCN <- smallerCount[idx]
    minorCN <- largerCount[idx]
  }
  return(list(majorCN = majorCN, minorCN = minorCN))
}

## Write the three S_Dbw lines (dens.bw, scat and the combined validity index
## scale * dens.bw + scat) for one data type.
##
## Arguments:
##   sdbw      list with elements dens.bw and scat.
##   fc        file or connection handed to write.table().
##   scale     weight of dens.bw in the validity index.
##   data.type label printed in parentheses on every line.
printSDbw <- function(sdbw, fc, scale, data.type = ""){
  sdbwLines <- list(
    sprintf("S_Dbw dens.bw (%s):\t%0.4f ", data.type, sdbw$dens.bw),
    sprintf("S_Dbw scat (%s):\t%0.4f ", data.type, sdbw$scat),
    sprintf("S_Dbw validity index (%s):\t%0.4f ", 
            data.type, scale * sdbw$dens.bw + sdbw$scat)
  )
  for (sdbw_str in sdbwLines){
    write.table(sdbw_str, file = fc, col.names = FALSE, 
        row.names = FALSE, quote = FALSE, sep = "", 
        append = TRUE)
  }
  invisible(NULL)
}

## Drop clonal clusters that hold too few positions and renormalise the
## remaining parameters and the results table.
##
## Arguments:
##   data            passed to runEMclonalCN() when recomputeLogLik = TRUE;
##                   not used otherwise.
##   convergeParams  trained parameter list; s, n, piZ, rhoZ, muC, muR and
##                   cellPrevParams are updated for the last iteration (last
##                   column of s).
##   results         per-position table with ClonalCluster,
##                   CellularPrevalence and TITANcall columns; cluster ids
##                   refer to the clusters sorted by increasing s.
##   proportionThreshold        a cluster is dropped when the fraction of
##                              rows assigned to it is below this value.
##   proportionThresholdClonal  stricter threshold applied to cluster 1 only.
##   recomputeLogLik  run one EM iteration with the adjusted parameters and
##                    store its log-likelihood, muC, muR, rhoZ and rhoG.
##   verbose          print progress messages.
##
## When at least one cluster is kept, the first kept cluster becomes the new
## cluster 1: normal contamination is raised so that its prevalence is 1, the
## other prevalences are rescaled relative to it, and rows of dropped clusters
## are moved to the next kept cluster (or the previous one if none follows).
## When no cluster is kept, the model is reduced to one cluster with
## prevalence 1 and every non-HET row is assigned to it.
##
## Returns list(convergeParams, results).
removeEmptyClusters <- function(data, convergeParams, results, proportionThreshold = 0.001, 
	proportionThresholdClonal = 0.05, recomputeLogLik = TRUE, verbose = TRUE){
  # clust maps original cluster id -> new id (NA = dropped)
  clust <- 1:nrow(convergeParams$s)
  names(clust) <- clust
  for (cl in clust){
    propInClust <- length(which(results$ClonalCluster == cl)) / nrow(results)
    if (propInClust < proportionThreshold || 
        (propInClust < proportionThresholdClonal && cl == 1)){
      clust[cl] <- NA # assign cluster without sufficient data with NA
    }
  }

  k <- ncol(convergeParams$s)
  # sort the cellular prevalence since they are sorted in "results"
  convergeParams$s <- convergeParams$s[order(convergeParams$s[, k], decreasing = FALSE), , drop = FALSE]

  if (any(!is.na(clust))){ # at least 1 cluster with sufficient data
    # set new normal estimate as cluster 1 -> lowers purity from original estimate
    clustToKeep <- which(!is.na(clust))
    purity <- (1 - convergeParams$s[clustToKeep[1], k]) * (1 -  convergeParams$n[k])
    convergeParams$n[k] <- 1 - purity
    # set new cellular prevalence using new clusters and renormalize to new cluster 1
    convergeParams$s <- convergeParams$s[clustToKeep, , drop = FALSE]
    convergeParams$s[, k] <- 1 - (1 - convergeParams$s[, k]) / (1 - convergeParams$s[1, k])
    convergeParams$piZ <- convergeParams$piZ[clustToKeep, , drop = FALSE]
    convergeParams$rhoZ <- convergeParams$rhoZ[clustToKeep, , drop = FALSE]
    convergeParams$muC <- convergeParams$muC[, clustToKeep, , drop = FALSE]
    convergeParams$muR <- convergeParams$muR[, clustToKeep, , drop = FALSE]
    convergeParams$cellPrevParams <- lapply(convergeParams$cellPrevParams, "[", clustToKeep)
    # kept clusters are renumbered consecutively
    clust[!is.na(clust)] <- 1:sum(!is.na(clust))

    # set new cellular prevalence and clonal cluster in results file
    # (a new id never exceeds the original id being processed, so rows that
    # were already relabelled are not picked up again by a later iteration)
    clustPos <- seq_along(clust)
    for (cl in clustPos){
      rowsInClust <- which(results$ClonalCluster == cl)
      if (is.na(clust[cl])){
        # rows of a removed cluster go to the next kept cluster on the right
        # (larger cluster number), otherwise to the closest one on the left;
        # positions are compared numerically, not as strings
        keptRight <- which(!is.na(clust) & clustPos > cl)
        keptLeft <- which(!is.na(clust) & clustPos < cl)
        if (length(keptRight) > 0){
          newClust <- clust[keptRight[1]]
        }else{
          newClust <- clust[tail(keptLeft, 1)]
        }
      }else{ # update cluster and cellPrev info for kept clusters
        newClust <- clust[cl]
      }
      if (length(newClust) > 0){
        results[rowsInClust, "CellularPrevalence"] <- 1 - convergeParams$s[newClust, k]
        results[rowsInClust, "ClonalCluster"] <- newClust
      }
    }
  }else{ # no clusters with sufficient data
    # set params to only cluster with data or to default cluster01 if no cluster with data
    clustData <- which.max(table(results$ClonalCluster))
    # NOTE(review): this condition is always TRUE, so cluster 1 is always
    # used; `== 0` may have been intended -- confirm before changing.
    if (length(clustData) >= 0){
      clustData <- 1
    }
    # fold the remaining cluster's s value into the normal estimate
    convergeParams$n[k] <- 1 - convergeParams$s[clustData, k]
    convergeParams$s <- convergeParams$s[clustData, , drop = FALSE]
    convergeParams$s[, k] <- 0.0
    convergeParams$cellPrevParams <- lapply(convergeParams$cellPrevParams, "[", clustData)
    # set all clusters to 1 and all cellular prevalence to 1.0; leave HET as NA
    results[which(results$TITANcall != "HET"), "CellularPrevalence"] <- 1
    results[which(results$TITANcall != "HET"), "ClonalCluster"] <- 1
  }

  if (recomputeLogLik){
    if (verbose)
      message("outputTitanResults: Recomputing log-likelihood.")
    # use the adjusted last-iteration estimates as starting values for a
    # single EM iteration with the normal, prevalence and ploidy held fixed
    newParams <- convergeParams
    iter <- length(newParams$n)
    newNumClust <- nrow(newParams$s)
    newParams$genotypeParams$var_0 <- newParams$var[, iter]
    newParams$genotypeParams$varR_0 <- newParams$varR[, iter]
    newParams$genotypeParams$piG_0 <- newParams$piG[, iter]
    newParams$normalParams$n_0 <- newParams$n[iter]
    newParams$ploidyParams$phi_0 <- newParams$phi[iter]
    newParams$cellPrevParams$s_0 <- newParams$s[, iter]
    newParams$cellPrevParams$piZ_0 <- newParams$piZ[1:newNumClust, iter]
    p <- runEMclonalCN(data, newParams, maxiter=1, txnExpLen=convergeParams$txn_exp_len, 
                       txnZstrength=convergeParams$txn_z_strength, useOutlierState=FALSE, 
                       normalEstimateMethod="fixed", estimateS=FALSE, estimatePloidy=FALSE, verbose=verbose)
    convergeParams$loglik[iter] <- tail(p$loglik, 1)
    convergeParams$muC[, , iter] <- p$muC[, , 2]
    convergeParams$muR[, , iter] <- p$muR[, , 2]
    convergeParams$rhoZ <- p$rhoZ
    convergeParams$rhoG <- p$rhoG
  }
  return(list(convergeParams = convergeParams, results = results))
}

## Derive per-subclone copy number profiles from TITAN results that have at
## most two clonal clusters.
##
## Arguments:
##   titanResults  results table (data.frame or data.table) with
##                 ClonalCluster, CellularPrevalence, CopyNumber and TITANcall
##                 columns, or the path of a tab-separated file holding one.
##
## Returns a data.table with one row per input row. The Subclone1 columns
## (CopyNumber, TITANcall, Prevalence) are always present; the Subclone2
## columns are added when two clusters are found. With two clusters,
## Subclone2 carries the calls as given with the prevalence of cluster 2, and
## Subclone1 is the same except that cluster-2 positions are reset to copy
## number 2 / "HET" and its prevalence is prevalence(cluster 1) minus
## prevalence(cluster 2).
getSubcloneProfiles <- function(titanResults){
  if (is.character(titanResults)){
    titanResults <- read.delim(titanResults, header = TRUE, 
        stringsAsFactors = FALSE, sep = "\t")
  }else if (!is.data.frame(titanResults)){
    stop("getSubcloneProfiles: titanResults is not character or data.frame")
  }

  # positions without a cluster count as cluster 0 when finding the largest id
  clonalClust <- titanResults$ClonalCluster
  clonalClust[is.na(clonalClust)] <- 0
  numClones <- as.numeric(max(clonalClust, na.rm = TRUE))
  if (is.na(numClones)){ numClones <- 0 }
  # distinct (cluster, prevalence) pairs
  cellPrev <- unique(cbind(Cluster = titanResults$ClonalCluster, 
      Prevalence = titanResults$CellularPrevalence))

  # explicit placeholders instead of exists(), which would also find objects
  # of the same name outside this function
  subc1 <- NULL
  subc2 <- NULL

  if (numClones == 0 || is.infinite(numClones)){
    subc1 <- data.table(cbind(CopyNumber = as.numeric(titanResults$CopyNumber), 
        TITANcall = titanResults$TITANcall, Prevalence = "NA"))
  }
  if (numClones == 1){
    subc1Prev <- cellPrev[which(cellPrev[, "Cluster"] == "1"), "Prevalence"]
    subc1 <- data.table(cbind(CopyNumber = as.numeric(titanResults$CopyNumber), 
        TITANcall = titanResults$TITANcall,
        Prevalence = as.numeric(subc1Prev)))
  }
  if (numClones == 2){
    subc2Prev <- as.numeric(cellPrev[which(cellPrev[, "Cluster"] == "2"),
        "Prevalence"])
    subc1Prev <- as.numeric(cellPrev[which(cellPrev[, "Cluster"] == "1"),
        "Prevalence"])
    subc1Prev <- subc1Prev - subc2Prev
    subc2 <- data.table(CopyNumber = as.numeric(titanResults$CopyNumber), 
        TITANcall = titanResults$TITANcall, Prevalence = as.numeric(subc2Prev))
    # subclone 1 does not carry the events of cluster 2
    subc1 <- copy(subc2)
    ind <- which(titanResults$ClonalCluster == 2)
    subc1[ind, CopyNumber := 2]
    subc1[ind, TITANcall := "HET"]
    subc1[, "Prevalence"] <- subc1Prev
  }
  if (is.null(subc1)){
    stop("getSubcloneProfiles: subclone profiles are only supported for up to 2 clonal clusters.")
  }

  ## Add subclone 1 and 2 if they are defined
  outMat <- data.table(Subclone1 = subc1)
  if (!is.null(subc2)){
    outMat <- cbind(outMat, Subclone2 = subc2)
  }
  return(outMat)
}
## gavinha/TitanCNA documentation built on April 22, 2021, 9:38 a.m.