MinimumDistance: A Package for De Novo CNV Detection in Case-Parent Trios

Documented in acf2

catFun2 <- function(rd.query, rd.subject, ...){
	##stopifnot(nrow(rd.query) == nrow(rd.subject)) ## must compare same list size
	ir.q <- IRanges(start(rd.query), end(rd.query))
	ir.s <- IRanges(start(rd.subject), end(rd.subject))
	mm <- findOverlaps(ir.q, ir.s, ...)
	query.index <- queryHits(mm)
	if(length(query.index) == 0){
		return(0)
	}
	subject.index <- subjectHits(mm)
	index <- which(chromosome(rd.query)[query.index] == chromosome(rd.subject)[subject.index] &
		       sampleNames(rd.query)[query.index] == sampleNames(rd.subject)[subject.index])
	if(length(index) > 0){
		query.index <- unique(query.index[index])
		p <- length(query.index)/nrow(rd.query)
		if(p > 1) {
      stop("Reached a place in catFun2 that we shouldn't have")
    }
	} else p <- 0
	return(p)
}

splitByDistance <- function(x, thr=100e3){
	d <- diff(x)
	if(all(d < thr)) return(rep(0, length(x)))
	f <- c(0, cumsum(d > thr))
	tab.f <- table(f)
	## combine regions if number of markers is very small
	while(any(tab.f < 1000) & length(x) > 1000){
		j <- which(tab.f < 1000)[[1]]
		factor.val <- as.integer(names(tab.f)[j])
		if(factor.val < max(f)){
			f[f==factor.val] <- factor.val+1
		} else {
			f[f==factor.val] <- factor.val-1
		}
		tab.f <- table(f)
	}
	return(f)
}

splitIndicesByLength2 <- function(x, MIN.LENGTH=1000, ...){
	f <- splitIndicesByLength(x, ...)
	l <- sapply(f, length)
	L <- length(l)
	l <- l[L]
	if(l < MIN.LENGTH & length(f) > 1){
		f[[L-1]] <- c(f[[L-1]], f[[L]])
		f <- f[-L]
	}
	return(f)
}



discAtTop <- function(ranges.query, ranges.subject, verbose=TRUE,...){
	ir.q <- IRanges(start(ranges.query), end(ranges.query))
	ir.s <- IRanges(start(ranges.subject), end(ranges.subject))
	mm <- findOverlaps(ir.q, ir.s,...)
	query.index <- queryHits(mm)
	subject.index <- subjectHits(mm)
	index <- which(chromosome(ranges.query)[query.index] == chromosome(ranges.subject)[subject.index] &
		       sampleNames(ranges.query)[query.index] == sampleNames(ranges.subject)[subject.index])
	query.index <- unique(query.index[index])
	##subject.index <- unique(subject.index[index])
	notOverlapping.index <- seq(length=nrow(ranges.query))[!seq(length=nrow(ranges.query)) %in% query.index]
	res <- ranges.query[notOverlapping.index, ]
	return(res)
}

#' @importFrom utils setTxtProgressBar txtProgressBar
concAtTop <- function(ranges.query, ranges.subject, list.size, verbose=TRUE, ...){
	p <- rep(NA, length(list.size))
	pAny1 <- rep(NA, length(list.size))
	pAny2 <- rep(NA, length(list.size))
	if(verbose) {
		message("Calculating the proportion of ranges in common for the first ", max(list.size), " ranges")
		pb <- txtProgressBar(min=0, max=length(p), style=3)
	}
	for(i in seq_along(list.size)){
		if(verbose) setTxtProgressBar(pb, i)
		L <- list.size[i]
		p[i] <- catFun2(ranges.query[seq(length=L), ], ranges.subject[seq(length=L), ], ...)
		pAny1[i] <- catFun2(ranges.query[seq(length=L), ], ranges.subject, ...)
		pAny2[i] <- catFun2(ranges.subject[seq(length=L), ], ranges.query, ...)
	}
	if(verbose) close(pb)
	res <- list(p=p, pAny.queryList=pAny1, pAny.subjectList=pAny2)
	names(res) <- c("cat", "topMD", "topPenn")
	return(res)
}


correspondingCall <- function(ranges.query, ranges.subject, subject.method){
	overlap <- findOverlaps(ranges.query, ranges.subject)
	subj.index <- subjectHits(overlap)
	quer.index <- queryHits(overlap)
	## what are the chromosomes for the subject hits
	index <- which(chromosome(ranges.query)[quer.index] == chromosome(ranges.subject)[subj.index] &
		       sampleNames(ranges.query)[quer.index] == sampleNames(ranges.subject)[subj.index])
	if(length(index) == 0) return("no overlap")
	matching.index <- subj.index[index]
	res <- ranges.subject[matching.index, ]
	if(!missing(subject.method)) res$method <- subject.method
	return(res)
}

isDeletion <- function(x){
	if(length(grep("-", x)) > 0){
		tmp <- strsplit(x, "_")[[1]]
		state <- substr(tmp, 3, 3)
		state <- ifelse(any(state < 3), TRUE, FALSE)
	} else{
		state <- as.integer(substr(x, 3, 3))
		state <- ifelse(state < 3, TRUE, FALSE)
	}
	state
}

overlapsCentromere <- function(myranges){
	##require(SNPchip)
	data(chromosomeAnnotation, package="SNPchip", envir=environment())
	chromosomeAnnotation <- get("chromosomeAnnotation")
	centromere.ranges <- RangedData(IRanges(chromosomeAnnotation[, "centromereStart"],
						chromosomeAnnotation[, "centromereEnd"]),
					chrom=rownames(chromosomeAnnotation))
	myranges.bak <- myranges
	chrom <- unique(myranges$chrom)
	overlaps.centromere <- rep(NA, nrow(myranges))
	for(CHR in chrom){
		centromere.ir <- IRanges(start(centromere.ranges)[CHR],
					 end(centromere.ranges)[CHR])
		ix <- which(myranges$chrom==CHR)
		ir <- IRanges(start(myranges)[ix],
			      end(myranges)[ix])
		overlaps.centromere[ix] <- countOverlaps(ir, centromere.ir) > 0
	}
	return(overlaps.centromere)
}

#' @importFrom utils read.delim
getRefGene <- function(filename="~/Data/Downloads/hg18_refGene.txt"){
	colClasses <- c("integer", "character", "character", "factor",
			"integer", "integer",
			"integer", "integer",
			"integer",
			"character", "character",
			"integer", rep("character", 4))
	tmp <- read.delim(filename, header=FALSE,
			  colClasses=colClasses)
	tmp <- tmp[, c(2:6, 13)]
	colnames(tmp) <- c("NM", "chrom", "strand", "start", "end", "gene_name")
	chrom <- sapply(tmp$chrom, function(x) strsplit(x, "chr")[[1]][2])
	tmp$chrom <- chromosome2integer(chrom)
	tmp <- tmp[!is.na(tmp$chrom), ]
	refGene <- RangedData(IRanges(tmp$start, tmp$end),
			      chrom=tmp$chrom,
			      strand=tmp$strand,
			      NM=tmp$NM,
			      gene_name=tmp$gene_name)
	refGene
}

combineRanges <- function(deletion.ranges, amp.ranges){
	state <- deletion.ranges$state
	hemizygous.states <- c("332", "432", "342")
	homozygous.states <- c("331", "321", "231", "431", "341", "441", "221")
	deletion.ranges <- deletion.ranges[state %in% hemizygous.states | state %in% homozygous.states, ]
	amp.ranges <- amp.ranges[, colnames(amp.ranges) %in% colnames(deletion.ranges)]
	index <- match(colnames(amp.ranges), colnames(deletion.ranges))
	deletion.ranges2 <- deletion.ranges[,  index]
	stopifnot(all.equal(colnames(deletion.ranges2), colnames(amp.ranges)))
	ranges.all <- RangedData(IRanges(c(start(deletion.ranges2), start(amp.ranges)),
					 c(end(deletion.ranges2), end(amp.ranges))),
				 id=c(deletion.ranges2$id, amp.ranges$id),
				 chrom=c(deletion.ranges2$chrom, amp.ranges$chrom),
				 num.mark=c(deletion.ranges2$num.mark, amp.ranges$num.mark),
				 seg.mean=c(deletion.ranges2$seg.mean, amp.ranges$seg.mean),
				 state=c(deletion.ranges2$state, amp.ranges$state))
	ranges.all
}


#' @importFrom utils setTxtProgressBar txtProgressBar
pruneByFactor <- function(range.object, f, verbose=FALSE){
	rd <- list()
	id.chr <- paste(sampleNames(range.object), chromosome(range.object), sep="_")
	ff <- unique(id.chr)
	##for(i in seq_along(unique(range.object$id))){
	if(verbose){
		message("Pruning ", length(ff), " files.")
		pb <- txtProgressBar(min=0, max=length(ff), style=3)
	}
	## do for each element in a GRangesList
	for(i in seq_along(ff)){
		if(verbose) setTxtProgressBar(pb, i)
		##id <- unique(range.object$id)[i]
		##(index <- which(range.object$id == id))
		index <- which(id.chr==ff[i])
		rd[[i]] <- combineRangesByFactor(range.object[index, ], f=f[index])
	}
	if(verbose) close(pb)
	##ok <- tryCatch(stack(RangedDataList(rd)), error=function(e) FALSE)
	##rd <- GRangesList(rd)
	rd <- unlist(GRangesList(rd))
	##rd <- stackRangedDataList(rd)
##	names(rd) <- unique(sampleNames(range.object))
##	if(!is(ok, "RangedData")) {
##		message("trouble combining RangedData objects.  Returning list")
##		ok <- rd
##	} else {
##		j <- match("sample",colnames(ok))
##		if(length(j) == 1)
##			ok <- ok[, -j]
##	}
	return(rd)
}

combineRangesByFactor <- function(range.object, f){
	##range.object <- range.object[!is.na(state(range.object)), ]
	i <- which(is.na(f))
	j <- 1
	while(length(i) > 0){
		if(is.na(f[1])){
			f[1] <- f[2]
		} else {
			f[is.na(f)] <- f[i-1]
		}
		i <- which(is.na(f))
		j <- j+1
		if(j > 10) stop("too many na's in f")
	}
	##stopifnot(all(!is.na(f)))
	ff <- cumsum(c(0, abs(diff(as.integer(as.factor(f))))))
	if(!any(duplicated(ff))) {
		return(range.object)
	}
	for(i in seq_along(unique(ff))){
		x <- unique(ff)[i]
		if(sum(ff==x) == 1) next()
		index <- which(ff==x)
		min.index <- min(index)
		max.index <- max(index)
		end(range.object)[index] <- max(end(range.object)[index])
		emd <- elementMetadata(range.object)
		emd$lik.state[index] <- sum(emd$lik.state[index], na.rm=TRUE)
		emd$seg.mean[index] <- sum((numberProbes(range.object)[index]*emd$seg.mean[index]), na.rm=TRUE)/sum(numberProbes(range.object)[index], na.rm=TRUE)
		emd$numberProbes[index] <- sum(numberProbes(range.object)[index], na.rm=TRUE)
		emd$lik.norm[index] <- sum(emd$lik.norm[index], na.rm=TRUE)
		elementMetadata(range.object) <- emd
		j <- seq_len(length(range.object))
		index <- index[-1]
		j <- j[-index]
		if(length(j) == 0){
			stop()
		}
		ff <- ff[j]
		range.object <- range.object[j, ]
	}
	return(range.object)
}

madVsCoverage <- function(lambda=0.1, MIN=1, MAX=4, coverage=3:100){
	p <- lambda*exp(-lambda*coverage) ## 0 - 0.04 (Pr (X=x)
	b <- 1/(MAX - MIN)
	a <- MIN * b
	numberMads <- ((p-min(p))/(max(p)-min(p)) + a)/b
	list(x=coverage, y=numberMads)
}

thresholdSegMeans <- function(ranges.object, ylim){
	ranges.object$seg.mean[ranges.object$seg.mean < ylim[1]] <- ylim[1]
	ranges.object$seg.mean[ranges.object$seg.mean > ylim[2]] <- ylim[2]
	ranges.object
}

combine.data.frames <- function(dist.df, penn.df){
	if(is.null(dist.df) & is.null(penn.df)) return(NULL)
	if(is.null(dist.df)) dist.df <- penn.df[integer(0), ]
	if(is.null(penn.df)) penn.df <- dist.df[integer(0), ]
	combined.df <- rbind(dist.df, penn.df)
	combined.df <- combined.df[order(combined.df$chr), ]
	return(combined.df)
}



##offspring.hemizygousPenn <- function() c("332", "432", "342", "442")
offspring.hemizygousPenn <- function(){
  tmp <- expand.grid(c(1,3,5,6), c(1,3,5,6), 1)
  dels <- paste(tmp$Var1, tmp$Var2, tmp$Var3, sep="")
  dels <- dels[-1]
}
##offspring.hemizygous <- function() c("221", "321", "231", "441", "341", "431")

offspring.hemizygous <- function() {
	tmp <- expand.grid(c(0,2,3,4), c(0,2,3,4), 1)
	dels <- paste(tmp$Var1, tmp$Var2, tmp$Var3, sep="")
	dels <- dels[-1]
	dels
}
offspring.homozygous <- function(){
	tmp <- expand.grid(c(1,2,3,4), c(1,2,3,4), 0)
	dels <- paste(tmp$Var1, tmp$Var2, tmp$Var3, sep="")
	dels
}
deletionStates <- function(){
	st1 <- offspring.hemizygous()
	st2 <- offspring.homozygous()
	as.integer(c(st1,st2))
}
duplicationStates <- function(){
	tmp <- expand.grid(c(0,1,2,4), c(0,1,2,4), 3)
	sdups <- paste(tmp$Var1, tmp$Var2, tmp$Var3, sep="")
	sdups <- sdups[-1]
	tmp <- expand.grid(c(0,1,2,3), c(0,1,2,3), 4)
	ddups <- paste(tmp$Var1, tmp$Var2, tmp$Var3, sep="")
	ddups <- ddups[-1]
	c(sdups, ddups)
}
duplicationStatesPenn <- function() {
	tmp <- expand.grid(c(1,2,3,6), c(1,2,3,6), 5)
	sdups <- paste(tmp$Var1, tmp$Var2, tmp$Var3, sep="")
	sdups <- sdups[-1]
	tmp <- expand.grid(c(1,2,3,6), c(1,2,3,6), 5)
	ddups <- paste(tmp$Var1, tmp$Var2, tmp$Var3, sep="")
	ddups <- ddups[-1]
	c(sdups, ddups)
}



calculateChangeSd <- function(coverage=1:500, lambda=0.05, a=0.2, b=0.025)
	a + lambda*exp(-lambda*coverage)/b

pruneMD <- function(genomdat,
		  range.object,
		  physical.pos,
		  ##trimmed.SD, ##
		  lambda=0.05,
		  MIN.CHANGE=0.1,
		  SCALE.EXP=0.02,
		  MIN.COVERAGE=3,
		  weighted=FALSE,
		  weights=NULL) {
	if(length(unique(range.object$id)) != 1) stop("multiple ids in range.object")
	if(length(unique(chromosome(range.object))) > 1) stop("Multiple chromosomes in range.object")
	##change.SD <- trimmed.SD*change.SD
	genomdat <- as.numeric(genomdat)/100
	coverage <- range.object$num.mark
	trimmed.SD <- max(mad(genomdat, na.rm=TRUE), .15)
	##trimmed.SD <- unique(range.object$mindist.mad)
	##stopifnot(length(trimmed.SD)==1)
	coverage <- coverage[-length(coverage)]
	if(FALSE){
		numberSds <- calculateChangeSd(coverage=3:100, lambda=lambda, a=MIN.CHANGE, b=SCALE.EXP)
		y <- MIN.CHANGE+lambda*exp(-lambda*(3:100))/SCALE.EXP
		plot(3:100, y, ylab="number of MADs", xlab="coverage")
	}
	##thrSD <- calculateChangeSd(coverage, lambda, trimmed.SD, change.SD)
		##change.SD <- change.SD  ##Thresholds for right cutpoint
	##cpt.loc <- cumsum(lseg) ## indices of the cutpoints same as coverage.
	cpt.loc <- range.object$end.index
	sdundo <- TRUE
	while(sdundo) {
		k <- length(cpt.loc)
		if (k>1) {
			coverage <- diff(c(0, cpt.loc))
			coverage <- coverage[-length(coverage)]
			##
			##  number of sds as a function of coverage
			##  -- segments with high coverage have small y
			##
			requiredNumberSd <- calculateChangeSd(coverage=coverage, lambda=lambda, a=MIN.CHANGE, b=SCALE.EXP)
			##
			## number of standard deviations
			segments0 <- cbind(c(1,1+cpt.loc[-k]),cpt.loc)
			## median copy number for each segment
			segmed <- apply(segments0, 1, function(i,x) {median(x[i[1]:i[2]], na.rm=TRUE)}, genomdat)
			## absolute copy number difference of adjacent segments
 			##adsegmed <- abs(diff(segmed))
			adsegmed <- abs(diff(segmed))
			## number of standard deviations of observed shift
			empiricalNumberSd <- adsegmed/trimmed.SD
			if(any(empiricalNumberSd < requiredNumberSd | coverage < MIN.COVERAGE)){
				## drop order: coverage then distance
				##i <- which(adsegmed < thrSD | coverage < MIN.COVERAGE)
				i <- which(empiricalNumberSd < requiredNumberSd | coverage < MIN.COVERAGE)
				if(length(i) > 1){
					i <- i[order(coverage[i], adsegmed[i], decreasing=FALSE)[1]]
				}
				cpt.loc <- cpt.loc[-i]
			} else {
				sdundo <- FALSE
			}
		} else {
			sdundo <- FALSE
		}
	}
	lseg <- diff(c(0,cpt.loc)) ## back to coverage
	## update segment means
	segmeans <- 0*lseg
	ll <- uu <- 0
	for(i in 1:length(lseg)) {
		uu <- uu + lseg[i]
		if (weighted) {
			segmeans[i] <- sum(genomdat[(ll+1):uu]*weights[(ll+1):uu])/sum(weights[(ll+1):uu])
		} else {
			segmeans[i] <- mean(genomdat[(ll+1):uu], na.rm=TRUE)
		}
		ll <- uu
	}
	segments0 <- cbind(c(1,1+cpt.loc[-k]),cpt.loc)
	starts <- physical.pos[segments0[, 1]]
	ends <- physical.pos[segments0[, 2]]
	if(length(ends) < length(starts)) ends <- c(ends, max(end(range.object)))
	id <- unique(range.object$id)
	res <- RangedDataCBS(IRanges(starts, ends),
			     sampleId=id,
			     chrom=unique(range.object$chrom),
			     coverage=lseg,
			     seg.mean=segmeans,
			     mindist.mad=mad(genomdat, na.rm=TRUE))
	return(res)
}

## pdf of standard normal
## the msm package has this stuff, but it seemed slow...
#' @importFrom stats dnorm
phi <- function(x, mu, sigma) dnorm(x, mu, sigma)

## cdf of standard normal
#' @importFrom stats pnorm
Phi <- function(x, mu, sigma) pnorm(x, mu, sigma)

## pdf of truncated normal on support [0, 1]
tnorm <- function(x, mean, sd, lower=0, upper=1){
	res <- phi(x, mean, sd)/(Phi(upper, mean, sd)-Phi(lower, mean, sd))
	ind <- which(x < lower | x > upper)
	if(any(ind)){
		res[ind] <- 0
	}
	res
}
TN <- tnorm


addRangeIndex <- function(id, trioSet, ranges){
	ranges <- ranges[sampleNames(ranges) %in% id, ]
	stopifnot(nrow(ranges) > 0)
	stopifnot(id %in% sampleNames(trioSet))
	ir1 <- IRanges(start=position(trioSet), end=position(trioSet))
	ir2 <- IRanges(start(ranges), end(ranges))
	mm <- findOverlaps(ir1, ir2)
	## there should be no query that is in more than 1 subject
	qhits <- queryHits(mm)
	shits <- subjectHits(mm)
	right.chromosome <- chromosome(ranges)[shits] == chromosome(trioSet)[qhits]
	qhits <- qhits[right.chromosome]
	shits <- shits[right.chromosome]
	range.index <- rep(NA, nrow(trioSet))
	##fData(object)$range.index <- NA
	##fData(object)$range.index[qhits] <- shits
	range.index[qhits] <- shits
	if(sum(table(range.index)) != nrow(trioSet)){
		msg <- "# of markers in the ranges not equal to total number of markers"
    stop(msg)
	}
	return(range.index)
}

pHet <- function(i, id, trioSet){
	j <- match(id, sampleNames(trioSet))
	stopifnot(length(j) > 0)
	is.ff <- is(baf(trioSet), "ff")
	if(is.ff){
		open(baf(trioSet))
	}
	b <- baf(trioSet)[i, j, 3]
	if(is.ff){
		close(baf(trioSet))
	}
	mean(b > 0.4 & b < 0.6, na.rm=TRUE)
}
meanLogR <- function(i, id, trioSet){
	j <- match(id, sampleNames(trioSet))
	stopifnot(length(j) > 0)
	is.ff <- is(lrr(trioSet), "ff")
	if(is.ff){
		open(lrr(trioSet))
	}
	r <- lrr(trioSet)[i, j, 3]
	if(is.ff){
		close(lrr(trioSet))
	}
	mean(r, na.rm=TRUE)
}


LikSet <- function(trioSet, pedigreeData, id, CHR, ranges){
	is.ff <- is(lrr(trioSet), "ff")
	if(missing(id)) id <- sampleNames(trioSet)[1]
	if(is.ff){
		open(baf(trioSet))
		open(lrr(trioSet))
	}
	i <- match(id, sampleNames(trioSet))
	stopifnot(length(i) == 1)
	## the trios are in the same order as the sampleNames of the trioSetList object
	## validity methods for the class ensure that this is correct
	indNames <- as.character(trios(pedigreeData)[i,])
	##offspring.id <- id[id %in% offspringNames(trioSet)]
	##i <- match(offspring.id, sampleNames(trioSet))
	##i <- match(id[["O"]], offspringNames(trioSet))
	mads <- mad(trioSet)[i, ]
	##S <- length(states)
	loglik <- array(NA, dim=c(2, nrow(trioSet), 3, 5))
	dimnames(loglik) <- list(c("logR", "baf"),
				 featureNames(trioSet),
				 indNames,
				 0:4)
	object <- new("LikSet",
		      logR=as.matrix(lrr(trioSet)[ ,i,]),
		      BAF=as.matrix(baf(trioSet)[ ,i , ]),
		      featureData=featureData(trioSet),
		      loglik=loglik)
	object$MAD <- mads
	fData(object)$range.index <- NA
	##tmp=findOverlaps(featureData(object), ranges)
	fo <- findOverlaps(ranges, featureData(object))
	i1 <- subjectHits(fo)
	i2 <- queryHits(fo)
	fData(object)$range.index[i1] <- i2
	if(any(is.na(range.index(object)))){
		msg <- paste("Segmentation was run on chunks of the data for which the markers are less than 75kb apart.\n",
			     "When log R ratios are missing at the boundaries of the partioned data, not all markers \n",
			     "will be covered by a segment.\n")
		if(is.null(.GlobalEnv[[".warningMessageAlreadyDisplayed"]])){
			warning(msg)
			.GlobalEnv[[".warningMessageAlreadyDisplayed"]] <- TRUE
		}
		object <- object[!is.na(range.index(object)), ]
	}
	## NA values occur if there are ranges that do not overlap the
	## marker locations in object.
##	ir1 <- IRanges(start=position(object), end=position(object))
##	ir2 <- IRanges(start(ranges), end(ranges))
##	mm <- findOverlaps(ir1, ir2)
##	subject.index <- subjectHits(mm)
##	## there should be no query that is in more than 1 subject
##	qhits <- queryHits(mm)
##	shits <- subjectHits(mm)
##	fData(object)$range.index <- NA
##	fData(object)$range.index[qhits] <- shits
	if(is.ff){
		close(baf(trioSet))
		close(lrr(trioSet))
	}
	return(object)
}

fillInMissing <- function(rangeIndex){
	if(!any(is.na(rangeIndex))) return(rangeIndex)
	if(sum(is.na(rangeIndex)) > 1000 & length(unique(rangeIndex[!is.na(rangeIndex)])) == 1){
		## for calculating the posterior for a single range
		## essentially, ignoring what comes before and what comes after
		ii <- range(which(!is.na(rangeIndex)))
		if(ii[1] > 1){
			rangeIndex[1:(ii[1]-1)] <- 0
		}
		if(ii[2] < length(rangeIndex)){
			rangeIndex[(ii[2]+1):length(rangeIndex)] <- 2
		}
	} else {
		ii <- which(is.na(rangeIndex))
		if(max(ii) < length(rangeIndex)){
			rangeIndex[ii] <- rangeIndex[ii+1]
		} else{
			rangeIndex[max(ii)] <- rangeIndex[max(ii)-1]
			if(any(ii != max(ii))){
				iii <- ii[ii != max(ii)]
				rangeIndex[iii] <- rangeIndex[iii+1]
			}
		}
	}
	return(rangeIndex)
}

#' @importFrom matrixStats rowMads
rowMAD <- function(x, y, ...){
	rowMads(x, ...)
}

dups.penn <- expand.grid(c(1,2,3,5,6), c(1,2,3,5,6), c(5,6))

trioStates <- function(states=0:4){
	trio.states <- as.matrix(expand.grid(states, states, states))
	index <- which(trio.states[, 1] == 0 & trio.states[, 2] == 0 & trio.states[, 3] > 0)
	##trio.states <- trio.states+1
	colnames(trio.states) <- c("F", "M", "O")
	## 125 possible
	## remove 00 > 0 as possibilities
	trio.states <- trio.states[-index, ]
}

trioStateNames <- function(trio.states){
  if(missing(trio.states)) trio.states <- trioStates()
  paste(paste(trio.states[,1], trio.states[,2], sep=""), trio.states[,3], sep="")
}


transitionProbability <- function(nstates=5, epsilon=1-0.999){
	off.diag <- epsilon/(nstates-1)
	tpm <- matrix(off.diag, nstates, nstates)
	diag(tpm) <- 1-epsilon
	tpm
}



readTable1 <- function(states=0:4, a=0.0009){
	S <- length(states)
	tmp <- array(NA, dim=rep(S,3))
	dimnames(tmp) <- list(paste("F", states, sep=""),
			      paste("M", states, sep=""),
			      paste("O", states, sep=""))
	tmp["F0", "M0", ] <- c(1, rep(0,4))
	tmp["F0", "M1", ] <- c(0.5, 0.5, 0, 0, 0)
	tmp["F0", "M2", ] <- c(0.5*a, 1-a, 0.5*a, 0, 0)
	tmp["F0", "M3", ] <- c(0.5*a, 0.5*(1-a), 0.5*(1-a), 0.5*a, 0)
	tmp["F0", "M4", ] <- c(0, 0.25, 0.5, 0.25, 0)

	tmp["F1", "M1", ] <- c(0.25, 0.5, 0.25, 0, 0)
	tmp["F1", "M2", ] <- c(0.25, 0.5-0.25*a, 0.5-0.25*a, 0.25*a, 0)
	tmp["F1", "M3", ] <- c(0.25*a, 0.25, 0.5*(1-a), 0.25, 0.25*a)
	tmp["F1", "M4", ] <- c(0, 0.125, 0.375, 0.375, 0.125)

	tmp["F2", "M2", ] <- c(0.25*a^2, a*(1-a), (1-a)^2 + 0.5*a^2, a*(1-a), 0.25*a^2)
	tmp["F2", "M3", ] <- c(0.25*a^2, 0.75*a*(1-a), 0.5*(1-a)^2+0.25*a*(1-a)+0.25*a^2,
			       0.5*(1-a)^2+0.25*a*(1-a)+0.25*a^2,
			       0.75*a*(1-a)+0.25*a^2)
	tmp["F2", "M4", ] <- c(0,0.125*a, 0.25, 0.5-0.25*a,0.25+0.125*a)

	tmp["F3", "M3", ] <- c(0.25^2, 0.5*a*(1-a), 0.5*a*(1-a)+0.25*(1-a)^2, 0.5*(1-a)^2+0.5^2,
			       0.25*(1-a)^2 + a*(1-a)+0.25^2)
	tmp["F3", "M4", ] <- c(0, 0.125*a, 0.125*(1+a), 0.125*(3-2*a), 0.5)

	tmp["F4", "M4", ] <- c(0, 0, 0.0625, 0.25, 0.6875)
	return(tmp)
}

lookUpTable1 <- function(state, table1){
  index <- state+1
  p <- table1[index[1], index[2], index[3]]
  if(is.na(p)){
    p <- table1[index[2], index[1], index[3]]
  }
  p
}

vectorizeTable1 <- function(table1, stateMatrix){
  setNames(apply(stateMatrix, 1, lookUpTable1, table1=table1), rownames(stateMatrix))
}

lookUpTable3 <- function(table3, state.prev, state.curr){
  f1 <- state.prev[1]
  m1 <- state.prev[2]
  o1 <- state.prev[3]
  f2 <- state.curr[1]
  m2 <- state.curr[2]
  ## Each element in the result is for a specific offspring state
  result <- table3[f1, f2, m1, m2, o1, ]
}

stateIndex <- function(param, state) state(param)[state, ] + 1L

##bothMendelian <- function(param, prev_index, state_index, transitionNM){
##  ## Pr(cTS, pNM, cNM | pTS) =
##  ## Pr(cO | cTS[-O], pTS, cNM, pNM) * Pr(cTS[-O] | pTS, cNM=0, pNM=0) * Pr(cNM=0 | pNM=0) * Pr(pNM=0)
##  ## A = term1 * term2 * term3 * term4
##  term1 <- lookUpTable3(table3(param), prev_index, state.curr=state_index)
##  ## Pr(cTS[-O] | pTS, cNM=0, pNM=0) = Pr(cF, cM | pF, pM)
##  ##                                 = Pr(cF | pF) * Pr(cM | pM) *
##  term2 <- tau.f * tau.m
##  term3 <- transitionNM
##  term4 <- 1-probNM
##  term1*term2*term3*term4
##}
##
##bothNonMendelian <- function(param, prev_index, state_index, transitionNM){
##  ## Pr(cTS, pNM, cNM | pTS) =
##  ## Pr(cO | cTS[-O], pTS, cNM, pNM) * Pr(cTS[-O] | pTS, cNM=1, pNM=1) * Pr(cNM=1 | pNM=1) * Pr(pNM=1)
##  ## = Pr(cO | pO, cNM, pNM) * Pr(cF | ...) * Pr(cM | ...) * Pr(cNM=1|pNM=1) * Pr(pNM=1)
##  ## = term1 * term2 * term3 * term4 * term5
##  term1 <- 1/5
##  term2 <- tau.f
##  term3 <- tau.m
##  term4 <- transitionNM
##  term5 <- probNM
####  tau <- transitionProb(param)
####  probNM <- prNonMendelian(param)
####  tau.o <- tau[prev_index[3], state_index[3]]
##  ## check below
##  ##  p.11 <- 1/5*tau.o * transitionNM * probNM
##  term1*term2*term3*term4*term5
##}

##currentNonMendelian <- function(param, prev_index, state_index, transitionNM){
##  ## Pr(cTS, pNM, cNM | pTS) =
##  ## Pr(cO | cTS[-O], pTS, cNM, pNM) * Pr(cTS[-O] | pTS, cNM=1, pNM=1) * Pr(cNM=1 | pNM=1) * Pr(pNM=1)
##  ## = Pr(cO | pO, cNM, pNM) * Pr(cF | ...) * Pr(cM | ...) * Pr(cNM=1|pNM=1) * Pr(pNM=1)
##  prev_name <- rownames(state(param))[prev_index]
##  1/5 * table1(param)[prev_name] * transitionNM * probNM
##}

computeB <- function(param, current_state, previous_state){
  ## B = Pr(S_iO, S_{i-1, O} | S_iF, S_iM, S_{i-1,F}, S_{i-1,M}, NM)
  B <- setNames(vector("list", 4), c("NM=0,0","NM=0,1", "NM=1,0", "NM=1,1"))
  ## For each pair of mendelian indicators, return a vector of length <#CN STATES>
  ##    - element i of the vector is the probability for S_i0 = CN[i], CN = [0,1,2,3,4]
  nms <- paste0("CN (offspr):", 0:4)
  ## NM = 0, 0
  current_index <- stateIndex(param, current_state)
  previous_index <- stateIndex(param, previous_state)
  B[[1]] <- lookUpTable3(table3(param), previous_index, state.curr=current_index[1:2])
  B[[1]] <- setNames(B[[1]], nms)
  ## NM = 0, 1 denotes [previous NM, current Mendelian]
  ## Pr(S_iO, S_{i-1, O} | S_iF, S_iM, S_{i-1,F}, S_{i-1,M}, NM)
  ##  assume offspring states are independent
  ## = Pr(S_iO | S_iF, S_iM, NM_i)*Pr(S_{i-1,O} | S_{i-1,F}, S_{i-1,M} NM_{i-1})
  ## = Pr(S_iO | NM_i=1) * Tabled probability
  ## = 1/5 * tabled probability
  prev_name <- paste0(previous_state, collapse="")
  B[[2]] <- rep(1/5, 5) * table1(param)[prev_name]  ## previous state is Mendelian
  B[[2]] <- setNames(B[[2]], nms)
  ## NM = 1, 0  Similar to above, but current state is Mendelian
  states <- paste0(substr(current_state, 1, 2), 0:4)
  B[[3]] <- 1/5*table1(param)[states]
  B[[3]] <- setNames(B[[3]], nms)
  ## NM = 1, 1  Both non-Mendelian
  ## Pr(S_iO, S_{i-1, O} | S_iF, S_iM, S_{i-1,F}, S_{i-1,M}, NM)
  ## = Pr(S_iO | S_{i-1}, NM_i=1) Pr(S_{i-1} | NM_{i-1}=1)
  ## = transition probability between offspring states * 1/5
  tau <- transitionProb(param)
  tau.o <- tau[previous_index[3], current_index[3]]
  B[[4]] <- rep(1/5, 5) * tau.o
  B[[4]] <- setNames(B[[4]], nms)
  ## Assign Pr=0 to states that can not occur (instead of NA)
  B <- lapply(B, function(x) ifelse(is.na(x), 0, x))
  B
}

computeC <- function(param, current_state, previous_state){
  ## C = Pr(S_iF, S_iM, S_{i-1,F}, S_{i-1,M} | NM)
  ##   = Pr(S_iF | S_{i-1,F}) * Pr(S_iM | S_{i-1,M})
  ##     transition prob mom * transition prob father
  previous_state <- paste0(previous_state, collapse="")
  current_index <- stateIndex(param, current_state)[c("F", "M")]
  previous_index <- stateIndex(param, previous_state)[c("F", "M")]
  tau <- transitionProb(param)
  tau.m <- tau[previous_index["M"], current_index["M"]]
  tau.f <- tau[previous_index["F"], current_index["F"]]
  tau.m*tau.f
}

## Function for computing posterior probabilities
  ##
  ## Let S_i = [S_iO, S_iF, S_iO]
  ##
  ## The posterior probability if the trio state is given by
  ##
  ## Pr(S_i | S_{i-1}, data) \propto Pr(data | S_i, S_{i-1}) Pr(S_i | S_{i-1})/ Pr(S_i)
  ##                             =   Likelihood x "Prior Model"
  ##
  ## Tedious calculations with the Prior Model show
  ##
  ## Pr(S_i | S_{i-1}) = sum_{NM_i} sum_{NM_{i-1}}  Pr(S_i, NM_i, NM_{i-1} | S_{i-1})
  ##                   = sum_{NM_i} sum_{NM_{i-1}}  Pr(S_i | S_{i-1}, NM_i, NM_{i-1}) * Pr(NM_i, NM_{i-1} | S_{i-1})
  ## Denote sum_{NM_i} sum_{NM_{i-1}} by SUM, let Pr(NM_i, NM_{i-1} | S_{i-1}) = A, and denote [NM_i, NM_{i-1}] by NM. Then,
  ##                   = SUM Pr(S_i, S_{i-1} | NM) / Pr(S_{i-1} | NM) * A
  ##                   = SUM (B*C)/D * A, where
  ## B = Pr(S_iO, S_{i-1, O} | S_iF, S_iM, S_{i-1,F}, S_{i-1,M}, NM)
  ## C = Pr(S_iF, S_iM, S_{i-1,F}, S_{i-1,M} | NM)
  ## D = Pr(S_{i-1} | NM)
  ## A = Pr(NM_i , NM_{i-1} | S_{i-1})
  ##
  ## Rewriting D, we have
  ## Pr(S_{i-1} | NM ) = sum_{S_{i}} Pr(S_i, S_{i-1} | NM)
  ##                   = sum_{S_{i}} Pr(S_iO, S_{i-1,O} | NM, S_iF, S_iM, S_{i-1,F}, S_{i-1,M}) * C
  ##                   = sum_{S_{i}} B
  ##
  ## =>
  ##
  ## Pr(S_i | S_{i-1}) = SUM[ (B*C*A)/(sum_{S_{i}} B) ]
  ## since term C does not depend on the non-Mendelian indicators, we have
  ##                   = C * SUM [ (B*A)/(sum_{S_{i} B)]
  ##
  ## For offspring state index i, we have
  ##
  ## Pr(S_i | S_{i-1}) = C * ( B[["NM=0,0"]][i]/sum(B[["NM=0,0"]]) * A[["NM=0,0"]]  + B[["NM=0,1"]][i]/sum(B[["NM=0,1"]]) * A[["NM=0,1"]] ...)
  ##
  ## The numerator sums over the non-mendelian indicator and involves 4 terms
  ## The denominator sums over all possible offspring states at segment i and therefore involves 5 terms
  ##
posterior <- function(state,
                      param,
		      state.prev,
		      log.lik){
  if(is.null(state.prev)) {
    result <- setNames(loglikInitial(param, LLT=log.lik, state), state)
    return(result)
  }
  A <- transitionNM(param) ## precomputed, length 4
  B <- computeB(param, state, state.prev) ## length 4 list
  C <- computeC(param, state, state.prev)
  i <- stateIndex(param, state)
  ## sum over the mendelian indicators for the offspring state indexed by i
  ## sum over all possible offspring states
  totalB <- sapply(B, sum)
  prior <- mapply(function(B, A, totalB) (B*A)/totalB, B=B, A=A, totalB=totalB)
  ## Set 0/0 to 0
  prior[is.nan(prior)] <- 1e-5
  prior <- sum(prior[i["O"], ])
  loglik <- sum(diag(log.lik[, i]))
  posterior <- loglik + log(prior)
  if(all(is.na(posterior))) {
    stop("all NAs in posterior")
  }
  posterior
}

##xypanelMD <- function(x, y,
##		      id,
##		      gt,
##		      is.snp,
##		      range,
##		      cex,
##		      col.hom="grey20",
##		      fill.hom="lightblue",
##		      col.het="grey20" ,
##		      fill.het="salmon",
##		      col.np="grey20",
##		      fill.np="grey60",
##		      show.state=TRUE,
##		      lrr.segs,
##		      md.segs,
##		      ..., subscripts){
##	xypanel(x, y,
##                gt,
##                is.snp,
##                range,
##                col.hom=col.hom,
##                fill.hom=fill.hom,
##                col.het=col.het,
##                fill.het=fill.het,
##                col.np=col.np,
##                fill.np=fill.np,
##                show.state, cex=cex, ..., subscripts=subscripts)
##	id <- unique(id[subscripts])
##	range <- range[1, ]
##	CHR <- chromosome(range)
##	##stopifnot(length(CHR)==1)
##	if(id != "min dist" & !missing(lrr.segs)){
##		cbs.sub <- lrr.segs[sampleNames(lrr.segs)==as.character(id) & chromosome(lrr.segs)==CHR, ]
##		segments <- TRUE && nrow(cbs.sub) > 0
##	} else segments <- FALSE
##	if(!missing(md.segs) & id == "min dist"){
##		cbs.sub <- md.segs[sampleNames(md.segs) %in% sampleNames(range), ]
##		cbs.sub <- cbs.sub[chromosome(cbs.sub) == chromosome(range), ]
##		##cbs.sub$seg.mean <- -1*cbs.sub$seg.mean
##		segments.md <- TRUE && nrow(cbs.sub) > 0
##	} else segments.md <- FALSE
##	if(segments | segments.md){
##		##if(missing(ylimit)) ylimit <- range(y, na.rm=TRUE) ##else ylim <- ylimit
##		ylimit <- current.panel.limits()$ylim
##		if(nrow(cbs.sub) > 0){
##			index <- which(cbs.sub$seg.mean < ylimit[1])
##			if(length(index) > 0)
##				cbs.sub$seg.mean[index] <- ylimit[1] + 0.2
##			index <- which(cbs.sub$seg.mean > ylimit[2])
##			if(length(index) > 0)
##				cbs.sub$seg.mean[index] <- ylimit[2] - 0.2
##			panel.segments(x0=start(cbs.sub)/1e6, x1=end(cbs.sub)/1e6, y0=cbs.sub$seg.mean, y1=cbs.sub$seg.mean, lwd=2, col="black")#gp=gpar("lwd"=2))
##		}
##	}
##}
##
##xypanelMD2 <- function(x, y,
##		       id,
##		       gt,
##		       is.snp,
##		       range,
##		       show.state=TRUE,
##		       lrr.segs,
##		       md.segs,
##		       col,
##		       cex=1,
##		       cex.state=1,
##		       col.state="blue",
##		       ..., subscripts){
##	panel.grid(v=0, h=4, "grey", lty=2)
##	panel.xyplot(x, y, cex=cex, col=col[subscripts], ...)
##	##lpoints(x, y, col=col[subscripts], cex=cex, ...)
####	lpoints(x[!is.snp], y[!is.snp], col=col.np, cex=cex, ...)
##	id <- unique(id[subscripts])
##	range <- range[1, ]
##	CHR <- chromosome(range)
##	##stopifnot(length(CHR)==1)
##	if(id != "min dist" & !missing(lrr.segs)){
##		cbs.sub <- lrr.segs[sampleNames(lrr.segs)==as.character(id) & chromosome(lrr.segs)==CHR, ]
##		segments <- TRUE && nrow(cbs.sub) > 0
##	} else segments <- FALSE
##	if(!missing(md.segs) & id == "min dist"){
##		cbs.sub <- md.segs[sampleNames(md.segs) %in% sampleNames(range), ]
##		cbs.sub <- cbs.sub[chromosome(cbs.sub) == chromosome(range), ]
##		##cbs.sub$seg.mean <- -1*cbs.sub$seg.mean
##		segments.md <- TRUE && nrow(cbs.sub) > 0
##	} else segments.md <- FALSE
##	if(segments | segments.md){
##		##if(missing(ylimit)) ylimit <- range(y, na.rm=TRUE) ##else ylim <- ylimit
##		ylimit <- current.panel.limits()$ylim
##		if(nrow(cbs.sub) > 0){
##			index <- which(cbs.sub$seg.mean < ylimit[1])
##			if(length(index) > 0)
##				cbs.sub$seg.mean[index] <- ylimit[1] + 0.2
##			index <- which(cbs.sub$seg.mean > ylimit[2])
##			if(length(index) > 0)
##				cbs.sub$seg.mean[index] <- ylimit[2] - 0.2
##			panel.segments(x0=start(cbs.sub)/1e6, x1=end(cbs.sub)/1e6, y0=cbs.sub$seg.mean, y1=cbs.sub$seg.mean, lwd=2, col="black")#gp=gpar("lwd"=2))
##		}
##	}
##	j <- panel.number()
##	st <- start(range)[j]/1e6
##	lrect(xleft=st, xright=end(range)[j]/1e6,
##	      ybottom=-10, ytop=10, ...)
##	if(show.state){
##		## left justify the label to the start of the range
##		y.max <- current.panel.limits()$ylim[2]
##		ltext(st, y.max, labels=paste("state", state(range)[j]),
##		      adj=c(0,1), cex=cex.state, col=col.state)
##	}
##}

##narrow <- function(object, lrr.segs, thr=0.9,
##		   mad.minimumdistance, verbose=TRUE,
##		   fD, genome) .Defunct("The 'narrow' function is defunct in MinimumDistance. Use narrowRanges instead.")

#' @importFrom utils txtProgressBar
narrowRanges <- function(object,
                         lrr.segs,
                         thr=0.9,
                         mad.minimumdistance,
                         verbose=TRUE,
                         fD, genome){
  if(missing(fD)) stop("fD not specified. fD must be a list of GenomeAnnotatedDataFrames (if multiple chromosomes are in 'object'), or a single GenomeAnnotatedDataFrame (one chromosome represented in 'object')")
  if(!is(names(mad.minimumdistance), "character")) stop("mad.minimumdistance must be named")
  if(!missing(genome)) metadata(lrr.segs) <- list(genome=genome)
  ix <- match(sampleNames(object), names(mad.minimumdistance))
  object$mindist.mad <- mad.minimumdistance[ix]
  lrr.segs <- lrr.segs[sampleNames(lrr.segs) %in% sampleNames(object), ]
  if(length(unique(chromosome(object))) > 1){
    if(verbose)
      message("narrowing the ranges by chromosome")
    if(!is(fD, "list")) stop("when object contains multiple chromosomes, fD should be a list of GenomeAnnotatedDataFrames")
    chromsInFD <- paste("chr", sapply(fD, function(x) chromosome(x)[1]), sep="")
    indexList <- split(seq_len(length(object)), as.character(chromosome(object)))
    indexList2 <- split(seq_len(length(lrr.segs)), as.character(chromosome(lrr.segs)))
    if(!all.equal(names(indexList), names(indexList2))){
      stop("the chromosomes represented in the minimum distance genomic intervals (object) must be the same as the chromosomes represented in the offspring genomic intervals (lrr.segs)")
    }
    fD <- fD[match(names(indexList), as.character(chromsInFD))]
    if(length(fD) != length(indexList)){
      stop("The list of GenomeAnnotatedDataFrames (argument fD) must be the same length as the number of chromosomes represented in the minimum distange genomic intervals (object)")
    }
    if(verbose)  pb <- txtProgressBar(min=0, max=length(indexList), style=3)
    segList <- vector("list", length(indexList))
    pkgs <- neededPkgs()
    j <- k <- NULL
    segList <- foreach(j=indexList, k=indexList2, featureData=fD, .packages=pkgs) %dopar%{
      narrowRangeForChromosome(object[j, ],
                               lrr.segs[k, ],
                               thr=thr,
                               verbose=FALSE,
                               fD=featureData)
    }
    if(verbose) close(pb)
    segs <- unlist(GRangesList(segList))
  } else {
    if(is(fD, "list")) {
      chroms <- paste("chr", sapply(fD, function(x) chromosome(x)[1]), sep="")
      fD <- fD[[match(as.character(chromosome(object))[1], chroms)]]
      chr <- as.character(chromosome(object)[1])
      if(paste("chr", chromosome(fD)[1], sep="") != chr) stop("The supplied GenomeAnnotatedDataFrame (fD) does not have the same chromosome as object")
    }
    segs <- narrowRangeForChromosome(object, lrr.segs, thr, verbose, fD=fD)
  }
  metadata(segs) <- metadata(lrr.segs)
  return(segs)
}


narrowRangeForChromosome <- function(md.range, cbs.segs, thr=0.9, verbose=TRUE, fD){
  md.range <- md.range[order(md.range$sample, start(md.range))]
  mads <- pmax(md.range$mindist.mad, .1)
  abs.thr <- abs(md.range$seg.mean)/mads
  md.range2 <- md.range[abs.thr > thr]
  if(length(md.range2) < 1) return(md.range)

  cbs.segs <- cbs.segs[order(sampleNames(cbs.segs), start(cbs.segs))]
  o <- findOverlaps(md.range2, cbs.segs)
  j <- subjectHits(o)
  ## only consider the cbs segments that have an overlap
  if(!is.na(match("sample", colnames(cbs.segs)))) cbs.segs <- cbs.segs[-match("sample", colnames(cbs.segs))]
  offspring.segs <- cbs.segs[j]
  sns <- unique(sampleNames(md.range2))
  chr <- chromosome(md.range)[1]
  rdlist <- list()
  for(j in seq_along(sns)){
    md <- md.range2[md.range2$sample == sns[j]]
    of <- offspring.segs[offspring.segs$sample==sns[j]]
    md.mad <- md$mindist.mad
    mcols(md) <- mcols(md)[, match(colnames(mcols(of)), colnames(mcols(md)))]
    ## stack the ranges of the minimum distance segments and the offspring segments
    un <- unlist(GRangesList(list(md, of)))
    ## find the disjoint ranges
    disj <- disjoin(un)
    o <- findOverlaps(md, disj)
    ## which minimumdistance intervals are spanned by a disjoint interval
    r <- subjectHits(o)
    s <- queryHits(o)
    ## only keep the disjoint intervals for which a minimum distance segment is overlapping
    ##  (filters intervals that have a minimum distance of approx. zero)
    disj <- disj[r, ]
    disj$sample <- md$sample[s]
    disj$numberProbes <- 0L ## update later
    disj$seg.mean <- md$seg.mean[s]
    disj$mindist.mad <- md.mad[s]
    rdlist[[j]] <- disj
  }
  rd <- unlist(GRangesList(rdlist))
  metadata(rd) <- metadata(md.range)
  frange <- makeFeatureGRanges(fD, metadata(rd)[["genome"]])
  cnt <- countOverlaps(rd, frange)
  rd$numberProbes <- cnt
  rd <- rd[numberProbes(rd) > 0L]
  mrd <- md.range[abs.thr <= thr]
  mcols(mrd) <- mcols(mrd)[, match(colnames(mcols(rd)), colnames(mcols(mrd)))]
  rd2 <- c(rd, mrd)
  rd2 <- rd2[order(rd2$sample, start(rd2))]
  return(rd2)
}


##narrow <- function(object, lrr.segs, thr=0.9, mad.minimumdistance, verbose=TRUE){
##	if(!is(names(mad.minimumdistance), "character")) stop("mad.minimumdistance must be named")
##	##stopifnot(!is.null(names(mad.minimumdistance)))
##	ix <- match(sampleNames(object), names(mad.minimumdistance))
##	object$mindist.mad <- mad.minimumdistance[ix]
##	stopifnot("mindist.mad" %in% colnames(object))
##	lrr.segs <- lrr.segs[sampleNames(lrr.segs) %in% sampleNames(object), ]
##	if(length(unique(chromosome(object))) > 1){
##		if(verbose)
##			message("narrowing the ranges by chromosome")
##		indexList <- split(seq_len(nrow(object)), chromosome(object))
##		indexList2 <- split(seq_len(nrow(lrr.segs)), chromosome(lrr.segs))
##		stopifnot(all.equal(names(indexList), names(indexList2)))
##		if(verbose) {
##			pb <- txtProgressBar(min=0, max=length(indexList), style=3)
##		}
##		segList <- vector("list", length(indexList))
##		for(i in seq_along(indexList)){
##			if(verbose) setTxtProgressBar(pb, i)
##			j <- indexList[[i]]
##			k <- indexList2[[i]]
##			md.segs <- object[j, ]
##			lr.segs <- lrr.segs[k, ]
##			segList[[i]] <- narrowRangeForChromosome(md.segs, lr.segs, thr=thr, verbose=FALSE)
##			rm(md.segs, lr.segs); gc()
##		}
##		if(verbose) close(pb)
##		segs <- stack(RangedDataList(segList))
##		j <- match("sample", colnames(segs))
##		if(length(j) == 1) segs <- segs[, -j]
##	} else {
##		segs <- narrowRangeForChromosome(object, lrr.segs, thr, verbose)
##	}
##	rd.cbs <- RangedDataCBS(ranges=ranges(segs), values=values(segs))
##	return(rd.cbs)
##}
##
##
##narrowRangeForChromosome <- function(md.range, cbs.segs, thr=0.9, verbose=TRUE){
##	md.range <- md.range[order(sampleNames(md.range), start(md.range)), ]
##	cbs.segs <- cbs.segs[order(sampleNames(cbs.segs), start(cbs.segs)), ]
##	ir1 <- IRanges(start(md.range), end(md.range))
##	ir2 <- IRanges(start(cbs.segs), end(cbs.segs))
##	mm <- findOverlaps(ir1, ir2)
##	qhits <- queryHits(mm)
##	shits <- subjectHits(mm)
##	index <- which(sampleNames(md.range)[qhits] == sampleNames(cbs.segs)[shits])
##	if(length(index) > 0){
##		qhits <- qhits[index]
##		shits <- shits[index]
##	} else stop("no overlap")
##	##---------------------------------------------------------------------------
##	##
##	## only narrow the range if the minimum distance segment is
##	## bigger than some nominal value. Otherwise, we use the
##	## minimum distance range as is.
##	##
##	##
##	##---------------------------------------------------------------------------
##	##abs.thr <- abs(md.range$seg.mean)/md.range$mindist.mad > thr
##	mads <- pmax(md.range$mindist.mad, .1)
##	abs.thr <- abs(md.range$seg.mean)/mads > thr
##	## I1 is an indicator for whether to use the cbs start
##	deltaStart <- start(cbs.segs)[shits] > start(md.range)[qhits] & (start(cbs.segs)[shits] - start(md.range)[qhits] < 100e3)
##	deltaEnd <- end(cbs.segs)[shits] < end(md.range)[qhits]  & (end(md.range)[qhits] - end(cbs.segs)[shits] < 100e3)
##	I1 <- deltaStart & start(cbs.segs)[shits] >= start(md.range)[qhits] & start(cbs.segs)[shits] <= end(md.range)[qhits] & abs.thr[qhits]
##	## indicator of whether to use the cbs end
##	I2 <- deltaEnd & end(cbs.segs)[shits] <= end(md.range)[qhits] & end(cbs.segs)[shits] >= start(md.range)[qhits] & abs.thr[qhits]
##	st <- start(cbs.segs)[shits] * I1 + start(md.range)[qhits] * (1-I1)
##	en <- end(cbs.segs)[shits] * I2 + end(md.range)[qhits] * (1-I2)
##	st.index <- (cbs.segs$start.index[shits] * I1 + md.range$start.index[qhits]*(1-I1))
##	en.index <- (cbs.segs$end.index[shits] * I2 + md.range$end.index[qhits]*(1-I2))
##	## For each md.range range, there should only be one I1 that is TRUE
##	## If I1 and I2 are true, then a range is completely contained within the md.range segment
##	ids <- md.range$id[qhits]
##	##  |--------------|
##	## ---|--------|-----
##	## Becomes
##	##  |-|--------|---|
##	.i <- which(I1 & I2)
##	if(length(.i) > 0){
##		## new intervals
##		ir <- IRanges(st[.i], en[.i])
##		## remove intervals from ir1
##		## these intervals in ir1 must be bigger
##		ix <- subjectHits(findOverlaps(ir, ir1))
##		originalIntervals <- ir1[ix, ]
##		ids <- md.range$id[ix]
##		means <- md.range$seg.mean[ix]
##		chr <- chromosome(md.range)[ix]
##		mads <- md.range$mindist.mad[ix]
##		firstNew <- IRanges(start(originalIntervals), start(ir)-1)
##		endNew <- IRanges(end(ir)+1, end(originalIntervals))
##		ids <- rep(ids, each=3)
##		chr <- rep(chr, each=3)
##		means <- rep(means, each=3)
##		mads <- rep(mads, each=3)
##		res <- c(ir, firstNew, endNew)
##		## remove originalIntervals from the original set
##		ir1 <- ir1[-ix, ]
##		ids1 <- md.range$id[-ix]
##		chr1 <- chromosome(md.range)[-ix]
##		means1 <- md.range$seg.mean[-ix]
##		mads1 <- md.range$seg.mean[-ix]
##		irnew <- c(ir1, res)
##		ids2 <- c(ids1, ids)
##		chr2 <- c(chr1, chr)
##		mads2 <- c(mads1, mads)
##		means2 <- c(means1, means)
##		tmp <- RangedDataCBS(irnew,
##				     sampleId=ids2,
##				     chrom=chr2,
##				     seg.mean=means2,
##				     mindist.mad=mads2)
##	} else return(md.range)
##		##irnew <- irnew[order(start(irnew)), ]
####	index <- which(I1 & I2)-1
####	index <- index[index!=0]
####	index <- index[ids[index] == ids[index+1]]
#### 	ir3 <- IRanges(st[index], en[index])
####	ir4 <- IRanges(st[index+1], en[index+1])
####	##newRanges1 <- IRanges(st[index], st[index+1]-1)
####	##newRanges2 <- IRanges(st[index+1], en[index])
####	if(length(index) > 1){
####		originalEnd <- en[index]
####		originalStart <- st[index]
####		newEnd <- st[index+1]-1
####		newStart <- st[index+1]
####		en[index] <- newEnd
####		st <- c(st, newStart)
####		en <- c(en, originalEnd)
####		##en.index[index] <- st.index[index+1]-1
####	}
##	##split(I1, qhits)
##	##split(I2, qhits)
##	##stopifnot(sapply(split(st, qhits), function(x) all(diff(x) >= 0)))
####	nm <- apply(cbind(st.index, en.index), 1, function(x) length(x[1]:x[2]))
##	## keep segment means the same as the minimum distance
####	tmp <- RangedData(IRanges(st, en),
####			  ##id=sampleNames(md.range)[qhits],
####			  id=ids2,
####			  ##chrom=chromosome(md.range)[qhits],
####			  chrom=chr2,
####			  ##num.mark=nm,
####			  seg.mean=md.range$seg.mean[qhits],
####			  start.index=st.index,
####			  end.index=en.index,
####			  mindist.mad=md.range$mindist.mad[qhits])
##	##family=md.range$family[qhits])
##	##ranges.below.thr <- split(!abs.thr[qhits], qhits)
##	##ns <- sapply(ranges.below.thr, sum)
####	uid <- paste(tmp$id, start(tmp), tmp$chrom, sep="")
##	##duplicated(uid)
##	##stopifnot(!all(duplicated(uid)))
####	tmp <- tmp[!duplicated(uid), ]
##	## for each subject, the following must be true
####	index <- which(tmp$id[-nrow(tmp)] == tmp$id[-1])
##	##stopifnot(all(end(tmp)[index] < start(tmp)[index+1]))
##	res <- tmp[order(tmp$id, start(tmp)), ]
##	return(res)
##}

stackListByColIndex <- function(object, i, j){
	X <- vector("list", length(object))
	is.matrix <- is(object[[1]], "matrix") || is(object[[1]], "ff_matrix")
	if(is.matrix){
		for(k in seq_along(X)){
			X[[k]] <- object[[k]][, i, drop=FALSE]
		}
		X <- do.call("rbind", X)
	} else {
		is.array <- is(object[[1]], "array")
		stopifnot(length(j)==1)
		for(k in seq_along(X)){
			X[[k]] <- object[[k]][, i, j, drop=FALSE]
			dim(X[[k]]) <- c(nrow(X[[k]]), ncol(X[[k]])) ## drop 3rd dimension
		}
		X <- do.call("rbind", X)
	}
	return(X)
}


callDenovoSegments <- function(path="",
			       pedigreeData,
			       ext="",
			       featureData,
			       cdfname,
			       chromosome=1:22,
			       segmentParents,
			       mdThr=0.9,
			       prOutlierBAF=list(initial=1e-3, max=1e-1, maxROH=1e-3),
			       verbose=FALSE, genome=c("hg19", "hg18"), ...){
  pkgs <- c("VanillaICE", "oligoClasses", "matrixStats", "MinimumDistance")
  genome <- match.arg(genome)
  if(!is(pedigreeData, "Pedigree")) stop("pedigreeData must be an object of class Pedigree")
  filenames <- file.path(path, paste(originalNames(allNames(pedigreeData)), ext, sep=""))
  ##obj <- read.bsfiles(filenames=filenames, path="", ext="")
  headers <- names(read.bsfiles(filenames[1], nrows=0))
  select <- match(c("SNP Name", "Allele1 - AB", "Allele2 - AB",
                    "Log R Ratio", "B Allele Freq"), headers)
##  ##labels <- setNames(c("Allele1", "Allele2", "LRR", "BAF"), keep[-1])
##  classes <- c(rep("character", 3), rep("numeric", 2))
##  header_info <- VanillaICE:::headerInfo(filenames[1], skip=10, sep=",",
##                                         keep=keep, labels=labels,
##                                         classes=classes)
##  obj <- read_beadstudio(filenames=filenames)
  obj <- fread(filenames, select=select)
  if(missing(featureData)){
    trioSetList <- TrioSetList(lrr=integerMatrix(obj[, "lrr",], 100),
                               baf=integerMatrix(obj[, "baf",], 1000),
                               pedigreeData=pedigreeData,
                               chromosome=chromosome,
                               cdfname=cdfname,
                               genome=genome)
  } else {
    trioSetList <- TrioSetList(lrr=integerMatrix(obj[, "lrr",], 100),
                               baf=integerMatrix(obj[, "baf",], 1000),
                               pedigreeData=pedigreeData,
                               featureData=featureData,
                               chromosome=chromosome,
                               genome=genome)
  }
  isff <- is(lrr(trioSetList)[[1]], "ff")
  if(isff) pkgs <- c("ff", pkgs)
  md <- calculateMindist(lrr(trioSetList), verbose=verbose)
  mads.md <- mad2(md, byrow=FALSE)
  fns <- featureNames(trioSetList)
  md.segs <- segment2(object=md,
                      pos=position(trioSetList),
                      chrom=chromosome(trioSetList, as.list=TRUE),
                      verbose=verbose,
                      id=offspringNames(trioSetList),
                      featureNames=fns,
                      genome=genome,
                      ...)
  lrrs <- lrr(trioSetList)
  if(!segmentParents){
    ## when segmenting only the offspring,
    ## the trio names are the same as the sampleNames
    lrrs <- lapply(lrrs, function(x){
      dns <- dimnames(x)
      x <- x[, , 3, drop=FALSE]
      dim(x) <- c(nrow(x), ncol(x))
      dimnames(x) <- list(dns[[1]], dns[[2]])
      return(x)
    })
    id <- offspringNames(trioSetList)
  } else{
    id=trios(trioSetList)
  }
  pos <- position(trioSetList)
  lrr.segs <- segment2(object=lrrs,
                       pos=position(trioSetList),
                       chrom=chromosome(trioSetList, as.list=TRUE),
                       id=id, ## NULL if segmentParents is FALSE
                       verbose=verbose,
                       featureNames=fns, genome=genome, ...)
  md.segs2 <- narrowRanges(md.segs, lrr.segs, 0.9, mad.minimumdistance=mads.md, fD=Biobase::featureData(trioSetList))
  index <- splitIndicesByLength(seq_len(ncol(trioSetList)), 1)
  ##if(!getDoParRegistered()) registerDoSEQ()
  outdir <- ldPath()
  id <- sampleNames(trioSetList)
  object <- i <- NULL
  map.segs <- foreach(i=index,
                      .packages=pkgs, .combine="unlist") %dopar% {
                        MAP(object=trioSetList,
                            ranges=md.segs2,
                            id=id[i],
                            mdThr=mdThr,
                            prOutlierBAF=prOutlierBAF)
                      }
  return(map.segs)
}

make.unique2 <- function(names, sep="___DUP") make.unique(names, sep)
originalNames <- function(names){
	if(length(names) ==0) return(names)
	sep <- formals(make.unique2)[["sep"]]
	index <- grep(sep, names)
	if(length(index) > 0) names[index] <- sapply(names[index], function(x) strsplit(x, sep)[[1]][[1]])
	names
}

read.bsfiles2 <- function(path, filenames, sampleNames, z, marker.index,
			  lrrlist, baflist, featureNames){
  i <- seq_along(sampleNames)
  ## this is simply to avoid having a large 'dat' object below.
  if(isPackageLoaded("ff")){
    NN <- min(length(sampleNames), 2)
    ilist <- splitIndicesByLength(i, NN)
    for(k in seq_along(ilist)){
      j <- ilist[[k]]
      sns <- sampleNames[j]
      datlist <- lapply(file.path(path, filenames[j]), read.bsfiles)
      id <- datlist[[1]][[1]]
      datlist <- lapply(datlist, "[", c(2,3))
      r <- do.call(cbind, lapply(datlist, "[[", 1))
      b <- do.call(cbind, lapply(datlist, "[[", 2))
      dimnames(r) <- dimnames(b) <- list(id, filenames)
      if(!identical(id, featureNames)){
        r <- r[featureNames, , drop=FALSE]
        b <- b[featureNames, , drop=FALSE]
      }
      l <- match(sns, colnames(baflist[[1]]))
      for(m in seq_along(marker.index)){
        M <- marker.index[[m]]
        baflist[[m]][, l, z] <- integerMatrix(b, scale=1000)
        lrrlist[[m]][, l, z] <- integerMatrix(r, scale=100)
      }
    }
    return(TRUE)
  } else {
    ##dat <- read.bsfiles(path=path, filenames=filenames)
    fnames <- file.path(path, filenames)
    datlist <- lapply(fnames, read.bsfiles)
    id <- datlist[[1]][[1]]
    datlist <- lapply(datlist, "[", c(2,3))
    r <- do.call(cbind, lapply(datlist, "[[", 1))
    b <- do.call(cbind, lapply(datlist, "[[", 2))
    tmp <- array(NA, dim=c(nrow(r), 2, length(fnames)))
    tmp[, 1, ] <- integerMatrix(r, 100)
    tmp[, 2, ] <- integerMatrix(b, 1000)
    dat <- tmp
    dimnames(dat) <- list(id, c("lrr", "baf"), basename(fnames))
  }
  return(dat)
}

stackRangedDataList <- function(...) {
  ##object <- stack(RangedDataList(...))
  object <- GRangesList(list(...)[[1]])
  unlist(object)
  ##j <- match("sample", colnames(object))
  ##if(is.na(j))  object else object[, -j]
}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#~~~~ The rest is old code that has been commented out.~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
##inferMissingRanges <- function(start, end){
##}
##
##inferNormalRangesFromPenn <- function(penn.object, vi.object){
##	chr <- chromosome(penn.object)
##	starts <- split(start(penn.object), chr)
##	ends <- split(end(penn.object), chr)
##	foreach(start=starts, end=ends) %do% inferMissingRanges(start=start,
##			      end=end)
##}


##shrinkTo <- function(x, x.0, DF.PRIOR){
##	DF <- ncol(x)-1
##	DF <- Ns-1
##	DF[DF < 1] <- 1
##	x.0 <- apply(x, 2, median, na.rm=TRUE)
##	x <- (x*DF + x.0*DF.PRIOR)/(DF.PRIOR + DF)
##	for(j in 1:ncol(x)) x[is.na(x[, j]), j] <- x.0[j]
##	return(x)
##}


##dna <- function(object) harmonizeDnaLabels(phenoData2(object[[1]])[, "DNA.Source", ])
##plate <- function(object) phenoData2(object[[1]])[, "Sample.Plate", ]

##readTable3 <- function(a=0.009){
##	## initialize with small value to avoid -Inf
##	results <- .C("calculateCHIT", a=a, M=array(0, dim=c(rep(5,6))))$M
##	## Make sure to transpose!
##	aperm(results)
##}

##arrangeSideBySide2 <- function(object1, object2){
##	grid.newpage()
##	lvp <- viewport(x=0,
##			y=0.05,
##			width=unit(0.50, "npc"),
##			height=unit(0.95, "npc"), just=c("left", "bottom"),
##			name="lvp")
##	pushViewport(lvp)
##	nfigs1 <- length(object1$condlevels[[1]])
##	nfigs2 <- length(object2$condlevels[[1]])
##	stopifnot(length(nfigs1) == length(nfigs2))
##	pushViewport(dataViewport(xscale=c(0,1), yscale=c(0.05,1), clip="on"))
##	object1$layout <- c(1, nfigs1)
##	print(object1, newpage=FALSE, prefix="plot1", more=TRUE)
##	upViewport(0)
##	lvp2 <- viewport(x=0.5,
##			 y=0.25,
##			 width=unit(0.50, "npc"),
##			 height=unit(0.95, "npc"), just=c("left", "bottom"),
##			 name="lvp2")
##	pushViewport(lvp2)
##	pushViewport(dataViewport(xscale=c(0,1), yscale=c(0.05,1), clip="on"))
##	object2$layout <- c(1, nfigs1)
##	print(object2, newpage=FALSE, prefix="plot2", more=TRUE)
##}


##read.bsfiles <- function(path="./", filenames, ext="", row.names=1,
##			 sep="\t",
##			 as.is=TRUE, header=TRUE,
##			 drop=FALSE, ...){
##	fnames <- file.path(path, paste(filenames, ext, sep=""))
##	stopifnot(all(file.exists(fnames)))
##	for(i in seq_along(filenames)){
##		cat(".")
##		tmp <- read.table(file.path(path, paste(filenames[i], ext, sep="")),
##				  row.names=row.names,
##				  sep=sep,
##				  header=header,
##				  as.is=as.is, ...)
##		if(i==1){
##			j <- grep("Log.R.Ratio", colnames(tmp))
##			k <- grep("B.Allele", colnames(tmp))
##			dat <- array(NA, dim=c(nrow(tmp), 2, length(filenames)))
##			if(!drop){
##				dimnames(dat) <- list(rownames(tmp),
##						      c("lrr", "baf"),
##						      basename(filenames))
##			}
##			##lrr.data <- matrix(NA, nrow(tmp), length(filenames))
##			##baf.data <- matrix(NA, nrow(tmp), length(filenames))
##		}
##		dat[, 1, i] <- tmp[, j]
##		dat[, 2, i] <- tmp[, k]
##	}
##	cat("\n")
##	return(dat)
##}

initializeLrrAndBafArrays <- function(dims, col.names, outdir, name=""){
	ldPath(outdir)
	if(name != ""){
		bafname <- paste(name, "baf", sep="_")
		lrrname <- paste(name, "lrr", sep="_")
	} else {
		bafname <- "baf"
		lrrname <- "lrr"
	}
	bafs <- initializeBigArray(bafname, dim=dims, vmode="integer")
	lrrs <- initializeBigArray(lrrname, dim=dims, vmode="integer")
	colnames(bafs) <- colnames(lrrs) <- col.names
	res <- list(baf=bafs, lrr=lrrs)
	return(res)
}

trioSetListExample <- function(){
	data(trioSetListExample, envir=environment())
	ad <- assayData(trioSetList)
	b <- lapply(ad[["BAF"]], integerArray, scale=1000)
	r <- lapply(ad[["logRRatio"]], integerArray, scale=100)
	ad2 <- AssayDataList(BAF=b, logRRatio=r)
	trioSetList@assayDataList <- ad2
	return(trioSetList)
}

neededPkgs <- function() c("oligoClasses", "Biobase", "MinimumDistance")

#' @importFrom stats lowess
gcSubtractMatrix <- function(object, center=TRUE, gc, pos, smooth.gc=TRUE, ...){
	if(ncol(object) !=3) stop("Must pass one trio at a time.")
	cnhat <- matrix(NA, nrow(object), ncol(object))
	isna <- rowSums(is.na(object)) > 0
	if(any(isna)){
		i <- which(isna)
		gc <- gc[-i]
		pos <- pos[-i]
		if(smooth.gc)
			gc <- lowess(gc~pos, ...)$y
		object <- object[-i, , drop=FALSE]
	}
	X <- cbind(1, gc)
	## needs to be a big enough window such that we do not remove a true deletion/amplification
	index <- splitIndicesByLength2(x=seq_along(pos), lg=10000, MIN.LENGTH=2000)
	fitGCmodel <- function(X, Y){
		betahat <- solve(crossprod(X), crossprod(X, Y))
		yhat <- X %*% betahat
		resid <- Y-yhat
		resid
	}
	j <- NULL
	resid <- foreach(j=index, .combine="rbind") %do% fitGCmodel(X=X[j, ], Y=object[j, ])
	## ensure that the chromosome arm has the same median as in the original Y's
	if(center) resid <- resid+median(object,na.rm=TRUE)
	if(any(isna)) cnhat[-i, ] <- resid else cnhat <- resid
	cnhat
}


rescale2 <- function(x, l, u){
  y <- (x-min(x,na.rm=TRUE))/(max(x,na.rm=TRUE)-min(x,na.rm=TRUE))
  rescale(y, l, u)
}

dataFrameFromRange2 <- function(object, range, range.index, frame=0, nchar_sampleid=15){
	frange <- makeFeatureGRanges(featureData(object), genomeBuild(object))
	rm <- findOverlaps(range, frange, maxgap=frame)
	mm <- IRanges::as.matrix(rm)
	mm.df <- data.frame(mm)
	mm.df$featureNames <- featureNames(object)[mm.df$subject]
	marker.index <- mm.df$subject
	sample.index <- match(sampleNames(range), sampleNames(object))
	if(any(is.na(sample.index))) stop("sampleNames in RangedData do not match sampleNames in ", class(data), " object")
	sample.index <- unique(sample.index)
	obj <- object[marker.index, sample.index]
	mm.df$subject <- match(mm.df$featureNames, featureNames(obj))
	##
	## coersion to data.frame
	##
	df <- trioSet2data.frame(obj)
	chr <- unique(chromosome(object))
	oindex <- which(df$memberId=="offspring")
	findex <- which(df$memberId=="father")
	mindex <- which(df$memberId=="mother")
	memberid <- as.character(df$memberId)
	nchr <- min(nchar_sampleid, nchar(sampleNames(obj)[1]))
	oid <- substr(sampleNames(obj)[1], 1, nchr)
	fid <- substr(fatherNames(obj)[1], 1, nchr)
	mid <- substr(motherNames(obj)[1], 1, nchr)
	memberid[oindex] <- paste(oid, "(offspring)")
	memberid[findex] <- paste(fid, "(father)")
	memberid[mindex] <- paste(mid, "(mother)")
	memberid <- paste("chr", chr, ": ", memberid, sep="")
	memberid <- factor(memberid, levels=rev(unique(memberid)))
	df$memberId <- I(memberid)
	##df$range <- rep(i, nrow(df))##mm.df$query
	##dfList[[i]] <- df
	df$range <- range.index
	df
}

trioSet2data.frame <- function(from){
	cn <- lrr(from)[, 1, ]/100
	md.null <- is.null(mindist(from))
	if(!md.null) {
		md <- as.numeric(mindist(from))/100
		mdlabel <- "md"
	} else {
		mdlabel <- md <- NULL
	}
	labels <- c("father", "mother", "offspring", mdlabel)
	J <- length(labels)
	sns <- matrix(labels, nrow(cn), J, byrow=TRUE)
	sns <- as.character(sns)
	cn <- as.numeric(cn)
	y <- c(cn, md)
	bf <- as.numeric(baf(from)[, 1, ])/1000
	bf <- c(bf, rep(NA, length(md)))
	x <- rep(position(from)/1e6, J)
	is.snp <- rep(isSnp(from), J)
	df <- data.frame(x=x,
			 y=y,
			 baf=bf,
			 memberId=sns,
			 trioId=rep(sampleNames(from), length(y)),
			 is.snp=is.snp,
			 stringsAsFactors=FALSE)
	df$memberId <- factor(df$memberId, ordered=TRUE, levels=rev(labels))
	return(df)
}





referenceIndex <- function(param) which(stateNames(param) == referenceState(param))

cumulativeLogLik <- function(log_emit){
  LLT <- apply(log_emit, c(2, 3), sum, na.rm=TRUE)
  ## copy number 2 prob is max(diploid not ROH, diploid ROH)
  LLT[, 3] <- pmax(LLT[, 3], LLT[, 4])
  ## remove diploid ROH state
  LLT <- LLT[, -4]
  LLT
}

prTrioState <- function(param, state){
  ## We need to compute Pr(trio state | model)
  ##
  ## See Additional File 1, p6 (Scharpf et al., 2012)
  ##
  ## Pr(trio state | model )      =  Pr(trio state, nonmendelian | model) + Pr (trio state, Mendlianl | model)
  ##                              =  Pr( offspring | parents, nonmendelian) * Pr(parents | nonmendelian) * Pr(nonmendelian) + Pr (offspring | parents, mendelian) * Pr(parents | mendelian) * Pr (mendelian)
  ##                              =  Pr( offspring | nonmendelian) * Pr(parents) * Pr(nonmendelian) + Pr(offspring | mendelian, parents) * pr(parents)* Pr(mendelian)
  ##                              =  Term 1  *  Term2  * Term 3  +  Term4 * Term2 * (1-Term3)
  ##                              =  Term 2 [Term1 * Term3 + Term4*(1-Term3)]
  ## we assume a priori that any of the states are equally likely for the parents
  Term2 <- 1/5^2
  Term3 <- prNonMendelian(param)
  ##
  ## Term 4, or Pr(offspring | parents, mendelian), is given by tabled values in Wang et al. (Suppl Table 1)
  ##
  Term4 <- table1(param)[state]
  Term1 <- 1/5
  Term2 * (Term1 * Term3 + Term4 * (1-Term3))
}


loglikInitial <- function(param, LLT, state){
  ## assume Pr(state_1,father | lambda) = Pr(state_2,mother | lambda) = pi
  ##
  ## Equation 3: Scharpf et al., 2012
  ##
  ## We need to compute the posterior probability of the trio states, or
  ## Pr(trio state | B, R, model)  propto  Likelihood * prior
  ##                               = likelihood * Pr(trio state | model)
  ## Taking logarithms, we have
  ## log lik + log Pr(trio state | model)
  state_index <- state(param)[state, ] + 1L
  ##
  loglik <- sum(diag(LLT[, state_index]))
  ##
  pr_triostate <- prTrioState(param, state)
  ##
  loglik + log(pr_triostate)
}

statesToEvaluate <- function(param, above_thr){
  nms <- stateNames(param)
  if(above_thr){
    x <- setNames(rep(TRUE, length(nms)), nms)
  } else {
    x <- setNames(rep(FALSE, length(nms)), nms)
    x[referenceIndex(param)] <- TRUE
  }
  x
}



setSequenceLengths <- function(build, names){ ## names are unique(seqnames(object))
  sl <- getSequenceLengths(build)
  sl[match(unique(names), names(sl))]
}

.getArm <- function(chrom, pos, genome){
  if(is.integer(chrom)) chrom <- paste("chr", integer2chromosome(chrom), sep="")
  path.gap <- system.file("extdata", package="SNPchip")
  gaps <- readRDS(list.files(path.gap, pattern=paste("gap_", genome, ".rda", sep=""), full.names=TRUE))
  centromere.starts <- start(gaps)
  centromere.ends <- end(gaps)
  names(centromere.ends) <- names(centromere.starts) <- seqnames(gaps)
  centromere.starts <- centromere.starts[chrom]
  centromere.ends <- centromere.ends[chrom]
  chr.arm <- arm <- rep(NA, length(pos))
  arm[pos <= centromere.starts] <- "p"
  arm[pos >= centromere.ends] <- "q"
  ##arm <- ifelse(pos <= centromere.starts, "p", "q")
  chr.arm[!is.na(arm)] <- paste(chrom[!is.na(arm)], arm[!is.na(arm)], sep="")
  chr.arm
}


.checkOrder <- function(object, verbose=FALSE){
  d <- diff(order(chromosome(object), position(object)))
  if(any(d < 0)){
    if(verbose)
      warning("Object should be ordered by chromosome and physical position.\n",
              "Try \n",
              "> object <- order(object) \n")
    return(FALSE)
  }
  TRUE
}

isFF <- function(object){
  names <- ls(assayData(object))
  is(assayData(object)[[names[[1]]]], "ff") | is(assayData(object)[[names[[1]]]], "ffdf")
}


logEmissionArray <- function(object){
  emitlist <- assays(object)
  ##emitlist <- lapply(emitlist, function(x, epsilon) log(x+epsilon), epsilon=epsilon)
  lemit_array <- array(NA, dim=c(nrow(object), length(emitlist), 6))
  for(i in seq_len(length(emitlist))) lemit_array[, i, ] <- log(emitlist[[i]])
  lemit_array
}

#' Function for computing autocorrelations
#'
#' By default, this function returns the lag-10 autocorrelations of a
#' numeric vector and omits missing values.
#'
#' @param x a numeric vector
#' @param lag.max see \code{acf}
#' @param type see \code{acf}
#' @param plot logical, as in \code{acf}
#' @param na.action ignored.  Missing values are automattically omitted.
#' @param demean logical, as in \code{acf}
#' @param ... additional arguments passed to \code{acf}
#' @seealso \code{\link[stats]{acf}}
#' @examples
#' x <- rnorm(100)
#' x[5] <- NA
#' acf2(x)
#' @importFrom stats acf
#' @export
acf2 <- function(x, lag.max=10, type = c("correlation", "covariance", "partial"),
                 plot = FALSE, na.action = na.omit, demean = TRUE,
                 ...){
  x <- x[!is.na(x)]
  y <- acf(x, lag.max=lag.max, type=type, plot=plot,
           na.action=na.action, demean=demean, ...)
  y[[1]][lag.max+1, , 1]
}

colAcfs <- function(X, lag.max=10, plot=FALSE) {
  res <- rep(NA, ncol(X))
  apply(X, 2, acf2, lag.max=lag.max)
}
rscharpf/MinimumDistance documentation built on Sept. 16, 2019, 4:12 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
rscharpf/MinimumDistance
A Package for De Novo CNV Detection in Case-Parent Trios

R/functions.R
In rscharpf/MinimumDistance: A Package for De Novo CNV Detection in Case-Parent Trios

Documented in acf2

R Package Documentation

Browse R Packages

We want your feedback!

rscharpf/MinimumDistance A Package for De Novo CNV Detection in Case-Parent Trios

R/functions.R In rscharpf/MinimumDistance: A Package for De Novo CNV Detection in Case-Parent Trios

Documented in acf2

R Package Documentation

Browse R Packages

We want your feedback!

rscharpf/MinimumDistance
A Package for De Novo CNV Detection in Case-Parent Trios

R/functions.R
In rscharpf/MinimumDistance: A Package for De Novo CNV Detection in Case-Parent Trios