#######################################################################
## ##
## Package: BatchMap ##
## ##
## File: compare.R ##
## Contains: compare, print.compare ##
## ##
## Written by Gabriel R A Margarido & Marcelo Mollinari ##
## copyright (c) 2009, Gabriel R A Margarido & Marcelo Mollinari ##
## ##
## First version: 02/27/2009 ##
## Last update: 07/25/2015 (only documentation, by Augusto Garcia) ##
## License: GNU General Public License version 2 (June, 1991) or later ##
## #
#######################################################################
##' Compare all possible orders (exhaustive search) for a given sequence of
##' markers
##'
##' For a given sequence with \eqn{n}{n} markers, computes the multipoint
##' likelihood of all \eqn{\frac{n!}{2}}{n!/2} possible orders.
##'
##' Since the number \eqn{\frac{n!}{2}}{n!/2} is large even for moderate values
##' of \eqn{n}{n}, this function is to be used only for sequences with
##' relatively few markers. If markers were genotyped in an outcross population,
##' linkage phases need to be estimated and therefore more states need to be
##' visited in the Markov chain; when segregation types are D1, D2 and C,
##' computation can required a very long time (specially when markers linked in
##' repulsion are involved), so we recomend to use this function up to 6 or 7 markers.
##' For inbred-based populations, up to 10 or 11 markers can be ordered with this function,
##' since linkage phase are known.
##' The multipoint likelihood is calculated according to Wu et al.
##' (2002b) (Eqs. 7a to 11), assuming that the recombination fraction is the
##' same in both parents. Hidden Markov chain codes adapted from Broman et al.
##' (2008) were used.
##'
##' @param input.seq an object of class \code{sequence}.
##' @param n.best the number of best orders to store in object (defaults to
##' 50).
##' @param tol tolerance for the C routine, i.e., the value used to evaluate
##' convergence.
##' @param verbose if \code{FALSE} (default), simplified output is displayed.
##' if \code{TRUE}, detailed output is displayed.
##' @return An object of class \code{compare}, which is a list containing the
##' following components: \item{best.ord}{a \code{matrix} containing the best
##' orders.} \item{best.ord.rf}{a \code{matrix} with recombination frequencies
##' for the corresponding best orders.} \item{best.ord.phase}{a \code{matrix}
##' with linkage phases for the best orders.} \item{best.ord.like}{a
##' \code{vector} with log-likelihood values for the best orders.}
##' \item{best.ord.LOD}{a \code{vector} with LOD Score values for the best
##' orders.} \item{data.name}{name of the object of class \code{outcross} with
##' the raw data.} \item{twopt}{name of the object of class \code{rf.2pts} with
##' the 2-point analyses.}
##' @author Marcelo Mollinari, \email{mmollina@@usp.br}
##' @seealso \code{\link[BatchMap]{marker.type}} for details about segregation
##' types and \code{\link[BatchMap]{make.seq}}.
##' @references Broman, K. W., Wu, H., Churchill, G., Sen, S., Yandell, B.
##' (2008) \emph{qtl: Tools for analyzing QTL experiments} R package version
##' 1.09-43
##'
##' Jiang, C. and Zeng, Z.-B. (1997). Mapping quantitative trait loci with
##' dominant and missing markers in various crosses from two inbred lines.
##' \emph{Genetica} 101: 47-58.
##'
##' Lander, E. S., Green, P., Abrahamson, J., Barlow, A., Daly, M. J., Lincoln,
##' S. E. and Newburg, L. (1987) MAPMAKER: An interactive computer package for
##' constructing primary genetic linkage maps of experimental and natural
##' populations. \emph{Genomics} 1: 174-181.
##'
##' Mollinari, M., Margarido, G. R. A., Vencovsky, R. and Garcia, A. A. F.
##' (2009) Evaluation of algorithms used to order markers on genetics maps.
##' _Heredity_ 103: 494-502.
##'
##' Wu, R., Ma, C.-X., Painter, I. and Zeng, Z.-B. (2002a) Simultaneous maximum
##' likelihood estimation of linkage and linkage phases in outcrossing species.
##' \emph{Theoretical Population Biology} 61: 349-363.
##'
##' Wu, R., Ma, C.-X., Wu, S. S. and Zeng, Z.-B. (2002b). Linkage mapping of
##' sex-specific differences. \emph{Genetical Research} 79: 85-96
##' @keywords utilities
##' @examples
##'
##' \dontrun{
##' #outcrossing example
##' data(example.out)
##' twopt <- rf.2pts(example.out)
##' markers <- make.seq(twopt,c(12,14,15,26,28))
##' (markers.comp <- compare(markers))
##' (markers.comp <- compare(markers,verbose=TRUE))
##'
##' }
##'
##'
compare<- function(input.seq,n.best=50,tol=10E-4,verbose=FALSE) {
return(compare.outcross(input.seq=input.seq,n.best=n.best,tol=tol,verbose=verbose))
}
## Compare all possible orders (exhaustive search) for a given sequence of
## markers (for outcrosses)
compare.outcross<- function(input.seq,n.best=50,tol=10E-4,verbose=FALSE)
{
## checking for correct objects
if(!any(class(input.seq)=="sequence"))
stop(sQuote(deparse(substitute(input.seq)))," is not an object of class 'sequence'")
if(length(input.seq$seq.num) > 5)
cat("WARNING: this operation may take a VERY long time\n")
flush.console()
if(length(input.seq$seq.num) > 10) {
cat("\nIt is not wisely trying to use 'compare' with more than 10 markers \n")
ANSWER <- readline("Are you sure you want to proceed? [y or n]\n")
while(substr(ANSWER, 1, 1) != "n" & substr(ANSWER, 1, 1) != "y")
ANSWER <- readline("\nPlease answer: 'y' or 'n' \n")
if (substr(ANSWER, 1, 1) == "n") stop("Execution stopped!")
}
if(length(input.seq$seq.num) == 2)
return(map(input.seq, tol=tol)) ## nothing to be done for 2 markers
else {
## allocating variables
rf.init <- vector("list",length(input.seq$seq.num)-1)
phase.init <- vector("list",length(input.seq$seq.num)-1)
best.ord <- matrix(NA,(n.best+1),length(input.seq$seq.num))
best.ord.rf <- matrix(NA,(n.best+1),length(input.seq$seq.num)-1)
best.ord.phase <- matrix(NA,(n.best+1),length(input.seq$seq.num)-1)
best.ord.like <- best.ord.LOD <- rep(-Inf,(n.best+1))
## 'phases' gathers information from two-point analyses
list.init <- phases(input.seq)
## 'perm.pars' generates all n!/2 orders
all.ord <- perm.pars(input.seq$seq.num)
cat("\nComparing",nrow(all.ord),"orders: \n\n")
if (verbose){
for(i in 1:nrow(all.ord)){
## print output for each order
cat("Order", i, ":", all.ord[i,], "\n")
flush.console()
## get initial values for the HMM
all.match <- match(all.ord[i,],input.seq$seq.num)
for(j in 1:(length(input.seq$seq.num)-1)){
if(all.match[j] > all.match[j+1]){
rf.init[[j]] <- list.init$rf.init[[acum(all.match[j]-2)+all.match[j+1]]]
phase.init[[j]] <- list.init$phase.init[[acum(all.match[j]-2)+all.match[j+1]]]
}
else {
rf.init[[j]] <- list.init$rf.init[[acum(all.match[j+1]-2)+all.match[j]]]
phase.init[[j]] <- list.init$phase.init[[acum(all.match[j+1]-2)+all.match[j]]]
}
}
Ph.Init <- comb.ger(phase.init)
Rf.Init <- comb.ger(rf.init)
if(nrow(Ph.Init)>1){
##Removing ambigous phases
rm.ab<-rem.amb.ph(M=Ph.Init, w=input.seq, seq.num=all.ord[i,])
Ph.Init <- Ph.Init[rm.ab,]
Rf.Init <- Rf.Init[rm.ab,]
if(class(Ph.Init)=="integer"){
Ph.Init<-matrix(Ph.Init,nrow=1)
Rf.Init<-matrix(Rf.Init,nrow=1)
}
}
for(j in 1:nrow(Ph.Init)){
## estimate parameters
final.map <- est_map_hmm_out(geno=t(get(input.seq$data.name, pos=1)$geno[,all.ord[i,]]),
type=get(input.seq$data.name, pos=1)$segr.type.num[all.ord[i,]],
phase=Ph.Init[j,],
rf.vec=Rf.Init[j,],
verbose=FALSE,
tol=tol)
best.ord[(n.best+1),] <- all.ord[i,]
best.ord.rf[(n.best+1),] <- final.map$rf
best.ord.phase[(n.best+1),] <- Ph.Init[j,]
best.ord.like[(n.best+1)] <- final.map$loglike
## arrange orders according to the likelihood
like.order <- order(best.ord.like, decreasing=TRUE)
best.ord <- best.ord[like.order,]
best.ord.rf <- best.ord.rf[like.order,]
best.ord.phase <- best.ord.phase[like.order,]
best.ord.like <- sort(best.ord.like, decreasing=TRUE)
}
}
}
else{
count <- 0
pb <- txtProgressBar(style=3)
setTxtProgressBar(pb, 0)
## nc<-NA
## out.pr <- seq(from=1,to=nrow(all.ord), length.out=20)
cat(" ")
for(i in 1:nrow(all.ord)){
## print output for each order
## if (sum(i == round(out.pr))){
## cat(rep("\b",nchar(nc)+1),sep="")
## nc<-round(i*100/nrow(all.ord))
## cat(nc,"%", sep="")
## flush.console()
## }
## get initial values for the HMM
all.match <- match(all.ord[i,],input.seq$seq.num)
for(j in 1:(length(input.seq$seq.num)-1)){
if(all.match[j] > all.match[j+1]){
rf.init[[j]] <- list.init$rf.init[[acum(all.match[j]-2)+all.match[j+1]]]
phase.init[[j]] <- list.init$phase.init[[acum(all.match[j]-2)+all.match[j+1]]]
}
else {
rf.init[[j]] <- list.init$rf.init[[acum(all.match[j+1]-2)+all.match[j]]]
phase.init[[j]] <- list.init$phase.init[[acum(all.match[j+1]-2)+all.match[j]]]
}
}
Ph.Init <- comb.ger(phase.init)
Rf.Init <- comb.ger(rf.init)
if(nrow(Ph.Init)>1){
##Removing ambigous phases
rm.ab<-rem.amb.ph(M=Ph.Init, w=input.seq, seq.num=all.ord[i,])
Ph.Init <- Ph.Init[rm.ab,]
Rf.Init <- Rf.Init[rm.ab,]
if(class(Ph.Init)=="integer"){
Ph.Init<-matrix(Ph.Init,nrow=1)
Rf.Init<-matrix(Rf.Init,nrow=1)
}
}
for(j in 1:nrow(Ph.Init)){
## estimate parameters
final.map <- est_map_hmm_out(geno=t(get(input.seq$data.name, pos=1)$geno[,all.ord[i,]]),
type=get(input.seq$data.name, pos=1)$segr.type.num[all.ord[i,]],
phase=Ph.Init[j,],
rf.vec=Rf.Init[j,],
verbose=FALSE,
tol=tol)
best.ord[(n.best+1),] <- all.ord[i,]
best.ord.rf[(n.best+1),] <- final.map$rf
best.ord.phase[(n.best+1),] <- Ph.Init[j,]
best.ord.like[(n.best+1)] <- final.map$loglike
## arrange orders according to the likelihood
like.order <- order(best.ord.like, decreasing=TRUE)
best.ord <- best.ord[like.order,]
best.ord.rf <- best.ord.rf[like.order,]
best.ord.phase <- best.ord.phase[like.order,]
best.ord.like <- sort(best.ord.like, decreasing=TRUE)
}
count<-count+1
setTxtProgressBar(pb, count/nrow(all.ord))
}
close(pb)
}
cat("\n")
best.ord.LOD <- round((best.ord.like-max(best.ord.like))/log(10),4)
structure(list(best.ord = best.ord,
best.ord.rf = best.ord.rf,
best.ord.phase = best.ord.phase,
best.ord.like = best.ord.like,
best.ord.LOD = best.ord.LOD,
data.name=input.seq$data.name,
twopt=input.seq$twopt), class = "compare")
}
}
## print method for object class 'compare'
print.compare <- function(x,...) {
FLAG<-0
if(!any(class(get(x$data.name, pos=1)) == "outcross")) FLAG<-1
phases.char <- c("CC","CR","RC","RR")
n.ord <- max(which(head(x$best.ord.LOD,-1) != -Inf))
unique.orders <- unique(x$best.ord[1:n.ord,])
n.ord.nest <- nrow(unique.orders)
phases.nested <- vector("list",n.ord.nest)
LOD <- vector("list",n.ord.nest)
if(!FLAG){
for (i in 1:n.ord.nest) {
same.order <- which(apply(x$best.ord[1:n.ord,],1,function(x) all(x==unique.orders[i,])))
ifelse(length(same.order)==1,phases.nested[[i]] <- t(as.matrix(x$best.ord.phase[same.order,])),phases.nested[[i]] <- x$best.ord.phase[same.order,])
LOD[[i]] <- x$best.ord.LOD[same.order]
}
}
skip <- c(nchar(n.ord.nest),max(nchar(unique.orders[1,])+2))
cat("\nNumber of orders:",n.ord,"\n")
if(FLAG==0){ ## outcrossing
leng.print <- nchar(paste("order ",format(n.ord.nest,width=skip[1]),": ",paste(format(unique.orders[1,],width=skip[2]),collapse="")," ",format(11.11,digits=2,format="f",width=6)," ",format(11.11,digits=2,format="f",width=6),"\n",sep=""))
cat(paste("Best ",n.ord.nest," unique orders",paste(rep(" ",leng.print-37),collapse=""),"LOD Nested LOD","\n",sep=""))
cat(paste(rep("-",leng.print),collapse=""),"\n")
}
else if(FLAG==1){ ## other
leng.print <- nchar(paste("order ",format(n.ord.nest,width=skip[1]),": ",paste(format(unique.orders[1,],width=skip[2]),collapse="")," ",format(11.11,digits=2,format="f",width=6),"\n",sep=""))
cat(paste("Best ",n.ord.nest," unique orders",paste(rep(" ",leng.print-25),collapse=""),"LOD","\n",sep=""))
cat(paste(rep("-",leng.print),collapse=""),"\n")
}
else stop ("Should not get here!")
if(FLAG==0){ ## outcrossing
for (i in 1:n.ord.nest) {
cat(paste("order ",format(i,width=skip[1]),": ",paste(format(unique.orders[i,],width=skip[2]),collapse=""),"\n",sep=""))
for (j in 1:dim(phases.nested[[i]])[1]) {
cat(paste("\t",paste(rep(" ",1+skip[1]+skip[2]),collapse=""),paste(format(phases.char[phases.nested[[i]][j,]],width=skip[2]),collapse="")," ",formatC(round(LOD[[i]][j],2),digits=2,format="f",width=6)," ",formatC(round(LOD[[i]][j]-LOD[[i]][1],2),digits=2,format="f",width=6),"\n",sep=""))
}
cat(paste(rep("-",leng.print),collapse=""))
cat("\n")
}
}
else if(FLAG==1){ ## other
for (i in 1:n.ord.nest) {
cat(paste("order ",format(i,width=skip[1]),": ",paste(format(unique.orders[i,],width=skip[2]),collapse=""), " ",formatC(round(x$best.ord.LOD[i],2),digits=2,format="f",width=6), "\n",sep=""))
}
cat(paste(rep("-",leng.print),collapse=""))
cat("\n")
}
else stop ("Should not get here!")
}
## end of file
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.