Nothing
############
## generics
############
#' @export
seqTrack <- function(...){
UseMethod("seqTrack")
}
#' @method seqTrack default
#' @export
seqTrack.default <- function(x, ...){
cat("\nseqTrack not implemented for object of the class", class(x),"\n")
return(invisible(NULL))
}
## seqTrackG <- function(...){
## UseMethod("seqTrackG")
## }
## optimize.seqTrack <- function(...){
## UseMethod("optimize.seqTrack")
## }
get.likelihood <- function(...){
UseMethod("get.likelihood")
}
########################
## seqTrack - basic version
########################
##
## - x is a matrix giving weights x[i,j] such that:
## 'i is ancestor j'
## - prox.mat is a directed proximity measure, so that prox.mat[i,j] is
## the 'proximity when going from i to j'
##
#' @method seqTrack matrix
#' @export
seqTrack.matrix <- function(x, x.names, x.dates, best=c("min","max"),
prox.mat=NULL, mu=NULL, haplo.length=NULL, ...){
## CHECKS ##
best <- match.arg(best)
if(best=="min"){
best <- min
which.best <- which.min
} else {
best <- max
which.best <- which.max
}
if(length(x.names) != length(x.dates)){
stop("inconsistent length for x.dates")
}
if(is.character(x.dates)){
msg <- paste("x.dates is a character vector; " ,
"please convert it as dates using 'as.POSIXct'" ,
"\n(making sure dates are given as 'YYYY/MM/DD' or 'YYYY-MM-DD').", sep="")
stop(msg)
}
x <- as.matrix(x)
if(!is.null(prox.mat) && !identical(dim(prox.mat),dim(x))) {
stop("prox.mat is provided but its dimensions are inconsistent with that of x")
}
N <- length(x.names)
id <- 1:N
x.dates <- as.POSIXct(round.POSIXt(x.dates,units="days")) # round dates to the day
temp <- as.vector(unique(x))
D.ARE.MUT <- all(temp-round(temp,10)<1e-14)
## rename dimensions using id
colnames(x) <- rownames(x) <- id
if(!is.null(prox.mat)){
colnames(prox.mat) <- rownames(prox.mat) <- id
}
if(length(x.names) != nrow(x)){
stop("inconsistent dimension for x")
}
## AUXILIARY FUNCTIONS ##
## test equality in floats
test.equal <- function(val,vec){
return(abs(val-vec) < 1e-12)
}
## return the names of optimal value(s) in a named vector
which.is.best <- function(vec){
res <- names(vec)[test.equal(best(vec), vec)]
return(res)
}
## select among different possible ancestors
selAmongAncestors <- function(idx,ances){
## Choose the most connected ancestor, given prox.mat
if(!is.null(prox.mat)){ # if we've got no other info
toKeep <- test.equal(max(prox.mat[ances,idx]), prox.mat[ances,idx])
ances <- ances[toKeep]
}
## If several ancestors remain, take the one closest to the average generation time.
if(length(ances)>1){
if(!D.ARE.MUT | is.null(mu) | is.null(haplo.length)) { # if we don't have mutation rates / haplo length, or if dist. are not nb of mutations
ances <- ances[which.min(x.dates[ances])] # take the oldest ancestor
} else { # if distances are mutations and we've got mu and L
timeDiff <- as.numeric(difftime(x.dates[idx], x.dates[ances], units="day")) # days between candidates and target
##nbGen <- round(timeDiff / gen.time) # number of generations
nbMut <- x[ances, idx]
prob <- dbinom(nbMut, timeDiff*haplo.length, mu)
ances <- ances[which.max(prob)] # take the most likely ancestor
}
}
return(ances)
}
## findAncestor
findAncestor <- function(idx){ # returns the index of one seq's ancestor
candid <- which(x.dates < x.dates[idx])
if(length(candid)==0) return(list(ances=NA, weight=NA))
if(length(candid)==1) return(list(ances=candid, weight=x[candid, idx]))
ancesId <- as.numeric(which.is.best(x[candid, idx]))
if(length(ancesId)>1) {
ancesId <- selAmongAncestors(idx,ancesId) # handle several 'best' ancestors
}
return(list(ances=ancesId, weight=x[ancesId, idx])) # Id of the ancestor
}
## BUILD THE OUTPUT ##
res <- sapply(id, findAncestor)
res <- data.frame(ances=unlist(res[1,]), weight=unlist(res[2,]))
ances.date <- x.dates[res[,1]]
res <- cbind.data.frame(id,res, date=x.dates, ances.date)
rownames(res) <- x.names
class(res) <- c("seqTrack","data.frame")
return(res)
} # end seqTrack.matrix
################
## plotSeqTrack
################
plotSeqTrack <- function(x, xy, use.arrows=TRUE, annot=TRUE, labels=NULL,
col=NULL, bg="grey", add=FALSE, quiet=FALSE, date.range=NULL,
jitter.arrows=0, plot=TRUE,...){
## CHECKS ##
if(!inherits(x,"seqTrack")) stop("x is not a seqTrack object")
if(ncol(xy) != 2) stop("xy does not have two columns")
if(nrow(xy) != nrow(x)) stop("x and xy have inconsistent dimensions")
## RECYCLE COL
if(!is.null(col)){
col <- rep(col,length=nrow(x))
} else {
col <- rep("black", nrow(x))
}
## DEFAULT LABELS
if(is.null(labels)){
if(!is.null(rownames(x))){
labels <- rownames(x)
} else {
labels <- 1:nrow(x)
}
}
## SUBSET DATA (REMOVE NAs) ##
isNA <- is.na(x[,2])
x <- x[!isNA,,drop=FALSE]
xy.all <- xy # used to retrieve all coordinates
xy <- xy[!isNA,,drop=FALSE]
if(!is.null(labels)){ # subset labels
labels <- labels[!isNA]
}
if(!is.null(col)){ # subset colors
col <- col[!isNA]
}
## FIND SEGMENTS COORDS ##
from <- unlist(x[,2])
to <- unlist(x[,1])
x.from <- xy.all[from,1]
y.from <- xy.all[from,2]
x.to <- xy.all[to,1]
y.to <- xy.all[to,2]
## HANDLE RANGE OF DATES ##
if(!is.null(date.range)){
if(is.character(date.range)){
msg <- paste("date.range is a character vector; " ,
"please convert it as dates using 'as.POSIXct'" ,
"\n(making sure dates are given as 'YYYY/MM/DD' or 'YYYY-MM-DD').", sep="")
stop(msg)
}
if(any(is.na(date.range))){
stop("NA in date.range")
}
dates <- x$date
toKeep <- (dates > min(date.range)) & (dates < max(date.range))
if(sum(toKeep)==0) {
if(!quiet) cat("\nNo item in the specified date range.\n")
return(NULL)
}
## SUBSETTING
x.from <- x.from[toKeep]
y.from <- y.from[toKeep]
x.to <- x.to[toKeep]
y.to <- y.to[toKeep]
col <- col[toKeep]
xy <- xy[toKeep,,drop=FALSE]
x <- x[toKeep,,drop=FALSE]
labels <- labels[toKeep]
}
## DO THE PLOTTING ##
if(plot){
obg <- par("bg")
on.exit(par(bg=obg))
if(!add){
par(bg=bg)
plot(xy, type="n", ...)
}
}
## PLOTTING ##
if(plot){
## ARROWS
if(use.arrows){
## handle segments/arrows with length 0 ##
nullLength <- (abs(x.from-x.to)<1e-10) & (abs(y.from-y.to)<1e-10)
## handle random noise around coordinates
if(jitter.arrows>0){
x.from[!nullLength] <- jitter(x.from[!nullLength], jitter.arrows)
x.to[!nullLength] <- jitter(x.to[!nullLength], jitter.arrows)
y.from[!nullLength] <- jitter(y.from[!nullLength], jitter.arrows)
y.to[!nullLength] <- jitter(y.to[!nullLength], jitter.arrows)
}
arrows(x.from[!nullLength], y.from[!nullLength],
x.to[!nullLength], y.to[!nullLength],
col=col[!nullLength], angle=15, ...)
} else{
## SEGMENTS
## handle random noise around coordinates
if(jitter.arrows>0){
x.from[!nullLength] <- jitter(x.from[!nullLength], jitter.arrows)
x.to[!nullLength] <- jitter(x.to[!nullLength], jitter.arrows)
y.from[!nullLength] <- jitter(y.from[!nullLength], jitter.arrows)
y.to[!nullLength] <- jitter(y.to[!nullLength], jitter.arrows)
}
segments(x.from, y.from, x.to, y.to, col=col,...)
}
## ANNOTATIONS
if(annot) {
text(xy,lab=labels, font=2)
}
## SUNFLOWERS / POINTS
if(any(nullLength)) {
sunflowerplot(x.from[nullLength], y.from[nullLength], seg.lwd=2, size=1/6,
col=col[nullLength], seg.col=col[nullLength], add=TRUE, ...)
points(x.from[nullLength], y.from[nullLength], col=col[nullLength], cex=2, pch=20, ...)
}
}
## RESULT ##
res <- data.frame(x.from, y.from, x.to, y.to, col=col)
return(invisible(res))
} # end plotSeqTrack
###########################
## get.likelihood.seqTrack
###########################
#' @export
#' @method get.likelihood seqTrack
get.likelihood.seqTrack <- function(x, mu, haplo.length,...){
if(any(na.omit(x$weight - round(x$weight)) > 1e-10)){
warning("Non-integer weights: number of mutations expected in x$weight.")
}
dates <- as.POSIXct(x$date)
anc.dates <- as.POSIXct(x$ances.date)
nb.days <- abs(as.integer(anc.dates-dates))
nb.mut <- x$weight
## res <- dbinom(nb.mut, size=haplo.length*nb.days, prob=mu)
res <- dpois(x=nb.mut, lambda=mu*haplo.length*nb.days)
return(res)
} # end get.likelihood.seqTrack
######################
## as.igraph.seqTrack
######################
as.igraph.seqTrack <- function(x, col.pal=redpal, ...){
## GET DAG ##
from.old <- x$ances
to.old <- x$id
isNotNA <- !is.na(from.old) & !is.na(to.old)
vnames <- sort(unique(c(from.old,to.old)))
from <- match(from.old,vnames)
to <- match(to.old,vnames)
dat <- data.frame(from,to,stringsAsFactors=FALSE)[isNotNA,,drop=FALSE]
out <- graph.data.frame(dat, directed=TRUE, vertices=data.frame(names=vnames))
## SET VARIOUS INFO ##
## WEIGHTS FOR EDGES
E(out)$weight <- x$weight[isNotNA]
## DATES FOR VERTICES (IN NB OF DAYS FROM EARLIEST DATE)
V(out)$dates <- difftime(x$date, min(x$date), units="days")
## SET EDGE LABELS ##
E(out)$label <- E(out)$weight
## SET EDGE COLORS
E(out)$color <- num2col(E(out)$weight, col.pal=col.pal, reverse=TRUE)
## SET LAYOUT ##
ypos <- V(out)$dates
ypos <- abs(ypos-max(ypos))
attr(out, "layout") <- layout.fruchterman.reingold(out, params=list(miny=ypos, maxy=ypos))
## RETURN OBJECT ##
return(out)
} # end as.igraph.seqTrack
#################
## plot.seqTrack
#################
#' @method plot seqTrack
#' @export
plot.seqTrack <- function(x, y=NULL, col.pal=redpal, ...){
## get graph ##
g <- as.igraph(x, col.pal=col.pal)
## make plot ##
plot(g, layout=attr(g,"layout"), ...)
## return graph invisibly ##
return(invisible(g))
} # end plot.seqTrack
################################################
################################################
######### OLD STUFF - NOT USED FOR NOW ######
################################################
################################################
##########################
## as("seqTrack", "graphNEL")
##########################
## if(require(graph)){
## setOldClass("seqTrack")
## setAs("seqTrack", "graphNEL", def=function(from){
## ## if(!require(ape)) stop("package ape is required")
## if(!require(graph)) stop("package graph is required")
## ori.labels <- rownames(from)
## from <- from[!is.na(from$ances),,drop=FALSE]
## ## CONVERT TO GRAPH
## res <- ftM2graphNEL(ft=cbind(ori.labels[from$ances], ori.labels[from$id]), W=from$weight, edgemode = "directed", V=ori.labels)
## return(res)
## })
## }
## #############
## ## .dTimeSeq
## #############
## ##
## ## mu0 and L are vectors, having one value per segment/chromosome
## ## mu0 is per nucleotide and per day
## .dTimeSeq <- function(mu, L, maxNbDays=100){
## ##mu <- mu/365 # mutation rates / site / day
## t <- 0:maxNbDays # in days added / substracted
## temp <- sapply((1-mu)^L, function(x) x^t )
## Pt <- apply(temp,1,prod)
## t <- c(-rev(t[-1]), t)
## Pt <- c(rev(Pt[-1]), Pt)
## return(list(t, Pt))
## }
## #############
## ## .rTimeSeq
## #############
## ##
## ## mu and L are vectors, having one value per segment/chromosome
## ##
## ## this returns nb days
## .rTimeSeq <- function(n, mu, L, maxNbDays=100){
## temp <- .dTimeSeq(mu, L, maxNbDays)
## res <- sample(temp[[1]], size=n, replace=TRUE, prob= temp[[2]]/sum(temp[[2]]))
## return(res)
## }
## #################
## ## .rUnifDate
## #################
## ##
## ## this returns random uniform dates in a given range
## ##
## .rUnifDate <- function(n, dateMin, dateMax, ...){
## rangeSize <- as.integer(difftime(dateMax,dateMin, units="days"))
## nbDays <- round(runif(n, min=0, max=rangeSize))
## res <- dateMin + nbDays*3600*24
## res <- as.POSIXct(round.POSIXt(res, units="days"))
## return(res)
## }
## #################
## ## .rNormTimeSeq
## #################
## ##
## ## this returns nb of days
## .rNormTimeSeq <- function(n, mean, sd, ...){
## res <- round(rnorm(n, mean=mean, sd=sd))
## return(res)
## }
## #################
## ## .rSampTimeSeq
## #################
## ##
## ## this returns nb of days
## .rSampTime <- function(n,...){
## res <- round(rnorm(n*2, -2))
## res <- res[res < 0 & res > -7][1:n]
## return(res)
## }
## ##################
## ## .pAbeforeBfast
## ##################
## ##
## ## faster version, same mu and length for both sequences
## ## already vectorised for dateA and dateB
## .pAbeforeBfast <- function(dateA, dateB, mu, L, maxNbDays=100){
## ## proba dist for both haplo
## temp <- .dTimeSeq(mu, L, maxNbDays)
## days <- temp[[1]]
## p <- temp[[2]]/sum(temp[[2]]) # scale to proba mass function
## ## days for A and B
## nbDays <- as.integer(round(difftime(dateB,dateA,units="days"))) # dateA - dateB, in days
## ## function for one comparison
## f1 <- function(Dt,max){ # does not work for Dt < 0 (have to reverse proba after)
## if(is.na(Dt)) return(NA)
## if(Dt>max) return(1)
## if(round(Dt)==0){
## temp <- sapply(1:(max-1), function(i) p[i]*sum(p[(i+1):max]))
## return(sum(temp))
## }
## term1 <- sum(p[1:Dt])
## idx <- seq(2,by=1,length=(max-Dt))
## temp <- sapply(idx, function(i) sum(p[i:max]))
## term2 <- sum( p[(Dt+1):max] * temp)
## return(term1+term2)
## }
## ## computations
## distribSize <- length(days)
## res <- sapply(nbDays, f1, max=distribSize)
## res[nbDays<0] <- 1-res[nbDays<0] # reverse proba for negative time diff
## return(res)
## } # end .pAbeforeBfast
## ##############
## ## .pAbeforeB
## ##############
## ##
## ## allows for different distributions for both haplo
## .pAbeforeB <- function(dateA, dateB, muA, muB, LA, LB, maxNbDays=100){
## ## proba dist for A
## tempA <- .dTimeSeq(muA, LA, maxNbDays)
## days <- tempA[[1]]
## pA <- tempA[[2]]/sum(tempA[[2]]) # scale to proba mass function
## ## proba dist for B
## tempB <- .dTimeSeq(muB, LB, maxNbDays)
## pB <- tempB[[2]]/sum(tempB[[2]]) # scale to proba mass function
## ## days for A and B
## nbDaysDiff <- as.integer(round(difftime(dateA,dateB,units="days"))) # dateA - dateB, in days
## daysA <- days
## daysB <- days - nbDaysDiff
## f1 <- function(i){ # proba A before B for one day
## idx <- daysB > daysA[i]
## return(pA[i] * sum(pB[idx]))
## }
## res <- sapply(1:length(days), f1) # proba for all days
## res <- sum(res) # sum
## return(res)
## }
## .pAbeforeB <- Vectorize(.pAbeforeB,
## vectorize.args=c("dateA","dateB", "muA", "muB", "LA", "LB")) ## end .pAbeforeB
## ##############
## ## seqTrackG - graph version of SeqTrack
## ##############
## ##
## ## - x is a matrix giving weights x[i,j] such that:
## ## 'i is ancestor j'; the algo looks for maximal weight branching
## ##
## ## - prox.mat is a directed proximity measure, so that prox.mat[i,j] is
## ## the 'proximity when going from i to j'
## ##
## seqTrackG.default <- function(x, x.names, x.dates, best=c("min","max"), force.temporal.order=TRUE,
## res.type=c("seqTrack", "graphNEL"), ...){
## ## CHECKS ##
## if(!require("graph")) stop("the graph package is not installed")
## if(!require("RBGL")) stop("the RBGL package is not installed")
## if(!exists("edmondsOptimumBranching")) {
## stop("edmondsOptimumBranching does not exist; \nmake sure to use the latest Bioconductor (not CRAN) version of RBGL")
## cat("\nWould you like to try and install latest version of RBGL (needs internet connection)\n y/n: ")
## ans <- tolower(as.character(readLines(con = getOption('adegenet.testcon'), n = 1)))
## if(ans=="y"){
## source("http://bioconductor.org/biocLite.R")
## biocLite("RBGL")
## }
## }
## best <- match.arg(best)
## res.type <- match.arg(res.type)
## if(length(x.names) != length(x.dates)){
## stop("inconsistent length for x.dates")
## }
## if(is.character(x.dates)){
## msg <- paste("x.dates is a character vector; " ,
## "please convert it as dates using 'as.POSIXct'" ,
## "\n(making sure dates are given as 'YYYY/MM/DD' or 'YYYY-MM-DD').", sep="")
## stop(msg)
## }
## x <- as.matrix(x)
## x.dates <- as.POSIXct(round.POSIXt(x.dates,units="days")) # round dates to the day
## if(length(x.names) != nrow(x)){
## stop("inconsistent dimension for x")
## }
## ## HANDLE BEST==MIN ##
## ## reverse x by a translation
## x.old <- x
## areZero <- x<1e-14
## x <- (max(x)+1) - x
## x[areZero] <- 0
## diag(x) <- 0
## ## BUILD THE GRAPH ##
## ## make prox matrix with temporal consistency
## if(force.temporal.order){
## x[outer(myDates, myDates, ">=")] <- 0
## }
## ## tweak to get around a bug in as(...,"graphNEL") - looses edge weights
## ## to replace with commented line once fixed in CRAN release
## ## myGraph <- as(x, "graphNEL")
## myGraph <- as(new("graphAM", x, edgemode="directed", values=list(weight=1)), "graphNEL")
## ## CALL EDMONDSOPTIMUMBRANCHING ##
## temp <- edmondsOptimumBranching(myGraph)
## ## SHAPE OUTPUT ##
## if(res.type=="seqTrack"){
## ## reorder output from edmondsOptimumBranching
## N <- length(x.names)
## myLev <- x.names
## ances <- as.integer(factor(temp$edgeList["from",], levels=myLev))
## desc <- as.integer(factor(temp$edgeList["to",], levels=myLev))
## newOrd <- order(desc)
## desc <- 1:N
## ances <- ances[newOrd]
## weights <- as.numeric(temp$weights)[newOrd]
## ## create the data.frame
## res <- data.frame(id=1:N)
## hasNoAnces <- difftime(myDates, min(myDates), units="secs") < 1 # 1 sec resolution for dates
## res$weight <- res$ances <- 1:N
## res$date <- x.dates
## ## fill in the d.f. with correct values
## res$ances[!hasNoAnces] <- ances
## res$ances[hasNoAnces] <- NA
## res$weight[!hasNoAnces] <- weights
## res$weight[hasNoAnces] <- NA
## res$ances.date <- x.dates[res$ances]
## res[is.na(res)] <- NA # have clean NAs
## row.names(res) <- x.names
## class(res) <- c("seqTrack","data.frame")
## ## handle best==min
## if(best=="min"){
## res$weight <- max(x.old)+1 - res$weight
## }
## }
## if(res.type=="graphNEL"){
## ## handle optim==min
## if(best=="min"){
## temp$weights <- max(x.old)+1 - temp$weights
## }
## res <- ftM2graphNEL(t(temp$edgeList), W=temp$weights, edgemode="directed")
## }
## ## RETURN RESULT ##
## return(res)
## } # end seqTrackG
#####################
## optimize.seqTrack
#####################
##
## TODO:
## 1) Change the output to retain xxx simulations | ok. -- done.
## 2) VECTORIZE mu and seq.length, recycle if needed with a warning
## 3) uncomment, adapt, and test code for missing data
##
## optimize.seqTrack.default <- function(x, x.names, x.dates, typed.chr=NULL, mu=NULL, seq.length=NULL,
## thres=0.2, best=c("min","max"), prox.mat=NULL, nstep=10, step.size=1e3,
## rDate=.rTimeSeq, arg.rDate=NULL, rMissDate=.rUnifDate, ...){
## ## CHECKS ##
## best <- match.arg(best)
## if(best=="min"){
## which.best <- which.min
## } else {
## which.best <- which.max
## }
## if(length(x.names) != length(x.dates)){
## stop("inconsistent length for x.dates")
## }
## if(is.character(x.dates)){
## msg <- paste("x.dates is a character vector; " ,
## "please convert it as dates using 'as.POSIXct'" ,
## "\n(making sure dates are given as 'YYYY/MM/DD' or 'YYYY-MM-DD').", sep="")
## stop(msg)
## }
## isMissDate <- is.na(x.dates)
## if(!identical(rDate, .rTimeSeq)){
## if(is.null(arg.rDate)){
## warning("Specific time distribution specified without arguments.")
## arg.rDate <- list(n=step.size)
## } else {
## if(!is.list(arg.rDate)) stop("If provided, arg.rDate must be a list.")
## if(!is.null(arg.rDate$n)) {
## warning("arg.rDate$n is provided, but will be replaced by step.size.")
## }
## arg.rDate$n <- step.size
## }
## }
## N <- length(x.names)
## id <- 1:N
## ## if(length(mu) < N) { # recycle mu
## ## mu <- rep(mu, length=N)
## ## }
## ## if(length(seq.length) < N) {# recycle seq.length
## ## seq.length <- rep(seq.length, length=N)
## ## }
## ## handle typed.chr, mu, seq.length
## if(identical(rDate, .rTimeSeq)){
## if(is.null(typed.chr)|is.null(mu)|is.null(seq.length)){
## stop("typed.chr, mu, and seq.length must be provided if rDate is .rTimeSeq")
## }
## if(!is.list(typed.chr)) {
## stop("typed.chr must be a list")
## }
## if(length(typed.chr)!=N) {
## stop("typed.chr has an inconsistent length")
## }
## if(is.null(names(mu))) stop("mu has no names")
## if(is.null(names(seq.length))) stop("seq.length has no names")
## if(any(mu > 1)) stop("mu has values > 1")
## if(any(mu < 0)) stop("mu has negative values")
## if(!identical(names(mu) , names(seq.length))) stop("Names of mu and seq.length differ.")
## if(any(!unique(unlist(typed.chr)) %in% names(mu))) {
## stop("Some chromosomes indicated in typed.chr are not in mu.")
## }
## list.mu <- lapply(typed.chr, function(e) mu[e])
## list.seq.length <- lapply(typed.chr, function(e) seq.length[e])
## }
## x.dates <- as.POSIXct(round.POSIXt(x.dates,units="days")) # round dates to the day
## x <- as.matrix(x)
## if(!is.null(prox.mat) && !identical(dim(prox.mat),dim(x))) {
## stop("prox.mat is provided but its dimensions are inconsistent with that of x")
## }
## ## rename dimensions using id
## colnames(x) <- rownames(x) <- id
## if(length(x.names) != nrow(x)){
## stop("inconsistent dimension for x")
## }
## ## SET THRESHOLD IF NEEDED ## ## NO LONGER USED
## ## if(is.null(thres)){
## ## thres <- sum(seqTrack(x.names=x.names, x.dates=x.dates, W=W,
## ## best=best, prox.mat=prox.mat, ...)$weight, na.rm=TRUE)
## ## }
## ## AUXILIARY FUNCTIONS ##
## ## to compare results -> returns a list of length two: logical, and the value of the res
## val.res <- function(res){
## return(sum(res$weight, na.rm=TRUE))
## }
## ## DO THE OPTIMISATION ##
## RANGE.DATES <- as.integer(round(diff(range(x.dates, na.rm=TRUE)))) # time window of the sample, in days
## NB.DATES.TO.SIM <- sum(!isMissDate)
## ## for loop is not to slow for < 1e6 rep
## ## and allows not to handle huge objects
## ## (which would grow exponentially)
## ## res.best <- res.ini # initialization
## ## DEFINE OUTPUTS ##
## ances <- integer(0)
## date <- character(0)
## ances.date <- character(0)
## valRes <- numeric(0)
## ## DEFAULT CASE: NO MISSING DATES
## if(!any(isMissDate)){
## ## dates initialisation, taken from initial prior
## ## If dates distrib is .rTimeSeq
## if(identical(rDate, .rTimeSeq)){
## newDates <- sapply(1:N, function(i)
## rDate(n=step.size, mu=list.mu[[i]], L=list.seq.length[[i]],
## maxNbDays=RANGE.DATES))
## } else { ## Else, any other distrib with free arguements
## newDates <- sapply(1:N, function(i) do.call(rDate, arg.rDate))
## }
## newDates <- t(newDates)*24*3600 + x.dates
## ## >> one step of 'step.size' simulations, all with same prior << ##
## for(i in 1:nstep){
## ## >> each step contains 'step.size' iterations << ##
## for(j in 1:step.size){
## myDates <- as.POSIXct(newDates[,j])
## res.new <- seqTrack(x, x.names=x.names, x.dates=myDates,
## best=best, prox.mat=prox.mat, ...)
## ##ances <- cbind(ances, res.new$ances) # not needed now
## date <- cbind(date, as.character(res.new$date))
## ##ances.date <- cbind(ances.date, as.character(res.new$ances.date)) # not needed now
## valRes <- c(valRes, val.res(res.new))
## ##}
## } # end for j
## ## retain a given % (thres) of the dates ##
## toKeep <- valRes <= quantile(valRes, thres) ## NOT WORKING FOR optim==max !!!
## valRes <- valRes[toKeep]
## date <- date[,toKeep,drop=FALSE] # retained posterior
## ## DEBUGING ##
## ## cat("\ntoKeep:\n")
## ## print(toKeep)
## ## cat("\nhead date (posterior):\n")
## ## print(head(date))
## ## END DEBUGING ##
## newDates <- apply(date, 1, function(vec)
## sample(vec, size=step.size, replace=TRUE)) # new prior
## newDates <- t(newDates)
## ## stop if all dates are fixed
## if(all(apply(newDates, 1, function(r) length(unique(r))==1))){
## cat("\nConvergence reached at step",i,"\n")
## break # stop the algorithm
## }
## ## re-initialize posterior distributions
## if(i<nstep){
## ## ances <- integer(0) # not needed now
## date <- character(0)
## ## ances.date <- character(0) # not needed now
## valRes <- numeric(0)
## } # end if
## } # end for i
## ## ## dates: new prior taken from obtained posterior
## ## if(length(valRes)==0) { # if no simul are retained
## ## warning(paste("No simulation was retained at the given threshold at step",i))
## ## } else {
## ## if(optim=="min"){ # define weights for further samplings
## ## w <- max(valRes,na.rm=TRUE) - valRes
## ## w <- w/sum(w)
## ## } else {
## ## w <- valRes
## ## w <- w/sum(w)
## ## }
## ## newDates <- apply(date, 1, function(vec) # used a weighted sampling
## ## sample(vec, size=step.size, replace=TRUE, prob=w))
## } # end if(!any(isMissDate))
## ## ## OTHER CASE: HANDLE MISSING DATES
## ## if(any(isMissDate)){
## ## ## Handle distribution and its parameters ##
## ## argList <- list(...)
## ## if(is.null(argList$dateMin) & identical(rMissDate, .rUnifDate)){ # earliest date
## ## argList$dateMin <- min(x.dates,na.rm=TRUE)
## ## } else {
## ## argList$dateMin[is.na(argList$dateMin)] <- min(x.dates,na.rm=TRUE)
## ## }
## ## if(is.null(argList$dateMax) & identical(rMissDate, .rUnifDate)){ # latest date
## ## argList$dateMax <- max(x.dates,na.rm=TRUE)
## ## } else {
## ## argList$dateMax[is.na(argList$dateMax)] <- max(x.dates,na.rm=TRUE)
## ## }
## ## argList$n <- sum(isMissDate)
## ## ## Make simulations ##
## ## for(i in 1:nstep){
## ## myDates <- x.dates
## ## ## distribution for available dates
## ## myDates[!isMissDate] <- myDates[!isMissDate] +
## ## rDate(n=NB.DATES.TO.SIM, mu=mu, L=seq.length, maxNbDays=2*RANGE.DATES)
## ## ## distribution for missing dates
## ## myDates[isMissDate] <- do.call(rMissDate, argList)
## ## res.new <- seqTrack(x.names=x.names, x.dates=myDates, W=W, optim=optim, prox.mat=prox.mat, ...)
## ## valRes[i] <- sum(res.new$weight,na.rm=TRUE)
## ## if(use.new.res(res.best, res.new)){
## ## res.best <- res.new
## ## }
## ## }
## ## }
## ## RESULT ##
## ## reconstruct the result with new dates
## res <- lapply(1:ncol(date), function(i)
## seqTrack(x=x, x.names=x.names, x.dates=as.POSIXct(date[,i]),
## best=best, prox.mat=prox.mat, ...))
## ances <- data.frame(lapply(res, function(e) e$ances))
## ances <- matrix(as.integer(unlist(ances)), nrow=nrow(ances))
## ances.date <- data.frame(lapply(res, function(e) as.character(e$ances.date)))
## ances.date <- matrix(as.character(unlist(ances.date)), nrow=nrow(ances.date))
## res <- list(ances=ances, date=date, ances.date=ances.date, valsim=valRes)
## return(res)
## } # optimize.seqTrack
## #################
## ## get.result.by
## #################
## .get.result.by <- function(x, ...){
## dat <- list(...)
## if(length(dat)==0) return(x)
## ## DEFINE NEW VALUES ##
## convertElem <- function(e){
## if(class(e)=="DNAbin") {
## e <- as.matrix(e)
## }
## ori.dim <- dim(e)
## e <- as.character(e)
## dim(e) <- ori.dim
## return(e)
## }
## dat <- lapply(dat,convertElem)
## dat <- as.matrix(data.frame(dat))
## newval <- apply(dat, 1, function(vec) paste(vec, collapse=""))
## newval <- unclass(factor(newval))
## newlev <- levels(newval)
## ## if x is a single output of seqTrack
## if(is.vector(x$ances)){
## newId <- newval # new values
## newAnces <- newval[x$ances] # new values
## ## make output
## res <- x
## res$id <- newId
## res$ances <- newAnces
## attr(res$ances, "levels") <- newlev
## }
## ## if x is an optimize.seqTrack output
## if(is.matrix(x$ances)){
## res <- x
## ori.ncol <- ncol(res$ances)
## res$ances <- matrix(newval[res$ances], ncol=ori.ncol)
## attr(res$ances, "levels") <- newlev
## }
## ## method for haploGen
## if(inherits(x,"haploGen")){
## res <- x
## ances.id <- match(x$ances, labels(x))
## }
## return(res)
## } # end get.result.by
## #################
## ## get.consensus
## #################
## .get.consensus <- function(orires, listres, mode=c("majority","best")){
## ## handle mode
## mode <- match.arg(mode)
## res <- orires
## if(mode=="majority"){
## nbDraws <- 0
## ## tables of occurences of ancestors
## temp <- apply(listres$ances, 1, table)
## ## compute compromise
## if(!is.list(temp)){
## newances <- temp
## ances.support <- rep(1,length(temp))
## } else {
## f1 <- function(tab){
## if(length(tab)==0) return(NA)
## res <- names(tab)[tab==max(tab)]
## ## if(length(res)==1) return(res)
## ## return(NA)
## if(length(res)>1) {
## nbDraws <- nbDraws+1
## }
## return(res[1])
## }
## newances <- sapply(temp, f1)
## ances.support <- sapply(temp, function(e) max(e, na.rm=TRUE)/sum(e, na.rm=TRUE))
## ances.support[is.na(newances)] <- NA
## }
## ## form the output
## olev <- levels(orires$ances)
## res$ances <- newances
## levels(res$ances) <- olev
## res$support <- ances.support
## res$weight <- rep(1, length(res$date))
## if(is.numeric(listres$ances)){
## res$ances <- as.numeric(res$ances)
## }
## cat("\nThere were\n",nbDraws, "draws.\n")
## } # end majority
## if(mode=="best"){
## toKeep <- which.min(listres$valsim)
## nbDraws <- sum(listres$valsim < (min(listres$valsim) + 1e-10 )) -1
## cat("\nThere were\n",nbDraws, "draws.\n")
## res$ances <- listres$ances[,toKeep]
## res$inf.date <- listres$date[,toKeep]
## res$ances.date <- listres$ances.date[,toKeep]
## res$weight <- rep(-1, length(res$date))
## }
## return(res)
## } # end get.consensus
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.