Nothing
############
## haploGen
############
##
## N: number of sequences to simulate
## mu: mutation rate per nucleotid per generation
## Tmax: periode of time to simulate
## mean.gen.time, sd.gen.time: average time for transmission and its standard deviation (normal dist)
## mean.repro, sd.repro: average number of transmissions and its standard deviation (normal dist)
##
haploGen <- function(seq.length=1e4, mu.transi=1e-4, mu.transv=mu.transi/2, t.max=20,
gen.time=function(){1+rpois(1,0.5)},
repro=function(){rpois(1,1.5)}, max.nb.haplo=200,
geo.sim=FALSE, grid.size=10, lambda.xy=0.5,
mat.connect=NULL,
ini.n=1, ini.xy=NULL){
## CHECKS ##
## HANDLE ARGUMENTS ##
## if numeric value, make it a function
if(is.numeric(gen.time)){
gen.time.val <- gen.time[1]
gen.time <- function(){return(gen.time.val)}
}
## if numeric value, make it a function
if(is.numeric(repro)){
repro.val <- repro[1]
repro <- function(){return(repro.val)}
}
## GENERAL VARIABLES ##
NUCL <- as.DNAbin(c("a","t","c","g"))
TRANSISET <- list('a'=as.DNAbin('g'), 'g'=as.DNAbin('a'), 'c'=as.DNAbin('t'), 't'=as.DNAbin('c'))
TRANSVSET <- list('a'=as.DNAbin(c('c','t')), 'g'=as.DNAbin(c('c','t')), 'c'=as.DNAbin(c('a','g')), 't'=as.DNAbin(c('a','g')))
res <- list(seq = as.matrix(as.DNAbin(matrix(character(0), nrow = 0, ncol = 0))),
dates = integer(),
ances = character())
toExpand <- logical()
myGrid <- matrix(1:grid.size^2, ncol=grid.size, nrow=grid.size)
## AUXILIARY FUNCTIONS ##
## generate sequence from scratch
seq.gen <- function(){
##res <- list(sample(NUCL, size=seq.length, replace=TRUE)) # DNAbin are no longer lists by default
res <- sample(NUCL, size=seq.length, replace=TRUE)
class(res) <- "DNAbin"
return(res)
}
## create substitutions for defined SNPs - no longer used
substi <- function(snp){
res <- sapply(1:length(snp), function(i) sample(setdiff(NUCL,snp[i]),1)) # ! sapply does not work on DNAbin vectors directly
class(res) <- "DNAbin"
return(res)
}
## create transitions for defined SNPs
transi <- function(snp){
res <- unlist(TRANSISET[as.character(snp)])
class(res) <- "DNAbin"
return(res)
}
## create transversions for defined SNPs
transv <- function(snp){
res <- sapply(TRANSVSET[as.character(snp)],sample,1)
class(res) <- "DNAbin"
return(res)
}
## duplicate a sequence (including possible mutations)
seq.dupli <- function(seq, T){ # T is the number of time units between ancestor and decendent
## transitions ##
n.transi <- rbinom(n=1, size=seq.length*T, prob=mu.transi) # total number of transitions
if(n.transi>0) {
idx <- sample(1:seq.length, size=n.transi, replace=FALSE)
seq[idx] <- transi(seq[idx])
}
## transversions ##
n.transv <- rbinom(n=1, size=seq.length*T, prob=mu.transv) # total number of transitions
if(n.transv>0) {
idx <- sample(1:seq.length, size=n.transv, replace=FALSE)
seq[idx] <- transv(seq[idx])
}
return(seq)
}
## what is the name of the new sequences?
seqname.gen <- function(nb.new.seq){
res <- max(as.integer(rownames(res$seq)), 0) + 1:nb.new.seq
return(as.character(res))
}
## how many days before duplication occurs ?
time.dupli <- function(){
##res <- round(rnorm(1, mean=mean.gen.time, sd=sd.gen.time))
res <- round(gen.time()) # force integers
res[res<0] <- 0
return(res)
}
## when duplication occurs?
date.dupli <- function(curDate){
res <- curDate + time.dupli()
return(res)
}
## how many duplication/transmission occur?
nb.desc <- function(){
##res <- round(rnorm(1, mean=mean.repro, sd=sd.repro))
res <- repro()
res[res<0] <- 0
return(res)
}
## where does an haplotype emerges in the first place?
xy.gen <- function(){
return(sample(1:grid.size, size=2, replace=TRUE))
}
## where does a transmission occur (destination)?
if(is.null(mat.connect)){ # use universal lambda param
xy.dupli <- function(cur.xy, nbLoc){
mvt <- rpois(2*nbLoc, lambda.xy) * sample(c(-1,1), size=2*nbLoc, replace=TRUE)
res <- t(matrix(mvt, nrow=2) + as.vector(cur.xy))
res[res < 1] <- 1
res[res > grid.size] <- grid.size
return(res)
}
} else { # use location-dependent proba of dispersal between locations
if(any(mat.connect < 0)) stop("Negative values in mat.connect (probabilities expected!)")
mat.connect <- prop.table(mat.connect,1)
xy.dupli <- function(cur.xy, nbLoc){
idxAncesLoc <- myGrid[cur.xy[1], cur.xy[2]]
newLoc <- sample(1:grid.size^2, size=nbLoc, prob=mat.connect[idxAncesLoc,], replace=TRUE) # get new locations
res <- cbind(row(myGrid)[newLoc] , col(myGrid)[newLoc]) # get coords of new locations
return(res)
}
}
## check result size and resize it if needed
resize.result <- function(){
curSize <- length(res$dates)
if(curSize <= max.nb.haplo) return(NULL)
toKeep <- rep(FALSE, curSize)
toKeep[sample(1:curSize, size=max.nb.haplo, replace=FALSE)] <- TRUE
removed.strains <- rownames(res$seq)[!toKeep]
res$seq <<- res$seq[toKeep,]
res$dates <<- res$dates[toKeep]
res$ances <<- res$ances[toKeep]
toExpand <<- toExpand[toKeep]
temp <- as.character(res$ances) %in% removed.strains
if(any(temp)) {
res$ances[temp] <<- NA
}
return(NULL)
}
## check result size and resize it if needed - spatial version
resize.result.xy <- function(){
curSize <- length(res$dates)
if(curSize <= max.nb.haplo) return(NULL)
toKeep <- rep(FALSE, curSize)
toKeep[sample(1:curSize, size=max.nb.haplo, replace=FALSE)] <- TRUE
removed.strains <- rownames(res$seq)[!toKeep]
res$seq <<- res$seq[toKeep,]
res$dates <<- res$dates[toKeep]
res$ances <<- res$ances[toKeep]
res$xy <<- res$xy[toKeep,,drop=FALSE]
toExpand <<- toExpand[toKeep]
temp <- as.character(res$ances) %in% removed.strains
if(any(temp)) {
res$ances[temp] <<- NA
}
return(NULL)
}
## MAIN SUB-FUNCTION: EXPANDING FROM ONE SEQUENCE - NON SPATIAL ##
expand.one.strain <- function(seq, date, idx){
toExpand[idx] <<- FALSE # this one is no longer to expand
nbDes <- nb.desc()
if(nbDes==0) return(NULL) # stop if no descendant
newDates <- sapply(1:nbDes, function(i) date.dupli(date)) # find dates for descendants
newDates <- newDates[newDates <= t.max] # don't store future sequences
nbDes <- length(newDates)
if(nbDes==0) return(NULL) # stop if no suitable date
newSeq <- lapply(1:nbDes, function(i) seq.dupli(seq, newDates[i]-date)) # generate new sequences
class(newSeq) <- "DNAbin" # lists of DNAbin vectors must also have class "DNAbin"
newSeq <- as.matrix(newSeq) # list DNAbin -> matrix DNAbin with nbDes rows
rownames(newSeq) <- seqname.gen(nbDes) # find new labels for these new sequences
res$seq <<- rbind(res$seq, newSeq) # append to general output
res$dates <<- c(res$dates, newDates) # append to general output
res$ances <<- c(res$ances, rep(rownames(res$seq)[idx], nbDes)) # append to general output
toExpand <<- c(toExpand, rep(TRUE, nbDes))
return(NULL)
}
## 2nd MAIN SUB-FUNCTION: EXPANDING FROM ONE SEQUENCE - SPATIAL ##
expand.one.strain.xy <- function(seq, date, idx, cur.xy){
toExpand[idx] <<- FALSE # this one is no longer to expand
nbDes <- nb.desc()
if(nbDes==0) return(NULL) # stop if no descendant
newDates <- sapply(1:nbDes, function(i) date.dupli(date)) # find dates for descendants
newDates <- newDates[newDates <= t.max] # don't store future sequences
nbDes <- length(newDates)
if(nbDes==0) return(NULL) # stop if no suitable date
newSeq <- lapply(1:nbDes, function(i) seq.dupli(seq, newDates[i]-date)) # generate new sequences
class(newSeq) <- "DNAbin" # lists of DNAbin vectors must also have class "DNAbin"
newSeq <- as.matrix(newSeq) # list DNAbin -> matrix DNAbin with nbDes rows
rownames(newSeq) <- seqname.gen(nbDes) # find new labels for these new sequences
res$seq <<- rbind(res$seq, newSeq) # append to general output
res$dates <<- c(res$dates, newDates) # append to general output
res$ances <<- c(res$ances, rep(rownames(res$seq)[idx], nbDes)) # append to general output
res$xy <<- rbind(res$xy, xy.dupli(cur.xy, nbDes))
toExpand <<- c(toExpand, rep(TRUE, nbDes))
return(NULL)
}
## PERFORM SIMULATIONS - NON SPATIAL CASE ##
if(!geo.sim){
## initialization
res$seq <- matrix(rep(seq.gen(), ini.n), byrow=TRUE, nrow=ini.n)
class(res$seq) <- "DNAbin"
rownames(res$seq) <- 1:ini.n
res$dates[1:ini.n] <- rep(0,ini.n)
res$ances[1:ini.n] <- rep(NA,ini.n)
toExpand <- rep(TRUE,ini.n)
## simulations: isn't simplicity beautiful?
while(any(toExpand)){
idx <- min(which(toExpand))
expand.one.strain(res$seq[idx,], res$dates[idx], idx)
resize.result()
}
## SHAPE AND RETURN OUTPUT ##
res$id <- as.character(1:length(res$ances))
res$ances <- as.character(res$ances)
names(res$dates) <- rownames(res$seq)
res$call <- match.call()
class(res) <- "haploGen"
return(res)
} # END NON-SPATIAL SIMULATIONS
## PERFORM SIMULATIONS - SPATIAL CASE ##
if(geo.sim){
## some checks
if(!is.null(mat.connect)) {
if(nrow(mat.connect) != ncol(mat.connect)) stop("mat.connect is not a square matrix")
if(nrow(mat.connect) != grid.size^2) stop("dimension of mat.connect does not match grid size")
}
## initialization
res$seq <- matrix(rep(seq.gen(), ini.n), byrow=TRUE, nrow=ini.n)
class(res$seq) <- "DNAbin"
rownames(res$seq) <- 1:ini.n
res$dates[1:ini.n] <- rep(0,ini.n)
res$ances[1:ini.n] <- rep(NA,ini.n)
toExpand <- rep(TRUE,ini.n)
if(is.null(ini.xy)){
locStart <- xy.gen()
} else{
locStart <- as.vector(ini.xy)[1:2]
}
res$xy <- matrix(rep(locStart, ini.n), byrow=TRUE, nrow=ini.n)
colnames(res$xy) <- c("x","y")
##cat("nb.strains","iteration.time",file="haploGenTime.out") # for debugging
## simulations: isn't simplicity beautiful?
while(any(toExpand)){
##time.previous <- Sys.time() # FOR DEBUGGING
idx <- min(which(toExpand))
expand.one.strain.xy(res$seq[idx,], res$dates[idx], idx, res$xy[idx,])
resize.result.xy()
## VERBOSE OUTPUT FOR DEBUGGING ##
## cat("\nNb strains:",length(res$ances),"/",max.nb.haplo)
## cat("\nLatest date:", max(res$dates),"/",t.max)
## cat("\nRemaining strains to duplicate", sum(toExpand))
## cat("\n",append=TRUE,file="haploGenTime.out")
## iter.time <- as.numeric(difftime(Sys.time(),time.previous,unit="sec"))
## time.previous <- Sys.time()
## cat(c(length(res$ances), iter.time),append=
##TRUE,file="haploGenTime.out")
## END DEBUGGING VERBOSE ##
}
## VERBOSE OUTPUT FOR DEBUGGING ##
##cat("\nSimulation time stored in haploGenTime.out\n")
## SHAPE AND RETURN OUTPUT ##
res$id <- as.character(1:length(res$ances))
res$ances <- as.character(res$ances)
names(res$dates) <- rownames(res$seq)
class(res) <- "haploGen"
res$call <- match.call()
return(res)
} # end SPATIAL SIMULATIONS
} # end haploGen
##################
## print.haploGen
##################
#' @method print haploGen
#' @export
print.haploGen <- function(x, ...){
cat("\t\n========================")
cat("\t\n= simulated haplotypes =")
cat("\t\n= (haploGen object) =")
cat("\t\n========================\n")
cat("\nSize :", length(x$ances),"haplotypes")
cat("\nHaplotype length :", ncol(x$seq),"nucleotids")
cat("\nProportion of NA ancestors :", signif(mean(is.na(x$ances)),5))
cat("\nNumber of known ancestors :", sum(!is.na(x$ances)))
nbAncInSamp <- sum(x$ances %in% labels(x))
cat("\nNumber of ancestors within the sample :", nbAncInSamp)
cat("\nDate range :", min(x$dates,na.rm=TRUE),"-",max(x$dates,na.rm=TRUE))
##nUniqSeq <- length(unique(apply(as.character(x$seq),1,paste,collapse="")))
##cat("\nNumber of unique haplotypes :", nUniqSeq)
cat("\n\n= Content =")
for(i in 1:length(x)){
cat("\n")
cat(paste("$", names(x)[i], sep=""),"\n")
if(names(x)[i] %in% c("seq","call")) {
print(x[[i]])
} else if(names(x)[i]=="xy"){
print(head(x[[i]]))
if(nrow(x[[i]]>6)) cat(" ...\n")
} else cat(head(x[[i]],6), ifelse(length(x[[i]])>6,"...",""),"\n")
}
return(NULL)
} # end print.haploGen
##############
## [.haploGen
##############
#' @export
"[.haploGen" <- function(x,i,j,drop=FALSE){
res <- x
res$seq <- res$seq[i,,drop=FALSE]
res$id <- res$id[i]
res$ances <- res$ances[i]
res$ances[!res$ances %in% res$id] <- NA
res$dates <- res$dates[i]
if(!is.null(res$xy)) res$xy <- res$xy[i,,drop=FALSE]
return(res)
}
##################
## labels.haploGen
##################
#' @method labels haploGen
#' @export
labels.haploGen <- function(object, ...){
return(object$id)
}
#######################
## as.POSIXct.haploGen
#######################
#' @method as.POSIXct haploGen
#' @export
as.POSIXct.haploGen <- function(x, tz="", origin=as.POSIXct("2000/01/01"), ...){
res <- as.POSIXct(x$dates*24*3600, origin=origin)
return(res)
}
#####################
## seqTrack.haploGen
#####################
#' @method seqTrack haploGen
#' @export
seqTrack.haploGen <- function(x, best=c("min","max"), prox.mat=NULL, ...){
myX <- dist.dna(x$seq, model="raw")
x.names <- labels(x)
x.dates <- as.POSIXct(x)
seq.length <- ncol(x$seq)
myX <- myX * seq.length
myX <- as.matrix(myX)
prevCall <- as.list(x$call)
if(is.null(prevCall$mu)){
mu0 <- 0.0001
} else {
mu0 <- eval(prevCall$mu)
}
res <- seqTrack(myX, x.names=x.names, x.dates=x.dates, best=best, prox.mat=prox.mat,...)
return(res)
}
########################
## as.seqTrack.haploGen
########################
as.seqTrack.haploGen <- function(x){
## x.ori <- x
## x <- na.omit(x)
toSetToNA <- x$dates==min(x$dates)
res <- list()
res$id <- labels(x)
res <- as.data.frame(res)
res$ances <- x$ances
res$ances[toSetToNA] <- NA
res$weight <- 1 # ??? have to recompute that...
res$weight[toSetToNA] <- NA
res$date <- as.POSIXct(x)[labels(x)]
res$ances.date <- as.POSIXct(x)[x$ances]
## set results as indices rather than labels
res$ances <- match(res$ances, res$id)
res$id <- 1:length(res$id)
## SET CLASS
class(res) <- c("seqTrack", "data.frame")
return(res)
}
################
## plotHaploGen
################
plotHaploGen <- function(x, annot=FALSE, date.range=NULL, col=NULL, bg="grey", add=FALSE, ...){
## SOME CHECKS ##
if(!inherits(x, "haploGen")) stop("x is not a haploGen object")
if(is.null(x$xy)) stop("x does not contain xy coordinates - try converting to graphNEL for plotting")
## ## CONVERSION TO A SEQTRACK-LIKE OBJECT ##
xy <- x$xy
res <- as.seqTrack.haploGen(x)
## res <- list()
## res$id <- labels(x)
## res <- as.data.frame(res)
## res$ances <- x$ances
## res$ances[toSetToNA] <- NA
## res$weight <- 1 # ??? have to recompute that...
## res$weight[toSetToNA] <- NA
## res$date <- as.POSIXct(x.ori)[labels(x)]
## res$ances.date <- as.POSIXct(x.ori)[x$ances]
## ## set results as indices rather than labels
## res$ances <- match(res$ances, res$id)
## res$id <- 1:length(res$id)
## CALL TO PLOTSEQTRACK ##
plotSeqTrack(x=res, xy=xy, annot=annot, date.range=date.range,
col=col, bg=bg, add=add, ...)
return(invisible(res))
} # end plotHaploGen
###################
## sample.haploGen
###################
#' @method sample haploGen
#' @export
sample.haploGen <- function(x, n){
##sample.haploGen <- function(x, n, rDate=.rTimeSeq, arg.rDate=NULL){
## EXTRACT THE SAMPLE ##
res <- x[sample(1:nrow(x$seq), n, replace=FALSE)]
## RETRIEVE SOME PARAMETERS FROM HAPLOSIM CALL
prevCall <- as.list(x$call)
if(!is.null(prevCall$mu)){
mu0 <- eval(prevCall$mu)
} else {
mu0 <- 1e-04
}
if(!is.null(prevCall$seq.length)){
L <- eval(prevCall$seq.length)
} else {
L <- 1000
}
## truedates <- res$dates
## daterange <- diff(range(res$dates,na.rm=TRUE))
## if(identical(rDate,.rTimeSeq)){
## sampdates <- .rTimeSeq(n=length(truedates), mu=mu0, L=L, maxNbDays=daterange/2)
## } else{
## arg.rDate$n <- n
## sampdates <- do.call(rDate, arg.rDate)
## }
## sampdates <- truedates + abs(sampdates)
return(res)
} # end sample.haploGen
######################
## as.igraph.haploGen
######################
#' @method as.igraph haploGen
#' @export
as.igraph.haploGen <- function(x, col.pal=redpal, ...){
## GET DAG ##
from <- x$ances
to <- x$id
isNotNA <- !is.na(from) & !is.na(to)
dat <- data.frame(from,to,stringsAsFactors=FALSE)[isNotNA,,drop=FALSE]
vnames <- as.character(unique(unlist(dat)))
out <- graph.data.frame(dat, directed=TRUE, vertices=data.frame(names=vnames, dates=x$dates[vnames]))
## SET WEIGHTS ##
D <- as.matrix(dist.dna(x$seq,model="raw")*ncol(x$seq))
temp <- mapply(function(i,j) return(D[i,j]), as.integer(from), as.integer(to))
E(out)$weight <- temp[isNotNA]
## DATES FOR VERTICES
V(out)$dates <- x$date
## SET EDGE LABELS ##
E(out)$label <- E(out)$weight
## SET EDGE COLORS
E(out)$color <- num2col(E(out)$weight, col.pal=col.pal, reverse=TRUE)
## SET LAYOUT ##
ypos <- V(out)$dates
ypos <- abs(ypos-max(ypos))
attr(out, "layout") <- layout.fruchterman.reingold(out, params=list(miny=ypos, maxy=ypos))
return(out)
} # end as.igraph.haploGen
#################
## plot.haploGen
#################
#' @method plot haploGen
#' @export
plot.haploGen <- function(x, y=NULL, col.pal=redpal, ...){
## get graph ##
g <- as.igraph(x, col.pal=col.pal)
## make plot ##
plot(g, layout=attr(g,"layout"), ...)
## return graph invisibly ##
return(invisible(g))
} # end plot.haploGen
##########################
## as("haploGen", "graphNEL")
##########################
## if(require(graph)){
## setOldClass("haploGen")
## setAs("haploGen", "graphNEL", def=function(from){
## if(!require(ape)) stop("package ape is required")
## if(!require(graph)) stop("package graph is required")
## N <- length(from$ances)
## areNA <- is.na(from$ances)
## ## EXTRACT WEIGHTS (nb of mutations)
## M <- as.matrix(dist.dna(from$seq, model="raw")*ncol(from$seq))
## rownames(M) <- colnames(M) <- from$id
## w <- mapply(function(i,j) {M[i, j]}, i=from$ances[!areNA], j=from$id[!areNA])
## ## CONVERT TO GRAPH
## res <- ftM2graphNEL(ft=cbind(from$ances[!areNA], from$id[!areNA]), W=w, edgemode = "directed", V=from$id)
## return(res)
## })
## }
## #####################
## ## seqTrackG.haploGen
## #####################
## seqTrackG.haploGen <- function(x, optim=c("min","max"), ...){
## myX <- dist.dna(x$seq, model="raw")
## x.names <- labels(x)
## x.dates <- as.POSIXct(x)
## seq.length <- ncol(x$seq)
## myX <- myX * seq.length
## prevCall <- as.list(x$call)
## if(is.null(prevCall$mu)){
## mu0 <- 0.0001
## } else {
## mu0 <- eval(prevCall$mu)
## }
## res <- seqTrackG(myX, x.names=x.names, x.dates=x.dates, best=optim,...)
## return(res)
## }
##############################
## optimize.seqTrack.haploGen
##############################
## optimize.seqTrack.haploGen <- function(x, thres=0.2, optim=c("min","max"),
## typed.chr=NULL, mu0=NULL, chr.length=NULL,
## prox.mat=NULL, nstep=10, step.size=1e3,
## rDate=.rTimeSeq, arg.rDate=NULL, rMissDate=.rUnifTimeSeq, ...){
## x.names <- labels(x)
## x.dates <- as.POSIXct(x)
## seq.length <- ncol(x$seq)
## myX <- dist.dna(x$seq, model="raw") * seq.length
## prevCall <- as.list(x$call)
## if(is.null(prevCall$mu)){
## mu0 <- 0.0001
## } else {
## mu0 <- eval(prevCall$mu)
## }
## res <- optimize.seqTrack.default(x=myX, x.names=x.names, x.dates=x.dates,
## typed.chr=typed.chr, mu0=mu0, chr.length=chr.length,
## thres=thres, optim=optim, prox.mat=prox.mat,
## nstep=nstep, step.size=step.size,
## rDate=rDate, arg.rDate=arg.rDate, rMissDate=rMissDate, ...)
## } # end optimize.seqTrack.haploGen
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.