# Hinda Haned, December 2008
# haned@biomserv.univ-lyon1.fr
##############################################
# forensim classes constructors definitions
##############################################
#making sure that the simulations are reproducible
#simugeno class constructor : simugeno function
simugeno <-
function(tab,which.loc=NULL,n=1)
{
if(!is.tabfreq(tab))
{
stop("tab must be a tabfreq object")
}
X <- tab@tab
M <- tab@which.loc
#cheking that marker.names are correct
if(is.null(which.loc))
{
which.loc <- M
}
else
{
if(identical(which.loc, character(0)))
{
stop("invalid markers names")
}
if(!all(which.loc %in% M))
{
stop("uknown markers chosen, please check the markers names")
}
}
popfac <- tab@pop.names
#n <- as.integer(n)
#locn is a local variable defining the number of genotypes
if(is.null(n))
{
locn <- 1
}
if(any(n < 0) )
{
stop('n must be a positive integer, or a list of positive integers')
}
if(identical(length(n),1) & identical(n,0))
{
stop("0 is not a valid value for n ")
}
# one population
if(is.null(popfac) & length(n) >= 1 )
{
popind <- NULL
if(length(n) > 1)
{
locn <- n[1]
if(locn == 0)
{
stop("0 is not a valid value for n ")
}
warning("only the first element of n will be considered")
}
if(length(n)==1 )
{
locn <- n
}
if(is.null(n))
{
locn <- 1
}
geno <- matrix(0,ncol=length(which.loc),nrow=locn)
rownames(geno) <- paste("Ind",1:locn,sep='')
colnames(geno) <- which.loc
for(i in which.loc)
{
a1 <- sample(names(X[[i]]),locn,prob=X[[i]],replace=TRUE)
a2 <- sample(names(X[[i]]),locn,prob=X[[i]],replace=TRUE)
geno[,i] <- paste(a1,a2,sep="/")
}
freq <- X[which.loc]#if one pop
}
# if multiple pops
if(!is.null(popfac))
{
#verifications
if(length(n) > length(popfac))
{
locn <- n[1:length(popfac)]
if(all(n==0))
{
stop("n is empty for all populations ")
}
warning("n contains too many elements, only the first ",length(popfac), " are considered" )
}
if(length(n) < length(popfac))
{
stop("n must be of length ",length(popfac))
}
#verif end
if(identical(length(n),length(popfac)))
{
locn <- n
}
names(locn)<-popfac
#print('popfac');print(popfac)
#print(length(popfac))
freq <- vector('list',length(popfac))
names(freq)<- popfac
nombL <- length(which.loc)
names(locn) <- popfac
X <- tab@tab
geno0 <- NULL
geno <- NULL
popind <- NULL
tempfreq <- NULL
if(length(popfac)==1)
{
popind<-rep(popfac,n)
for(i in popfac)
{
geno0 <- matrix(0,ncol=nombL,nrow=locn[i])#changes at each iteration, different number of sampeled individuals
for(m in which.loc)
{
colnames(geno0) <- which.loc
if(locn[i]==0)
{
geno0[ , m] <- 0
}
else
{
avec1 <- sample(names(X[[i]][[m]]),locn[i],prob=X[[i]][[m]],replace=TRUE)
avec2 <- sample(names(X[[i]][[m]]),locn[i],prob=X[[i]][[m]],replace=TRUE)
ty <- paste(avec1,avec2,sep='/')
geno0[ , m] <- ty
tempfreq <- c(tempfreq,X[[i]][m])#concatenation for all markers for one pop
}
}
geno <- rbind(geno,geno0)
freq[[i]]<- tempfreq
tempfreq <- NULL#free
#names(freq[i]) <- i
#print(freq[i])
}
}
#popind
if(length(popfac) > 1)
{
for(i in popfac)
{
geno0 <- matrix(0,ncol=nombL,nrow=locn[i])#changes at each iteration, different number of sampeled individuals
#print(rep(levels(popfac)[i],locn[i]))
popind <- c(popind,rep(i,locn[i]))
#print(popind)
for(m in which.loc)
{
colnames(geno0) <- which.loc
if(locn[i]==0)
{
geno0[ , m] <- 0
}
else
{
avec1 <- sample(names(X[[i]][[m]]),locn[i],prob=X[[i]][[m]],replace=TRUE)
avec2 <- sample(names(X[[i]][[m]]),locn[i],prob=X[[i]][[m]],replace=TRUE)
ty <- paste(avec1,avec2,sep='/')
geno0[ , m] <- ty
tempfreq <- c(tempfreq,X[[i]][m])#concatenation for all markers for one pop
}
}
#freq[[g]] <<- tempfreq
geno <- rbind(geno,geno0)
freq [[i]]<- tempfreq
tempfreq <- NULL#free
}
}
popind <- as.factor(popind)
}
ID <- paste('ind',1:sum(locn),sep='')
rownames(geno) <- ID
res <- new('simugeno')
res@tab.freq <- freq
res@nind <- sum(locn)
res@which.loc <- which.loc
res@tab.geno <- geno
res@pop.names <- popfac
res@popind <- popind
res@indID <- ID
return(res)
}
# simumix class constructor: simumix function
simumix <-
function(tab,which.loc=NULL,ncontri=1)
{
if(!is.simugeno(tab))
{
stop("\n tab must be a simugeno object\n")
}
M <- tab@which.loc
# cheking marker names
if(is.null(which.loc))
{
which.loc <- M
}
#if not null names, than check for possible errors
else
{
if(identical(which.loc, character(0)))
{
stop("markers names must be a character string")
}
if(!all(which.loc %in% M))
{
#if the user's markers are not the same that those of the tabfreq object
stop("uknown marker names")
}
}
popfac <- tab@pop.names
popind <- tab@popind
nsimugeno <- tab@nind
prof <- tab@tab.geno
tabfreq <- tab@tab.freq
nmark <- length(which.loc)
mix.all <- vector('list',nmark)
names(mix.all) <- which.loc
indID <- tab@indID
N <- ncontri
if(sum(ncontri) > nsimugeno)
{
stop("too much contributors in the mixture")
}
if(any(ncontri < 0))
{
stop("ncontri must be a positive integer, or a list of positive integers")
}
if(is.null(popfac))
{
if(identical(ncontri, 0))
{
stop("0 is not a valid value for ncontri ")
}
if(length(ncontri) > 1)
{
N <- ncontri[1]
if(N==0)
{
stop("0 is not a valid value for ncontri ")
}
warning("only the first element of ncontri will be considered")
}
#sampling individuals
samp0 <- sample(indID,N,replace=FALSE)#chosen profiles to ssimulate the mixture
mixprof <- prof[samp0,which.loc]
mixprof <- as.matrix(mixprof)
indnames <- samp0
if(length(which.loc)>1 & N==1)
{
mixprof <- t(mixprof)
#colnames(mixprof) <- which.loc
}
rownames(mixprof) <- indnames
popinfo <- NULL
colnames(mixprof) <- which.loc
#print(mixprof)
}
#multiple populations case
if(!is.null(popfac))
{
if(length(ncontri) <= 1)
{
if(!identical(length(popfac),length(ncontri)))
stop("ncontri must be of length ", length(popfac))
}
if(length(ncontri) > 1 & all(ncontri==0))
stop("ncontri is empty for all populations ")
if(length(ncontri) > length(popfac))
{
warning("only the ", length(popfac), " elements of ncontri are considered")
ncontri <- ncontri[1:length(popfac)]
}
if(length(ncontri) < length(popfac)){
stop("ncontri must be of length ",length(popfac))}
#case where the number of conributors is greater of the profiles present in the simugeno object
names(N) <- popfac
#print(N)
temprof <- NULL
temp0 <- NULL
popinfo <- NULL
poptemp <- as.character(unique(popind))
#print(poptemp)
for(p in popfac)
{
#print(N[p])
if(N[p]!=0)
{
samp1 <- sample(indID[popind == p ],N[p],replace=FALSE)#chosen profiles to ssimulate the mixture
temp0 <- c(temp0,samp1)
temp1 <- prof[samp1,]
temprof <- rbind(temprof,temp1)
popinfo <- c(popinfo,rep(names(N[p]), N[p]))
}
}
indnames <- temp0
mixprof <- temprof
mixprof <- mixprof[,which.loc]
mixprof <- as.matrix(mixprof)
if(ncol(mixprof)==1)
{
mixprof<-t(mixprof)
rownames(mixprof)<- indnames
colnames(mixprof)<-which.loc
}
else{
rownames(mixprof) <- indnames
colnames(mixprof) <- which.loc
}
popinfo <- as.factor(popinfo)
#print('apres')
}
# alleles present in the mixture
for(i in which.loc)
{
loc <- sort(unique(unlist(strsplit(mixprof[,i],'/'))))
temp.loc<- as.character(sort(as.numeric(loc)))
mix.all[[i]] <- temp.loc
}
res <- new('simumix')
res@mix.all <- mix.all
res@mix.prof <- mixprof
res@ncontri <- sum(N)
res@which.loc <- which.loc
res@popinfo <- popinfo
return(res)
}
#tabfreq class constructor: tabfreq function
tabfreq <-
function(tab,pop.names=NULL)
{
# single population case
if( !inherits(tab,"list") )#if it's not a list, it must be a data.frame or a matrix
{
res <- vector('list',1)
if( !inherits(tab,"data.frame") & !inherits(tab,"matrix") )
{#if tab is not a matrix or data.frame
stop("tab must be a of types matrix or data.frame")
}
if (is.null(colnames(tab)))
{
stop("tab columns have no name")
}
#cheking pop.names : one cold want to specify the population name, even for a single population
if(length(pop.names) > 1)
{
if(!is.factor(pop.names))
{
stop("a factor is expected for pop.names")
}
warning("only the first element of pop.names is considered")
popfac <- pop.names[1]
names(res) <- popfac
res[[popfac]] <- naomitab(tab)
}
if(length(pop.names) == 1)
{
if(!is.factor(pop.names))
{
stop("a factor is expected for pop.names")
}
popfac <- pop.names
names(res) <- popfac
res[[popfac]] <- naomitab(tab)
}
if(is.null(pop.names))
{
popfac <- pop.names
res <-0
res <- (naomitab(tab))
}
mark<- colnames(tab)[-1]
}
# multiple populations case
if( inherits(tab,"list") )
{
p <- length(tab)
res <- vector('list',p)
mm <- vector('list',p)
popfac <- pop.names
if(!is.factor(pop.names)){
stop("a factor is expected for pop.names")
}
if(length(pop.names) != p || length(pop.names) < p)
{
stop("tab and pop.names must have the same length")
}
if(length(pop.names) > p){
popfac <- pop.names[1:p]
warning("only the first ", p," elements of pop.names are considered")
}
for(i in popfac)
{
if( !inherits(tab[[i]],"data.frame") & !inherits(tab[[i]],"matrix") )
{
stop("all elements of tab must be of types matrix or data.frame")
}
if (is.null(colnames(tab[[i]])))
{
stop("columns of element ", i, " of tab have no name ")
}
res[[i]] <- naomitab(tab[[i]])
mm[[i]] <- colnames(tab[[i]])[-1]
}
names(res) <- popfac
#checking markers between populations
maxloc <- max(sapply(mm,function(y) length(y)))
lpop <- length(popfac)
comloc <- names(which(table(unlist(mm))==lpop))
if(length(comloc)==0)
{
stop("There is no shared markers between the ", lpop," populations. Please check your data.")
}
if(length(comloc) != 0 )
{
if(length(comloc) < maxloc)
{
warning("Only ", length(comloc)," markers are common between the ", lpop," populations. You might want to check your data.")
for(w in popfac)
{
res[[w]]<-res[[w]][comloc]
names(res[[w]]) <- popfac
}
mark <- comloc
}
if(length(comloc)==maxloc)
{
mark <- mm[[1]]#for example, no need to complicate
#if condition not met "res" doesn't change
}
}
}
#object tabfreq to construct
freq <- new('tabfreq')
freq@pop.names <- popfac
freq@tab <- res
freq@which.loc <- mark
#freq@theta <- theta
return(freq)
}
# Alias Method
as.simugeno <- simugeno
as.simumix <- simumix
as.tabfreq <- tabfreq
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.