Nothing
#' Simulate an admixed polyploid population characterized for multi-allelic markers
#'
#' @param K
#' Number of ancestral groups (positive integer superior or equal to 2)
#' @param N
#' Number of individuals (positive integer superior or equal to 2)
#' @param P
#' Ploidy level (positive integer) either as a single value to specify the same
#' ploidy for all simulated individuals, or as a vector of size `N`
#' @param M
#' Number of markers (positive integer superior or equal to 2)
#' @param L
#' Number of alleles (positive integer superior or equal to 2) either as a
#' single value to specify the same number of alleles for all simulated markers,
#' or as a vector of size `M`
#' @param C
#' Number of chromosomes (positive integer). By default, markers are evenly
#' distributed between chromosomes and evenly spaced
#' @param SmoothParam
#' Smoothing parameter corresponding to the number of generations since admixture
#' @param GeneticMap
#' An optional genetic map that overrides the number of markers `M` and chromosomes
#' `C`, with three named columns: `Marker`, `Chromosome`, `Distance`. The distances
#' must be in centiMorgan
#' @param Prop
#' Preconfigured matrix of admixture proportions that overrides the number of
#' individuals `N` and ancestral groups `K`
#' @param Freq
#' Preconfigured list of matrices of allele frequencies in ancestral groups
#' that overrides the number of markers `M`, alleles `L` and ancestral groups `K`
#' @param AlphaProp
#' Concentration parameter from the Dirichlet distribution
#' (positive numeric value) used for sampling admixture proportions
#' @param AlphaFreq
#' Concentration parameter from the Dirichlet distribution
#' (positive numeric value) used for sampling allele frequencies in
#' ancestral groups
#' @param Depth
#' (optional) Total read depth (positive integer), either as a single value
#' to specify the same depth for all datapoints, or as a matrix of
#' dimension `N` x `M`
#' @param SeqError
#' (optional) Sequencing error (numeric value between 0 and 1) rate to be used
#' when sampling sequencing reads
#' @param NbThreads
#' Number of threads to be used (positive integer) with a default value of 0
#' setting automatically all threads available
#' @param Seed
#' Seed for reproducible inference (integer)
#'
#' @return
#' A list of four items: a list of genotying matrices (`Geno`), a matrix of
#' admixture proportions (`Prop`), a list of matrices of allele frequencies in
#' ancestral groups (`Freq`), and a vector of log-likelihood values
#' over iterations (`LogLik`)
#'
#' @description
#' This function simulates a polyploid admixed population
#'
#' @importFrom magrittr %>%
#' @importFrom magrittr set_names
#' @import dplyr
#'
#' @export
#'
#' @examples
#' ## Simulate a polyploid admixed population
#' DataSim <- SimulatePop(K=3L, N=10L, P=6L, M=50L, C=5L, L=10L, Seed=123)
#'
#' ## Genotyping matrices
#' head(DataSim$Geno$Marker1)
#'
#' ## Ancestry matrices by chromosome
#' head(DataSim$Ancestry$Ind1$Chr1)
#'
#' ## Admixture proportions
#' head(DataSim$Prop)
#'
#' ## Allele frequencies in ancestral groups
#' DataSim$Freq$Marker1
#'
#' ## Genetic map
#' head(DataSim$GeneticMap)
SimulatePop <- function(K=3L, N=100L, P=6L, M=1000L, C=5L, L=10L, SmoothParam=1,
GeneticMap=NULL, Prop=NULL, Freq=NULL, AlphaProp=1, AlphaFreq=1,
Depth=NULL, SeqError=1e-2, NbThreads=0L, Seed=123L){
## Initial checks
stopifnot("K must be an integer superior or equal to 2" =
K>=2L && length(K)==1)
stopifnot("N must be an integer superior or equal to 2" =
N>=2L && length(N)==1)
stopifnot("P must be positive integers, either as single value or as a vector" =
all(P>=1L))
stopifnot("L must be integers superior or equal to 2, either as single value or as a vector" =
all(L>=2L))
stopifnot("C must be a positive integer" =
C>=1L && length(C)==1)
stopifnot("M must be an integer superior or equal to 2C" =
M>=2*C && length(M)==1)
stopifnot("SmoothParam must be a positive numeric value" =
SmoothParam>0)
stopifnot("When informed, GeneticMap must be a dataframe with three named columns
(Marker, Chromosome and Distance) and a minimum of 2 markers per
chromosome" =
is.null(GeneticMap) ||
(is.data.frame(GeneticMap) &&
all(c("Marker","Chromosome","Distance")%in%colnames(GeneticMap)) &&
min(table(GeneticMap$Chromosome)==2)))
stopifnot("When informed, Prop must be a matrix of numeric values" =
is.null(Prop) || (is.matrix(Prop) && is.numeric(Prop)))
stopifnot("When informed, Freq must be a list of numeric matrices of size superior or equal to 2C" =
is.null(Freq) || (length(Freq) >= 2*C && is.matrix(Freq[[1]]) && is.numeric(Freq[[1]])))
stopifnot("When informed, Freq matrices must have same number of rows as Prop" =
is.null(Freq) || is.null(Prop) || ncol(Prop)==nrow(Freq[[1]]))
stopifnot("AlphaProp must be a positive numeric value" =
AlphaProp>0)
stopifnot("AlphaFreq must be a positive numeric value" =
AlphaFreq>0)
stopifnot("SeqError must be a positive numeric value between 0 and 1" =
SeqError>=0 && SeqError<1)
stopifnot("When informed, Depth must be a single value or a matrix of size N x M with positive integers" =
is.null(Depth) || (all(Depth>=1L) && (length(Depth)==1) || (is.matrix(Depth) && nrow(Depth)==N && ncol(Depth)==M)))
## Set Seeds
set.seed(Seed)
## Set initial quantities
. <- NULL
Ind_names <- paste0("Ind",1:N) %>% set_names(.,.)
Group_names <- paste0("K",1:K) %>% set_names(.,.)
Marker_names <- paste0("Marker",1:M) %>% set_names(.,.)
Chr_names <- paste0("Chr",1:C) %>% set_names(.,.)
if(!is.null(GeneticMap)){
GeneticMap <- GeneticMap %>%
arrange(Chromosome,Distance)
Chr_names <- unique(GeneticMap$Chromosome) %>% set_names(.,.)
Marker_names <- GeneticMap$Marker %>% set_names(.,.)
M <- nrow(GeneticMap)
C <- length(Chr_names)
}
if(!is.null(Prop)){
if(!is.null(rownames(Prop))) Ind_names <- rownames(Prop)
if(!is.null(colnames(Prop))) Group_names <- colnames(Prop)
K <- ncol(Prop)
N <- nrow(Prop)
}
if(!is.null(Freq)){
if(!is.null(names(Freq))) Marker_names <- names(Freq)
if(!is.null(rownames(Freq[[1]]))) Group_names <- rownames(Freq[[1]])
M <- length(Freq)
K <- nrow(Freq[[1]])
L <- sapply(Freq,function(m)ncol(m))
}else{
L <- sapply(Marker_names,function(m)L)
}
if(length(P)==1) P <- rep(P,N)
## Additional checks
stopifnot("When informed, Depth must be either a scalar or a matrix of positive integers" =
is.null(Depth) || (length(Depth)==1 && Depth>0) ||
(is.matrix(Depth) && is.numeric(Depth) && ncol(Depth)==M && nrow(Depth)==N && all(Depth>=0)))
## Simulate admixture proportions
if(is.null(Prop)){
Prop <- .SimulateProp(Ind_names,Group_names,AlphaProp,Seed)
}
## Simulate allele frequencies in ancestral groups
if(is.null(Freq)){
Freq <- .SimulateFreq(Marker_names,Group_names,L,AlphaFreq,Seed,NbThreads)
}
## Genetic map formatting
Chromosome <- NULL
Distance <- NULL
if(is.null(GeneticMap)){
M_per_C <- floor(M/C) + (seq_len(C) <= M %%floor(M/C))
Chr_vect <- lapply(1:C,function(chr)rep(Chr_names[chr],M_per_C[chr])) %>% unlist
GeneticMap <- data.frame(Marker = Marker_names, row.names = NULL) %>%
mutate(Chromosome = Chr_vect) %>%
{lapply(Chr_names,function(chr)
filter(.,.$Chromosome==chr) %>%
mutate(Distance=seq(0,100,100/(nrow(.)-1))))} %>%
bind_rows()
}
## Calculate distances between marker pairs
PairwiseDist <- lapply(Chr_names,function(chr){
GeneticMap_chr <- GeneticMap %>% filter(Chromosome==chr)
res <- data.frame(Marker_1 = GeneticMap_chr$Marker[-nrow(GeneticMap_chr)],
Marker_2 = GeneticMap_chr$Marker[-1],
Chromosome = chr,
Dist = as.numeric(as.character(GeneticMap_chr$Distance))[-1] -
as.numeric(as.character(GeneticMap_chr$Distance))[-nrow(GeneticMap_chr)])
return(res)
})
## Simulate ancestry and genotyping
ResSim <- .SimulateGeno(Prop,Freq,PairwiseDist,P,SmoothParam,Seed,NbThreads)
## Deduce admixture proportions from ancestry dosages along chromosomes
Prop <- .DeduceProp(ResSim$Ancestry)
## Generate output
Res <- list(Geno = ResSim$Geno,
Ancestry = ResSim$Ancestry,
Prop = Prop,
Freq = Freq,
GeneticMap = GeneticMap)
## Sample sequencing reads
if(!is.null(Depth)){
if(length(Depth)==1) Depth <- matrix(Depth,N,M)
AlleleDepth <- .SampleReads(ResSim$Geno,Depth,SeqError,Seed,NbThreads)
Res <- append(list(AlleleDepth = AlleleDepth),Res)
}
## Return outputs
return(Res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.