R/sim.mpcross.R

Defines functions sim.mpcross

Documented in sim.mpcross

#' Simulate data from multi-parent designs
#' 
#' Data is simulated according to a pedigree, map and QTL model
#' @export
#' @useDynLib mpMap
#' @param map Linkage map with which to generate data. See \code{\link[qtl]{sim.map}}
#' @param pedigree Pedigree for a multi-parent cross. Can be generated using \code{\link[mpMap]{sim.mpped}}
#' @param qtl QTL model, defined by a matrix with one row per QTL and 6 columns: the chromosome of the QTL, the position in cM on that chromosome, and the four founder effects
#' @param vare Phenotypic error variance
#' @param error.prob Probability of genotyping errors - data will be changed with this probability to one of the other founder values
#' @param missing.prob Probability of missing data in final genotypes
#' @param full.prob Probability of fully informative markers. Markers will be assigned with this probability to retain IBD genotypes from founders rather than being recoded into binary values. See details below for more information
#' @param keep.qtlgeno Flag for whether to retain the QTL genotypes as a component in the output \code{mpcross} object
#' @param transpos Positions of potential translocation (vector)
#' @param transval Which founder carries the translocation
#' @param map.function Map function for conversion of linkage map into recombination fractions. Default is "haldane"
#' @param seed Random seed for generation of data
#' @param fg Input founder genotypes (optional) - otherwise generated randomly
#' @param founderld Flag for whether to generate founder genotypes in linkage equilibrium (FALSE=default) or according to recombination map (TRUE)
#' @return Object of class \code{mpcross}. See \code{\link[mpMap]{mpcross}} for further details. Additional components are:
#' \item{ibd}{ Fully informative founder genotypes for all markers}
#' \item{qtlgeno}{ If argument \code{keep.qtlgeno} is \code{TRUE} then QTL genotypes will be retained}
#' @details Data are initially generated by transmitting founder genotypes down through the pedigree to the finals. Errors, missing data, and binary alleles are then overlaid on this data (stored in $ibd). If founderld==FALSE, binary alleles are generated at each locus with probability 0.25 that one founder will have the allele; 0.50 that two founders will have the allele; and 0.25 that three founders will have the allele. The founders with the allele are randomly selected after the number of founders with the allele has been simulated. If founderld==TRUE then some markers may be monomorphic and will need to be removed from the resulting object using \code{\link[mpMap]{clean.mpcross}}.
#' 
#' Note that if founder genotypes are input they should be coded as follows:
#' DArT markers take values in {0,1}
#' SNP markers take values in {0,2}
#' All other markers take some other set of values. 
#' @note Translocations can only be generated when founderld==FALSE and no founder genotypes are input. Note that founder effects in the QTL model are per allele; thus, the phenotypic difference between a line carrying the founder and one that is not will be twice the input founder effect (because all lines are inbred). 
#' @seealso \code{\link[mpMap]{sim.mpped}}, \code{\link[qtl]{sim.map}}
#' @examples
#' map <- sim.map(len=100, n.mar=11, eq.spacing=TRUE, include.x=FALSE)
#' sim.ped <- sim.mpped(4, 1, 500, 6, 1)
#' sim.dat <- sim.mpcross(map=map, pedigree=sim.ped, qtl=matrix(data=c(1, 50, .4, 0, 0, 0), nrow=1, ncol=6, byrow=TRUE), seed=1)

sim.mpcross <- 
function(map, pedigree, qtl=NULL, vare=1, error.prob=0, missing.prob=0, 
full.prob=0, keep.qtlgeno=TRUE, transpos=1, transval=0, map.function=c("haldane", "kosambi"), seed=1, fg=NULL, founderld=FALSE)
{
  set.seed(seed)

  map.function <- match.arg(map.function)
  if (error.prob < 1e-50) 
      error.prob <- 1e-50
  if (error.prob > 1) {
      error.prob <- 1 - 1e-50
      warning("error.prob shouldn't be > 1!")
  }
  n.founders <- sum(pedigree[,2]==0 & pedigree[,3]==0)

  ## check dimensions of input founders
  if (!is.null(fg) && (nrow(fg)!=n.founders | ncol(fg) != length(unlist(map))))
	stop("Input founder genotype matrix has the wrong dimensions")

  pednum <- check_ped(pedigree)

  #### May need to check some bug about ordering of QTL
  if (!is.null(qtl) && is.matrix(qtl)) 
      qtl <- qtl[order(qtl[, 1], qtl[, 2]), ] 
 
  if (map.function == "kosambi") 
      mf <- kosambiX2R else mf <- haldaneX2R

  n.founders <- length(which(pedigree[,2]==0&pedigree[,3]==0))
  n.ind <- sum(pedigree[,4])
  QTLm <- NULL
  newmap <- map
  if (is.null(qtl))  
    n.qtl <- 0 else  { 
 	newmap <- check_qtl(QTL=qtl, map=map, n.founders=n.founders)
	if (!is.matrix(qtl))	QTLm <- rbind(qtl) else QTLm <- qtl 
	n.qtl <- nrow(QTLm) 
    }
  # generate ibd genotypes - these are the true values, saved through
  geno <- CR_gen_geno(newmap, mf, pedigree, seed, transpos, transval)

  qtlgeno <- list()
  # need to select qtlgeno out of the genotypes
  if (n.qtl>0) {
    qtlcol <- grep("QTL", colnames(geno$finals))
    qtlgeno$founders <- geno$founders[,qtlcol, drop=FALSE]
    qtlgeno$finals <- geno$finals[,qtlcol, drop=FALSE]
#    geno$founders <- geno$founders[,-qtlcol, drop=FALSE]
#    geno$finals <- geno$finals[,-qtlcol, drop=FALSE]
  }

  # generate phenotype
  pheno <- generate_pheno(n.founders, qtlgeno$finals, QTLm, vare, n.ind)
  pheno <- as.data.frame(pheno)
  obsgeno <- geno 
  # generate errors in the data
  if (error.prob>0) 
    obsgeno <- generate_error(obsgeno, error.prob)

  # generate dominant markers
  if (full.prob < 1 | !is.null(fg))
    obsgeno <- generate_obs(obsgeno, newmap, full.prob, fg, transpos, transval, founderld)

	## do changes need to be made given that the generate.obs and
	## generate.obs2 may return slightly different values? 
	## need to coordinate so dimensions are the same. 

  if (n.qtl > 0)
  {
	qtlcol <- grep("QTL", colnames(obsgeno$finals))
	obsgeno$founders <- obsgeno$founders[,-qtlcol, drop=FALSE]
	obsgeno$finals <- obsgeno$finals[, -qtlcol, drop=FALSE]
	geno$founders <- geno$founders[,-qtlcol, drop=FALSE]
	geno$finals <- geno$finals[, -qtlcol, drop=FALSE]
#  	colnames(obsgeno$founders) <- unlist(lapply(newmap, names))[-grep("QTL", unlist(lapply(newmap, names)))]
#  	colnames(obsgeno$finals) <- unlist(lapply(newmap, names))[-grep("QTL", unlist(lapply(newmap, names)))]
#  } else {
#	  colnames(obsgeno$founders) <- unlist(lapply(newmap, names))
#	  colnames(obsgeno$finals) <- unlist(lapply(newmap, names))
  }

  obsgeno <- combine_ibd(obsgeno)

  map2 <- lapply(newmap, function(x) if (length(grep("QTL", names(x))>0)) return (x[-grep("QTL", names(x))]) else return(x))
  class(map2) <- "map"

  colnames(obsgeno$founders) <- colnames(obsgeno$finals) <- unlist(lapply(map2,names))

#  colnames(obsgeno$founders) <- substr(colnames(obsgeno$founders), 6, 
#		nchar(colnames(obsgeno$founders)))
#  colnames(obsgeno$finals) <- substr(colnames(obsgeno$finals), 6, 
#		nchar(colnames(obsgeno$finals)))
  
  if (missing.prob > 0) {
    obsgeno$finals[sample(c(TRUE, FALSE), length(obsgeno$finals), replace=TRUE, 
	prob=c(missing.prob, 1-missing.prob))] <- NA
  }

  rownames(obsgeno$finals) <- paste("L", 1:n.ind, sep="")
  rownames(pheno) <- rownames(obsgeno$finals)

  # ibd values are fully informative, while observed may have dominant markers
  mpcross <- list(founders=obsgeno$founders, finals=obsgeno$finals, 
	ibd=list(founders=geno$founders, finals=geno$finals), pheno=pheno, 
	map=map2, pedigree=pedigree, id=geno$id, fid=geno$fid) 

  if (keep.qtlgeno==TRUE)
	mpcross$qtlgeno <- qtlgeno

  attr(mpcross, "type") <- paste("ri", n.founders, "self", sep="")

  class(mpcross) <- "mpcross"  
  clean(mpcross)
  rownames(mpcross$founders) <- LETTERS[1:n.founders]

  mpcross 
}

Try the mpMap package in your browser

Any scripts or data that you put into this service are public.

mpMap documentation built on May 29, 2017, 2:50 p.m.