R/identifyDesign.R

identifyDesignPair <- function(pair, nFounders)
{
	char1 <- nchar(pair[1])
	char2 <- nchar(pair[2])
	motherName <- pair[3]
	fatherName <- pair[4]
	maxG <- log(nFounders, base=2)
	headStr <- paste(nFounders, "wayG", maxG, sep="")
	#Are we begining a selfing or AIC line?
	if(pair[1] == headStr && pair[2] == headStr)
	{
		if(motherName == fatherName)
		{
			return(paste(nFounders, "wayG", maxG, "self1", sep=""))
		}
		return(paste(nFounders, "wayG", maxG, "aic1", sep=""))
	}
	#Are we doing part of the initial cross?
	headStr <- paste(nFounders, "wayG", sep="")
	if(char1 == 6 && char2 == 6 && substr(pair[1], 1, 5) == headStr && substr(pair[2], 1, 5) == headStr)
	{
		n <- substr(pair[1], 6, 6)
		if(n >= maxG)
		{
			stop("Too many generations of initial crossing")
		}
		return(paste(nFounders, "wayG", as.integer(n)+1, sep=""))
	}
	#Straight selfing line?
	headStr <- paste0(nFounders, "wayG", maxG, "self")
	if(motherName == fatherName && pair[1] == pair[2] && substr(pair[1], 1, 10) == headStr && char1 == 11)
	{
		generation <- as.integer(substr(pair[1], 11, 11))
		return(paste0(nFounders, "wayG", maxG, "self", generation+1))
	}
	headStr <- paste0(nFounders, "wayG", maxG, "aic")
	if(substr(pair[1], 1, 9) == headStr && pair[1] == pair[2])
	{
		#We need to check that it has the form <Founders>wayG<log2(nFounders)>aic<number>. Specifically, we need to check that the end is a number.
		aicNumberLength <- char1 - 9
		aicNumberPart <- substr(pair[1], 10, char1)
		aicRegex <- regexec("[0-9]+", aicNumberPart)[[1]]
		aicNumberLength <- attr(aicRegex, "match.length")
		aicEndsWithNumber <- aicNumberLength == nchar(aicNumberPart)
		#AIC line?
		if(motherName != fatherName && aicEndsWithNumber)
		{
			generation <- as.integer(substr(pair[1], 10, char1))
			return(paste0(nFounders, "wayG", maxG, "aic", generation+1))
		}
		#Initial selfing of an AIC line?
		if(motherName == fatherName && aicEndsWithNumber)
		{
			return(paste0(substr(pair[1], 1, char1), "self1"))
		}
		#Get out selfing number part
		numberPart <- substr(pair[1], 14+aicNumberLength, char1)
		endsWithNumber <- attr(regexec("[0-9]+", numberPart)[[1]], "match.length") == nchar(numberPart)
		if(motherName == fatherName && substr(pair[1], 10+aicNumberLength, 13+aicNumberLength) == "self" && endsWithNumber)
		{
			generation <- as.integer(substr(pair[1], 14+aicNumberLength, char1))
			return(paste0(substr(pair[1], 1, 13+aicNumberLength), generation+1))
		}
	}
	#isG1 <- regexec(paste(nFounders, "wayG[0-9]", sep=""), pair[1])
	#isG2 <- regexec(paste(nFounders, "wayG[0-9]", sep=""), pair[2])
	return(NA)
}

#' Identify subtype of design in pedigree
#'
#' For a pedigree containing multiple variations on MAGIC designs, identify
#' the subtype of design, e.g., MAGIC+
#' @export
#' @param pedigree Pedigree to categorize
#' @return An additional vector identifying designs of individuals within the pedigree

identifyDesign <- function(pedigree)
{
	if(inherits(pedigree, "mpcross")) pedigree <- pedigree$pedigree
	designCol <- rep(NA, nrow(pedigree))
	isInitial <- pedigree[,2] == 0 & pedigree[,3] == 0
	nFounders <- sum(isInitial)
	if(nFounders != 4 && nFounders != 8) stop("nFounders must be 4 or 8")
	designCol[which(isInitial)] <- paste(nFounders, "wayG0", sep="")
	names(designCol) <- rownames(pedigree)
	addedDesigns <- which(isInitial)
	#Standardise the parents founders as NA (Otherwise, 0 and "0" have a different behaviour when the occur in pedigree[,2] of designCol[pedigree[,2]] - 0 shortens the result, but "0" returns an NA)
	pedigree[1:nFounders, 2:3] <- NA
	while(length(addedDesigns) > 0)
	{
		canAddDesigns <- is.na(designCol) & !is.na(designCol[pedigree[,2]]) & !is.na(designCol[pedigree[,3]])
		#Now we're dealing with only the subset where canAddDesigns is true
		motherDesign <- designCol[pedigree[which(canAddDesigns),2]]
		fatherDesign <- designCol[pedigree[which(canAddDesigns), 3]]
		motherName <- pedigree[pedigree[which(canAddDesigns),2],1]
		fatherName <- pedigree[pedigree[which(canAddDesigns),3],1]
		designCombinations <- cbind(motherDesign, fatherDesign, motherName, fatherName)
		newDesign <- apply(designCombinations, 1, identifyDesignPair, nFounders = nFounders)
		#And we want to further deal with the subset for which newDesign is not NA
		identifiedDesigns <- which(canAddDesigns)[which(!is.na(newDesign))]
		designCol[identifiedDesigns] <- newDesign
		addedDesigns <- identifiedDesigns
	}
	return(designCol)
}
behuang/mpMap documentation built on May 12, 2019, 10:53 a.m.