R/estimateRF.R

Defines functions estimateRFInternal estimateRF

Documented in estimateRF

#' @title Estimate pairwise recombination fractions
#' 
#' This function estimates the recombination fractions between all pairs of markers in the input object. The recombination fractions are estimated using numerical maximum likelihood, and a grid search. Because every estimate will be one of the input test values, the estimates can be stored efficiently with a single byte per estimate.
#' @param object An object of class \code{mpcross}. 
#' @param recombValues a vector of test values to use for the numeric maximum likelihood step. Must contain 0 and 0.5, and must have less than 255 values in total. The default value is \code{c(0:20/200, 11:50/100)}. 
#' @param lineWeights Values to use to correct for segregation distortion. This parameter should in general be left unspecified. 
#' @param gbLimit The maximum amount of working memory this estimation step should be allowed to use at any one time, in gigabytes. Smaller values may increase the computation time. A value of -1 indicates no limit.  
#' @param keepLod Set to \code{TRUE} to compute the likelihood ratio score statistics for testing whether the estimate is different from 0.5. Due to memory constraints this should generally be left as \code{FALSE}. 
#' @param keepLkhd Set to \code{TRUE} to compute the maximum value of the likelihood. Due to memory constraints this should generally be left as \code{FALSE}.
#' @param verbose Output diagnostic information, such as the amount of memory required, and the progress of the computation. 
#' @param markerRows Used to estimate only a subset of the full matrix of pairwise recombination fractions.
#' @param markerColumns Used to estimate only a subset of the full matrix of pairwise recombination fractions.
#' @return An object of class \code{mpcrossRF}, which contains the original genetic data, and also estimated recombination fraction data. 
#' @details
#' The majority of the options for this function should \emph{not} be specified by the end user. In particular, \code{keepLkhd}, \code{keepLod} and \code{lineWeights} should not be specified without good reason.
#'
#' Arguments \code{markerRows} and \code{markerColumns} can be used to estimate only a subset of the full recombination matrix. Reasons for doing this could include
#' \enumerate{
#' 	\item Allowing the full matrix to be estimated in multiple steps, with intermediate computations being saved
#'	\item The matrix of recombination fractions has \emph{mostly} already been estimated. This can occur when adding extra markers. 
#' 	\item Memory limitations. Performing estimation for markers with many alleles takes a large amount of memory. It is often useful to estimate recombination fractions between all pairs of biallelic markers, and let other pairs be done using a separate call. 
#' }
#' If arguments \code{markerRows} and \code{markerColumns} are used, only the \emph{upper-triangular part} of the specified subset is computed. See the examples for details. 
#' @export
#' @examples map <- qtl::sim.map(len = 100, n.mar = 11, include.x=FALSE)
#' f2Pedigree <- f2Pedigree(1000)
#' \donttest{cross <- simulateMPCross(map = map, pedigree = f2Pedigree, mapFunction = haldane, seed = 1)
#' rf <- estimateRF(cross)
#' #Print the estimated recombination fraction values
#' rf@@rf@@theta[1:11, 1:11]
#' 
#' #Now only estimate recombination fractions between the first 3 markers. 
#' #    The other estimates will just be marked as NA
#' rf <- estimateRF(cross, markerRows = 1:3, markerColumns = 1:3)
#' #Print the estimated recombination fraction values
#' rf@@rf@@theta[1:11, 1:11]
#' 
#' #A more complicated example, where three values are estimated
#' rf <- estimateRF(cross, markerRows = 1, markerColumns = 1:3)
#' #Print the estimated recombination fraction values
#' rf@@rf@@theta[1:11, 1:11]
#' 
#' #In this case only ONE value is estimated, because only one element of the requested subset 
#' #    lies in the upper-triangular part - The value on the diagonal. 
#' rf <- estimateRF(cross, markerRows = 3, markerColumns = 1:3)
#' #Print the estimated recombination fraction values
#' rf@@rf@@theta[1:11, 1:11]}
estimateRF <- function(object, recombValues, lineWeights, gbLimit = -1, keepLod = FALSE, keepLkhd = FALSE, verbose = FALSE, markerRows = 1:nMarkers(object), markerColumns = 1:nMarkers(object))
{
	inheritsNewMpcrossArgument(object)

	if (missing(recombValues)) recombValues <- c(0:20/200, 11:50/100)
	if (length(recombValues) >= 255)
	{
		stop("This package currently allows a maximum of 254 possible recombination fraction values")
	}
	recombValues <- sort(recombValues)
	if(!(0.5 %in% recombValues))
	{
		stop("Input recombValues must contain a value of 0.5")
	}
	if(!(0 %in% recombValues))
	{
		stop("Input recombValues must contain a value of 0")
	}
	if(missing(lineWeights))
	{
		lineWeights <- lapply(object@geneticData, function(x) rep(1, nLines(x)))
	}
	if(is.logical(verbose))
	{
		if(is.na(verbose))
		{
			stop("Input verbose cannot be NA")
		}
		else if(verbose)
		{
			verbose <- list(verbose = TRUE, progressStyle = 3L)
		}
		else verbose <- list(verbose = FALSE, progressStyle = 3L)
	}
	else
	{
		if(!is.list(verbose) || !("progressStyle" %in% names(verbose)))
		{
			stop("Input verbose must be TRUE, FALSE, or a list with entries named progressStyle and verbose")
		}
		if(!("verbose" %in% names(verbose))) verbose$verbose <- TRUE
		if(length(verbose$progressStyle) != 1L || !(verbose$progressStyle %in% 0:3))
		{
			stop("Input verbose$progressStyle must have value 0, 1, 2 or 3")
		}
		if(!is.logical(verbose$verbose) || length(verbose$verbose) != 1L)
		{
			stop("Input verbose$verbose must have value 0, 1, 2 or 3")
		}
	}

	if(class(lineWeights) == "numeric") lineWeights <- list(lineWeights)
	isNumericVectorListArgument(lineWeights)
	for(i in 1:length(object@geneticData))
	{
		if(length(lineWeights[[i]]) != nLines(object@geneticData[[i]]))
		{
			stop(paste0("Value of lineWeights[[", i, "]] must have nLines(object)[", i, "] entries"))
		}
	}
	listOfResults <- estimateRFInternal(object = object, recombValues = recombValues, lineWeights = lineWeights, markerRows = markerRows, markerColumns = markerColumns, keepLod = keepLod, keepLkhd = keepLkhd, gbLimit = gbLimit, verbose = verbose)
	if(is(object, "mpcrossRF") || ((is(object, "mpcrossLG") || is(object, "mpcrossMapped")) && !is.null(object@rf)))
	{
		warning("Updating RF data for existing object")
		.Call("assignRawSymmetricMatrixFromEstimateRF", object@rf@theta, markerRows, markerColumns, listOfResults$theta, PACKAGE="mpMap2")

		if(!is.null(listOfResults$lod))
		{
			if(!is.null(object@rf@lod)) lod <- object@rf@lod
			else lod <- new("dspMatrix", Dim = rep(nMarkers(object), 2), x = rep(as.numeric(NA), nMarkers(object)*(nMarkers(object) + 1)/2))
			.Call("assignDspMatrixFromEstimateRF", lod, markerRows, markerColumns, listOfResults$lod, PACKAGE="mpMap2")
			rownames(lod) <- colnames(lod) <- markers(object)
			object@rf@lod <- lod
		}

		if(!is.null(listOfResults$lkhd))
		{
			if(!is.null(object@rf@lkhd)) lkhd <- object@rf@lkhd
			else lkhd <- new("dspMatrix", Dim = rep(nMarkers(object), 2), x = rep(as.numeric(NA), nMarkers(object)*(nMarkers(object) + 1)/2))
			.Call("assignDspMatrixFromEstimateRF", lkhd, markerRows, markerColumns, listOfResults$lkhd, PACKAGE="mpMap2")
			rownames(lkhd) <- colnames(lkhd) <- markers(object)
			object@rf@lkhd <- lkhd
		}
		object@rf@gbLimit <- gbLimit
		output <- object
	}
	else
	{
		theta <- new("rawSymmetricMatrix", markers = markers(object), levels = recombValues, data = rep(as.raw(0xFF), nMarkers(object)*(nMarkers(object) + 1)/2))
		.Call("assignRawSymmetricMatrixFromEstimateRF", theta, markerRows, markerColumns, listOfResults$theta, PACKAGE="mpMap2")

		lod <- NULL
		if(!is.null(listOfResults$lod))
		{
			lod <- new("dspMatrix", Dim = rep(nMarkers(object), 2), x = rep(as.numeric(NA), nMarkers(object)*(nMarkers(object) + 1)/2))
			.Call("assignDspMatrixFromEstimateRF", lod, markerRows, markerColumns, listOfResults$lod, PACKAGE="mpMap2")
			rownames(lod) <- colnames(lod) <- markers(object)
		}

		lkhd <- NULL
		if(!is.null(listOfResults$lkhd))
		{
			lkhd <- new("dspMatrix", Dim = rep(nMarkers(object), 2), x = rep(as.numeric(NA), nMarkers(object)*(nMarkers(object) + 1)/2))
			.Call("assignDspMatrixFromEstimateRF", lkhd, markerRows, markerColumns, listOfResults$lkhd, PACKAGE="mpMap2")
			rownames(lkhd) <- colnames(lkhd) <- markers(object)
		}
		rf <- new("rf", theta = theta, lod = lod, lkhd = lkhd, gbLimit = gbLimit)

		if(class(object) == "mpcrossLG" || class(object) == "mpcrossMapped")
		{
			output <- object
			output@rf <- rf
		}
		else
		{
			output <- new("mpcrossRF", geneticData = object@geneticData, rf = rf)
		}
	}
	return(output)
}
estimateRFInternal <- function(object, recombValues, lineWeights, markerRows, markerColumns, keepLod, keepLkhd, gbLimit, verbose)
{
	return(.Call("estimateRF", object, recombValues, markerRows, markerColumns, lineWeights, keepLod, keepLkhd, gbLimit, verbose, PACKAGE="mpMap2"))
}
rohan-shah/mpMap2 documentation built on July 21, 2020, 8:58 p.m.