# Start of cross.R #############################################################
# crossesEqual -----------------------------------------------------------------
#' Test if two \pkg{R/qtl} \code{cross} objects are essentially equal.
#'
#' This uses the \pkg{R/qtl} function \code{comparecrosses} to check if two
#' input \code{cross} objects are essentially equal, with key elements being
#' identical, and numeric values being equal (within the given tolerance).
#'
#' @param cross1 An \pkg{R/qtl} \code{cross} object.
#' @param cross2 An \pkg{R/qtl} \code{cross} object.
#' @param tol Tolerance for comparing numeric values.
#'
#' @return Returns \code{TRUE} if the two input \code{cross} objects are
#' essentially equal. Otherwise, this function returns a character vector
#' describing an observed difference between the two \code{cross} objects.
#'
#' @template seealso-rqtl-manual
#'
#' @export
#' @family cross object functions
#' @rdname crossesEqual
crossesEqual <- function(cross1, cross2, tol=1e-5) {
diffs <- tryCatch({
suppressMessages( qtl::comparecrosses(cross1, cross2, tol=tol) )
result <- character()
}, error=function(e) {
result <- e[['message']]
})
return( if ( length(diffs) == 0 ) {TRUE} else {diffs} )
}
# getFlankingPhenoColIndices ---------------------------------------------------
#' Get flanking phenotype column indices.
#'
#' Get the indices of the phenotype columns in the \code{cross} phenotype data
#' frame that are flanking the specified index, excluding special columns such
#' as the sample ID column.
#'
#' @param cross An \pkg{R/qtl} \code{cross} object.
#' @param i Column index for which the flanking phenotype column indices
#' should be returned.
#'
#' @return Flanking phenotype column indices. If the specified index refers to
#' the first or last column, an \code{NA} value is returned for the lower or
#' upper flanking index, respectively.
#'
#' @keywords internal
#' @rdname getFlankingPhenoColIndices
getFlankingPhenoColIndices <- function(cross, i) {
stopifnot( isSinglePositiveNumber(i) )
# Get indices of phenotype columns in cross.
pheno.col <- getPhenoColIndices(cross)
if ( i < 1 || i > ncol(cross$pheno) ) {
stop("cross phenotype column index out of bounds - '", i, "'")
}
# Get lower flanking phenotype column index, or NA value if none available.
lower.diff <- i - pheno.col
if ( any(lower.diff > 0) ) {
lower.index <- which( lower.diff == min(lower.diff[ lower.diff > 0 ]) )
} else {
lower.index <- NA
}
# Get upper flanking phenotype column index, or NA value if none available.
upper.diff <- pheno.col - i
if ( any(upper.diff > 0) ) {
upper.index <- which( upper.diff == min(upper.diff[ upper.diff > 0 ]) )
} else {
upper.index <- NA
}
return( c(lower.index, upper.index) )
}
# hasTimeSeriesPhenotypes ------------------------------------------------------
#' Test if \code{cross} contains time-series phenotypes.
#'
#' @param cross An \pkg{R/qtl} \code{cross} object.
#' @param allow.gaps Allow gaps in time series, provided
#' that any gaps are a multiple of the inferred time step.
#' @param tol Tolerance for time step equality.
#'
#' @return \code{TRUE} if \code{cross} seems to have time-series phenotypes,
#' given the specified constraints; \code{FALSE} otherwise.
#'
#' @template section-time-series
#'
#' @export
#' @family cross object functions
#' @family time-series functions
#' @rdname hasTimeSeriesPhenotypes
hasTimeSeriesPhenotypes <- function(cross, allow.gaps=TRUE, tol=1e-5) {
return( ! is.null( inferTimeStep(cross, allow.gaps=allow.gaps, tol=tol) ) )
}
# inferLodColNames -------------------------------------------------------------
#' Infer \code{scanone} result LOD column names.
#'
#' Given a \code{cross} and set of phenotypes, get the expected the LOD column
#' names of the corresponding scanone result.
#'
#' @param cross An \pkg{R/qtl} \code{cross} object.
#' @param pheno.col Vector indicating the phenotypes for which LOD column names
#' should be inferred. This can be an integer vector of phenotype column indices
#' with respect to the columns of the \code{cross} phenotype \code{data.frame},
#' or a character vector that contains phenotype IDs or their syntactically
#' valid names. If none are specified, LOD names are inferred for all
#' \code{cross} phenotypes.
#'
#' @return Vector of the LOD column headings that would be expected in a
#' \code{scanone} result for the given \code{cross} and phenotypes.
#'
#' @keywords internal
#' @rdname inferLodColNames
inferLodColNames <- function(cross, pheno.col=NULL) {
pheno.col <- getPhenoColIndices(cross, pheno.col)
if ( length(pheno.col) == 1 ) {
lodcol.names <- 'lod'
} else if ( length(pheno.col) > 1 ) {
lodcol.names <- colnames(cross$pheno)[pheno.col]
} else {
stop("cannot infer LOD column names - no phenotypes specified")
}
return(lodcol.names)
}
# inferStrainIndices -----------------------------------------------------------
#' Infer sample strain indices.
#'
#' Infers strain indices of a set of samples from the given object. A character
#' vector of sample IDs can be passed as an argument, in which case the strain
#' indices will be inferred from each set of identical consecutive sample IDs.
#' If a \code{cross} object is passed as an argument, strain indices will be
#' inferred from sample IDs, if present. Otherwise, strain indices are inferred
#' from each set of identical consecutive sample genotypes.
#'
#' @param x An \pkg{R/qtl} \code{cross} object,
#' or a character vector of sample IDs.
#'
#' @return Vector of sample strain indices inferred from the input object.
#'
#' @template section-sample-ids
#'
#' @export
#' @family cross object functions
#' @rdname inferStrainIndices
inferStrainIndices <- function (x) {
UseMethod('inferStrainIndices', x)
}
# inferStrainIndices.character -------------------------------------------------
#' @export
#' @rdname inferStrainIndices
inferStrainIndices.character <- function(x) {
runs <- rle(x)
if ( anyDuplicated(runs$values) ) {
stop("non-consecutive replicate sample IDs")
}
runs$values <- 1:length(runs$values)
return( inverse.rle(runs) )
}
# inferStrainIndices.cross -----------------------------------------------------
#' @export
#' @rdname inferStrainIndices
inferStrainIndices.cross <- function(x) {
# For each sample, combine genotypes into a single string.
geno <- apply(qtl::pull.geno(x), 1, paste0, collapse='')
# Get cross sample IDs.
cross.info <- attr(x, 'info')
if ( ! is.null(cross.info) ) {
compareCrossInfo(x, cross.info)
sample.ids <- getSamples(cross.info)
} else {
sample.ids <- pull.ind(x)
}
# If cross has sample IDs, infer strain indices from these, then
# check genotypes are identical in the inferred strain replicates..
if ( ! is.null(sample.ids) ) {
strain.indices <- inferStrainIndices(sample.ids)
for ( s in unique(strain.indices) ) {
strain.geno <- geno[ which( strain.indices == s ) ]
if ( any(strain.geno != strain.geno[1]) ) {
stop("genotype mismatch in replicate samples")
}
}
} else { # ..otherwise infer strain indices directly from genotypes.
runs <- rle(geno)
runs$values <- 1:length(runs$values)
strain.indices <- inverse.rle(runs)
}
return(strain.indices)
}
# inferTetradIndices -----------------------------------------------------------
#' Infer sample tetrad indices.
#'
#' @description Infers tetrad indices of a set of samples from the given object.
#' A character vector of sample IDs can be passed as the first argument, in
#' which case the tetrad indices will be inferred, where possible, from the
#' sample IDs themselves.
#'
#' If a \code{cross} is the first argument, tetrad indices will be inferred
#' from sample IDs, if present, or otherwise by comparison of sample genotypes.
#'
#' @template section-sample-ids
#' @template section-tetradic-samples
#'
#' @param x An \pkg{R/qtl} \code{cross} object,
#' or a character vector of sample IDs.
#'
#' @return Vector of sample tetrad indices inferred from the input object.
#' Returns \code{NULL} if tetrad pattern could not be found.
#'
#' @export
#' @family cross object functions
#' @rdname inferTetradIndices
inferTetradIndices <- function(x) {
# TODO: improve identification of tetrads.
UseMethod('inferTetradIndices', x)
}
# inferTetradIndices.character -------------------------------------------------
#' @export
#' @rdname inferTetradIndices
inferTetradIndices.character <- function(x) {
# Tetrad indices corresponding to the input sample IDs.
sample.tindices <- NULL
# Get 'exemplar' sample IDs.
exemplar.ids <- unique(x)
# Get indices of exemplars with respect to the vector of sample IDs.
exemplar.sindices <- sapply(exemplar.ids, match, x)
# Get indices of replicates with respect to the vector of sample IDs.
terminal.sindices <- c(sapply(unname(exemplar.sindices[2:length(exemplar.sindices)]),
function(e) e - 1), length(x) )
replicate.sindices <- mapply(function(i, j) i:j, exemplar.sindices,
terminal.sindices, SIMPLIFY=FALSE)
# Indices of exemplars with respect to the 'notional' exemplar sample IDs,
# in the ideal situation where every sample is present in every tetrad.
# Notional exemplar indices are taken from the numeric/alphanumeric suffix
# of each sample ID. This is used to place exemplar samples in tetrads,
# and can work even if there are missing samples.
exemplar.nindices <- NULL
# If exemplars have numeric suffixes, try to assign notional indices from these..
if ( all( grepl(const$pattern$tetrad['numeric'], exemplar.ids) ) ) {
# Get sample numbers from numeric sample ID suffixes.
sample.numbers <- as.integer( sub(const$pattern$tetrad['numeric'], '\\1', exemplar.ids) )
# Test if sample numbers are ordered.
samples.ordered <- ! is.unsorted(sample.numbers)
# If sample numbers are sorted, get notional indices from the sample
# numbers, as offset by the value of the first sample number.
if (samples.ordered) {
exemplar.nindices <- sample.numbers - sample.numbers[1] + 1
}
# ..otherwise if exemplars have alphanumeric suffixes, try to assign notional indices from those.
} else if ( all( grepl(const$pattern$tetrad['alphanumeric'], exemplar.ids) ) ) {
# Get tetrad numbers from numeric part of sample ID suffixes.
tetrad.numbers <- as.integer( sub(const$pattern$tetrad['alphanumeric'], '\\1', exemplar.ids) )
# Test if tetrad numbers are ordered.
tetrads.ordered <- ! is.unsorted(tetrad.numbers)
# Get tetrad sample labels, assign a label index to each (e.g. 1 for 'A', 2 for 'B').
sample.labels <- toupper( sub(const$pattern$tetrad['alphanumeric'], '\\2', exemplar.ids) )
exemplar.lindices <- sapply(sample.labels, function(sample.label)
which( const$tetrad.sample.labels == sample.label ), USE.NAMES=FALSE)
# Test if sample labels are ordered within tetrads.
labels.ordered <- all( sapply(unique(tetrad.numbers), function(i)
! is.unsorted(exemplar.lindices[ which( tetrad.numbers == i ) ]) ) )
# If tetrad and sample numbers are sorted, get notional indices from the tetrad
# and sample numbers, as offset by the value of the first tetrad number.
if ( tetrads.ordered && labels.ordered ) {
exemplar.tindices <- tetrad.numbers - tetrad.numbers[1] + 1
exemplar.nindices <- 4 * (exemplar.tindices - 1) + exemplar.lindices
}
}
# If notional exemplar indices were identified, use these to assign tetrad indices.
if ( ! is.null(exemplar.nindices) ) {
# Get notional number of tetrads.
ntetrads <- ceiling( max(exemplar.nindices) / 4 )
# Get expected notional indices for the given number of tetrads.
expected.nindices <- lapply(1:ntetrads, function(i)
{ o <- 4*(i-1); seq(o + 1, o + 4) } )
# Get list containing notional indices for each tetrad.
tetrad.nindices <- lapply(1:ntetrads, function(i)
exemplar.nindices[ exemplar.nindices %in% expected.nindices[[i]] ])
# Collapse tetrad notional indices to tetrad exemplar indices.
tetrad.xindices <- tetrad.nindices[ lengths(tetrad.nindices) > 0 ]
exemplar.count <- 0
for ( i in 1:length(tetrad.xindices) ) {
tetrad.xindices[[i]] <- exemplar.count + seq_along(tetrad.xindices[[i]])
exemplar.count <- exemplar.count + length(tetrad.xindices[[i]])
}
# Get sample tetrad indices.
sample.tindices <- integer( length=length(x) )
for ( i in seq_along(tetrad.xindices) ) {
tetrad.sindices <- unlist( replicate.sindices[ tetrad.xindices[[i]] ] )
sample.tindices[tetrad.sindices] <- i
}
}
return(sample.tindices)
}
# inferTetradIndices.cross -----------------------------------------------------
#' @export
#' @rdname inferTetradIndices
inferTetradIndices.cross <- function(x) {
# Tetrad indices corresponding to the input sample IDs.
sample.tindices <- NULL
# Get cross sample IDs.
cross.info <- attr(x, 'info')
if ( ! is.null(cross.info) ) {
compareCrossInfo(x, cross.info)
sample.ids <- getSamples(cross.info)
} else {
sample.ids <- pull.ind(x)
}
stopifnot( length(sample.ids) > 0 )
# Set minimum fraction of tetradic genotypes required to infer tetrad indices.
threshold <- 0.95
# Infer strain indices, as replicates will be included in each tetrad.
strain.indices <- inferStrainIndices(x)
# Get indices of exemplars with respect to tthe set of samples.
exemplar.sindices <- sapply(unique(strain.indices), match, strain.indices)
# Get indices of replicates with respect to the set of samples.
terminal.sindices <- c(sapply(unname(exemplar.sindices[2:length(exemplar.sindices)]),
function(e) e - 1), length(strain.indices) )
replicate.sindices <- mapply(function(i, j) i:j, exemplar.sindices,
terminal.sindices, SIMPLIFY=FALSE)
# Tetrad indices with respect to the set of exemplar
# sample indices. NB: no need to look at full sample
# data, as replicate samples have identical genotypes.
tetrad.xindices <- NULL
# If cross has sample IDs, try to infer tetrad indices from these.
if ( ! is.null(sample.ids) ) {
id.tindices <- inferTetradIndices(sample.ids)
num.tetrads <- max(id.tindices)
exemplar.tindices <- id.tindices[exemplar.sindices]
tetrad.xindices <- lapply(1:num.tetrads, function(i)
which( exemplar.tindices == i ) )
}
# If we failed to infer tetrad exemplar indices from sample IDs,
# fall back to assuming that samples form complete tetrads.
if ( is.null(tetrad.xindices) ) {
num.tetrads <- length(exemplar.sindices) / 4
if ( isWholeNumber(num.tetrads) ) {
tetrad.xindices <- lapply(1:num.tetrads, function(i)
{ o <- 4*(i-1); seq(o + 1, o + 4) } )
}
}
# If tetrad indices identified from sample IDs or assumed from number of
# exemplars, check this against genotype data and get tetrad indices
# corresponding to samples.
if ( ! is.null(tetrad.xindices) ) {
# Calculate total number of possible tetrad genotypes.
total.tetrad.genotypes <- sum(lengths(tetrad.xindices) > 0) * qtl::totmar(x)
total.counted <- total.tetradic <- 0
geno <- qtl::pull.geno(x)
markers <- colnames(geno)
# For each candidate tetrad, count tetradic genotypes.
for ( i in 1:num.tetrads) {
tetrad.exemplar.sindices <- exemplar.sindices[ tetrad.xindices[[i]] ]
# If we have genotypes for a complete tetrad, check if appears to be tetradic.
if ( length(tetrad.exemplar.sindices) == 4 ) {
tetrad.geno <- geno[tetrad.exemplar.sindices, ]
tetrad.tables <- lapply(markers, function(m) table(tetrad.geno[, m]))
geno.counted <- sapply(tetrad.tables, sum) == 4
geno.homozygous <- ! geno.counted | sapply(tetrad.tables, function(x) all(x == 4))
# Create run-length encoding of homozygous genotype mask..,
runs <- rle(geno.homozygous)
# ..set internal runs of homozygosity as FALSE..
runs$values[ -c(1, length(runs$values)) ] <- FALSE
# ..and then recreate the original homozygous genotype mask.
geno.homozygous <- inverse.rle(runs)
geno.counted <- geno.counted & ! geno.homozygous
geno.tetradic <- geno.counted & sapply(tetrad.tables, function(x) all(x == 2))
total.tetradic <- total.tetradic + sum(geno.tetradic)
total.counted <- total.counted + sum(geno.counted)
}
}
# If enough genotypes checked and enough of those
# were tetradic, get tetrad indices of samples.
if ( total.counted > 0.5 * total.tetrad.genotypes ) {
if ( total.tetradic / total.counted >= threshold ) {
sample.tindices <- integer( length=qtl::nind(x) )
for ( i in 1:num.tetrads ) {
tetrad.sindices <- unlist( replicate.sindices[ tetrad.xindices[[i]] ] )
sample.tindices[tetrad.sindices] <- i
}
}
}
}
return(sample.tindices)
}
# inferTimeStep ----------------------------------------------------------------
#' Infer time step of time-series phenotypes.
#'
#' @param cross An \pkg{R/qtl} \code{cross} object.
#' @param allow.gaps Allow gaps in time series, provided
#' that any gaps are a multiple of the inferred time step.
#' @param tol Tolerance for time step equality.
#'
#' @return Inferred time step. Returns \code{NULL}
#' if time step could not be inferred.
#'
#' @template section-time-series
#'
#' @export
#' @family cross object functions
#' @family time-series functions
#' @rdname inferTimeStep
inferTimeStep <- function(cross, allow.gaps=TRUE, tol=1e-5) {
stopifnot( isBOOL(allow.gaps) )
# Get time-series phenotypes.
cross.info <- attr(cross, 'info')
if ( ! is.null(cross.info) ) {
compareCrossInfo(cross, cross.info)
phenotypes <- getPhenotypes(cross.info)
times <- as.numeric(phenotypes)
} else {
pheno.col <- getPhenoColIndices(cross)
phenotypes <- colnames(cross$pheno)[pheno.col]
times <- makeNumbers(phenotypes)
}
stopifnot( length(phenotypes) > 0 )
# One time point cannot form a series.
if ( length(times) == 1 ) {
return(NULL)
}
# Time points must be valid non-negative numbers.
if ( ! all ( isNonNegativeNumber(times) ) ) {
return(NULL)
}
# Times must be strictly monotonically increasing.
if ( is.unsorted(times, strictly=TRUE) ) {
return(NULL)
}
# Get vector of time steps.
time.steps <- diff(times)
# Infer size of time step.
time.step <- inferStepSize(time.steps, tol=tol)
if ( is.null(time.step) ) {
return(NULL)
}
# Time steps must be consistent.
if ( any(time.step - time.steps > tol) ) {
return(NULL)
}
# Identify gaps where difference between two times deviates from time step.
gap.indices <- which( time.steps - time.step > tol )
# If there are any gaps, verify that gaps are allowed and
# that all gaps are a multiple of the apparent time step.
if ( length(gap.indices) > 0 ) {
if ( ! allow.gaps ) {
return(NULL)
}
step.counts <- time.steps[gap.indices] / time.step - 1
if ( any( ! isWholeNumber(step.counts, tol=tol) ) ) {
return(NULL)
}
}
return(time.step)
}
# interpTimeSeries -------------------------------------------------------------
#' Interpolate gaps in a time-series.
#'
#' Interpolate values for gaps in time-series
#' phenotypes of a \code{cross} object.
#'
#' @param cross An \pkg{R/qtl} \code{cross} object.
#' @param tol Tolerance for time step equality.
#'
#' @return The input \code{cross} object is returned with gaps in time-series
#' phenotypes filled with values interpolated from the gap endpoints.
#'
#' @template section-time-series
#'
#' @export
#' @family cross object functions
#' @family time-series functions
#' @importFrom stats approx
#' @rdname interpTimeSeries
interpTimeSeries <- function(cross, tol=1e-5) {
cross <- padTimeSeries(cross, tol=tol)
# Get indices of phenotype columns in cross.
pheno.col <- getPhenoColIndices(cross)
# Get time-series phenotypes.
cross.info <- attr(cross, 'info')
if ( ! is.null(cross.info) ) {
compareCrossInfo(cross, cross.info)
phenotypes <- getPhenotypes(cross.info)
times <- as.numeric(phenotypes)
} else {
phenotypes <- colnames(cross$pheno)[pheno.col]
times <- makeNumbers(phenotypes)
}
numeric.phenotypes <- sapply(pheno.col,
function (i) is.numeric(cross$pheno[[i]]) )
if ( ! all(numeric.phenotypes) ) {
stop("cannot interpolate non-numeric phenotypes")
}
if ( anyNA(cross$pheno[, pheno.col[1] ]) ) {
stop("cannot interpolate data at start of time series")
}
if ( anyNA(cross$pheno[, pheno.col[ length(pheno.col) ] ]) ) {
stop("cannot interpolate data at end of time series")
}
for ( i in 1:nrow(cross$pheno) ) {
# Get time-series values for sample (including blanks).
time.series <- as.numeric( cross$pheno[i, pheno.col] )
# Get runs of blank values, then get list of vectors, such that each
# vector contains the indices of a consecutive set of blank columns.
blank.runs <- rle( is.na(time.series) )
J <- cumsum(blank.runs$lengths)
I <- c( 1, sapply(J[1:(length(J)-1)], function(j) j + 1) )
b <- blank.runs$values == TRUE
blank.index.list <- mapply(function(i, j) i:j, I[b], J[b], SIMPLIFY=FALSE)
# We know from the code immediately above that consecutive runs of blank
# values are grouped together, and we have checked above that none of
# these run off either end of the time series. Therefore, we can safely
# take the flanking columns as our interpolation endpoints (below).
for ( blank.indices in blank.index.list ) {
# Take values from flanking columns as interpolation endpoints.
x <- c(blank.indices[1] - 1, blank.indices[ length(blank.indices) ] + 1)
# Get time series values for this blank region.
y <- time.series[x]
# Interpolate phenotype values for this blank region.
interpolation <- stats::approx(x, y, blank.indices, 'linear')
# Set blank values from interpolation results.
time.series[blank.indices] <- interpolation$y
}
# Set time-series values for sample (with blanks interpolated).
cross$pheno[i, pheno.col] <- time.series
}
return(cross)
}
# makeCross --------------------------------------------------------------------
#' Make an \pkg{R/qtl} \code{cross} object.
#'
#' This function makes an \pkg{R/qtl} \code{cross} - essentially a list of two
#' elements named \code{'geno'} and \code{'pheno'} - from the input \code{geno}
#' and \code{pheno} objects. Both input objects must have an \code{'info'}
#' attribute of class \code{\linkS4class{CrossInfo}} containing sample IDs. The
#' output \code{cross} will be created from the intersection set of the samples
#' in the input \code{geno} and \code{pheno} objects. Sample replicates in the
#' input \code{pheno} object are preserved. Sample replicates in the input
#' \code{geno} object must have identical genotypes, which will be replicated
#' as needed to match the input phenotypes.
#'
#' @param geno A \code{geno} object.
#' @param pheno A \code{pheno} object.
#'
#' @return An \pkg{R/qtl} \code{cross} object.
#'
#' @keywords internal
#' @rdname makeCross
makeCross <- function(geno, pheno) {
stopifnot( 'geno' %in% class(geno) )
stopifnot( 'pheno' %in% class(pheno) )
# Get sequences from geno object.
geno.seqs <- names(geno)
# Get alleles from geno object.
alleles <- attr(geno, 'alleles')
# Get CrossInfo from geno object.
geno.info <- attr(geno, 'info')
# Validate CrossInfo of geno object.
compareCrossInfo(geno, geno.info)
# Clear unneeded attributes of geno object.
attr(geno, 'alleles') <- NULL
attr(geno, 'info') <- NULL
# Get CrossInfo for pheno data.
pheno.info <- attr(pheno, 'info')
compareCrossInfo(pheno, pheno.info)
attr(pheno, 'info') <- NULL
if ( ! hasSampleIDs(geno.info) ) {
stop("cannot make cross - no genotype sample IDs")
}
if ( ! hasSampleIDs(pheno.info) ) {
stop("cannot make cross - no phenotype sample IDs")
}
# Get sample IDs from genotype info.
geno.samples <- getSamples(geno.info)
# Get sample IDs from phenotype info.
pheno.samples <- getSamples(pheno.info)
if ( length( intersect(geno.samples, pheno.samples) ) == 0 ) {
stop("cannot make cross - no matching samples")
}
if ( is.na(geno.info@crosstype) ) {
stop("cannot make cross - unknown cross type")
}
# Check that genotype replicates have identical genotype data.
run.index.list <- getRunIndexList(geno.samples)
for ( run.indices in run.index.list ) {
if ( length(run.indices) > 1 ) {
for ( geno.seq in names(geno) ) {
run.data <- geno[[geno.seq]]$data[run.indices, ]
if ( ! all( duplicated(run.data)[-1] ) ) {
stop("genotype replicates have conflicting data")
}
}
}
}
# Get indices of phenotype samples in genotype data.
gindices <- match(pheno.samples, geno.samples)
# Expand genotype data to match phenotype samples.
for ( geno.seq in names(geno) ) {
geno[[geno.seq]]$data <- geno[[geno.seq]]$data[gindices, ]
}
# Expand sample table from geno object to match phenotype samples.
sample.table <- geno.info@samples[gindices, ]
rownames(sample.table) <- sample.table$sample.index <- getRowIndices(sample.table)
# Create CrossInfo object from genotype and phenotype info.
cross.info <- methods::new('CrossInfo',
seq = geno.info@seq,
pheno = pheno.info@pheno,
markers = geno.info@markers,
samples = sample.table,
alleles = alleles,
genotypes = geno.info@genotypes,
crosstype = geno.info@crosstype
)
# Prepare genotype and phenotype objects.
class(pheno) <- 'data.frame'
class(geno) <- 'list'
# Create cross list.
cross <- list(geno=geno, pheno=pheno)
# Set cross alleles.
attr(cross, 'alleles') <- alleles
# Set cross info.
attr(cross, 'info') <- cross.info
# Set cross class.
class(cross) <- c(cross.info@crosstype, 'cross')
return(cross)
}
# padTimeSeries ----------------------------------------------------------------
#' Pad gaps in a time-series.
#'
#' @param cross An \pkg{R/qtl} \code{cross} object.
#' @param tol Tolerance for time step equality.
#'
#' @return The input \code{cross} object is returned with all
#' gaps in time-series phenotypes padded with \code{NA} values.
#'
#' @template section-time-series
#'
#' @export
#' @family cross object functions
#' @family time-series functions
#' @rdname padTimeSeries
padTimeSeries <- function(cross, tol=1e-5) {
# Get indices of phenotype columns in cross.
pheno.col <- getPhenoColIndices(cross)
# Get index of ID column in cross.
id.col <- getIdColIndex(cross)
# Get ID column name.
id.col.name <- colnames(cross$pheno)[id.col]
# If ID column found, get flanking phenotypes. The ID column
# is reinserted between these in the padded phenotype data.
if ( ! is.null(id.col) ) {
flanking.indices <- getFlankingPhenoColIndices(cross, id.col)
flanking.phenames <- colnames(cross$pheno)[flanking.indices]
}
# Get phenotype data.
pheno <- qtl::pull.pheno(cross, pheno.col)
# Get phenotype data class.
pheno.class <- unique( sapply(pheno, class) )
if ( length(pheno.class) != 1 ) {
stop("inconsistent phenotype data types")
}
# Get missing value for this phenotype data.
missing.value <- getMissingValueFromClassS3(pheno.class)
# Get phenotypes and sample IDs.
cross.info <- attr(cross, 'info')
if ( ! is.null(cross.info) ) {
compareCrossInfo(cross, cross.info)
phenotypes <- getPhenotypes(cross.info)
times <- as.numeric(phenotypes)
sample.ids <- getSamples(cross.info)
} else {
phenotypes <- colnames(cross$pheno)[pheno.col]
times <- makeNumbers(phenotypes)
sample.ids <- pull.ind(cross)
}
# Time step is the mode of time differences.
time.step <- inferTimeStep(cross, tol=tol)
# Get differences between times.
time.diff <- diff(times)
# Identify gaps where difference between two times deviates from time step.
gap.indices <- which( time.diff - time.step > tol )
# Get number of such gaps.
num.gaps <- length(gap.indices)
# If there are gaps, pad them with NA values.
if ( num.gaps > 0 ) {
# Calculate step counts for all gaps.
step.counts <- as.integer( round(time.diff[gap.indices] / time.step - 1) )
# Set missing time points for each gap.
gap.steps <- vector('list', num.gaps)
for ( i in 1:num.gaps ) {
g <- gap.indices[i]
first.step <- times[g] + time.step
last.step <- times[g + 1] - time.step
gap.steps[[i]] <- seq(first.step, last.step, by=time.step)
}
# Get expected number of time points after gaps are padded.
num.times <- length(times) + sum(step.counts)
# Pad phenotype data and time points.
padded.pheno <- data.frame( matrix(nrow=nrow(pheno), ncol=num.times),
stringsAsFactors=FALSE)
padded.times <- rep(NaN, length=num.times)
padded.pheno[, 1] <- pheno[, 1]
padded.times[1] <- times[1]
t <- 2
for ( d in 1:length(time.diff) ) {
if ( d %in% gap.indices ) {
g <- which( gap.indices == d )
gap.times <- gap.steps[[g]]
for ( gap.time in gap.times ) {
padded.pheno[, t] <- rep(missing.value, nrow(pheno))
padded.times[t] <- gap.time
t <- t + 1L
}
}
i <- d + 1L
padded.pheno[, t] <- pheno[, i]
padded.times[t] <- times[i]
t <- t + 1L
}
# Set syntactically valid column names for padded phenotypes.
colnames(padded.pheno) <- make.names(padded.times)
# If phenotype has ID column, reinsert ID column
# as close as possible to its original position.
if ( ! is.null(id.col) ) {
stopifnot( length(sample.ids) > 0 )
if ( ! is.na(flanking.phenames[1]) ) {
i <- which( colnames(padded.pheno) == flanking.phenames[1] ) + 1
} else {
i <- which( colnames(padded.pheno) == flanking.phenames[2] ) - 1
}
padded.pheno <- insertColumn(padded.pheno, i, col.name=id.col.name, data=sample.ids)
}
# Replace padded phenotype data.
cross$pheno <- padded.pheno
# If cross has CrossInfo, update phenotypes to reflect padded data.
if ( ! is.null(cross.info) ) {
cross.info <- setPhenotypes(cross.info, padded.times)
attr(cross, 'info') <- cross.info
}
}
return(cross)
}
# permIndices ------------------------------------------------------------------
#' Generate permutation indices for a \code{cross} object.
#'
#' @description Given an input \code{cross} object with \code{N} samples, this
#' function generates a non-redundant set of \code{N} permutation indices in
#' the range \code{1-N}, which can be used to permute the phenotype or genotype
#' data of a \code{cross} object, as well as any associated data.
#'
#' @param cross An \pkg{R/qtl} \code{cross} object.
#'
#' @return Vector of sample indices that can be used to permute a
#' \code{cross} object or associated data.
#'
#' @template section-permutations
#'
#' @export
#' @family cross object functions
#' @family permutation functions
#' @rdname permIndices
permIndices <- function(cross) {
stopifnot( 'cross' %in% class(cross) )
num.samples <- nrow(cross$pheno)
sample.indices <- 1:num.samples
cross.info <- attr(cross, 'info')
if ( ! is.null(cross.info) ) {
compareCrossInfo(cross, cross.info)
replicate.indices <- getStrainIndices(cross.info)
tetrad.indices <- getTetradIndices(cross.info)
} else {
replicate.indices <- inferStrainIndices(cross)
tetrad.indices <- inferTetradIndices(cross)
}
tetradic <- ! is.null(tetrad.indices)
if (tetradic) {
sample.bindices <- tetrad.indices
} else {
sample.bindices <- rep.int(1, num.samples)
}
blocks <- lapply( 1:max(sample.bindices),
function(s) which( sample.bindices == s ) )
imbalanced.tetrads <- imbalanced.replicates <- FALSE
perm.indices <- rep(0, num.samples)
for ( block in blocks ) {
rep.freq <- table(replicate.indices[block])
rep.freq <- sort(rep.freq, decreasing=TRUE)
if (tetradic) {
if ( length(rep.freq) == 1 ) {
stop("cannot generate permutation indices - tetrads too imbalanced")
} else if ( length(rep.freq) != 4 ) {
imbalanced.tetrads <- TRUE
}
}
if( length(rep.freq) == 1 || rep.freq[1] > sum(rep.freq[2:length(rep.freq)]) ) {
stop("cannot generate permutation indices - replicates too imbalanced")
} else if ( length( unique(rep.freq) ) > 1 ) {
imbalanced.replicates <- TRUE
}
perm.block <- sample(sample.indices[block])
while ( any(replicate.indices[perm.block] == replicate.indices[block]) ) {
perm.block <- sample(sample.indices[block])
}
perm.indices[block] <- perm.block
}
if (imbalanced.replicates) {
warning("sample replicates are imbalanced")
}
if (imbalanced.tetrads) {
warning("tetrads are imbalanced")
}
return(perm.indices)
}
# permCross --------------------------------------------------------------------
#' Permute \code{cross} phenotype or genotype data.
#'
#' @description Given an input \code{cross} object, this function permutes
#' either the phenotype or genotype data (but not both), and returns the
#' permuted \code{cross} object.
#'
#' Note that when permuting phenotypes, any covariates must also be permuted
#' in the same way. In such cases, it is recommended to generate permutation
#' indices with \code{\link{permIndices}}, pass the result to the
#' \code{perm.indices} parameter of this function, and use the same
#' permutation indices to permute the covariate data.
#'
#' When permuting genotypes, any derived data (e.g. genotype probabilities)
#' are recalculated by default. To prevent recalculation of derived data,
#' set the \code{refresh} parameter to \code{FALSE}.
#'
#' @param cross An \pkg{R/qtl} \code{cross} object.
#' @param perm.indices Permutation indices.
#' @param perm.pheno Permute phenotype data (incompatible with \code{perm.geno}).
#' @param perm.geno Permute genotype data (incompatible with \code{perm.pheno}).
#' @param refresh Refresh any derived data.
#'
#' @return Permuted \code{cross} object.
#'
#' @template section-permutations
#'
#' @export
#' @family cross object functions
#' @family permutation functions
#' @rdname permCross
permCross <- function(cross, perm.indices=NULL, perm.pheno=TRUE,
perm.geno=FALSE, refresh=TRUE) {
stopifnot( 'cross' %in% class(cross) )
stopifnot( isBOOL(perm.pheno) )
stopifnot( isBOOL(perm.geno) )
stopifnot( isBOOL(refresh) )
if ( ! xor(perm.pheno, perm.geno) ) {
stop("permCross must permute either cross phenotypes or cross genotypes")
}
num.samples <- nrow(cross$pheno)
if ( ! is.null(perm.indices) ) {
stopifnot( length(perm.indices) == num.samples )
stopifnot( all( perm.indices %in% 1:num.samples ) )
stopif( anyDuplicated(perm.indices) )
} else {
perm.indices <- permIndices(cross)
}
if (perm.pheno) {
pheno.col <- getPhenoColIndices(cross)
cross$pheno[, pheno.col] <- cross$pheno[perm.indices, pheno.col]
}
if (perm.geno) {
geno.seqs <- names(cross$geno)
for ( geno.seq in geno.seqs ) {
cross$geno[[geno.seq]]$data <- cross$geno[[geno.seq]]$data[perm.indices, ]
}
if (refresh) {
cross <- refreshCross(cross)
}
}
return(cross)
}
# refreshCross -----------------------------------------------------------------
#' Refresh derived data attributes of \pkg{R/qtl} \code{cross}.
#'
#' @param cross An \pkg{R/qtl} \code{cross} object.
#' @param derived.attributes Names of derived data attributes to refresh. If
#' none are specified, all derived data attributes in the \code{cross} are
#' refreshed.
#'
#' @return Input \code{cross} object with derived data attributes refreshed.
#'
#' @keywords internal
#' @rdname refreshCross
refreshCross <- function(cross, derived.attributes=NULL) {
# This function is modelled on R/qtl function clean.cross; if that function
# changes, this should be changed to reflect it. Also, this function relies
# on internal flag 'onlylod' for determining which function to use.
stopifnot( 'cross' %in% class(cross) )
known.derived <- c('argmax', 'draws', 'errorlod', 'prob', 'rf')
if ( ! is.null(derived.attributes) ) {
unknown <- derived.attributes[ ! derived.attributes %in% known.derived ]
if ( length(unknown) > 0 ) {
stop("cross has unknown derived data elements - '",
toString(unknown), "'")
}
} else {
derived.attributes <- known.derived
}
geno.attr <- unique( as.vector( sapply( cross$geno, function (g) names(g) ) ) )
if ( 'argmax' %in% derived.attributes && 'argmax' %in% geno.attr ) {
arg.names <- c('step', 'off.end', 'error.prob', 'map.function', 'stepwidth')
args <- lapply(arg.names, function (a) { unique( sapply( cross$geno,
function (g) attr(g$argmax, a) ) ) } )
names(args) <- arg.names
cross <- qtl::argmax.geno(cross, step=args$step, off.end=args$off.end,
error.prob=args$error.prob, map.function=args$map.function,
stepwidth=args$stepwidth)
}
if ( 'draws' %in% derived.attributes && 'draws' %in% geno.attr ) {
n.draws <- unique( sapply( cross$geno, function (g) dim(g$draws)[3] ) )
arg.names <- c('step', 'off.end', 'error.prob', 'map.function', 'stepwidth')
args <- lapply(arg.names, function (a) { unique( sapply( cross$geno,
function (g) attr(g$draws, a) ) ) } )
names(args) <- arg.names
cross <- qtl::sim.geno(cross, n.draws=n.draws, step=args$step,
off.end=args$off.end, error.prob=args$error.prob,
map.function=args$map.function, stepwidth=args$stepwidth)
}
if ( 'errorlod' %in% derived.attributes && 'errorlod' %in% geno.attr ) {
arg.names <- c('error.prob', 'map.function')
args <- lapply(arg.names, function (a) { unique( sapply( cross$geno,
function (g) attr(g$errorlod, a) ) ) } )
names(args) <- arg.names
cross <- qtl::calc.errorlod(cross, error.prob=args$error.prob,
map.function=args$map.function)
}
if ( 'prob' %in% derived.attributes && 'prob' %in% geno.attr ) {
arg.names <- c('step', 'off.end', 'error.prob', 'map.function', 'stepwidth')
args <- lapply(arg.names, function (a) { unique( sapply( cross$geno,
function (g) attr(g$prob, a) ) ) } )
names(args) <- arg.names
cross <- qtl::calc.genoprob(cross, step=args$step, off.end=args$off.end,
error.prob=args$error.prob, map.function=args$map.function,
stepwidth=args$stepwidth)
}
if ( 'rf' %in% derived.attributes && 'rf' %in% names(cross) ) {
lod.flag <- attr(cross$rf, 'onlylod')
if ( ! is.null(lod.flag) && lod.flag ) {
cross <- qtl::markerlrt(cross)
} else {
cross <- qtl::est.rf(cross)
}
}
return(cross)
}
# End of cross.R ###############################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.