Nothing
#' @title scanonevar
#' @name scanonevar
#' @author Robert W. Corty \email{rcorty@@gmail.com}
#'
#' @description \code{scanonevar} conducts a genome scan in an experimental
#' cross, accommodating covariate effects in residual variance and identifying
#' genetic effects on residual variance.
#'
#' @param cross The \code{cross}, built by \pkg{qtl} to be used in mapping
#' @param mean.formula The formula to describe the mean of the phenotype.
#' Keywords are mean.QTL.add and mean.QTL.dom for the additive and dominance
#' components of the QTL effect on the mean.
#' dglm model will be fit if mean.formula has only fixed effects.
#' hglm model will be fit if mean.formula has one or more random effects.
#' @param var.formula The formula to describe the residual variance of the
#' phenotype. Keywords are var.QTL.add and var.QTL.dom for the additive and
#' dominance components of the QTL effect on residual phenotype variance.
#' var.formula must have only fixed effects.
#' @param chrs chromosomes to scan
#' @param return.covar.effects Should covariate effects estimated at each locus be returned?
#' @param glm_family a character vector indicating the GLM family -- either 'gaussian' or 'poisson'
#' @param scan_types a vector containing at least one of 'mQTL', 'vQTL', and 'mvQTL', or up to all three.
#'
#' @return 27599
#' @export
#'
#' @examples
#' set.seed(27599)
#' test.cross <- qtl::sim.cross(map = qtl::sim.map(len = rep(20, 5), n.mar = 5), n.ind = 50)
#' scanonevar(cross = test.cross)
#'
scanonevar <- function(cross,
mean.formula = phenotype ~ mean.QTL.add + mean.QTL.dom,
var.formula = ~ var.QTL.add + var.QTL.dom,
chrs = qtl::chrnames(cross = cross),
scan_types = c('mQTL', 'vQTL', 'mvQTL'),
glm_family = 'gaussian',
return.covar.effects = FALSE) {
# give an informative error message if input is invalid
validate.scanonevar.input_(cross = cross,
mean.formula = mean.formula,
var.formula = var.formula,
glm_family = glm_family,
chrs = chrs)
# get inputs into a format that is easy for scanonevar_ to use
meta <- wrangle.scanonevar.input_(cross = cross,
mean.formula = mean.formula,
var.formula = var.formula,
glm_family = glm_family,
scan_types = scan_types,
chrs = chrs)
# execute the scan
result <- scanonevar_(meta = meta,
return.covar.effects = return.covar.effects)
sov <- list(meta = meta,
result = result)
class(sov) <- c('scanonevar', class(sov))
return(sov)
}
validate.scanonevar.input_ <- function(cross,
mean.formula,
var.formula,
glm_family,
chrs) {
# check validity of inputs
stopifnot(is.cross(cross))
stopifnot(all(chrs %in% qtl::chrnames(cross)))
stopifnot(glm_family %in% c('gaussian', 'poisson'))
# make and then check formulae
formulae <- make.formulae_(mean.formula, var.formula)
stopifnot(formulae_is_valid_(formulae = formulae))
# formulae must be valid for use with cross
stopifnot(formulae.is.valid.for.cross_(cross = cross,
formulae = formulae))
return(TRUE)
}
wrangle.scanonevar.input_ <- function(cross,
mean.formula,
var.formula,
glm_family,
scan_types,
chrs) {
if (!is.cross.w.genoprobs(x = cross)) {
message("calculating genoprobs with stepwidth = 5, off.end = 0, error.prob = 1e-4, map.function = 'haldane'")
cross <- qtl::calc.genoprob(cross = cross, step = 5)
}
loc.info.df <- wrangle.loc.info.df_(cross = cross,
chrs = chrs)
genoprob.df.long <- wrangle.genoprob.df_(cross = cross)
if (is.null(scan_types)) {
scan_types <- wrangle.scanonevar.types_(mean.formula, var.formula)
}
model <- ifelse(test = has_a_random_term(mean.formula),
yes = 'hglm',
no = 'dglm')
scan.formulae <- wrangle.scanonevar.formulae_(cross, mean.formula, var.formula)
modeling.df <- wrangle.scanonevar.modeling.df_(cross = cross,
genoprobs = genoprob.df.long,
scan.formulae = scan.formulae)
meta <- list(cross = cross,
scan.types = scan_types,
scan.formulae = scan.formulae,
modeling.df = modeling.df,
loc.info.df = loc.info.df,
genoprob.df = genoprob.df.long,
glm_family = glm_family,
model = model,
cross_type = class(cross)[1])
class(meta) <- c('scanonevar.meta', class(meta))
return(meta)
}
wrangle.scanonevar.types_ <- function(mean.formula,
var.formula) {
mean.qtl.idxs <- grep(pattern = 'mean.QTL', x = labels(stats::terms(mean.formula)))
var.qtl.idxs <- grep(pattern = 'var.QTL', x = labels(stats::terms(var.formula)))
if (all(mean.qtl.idxs, !var.qtl.idxs)) {
return('mQTL')
}
if (all(!mean.qtl.idxs, var.qtl.idxs)) {
return('vQTL')
}
if (all(mean.qtl.idxs, var.qtl.idxs)) {
return(c('mQTL', 'vQTL', 'mvQTL'))
}
stop('Should never get here.')
}
wrangle.scanonevar.formulae_ <- function(cross,
mean.formula,
var.formula) {
# first, replace terms that are simply marker names with marker.name_add + marker.name_dom
alt.formulae <- replace.markers.with.add.dom_(cross,
mean.formula,
var.formula)
null.formulae <- remove.qtl.terms_(formulae = alt.formulae)
# this way of adding to a list doesn't add anything when RHS is NULL
scan.formulae <- list()
scan.formulae[['mean.alt.formula']] <- alt.formulae[['mean.formula']]
scan.formulae[['mean.null.formula']] <- null.formulae[['mean.formula']]
scan.formulae[['var.alt.formula']] <- alt.formulae[['var.formula']]
scan.formulae[['var.null.formula']] <- null.formulae[['var.formula']]
return(scan.formulae)
}
wrangle.scanonevar.modeling.df_ <- function(cross,
scan.formulae,
genoprobs) {
response.model.df <- make.response.model.df_(cross = cross,
formulae = scan.formulae)
qtl.covar.model.df <- make.qtl.covar.model.df_(cross = cross,
formulae = scan.formulae)
phen.covar.model.df <- make.phen.covar.model.df_(cross = cross,
formulae = scan.formulae)
genet.covar.model.df <- make.genet.covar.add.dom.model.df_(cross = cross,
formulae = scan.formulae,
genoprobs = genoprobs)
return(dplyr::bind_cols(response.model.df,
qtl.covar.model.df,
phen.covar.model.df,
genet.covar.model.df))
}
scanonevar_ <- function(meta,
return.covar.effects) {
loc.name <- 'fake_global_for_CRAN'
result <- initialize.scanonevar.result_(loc.info.df = meta$loc.info.df,
scan.types = meta$scan.types,
scan.formulae = meta$scan.formulae,
return.covar.effects = meta$return.covar.effects)
mean.df <- sum(grepl(pattern = 'mean.QTL', x = labels(stats::terms(meta$scan.formulae[['mean.alt.formula']]))))
var.df <- sum(grepl(pattern = 'var.QTL', x = labels(stats::terms(meta$scan.formulae[['var.alt.formula']]))))
# loop over loci
for (loc.idx in 1:nrow(result)) {
# fill modeling.df with the genoprobs at the focal loc
this.loc.name <- result[['loc.name']][loc.idx]
loc.genoprobs <- dplyr::filter(.data = meta$genoprob.df,
loc.name == this.loc.name)
if (any(grepl(pattern = this.loc.name, x = names(meta$modeling.df)))) {
next
}
# puts a column of 0's for any dominance components if we are on X chromosome
# dglm handles this naturally, ignoring it in model fitting and giving effect estimate of NA
this.loc.modeling.df <- make.loc.specific.modeling.df(general.modeling.df = meta$modeling.df,
loc.genoprobs = loc.genoprobs,
model.formulae = meta$scan.formulae,
cross_type = meta$cross_type)
# hacky way to adjust the df on the X chr
if ('mean.QTL.dom' %in% names(this.loc.modeling.df) & all(this.loc.modeling.df[['mean.QTL.dom']] == 0)) {
this.loc.mean.df <- mean.df - 1
} else {
this.loc.mean.df <- mean.df
}
if ('var.QTL.dom' %in% names(this.loc.modeling.df) & all(this.loc.modeling.df[['var.QTL.dom']] == 0)) {
this.loc.var.df <- var.df - 1
} else {
this.loc.var.df <- var.df
}
# Fit the alternative model at this loc
alternative.fit <- fit_model(formulae = meta$scan.formulae,
data = this.loc.modeling.df,
model = meta$model,
mean = 'alt',
var = 'alt',
glm_family = meta$glm_family)
# if requested, save effect estimates
# may be safer to do with with name-matching, but this seems to work for now
# all this hullabaloo is because coef(summary()) drops NAs, but coef() doesn't
if (all(return.covar.effects)) {
if (identical(alternative.fit, NA)) {
if (loc.idx == 1) {
stop('Cant fit model on locus 1. Due to programming weirdness, cant return effect estimates.')
}
coef_mtx <- rbind(coef_mtx, rep(NA, ncol(coef_mtx)))
} else {
mean_coef_mtx <- stats::coef(summary(alternative.fit))
mean_ests <- mean_ses <- rep(NA, length(stats::coef(alternative.fit)))
mean_ests[!is.na(stats::coef(alternative.fit))] <- mean_coef_mtx[,'Estimate']
names(mean_ests) <- paste0('m__est__', names(stats::coef(alternative.fit)))
mean_ses[!is.na(stats::coef(alternative.fit))] <- mean_coef_mtx[,'Std. Error']
names(mean_ses) <- paste0('m__se__', names(stats::coef(alternative.fit)))
disp_fit <- alternative.fit$dispersion.fit
disp_coef_mtx <- stats::coef(summary(disp_fit))
disp_ests <- disp_ses <- rep(NA, length(stats::coef(disp_fit)))
disp_ests[!is.na(stats::coef(disp_fit))] <- disp_coef_mtx[,'Estimate']
names(disp_ests) <- paste0('v__est__', names(stats::coef(disp_fit)))
disp_ses[!is.na(stats::coef(disp_fit))] <- disp_coef_mtx[,'Std. Error']
names(disp_ses) <- paste0('v__se__', names(stats::coef(disp_fit)))
# collate ests and ses
if (loc.idx == 1) {
coef_mtx <- matrix(data = c(mean_ests, mean_ses, disp_ests, disp_ses), nrow = 1)
} else {
coef_mtx <- rbind(coef_mtx, c(mean_ests, mean_ses, disp_ests, disp_ses))
}
}
}
# Fit the appropriate null models at this loc
# and save LOD score and asymptotic p-value
if ('mQTL' %in% meta$scan.types) {
mean.null.fit <- fit_model(formulae = meta$scan.formulae,
data = this.loc.modeling.df,
model = meta$model,
mean = 'null',
var = 'alt',
glm_family = meta$glm_family)
result[['mQTL.lod']][loc.idx] <- LOD(alt = alternative.fit, null = mean.null.fit)
result[['mQTL.asymp.p']][loc.idx] <- stats::pchisq(q = result[['mQTL.lod']][loc.idx], df = this.loc.mean.df, lower.tail = FALSE)
}
if ('vQTL' %in% meta$scan.types) {
var.null.fit <- fit_model(formulae = meta$scan.formulae,
data = this.loc.modeling.df,
model = meta$model,
mean = 'alt',
var = 'null',
glm_family = meta$glm_family)
result[['vQTL.lod']][loc.idx] <- LOD(alt = alternative.fit, null = var.null.fit)
result[['vQTL.asymp.p']][loc.idx] <- stats::pchisq(q = result[['vQTL.lod']][loc.idx], df = this.loc.var.df, lower.tail = FALSE)
}
if ('mvQTL' %in% meta$scan.types) {
joint.null.fit <- fit_model(formulae = meta$scan.formulae,
data = this.loc.modeling.df,
model = meta$model,
mean = 'null',
var = 'null',
glm_family = meta$glm_family)
result[['mvQTL.lod']][loc.idx] <- LOD(alt = alternative.fit, null = joint.null.fit)
result[['mvQTL.asymp.p']][loc.idx] <- stats::pchisq(q = result[['mvQTL.lod']][loc.idx], df = this.loc.mean.df + this.loc.var.df, lower.tail = FALSE)
}
}
if (return.covar.effects) {
result <- cbind(result, coef_mtx)
}
class(result) <- c('scanonevar_result', class(result))
return(result)
}
initialize.scanonevar.result_ <- function(loc.info.df,
scan.types,
scan.formulae,
return.covar.effects = FALSE) {
chr.type <- chr <- loc.name <- pos <- 'fake_global_for_CRAN'
result <- dplyr::select(.data = loc.info.df,
chr.type,
chr,
loc.name,
pos)
if ('mQTL' %in% scan.types)
result[['mQTL.asymp.p']] <- result[['mQTL.lod']] <- NA
if ('vQTL' %in% scan.types)
result[['vQTL.asymp.p']] <- result[['vQTL.lod']] <- NA
if ('mvQTL' %in% scan.types)
result[['mvQTL.asymp.p']] <- result[['mvQTL.lod']] <- NA
# if (return.covar.effects) {
# result[['(Intercept)_mef']] <- result[['(Intercept)_mse']] <- NA
# for (mean.covar.name in labels(terms(scan.formulae[['mean.alt.formula']]))) {
# result[[paste0(mean.covar.name, '_mef')]] <- NA
# result[[paste0(mean.covar.name, '_mse')]] <- NA
# }
# result[['(Intercept)_vef']] <- result[['(Intercept)_vse']] <- NA
# for (var.covar.name in labels(terms(scan.formulae[['var.alt.formula']]))) {
# result[[paste0(var.covar.name, '_vef')]] <- NA
# result[[paste0(var.covar.name, '_vse')]] <- NA
# }
# }
return(result)
}
#' @title c.scanonevar
#' @name c.scanonevar
#'
#' @param ... the scanonevar objects with permutations to be combined
#'
#' @description combines scanonevar objects that have permutations to improve the precision of the p-value estimates.
#'
#' @return a scanonevar object that is the concatenation of the inputted
#' scanonevars
#'
#' @export
#'
c.scanonevar <- function(...) {
sovs <- list(...)
validate.c.scanonevar.input_(sovs)
first.sov <- sovs[[1]]
new.perms <- first.sov[['perms']]
for (sov.idx in 2:(length(sovs))) {
new.perms <- dplyr::bind_rows(new.perms,
sovs[[sov.idx]][['perms']])
}
first.sov[['perms']] <- new.perms
first.sov[['result']] <- calc.empir.ps(sov = first.sov[['result']],
perms = new.perms)
return(first.sov)
}
validate.c.scanonevar.input_ <- function(sovs) {
stopifnot(all(sapply(X = sovs, FUN = is.scanonevar.w.perms)))
first.sov <- sovs[[1]]
for (sov.idx in 2:(length(sovs))) {
# meta has to be the same
stopifnot(identical(x = first.sov[['meta']], y = sovs[[sov.idx]][['meta']],
ignore.environment = TRUE))
# result has to be the same
# todo
# perms have to have the same names...but shouldn't be the same
stopifnot(all.equal(names(first.sov[['perms']]),
names(sovs[[sov.idx]][['perms']])))
}
}
#' @title summary.scanonevar
#'
#' @author Robert Corty \email{rcorty@@gmail.com}
#'
#' @description \code{summary.scanonevar} prints out the loci in a scanonevar object
#' that exceed \code{thresh}. It is an S3 generic for summary(). It handles scanonevar
#' objects in both LOD units and empirical p value units.
#'
#' @param object the scanonevar object to be summarized
#' @param thresh the threshold over which (for LODs) or under which (for emprirical p values)
#' a locus will be printed.
#' @param units Which units should be used to summarise? 'lod', 'asymp.p', or 'empir.p'
#' @param ... additional arguments controlling the summary
#'
#' @return None. Only prints results to screen.
#'
#' @details none
#'
#' @method summary scanonevar
#' @export
#'
summary.scanonevar <- function(object, units = c('lod', 'asymp.p', 'empir.p'), thresh, ...) {
# hack to get R CMD CHECK to run without NOTEs that these globals are undefined
mQTL.lod <- vQTL.lod <- mvQTL.lod <- 'fake_global_for_CRAN'
mQTL.asymp.p <- vQTL.asymp.p <- mvQTL.asymp.p <- 'fake_global_for_CRAN'
mQTL.empir.p <- vQTL.empir.p <- mvQTL.empir.p <- 'fake_global_for_CRAN'
chr <- pos <- loc.name <- 'fake_global_for_CRAN'
units <- match.arg(arg = units)
if (!any(class(object) == "scanonevar")) {
stop("Input should have class \"scanonevar\".")
}
if (missing(thresh)) {
thresh <- switch(EXPR = units,
lod = 3,
asymp.p = 0.05,
empir.p = 0.05)
}
return <- list()
if (units == 'lod') {
return[['mQTL']] <- object$result %>%
dplyr::filter(mQTL.lod > thresh) %>%
dplyr::select(chr, pos, loc.name, mQTL.lod)
return[['vQTL']] <- object$result %>%
dplyr::filter(vQTL.lod > thresh) %>%
dplyr::select(chr, pos, loc.name, vQTL.lod)
return[['mvQTL']] <- object$result %>%
dplyr::filter(mvQTL.lod > thresh) %>%
dplyr::select(chr, pos, loc.name, mvQTL.lod)
}
if (units == 'asymp.p') {
return[['mQTL']] <- object$result %>%
dplyr::filter(mQTL.asymp.p > thresh) %>%
dplyr::select(chr, pos, loc.name, mQTL.asymp.p)
return[['vQTL']] <- object$result %>%
dplyr::filter(vQTL.asymp.p > thresh) %>%
dplyr::select(chr, pos, loc.name, vQTL.asymp.p)
return[['mvQTL']] <- object$result %>%
dplyr::filter(mvQTL.asymp.p > thresh) %>%
dplyr::select(chr, pos, loc.name, mvQTL.asymp.p)
}
if (units == 'empir.p') {
return[['mQTL']] <- object$result %>%
dplyr::filter(mQTL.empir.p > thresh) %>%
dplyr::select(chr, pos, loc.name, mQTL.empir.p)
return[['vQTL']] <- object$result %>%
dplyr::filter(vQTL.empir.p > thresh) %>%
dplyr::select(chr, pos, loc.name, vQTL.empir.p)
return[['mvQTL']] <- object$result %>%
dplyr::filter(mvQTL.empir.p > thresh) %>%
dplyr::select(chr, pos, loc.name, mvQTL.empir.p)
}
return(return)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.