#' Fit single-QTL model at a single position
#'
#' Fit a single-QTL model at a single putative QTL position and get detailed results
#' about estimated coefficients and individuals contributions to the LOD score.
#'
#' @md
#'
#' @param genoprobs A matrix of genotype probabilities, individuals x genotypes
#' @param pheno A numeric vector of phenotype values (just one phenotype, not a matrix of them)
#' @param kinship Optional kinship matrix.
#' @param addcovar An optional matrix of additive covariates.
#' @param nullcovar An optional matrix of additional additive
#' covariates that are used under the null hypothesis (of no QTL)
#' but not under the alternative (with a QTL). This is needed for
#' the X chromosome, where we might need sex as a additive
#' covariate under the null hypothesis, but we wouldn't want to
#' include it under the alternative as it would be collinear with
#' the QTL effects.
#' @param intcovar An optional matrix of interactive covariates.
#' @param weights An optional vector of positive weights for the
#' individuals. As with the other inputs, it must have `names`
#' for individual identifiers. Ignored if `kinship` is provided.
#' @param contrasts An optional matrix of genotype contrasts, size
#' genotypes x genotypes. For an intercross, you might use
#' `cbind(c(1,1,1), c(-1, 0, 1), c(-0.5, 1, -0.5))` to get
#' mean, additive effect, and dominance effect. The default is the
#' identity matrix.
#' @param model Indicates whether to use a normal model (least
#' squares) or binary model (logistic regression) for the phenotype.
#' If `model="binary"`, the phenotypes must have values in
#' \eqn{[0, 1]}.
#' @param se If TRUE, calculate the standard errors.
#' @param hsq (Optional) residual heritability; used only if
#' `kinship` provided.
#' @param reml If `kinship` provided: if `reml=TRUE`, use
#' REML; otherwise maximum likelihood.
#' @param tol Tolerance value for
#' linear regression by QR decomposition (in determining whether
#' columns are linearly dependent on others and should be omitted)
#' @param maxit Maximum number of iterations in logistic regression
#' fit (when `model="binary"`).
#'
#' @return A list containing
#' * `coef` - Vector of estimated coefficients.
#' * `SE` - Vector of estimated standard errors (included if `se=TRUE`).
#' * `lod` - The overall lod score.
#' * `ind_lod` - Vector of individual contributions to the LOD score.
#'
#' @details For each of the inputs, the row names are used as
#' individual identifiers, to align individuals.
#'
#' If `kinship` is absent, Haley-Knott regression is performed.
#' If `kinship` is provided, a linear mixed model is used, with a
#' polygenic effect estimated under the null hypothesis of no (major)
#' QTL, and then taken as fixed as known in the genome scan.
#'
#' @references Haley CS, Knott SA (1992) A simple
#' regression method for mapping quantitative trait loci in line
#' crosses using flanking markers. Heredity 69:315--324.
#'
#' Kang HM, Zaitlen NA, Wade CM, Kirby A, Heckerman D, Daly MJ, Eskin
#' E (2008) Efficient control of population structure in model
#' organism association mapping. Genetics 178:1709--1723.
#'
#' @examples
#' # load qtl2geno package for data and genoprob calculation
#' library(qtl2geno)
#'
#' # read data
#' iron <- read_cross2(system.file("extdata", "iron.zip", package="qtl2geno"))
#'
#' # insert pseudomarkers into map
#' map <- insert_pseudomarkers(iron$gmap, step=1)
#'
#' # calculate genotype probabilities
#' probs <- calc_genoprob(iron, map, error_prob=0.002)
#'
#' # grab phenotypes and covariates; ensure that covariates have names attribute
#' pheno <- iron$pheno[,1]
#' covar <- match(iron$covar$sex, c("f", "m")) # make numeric
#' names(covar) <- rownames(iron$covar)
#'
#' # leave-one-chromosome-out kinship matrix for chr 7
#' kinship7 <- calc_kinship(probs, "loco")[[7]]
#'
#' # scan chromosome 7 to find peak
#' out <- scan1(probs[,7], pheno, kinship7, addcovar=covar)
#'
#' # find peak position
#' max_pos <- rownames(max(out, map[7]))
#'
#' # fit QTL model just at that position
#' out_fit1 <- fit1(probs[[7]][,,max_pos], pheno, addcovar=covar)
#'
#' # fit QTL model just at that position, with polygenic effect
#' out_fit1_pg <- fit1(probs[[7]][,,max_pos], pheno, kinship7, addcovar=covar)
#'
#' @importFrom stats setNames
#' @export
fit1 <-
function(genoprobs, pheno, kinship=NULL, addcovar=NULL, nullcovar=NULL,
intcovar=NULL, weights=NULL,
contrasts=NULL, model=c("normal", "binary"),
se=TRUE, hsq=NULL, reml=TRUE, tol=1e-12, maxit=100)
{
if(!is.null(kinship)) { # use LMM; see fit1_pg.R
return(fit1_pg(genoprobs, pheno, kinship, addcovar, nullcovar,
intcovar, contrasts, se, hsq, reml, tol))
}
stopifnot(tol > 0)
bintol <- sqrt(tol)
# check that the objects have rownames
check4names(pheno, addcovar, NULL, intcovar, nullcovar)
# force things to be matrices
if(!is.null(addcovar) && !is.matrix(addcovar))
addcovar <- as.matrix(addcovar)
if(!is.null(nullcovar) && !is.matrix(nullcovar))
nullcovar <- as.matrix(nullcovar)
if(!is.null(intcovar) && !is.matrix(intcovar))
intcovar <- as.matrix(intcovar)
if(!is.null(contrasts) && !is.matrix(contrasts))
contrasts <- as.matrix(contrasts)
model <- match.arg(model)
if(model=="binary") {
if(!is.null(kinship))
stop("Can't yet account for kinship with model = \"binary\"")
if(any(!is.na(pheno) & (pheno < 0 | pheno > 1)))
stop('with model="binary", pheno should be in [0,1]')
}
else {
# square-root of weights
weights <- sqrt_weights(weights) # also check >0 (and if all 1's, turn to NULL)
}
# make sure pheno is a vector
if(is.matrix(pheno) || is.data.frame(pheno)) {
if(ncol(pheno) > 1)
warning("Considering only the first phenotype.")
rn <- rownames(pheno)
pheno <- pheno[,1]
names(pheno) <- rn
}
# genoprobs is a matrix?
if(!is.matrix(genoprobs))
stop("genoprobs should be a matrix, individuals x genotypes")
# make sure contrasts is square n_genotypes x n_genotypes
if(!is.null(contrasts)) {
ng <- ncol(genoprobs)
if(ncol(contrasts) != ng || nrow(contrasts) != ng)
stop("contrasts should be a square matrix, ", ng, " x ", ng)
}
# find individuals in common across all arguments
ind2keep <- get_common_ids(genoprobs, pheno, addcovar, nullcovar, intcovar,
weights, complete.cases=TRUE)
if(length(ind2keep)<=2) {
if(length(ind2keep)==0)
stop("No individuals in common.")
else
stop("Only ", length(ind2keep), " individuals in common: ",
paste(ind2keep, collapse=":"))
}
# omit individuals not in common
genoprobs <- genoprobs[ind2keep,,drop=FALSE]
pheno <- pheno[ind2keep]
if(!is.null(addcovar)) addcovar <- addcovar[ind2keep,,drop=FALSE]
if(!is.null(nullcovar)) nullcovar <- nullcovar[ind2keep,,drop=FALSE]
if(!is.null(intcovar)) intcovar <- intcovar[ind2keep,,drop=FALSE]
if(!is.null(weights)) weights <- weights[ind2keep]
# make sure addcovar is full rank when we add an intercept
addcovar <- drop_depcols(addcovar, TRUE, tol)
# make sure columns in intcovar are also in addcovar
addcovar <- force_intcovar(addcovar, intcovar, tol)
# multiply genoprobs by contrasts
if(!is.null(contrasts))
genoprobs <- genoprobs %*% contrasts
X0 <- drop_depcols(cbind(rep(1, length(pheno)), addcovar, nullcovar), FALSE, tol)
if(model=="normal") {
# if weights, adjust phenotypes
if(!is.null(weights)) pheno <- weights * pheno
# weights have 0 dimension if missing
if(is.null(weights)) weights <- numeric(0)
# null fit
fit0 <- fit1_hk_addcovar(X0, # plug addcovar where genoprobs would be
pheno,
matrix(nrow=length(pheno), ncol=0), # empty slot for addcovar
weights, se=FALSE, tol)
if(is.null(intcovar)) { # just addcovar
if(is.null(addcovar)) addcovar <- matrix(nrow=length(ind2keep), ncol=0)
fitA <- fit1_hk_addcovar(genoprobs, pheno, addcovar, weights, se=se, tol)
}
else { # intcovar
fitA <- fit1_hk_intcovar(genoprobs, pheno, addcovar, intcovar,
weights, se=se, tol)
}
# lod score
n <- length(pheno)
lod <- (n/2)*log10(fit0$rss/fitA$rss)
# residual SDs using 1/n
sigsq0 <- fit0$sigma^2/n*fit0$df
sigsqA <- fitA$sigma^2/n*fitA$df
# individual contributions to the lod score
ind_lod <- 0.5*(fit0$resid^2/sigsq0 - fitA$resid^2/sigsqA + log(sigsq0) - log(sigsqA))/log(10)
names(ind_lod) <- names(pheno)
}
else { # binary phenotype
# null fit
if(is.null(weights)) { # no weights
fit0 <- fit_binreg(X0, pheno, FALSE, maxit, bintol, tol)
weights <- numeric(0)
}
else {
fit0 <- fit_binreg_weighted(X0, pheno, weights, FALSE, maxit, bintol, tol)
}
if(is.null(intcovar)) { # just addcovar
if(is.null(addcovar)) addcovar <- matrix(nrow=length(ind2keep), ncol=0)
fitA <- fit1_binary_addcovar(genoprobs, pheno, addcovar, weights, se=se,
maxit, bintol, tol)
}
else { # intcovar
fitA <- fit1_binary_intcovar(genoprobs, pheno, addcovar, intcovar,
weights, se=se, maxit, bintol, tol)
}
lod <- fitA$log10lik - fit0$log10lik
# individual contributions to LOD
if(length(weights)==0) weights <- rep(1, length(pheno))
pA <- fitA$fitted_probs
ind_lodA <- weights * (pheno*log10(pA) + (1-pheno)*log10(1-pA))
p0 <- fit0$fitted_probs
ind_lod0 <- weights * (pheno*log10(p0) + (1-pheno)*log10(1-p0))
ind_lod <- ind_lodA - ind_lod0
}
# names of coefficients
coef_names <- scan1coef_names(genoprobs, addcovar, intcovar)
if(se) # results include standard errors
return(list(lod=lod, ind_lod=ind_lod,
coef=stats::setNames(fitA$coef, coef_names),
SE=stats::setNames(fitA$SE, coef_names)))
else
return(list(lod=lod, ind_lod=ind_lod,
coef=stats::setNames(fitA$coef, coef_names)))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.