#' @title A genotype object constructor
#'
#' @description Constructs a S3-object containing a SNP design matrix and a hierarchy. If
#' a SNP in the input object contains missing data, the clustering is used to impute
#' information from highly correlated neighbor SNPs. The genotype object is needed for
#' \code{\link{qtcatHit}} as input.
#'
#' @param snp an object of S4 class \linkS4class{snpMatrix}.
#' @param snpClust an object of class \code{\link{qtcatClust}}.
#' @param absCor a vector of absolute value of correlations considered in the hierarchy.
#' @param min.absCor a minimum absolute value of correlation which is considered. A value
#' in the range from 0 to 1.
#' @param mc.cores a number of cores for parallelising. The maximum is \code{'B'}. For
#' details see \code{\link[parallel]{mclapply}}.
#'
#' @examples
#' # file containing example data for SNP data
#' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
#' snp <- read.snpData(gfile, sep = ",")
#' clust <- qtcatClust(snp)
#'
#' # Construct geotype object
#' geno <- qtcatGeno(snp, clust)
#'
#' @importFrom hit as.hierarchy
#' @importFrom methods is
#' @importFrom stats reorder
#' @export
qtcatGeno <- function(snp, snpClust, absCor, min.absCor = 0.5, mc.cores = 1) {
stopifnot(is(snp, "snpMatrix"))
stopifnot(is(snpClust, "qtcatClust"))
if (!setequal(names(snpClust$clusters), colnames(snp)))
stop("Names of 'snp' and 'snpClust' differ")
if (missing(absCor))
hier <- as.hierarchy(snpClust$dendrogram, 1 - min.absCor, names = colnames(snp))
else
hier <- as.hierarchy(snpClust$dendrogram, height = 1 - absCor, names = colnames(snp))
if (any(naFreq(snp) > 0))
snp <- imputeMedoids(snp, snpClust, hier, .25, mc.cores)
if (is.null(names <- snpClust$medoids))
names <- names(snpClust)
snpnames <- colnames(snp)[colnames(snp) %in% names]
desMat <- as.matrix(snp[, snpnames])
hier <- reorder(hier, snpnames)
out <- list(x = desMat,
hierarchy = hier,
clusters = snpClust$clusters,
medoids = snpClust$medoids,
snpInfo = snpInfo(snp))
class(out) <- "qtcatGeno"
out
}
#' @title A phenotype object constructor
#'
#' @description Constructs an S3-object containing phenotype and if additional covariats
#' exist a design matrix of those. The phenotype object is needed as input for
#' \code{\link{qtcatHit}}.
#'
#' @param names a vector of individual names of length 'n'.
#' @param pheno a vector of length 'n' or a matrix size 'n x 2' in case of binomial family.
#' This contains the phenotypic observations.
#' @param family a character string specifying the family of the phenotype distribution.
#' Either "gaussian" (default) or "binomial".
#' @param covariates a matrix typically generated by a call of
#' \code{\link[stats]{model.matrix}}. It contain additional variables influencing the
#' phenotype e.g. environmental and experimental covariates.
#'
#' @examples
#' # file containing example data for a phenotype.
#' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
#' pdat <- read.csv(pfile, header = TRUE)
#'
#' # Construct phenotype object
#' pheno <- qtcatPheno(names = pdat[, 1],
#' pheno = pdat[, 2],
#' covariates = model.matrix(~ pdat[, 3]))
#'
#' @export
qtcatPheno <- function(names, pheno, family = "gaussian", covariates = NULL) {
match.arg(family, c("gaussian", "binomial"))
nn <- length(names)
if (any(is.na(pheno)))
stop("Missing values in 'pheno' are not allowed")
if (!is.null(covariates)) {
if (is.matrix(covariates)) {
nc <- nrow(covariates)
if (all(covariates[, 1] == 1))
covariates <- covariates[, -1, drop = FALSE]
} else {
stop("'covariates' has to be a 'matrix'")
}
} else {
covariates <- matrix(nrow = nn, ncol = 0L)
nc <- nn
}
if (is.vector(pheno)) {
np <- length(pheno)
} else if (is.matrix(pheno) && family == "binomial") {
if (ncol(pheno) > 2)
stop("'phone' has more than two columns which is not allowed")
np <- nrow(pheno)
} else {
stop("'pheno' has to be a 'vector' or in in case of binomial family a 'matrix'")
}
if (nn == nc && nn == np)
out <- list(names = as.character(names),
pheno = pheno,
covariates = covariates,
family = family)
class(out) <- "qtcatPheno"
out
}
#' @title Fitting Hierarchical Inference Testing
#'
#' @description Hierarchical inference testing for phenotype-SNP association.
#'
#' @param pheno an object of class \code{\link{qtcatPheno}}.
#' @param geno an object of class \code{\link{qtcatGeno}}.
#' @param B a integer indicating the number of sample-splits.
#' @param p.samp1 a value specifying the fraction of data used for the LASSO sample-split.
#' The ANOVA sample-split is \code{1 - p.samp1}.
#' @param nfolds Number of folds (default is 5). See \code{\link[glmnet]{cv.glmnet}} for
#' more details.
#' @param overall.lambda Logical, if true, lambda is estimated once, if false (default),
#' lambda is estimated for each sample split.
#' @param lambda.opt a criterion for optimum selection of cross validated lasso. Either
#' "lambda.1se" (default) or "lambda.min". See
#' \code{\link[glmnet]{cv.glmnet}} for more details.
#' @param alpha a single value in the range of 0 to 1 for the elastic net mixing parameter.
#' @param gamma a vector of gamma-values used in significance estimation.
#' @param max.p.esti a maximum for computed p-values. All p-values above this value are set
#' to one. Small \code{max.p.esti} values reduce computing time.
#' @param seed a RNG seed, see \code{\link{set.seed}}.
#' @param mc.cores a number of cores for parallelising. The maximum is
#' \code{'B'}. For details see \code{\link[parallel]{mclapply}}.
#' @param trace logical, if \code{TRUE} it prints the current status of the program.
#' @param ... additional arguments for \code{\link[glmnet]{cv.glmnet}}.
#'
#' @examples
#' # If you want to run the examples, use:
#' # example(qtcatHit, run.dontrun = TRUE)
#' \dontrun{
#' # files containing example data for SNP data and the phenotype
#' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
#' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
#' pdat <- read.csv(pfile, header = TRUE)
#' snp <- read.snpData(gfile, sep = ",")
#' clust <- qtcatClust(snp)
#' geno <- qtcatGeno(snp, clust)
#' pheno <- qtcatPheno(names = pdat[, 1],
#' pheno = pdat[, 2],
#' covariates = model.matrix(~ pdat[, 3]))
#'
#' # fitting HIT
#' fitted <- qtcatHit(pheno, geno)
#' }
#'
#' @importFrom methods is
#' @export
qtcatHit <- function(pheno, geno, B = 50, p.samp1 = 0.35,
nfolds = 5, overall.lambda = FALSE, lambda.opt = "lambda.1se",
alpha = 1, gamma = seq(0.05, 0.99, by = 0.01),
max.p.esti = 1, seed = 12321, mc.cores = 1, trace = FALSE, ...) {
set.seed(seed)
on.exit(set.seed(NULL))
stopifnot(is(pheno, "qtcatPheno"))
stopifnot(is(geno, "qtcatGeno"))
id <- intersect(pheno$names, rownames(geno$x))
if (!length(id))
stop("The ID intersect of 'pheno' and 'geno' is emty")
if (length(id.uniqueGeno <- setdiff(rownames(geno$x), id)))
cat("The following individuals are part of 'geno' but not of 'pheno':\n",
paste(id.uniqueGeno, collapse = " "), "\n")
if (length(id.uniquePheno <- setdiff(pheno$names, id)))
cat("The following individuals are part of 'pheno' but not of 'geno':\n",
paste(id.uniquePheno, collapse = " "), "\n")
phenoInx <- which(pheno$names %in% id)
genoInx <- match(pheno$names[phenoInx], rownames(geno$x))
if (ncol(pheno$covariates) == 0L)
x <- geno$x[genoInx, ]
else
x <- cbind(geno$x[genoInx, ], pheno$covariates[phenoInx, ])
y <- pheno$pheno[phenoInx]
fitHit <- hit(x, y, geno$hierarchy, pheno$family,
B, p.samp1, nfolds, overall.lambda, lambda.opt, alpha,
gamma, max.p.esti, mc.cores,
trace, standardize = FALSE)
out <- c(fitHit, geno[3L:5L])
class(out) <- c("qtcatHit", "hit")
out
}
#' @title Summarize results of Hierarchical Inference Test
#'
#' @description Summarizing the QTCs (significant cluster of SNPs) and their position at
#' the genome.
#'
#' @param object an object of class \code{\link{qtcatHit}}.
#' @param alpha an alpha level for significance estimation.
#' @param min.absCor a minimum absolute value of correlation to be considered.
#'
#' @examples
#' # If you want to run the examples, use:
#' # example(qtcatQtc, run.dontrun = TRUE)
#' \dontrun{
#' # files containing example data for SNP data and the phenotype
#' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
#' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
#' pdat <- read.csv(pfile, header = TRUE)
#' snp <- read.snpData(gfile, sep = ",")
#' clust <- qtcatClust(snp)
#' geno <- qtcatGeno(snp, clust)
#' pheno <- qtcatPheno(names = pdat[, 1],
#' pheno = pdat[, 2],
#' covariates = model.matrix(~ pdat[, 3]))
#' fitted <- qtcatHit(pheno, geno)
#'
#' # Summarizing the QTCs (loci37, loci260, and loci367 are causal)
#' qtcatQtc(fitted)
#' }
#'
#' @importFrom hit hit
#' @importFrom methods is
#' @export
qtcatQtc <- function(object, alpha = 0.05, min.absCor = 0.05) {
stopifnot(is(object, "qtcatHit"))
y <- summary(object, alpha, 1 - min.absCor)
signames <- rownames(y)
sigclust <- match(signames, object$medoids)
sigClust <- matrix(0, length(object$clusters), 3L)
sigClust[, 2L] <- 1
for (i in seq_along(signames)) {
inx <- which(object$clusters == sigclust[i])
sigClust[inx, ] <- matrix(unlist(y[signames[i], ]),
length(inx), 3L, byrow = TRUE)
}
rownames(sigClust) <- names(object$clusters)
sigClust[, 2L] <- 1 - sigClust[, 2L]
colnames(sigClust) <- c(colnames(y)[1L], "absCor", colnames(y)[3L])
sigClust <- as.data.frame(sigClust[sigClust[, 1L] != 0, ,drop = FALSE])
out <- cbind(snpInfo(object)[rownames(sigClust), 1:2], sigClust)
out
}
setOldClass("qtcatHit")
#' @title Get position from qtcatHit object
#'
#' @description Genetic position info from an object of class qtcatHit.
#'
#' @param object an object of class \code{\link{qtcatHit}}.
#'
#'
#' @importFrom methods setMethod setOldClass
#' @export
setMethod("snpInfo", "qtcatHit",
function(object) {
out <- object$snpInfo
if (is.null(out)) {
cat("No position information available")
}
out
}
)
#' @title Find medoids of QTCs
#' @description Find a medoid of for each quantitative trait cluster (QTC).
#'
#' @param object an object of class \code{\link{qtcatHit}}.
#' @param geno an object of class \code{\link{qtcatGeno}}.
#' @param alpha an alpha level for significance estimation.
#' @param min.absCor minimum absolute value of correlation considered.
#'
#' @examples
#' # If you want to run the examples, use:
#' # example(medoQtc, run.dontrun = TRUE)
#' \dontrun{
#' # files containing example data for SNP data and the phenotype
#' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
#' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
#' pdat <- read.csv(pfile, header = TRUE)
#' snp <- read.snpData(gfile, sep = ",")
#' clust <- qtcatClust(snp)
#' geno <- qtcatGeno(snp, clust)
#' pheno <- qtcatPheno(names = pdat[, 1],
#' pheno = pdat[, 2],
#' covariates = model.matrix(~ pdat[, 3]))
#' fitted <- qtcatHit(pheno, geno)
#'
#' # QTC medoids
#' medo <- medoQtc(fitted, geno)
#' }
#'
#' @importFrom methods is
#' @importFrom stats cor
#' @export
medoQtc <- function(object, geno, alpha = 0.05, min.absCor = 0.05) {
stopifnot(is(object, "qtcatHit"))
stopifnot(is(geno, "qtcatGeno"))
sigClust <- summary(object, alpha, min.absCor)
if (nrow(sigClust)) {
clusters <- split(rownames(sigClust), sigClust$clusters)
medoids <- sapply(clusters, function(names, geno) {
if (length(names) > 1L)
return(names[which.max(abs(cor(geno$x[, names])))])
else
return(names)
}, geno = geno)
} else {
medoids <- c()
}
medoids
}
#' @title Fitting a Linear Model to QTCs
#'
#' @description Linear model between phenotype and medoids of QTCs (significant SNP
#' clusters).
#'
#' @param object an object of class \code{\link{qtcatHit}}.
#' @param pheno an object of class \code{\link{qtcatPheno}}.
#' @param geno an object of class \code{\link{qtcatGeno}}.
#' @param alpha an alpha level for significance estimation.
#' @param min.absCor minimum absolute value of correlation considered.
#'
#' @examples
#' # If you want to run the examples, use:
#' # example(lmQtc, run.dontrun = TRUE)
#' \dontrun{
#' # files containing example data for SNP data and the phenotype
#' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
#' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
#' pdat <- read.csv(pfile, header = TRUE)
#' snp <- read.snpData(gfile, sep = ",")
#' clust <- qtcatClust(snp)
#' geno <- qtcatGeno(snp, clust)
#' pheno <- qtcatPheno(names = pdat[, 1],
#' pheno = pdat[, 2],
#' covariates = model.matrix(~ pdat[, 3]))
#' fitted <- qtcatHit(pheno, geno)
#'
#' # fitting a LM to the phenotype and QTC medoids
#' lmfitted <- lmQtc(fitted, pheno, geno)
#' }
#'
#' @importFrom methods is
#' @importFrom stats lm
#' @export
lmQtc <- function(object, pheno, geno, alpha = 0.05, min.absCor = 0.05) {
stopifnot(is(object, "qtcatHit"))
stopifnot(is(pheno, "qtcatPheno"))
stopifnot(is(geno, "qtcatGeno"))
id <- intersect(pheno$names, rownames(geno$x))
phenoInx <- which(pheno$names %in% id)
if (!length(id))
stop("The ID intersect of 'pheno' and 'geno' is emty")
if (length(id.uniqueGeno <- setdiff(rownames(geno$x), id)))
cat("The following individuals are part of 'geno' but not of 'pheno':\n",
paste(id.uniqueGeno, collapse = " "), "\n")
if (length(id.uniquePheno <- setdiff(pheno$names, id)))
cat("The following individuals are part of 'pheno' but not of 'geno':\n",
paste(id.uniquePheno, collapse = " "), "\n")
medoids <- medoQtc(object, geno, alpha, min.absCor)
if (length(medoids)) {
xg <- geno$x[, colnames(geno$x) %in% medoids, drop = FALSE]
genoInx <- match(pheno$names[phenoInx], rownames(xg))
rownames(xg) <- NULL
if (ncol(pheno$covariates)) {
dat <- data.frame(y = pheno$pheno[phenoInx],
pheno$covariates[phenoInx, ],
xg[genoInx, ])
} else {
dat <- data.frame(y = pheno$pheno[phenoInx], xg[genoInx, ])
}
} else if (ncol(pheno$covariates)) {
dat <- data.frame(y = pheno$pheno[phenoInx],
pheno$covariates[phenoInx, ])
} else {
dat <- data.frame(y = pheno$pheno[phenoInx])
}
out <- lm(y ~ ., data = dat)
out
}
#' @title Plot resulting QTCs of the Hierarchical Inference Test
#'
#' @description Plot the QTCs (significant cluster of SNPs) at their
#' position at the genome.
#'
#' @param x an object of class \code{\link{qtcatHit}}.
#' @param alpha an alpha level for significance estimation.
#' @param xlab a title for the x axis.
#' @param ylab a title for the y axis.
#' @param col.axis colors for axis line, tick marks, and title respectively.
#' @param ... other graphical parameters may also be passed as arguments to this function.
#'
#' @examples
#' # If you want to run the examples, use:
#' # example(plotQtc, run.dontrun = TRUE)
#' \dontrun{
#' # files containing example data for SNP data and the phenotype
#' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
#' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
#' pdat <- read.csv(pfile, header = TRUE)
#' snp <- read.snpData(gfile, sep = ",")
#' clust <- qtcatClust(snp)
#' geno <- qtcatGeno(snp, clust)
#' pheno <- qtcatPheno(names = pdat[, 1],
#' pheno = pdat[, 2],
#' covariates = model.matrix(~ pdat[, 3]))
#' fitted <- qtcatHit(pheno, geno)
#'
#' # Plot the QTCs (loci37, loci260, and loci367 are causal)
#' plotQtc(fitted)
#' }
#'
#' @importFrom graphics plot axis mtext
#' @export
plotQtc <- function(x, alpha = 0.05, xlab = "Chromosomes",
ylab = expression(-log[10](italic(p))), col.axis = NULL, ...) {
stopifnot(is(x, "qtcatHit"))
# make positions linear with gaps between chr's
pos <- snpInfo(x)[, 1:2]
chrminmax <- vapply(split(pos[, 2L], pos[, 1L]), function(x) c(min(x), max(x)), c(1, 2))
chrgap <- sum(chrminmax[2L, ]) * .01
chrsize <- cumsum(c(0, chrminmax[2L, -ncol(chrminmax)])) -
cumsum(chrminmax[1L, ]) +
cumsum(c(0, rep(chrgap, ncol(chrminmax) - 1L)))
chr <- sort(unique(pos[, 1L]))
for (i in seq_along(chr)) {
inx <- which(pos[, 1L] == chr[i])
pos[inx, 2L] <- pos[inx, 2L] + chrsize[i]
}
chrstartend <- vapply(split(pos[, 2L], pos[, 1L]), function(x) c(min(x), max(x)), c(1, 2))
xlim <- c(chrstartend[1L, 1L] - chrgap, chrstartend[2L, ncol(chrstartend)] + chrgap)
# get QTCs from result
qtc <- qtcatQtc(x, alpha = alpha, min.absCor = .01)
for (i in seq_along(chr)) {
inx2 <- which(qtc[, 1L] == chr[i])
qtc[inx2, 2L] <- qtc[inx2, 2L] + chrsize[i]
}
qtc$pValues[qtc$pValues] <- qtc$pValues[qtc$pValues] + 1e-308
qtc$log.pValues <- -log10(qtc$pValues)
# plot
plot(qtc$pos, qtc$log.pValues, xlim = xlim, ylim = c(0, max(9, qtc$log.pValues) + 1),
axes = FALSE, xlab = "", ylab = "", ...)
# x
for (i in seq_along(chr))
axis(1, labels = FALSE, at = c(chrstartend[1, i], chrstartend[2, i]), col = col.axis)
axis(1, at = colMeans(chrstartend), labels = chr, col = NA, col.axis = col.axis)
mtext(xlab, 1, 2.5, col = col.axis)
# y
axis(2, col.axis = col.axis, col = col.axis)
mtext(expression(-log[10](italic(p))), 2, 2.5, col = col.axis)
}
#' @title Plot markers selection frequencies of the Hierarchical Inference Test
#'
#' @description Plot markers selection frequencies at their
#' position at the genome.
#'
#' @param x an object of class \code{\link{qtcatHit}}.
#' @param xlab a title for the x axis.
#' @param ylab a title for the y axis.
#' @param col.axis colors for axis line, tick marks, and title respectively.
#' @param ... other graphical parameters may also be passed as arguments to this function.
#'
#' @examples
#' # If you want to run the examples, use:
#' # example(plotSelFreq, run.dontrun = TRUE)
#' \dontrun{
#' # files containing example data for SNP data and the phenotype
#' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
#' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
#' pdat <- read.csv(pfile, header = TRUE)
#' snp <- read.snpData(gfile, sep = ",")
#' clust <- qtcatClust(snp)
#' geno <- qtcatGeno(snp, clust)
#' pheno <- qtcatPheno(names = pdat[, 1],
#' pheno = pdat[, 2],
#' covariates = model.matrix(~ pdat[, 3]))
#' fitted <- qtcatHit(pheno, geno)
#'
#' # Plot the selection frequncy of markers (loci37, loci260, and loci367 are causal)
#' plotSelFreq(fitted)
#' }
#'
#' @importFrom graphics plot axis mtext
#' @export
plotSelFreq <- function(x, xlab = "Chromosomes", ylab = "Sel. freq.",
col.axis = NULL, ...) {
stopifnot(is(x, "qtcatHit"))
# make positions linear with gaps between chr's
pos <- snpInfo(x)[, 1:2]
chrminmax <- vapply(split(pos[, 2L], pos[, 1L]), function(x) c(min(x), max(x)), c(1, 2))
chrgap <- sum(chrminmax[2L, ]) * .01
chrsize <- cumsum(c(0, chrminmax[2L, -ncol(chrminmax)])) -
cumsum(chrminmax[1L, ]) +
cumsum(c(0, rep(chrgap, ncol(chrminmax) - 1L)))
chr <- sort(unique(pos[, 1L]))
for (i in seq_along(chr)) {
inx <- which(pos[, 1L] == chr[i])
pos[inx, 2L] <- pos[inx, 2L] + chrsize[i]
}
chrstartend <- vapply(split(pos[, 2L], pos[, 1L]), function(x) c(min(x), max(x)), c(1, 2))
xlim <- c(chrstartend[1L, 1L] - chrgap, chrstartend[2L, ncol(chrstartend)] + chrgap)
# hit lasso selection fequency
inx <- which(x$selectFreq > 0)
selfreq <- cbind(pos[names(x$hier), 2L][inx], x$selectFreq[inx])
# hit lasso selection fequency plot
plot(selfreq, xlim = xlim, ylim = c(0, 1.1), axes = FALSE, xlab = "", ylab = "", ...)
# x
for (i in seq_along(chr))
axis(1, labels = FALSE, at = c(chrstartend[1, i], chrstartend[2, i]), col = col.axis)
axis(1, at = colMeans(chrstartend), labels = chr, col = NA, col.axis = col.axis)
mtext(xlab, 1, 2.5, col = col.axis)
# y
axis(2, at = c(0, .5, 1), col = col.axis, col.axis = col.axis)
mtext(ylab, 2, 2.5, col = col.axis)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.