Nothing
#' @title Fits a linear mixed model (without fixed SNP effects) and computes the fitted variance-covariance matrix for later use in the rGLS function.
#'
#' @description Uses a GenABEL object and phenotype data as input. The model is fitted using the \code{hglm} function in the hglm package.
#'
#' @param fixed A formula including the response and fixed effects
#' @param random A formula for the random effects
#' @param id.name The column name of the IDs in phen.data
#' @param genabel.data An GenABEL object including marker information. This object has one observation per individual.
#' @param phenotype.data A data frame including the repeated observations and IDs.
#' @param corStruc A list specifying the correlation structure for each random effect. The options are: \code{"Ind"} for iid random effects, \code{"GRM"} for a correlation structure given by a genetic relationship matrix, or \code{"CAR"} for a spatial correlation structure given by a Conditional Autoregressive model specified by a neighborhood matrix.
#' @param GRM A genetic relationship matrix. If not specified whilst the \code{"GRM"} option is given for \code{corStruc} then the GRM is computed internally within the function.
#' @param Neighbor.Matrix A neighborhood matrix having non-zero value for an element (i,j) where the observations i and j come from neighboring locations. The diagonal elements should be zero.
#' @return Returns a list including the fitted hglm object \code{fitted.hglm}, the variance-covariance matrix \code{V} and the ratios between estimated variance components for the random effects divided by the residual variance, \code{ratio}.
#' @author Lars Ronnegard
#'
#' @examples
#' ####### FIRST EXAMPLE USING GRM #############
#' data(Phen.Data) #Phenotype data with repeated observations
#' data(gen.data) #GenABEL object including IDs and marker genotypes
#' GWAS1 <- rGLS(y ~ age + sex, genabel.data = gen.data, phenotype.data = Phen.Data)
#' plot(GWAS1, main="")
#' summary(GWAS1)
#' #Summary for variance component estimation without SNP effects
#' summary(GWAS1@@call$hglm)
#' #The same results can be computed using the preFitModel as follows
#' fixed = y ~ age + sex
#' Mod1 <- preFitModel(fixed, random=~1|id, genabel.data = gen.data,
#' phenotype.data = Phen.Data, corStruc=list( id=list("GRM","Ind") ))
#' GWAS1b <- rGLS(fixed, genabel.data = gen.data,
#' phenotype.data = Phen.Data, V = Mod1$V)
#' plot(GWAS1b, main="Results using the preFitModel function")
#' ####### SECOND EXAMPLE USING CAR #############
#' # Add a fake nest variable to the data just to run the example
#' #In this example there are 6 nests and 60 observations per nest
#' Phen.Data$nest <- rep(1:6, each=60)
#' #A model including polygenic effects, permanent environmental effects,
#' #and nest effect as random
#' Mod2 <- preFitModel(fixed, random=~1|id + 1|nest, genabel.data = gen.data,
#' phenotype.data = Phen.Data, corStruc=list( id=list("GRM","Ind"), nest=list("Ind")) )
#' GWAS2 <- rGLS(fixed, genabel.data = gen.data, phenotype.data = Phen.Data, V = Mod2$V)
#' plot(GWAS2)
#' #Similar to previous plot because the nest effect variance component is almost 0.
#' ###################
#' #Construct a fake nighbourhood matrix
#' D = matrix(0,6,6)
#' D[1,2] = D[2,1] = 1
#' D[5,6] = D[6,5] = 1
#' D[2,4] = D[4,2] = 1
#' D[3,5] = D[5,3] = 1
#' D[1,6] = D[6,1] = 1
#' D[3,4] = D[4,3] = 1
#' #The matrix shows which pair of nests that can be considered as neighbours
#' image(Matrix(D), main="Neighbourhood matrix")
#' Mod3 <- preFitModel(fixed, random=~1|id + 1|nest, genabel.data = gen.data,
#' phenotype.data = Phen.Data, corStruc=list( id=list("GRM","Ind"),
#' nest=list("CAR")), Neighbor.Matrix=D )
#' GWAS2b <- rGLS(fixed, genabel.data = gen.data,
#' phenotype.data = Phen.Data, V = Mod3$V)
#' plot(GWAS2b)
#'
preFitModel <- function(fixed = y~1, random = ~1|id, id.name = "id", genabel.data, phenotype.data, corStruc = NULL, GRM = NULL, Neighbor.Matrix = NULL) {
if (!inherits(fixed, "formula") || length(fixed) != 3) stop("\n Fixed-effects model must be a formula of the form \"resp ~ pred\"")
if (!inherits(random, "formula")) stop("\n Random part must be a one-sided formula of the form \" ~ effect|Cluster\"")
if (attr(terms(random), "response") != 0) stop("\n Random part must be a one-sided formula of the form \" ~ effect|Cluster\"")
if (all.names(random)[2] != "|") stop("The subjects/clusters in Random must be separated by \" |\"")
if (class(genabel.data)!="gwaa.data") stop("The input of genabel.data is not a GenABEL object")
if (is.null(genabel.data@phdata$id)) stop("IDs not given as id in the phdata list")
#Get trait name
trait <- all.vars(fixed)[1]
#Remove NAs from phenotypic data
y.all <- phenotype.data[,names(phenotype.data) %in% trait]
phenotype.data <- phenotype.data[!is.na(y.all),]
#Connect IDs in GenABEL data set with IDs in the phenotype file
id1 <- phenotype.data[,names(phenotype.data) %in% id.name] #ID for phenotype data
id2 <- genabel.data@phdata$id #ID for genotype data
test1 <- id1 %in% id2
test2 <- id2 %in% id1
genabel.data = genabel.data[test2, ] #Exclude individuals having no phenotype information
phenotype.data = phenotype.data[test1, ] #Exclude individuals having no genotype information
id1 <- phenotype.data[,names(phenotype.data) %in% id.name] #ID for phenotype data for cleaned data
id2 <- genabel.data@phdata$id #ID for genotype data for cleaned data
### Create design matrix for the fixed effects ###
Y <- phenotype.data[,names(phenotype.data) %in% trait]
X <- model.matrix(fixed, data = phenotype.data)
if (nrow(X) != length(Y)) stop("Remove all lines with NA from the phenotype input data before running this function.")
row.names(X) <- NULL
###############################################
## Create design matrix for random effects ####
###############################################
#Construct incidence matrix for repeated observations
N <- length(id2)
n <- length(id1)
indx <- numeric(n)
for (i in 1:N) {
indx <- indx + i * (id1 %in% id2[i])
}
### Get SQRT of GRM first
if (is.null(GRM)) {
autosomalMarkers <- which(chromosome(genabel.data) != "X")
GRM <- compute.GRM(genabel.data[,snpnames(genabel.data)[autosomalMarkers]])
}
eig <- eigen(GRM)
if (max(diag(GRM))>1.6) print("There seems to be highly inbred individuals in your data")
if (min(eig$values) < -0.5 ) print("The genetic relationship matrix is far from positive definite")
non_zero.eigenvalues = eig$values>(1e-6) #Put numerically small eigenvalues to zero
eig$values[!non_zero.eigenvalues] <- 0
print("GRM ready")
Z.GRM <- ( eig$vectors %*% diag(sqrt(eig$values)) )[indx,]
##Create Z
RanTermVec <- unlist(strsplit(attr(terms(random), "term.labels"), split = c("+"), fixed = TRUE))
k.rand <- length(RanTermVec)
Z <- NULL
RandC <- NULL
data <- phenotype.data
for (i in 1:k.rand) {
RanTerm <- unlist(strsplit(RanTermVec[i], split = "|", fixed = TRUE))
RanTerm[2] <- gsub(" ", "", RanTerm[2], fixed = TRUE)
ranf <- paste("~", "as.factor(", RanTerm[2], ")", "-1", sep = "")
ranf <- as.formula(ranf)
rmf <- model.frame(ranf, data)
z <- model.matrix(attr(rmf, "terms"), data = rmf)
if (nrow(z) != length(Y)) stop("Remove all lines with NA from the phenotype input data before running this function.")
row.names(z) <- NULL
if (is.null(corStruc)) {
RandC <- c(RandC, ncol(z))
Z <- cbind(Z, z)
}else{
cs <- unlist( corStruc[names(corStruc) %in% RanTerm[2]] )
for (j in 1:length(cs)) {
zL <- switch(cs[j],
"Ind" = z,
"GRM" = Z.GRM,
"CAR" = z,
stop("Input correlation structure not available")
)
RandC <- c(RandC, ncol(zL))
Z <- cbind(Z, zL)
j.rand <- length(RandC)
if (cs[j]=="CAR") {
if ( is.null(Neighbor.Matrix) ) stop("CAR correlation requires a Neighbor.Matrix to be specified")
if ( ncol(z) != ncol(Neighbor.Matrix) ) stop("Incorrect dimension of Neighbor.Matrix")
if ( j.rand == 1) {
rand.family = list( CAR(D=Neighbor.Matrix) )
} else {
rand.family[[j.rand]] = CAR(D=Neighbor.Matrix)
}
} else {
if (j.rand == 1) rand.family = list(gaussian())
if (j.rand > 1) rand.family[[j.rand]] = gaussian()
}
}
}
}
#####################################
### Fit the model
if (length(RandC) > 1) mod1 <- hglm(y=Y, X=X, Z=Z, RandC=RandC, maxit=200, rand.family=rand.family)
if (length(RandC) == 1) mod1 <- hglm(y=Y, X=X, Z=Z, RandC=RandC, maxit=200, rand.family=rand.family[[1]])
ratio <- mod1$varRanef/mod1$varFix
#stop("The constructV function needs to be changed if CAR model is to be used")
#V <- constructV(Z, RandC, ratio)
V = diag(nrow(Z))
indx = 1
for (i in 1:length(ratio)) {
Z.tmp <- Z[, indx:(indx + RandC[i] - 1)]
indx = indx + RandC[i]
if (rand.family[[i]]$family != "CAR") {
V <- V + tcrossprod(Z.tmp) * ratio[i]
} else {
ratio[i] <- mod1$CAR.tau/mod1$varFix
V <- V + Z.tmp %*% solve( diag( ncol(Z.tmp) ) - mod1$CAR.rho * Neighbor.Matrix ) %*% t(Z.tmp) * ratio[i]
}
}
list( fitted.hglm = mod1, V = V, ratio = ratio)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.