Nothing
geemR <- function(formula, id, waves=NULL, data = parent.frame(),
family = gaussian, corstr = "independence", Mv = 1,
weights = NULL, corr.mat = NULL, init.beta=NULL,
init.alpha=NULL, init.phi = 1, scale.fix = FALSE, nodummy = FALSE,
sandwich = TRUE, useP = TRUE, maxit = 20, tol = 0.00001){
call <- match.call()
famret <- getfam(family)
if(inherits(famret, "family")){
LinkFun <- famret$linkfun
InvLink <- famret$linkinv
VarFun <- famret$variance
InvLinkDeriv <- famret$mu.eta
}else{
LinkFun <- famret$LinkFun
VarFun <- famret$VarFun
InvLink <- famret$InvLink
InvLinkDeriv <- famret$InvLinkDeriv
}
if(scale.fix & is.null(init.phi)){
stop("If scale.fix=TRUE, then init.phi must be supplied")
}
useP <- as.numeric(useP)
### First, get all the relevant elements from the arguments
dat <- model.frame(formula, data, na.action=na.pass)
nn <- dim(dat)[1]
if(typeof(data) == "environment"){
id <- id
weights <- weights
if(is.null(call$weights)) weights <- rep(1, nn)
waves <- waves
}
else{
if(length(call$id) == 1){
subj.col <- which(colnames(data) == call$id)
if(length(subj.col) > 0){
id <- data[,subj.col]
}else{
id <- eval(call$id, envir=parent.frame())
}
}else if(is.null(call$id)){
id <- 1:nn
}
if(length(call$weights) == 1){
weights.col <- which(colnames(data) == call$weights)
if(length(weights.col) > 0){
weights <- data[,weights.col]
}else{
weights <- eval(call$weights, envir=parent.frame())
}
}else if(is.null(call$weights)){
weights <- rep.int(1,nn)
}
if(length(call$waves) == 1){
waves.col <- which(colnames(data) == call$waves)
if(length(waves.col) > 0){
waves <- data[,waves.col]
}else{
waves <- eval(call$waves, envir=parent.frame())
}
}else if(is.null(call$waves)){
waves <- NULL
}
}
dat$id <- id
dat$weights <- weights
dat$waves <- waves
if(!is.numeric(dat$waves) & !is.null(dat$waves)) stop("waves must be either an integer vector or NULL")
# W is diagonal matrix of weights, sqrtW = sqrt(W)
# included is diagonal matrix with 1 if weight > 0, 0 otherwise
# includedvec is logical vector with T if weight > 0, F otherwise
# Note that we need to assign weight 0 to rows with NAs
# in order to preserve the correlation structure
na.inds <- NULL
if(any(is.na(dat))){
na.inds <- which(is.na(dat), arr.ind=T)
}
#SORT THE DATA ACCORDING TO WAVES
if(!is.null(waves)){
dat <- dat[order(id, waves),]
}else{
dat <- dat[order(id),]
}
# Figure out the correlation structure
cor.vec <- c("independence", "ar1", "exchangeable", "m-dependent", "unstructured", "fixed", "userdefined")
cor.match <- charmatch(corstr, cor.vec)
if(is.na(cor.match)){stop("Unsupported correlation structure")}
if(!is.null(dat$waves)){
wavespl <- split(dat$waves, dat$id)
idspl <- split(dat$id, dat$id)
maxwave <- rep(0, length(wavespl))
incomp <- rep(0, length(wavespl))
for(i in 1:length(wavespl)){
maxwave[i] <- max(wavespl[[i]]) - min(wavespl[[i]]) + 1
if(maxwave[i] != length(wavespl[[i]])){
incomp[i] <- 1
}
}
#If there are gaps and correlation isn't exchangeable or independent
#then we'll add some dummy rows
if(!is.element(cor.match, c(1,3)) & (sum(incomp) > 0) & !nodummy){
dat <- dummyrows(formula, dat, incomp, maxwave, wavespl, idspl)
id <- dat$id
waves <- dat$waves
weights <- dat$weights
}
}
if(!is.null(na.inds)){
weights[unique(na.inds[,1])] <- 0
for(i in unique(na.inds)[,2]){
if(is.factor(dat[,i])){
dat[na.inds[,1], i] <- levels(dat[,i])[1]
}else{
dat[na.inds[,1], i] <- median(dat[,i], na.rm=T)
}
}
}
includedvec <- weights>0
inclsplit <- split(includedvec, id)
dropid <- NULL
allobs <- T
if(any(!includedvec)){
allobs <- F
for(i in 1:length(unique(id))){
if(all(!inclsplit[[i]])){
dropid <- c(dropid, i)
}
}
}
if(length(dropid)>0){
dropind <- which(is.element(id, dropid))
dat <- dat[-dropind,]
includedvec <- includedvec[-dropind]
weights <- weights[-dropind]
id <- id[-dropind]
}
nn <- dim(dat)[1]
K <- length(unique(id))
modterms <- terms(formula)
X <- model.matrix(formula,dat)
Y <- model.response(dat)
offset <- model.offset(dat)
p <- dim(X)[2]
### if no offset is given, then set to zero
if(is.null(offset)){
off <- rep(0, nn)
}else{
off <- offset
}
# Is there an intercept column?
interceptcol <- apply(X==1, 2, all)
## Basic check to see if link and variance functions make any kind of sense
linkOfMean <- LinkFun(mean(Y[includedvec])) - mean(off)
if( any(is.infinite(linkOfMean) | is.nan(linkOfMean)) ){
stop("Infinite or NaN in the link of the mean of responses. Make sure link function makes sense for these data.")
}
if( any(is.infinite( VarFun(mean(Y))) | is.nan( VarFun(mean(Y)))) ){
stop("Infinite or NaN in the variance of the mean of responses. Make sure variance function makes sense for these data.")
}
if(is.null(init.beta)){
if(any(interceptcol)){
#if there is an intercept and no initial beta, then use link of mean of response
init.beta <- rep(0, dim(X)[2])
init.beta[which(interceptcol)] <- linkOfMean
}else{
stop("Must supply an initial beta if not using an intercept.")
}
}
# Number of included observations for each cluster
includedlen <- rep(0, K)
len <- rep(0,K)
uniqueid <- unique(id)
tmpwgt <- as.numeric(includedvec)
idspl <-ifelse(tmpwgt==0, NA, id)
includedlen <- as.numeric(summary(split(Y, idspl, drop=T))[,1])
len <- as.numeric(summary(split(Y, id, drop=T))[,1])
W <- Diagonal(x=weights)
sqrtW <- sqrt(W)
included <- Diagonal(x=(as.numeric(weights>0)))
# Get vector of cluster sizes... remember this len variable
#len <- as.numeric(summary(split(Y, id, drop=T))[,1])
# Set the initial alpha value
if(is.null(init.alpha)){
alpha.new <- 0.2
if(cor.match==4){
# If corstr = "m-dep"
alpha.new <- 0.2^(1:Mv)
}else if(cor.match==5){
# If corstr = "unstructured"
alpha.new <- rep(0.2, sum(1:(max(len)-1)))
}else if(cor.match==7){
# If corstr = "userdefined"
alpha.new <- rep(0.2, max(unique(as.vector(corr.mat))))
}
}else{
alpha.new <- init.alpha
}
#if no initial overdispersion parameter, start at 1
if(is.null(init.phi)){
phi <- 1
}else{
phi <- init.phi
}
beta <- init.beta
#Set up matrix storage
StdErr <- Diagonal(nn)
dInvLinkdEta <- Diagonal(nn)
Resid <- Diagonal(nn)
# if( (max(len)==1) & cor.match != 1 ){
# warning("Largest cluster size is 1. Changing working correlation to independence.")
# cor.match <- 1
# corstr <- "independence"
# }
# Initialize for each correlation structure
if(cor.match == 1){
# INDEPENDENCE
R.alpha.inv <- Diagonal(x = rep.int(1, nn))/phi
BlockDiag <- getBlockDiag(len)$BDiag
}else if(cor.match == 2){
# AR-1
tmp <- buildAlphaInvAR(len)
# These are the vectors needed to update the inverse correlation
a1<- tmp$a1
a2 <- tmp$a2
a3 <- tmp$a3
a4 <- tmp$a4
# row.vec and col.vec for the big block diagonal of correlation inverses
# both are vectors of indices that facilitate in updating R.alpha.inv
row.vec <- tmp$row.vec
col.vec <- tmp$col.vec
BlockDiag <- getBlockDiag(len)$BDiag
}else if(cor.match == 3){
# EXCHANGEABLE
# Build a block diagonal correlation matrix for updating and sandwich calculation
# this matrix is block diagonal with all ones. Each block is of dimension cluster size.
tmp <- getBlockDiag(len)
BlockDiag <- tmp$BDiag
# Create a vector of length number of observations with associated cluster size for each observation
n.vec <- vector("numeric", nn)
index <- c(cumsum(len) - len, nn)
for(i in 1:K){
n.vec[(index[i]+1) : index[i+1]] <- rep(includedlen[i], len[i])
}
#n.vec <- vector("numeric", nn)
#index <- c(cumsum(len) - len, nn)
#for(i in 1:K){
# n.vec[(index[i]+1) : index[i+1]] <- rep(len[i], len[i])
#}
}else if(cor.match == 4){
# M-DEPENDENT, check that M is not too large
if(Mv >= max(len)){
stop("Cannot estimate that many parameters: Mv >= max(clustersize)")
}
# Build block diagonal similar to in exchangeable case, also get row indices and column
# indices for fast matrix updating later.
tmp <- getBlockDiag(len)
BlockDiag <- tmp$BDiag
row.vec <- tmp$row.vec
col.vec <- tmp$col.vec
R.alpha.inv <- NULL
}else if(cor.match == 5){
# UNSTRUCTURED
if( max(len^2 - len)/2 > length(len)){
stop("Cannot estimate that many parameters: not enough subjects for unstructured correlation")
}
tmp <- getBlockDiag(len)
BlockDiag <- tmp$BDiag
row.vec <- tmp$row.vec
col.vec <- tmp$col.vec
}else if(cor.match == 6){
# FIXED
# check if matrix meets some basic conditions
corr.mat <- checkFixedMat(corr.mat, len)
R.alpha.inv <- as(getAlphaInvFixed(corr.mat, len), "symmetricMatrix")/phi
BlockDiag <- getBlockDiag(len)$BDiag
}else if(cor.match == 7){
# USERDEFINED
corr.mat <- checkUserMat(corr.mat, len)
# get the structure of the correlation matrix in a way that
# I can use later on.
tmp1 <- getUserStructure(corr.mat)
corr.list <- tmp1$corr.list
user.row <- tmp1$row.vec
user.col <- tmp1$col.vec
struct.vec <- tmp1$struct.vec
# the same block diagonal trick.
tmp2 <- getBlockDiag(len)
BlockDiag <- tmp2$BDiag
row.vec <- tmp2$row.vec
col.vec <- tmp2$col.vec
}else if(cor.match == 0){
stop("Ambiguous Correlation Structure Specification")
}else{
stop("Unsupported Correlation Structure")
}
stop <- F
converged <- F
count <- 0
beta.old <- beta
unstable <- F
phi.old <- phi
# Main fisher scoring loop
while(!stop){
count <- count+1
eta <- as.vector(X %*% beta) + off
mu <- InvLink(eta)
diag(StdErr) <- sqrt(1/VarFun(eta))
if(!scale.fix){
phi <- updatePhi(Y, mu, VarFun, p, StdErr, included, includedlen, sqrtW, useP)
}
phi.new <- phi
## Calculate alpha, R(alpha)^(-1) / phi
if(cor.match == 2){
# AR-1
alpha.new <- updateAlphaAR(Y, mu, VarFun, phi, id, len,
StdErr, p, included, includedlen,
includedvec, allobs, sqrtW,
BlockDiag, useP)
R.alpha.inv <- getAlphaInvAR(alpha.new, a1,a2,a3,a4, row.vec, col.vec)/phi
}else if(cor.match == 3){
#EXCHANGEABLE
alpha.new <- updateAlphaEX(Y, mu, VarFun, phi, id, len, StdErr,
Resid, p, BlockDiag, included,
includedlen, sqrtW, useP)
#R.alpha.inv <- getAlphaInvEX(alpha.new, n.vec, BlockDiag)/phi
R.alpha.inv <- getAlphaInvEX(alpha.new, n.vec, BlockDiag)/phi
}else if(cor.match == 4){
# M-DEPENDENT
if(Mv==1){
alpha.new <- updateAlphaAR(Y, mu, VarFun, phi, id, len,
StdErr, p, included, includedlen,
includedvec, allobs,
sqrtW, BlockDiag, useP)
}else{
alpha.new <- updateAlphaMDEP(Y, mu, VarFun, phi, id, len,
StdErr, Resid, p, BlockDiag, Mv,
included, includedlen,
allobs, sqrtW, useP)
if(sum(len>Mv) <= p){
unstable <- T
}
}
if(any(alpha.new >= 1)){
stop <- T
warning("some estimated correlation is greater than 1, stopping.")
}
R.alpha.inv <- getAlphaInvMDEP(alpha.new, len, row.vec, col.vec)/phi
}else if(cor.match == 5){
# UNSTRUCTURED
alpha.new <- updateAlphaUnstruc(Y, mu, VarFun, phi, id, len,
StdErr, Resid, p, BlockDiag,
included, includedlen, allobs,
sqrtW, useP)
# This has happened to me (greater than 1 correlation estimate)
if(any(alpha.new >= 1)){
stop <- T
warning("some estimated correlation is greater than 1, stopping.")
}
R.alpha.inv <- getAlphaInvUnstruc(alpha.new, len, row.vec, col.vec)/phi
}else if(cor.match ==6){
# FIXED CORRELATION, DON'T NEED TO RECOMPUTE
R.alpha.inv <- R.alpha.inv*phi.old/phi
}else if(cor.match == 7){
# USER SPECIFIED
alpha.new <- updateAlphaUser(Y, mu, phi, id, len, StdErr, Resid,
p, BlockDiag, user.row, user.col,
corr.list, included, includedlen,
allobs, sqrtW, useP)
R.alpha.inv <- getAlphaInvUser(alpha.new, len, struct.vec, user.row, user.col, row.vec, col.vec)/phi
}else if(cor.match == 1){
# INDEPENDENT
R.alpha.inv <- Diagonal(x = rep.int(1/phi, nn))
alpha.new <- "independent"
}
beta.list <- updateBetaR(Y, X, beta, off, InvLinkDeriv, InvLink, VarFun, R.alpha.inv, StdErr, dInvLinkdEta, tol, W, included)
beta <- beta.list$beta
phi.old <- phi
if( max(abs((beta - beta.old)/(beta.old + .Machine$double.eps))) < tol ){converged <- T; stop <- T}
if(count >= maxit){stop <- T}
beta.old <- beta
}
biggest <- which.max(len)[1]
index <- sum(len[1:biggest])-len[biggest]
if(K == 1){
biggest.R.alpha.inv <- R.alpha.inv
if(cor.match == 6) {
biggest.R.alpha <- corr.mat*phi
}else{
biggest.R.alpha <- solve(R.alpha.inv)
}
}else{
biggest.R.alpha.inv <- R.alpha.inv[(index+1):(index+len[biggest]) , (index+1):(index+len[biggest])]
if(cor.match == 6){
biggest.R.alpha <- corr.mat[1:len[biggest] , 1:len[biggest]]*phi
}else{
biggest.R.alpha <- solve(biggest.R.alpha.inv)
}
}
eta <- as.vector(X %*% beta) + off
if(sandwich){
sandvar.list <- getSandwichR(Y, X, eta, id, R.alpha.inv, phi, InvLinkDeriv, InvLink, VarFun,
beta.list$hess, StdErr, dInvLinkdEta, BlockDiag, W, included)
}else{
sandvar.list <- list()
sandvar.list$sandvar <- "no sandwich"
}
if(!converged){warning("Did not converge")}
if(unstable){warning("Number of subjects with number of observations >= Mv is very small, some correlations are estimated with very low sample size.")}
# Create object of class geem with information about the fit
dat <- model.frame(formula, data, na.action=na.pass)
X <- model.matrix(formula, dat)
if(is.character(alpha.new)){alpha.new <- 0}
results <- list()
results$beta <- as.vector(beta)
results$phi <- phi
results$alpha <- alpha.new
if(cor.match == 6){
results$alpha <- as.vector(triu(corr.mat, 1)[which(triu(corr.mat,1)!=0)])
}
results$coefnames <- colnames(X)
results$niter <- count
results$converged <- converged
results$naiv.var <- solve(beta.list$hess) ## call model-based
results$var <- sandvar.list$sandvar
results$call <- call
results$corr <- cor.vec[cor.match]
results$clusz <- len
results$FunList <- famret
results$X <- X
results$offset <- off
results$eta <- eta
results$dropped <- dropid
results$weights <- weights
results$terms <- modterms
results$y <- Y
results$biggest.R.alpha <- biggest.R.alpha/phi
results$R.a.inv <- R.alpha.inv
results$formula <- formula
class(results) <- "geem"
return(results)
}
### Method to update coefficients. Goes to a maximum of 10 iterations, or when
### rough convergence has been obtained.
updateBetaR = function(YY, XX, beta, off, InvLinkDeriv, InvLink,
VarFun, R.alpha.inv, StdErr, dInvLinkdEta, tol, W, included){
beta.new <- beta
conv=F
for(i in 1:10){
eta <- as.vector(XX%*%beta.new) + off
diag(dInvLinkdEta) <- InvLinkDeriv(eta)
mu <- InvLink(eta)
diag(StdErr) <- sqrt(1/VarFun(eta))
#hess <- crossprod( StdErr %*% dInvLinkdEta %*%XX, R.alpha.inv %*% W %*% StdErr %*%dInvLinkdEta %*% XX)
#esteq <- crossprod( StdErr %*%dInvLinkdEta %*%XX , R.alpha.inv %*% W %*% StdErr %*% as.matrix(YY - mu))
hess <- crossprod( StdErr %*% dInvLinkdEta %*%XX, included %*% R.alpha.inv %*% W %*% StdErr %*%dInvLinkdEta %*% XX)
esteq <- crossprod( StdErr %*%dInvLinkdEta %*%XX , included %*% R.alpha.inv %*% W %*% StdErr %*% as.matrix(YY - mu))
update <- solve(hess, esteq)
beta.new <- beta.new + as.vector(update)
}
return(list(beta = beta.new, hess = hess))
}
### Calculate the sandiwch estimator as usual.
getSandwichR = function(YY, XX, eta, id, R.alpha.inv, phi, InvLinkDeriv,
InvLink, VarFun, hessMat, StdErr, dInvLinkdEta,
BlockDiag, W, included){
diag(dInvLinkdEta) <- InvLinkDeriv(eta)
mu <- InvLink(eta)
diag(StdErr) <- sqrt(1/VarFun(eta))
scoreDiag <- Diagonal(x= YY - mu)
BlockDiag <- scoreDiag %*% BlockDiag %*% scoreDiag
#numsand <- as.matrix(crossprod( StdErr %*% dInvLinkdEta %*% XX, R.alpha.inv %*% W %*% StdErr %*% BlockDiag %*% StdErr %*% W %*% R.alpha.inv %*% StdErr %*% dInvLinkdEta %*% XX))
numsand <- as.matrix(crossprod( StdErr %*% dInvLinkdEta %*% XX, included %*% R.alpha.inv %*% W %*% StdErr %*% BlockDiag %*% StdErr %*% W %*% R.alpha.inv %*% included %*% StdErr %*% dInvLinkdEta %*% XX))
sandvar <- t(solve(hessMat, numsand))
sandvar <- t(solve(t(hessMat), sandvar))
return(list(sandvar = sandvar, numsand = numsand))
}
geem <- function(formula, id, waves=NULL, data = parent.frame(),
family = gaussian, corstr = "independence", Mv = 1,
weights = NULL, corr.mat = NULL, init.beta=NULL,
init.alpha=NULL, init.phi = 1, scale.fix = FALSE, nodummy = FALSE,
sandwich = TRUE, useP = TRUE, maxit = 20, tol = 0.00001, offset=NULL){
call <- match.call()
famret <- getfam(family)
if(inherits(famret, "family")){
LinkFun <- famret$linkfun
InvLink <- famret$linkinv
VarFun <- famret$variance
InvLinkDeriv <- famret$mu.eta
}else{
LinkFun <- famret$LinkFun
VarFun <- famret$VarFun
InvLink <- famret$InvLink
InvLinkDeriv <- famret$InvLinkDeriv
}
if(scale.fix & is.null(init.phi)){
stop("If scale.fix=TRUE, then init.phi must be supplied")
}
useP <- as.numeric(useP)
### First, get all the relevant elements from the arguments
dat <- model.frame(formula, data, na.action=na.pass)
nn <- dim(dat)[1]
if(typeof(data) == "environment"){
id <- id
weights <- weights
if(is.null(call$weights)) weights <- rep(1, nn)
waves <- waves
}
else{
if(length(call$id) == 1){
subj.col <- which(colnames(data) == call$id)
if(length(subj.col) > 0){
id <- data[,subj.col]
}else{
id <- eval(call$id, envir=parent.frame())
}
}else if(is.null(call$id)){
id <- 1:nn
}
if(length(call$weights) == 1){
weights.col <- which(colnames(data) == call$weights)
if(length(weights.col) > 0){
weights <- data[,weights.col]
}else{
weights <- eval(call$weights, envir=parent.frame())
}
}else if(is.null(call$weights)){
weights <- rep.int(1,nn)
}
if(length(call$waves) == 1){
waves.col <- which(colnames(data) == call$waves)
if(length(waves.col) > 0){
waves <- data[,waves.col]
}else{
waves <- eval(call$waves, envir=parent.frame())
}
}else if(is.null(call$waves)){
waves <- NULL
}
}
dat$id <- id
dat$weights <- weights
dat$waves <- waves
if(!is.numeric(dat$waves) & !is.null(dat$waves)) stop("waves must be either an integer vector or NULL")
# W is diagonal matrix of weights, sqrtW = sqrt(W)
# included is diagonal matrix with 1 if weight > 0, 0 otherwise
# includedvec is logical vector with T if weight > 0, F otherwise
# Note that we need to assign weight 0 to rows with NAs
# in order to preserve the correlation structure
na.inds <- NULL
if(any(is.na(dat))){
na.inds <- which(is.na(dat), arr.ind=T)
}
#SORT THE DATA ACCORDING TO WAVES
if(!is.null(waves)){
dat <- dat[order(id, waves),]
}else{
dat <- dat[order(id),]
}
# Figure out the correlation structure
cor.vec <- c("independence", "ar1", "exchangeable", "m-dependent", "unstructured", "fixed", "userdefined")
cor.match <- charmatch(corstr, cor.vec)
if(is.na(cor.match)){stop("Unsupported correlation structure")}
if(!is.null(dat$waves)){
wavespl <- split(dat$waves, dat$id)
idspl <- split(dat$id, dat$id)
maxwave <- rep(0, length(wavespl))
incomp <- rep(0, length(wavespl))
for(i in 1:length(wavespl)){
maxwave[i] <- max(wavespl[[i]]) - min(wavespl[[i]]) + 1
if(maxwave[i] != length(wavespl[[i]])){
incomp[i] <- 1
}
}
#If there are gaps and correlation isn't exchangeable or independent
#then we'll add some dummy rows
if(!is.element(cor.match, c(1,3)) & (sum(incomp) > 0) & !nodummy){
dat <- dummyrows(formula, dat, incomp, maxwave, wavespl, idspl)
id <- dat$id
waves <- dat$waves
weights <- dat$weights
}
}
if(!is.null(na.inds)){
weights[unique(na.inds[,1])] <- 0
for(i in unique(na.inds)[,2]){
if(is.factor(dat[,i])){
dat[na.inds[,1], i] <- levels(dat[,i])[1]
}else{
dat[na.inds[,1], i] <- median(dat[,i], na.rm=T)
}
}
}
includedvec <- weights>0
inclsplit <- split(includedvec, id)
dropid <- NULL
allobs <- T
if(any(!includedvec)){
allobs <- F
for(i in 1:length(unique(id))){
if(all(!inclsplit[[i]])){
dropid <- c(dropid, i)
}
}
}
if(length(dropid)>0){
dropind <- which(is.element(id, dropid))
dat <- dat[-dropind,]
includedvec <- includedvec[-dropind]
weights <- weights[-dropind]
id <- id[-dropind]
}
nn <- dim(dat)[1]
K <- length(unique(id))
modterms <- terms(formula)
X <- model.matrix(formula,dat)
Y <- model.response(dat)
offset <- model.offset(dat)
p <- dim(X)[2]
### if no offset is given, then set to zero
if(is.null(offset)){
off <- rep(0, nn)
}else{
off <- offset
}
# Is there an intercept column?
interceptcol <- apply(X==1, 2, all)
## Basic check to see if link and variance functions make any kind of sense
linkOfMean <- LinkFun(mean(Y[includedvec])) - mean(off)
if( any(is.infinite(linkOfMean) | is.nan(linkOfMean)) ){
stop("Infinite or NaN in the link of the mean of responses. Make sure link function makes sense for these data.")
}
if( any(is.infinite( VarFun(mean(Y))) | is.nan( VarFun(mean(Y)))) ){
stop("Infinite or NaN in the variance of the mean of responses. Make sure variance function makes sense for these data.")
}
if(is.null(init.beta)){
if(any(interceptcol)){
#if there is an intercept and no initial beta, then use link of mean of response
init.beta <- rep(0, dim(X)[2])
init.beta[which(interceptcol)] <- linkOfMean
}else{
stop("Must supply an initial beta if not using an intercept.")
}
}
# Number of included observations for each cluster
includedlen <- rep(0, K)
len <- rep(0,K)
uniqueid <- unique(id)
tmpwgt <- as.numeric(includedvec)
idspl <-ifelse(tmpwgt==0, NA, id)
includedlen <- as.numeric(summary(split(Y, idspl, drop=T))[,1])
len <- as.numeric(summary(split(Y, id, drop=T))[,1])
W <- Diagonal(x=weights)
sqrtW <- sqrt(W)
included <- Diagonal(x=(as.numeric(weights>0)))
# Get vector of cluster sizes... remember this len variable
#len <- as.numeric(summary(split(Y, id, drop=T))[,1])
# Set the initial alpha value
if(is.null(init.alpha)){
alpha.new <- 0.2
if(cor.match==4){
# If corstr = "m-dep"
alpha.new <- 0.2^(1:Mv)
}else if(cor.match==5){
# If corstr = "unstructured"
alpha.new <- rep(0.2, sum(1:(max(len)-1)))
}else if(cor.match==7){
# If corstr = "userdefined"
alpha.new <- rep(0.2, max(unique(as.vector(corr.mat))))
}
}else{
alpha.new <- init.alpha
}
#if no initial overdispersion parameter, start at 1
if(is.null(init.phi)){
phi <- 1
}else{
phi <- init.phi
}
beta <- init.beta
#Set up matrix storage
StdErr <- Diagonal(nn)
dInvLinkdEta <- Diagonal(nn)
Resid <- Diagonal(nn)
# if( (max(len)==1) & cor.match != 1 ){
# warning("Largest cluster size is 1. Changing working correlation to independence.")
# cor.match <- 1
# corstr <- "independence"
# }
# Initialize for each correlation structure
if(cor.match == 1){
# INDEPENDENCE
R.alpha.inv <- Diagonal(x = rep.int(1, nn))/phi
BlockDiag <- getBlockDiag(len)$BDiag
}else if(cor.match == 2){
# AR-1
tmp <- buildAlphaInvAR(len)
# These are the vectors needed to update the inverse correlation
a1<- tmp$a1
a2 <- tmp$a2
a3 <- tmp$a3
a4 <- tmp$a4
# row.vec and col.vec for the big block diagonal of correlation inverses
# both are vectors of indices that facilitate in updating R.alpha.inv
row.vec <- tmp$row.vec
col.vec <- tmp$col.vec
BlockDiag <- getBlockDiag(len)$BDiag
}else if(cor.match == 3){
# EXCHANGEABLE
# Build a block diagonal correlation matrix for updating and sandwich calculation
# this matrix is block diagonal with all ones. Each block is of dimension cluster size.
tmp <- getBlockDiag(len)
BlockDiag <- tmp$BDiag
# Create a vector of length number of observations with associated cluster size for each observation
n.vec <- vector("numeric", nn)
index <- c(cumsum(len) - len, nn)
for(i in 1:K){
n.vec[(index[i]+1) : index[i+1]] <- rep(includedlen[i], len[i])
}
#n.vec <- vector("numeric", nn)
#index <- c(cumsum(len) - len, nn)
#for(i in 1:K){
# n.vec[(index[i]+1) : index[i+1]] <- rep(len[i], len[i])
#}
}else if(cor.match == 4){
# M-DEPENDENT, check that M is not too large
if(Mv >= max(len)){
stop("Cannot estimate that many parameters: Mv >= max(clustersize)")
}
# Build block diagonal similar to in exchangeable case, also get row indices and column
# indices for fast matrix updating later.
tmp <- getBlockDiag(len)
BlockDiag <- tmp$BDiag
row.vec <- tmp$row.vec
col.vec <- tmp$col.vec
R.alpha.inv <- NULL
}else if(cor.match == 5){
# UNSTRUCTURED
if( max(len^2 - len)/2 > length(len)){
stop("Cannot estimate that many parameters: not enough subjects for unstructured correlation")
}
tmp <- getBlockDiag(len)
BlockDiag <- tmp$BDiag
row.vec <- tmp$row.vec
col.vec <- tmp$col.vec
}else if(cor.match == 6){
# FIXED
# check if matrix meets some basic conditions
corr.mat <- checkFixedMat(corr.mat, len)
R.alpha.inv <- as(getAlphaInvFixed(corr.mat, len), "symmetricMatrix")/phi
BlockDiag <- getBlockDiag(len)$BDiag
}else if(cor.match == 7){
# USERDEFINED
corr.mat <- checkUserMat(corr.mat, len)
# get the structure of the correlation matrix in a way that
# I can use later on.
tmp1 <- getUserStructure(corr.mat)
corr.list <- tmp1$corr.list
user.row <- tmp1$row.vec
user.col <- tmp1$col.vec
struct.vec <- tmp1$struct.vec
# the same block diagonal trick.
tmp2 <- getBlockDiag(len)
BlockDiag <- tmp2$BDiag
row.vec <- tmp2$row.vec
col.vec <- tmp2$col.vec
}else if(cor.match == 0){
stop("Ambiguous Correlation Structure Specification")
}else{
stop("Unsupported Correlation Structure")
}
stop <- F
converged <- F
count <- 0
beta.old <- beta
unstable <- F
phi.old <- phi
# Main fisher scoring loop
while(!stop){
count <- count+1
eta <- as.vector(X %*% beta) + off
mu <- InvLink(eta)
diag(StdErr) <- sqrt(1/VarFun(mu))
if(!scale.fix){
phi <- updatePhi(Y, mu, VarFun, p, StdErr, included, includedlen, sqrtW, useP)
}
phi.new <- phi
## Calculate alpha, R(alpha)^(-1) / phi
if(cor.match == 2){
# AR-1
alpha.new <- updateAlphaAR(Y, mu, VarFun, phi, id, len,
StdErr, p, included, includedlen,
includedvec, allobs, sqrtW,
BlockDiag, useP)
R.alpha.inv <- getAlphaInvAR(alpha.new, a1,a2,a3,a4, row.vec, col.vec)/phi
}else if(cor.match == 3){
#EXCHANGEABLE
alpha.new <- updateAlphaEX(Y, mu, VarFun, phi, id, len, StdErr,
Resid, p, BlockDiag, included,
includedlen, sqrtW, useP)
#R.alpha.inv <- getAlphaInvEX(alpha.new, n.vec, BlockDiag)/phi
R.alpha.inv <- getAlphaInvEX(alpha.new, n.vec, BlockDiag)/phi
}else if(cor.match == 4){
# M-DEPENDENT
if(Mv==1){
alpha.new <- updateAlphaAR(Y, mu, VarFun, phi, id, len,
StdErr, p, included, includedlen,
includedvec, allobs,
sqrtW, BlockDiag, useP)
}else{
alpha.new <- updateAlphaMDEP(Y, mu, VarFun, phi, id, len,
StdErr, Resid, p, BlockDiag, Mv,
included, includedlen,
allobs, sqrtW, useP)
if(sum(len>Mv) <= p){
unstable <- T
}
}
if(any(alpha.new >= 1)){
stop <- T
warning("some estimated correlation is greater than 1, stopping.")
}
R.alpha.inv <- getAlphaInvMDEP(alpha.new, len, row.vec, col.vec)/phi
}else if(cor.match == 5){
# UNSTRUCTURED
alpha.new <- updateAlphaUnstruc(Y, mu, VarFun, phi, id, len,
StdErr, Resid, p, BlockDiag,
included, includedlen, allobs,
sqrtW, useP)
# This has happened to me (greater than 1 correlation estimate)
if(any(alpha.new >= 1)){
stop <- T
warning("some estimated correlation is greater than 1, stopping.")
}
R.alpha.inv <- getAlphaInvUnstruc(alpha.new, len, row.vec, col.vec)/phi
}else if(cor.match ==6){
# FIXED CORRELATION, DON'T NEED TO RECOMPUTE
R.alpha.inv <- R.alpha.inv*phi.old/phi
}else if(cor.match == 7){
# USER SPECIFIED
alpha.new <- updateAlphaUser(Y, mu, phi, id, len, StdErr, Resid,
p, BlockDiag, user.row, user.col,
corr.list, included, includedlen,
allobs, sqrtW, useP)
R.alpha.inv <- getAlphaInvUser(alpha.new, len, struct.vec, user.row, user.col, row.vec, col.vec)/phi
}else if(cor.match == 1){
# INDEPENDENT
R.alpha.inv <- Diagonal(x = rep.int(1/phi, nn))
alpha.new <- "independent"
}
beta.list <- updateBeta(Y, X, beta, off, InvLinkDeriv, InvLink, VarFun, R.alpha.inv, StdErr, dInvLinkdEta, tol, W, included)
beta <- beta.list$beta
phi.old <- phi
if( max(abs((beta - beta.old)/(beta.old + .Machine$double.eps))) < tol ){converged <- T; stop <- T}
if(count >= maxit){stop <- T}
beta.old <- beta
}
biggest <- which.max(len)[1]
index <- sum(len[1:biggest])-len[biggest]
if(K == 1){
biggest.R.alpha.inv <- R.alpha.inv
if(cor.match == 6) {
biggest.R.alpha <- corr.mat*phi
}else{
biggest.R.alpha <- solve(R.alpha.inv)
}
}else{
biggest.R.alpha.inv <- R.alpha.inv[(index+1):(index+len[biggest]) , (index+1):(index+len[biggest])]
if(cor.match == 6){
biggest.R.alpha <- corr.mat[1:len[biggest] , 1:len[biggest]]*phi
}else{
biggest.R.alpha <- solve(biggest.R.alpha.inv)
}
}
eta <- as.vector(X %*% beta) + off
if(sandwich){
sandvar.list <- getSandwich(Y, X, eta, id, R.alpha.inv, phi, InvLinkDeriv, InvLink, VarFun,
beta.list$hess, StdErr, dInvLinkdEta, BlockDiag, W, included)
}else{
sandvar.list <- list()
sandvar.list$sandvar <- "no sandwich"
}
if(!converged){warning("Did not converge")}
if(unstable){warning("Number of subjects with number of observations >= Mv is very small, some correlations are estimated with very low sample size.")}
# Create object of class geem with information about the fit
dat <- model.frame(formula, data, na.action=na.pass)
X <- model.matrix(formula, dat)
if(is.character(alpha.new)){alpha.new <- 0}
results <- list()
results$beta <- as.vector(beta)
results$phi <- phi
results$alpha <- alpha.new
if(cor.match == 6){
results$alpha <- as.vector(triu(corr.mat, 1)[which(triu(corr.mat,1)!=0)])
}
results$coefnames <- colnames(X)
results$niter <- count
results$converged <- converged
results$naiv.var <- solve(beta.list$hess) ## call model-based
results$var <- sandvar.list$sandvar
results$call <- call
results$corr <- cor.vec[cor.match]
results$clusz <- len
results$FunList <- famret
results$X <- X
results$offset <- off
results$eta <- eta
results$dropped <- dropid
results$weights <- weights
results$terms <- modterms
results$R.a.inv <- R.alpha.inv
results$y <- Y
results$biggest.R.alpha <- biggest.R.alpha/phi
results$formula <- formula
class(results) <- "geem"
return(results)
}
### Simple moment estimator of dispersion parameter
updatePhi <- function(YY, mu, VarFun, p, StdErr, included, includedlen, sqrtW, useP){
nn <- sum(includedlen)
resid <- diag(StdErr %*% included %*% sqrtW %*% Diagonal(x = YY - mu))
phi <- (1/(sum(included)- useP * p))*crossprod(resid, resid)
return(as.numeric(phi))
}
### Method to update coefficients. Goes to a maximum of 10 iterations, or when
### rough convergence has been obtained.
updateBeta = function(YY, XX, beta, off, InvLinkDeriv, InvLink,
VarFun, R.alpha.inv, StdErr, dInvLinkdEta, tol, W, included){
beta.new <- beta
conv=F
for(i in 1:10){
eta <- as.vector(XX%*%beta.new) + off
diag(dInvLinkdEta) <- InvLinkDeriv(eta)
mu <- InvLink(eta)
diag(StdErr) <- sqrt(1/VarFun(mu))
#hess <- crossprod( StdErr %*% dInvLinkdEta %*%XX, R.alpha.inv %*% W %*% StdErr %*%dInvLinkdEta %*% XX)
#esteq <- crossprod( StdErr %*%dInvLinkdEta %*%XX , R.alpha.inv %*% W %*% StdErr %*% as.matrix(YY - mu))
hess <- crossprod( StdErr %*% dInvLinkdEta %*%XX, included %*% R.alpha.inv %*% W %*% StdErr %*%dInvLinkdEta %*% XX)
esteq <- crossprod( StdErr %*%dInvLinkdEta %*%XX , included %*% R.alpha.inv %*% W %*% StdErr %*% as.matrix(YY - mu))
update <- solve(hess, esteq)
beta.new <- beta.new + as.vector(update)
}
return(list(beta = beta.new, hess = hess))
}
### Calculate the sandiwch estimator as usual.
getSandwich = function(YY, XX, eta, id, R.alpha.inv, phi, InvLinkDeriv,
InvLink, VarFun, hessMat, StdErr, dInvLinkdEta,
BlockDiag, W, included){
diag(dInvLinkdEta) <- InvLinkDeriv(eta)
mu <- InvLink(eta)
diag(StdErr) <- sqrt(1/VarFun(mu))
scoreDiag <- Diagonal(x= YY - mu)
BlockDiag <- scoreDiag %*% BlockDiag %*% scoreDiag
#numsand <- as.matrix(crossprod( StdErr %*% dInvLinkdEta %*% XX, R.alpha.inv %*% W %*% StdErr %*% BlockDiag %*% StdErr %*% W %*% R.alpha.inv %*% StdErr %*% dInvLinkdEta %*% XX))
numsand <- as.matrix(crossprod( StdErr %*% dInvLinkdEta %*% XX, included %*% R.alpha.inv %*% W %*% StdErr %*% BlockDiag %*% StdErr %*% W %*% R.alpha.inv %*% included %*% StdErr %*% dInvLinkdEta %*% XX))
sandvar <- t(solve(hessMat, numsand))
sandvar <- t(solve(t(hessMat), sandvar))
return(list(sandvar = sandvar, numsand = numsand))
}
### summary function for geem object.
summary.geem <- function(object, ...) {
Coefs <- matrix(NA,nrow=length(object$beta),ncol=5)
Coefs[,1] <- c(object$beta)
naive <- is.character(object$var)
if(!naive & any(diag(object$var) < 0) ){
naive <- TRUE
warning("Some elements of robust variance estimate < 0. Reporting model based SE.")
}
Coefs[,2] <- sqrt(diag(object$naiv.var))
if(naive){Coefs[,3] <- rep(NA, length(object$beta))}else{Coefs[,3] <- sqrt(diag(object$var))}
if(naive){Coefs[,4] <- Coefs[,1]/Coefs[,2]}else{Coefs[,4] <- Coefs[,1]/Coefs[,3]}
Coefs[,5] <- round(2*pnorm(abs(Coefs[,4]), lower.tail=F), digits=8)
colnames(Coefs) <- c("Estimates","Model SE","Robust SE", "wald", "p")
summ <- list(beta = Coefs[,1], se.model = Coefs[,2], se.robust = Coefs[,3], wald.test = Coefs[,4], p = Coefs[,5],
alpha = object$alpha, corr = object$corr, phi = object$phi, niter = object$niter, clusz = object$clusz,
coefnames = object$coefnames, weights=object$weights, biggest.R.alpha = object$biggest.R.alpha)
class(summ) <- 'summary.geem'
return(summ)
}
### print function for summary.geem object
print.summary.geem <- function(x, ...){
Coefs <- matrix(0,nrow=length(x$coefnames),ncol=5)
rownames(Coefs) <- c(x$coefnames)
colnames(Coefs) <- c("Estimates","Model SE","Robust SE", "wald", "p")
Coefs[,1] <- x$beta
Coefs[,2] <- x$se.model
Coefs[,3] <- x$se.robust
Coefs[,4] <- x$wald.test
Coefs[,5] <- x$p
#print("Call: ", object$call, "\n")
print(signif(Coefs, digits=4))
if(!is.element(x$corr, c("independence", "ar1", "exchangeable"))){
if(dim(x$biggest.R.alpha)[1] > 4 ){
cat("\n Working Correlation[1:4,1:4]: \n")
print(as.matrix(round(x$biggest.R.alpha[1:4,1:4], digits=4)))
}else{
cat("\n Working Correlation: \n")
print(as.matrix(round(x$biggest.R.alpha, digits=4)))
}
}else{
cat("\n Estimated Correlation Parameter: ", signif(x$alpha, digits=4), "\n")
}
#cat("\n Est. Correlation: ", signif(x$alpha, digits=4), "\n")
cat(" Correlation Structure: ", x$corr, "\n")
cat(" Est. Scale Parameter: ", signif(x$phi, digits=4), "\n")
cat("\n Number of GEE iterations:", x$niter, "\n")
cat(" Number of Clusters: ", length(x$clusz), " Maximum Cluster Size: ", max(x$clusz), "\n")
cat(" Number of observations with nonzero weight: ", sum(x$weights != 0), "\n")
}
### print function for geem object
print.geem <- function(x, ...){
coefdf <- signif(data.frame(x$beta), digits=4)
rownames(coefdf) <- x$coefnames
colnames(coefdf) <- ""
print(x$call)
cat("\n", "Coefficients:", "\n")
print(t(coefdf))
cat("\n Scale Parameter: ", signif(x$phi, digits=4), "\n")
cat("\n Correlation Model: ", x$corr)
if(!is.element(x$corr, c("independence", "ar1", "exchangeable"))){
if(dim(x$biggest.R.alpha)[1] > 4 ){
cat("\n Working Correlation[1:4,1:4]: \n")
print(as.matrix(round(x$biggest.R.alpha[1:4,1:4], digits=4)))
}else{
cat("\n Working Correlation: \n")
print(as.matrix(round(x$biggest.R.alpha, digits=4)))
}
}else{
cat("\n Estimated Correlation Parameter: ", signif(x$alpha, digits=4), "\n")
}
cat("\n Number of clusters: ", length(x$clusz), " Maximum cluster size: ", max(x$clusz), "\n")
cat(" Number of observations with nonzero weight: ", sum(x$weights != 0), "\n")
}
#print.coef.geem <- function(x, ...){
# print(signif(x, 3))
#}
### Update the alpha (possibly) vector for the USERDEFINED correlation matrix.
updateAlphaUser <- function(YY, mu, phi, id, len, StdErr, Resid,
p, BlockDiag, row.vec, col.vec, corr.list,
included, includedlen, allobs,sqrtW, useP){
Resid <- StdErr %*% included %*% sqrtW %*% Diagonal(x = YY - mu)
ml <- max(len)
BlockDiag <- Resid %*% BlockDiag %*% Resid
alpha.new <- vector("numeric", length(corr.list))
index <- cumsum(len)-len
for(i in 1:length(alpha.new)){
newrow <- NULL
newcol <- NULL
for(j in 1:length(corr.list[[i]])){
newrow <- c(newrow, index[which(len >= col.vec[corr.list[[i]]][j])] + row.vec[corr.list[[i]][j]])
newcol <- c(newcol, index[which(len >= col.vec[corr.list[[i]]][j])] + col.vec[corr.list[[i]][j]])
}
bdtmp <- BlockDiag[cbind(newrow, newcol)]
if(allobs){
denom <- phi*(length(newrow) - useP * p)
}else{
denom <- phi*(sum(bdtmp!=0) - useP * p)
}
alpha.new[i] <- sum(bdtmp)/denom
}
return(alpha.new)
}
### Calculate the parameter for the EXCHANGEABLE correlation structure
updateAlphaEX <- function(YY, mu, VarFun, phi, id, len, StdErr,
Resid, p, BlockDiag, included,
includedlen, sqrtW, useP){
W <- sqrtW^2
Resid <- StdErr %*% included %*% sqrtW %*% Diagonal(x = YY - mu)
#print(mean(StdErr))
#print(mean(YY-mu))
denom <- phi*(sum(triu(included %*% BlockDiag %*% included, k=1)) - useP * p)
#BlockDiag <- StdErr %*%Diagonal(x = YY - mu) %*% BlockDiag %*% W %*% included %*% Diagonal(x = YY - mu) %*% StdErr
BlockDiag <- StdErr %*%Diagonal(x = YY - mu) %*% included %*% BlockDiag %*% W %*% Diagonal(x = YY - mu) %*% StdErr
alpha <- sum(triu(BlockDiag, k=1))
#alpha <- sum(triu(Resid %*% BlockDiag %*% Resid, k=1))
alpha.new <- alpha/denom
return(alpha.new)
}
### Calculate the parameters for the M-DEPENDENT correlation structure
updateAlphaMDEP <- function(YY, mu, VarFun, phi, id, len,
StdErr, Resid, p, BlockDiag, m,
included, includedlen, allobs, sqrtW, useP){
Resid <- StdErr %*% included %*% sqrtW %*% Diagonal(x = YY - mu)
BlockDiag <- Resid %*% BlockDiag %*% Resid
alpha.new <- vector("numeric", m)
for(i in 1:m){
if(sum(includedlen>i) > p){
bandmat <- drop0(band(BlockDiag, i,i))
if(allobs){
alpha.new[i] <- sum(bandmat)/(phi*(sum(as.numeric(len>i)*(len-i)) - useP * p))
}else{
alpha.new[i] <- sum( bandmat)/(phi*(length(bandmat@i) - useP * p))
}
}else{
# If we don't have many observations for a certain parameter, don't divide by p
# ensures we don't have NaN errors.
bandmat <- drop0(band(BlockDiag, i,i))
if(allobs){alpha.new[i] <- sum(bandmat)/(phi*(sum(as.numeric(len>i)*(len-i))))
}else{
alpha.new[i] <- sum( bandmat)/(phi*length(bandmat@i))
}
}
}
return(alpha.new)
}
### Calculate the parameter for the AR-1 correlation, also used for 1-DEPENDENT
updateAlphaAR <- function(YY, mu, VarFun, phi, id, len, StdErr, p,
included, includedlen, includedvec, allobs,
sqrtW, BlockDiag, useP){
#K <- length(len)
#oneobs <- which(len == 1)
Resid <- StdErr %*% sqrtW %*% Diagonal(x = YY - mu)
#len2 = len
#includedvec2 <- includedvec
#if(length(oneobs) > 0){
# index <- c(0, (cumsum(len) -len)[2:K], sum(len))
# len2 <- len[-oneobs]
# resid <- resid[-index[oneobs]]
# includedvec2 <- includedvec[-index[oneobs]]
#}
denom <- phi*(sum(band(triu(included %*% BlockDiag %*% included, k=1), k1=1, k2=1)) - useP * p)
num <- sum(band(triu(Resid %*% BlockDiag %*% Resid, k=1), k1=1, k2=1))
#nn <- length(resid)
#lastobs <- cumsum(len2)
#shiftresid1 <- resid[1:nn-1]
#shiftresid2 <- resid[2:nn]
#if(!allobs){
# shiftresid1 <- shiftresid1[-lastobs]
# shiftresid2 <- shiftresid2[-lastobs]
#s1incvec2 <- includedvec2[1:nn-1]
#s2incvec2 <- includedvec2[2:nn]
#s1incvec2 <- s1incvec2[-lastobs]
#s2incvec2 <- s2incvec2[-lastobs]
# alphasum <- crossprod(shiftresid1, shiftresid2)
#denom <- (as.vector(crossprod(s1incvec2, s2incvec2)) - p)*phi
#}else{
# alphasum <- crossprod(shiftresid1[-lastobs], shiftresid2[-lastobs])
#denom <- (sum(len2-1) - p)*phi
#}
alpha <- num/denom
return(as.numeric(alpha))
}
### Calculate alpha values for UNSTRUCTURED correlation
updateAlphaUnstruc <- function(YY, mu, VarFun, phi, id, len, StdErr, Resid,
p, BlockDiag, included, includedlen,
allobs, sqrtW, useP){
Resid <- StdErr %*% included %*% sqrtW %*%Diagonal(x = YY - mu)
ml <- max(len)
BlockDiag <- Resid %*% BlockDiag %*% Resid
alpha.new <- vector("numeric", sum(1:(ml-1)))
lalph <- length(alpha.new)
row.vec <- NULL
col.vec <- NULL
for(i in 2:ml){
row.vec <- c(row.vec, 1:(i-1))
col.vec <- c(col.vec, rep(i, each=i-1))
}
index <- cumsum(len)-len
if(sum(includedlen == max(len)) <= p){stop("Number of clusters of largest size is less than p.")}
for(i in 1:lalph){
# Get all of the indices of the matrix corresponding to the correlation
# we want to estimate.
newrow <- index[which(len>=col.vec[i])] + row.vec[i]
newcol <- index[which(len>=col.vec[i])] + col.vec[i]
bdtmp <- BlockDiag[cbind(newrow, newcol)]
if(allobs){
denom <- (phi*(length(newrow) - useP * p))
}else{
denom <- (phi*(sum(bdtmp!=0) - useP * p))
}
alpha.new[i] <- sum(bdtmp)/denom
}
return(alpha.new)
}
### Get the AR-1 inverse matrix
getAlphaInvAR <- function(alpha.new, a1,a2,a3,a4, row.vec, col.vec){
corr.vec <- c(alpha.new*a1/(1-alpha.new^2) , ( (1+alpha.new^2)*a2 + a3)/(1-alpha.new^2) + a4, alpha.new*a1/(1-alpha.new^2))
return(as(sparseMatrix(i= row.vec, j=col.vec, x=corr.vec), "symmetricMatrix"))
}
### Get the necessary structures for the inverse matrix of AR-1
### returns the row and column indices of the AR-1 inverse
### a1, a2, a3, and a4 are used to compute the entries in the matrix
buildAlphaInvAR <- function(len){
nn <- sum(len)
K <- length(len)
a1 <- a2 <- a3 <- a4 <- vector("numeric", nn)
index <- c(cumsum(len) - len, nn)
for (i in 1:K) {
if(len[i] > 1) {
a1[(index[i]+1) : index[i+1]] <- c(rep(-1,times = len[i]-1),0)
a2[(index[i]+1) : index[i+1]] <- c(0,rep(1,times=len[i]-2),0)
a3[(index[i]+1) : index[i+1]] <- c(1,rep(0,times=len[i]-2),1)
a4[(index[i]+1) : index[i+1]] <- c(rep(0,times=len[i]))
}
else if (len[i] == 1) {
a1[(index[i]+1) : index[i+1]] <- 0
a2[(index[i]+1) : index[i+1]] <- 0
a3[(index[i]+1) : index[i+1]] <- 0
a4[(index[i]+1) : index[i+1]] <- 1
}
}
a1 <- a1[1:(nn-1)]
subdiag.col <- 1:(nn-1)
subdiag.row <- 2:nn
row.vec <- c(subdiag.row, (1:nn), subdiag.col)
col.vec <- c(subdiag.col, (1:nn), subdiag.row)
return(list(row.vec = row.vec, col.vec= col.vec, a1 = a1, a2=a2, a3=a3, a4=a4))
}
### Returns the full inverse matrix of the correlation for EXCHANGEABLE structure
getAlphaInvEX <- function(alpha.new, diag.vec, BlockDiag){
return(as(BlockDiag %*% Diagonal(x = (-alpha.new/((1-alpha.new)*(1+(diag.vec-1)*alpha.new))))
+ Diagonal( x = ((1+(diag.vec-2)*alpha.new)/((1-alpha.new)*(1+(diag.vec-1)*alpha.new))
+ alpha.new/((1-alpha.new)*(1+(diag.vec-1)*alpha.new)))), "symmetricMatrix"))
}
### Get the inverse of M-DEPENDENT correlation matrix.
getAlphaInvMDEP <- function(alpha.new, len, row.vec, col.vec){
K <- length(len)
N <- sum(len)
m <- length(alpha.new)
# First get all of the unique block sizes.
#real.sizes <- sort(unique(len))
#mat.sizes <- fillMatSizes(real.sizes)
mat.sizes <- sort(unique(len))
#mat.sizes <- len
corr.vec <- vector("numeric", sum(len^2))
mat.inverses <- list()
index <- c(0, (cumsum(len^2) -len^2)[2:K], sum(len^2))
for(i in 1:length(mat.sizes)){
# Now create and invert each matrix
if(mat.sizes[i] == 1){
mat.inverses[[i]] <- 1
}else{
mtmp <- min(m, mat.sizes[i]-1)
a1 = list()
a1[[1]] <- rep(1, mat.sizes[i])
for(j in 1:mtmp){
a1[[j+1]] <- rep(alpha.new[j], mat.sizes[i]-j)
}
tmp <- bandSparse(mat.sizes[i], k=c(0:mtmp),diagonals=a1, symmetric=T )
mat.inverses[[i]] <- as.vector(solve(tmp))
}
}
# Put all inverted matrices in a vector in the right order
mat.finder <- match(len, mat.sizes)
corr.vec <- unlist(mat.inverses[mat.finder])
return(as(sparseMatrix(i=row.vec, j=col.vec, x=corr.vec), "symmetricMatrix"))
}
### Get a vector of elements of the inverse correlation matrix for UNSTRUCTURED
### Inversion strategy follows the same basic idea as M-DEPENDENT
getAlphaInvUnstruc <- function(alpha.new, len, row.vec, col.vec){
K <- length(len)
unstr.row <- NULL
unstr.col <- NULL
ml <- max(len)
sl2 <- sum(len^2)
for(i in 2:ml){
unstr.row <- c(unstr.row, 1:(i-1))
unstr.col <- c(unstr.col, rep(i, each=i-1))
}
unstr.row <- c(unstr.row, 1:ml)
unstr.col <- c(unstr.col, 1:ml)
xvec <- c(alpha.new, rep(1, ml))
# Get the biggest matrix implied by the cluster sizes
biggestMat <- forceSymmetric(sparseMatrix(i=unstr.row, j=unstr.col, x=xvec))
mat.sizes <- sort(unique(len))
corr.vec <- vector("numeric", sl2)
mat.inverses <- list()
index <- vector("numeric", K+1)
index[1] <- 0
index[2:K] <- (cumsum(len^2) -len^2)[2:K]
index[K+1] <- sl2
for(i in 1:length(mat.sizes)){
tmp <- biggestMat[1:mat.sizes[i], 1:mat.sizes[i]]
mat.inverses[[i]] <- as.vector(solve(tmp))
}
mat.finder <- match(len, mat.sizes)
corr.vec <- unlist(mat.inverses[mat.finder])
return(as(sparseMatrix(i=row.vec, j=col.vec, x=corr.vec), "symmetricMatrix"))
}
### Invert the FIXED correlation structure. Again,
### uses same basic technique as M-DEPENDENT
getAlphaInvFixed <- function(mat, len){
K <- length(len)
mat.sizes <- sort(unique(len))
mat.inverses <- list()
sl2 <- sum(len^2)
corr.vec <- vector("numeric", sl2)
index <- vector("numeric", K+1)
index[1] <- 0
index[2:K] <- (cumsum(len^2) -len^2)[2:K]
index[K+1] <- sl2
for(i in 1:length(mat.sizes)){
tmp <- mat[1:mat.sizes[i], 1:mat.sizes[i]]
mat.inverses[[i]] <- as.vector(solve(tmp))
}
mat.finder <- match(len, mat.sizes)
corr.vec <- unlist(mat.inverses[mat.finder])
return(as(getBlockDiag(len, corr.vec)$BDiag, "symmetricMatrix"))
}
### Get the structure of the USERDEFINED correlation matrix implied by the
### corr.mat argument to geem.
getUserStructure <- function(corr.mat){
ml <- dim(corr.mat)[1]
row.vec <- NULL
col.vec <- NULL
for(i in 2:ml){
row.vec <- c(row.vec, 1:(i-1))
col.vec <- c(col.vec, rep(i, each=i-1))
}
struct.vec <- corr.mat[cbind(row.vec, col.vec)]
corr.list <- vector("list", max(struct.vec))
for(i in 1:max(struct.vec)){
corr.list[[i]] <- which(struct.vec == i)
}
return(list(corr.list = corr.list, row.vec = row.vec, col.vec = col.vec, struct.vec = struct.vec))
}
### Get the inverse correlation matrix for USERDEFINED.
getAlphaInvUser <- function(alpha.new, len, struct.vec, user.row, user.col, row.vec, col.vec){
K <- length(len)
ml <- max(len)
sl2 <- sum(len^2)
# Indices for the correlation matrix for the subject
# with the most observations.
user.row <- c(user.row, 1:ml)
user.col <- c(user.col, 1:ml)
# The entries of the biggest matrix
xvec <- rep.int(0, length(struct.vec))
for(i in 1:length(alpha.new)){
xvec[which(struct.vec == i)] <- alpha.new[i]
}
xvec <- c(xvec, rep(1, ml))
biggestMat <- forceSymmetric(sparseMatrix(i=user.row, j=user.col, x=xvec))
mat.sizes <- sort(unique(len))
corr.vec <- vector("numeric", sl2)
mat.inverses <- list()
for(i in 1:length(mat.sizes)){
tmp <- biggestMat[1:mat.sizes[i], 1:mat.sizes[i]]
mat.inverses[[i]] <- as.vector(solve(tmp))
}
mat.finder <- match(len, mat.sizes)
corr.vec <- unlist(mat.inverses[mat.finder])
return(as(sparseMatrix(i=row.vec, j=col.vec, x=corr.vec), "symmetricMatrix"))
}
### Check some conditions on the USERDEFINED correlation structure supplied.
checkUserMat <- function(corr.mat, len){
if(is.null(corr.mat)){
stop("corr.mat must be specified if using user defined correlation structure")
}
if(dim(corr.mat)[1] < max(len)){
stop("corr.mat needs to be at least as long as the maximum cluster size.")
}
test.vec <- as.vector(corr.mat)
if(any(abs(test.vec-round(test.vec)) > .Machine$double.eps )){
stop("entries in corr.mat must be integers.")
}
max.val <- max(test.vec)
min.val <- min(test.vec)
if(!all(sort(unique(test.vec)) == min.val:max.val)){
stop("entries in corr.mat must be consecutive integers starting at 1.")
}
return(corr.mat[1:max(len), 1:max(len)])
}
getfam <- function(family){
if(is.character(family)){
family <- get(family, mode = "function", envir = parent.frame(2))
}
if(is.function(family)){
family <- family()
return(family)
}else if(inherits(family, "family")){
return(family)
}else if(is.list(family)){
if(length(match(names(family), c("LinkFun", "VarFun", "InvLink", "InvLinkDeriv"))) == 4){
famname <- "custom"
LinkFun <- family$LinkFun
InvLink <- family$InvLink
VarFun <- family$VarFun
InvLinkDeriv <- family$InvLinkDeriv
}else{
famname <- "custom"
LinkFun <- family[[1]]
VarFun <- family[[2]]
InvLink <- family[[3]]
InvLinkDeriv <- family[[4]]
}
FunList <- list("family"= famname, "LinkFun" = LinkFun, "VarFun" = VarFun, "InvLink" = InvLink, "InvLinkDeriv" = InvLinkDeriv)
return(FunList)
}else{
stop("problem with family argument: should be string, family object, or list of functions")
}
}
### Get a block diagonal matrix. Each block has dimension corresponding to
### each cluster size. By default, each block is just a matrix filled with ones.
getBlockDiag <- function(len, xvec=NULL){
K <- length(len)
if(is.null(xvec)){
xvec <- rep.int(1, sum(len^2))
}
row.vec <- col.vec <- vector("numeric", sum(len^2))
add.vec <- cumsum(len) - len
if(K == 1){
index <- c(0, sum(len^2))
}else{
index <- c(0, (cumsum(len^2) -len^2)[2:K], sum(len^2))
}
for(i in 1:K){
row.vec[(index[i] + 1):(index[i+1])] <- rep.int( (1:len[i]) + add.vec[i], len[i])
col.vec[(index[i] + 1):(index[i+1])] <- rep( (1:len[i]) + add.vec[i], each=len[i])
}
BlockDiag <- sparseMatrix(i = row.vec, j = col.vec, x = xvec)
if(!is.null(xvec)){
testsymm <- abs(sum(skewpart(BlockDiag)))
if(testsymm != 0) {
warning("Correlation matrix is not computed to be exactly symmetric. Taking only the symmetric part.")
}
}
return(list(BDiag = symmpart(BlockDiag), row.vec =row.vec, col.vec=col.vec))
}
### Check some conditions on the FIXED correlation structure.
checkFixedMat <- function(corr.mat, len){
if(is.null(corr.mat)){
stop("corr.mat must be specified if using fixed correlation structure")
}
if(dim(corr.mat)[1] < max(len)){
stop("Dimensions of corr.mat must be at least as large as largest cluster")
}
if(!isSymmetric(corr.mat)){
stop("corr.mat must be symmetric")
}
if(determinant(corr.mat, logarithm=T)$modulus == -Inf){
stop("supplied correlation matrix is not invertible.")
}
return(corr.mat[1:max(len), 1:max(len)])
}
### fitted function for geem object
fitted.geem <- function(object, ...){
#InvLink <- object$FunList$InvLink
InvLink <- object$FunList[[if(object$FunList$family == "custom") "InvLink" else "linkinv"]]
return(InvLink(object$eta))
}
predict.geem <- function(object, newdata = NULL,...){
coefs <- object$beta
if(is.null(newdata)){
return(as.vector(object$X %*% object$beta))
}else{
if(dim(newdata)[2] != length(coefs)){warning("New observations must have the same number of rows as coefficients in the model")}
return(as.vector(newdata %*% object$beta))
}
}
coef.geem <- function(object, ...){
coefs <- object$beta
names(coefs) <- object$coefnames
return(coefs)
}
family.geem <- function(object,...){
return(object$FunList)
}
dummyrows <- function(formula, dat, incomp, maxwave, wavespl, idspl){
missing <- missid <- misswave <- rep(0, sum(maxwave))
index <- 1
for(i in 1:length(wavespl)){
wa <- wavespl[[i]]
index <- index+1
for(j in 2:length(wa)){
wdiff <- wa[j] - wa[j-1] -1
if(wdiff > 0){
missing[index:(index+wdiff-1)] <- (wa[j-1]+1):(wa[j]-1)
missid[index:(index+wdiff-1)] <- idspl[[i]][1]
}
index <- index+wdiff+1
}
}
dat2 <- as.data.frame(matrix(nrow=sum(maxwave), ncol=dim(dat)[2]))
colnames(dat2) <- colnames(dat)
dat2[missing==0,] <- dat
dat2$id[missing > 0] <- missid[missing>0]
dat2$weights[missing > 0] <- 0
dat2$waves[missing > 0] <- missing[missing > 0]
NAcols <- which(!is.element(names(dat2), c("id", "waves", "weights")))
for(i in NAcols){
dat2[missing>0, i] <- median(dat2[,i], na.rm=TRUE)
}
retdat <- model.frame(formula, dat2, na.action=na.pass)
retdat$id <- dat2$id
retdat$weights <- dat2$weights
retdat$waves <- dat2$waves
return(retdat)
}
fillMatList <- function(real.sizes){
}
model.matrix.geem <- function(object, ...){
return(model.matrix(object$formula, data=model.frame(object)))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.