Generalized Meta-Analysis(GENMETA) is an approach for combining information on multivariate regression parameters across multiple different studies which have different, but, possibly overlapping information on subsets of covariates. GENMETA implements the generalized meta-analysis using iteratively reweighted least squares (IRWLS) algorithm. For details of the method, please see the Reference section below. This file provides guidelines for implementing generalized meta-analysis(GENMETA)
NOTE: If the user wishes to directly download the source files from Github, please go to the link https://github.com/28pro92/GENMETA and see the README for further instructions. If the user wishes to directly install the package from R console, please read the following instructions:
install.packages("devtools", dependencies=TRUE) library(devtools)
install_github("28pro92/packages-GENMETA") library(GENMETA)
library(MASS) library(stats) library(magic)
d.X = 3
mu = matrix(rep(0,d.X), nrow=d.X)
r1 = 0.3 r2 = 0.6 r3 = 0.1
Sigma = matrix( c(1, r1, r2, r1, 1, r3, r2, r3, 1), nrow=d.X, ncol=d.X)
beta.star = matrix(c(-1.2, log(1.3), log(1.3), log(1.3)),nrow = d.X+1)
n1 = 300 # sample size of the 1st data set. n2 = 500 # 2nd n3 = 1000 # 3rd
n = 50
no.of.simulations = 1000
sim.matrix = matrix(NA, no.of.simulations, 8)
start.time = Sys.time()
for(sim in 1:no.of.simulations) { set.seed(sim)
#--################################################ #--##### Generating the reference and studies ##### #--################################################
#---Generate the reference data set---#
X.rf = mvrnorm(n = n, mu, Sigma)
#----Generate data set 1. m1 means model 1.---#
X.m1 = mvrnorm(n = n1, mu, Sigma) # Generate the covariates. X.m1.1 = cbind(rep(1, n1), X.m1) # Add a column of 1's to X.m1. p.m1 = 1/(1+exp(-X.m1.1%*%beta.star)) # the vector of probabilities Y.m1 = rbinom(n1, size=1, p.m1) # the Bernoulli responses
#----Generate data set 2. m2 means model 2.---#
X.m2 = mvrnorm(n = n2, mu, Sigma) X.m2.1 = cbind(rep(1, n2), X.m2) p.m2 = 1/(1+exp(-X.m2.1%*%beta.star)) Y.m2 = rbinom(n2, size=1, p.m2)
#----Generate data set 3. m3 means model 3.---#
X.m3 = mvrnorm(n = n3, mu, Sigma) X.m3.1 = cbind(rep(1, n3), X.m3) p.m3 = 1/(1+exp(-X.m3.1%*%beta.star)) Y.m3 = rbinom(n3, size=1, p.m3)
#--####################################################### #--### Create data sets in the format of data frame.###### #--####################################################### data.m1 = data.frame(Y=Y.m1, X.m1) data.m2 = data.frame(Y=Y.m2, X.m2) data.m3 = data.frame(Y=Y.m3, X.m3)
#--#################################################################### #--### Fit logistic regression with reduced models to the data sets ### #--####################################################################
logit.m1 <- glm(Y ~ X1 + X2, data = data.m1, family = "binomial") if(logit.m1$converged == FALSE) { print("glm for logit.m1 is not convergent.") next }
logit.m2 <- glm(Y ~ X2 + X3, data = data.m2, family = "binomial")
if(logit.m2$converged == FALSE) { print("glm for logit.m2 is not convergent.") next }
logit.m3 <- glm(Y ~ X1 + X3, data = data.m3, family = "binomial")
if(logit.m3$converged == FALSE) { print("glm for logit.m3 is not convergent.") next }
#--#################################################################### #--### Obtain the estimators of the parameters in the reduced models.## #--####################################################################
theta.m1 = logit.m1$coefficients theta.m2 = logit.m2$coefficients theta.m3 = logit.m3$coefficients
#--#################################################################### #--#### The following is used to calculate robust variance estimates### #--#### for each of three parameter vectors in the reduced models. #### #--#### This is needed if the user inputs the variance-covariance ##### #--### matrices from each of studies. Later, we show that it can be ## #--### computed from reference data set if these are not provided #### #--###################################################################
#--#################################################################### #--### Find the covariance matrix estimators for the reduced models#### #--####################################################################
#--############################# #-- Basic notations for inputs# #--#############################
K = 3
A1 = c(1, 2) A2 = c(2, 3) # index set A2 A3 = c(1, 3) # index set A3
X.m1.used = cbind(rep(1, n1), X.m1[, A1, drop=F]) X.m2.used = cbind(rep(1, n2), X.m2[, A2, drop=F]) X.m3.used = cbind(rep(1, n3), X.m3[, A3, drop=F])
T.1 = matrix(rep(0, (length(A1)+1)^2), nrow=length(A1)+1) T.2 = T.1
for (i in 1:n1) { a = as.vector(exp(-X.m1.used[i, , drop=F]%*%theta.m1)) T.1 = T.1 + (a/(1+a)^2) * (t(X.m1.used[i, , drop=F]) %*% X.m1.used[i, , drop=F]) }
for (i in 1:n1) { a = as.vector(1/( 1 + exp(-X.m1.used[i, , drop=F]%*%theta.m1))) T.2 = T.2 + (Y.m1[i]-a)^2 * (t(X.m1.used[i, , drop=F])%*%X.m1.used[i, , drop=F]) }
Sigma.m1 = solve(T.1)%*%T.2%*%solve(T.1)
T.1 = matrix(rep(0, (length(A2)+1)^2), nrow=length(A2)+1) T.2 = T.1
for (i in 1:n2) { a = as.vector(exp(-X.m2.used[i, , drop=F]%*%theta.m2)) T.1 = T.1 + (a/(1+a)^2) * (t(X.m2.used[i, , drop=F])%*%X.m2.used[i, , drop=F]) }
for (i in 1:n2) { a = as.vector(1/( 1 + exp(-X.m2.used[i, , drop=F]%*%theta.m2))) T.2 = T.2 + (Y.m2[i]-a)^2 * (t(X.m2.used[i, , drop=F])%*%X.m2.used[i, , drop=F]) }
Sigma.m2 = solve(T.1)%*%T.2%*%solve(T.1)
#----- Computing robust estimate of Sigma.m3 -------#
T.1 = matrix(rep(0, (length(A3)+1)^2), nrow=length(A3)+1) T.2 = T.1
for (i in 1:n3) { a = as.vector(exp(-X.m3.used[i, , drop=F]%*%theta.m3)) T.1 = T.1 + (a/(1+a)^2) * (t(X.m3.used[i, , drop=F])%*%X.m3.used[i, , drop=F]) }
for (i in 1:n3) { a = as.vector(1/( 1 + exp(-X.m3.used[i, , drop=F]%*%theta.m3))) T.2 = T.2 + (Y.m3[i]-a)^2 * (t(X.m3.used[i, , drop=F])%*%X.m3.used[i, , drop=F]) }
Sigma.m3 = solve(T.1)%*%T.2%*%solve(T.1)
names(theta.m1)=c("(Intercept)","Age","Height") names(theta.m2)=c("(Intercept)","Height", "Weight") names(theta.m3)=c("(Intercept)","Age", "Weight")
#---- now put in the GENMETA example ----#
#--### The results shown in Table 2 is obtained by considering ##### #--#### study-specific variance-covariance matrices. This is shown ## #--#### below. ###################################################### #--################################################################## study1 = list(Coeff=theta.m1,Covariance=Sigma.m1,Sample_size=n1) study2 = list(Coeff=theta.m2,Covariance=Sigma.m2,Sample_size=n2) study3 = list(Coeff=theta.m3,Covariance=Sigma.m3,Sample_size=n3)
#---- Creating the study list for GENMETA input ---#
studies = list(study1,study2,study3) model = "logistic"
#------ Creating the reference data for GENMETA input -----#
reference = cbind(rep(1,n), X.rf) colnames(reference) = c("(Intercept)","Age","Height", "Weight")
result.same = GENMETA(studies, reference, model, initial_val = c(-1.2, log(1.3), log(1.3), log(1.3)))
if(sum(is.na(result.same$Est.coeff)) == 0 && is.null(result.same$Est.var.cov) != TRUE) { sim.matrix[sim, ] = c(result.same$Est.coeff, diag(result.same$Est.var.cov)) }
print(sim) }
stop.time = Sys.time()
elapsed.time = stop.time - start.time
Lower.CI = na.omit(sim.matrix)[,1:4] - 1.96*sqrt(na.omit(sim.matrix)[,5:8]) Upper.CI = na.omit(sim.matrix)[,1:4] + 1.96*sqrt(na.omit(sim.matrix)[,5:8])
Coverage.rate = matrix(0,dim(na.omit(sim.matrix))[1], 4) for(k in 1:dim(na.omit(sim.matrix))[1]) { if(beta.star[1,1] >= Lower.CI[k,1] & beta.star[1,1] <= Upper.CI[k,1]) Coverage.rate[k,1] = 1 if(beta.star[2,1] >= Lower.CI[k,2] & beta.star[2,1] <= Upper.CI[k,2]) Coverage.rate[k,2] = 1 if(beta.star[3,1] >= Lower.CI[k,3] & beta.star[3,1] <= Upper.CI[k,3]) Coverage.rate[k,3] = 1 if(beta.star[4,1] >= Lower.CI[k,4] & beta.star[4,1] <= Upper.CI[k,4]) Coverage.rate[k,4] = 1 }
square = function(x) { sum(x^2) }
Average.length = apply((Upper.CI-Lower.CI), 2, mean)
RMSE = sqrt(apply(t(t(na.omit(sim.matrix)[,1:4]) - as.numeric(beta.star)), 2, square)/dim(na.omit(sim.matrix))[1])
result = data.frame(cbind(apply(na.omit(sim.matrix), 2, mean)[1:4], beta.star, (apply(na.omit(sim.matrix), 2, mean)[1:4]- beta.star), sqrt(apply(na.omit(sim.matrix), 2, mean)[5:8]), sqrt(apply(na.omit(sim.matrix)[,1:4], 2, var))), apply(Coverage.rate,2, mean), Average.length, RMSE)
colnames(result) = c("Coeff","True", "Bias", "ESD", "SD", "Coverage.rate", "AL", "RMSE")
write.csv(result, file = "Simulation_1.csv", row.names = F)
d.X = 3
mu.1 = matrix(rep(0,d.X), nrow=d.X) mu.2 = matrix(rep(1,d.X), nrow=d.X) mu.3 = matrix(rep(0.5,d.X), nrow=d.X)
r1.1 = 0.2 r2.1 = 0.4 r3.1 = 0
Sigma.1 = matrix( c(1, r1.1, r2.1, r1.1, 1, r3.1, r2.1, r3.1, 1), nrow=d.X, ncol=d.X)
r1.2 = 0.3 r2.2 = 0.6 r3.2 = 0.1
Sigma.2 = matrix( c(1, r1.2, r2.2, r1.2, 1, r3.2, r2.2, r3.2, 1), nrow=d.X, ncol=d.X)
r1.3 = 0.4 r2.3 = 0.8 r3.3 = 0.2
Sigma.3 = matrix( c(1, r1.3, r2.3, r1.3, 1, r3.3, r2.3, r3.3, 1), nrow=d.X, ncol=d.X) # covariance matrix of the covariates.
beta.star = matrix(c(-1.2, log(1.3), log(1.3), log(1.3)),nrow = d.X+1)
n1 = 300 # sample size of the 1st data set. n2 = 500 # 2nd n3 = 1000 # 3rd
n = 50
no.of.simulations = 1000
sim.matrix = matrix(NA, no.of.simulations, 8)
start.time = Sys.time()
for(sim in 1:no.of.simulations) { set.seed(sim)
#--################################################# #--###### Generating the reference and studies ##### #--#################################################
#---Generate the reference data set---#
X.rf = mvrnorm(n = n, mu.1, Sigma.2)
#----Generate data set 1. m1 means model 1.---#
X.m1 = mvrnorm(n = n1, mu.1, Sigma.2) # Generate the covariates. X.m1.1 = cbind(rep(1, n1), X.m1) # Add a column of 1's to X.m1. p.m1 = 1/(1+exp(-X.m1.1%*%beta.star)) # the vector of probabilities Y.m1 = rbinom(n1, size=1, p.m1) # the Bernoulli responses
#----Generate data set 2. m2 means model 2.---#
X.m2 = mvrnorm(n = n2, mu.2, Sigma.2) X.m2.1 = cbind(rep(1, n2), X.m2) p.m2 = 1/(1+exp(-X.m2.1%*%beta.star)) Y.m2 = rbinom(n2, size=1, p.m2)
#----Generate data set 3. m3 means model 3.---#
X.m3 = mvrnorm(n = n3, mu.3, Sigma.2) X.m3.1 = cbind(rep(1, n3), X.m3) p.m3 = 1/(1+exp(-X.m3.1%*%beta.star)) Y.m3 = rbinom(n3, size=1, p.m3)
#--####################################################### #--### Create data sets in the format of data frame.###### #--####################################################### data.m1 = data.frame(Y=Y.m1, X.m1) data.m2 = data.frame(Y=Y.m2, X.m2) data.m3 = data.frame(Y=Y.m3, X.m3)
#--#################################################################### #--### Fit logistic regression with reduced models to the data sets ### #--####################################################################
logit.m1 <- glm(Y ~ X1 + X2, data = data.m1, family = "binomial") if(logit.m1$converged == FALSE) { print("glm for logit.m1 is not convergent.") next }
logit.m2 <- glm(Y ~ X2 + X3, data = data.m2, family = "binomial")
if(logit.m2$converged == FALSE) { print("glm for logit.m2 is not convergent.") next }
logit.m3 <- glm(Y ~ X1 + X3, data = data.m3, family = "binomial")
if(logit.m3$converged == FALSE) { print("glm for logit.m3 is not convergent.") next }
#--#################################################################### #--### Obtain the estimators of the parameters in the reduced models.## #--####################################################################
theta.m1 = logit.m1$coefficients theta.m2 = logit.m2$coefficients theta.m3 = logit.m3$coefficients
#--#################################################################### #--#### The following is used to calculate robust variance estimates### #--#### for each of three parameter vectors in the reduced models. #### #--#### This is needed if the user inputs the variance-covariance ##### #--### matrices from each of studies. Later, we show that it can be ## #--### computed from reference data set if these are not provided #### #--###################################################################
#--#################################################################### #--### Find the covariance matrix estimators for the reduced models#### #--####################################################################
#--############################# #-- Basic notations for inputs# #--#############################
K = 3
A1 = c(1, 2) A2 = c(2, 3) # index set A2 A3 = c(1, 3) # index set A3
X.m1.used = cbind(rep(1, n1), X.m1[, A1, drop=F]) X.m2.used = cbind(rep(1, n2), X.m2[, A2, drop=F]) X.m3.used = cbind(rep(1, n3), X.m3[, A3, drop=F])
T.1 = matrix(rep(0, (length(A1)+1)^2), nrow=length(A1)+1) T.2 = T.1
for (i in 1:n1) { a = as.vector(exp(-X.m1.used[i, , drop=F]%*%theta.m1)) T.1 = T.1 + (a/(1+a)^2) * (t(X.m1.used[i, , drop=F])%*%X.m1.used[i, , drop=F]) }
for (i in 1:n1) { a = as.vector(1/( 1 + exp(-X.m1.used[i, , drop=F]%*%theta.m1))) T.2 = T.2 + (Y.m1[i]-a)^2 * (t(X.m1.used[i, , drop=F])%*%X.m1.used[i, , drop=F]) }
Sigma.m1 = solve(T.1)%*%T.2%*%solve(T.1)
T.1 = matrix(rep(0, (length(A2)+1)^2), nrow=length(A2)+1) T.2 = T.1
for (i in 1:n2) { a = as.vector(exp(-X.m2.used[i, , drop=F]%*%theta.m2)) T.1 = T.1 + (a/(1+a)^2) * (t(X.m2.used[i, , drop=F])%*%X.m2.used[i, , drop=F]) }
for (i in 1:n2) { a = as.vector(1/( 1 + exp(-X.m2.used[i, , drop=F]%*%theta.m2))) T.2 = T.2 + (Y.m2[i]-a)^2 * (t(X.m2.used[i, , drop=F])%*%X.m2.used[i, , drop=F]) }
Sigma.m2 = solve(T.1)%*%T.2%*%solve(T.1)
#----- Computing robust estimate of Sigma.m3 -------#
T.1 = matrix(rep(0, (length(A3)+1)^2), nrow=length(A3)+1) T.2 = T.1
for (i in 1:n3) { a = as.vector(exp(-X.m3.used[i, , drop=F]%*%theta.m3)) T.1 = T.1 + (a/(1+a)^2) * (t(X.m3.used[i, , drop=F])%*%X.m3.used[i, , drop=F]) }
for (i in 1:n3) { a = as.vector(1/( 1 + exp(-X.m3.used[i, , drop=F]%*%theta.m3))) T.2 = T.2 + (Y.m3[i]-a)^2 * (t(X.m3.used[i, , drop=F])%*%X.m3.used[i, , drop=F]) }
Sigma.m3 = solve(T.1)%*%T.2%*%solve(T.1)
names(theta.m1)=c("(Intercept)","Age","Height") names(theta.m2)=c("(Intercept)","Height", "Weight") names(theta.m3)=c("(Intercept)","Age", "Weight")
#---- now put in the GENMETA example ----#
#--### The results shown in Table 2 is obtained by considering ##### #--#### study-specific variance-covariance matrices. This is shown ## #--#### below. ###################################################### #--################################################################## study1 = list(Coeff=theta.m1,Covariance=Sigma.m1,Sample_size=n1) study2 = list(Coeff=theta.m2,Covariance=Sigma.m2,Sample_size=n2) study3 = list(Coeff=theta.m3,Covariance=Sigma.m3,Sample_size=n3)
#---- Creating the study list for GENMETA input ---#
studies = list(study1,study2,study3) model = "logistic"
#------ Creating the reference data for GENMETA input -----#
reference = cbind(rep(1,n), X.rf) colnames(reference) = c("(Intercept)","Age","Height", "Weight")
result.same = GENMETA(studies, reference, model, initial_val = c(-1.2, log(1.3), log(1.3), log(1.3)))
if(sum(is.na(result.same$Est.coeff)) == 0 && is.null(result.same$Est.var.cov) != TRUE) { sim.matrix[sim, ] = c(result.same$Est.coeff, diag(result.same$Est.var.cov)) }
print(sim) }
stop.time = Sys.time()
elapsed.time = stop.time - start.time
Lower.CI = na.omit(sim.matrix)[,1:4] - 1.96*sqrt(na.omit(sim.matrix)[,5:8]) Upper.CI = na.omit(sim.matrix)[,1:4] + 1.96*sqrt(na.omit(sim.matrix)[,5:8])
Coverage.rate = matrix(0,dim(na.omit(sim.matrix))[1], 4) for(k in 1:dim(na.omit(sim.matrix))[1]) { if(beta.star[1,1] >= Lower.CI[k,1] & beta.star[1,1] <= Upper.CI[k,1]) Coverage.rate[k,1] = 1 if(beta.star[2,1] >= Lower.CI[k,2] & beta.star[2,1] <= Upper.CI[k,2]) Coverage.rate[k,2] = 1 if(beta.star[3,1] >= Lower.CI[k,3] & beta.star[3,1] <= Upper.CI[k,3]) Coverage.rate[k,3] = 1 if(beta.star[4,1] >= Lower.CI[k,4] & beta.star[4,1] <= Upper.CI[k,4]) Coverage.rate[k,4] = 1 }
square = function(x) { sum(x^2) }
Average.length = apply((Upper.CI-Lower.CI), 2, mean)
RMSE = sqrt(apply(t(t(na.omit(sim.matrix)[,1:4]) - as.numeric(beta.star)), 2, square)/dim(na.omit(sim.matrix))[1])
result = data.frame(cbind(apply(na.omit(sim.matrix), 2, mean)[1:4], beta.star, (apply(na.omit(sim.matrix), 2, mean)[1:4]- beta.star), sqrt(apply(na.omit(sim.matrix), 2, mean)[5:8]), sqrt(apply(na.omit(sim.matrix)[,1:4], 2, var))), apply(Coverage.rate,2, mean), Average.length, RMSE)
colnames(result) = c("Coeff","True", "Bias", "ESD", "SD", "Coverage.rate", "AL", "RMSE")
write.csv(result, file = "Simulation_9.csv", row.names = F)
Tang, R., Kundu, P. and Chatterjee, N. (2017) Generalized Meta-Analysis for Multivariate Regression Models Across Studies with Disparate Covariate Information. https://arxiv.org/abs/1708.03818
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.