Nothing
## ----invisibles, echo=FALSE, results='markup', message=FALSE------------------
library("knitr")
#library("gWQS")
## ----first step, echo=TRUE, results='markup', message=FALSE-------------------
library("qgcomp")
set.seed(543210)
qdat = simdata_quantized(n=5000, outcomtype="continuous", cor=c(.95, -0.3), b0=0, coef=c(0.25, -0.1, 0.05), q=4)
head(qdat)
cor(qdat[,c("x1", "x2", "x3")])
qgcomp(y~x1+x2+x3, expnms=c("x1", "x2", "x3"), data=qdat)
## ----metals data, echo=TRUE, results='markup', message=FALSE------------------
library("ggplot2")
data("metals", package="qgcomp")
head(metals)
## ----linear model and runtime, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
# we save the names of the mixture variables in the variable "Xnm"
Xnm <- c(
'arsenic','barium','cadmium','calcium','chromium','copper',
'iron','lead','magnesium','manganese','mercury','selenium','silver',
'sodium','zinc'
)
covars = c('nitrate','nitrite','sulfate','ph', 'total_alkalinity','total_hardness')
# Example 1: linear model
# Run the model and save the results "qc.fit"
system.time(qc.fit <- qgcomp.glm.noboot(y~.,dat=metals[,c(Xnm, 'y')], family=gaussian()))
# user system elapsed
# 0.011 0.002 0.018
# contrasting other methods with computational speed
# WQS regression (v3.0.1 of gWQS package)
#system.time(wqs.fit <- gWQS::gwqs(y~wqs,mix_name=Xnm, data=metals[,c(Xnm, 'y')], family="gaussian"))
# user system elapsed
# 35.775 0.124 36.114
# Bayesian kernel machine regression (note that the number of iterations here would
# need to be >5,000, at minimum, so this underestimates the run time by a factor
# of 50+
#system.time(bkmr.fit <- kmbayes(y=metals$y, Z=metals[,Xnm], family="gaussian", iter=100))
# user system elapsed
# 81.644 4.194 86.520
## ----linear model and runtimeb, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
# View results: scaled coefficients/weights and statistical inference about
# mixture effect
qc.fit
## ----linear model and runtime c, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
# quantized data
head(qc.fit$qx)
## ----linear model and runtime d, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
# regression with quantized data
qc.fit$qx$y = qc.fit$fit$data$y # first bring outcome back into the quantized data
newfit <- lm(y ~ arsenic_q + barium_q + cadmium_q + calcium_q + chromium_q + copper_q +
iron_q + lead_q + magnesium_q + manganese_q + mercury_q + selenium_q +
silver_q + sodium_q + zinc_q, data=qc.fit$qx)
newfit
## ----linear model and runtime e, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
sum(newfit$coefficients[-1]) # sum of all coefficients excluding intercept and confounders, if any
coef(qc.fit) # overall effect and intercept from qgcomp fit
## ----logistic qgcomp, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
qc.fit2 <- qgcomp.glm.noboot(disease_state~., expnms=Xnm,
data = metals[,c(Xnm, 'disease_state')], family=binomial(),
q=4)
qcboot.fit2 <- qgcomp.glm.boot(disease_state~., expnms=Xnm,
data = metals[,c(Xnm, 'disease_state')], family=binomial(),
q=4, B=10,# B should be 200-500+ in practice
seed=125, rr=FALSE)
qcboot.fit2b <- qgcomp.glm.boot(disease_state~., expnms=Xnm,
data = metals[,c(Xnm, 'disease_state')], family=binomial(),
q=4, B=10,# B should be 200-500+ in practice
seed=125, rr=TRUE)
## ----logistic qgcompb, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
qc.fit2
## ----logistic qgcompc, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
qcboot.fit2
## ----logistic qgcompd, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
qcboot.fit2b
## ----adj4cov-a, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
qc.fit3 <- qgcomp.glm.noboot(y ~ mage35 + arsenic + barium + cadmium + calcium + chloride +
chromium + copper + iron + lead + magnesium + manganese +
mercury + selenium + silver + sodium + zinc,
expnms=Xnm,
metals, family=gaussian(), q=4)
qc.fit3
plot(qc.fit3)
## ----adj4cov-b, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
qcboot.fit3 <- qgcomp.glm.boot(y ~ mage35 + arsenic + barium + cadmium + calcium + chloride +
chromium + copper + iron + lead + magnesium + manganese +
mercury + selenium + silver + sodium + zinc,
expnms=Xnm,
metals, family=gaussian(), q=4, B=10,# B should be 200-500+ in practice
seed=125)
qcboot.fit3
qcee.fit3 <- qgcomp.glm.ee(y ~ mage35 + arsenic + barium + cadmium + calcium + chloride +
chromium + copper + iron + lead + magnesium + manganese +
mercury + selenium + silver + sodium + zinc,
expnms=Xnm,
metals, family=gaussian(), q=4)
qcee.fit3
## ----adj4cov-c, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
qgcomp:::modelbound.ee(qcee.fit3)
plot(qcee.fit3, pointwiseref = 3, flexfit = FALSE, modelbound=TRUE)
plot(qcboot.fit3, pointwiseref = 3, flexfit = FALSE)
## ----adj4cov-d, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
pointwisebound.boot(qcboot.fit3, pointwiseref=3)
qgcomp:::modelbound.boot(qcboot.fit3)
## ----n-lin-non-hom-intro, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
qcboot.fit4 <- qgcomp(y~. + .^2,
expnms=Xnm,
metals[,c(Xnm, 'y')], family=gaussian(), q=4, B=10, seed=125)
plot(qcboot.fit4)
## ----ovrl-n-lin, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
qcboot.fit5 <- qgcomp(y~. + .^2,
expnms=Xnm,
metals[,c(Xnm, 'y')], family=gaussian(), q=4, degree=2,
B=10, rr=FALSE, seed=125)
plot(qcboot.fit5)
qcee.fit5b <- qgcomp.glm.ee(y~. + .^2,
expnms=Xnm,
metals[,c(Xnm, 'y')], family=gaussian(), q=4, degree=2,
rr=FALSE)
plot(qcee.fit5b)
## ----ovrl-n-linb, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
modelbound.boot(qcboot.fit5)
pointwisebound.boot(qcboot.fit5)
pointwisebound.noboot(qcee.fit5b)
## ----ovrl-n-lin-psi interp, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
qcboot.fit5
## ----graf-n-lin-1, results='markup', fig.show='hold', fig.height=3, fig.width=7.5, cache=FALSE----
library(splines)
# find all correlations > 0.6 (this is an arbitrary choice)
cormat = cor(metals[,Xnm])
idx = which(cormat>0.6 & cormat <1.0, arr.ind = TRUE)
newXnm = unique(rownames(idx)) # iron, lead, and cadmium
qc.fit6lin <- qgcomp.glm.boot(y ~ iron + lead + cadmium +
mage35 + arsenic + magnesium + manganese + mercury +
selenium + silver + sodium + zinc,
expnms=newXnm,
metals, family=gaussian(), q=8, B=10)
qc.fit6nonlin <- qgcomp.glm.boot(y ~ bs(iron) + bs(cadmium) + bs(lead) +
mage35 + arsenic + magnesium + manganese + mercury +
selenium + silver + sodium + zinc,
expnms=newXnm,
metals, family=gaussian(), q=8, B=10, degree=2)
qc.fit6nonhom <- qgcomp.glm.boot(y ~ bs(iron)*bs(lead) + bs(iron)*bs(cadmium) + bs(lead)*bs(cadmium) +
mage35 + arsenic + magnesium + manganese + mercury +
selenium + silver + sodium + zinc,
expnms=newXnm,
metals, family=gaussian(), q=8, B=10, degree=3)
## ----graf-n-lin-1b, results='markup', fig.show='hold', fig.height=3, fig.width=7.5, cache=FALSE----
pl_fit6lin <- plot(qc.fit6lin, suppressprint = TRUE, pointwiseref = 4)
pl_fit6lin + coord_cartesian(ylim=c(-0.75, .75)) +
ggtitle("Linear fit: mixture of iron, lead, and cadmium")
## ----graf-n-lin-2, results='markup', fig.show='hold', fig.height=3, fig.width=7.5, cache=FALSE----
pl_fit6nonlin <- plot(qc.fit6nonlin, suppressprint = TRUE, pointwiseref = 4)
pl_fit6nonlin + coord_cartesian(ylim=c(-0.75, .75)) +
ggtitle("Non-linear fit: mixture of iron, lead, and cadmium")
## ----graf-n-lin 3, results='markup', fig.show='hold', fig.height=3, fig.width=7.5, cache=FALSE----
pl_fit6nonhom <- plot(qc.fit6nonhom, suppressprint = TRUE, pointwiseref = 4)
pl_fit6nonhom + coord_cartesian(ylim=c(-0.75, .75)) +
ggtitle("Non-linear, non-homogeneous fit: mixture of iron, lead, and cadmium")
## ----grafwarn, results='markup', fig.show='hold', fig.height=3, fig.width=7.5, cache=FALSE----
qc.overfit <- qgcomp.glm.boot(y ~ bs(iron) + bs(cadmium) + bs(lead) +
mage35 + bs(arsenic) + bs(magnesium) + bs(manganese) + bs(mercury) +
bs(selenium) + bs(silver) + bs(sodium) + bs(zinc),
expnms=Xnm,
metals, family=gaussian(), q=8, B=10, degree=1)
qc.overfit
plot(qc.overfit, pointwiseref = 5)
## ----grafwarn-2, results='markup', fig.show='hold', fig.height=3, fig.width=7.5, cache=FALSE----
plot(qc.overfit, flexfit = FALSE, pointwiseref = 5)
## ----n-lin-exs, results='markup', fig.show='hold', fig.height=3, fig.width=7.5, cache=FALSE----
qc.fit7a <- qgcomp.glm.boot(y ~ factor(iron) + lead + cadmium +
mage35 + arsenic + magnesium + manganese + mercury +
selenium + silver + sodium + zinc,
expnms=newXnm,
metals, family=gaussian(), q=8, B=20, deg=2)
# underlying fit
summary(qc.fit7a$fit)$coefficients
plot(qc.fit7a)
## ----n-lin-exs-2, results='markup', fig.show='hold', fig.height=3, fig.width=7.5, cache=FALSE----
qc.fit7b <- qgcomp.glm.boot(y ~ factor(iron)*factor(lead) + cadmium +
mage35 + arsenic + magnesium + manganese + mercury +
selenium + silver + sodium + zinc,
expnms=newXnm,
metals, family=gaussian(), q=8, B=10, deg=3)
# underlying fit
#summary(qc.fit7b$fit)$coefficients
plot(qc.fit7b)
## ----n-lin-exs-3, results='markup', fig.show='hold', fig.height=3, fig.width=7.5, cache=FALSE----
qc.fit7c <- qgcomp.glm.boot(y ~ I(iron>4)*I(lead>4) + cadmium +
mage35 + arsenic + magnesium + manganese + mercury +
selenium + silver + sodium + zinc,
expnms=newXnm,
metals, family=gaussian(), q=8, B=10, deg=2)
# underlying fit
summary(qc.fit7c$fit)$coefficients
plot(qc.fit7c)
## ----splines, results='markup', fig.show='hold', fig.height=5, fig.width=7.5, cache=FALSE----
AIC(qc.fit6lin$fit)
AIC(qc.fit6nonlin$fit)
AIC(qc.fit6nonhom$fit)
BIC(qc.fit6lin$fit)
BIC(qc.fit6nonlin$fit)
BIC(qc.fit6nonhom$fit)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.