synthesize | R Documentation |
Fit parametric regression models to the outcome distribution and optionally
also parametric regression models for the joint distribution of the predictors
structural equation models.
Then the function sim.synth
can be called on the resulting object to
to simulate from the parametric model based on the machinery of the lava
package
synthesize(object, data, ...)
## S3 method for class 'formula'
synthesize(
object,
data,
recursive = FALSE,
max.levels = 10,
verbose = FALSE,
...
)
## S3 method for class 'lvm'
synthesize(
object,
data,
max.levels = 10,
logtrans = NULL,
verbose = FALSE,
fix.names = FALSE,
...
)
object |
Specification of the synthesizing model structures. Either a |
data |
Data to be synthesized. |
... |
Not used yet. |
recursive |
Let covariates recursively depend on each other. |
max.levels |
Integer used to guess which variables are categorical. When set to |
verbose |
Logical. If |
logtrans |
Vector of covariate names that should be log-transformed. This is primarily for internal use. |
fix.names |
Fix possible problematic covariate names. |
Synthesizes survival data (also works for linear models and generalized linear models).
The idea is to be able to simulate new data sets that mimic the original data.
See the vignette vignette("synthesize",package = "riskRegression")
for more details.
The simulation engine is: lava.
lava object
Thomas A. Gerds <tag@biostat.ku.dk>
lvm
# pbc data
library(survival)
library(lava)
data(pbc)
pbc <- na.omit(pbc[,c("time","status","sex","age","bili")])
pbc$logbili <- log(pbc$bili)
v_synt <- synthesize(object=Surv(time,status)~sex+age+logbili,data=pbc)
d <- simsynth(v_synt,1000)
fit_sim <- coxph(Surv(time,status==1)~age+sex+logbili,data=d)
fit_real <- coxph(Surv(time,status==1)~age+sex+logbili,data=pbc)
# compare estimated log-hazard ratios between simulated and real data
cbind(coef(fit_sim),coef(fit_real))
u <- lvm()
distribution(u,~sex) <- binomial.lvm()
distribution(u,~age) <- normal.lvm()
distribution(u,~trt) <- binomial.lvm()
distribution(u,~logbili) <- normal.lvm()
u <-eventTime(u,time~min(time.cens=0,time.transplant=1,time.death=2), "status")
lava::regression(u,logbili~age+sex) <- 1
lava::regression(u,time.transplant~sex+age+logbili) <- 1
lava::regression(u,time.death~sex+age+logbili) <- 1
lava::regression(u,time.cens~1) <- 1
transform(u,logbili~bili) <- function(x){log(x)}
u_synt <- synthesize(object=u, data=na.omit(pbc))
set.seed(8)
d <- simsynth(u_synt,n=1000)
# note: synthesize may relabel status variable
fit_sim <- coxph(Surv(time,status==1)~age+sex+logbili,data=d)
fit_real <- coxph(Surv(time,status==1)~age+sex+log(bili),data=pbc)
# compare estimated log-hazard ratios between simulated and real data
cbind(coef(fit_sim),coef(fit_real))
#
# Cancer data
#
data(cancer)
b <- lvm()
distribution(b,~rx) <- binomial.lvm()
distribution(b,~age) <- normal.lvm()
distribution(b,~resid.ds) <- binomial.lvm()
distribution(b,~ecog.ps) <- binomial.lvm()
lava::regression(b,time.death~age+rx+resid.ds) <- 1
b<-eventTime(b,futime~min(time.cens=0,time.death=1), "fustat")
b_synt <- synthesize(object = b, data = ovarian)
D <- simsynth(b_synt,1000)
fit_real <- coxph(Surv(futime,fustat)~age+rx+resid.ds, data=ovarian)
fit_sim <- coxph(Surv(futime,fustat)~age+rx+resid.ds, data=D)
cbind(coef(fit_sim),coef(fit_real))
w_synt <- synthesize(object=Surv(futime,fustat)~age+rx+resid.ds, data=ovarian)
D <- simsynth(w_synt,1000)
fit_sim <- coxph(Surv(futime,fustat==1)~age+rx+resid.ds,data=D)
fit_real <- coxph(Surv(futime,fustat==1)~age+rx+resid.ds,data=ovarian)
# compare estimated log-hazard ratios between simulated and real data
cbind(coef(fit_sim),coef(fit_real))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.