R/idem_imputation.R

Defines functions print.IDEMIMP plot.IDEMIMP imImpAll print.IDEMSINGLE plot.IDEMSINGLE imImpSingle plot.IDEMFIT print.IDEMFIT imFitModel

Documented in imFitModel imImpAll imImpSingle plot.IDEMFIT plot.IDEMIMP plot.IDEMSINGLE print.IDEMFIT print.IDEMIMP print.IDEMSINGLE

#' Imputation model fitting
#'
#' Fit linear imputation models to the observed data from complete survivors for
#' each treatment arm at each time point
#'
#' @param im.data A class \code{IDEMDATA} object generated by \code{\link{imData}}
#'
#' @return A class \code{IDEMFIT} list of modeling fitting results with the following items
#' \describe{
#' \item{im.data}{Original class \code{IDEMDATA} object}
#' \item{rst.mdl}{A list of modeling fitting results for each model with
#'     \describe{
#'       \item{lm}{results from function \code{lm}}
#'       \item{formula}{model formula}
#'       \item{coef}{model coefficients}
#'       \item{res}{residuals}
#'       \item{h}{bandwidth of residuals for kernel density estimation}}
#' }}
#'
#' @seealso \code{\link{imData}}, \code{\link{idem-package}}
#'
#' @examples
#' im.abc <- imData(abc, trt="TRT", surv="SURV", outcome=c("Y1","Y2"),
#'                  y0=NULL, endfml="Y2",
#'                  trt.label = c("UC+SBT", "SAT+SBT"),
#'                  cov=c("AGE"), duration=365, bounds=c(0,100));
#' im.fit <- imFitModel(im.abc);
#'
#' @export
#'
#'
imFitModel <- function(im.data) {

    stopifnot(get.const("IDEM.CLASS") %in% class(im.data));

    data.all <- im.data$data;
    lst.var  <- im.data$lst.var;
    vtrt     <- NULL;
    voutcome <- NULL;
    eoutcome <- NULL;
    vsurv    <- NULL;
    duration <- NULL;
    endfml   <- NULL;
    tmp.endp <- NULL;
    vy0      <- NULL;
    bounds   <- NULL;

    ##get parameters in current enviroment
    get.para(lst.var, environment());

    ##transfer data
    data.all <- get.transfer.all(data.all, lst.var);

    ##completers
    is.comp  <- apply(data.all[, voutcome, drop = FALSE],
                      1,
                      function(x){all(!is.na(x))});

    ##treatment arms
    a.trt <- get.trt(data.all[,vtrt]);
    rst   <- list(NULL);
    for (i in 1:length(a.trt)) {
        cur.data  <- data.all[which(a.trt[i] == data.all[,vtrt] & is.comp),];
        cur.rst   <- list(NULL);
        cur.names <- NULL;
        for (j in 1:length(voutcome)) {
            if (1 == j) {
                prev.y <- NULL;
            } else {
                ## prev.y <- voutcome[1:(j-1)];
                prev.y <- voutcome[j-1]
            }
            cur.f    <- paste(voutcome[j], "~", paste(c(prev.y, vy0, vcov), collapse="+"));
            cur.lm   <- lm(as.formula(cur.f), data=cur.data);
            cur.coef <- get.coef(cur.lm);
            cur.res  <- residuals(cur.lm);
            cur.h    <- get.band.h(cur.res);
            cur.rst[[j]] <- list(lm=cur.lm,
                                 formula=cur.f,
                                 coef=cur.coef,
                                 res=cur.res,
                                 h=cur.h);
        }
        rst[[i]] <- cur.rst;
    }
    names(rst) <- a.trt;

    rtn.rst <- list(im.data = im.data,
                    rst.mdl = rst);

    class(rtn.rst) <- get.const("FIT.CLASS");;
    invisible(rtn.rst);
}

#' Print model fitting results
#'
#' Print method of the class \code{IDEMFIT} generated by
#' \code{\link{imFitModel}}
#' @inheritParams print.IDEMDATA
#' @param x A class \code{IDEMFIT} object generated by \code{\link{imFitModel}}
#'
#' @details
#'
#' Print the results from \code{lm} for all the models
#'
#' @seealso \code{\link{imFitModel}}
#' @export
#'
print.IDEMFIT <- function(x, ...) {

    lst.var <- x$im.data$lst.var;
    rst.mdl <- x$rst.mdl;

    cat("-------------Model fitting results:--------------- \n\n");
    for (i in 1:length(rst.mdl)) {
        cat("-- Treatment ");
        if (is.null(lst.var$trt.label)) {
            cat(i-1, "\n");
        } else {
            cat(lst.var$trt.label[i], "\n");
        }

        for (j in 1:length(rst.mdl[[i]])) {
            cat("----", rst.mdl[[i]][[j]]$formula, "\n");
            print(summary(rst.mdl[[i]][[j]]$lm));
        }
    }
}

#' Plot model fitting results
#'
#' Plot method of the class \code{IDEMFIT} to generate model fitting diagnosis
#' plots
#'
#' @inheritParams print.IDEMFIT
#'
#' @param trt Treatment arm selected for the diagnostic plots. If \code{NULL},
#'     all treatment arms are included
#' @param mfrow Plot option
#' @seealso \code{\link{imFitModel}}
#'
#' @examples
#' im.abc <- imData(abc, trt="TRT", surv="SURV", outcome=c("Y1","Y2"),
#'                  y0=NULL, endfml="Y2",
#'                  trt.label = c("UC+SBT", "SAT+SBT"),
#'                  cov=c("AGE"), duration=365, bounds=c(0,100));
#' im.fit <- imFitModel(im.abc);
#' plot(im.fit, mfrow=c(2,4));
#'
#' @method plot IDEMFIT
#'
#' @export
#'
plot.IDEMFIT <- function(x, trt = NULL, mfrow = NULL, ...) {
    n.outcome <- length(x$im.data$lst.var$outcome);
    trt       <- set.option(trt, 1:length(x$rst.mdl) - 1)

    if (is.null(mfrow))
        mfrow <- c(length(trt)*n.outcome,2);

    par(mfrow = mfrow);
    for (i in trt) {
        cur.t <- x$im.data$lst.var$trt.label[i+1];
        if (is.null(cur.t) | is.na(cur.t)) {
            cur.t <- paste("Trt ", i, sep = "");
        }

        for (j in 1:n.outcome) {
            plot(x$rst.mdl[[i+1]][[j]]$lm, which=c(1:2),
                 main=paste(cur.t, x$im.data$lst.var$outcome[j], sep = ":"));
        }
    }
}

#' Impute missing data for MCMC convergence checking
#'
#' Call STAN model to impute missing data for an individual subject under
#' benchmark assumption for MCMC convergence checking
#'
#' @inheritParams imFitModel
#' @inheritParams imImpAll
#'
#' @param dsub original individual subject data
#' @param chains STAN parameter. Number of Markov chainsm
#' @param iter STAN parameter. Number of iterations
#' @param warmup STAN parameter. Number of burnin.
#' @param control STAN parameter. See \code{rstan::stan} for details.
#' @param ... other options to call STAN sampling such as \code{thin},
#'     \code{algorithm}. See \code{rstan::sampling} for details.
#' @param seed Random seed
#' @return
#'
#' \code{NULL} if there is no missing data in \code{dsub}
#'
#' Otherwise, return a class \code{IDEMSINGLE} object that contains a list with
#' components \describe{ \item{dsub}{original data of the subject}
#' \item{rst.stan}{A \code{stan.fit} class result returned from
#' \code{rstan::sampling}} \item{complete}{A dataframe with complete data for
#' the selected subject} }
#'
#' @examples
#' im.abc <- imData(abc, trt="TRT", surv="SURV", outcome=c("Y1","Y2"),
#'                  y0=NULL, endfml="Y2",
#'                  trt.label = c("UC+SBT", "SAT+SBT"),
#'                  cov=c("AGE"), duration=365, bounds=c(0,100));
#' im.fit <- imFitModel(im.abc);
#' im.imp <- imImpSingle(abc[1,], im.fit, chains = 4, iter = 200, warmup = 100);
#'
#' @export
#'
imImpSingle <- function(dsub, fit.rst, normal=TRUE,
                        chains = 4, iter = 5000, warmup = 1000,
                        control = list(adapt_delta=0.95),
                        ..., seed = NULL) {

    stopifnot(class(fit.rst) == get.const("FIT.CLASS"));
    stopifnot(1 == nrow(dsub));

    if (is.numeric(seed)) {
        old.seed <- .Random.seed;
        set.seed(seed);
    }

    lst.var <- fit.rst$im.data$lst.var;
    fit.all <- fit.rst$rst.mdl;

    vtrt     <- NULL;
    voutcome <- NULL;
    eoutcome <- NULL;
    vsurv    <- NULL;
    duration <- NULL;
    endfml   <- NULL;
    tmp.endp <- NULL;
    vy0      <- NULL;
    bounds   <- NULL;

    ##get parameters in current enviroment
    get.para(lst.var, environment());

    ## transfer data
    csub <- as.matrix(dsub);
    for (i in 1:length(voutcome)) {
        csub[, voutcome[i]] <- get.transfer(csub[, voutcome[i]], bounds);
    }

    outcome  <- csub[1, voutcome];
    y0       <- csub[1, vy0];
    trt      <- csub[1, vtrt];
    vx       <- csub[1, c(vy0, vcov)];

    if (any(is.na(vx)))
        return(NULL);

    fit.trt  <- fit.all[[as.character(trt)]];

    ##missing voutcome that needs imputation
    INX.MIS <- which(is.na(outcome));
    INX.OBS <- which(!is.na(outcome));
    if (0 == length(INX.MIS)) {
        return(NULL);
    }

    ##stan data
    IMIS         <- as.numeric(is.na(outcome));
    INX          <- rep(NA, length(outcome));
    INX[INX.MIS] <- 1:length(INX.MIS);
    INX[INX.OBS] <- 1:length(INX.OBS);

    YOBS <- 999999; ##dummy
    if (0 < length(INX.OBS)) {
        YOBS <- c(outcome[INX.OBS], YOBS);
    }

    COEF         <- NULL;
    RESIDUAL     <- NULL;
    H            <- NULL;
    for (i in 1:length(fit.trt)) {
        RESIDUAL <- cbind(RESIDUAL, fit.trt[[i]]$res);
        H        <- c(H, fit.trt[[i]]$h);
        cur.coef <- fit.trt[[i]]$coef;
        if (1 == i) {
            cur.coef <- c(cur.coef[1:2], 0, cur.coef[-(1:2)]);
        }
        COEF <- rbind(COEF, cur.coef);
    }


    list.stan <- list(NY   = length(outcome),
                      NOBS = length(INX.OBS),
                      YOBS = as.array(YOBS),
                      NX   = length(vx),
                      X    = as.array(vx),
                      IMIS = IMIS,
                      INX  = INX,
                      COEF = COEF,
                      ASSUMENORMAL = as.numeric(normal),
                      RESIDUAL     = RESIDUAL,
                      NRES         = nrow(RESIDUAL),
                      H            = H);

    ##stan sampling
    stan.rst <- sampling(stanmodels[["idem"]],
                         data=list.stan,
                         chains = chains,
                         iter = iter,
                         warmup = warmup,
                         control = control,
                         ...);

    ##get samples
    ymis <- rstan::extract(stan.rst, "YMIS")$YMIS;
    ymis <- get.inv.transfer(ymis, bounds);

    ycomplete <- dsub[rep(1,nrow(ymis)),];
    for(j in 1:ncol(ymis)) {
        ycomplete[,voutcome[INX.MIS[j]]] <- ymis[,j];
    }

    ##compute endpoint
    eval(parse(text=paste("tmp.endp <- with(ycomplete, {", endfml,"})")));
    ycomplete[[get.const("TXT.ENDP")]] <- tmp.endp;

    ##reset random seed
    if (is.numeric(seed)) {
        .Random.seed <- old.seed;
    }

    ##return
    rst <- list(dsub      = dsub,
                rst.stan  = stan.rst,
                complete  = ycomplete);

    class(rst) <- get.const("BENCH.CLASS");
    rst;
}

#' Plot MCMC mixing results
#'
#' Plot method of the class \code{IDEMSINGLE} to generate traceplot of the imputed missing
#' outcomes
#'
#' @inheritParams print.IDEMDATA
#'
#' @param x A class \code{IDEMSINGLE} object returned from \code{\link{imImpSingle}}
#'
#' @seealso \code{\link{imImpSingle}}
#'
#' @examples
#' im.abc <- imData(abc, trt="TRT", surv="SURV", outcome=c("Y1","Y2"),
#'                  y0=NULL, endfml="Y2",
#'                  trt.label = c("UC+SBT", "SAT+SBT"),
#'                  cov=c("AGE"), duration=365, bounds=c(0,100));
#' im.fit <- imFitModel(im.abc);
#' im.imp.single <- imImpSingle(abc[1,], im.fit,
#'                              chains = 4, iter = 200, warmup = 100);
#' plot(im.imp.single);
#'
#' @method plot IDEMSINGLE
#' @export
#'
plot.IDEMSINGLE <- function(x, ...) {
    rstan::traceplot(x$rst.stan, "YMIS");
}

#' Print MCMC mixing checking result
#'
#' Print method for class \code{IDEMSINGLE} objects generated
#' by \code{\link{imImpSingle}}
#'
#' @inheritParams plot.IDEMSINGLE
#'
#' @seealso \code{\link{imImpSingle}}
#'
#'
#' @export
#'
print.IDEMSINGLE <- function(x, ...) {
    cat("This checks the mixing of the MCMC sampling ");
    cat("for the following subject:\n");
    print(x$dsub);
    cat("\nCall plot function to generate the trace plot of the MCMC samples. \n");
}


#' Impute missing data
#'
#' Conduct imputation under benchmark assumptions or for sensitivity analysis
#' for a given set of subjects using the model fitting results
#'
#' @inheritParams summary.IDEMDATA
#'
#' @param fit.rst A class \code{IDEMFIT} results generated by
#'     \code{\link{imFitModel}}.
#' @param data.all A dataframe containing subjects with missing data. The
#'     default value is NULL, in which case the function will impute missing
#'     data for subjects in the original dataset in the class \code{IDEMFIT}
#'     object \code{fit.rst}
#' @param normal Logical variable indicating whether normality assumption should
#'     be made for the residuals
#' @param n.imp Number of complete datasets required
#' @param deltas Vector of imputation sensitivity parameters
#' @param update.progress Parameter reserved for run \code{idem} in GUI mode
#' @param imputeNone If \code{TRUE}, no imputation will be conducted. The data
#'     from subjects that do not need imputation will be returned
#' @param ... options to call STAN sampling. These options include
#'     \code{chains}, \code{iter}, \code{warmup}, \code{thin}, \code{algorithm}.
#'     See \code{rstan::sampling} for details.
#' @param seed Random seed
#'
#' @return
#'
#' If \code{imputeNone} is TRUE, return a dataset with the original data for the
#' subset of subjects who died at the end of the study or had no missing outcomes.
#'
#' Otherwise, return a class \code{IDEMIMP} list with components
#' \describe{
#' \item{lst.var}{List of parameters}
#' \item{complete}{A dataset with  the original data for
#' the subset of subjects who died at the end of the study or had no missing
#' outcomes and the \code{n.imp} imputed missing outcomes for subjects who need
#' missing value imputation.
#' }
#' \item{n.imp}{Number of imputed complete datasets}
#' \item{deltas}{Imputation sensitivity parameters}
#' \item{org.data}{Original dataset}
#' \item{normal}{Normal assumption for the imputation}
#' \item{stan.par}{STAN options}
#' }
#'
#' @examples
#'
#' \dontrun{
#' rst.abc <- imData(abc, trt="TRT", surv="SURV", outcome=c("Y1","Y2"),
#'                  y0=NULL, endfml="Y2",
#'                  trt.label = c("UC+SBT", "SAT+SBT"),
#'                  cov=c("AGE"), duration=365, bounds=c(0,100));
#' rst.fit  <- imFitModel(rst.abc);
#' rst.imp <- imImpAll(rst.fit, deltas=c(-0.25,0,0.25),
#'                     normal=TRUE, chains = 2, iter = 2000, warmup = 1000);}
#'
#' @export
#'
imImpAll <- function(fit.rst,
                     data.all = NULL,
                     deltas=0,
                     normal=TRUE,
                     n.imp=5,
                     endponly=TRUE,
                     update.progress=NULL,
                     imputeNone=FALSE,
                     ...,
                     seed = NULL) {

    f.addcols <- function(dset) {
        cbind('ID'=1:nrow(dset),
              'DELTA'=0,
              'IMP'=NA,
              dset);
    }

    stopifnot(class(fit.rst) == get.const("FIT.CLASS"));

    if (is.numeric(seed)) {
        old.seed <- .Random.seed;
        set.seed(seed);
    }

    if (!is.null(data.all)) {
        cur.data <- imData(data.all, fit.rst$im.data$lst.var);
        if (!get.const("IDEM.CLASS") %in% class(cur.data)) {
            print(cur.data);
            return(NULL);
        }
        ##subjects that need imputation
        need.imp <- summary(cur.data, opt = "missid", endponly=endponly);
    } else {
        data.all <- fit.rst$im.data$data;
        need.imp <- summary(fit.rst$im.data, opt = "missid", endponly=endponly);
    }

    lst.var <- fit.rst$im.data$lst.var;
    fit.all <- fit.rst$rst.mdl;

    if (is.null(deltas)) {
        deltas <- 0;
    } else {
        if (!(0 %in% deltas))
            deltas <- c(deltas, 0);
    }

    voutcome <- NULL;
    eoutcome <- NULL;
    vsurv    <- NULL;
    duration <- NULL;
    endfml   <- NULL;
    tmp.endp <- NULL;

    ##get parameters in current enviroment
    get.para(lst.var, environment());

    ##save org voutcome
    data.all[, paste(get.const("ORG.PREFIX"), voutcome, sep="")] <- data.all[, voutcome];

    ##get complete data. return if no imputation needed
    if (0 == length(need.imp) | imputeNone) {
        if (imputeNone & length(need.imp) > 0) {
            rst <- data.all[-need.imp,];
        } else {
            rst <- data.all;
        }

        eval(parse(text=paste("tmp.endp <- with(rst, {", endfml,"})")));
        rst[[get.const("TXT.ENDP")]] <- tmp.endp;
        rst <- f.addcols(rst);
        class(rst) <- c(class(rst), get.const("IMP.CLASS"));
        return(rst);
    } else if (nrow(data.all) == length(need.imp)) {
        data.comp <- NULL;
    } else {
        data.comp <- data.all[-need.imp,];
        data.comp <- f.addcols(data.comp);
        data.comp <- data.frame(data.comp);
        eval(parse(text=paste("tmp.endp <- with(data.comp, {", endfml,"})")));
        data.comp[[get.const("TXT.ENDP")]] <- tmp.endp;
    }

    ## start with all subjects with complete information
    rst    <- data.comp;
    n.need <- length(need.imp);
    for (i in 1:n.need) {
        cur.bench <- imImpSingle(data.all[need.imp[i],],
                                 fit.rst,
                                 normal=normal, ...);
        cur.rst <- imp.exponential(cur.bench, deltas=deltas, n.imp=n.imp)

        cur.rst <- cbind('ID' = nrow(data.comp) + i,
                         cur.rst)
        rst     <- rbind(rst, cur.rst);

        if ("PROGRESS" %in% toupper(class(update.progress))) {
            update.progress$set(value=i/n.need,
                                detail=paste(i, " out of ", n.need, sep=""));
        } else {
            cat(need.imp[i], ":", i, "out of", n.need, "\n");
        }
    }

    ##reset seed
    if (is.numeric(seed)) {
        .Random.seed <- old.seed;
    }

    ##return
    rownames(rst) <- NULL;
    rtn.rst <- list(lst.var  = lst.var,
                    deltas   = deltas,
                    normal   = normal,
                    org.data = data.all,
                    n.imp    = n.imp,
                    imp.par  = list(...),
                    use_mice = FALSE,
                    complete = rst);

    class(rtn.rst) <- c(class(rtn.rst),
                        get.const("IMP.CLASS"))

    invisible(rtn.rst);
}

#' Plot imputation results
#'
#' Generate different types of plots for class \code{IDEMIMP} objects generated
#' by \code{\link{imImpAll}}
#'
#'
#' @param x A class \code{IDEMIMP} object returned from \code{\link{imImpAll}}
#'
#' @param opt Types of the plot
#'    \itemize{
#'     \item{\code{imputed}: }{Plot density of imputed values and the density of the observed outcomes}
#'     \item{\code{composite}: }{Generate cumulative plot of the composite survival and functional outcome}
#' }
#'
#' @param fname File name of the result pdf file. If \code{fname} is null,
#'     result pdf file will not be generated
#'
#' @param ... Options for generating the plots.
#'
#'    \describe{
#'\item{type = imputed}{
#' \itemize{}
#' \itemize{
#' \item{\code{deltas}: }{Imputation sensitivity parameter for which to generate
#'     the results}
#'
#' \item{\code{endp}: }{If \code{TRUE}, plot the densities of the imputed
#'     functional outcomes. Otherwise, plot the densities of the imputed
#'     outcomes}
#'
#' \item{\code{adj}}{\code{density} estimation option}
#'
#' \item{\code{cols}}{\code{plot} option for colors}
#'
#' \item{\code{ltys}}{\code{plot} options for line types}
#'
#' \item{\code{xlim}}{\code{plot} options}
#'
#' \item{\code{ylim}}{\code{plot} options}
#'
#' \item{\code{mfrow}}{\code{plot} options}
#'
#' }}
#'
#' \item{type = composite}{
#'     \itemize{}
#'
#' \itemize{ \item{\code{at.surv}: }{Sets the range of the survival times to
#' plot in the cumulative distribution function. By default the range is the
#' range of survival values up to the duration of the study}
#'
#' \item{\code{at.z}: }{Sets the range of the functional outcome to plot in the
#'     cumulative distribution function. By defualt this is the range of the
#'     functional outcomes plus the buffer amount to improve visibility in the
#'     transition from survival to functional outcome}
#'
#' \item{\code{p.death}: }{Proportion
#'     of the plot width devoted to Survival. By default the cumulative
#'     distribution will devote horizontal space to the survival portion that is
#'     proportional to the number of subjects who die prior to duration}
#'
#' \item{\code{buffer}: }{Small horizontal gap used to better visually distinguish
#'     the transition from survival to functional outcome}
#'
#'
#' \item{\code{delta}: }{Imputation sensitivity parameter for which to generate the
#'     results}
#'
#' \item{\code{seg.lab}: }{Labels for the two components of the composite
#'     outcome}
#'
#' \item{\code{main}: }{\code{plot} options}
#'
#' }}
#' }
#'
#' @seealso \code{\link{imImpAll}}
#'
#' @examples
#' \dontrun{
#' im.abc <- imData(abc, trt="TRT", surv="SURV", outcome=c("Y1","Y2"),
#'                  y0=NULL, endfml="Y2",
#'                  trt.label = c("UC+SBT", "SAT+SBT"),
#'                  cov=c("AGE"), duration=365, bounds=c(0,100));
#' rst.fit  <- imFitModel(im.abc);
#' rst.imp <- imImpAll(rst.fit, deltas=c(-0.25,0,0.25),
#'                     normal=TRUE, chains = 2, iter = 2000, warmup = 1000);
#' plot(rst.imp, opt = "imputed"),
#' plot(rst.imp, opt = "composite")}
#' @method plot IDEMIMP
#'
#' @export
#'
#'
plot.IDEMIMP <- function(x, opt = c("imputed", "composite"), fname = NULL, ...) {
    opt <- match.arg(opt);
    switch(opt,
           imputed   = plot.imputed(x, fname=fname, ...),
           composite = plot.composite(x, fname=fname, ...))
}


#' Print imputation results
#'
#' Print method for class \code{IDEMIMP} objects generated
#' by \code{\link{imImpAll}}
#'
#' @inheritParams plot.IDEMIMP
#' @param ... Extra arguments
#'
#' @seealso \code{\link{imImpAll}}
#'
#'
#' @export
print.IDEMIMP <- function(x, ...) {
    cat("A total of", x$n.imp, "complete datasets were imputed. ")
    cat("Normality assumption \nwas ");
    if (!x$normal) {
        cat("NOT ");
    }
    cat("made for the imputation model residual distribution.\n\n");

    cat("The sensitivity parameters considered were\n");
    print(x$deltas);

    cat("\nThe last", x$n.imp, "records in the complete dataset \nare given below as an example: \n\n");
    print(tail(x$complete, n=x$n.imp));
}

Try the idem package in your browser

Any scripts or data that you put into this service are public.

idem documentation built on Aug. 9, 2023, 5:08 p.m.