R/LongCART_08May2022.R
In LongCART: Recursive Partitioning for Longitudinal Data and Right Censored Data Using Baseline Covariates

LongCART<- function (data, patid, fixed, gvars, tgvars, minsplit = 40, minbucket = 20, 
    alpha = 0.05, coef.digits = 2, print.lme = FALSE) 
{
    node.labels<- function(x){   #the x should be Treeout
        Terminal<- NULL
	tree.nt<- subset(x, !Terminal)
	tree.t<- subset(x, Terminal)
	n.group<- nrow(tree.t)
	id.nt<- tree.nt$ID

	node.labels<- data.frame(node=numeric(), label=character())
	for(k in 1:n.group){
		node.t<- tree.t$ID[k]
		#cat(node.t, "\n")
		text=NULL
		while(node.t>1){
			if((node.t/2)>floor(node.t/2)){
				text<- paste0(tree.nt$var[id.nt==floor(node.t/2)], ">=", tree.nt$index[id.nt==floor(node.t/2)], 
                          ifelse(!is.null(text), paste0(" & ", text), ""))
			} else if((node.t/2)==floor(node.t/2)){
				text<- paste0(tree.nt$var[id.nt==node.t/2], "<", tree.nt$index[id.nt==node.t/2],  
                          ifelse(!is.null(text), paste0(" & ", text), ""))
			} 
		#cat(text, "\n")
		node.t=floor(node.t/2)
		} #--- while loop ends
	node.labels[nrow(node.labels) + 1,]<- c(tree.t$ID[k], text)
	} #--- for loop ends
	node.labels
    }

    llike <- function(data, fixed) {
        if (exists("txx.out")) 
            rm(txx.out)
        loglik <- NA
        try(txx.out <- lme(fixed, data = data, method = "ML", 
            random = ~1 | patid, na.action = na.omit), silent = TRUE)
        if (exists("txx.out")) 
            loglik <- logLik(txx.out)[1]
        loglik
    }
    test4split <- function(cutoff, data, fixed, splitvar) {
        LRTS <- NA
        message(cutoff, appendLF = FALSE)
        left.data <- data[data[[splitvar]] < cutoff, ]
        right.data <- data[data[[splitvar]] >= cutoff, ]
        rn.ll <- llike(data = data, fixed = fixed)
        left.ll <- llike(data = left.data, fixed = fixed)
        right.ll <- llike(data = right.data, fixed = fixed)
        LRTS <- 2 * (left.ll + right.ll - rn.ll)
        message(" ", appendLF = FALSE)
        LRTS
    }
    single.group <- function(data, fixed, splitvar, minbucket) {
        xcut <- NA
        improve <- NA
        Y.name <- as.character(attr(as.Formula(fixed), "lhs"))
        data <- data[!is.na(data[[Y.name]]), ]
        data <- data[!is.na(data[[splitvar]]), ]
        temp <- unique(data[, c("patid", splitvar)])
        N <- tapply(temp$patid, temp[[splitvar]], length)
        temp1 <- as.data.frame(list(cuts = as.numeric(names(N)), 
            N = N))
        temp1$left <- cumsum(temp1$N) - temp1$N
        temp1$right <- sum(temp1$N) - temp1$left
        temp2 <- temp1[temp1$left >= minbucket & temp1$right >= 
            minbucket, ]
        vals <- temp2$cuts
        message("Evaluations of cutoffs for maximum improvements (Maximum cutoff value = ", 
            max(vals), ")\n", appendLF = FALSE)
        LRT.vec <- lapply(vals, FUN = test4split, data = data, 
            fixed = fixed, splitvar = splitvar)
        message(".\n", appendLF = FALSE)
        LRT.vec <- unlist(LRT.vec)
        if (any(!is.na(LRT.vec))) {
            index <- which.max(LRT.vec)
            xcut = vals[index]
            improve = LRT.vec[index]
        }
        ret <- list(xcut = xcut, improve = improve)
        ret
    }
    bestsplit <- function(data, fixed, gvars, tgvars, node.name, 
        minbucket, alpha) {
        ngvars <- length(gvars)
        best.gvar <- NA
        best.cutoff <- NA
        improve <- NA
        min.pval.adj <- NA
        stab.pval <- numeric(length = ngvars)
        for (v in 1:ngvars) {
            stab.pval[v] <- NA
            splitvar <- gvars[v]
            message("\nSplitting variable: ", splitvar, 
                "\n", appendLF = FALSE)
            data1 <- data[!is.na(splitvar), ]
            G <- length(unique(data1[[splitvar]]))
            message("G=", G, "\n", appendLF = FALSE)
            if (G > 1) {
                if (tgvars[v] == 0) 
                  stab.pval[v] <- StabCat(data = data1, patid = "patid", 
                    fixed = fixed, splitvar = splitvar)$pval
                if (tgvars[v] == 1) 
                  stab.pval[v] <- StabCont(data = data1, patid = "patid", 
                    fixed = fixed, splitvar = splitvar)$pval
            }
            else message("Instability test was NOT performed. \n", 
                appendLF = FALSE)
        }
        message("\n stab.pval=", stab.pval, "\n", 
            appendLF = FALSE)
        if (any(!is.na(stab.pval))) {
            stab.pval.adj <- p.adjust(stab.pval, method = "hochberg")
            sel.v <- which.min(stab.pval)
            min.pval.adj <- stab.pval.adj[sel.v]
            best.gvar <- gvars[sel.v]
            message("\n stab.pval.adj=", stab.pval.adj, 
                "\n", appendLF = FALSE)
            message("\n alpha=", alpha, "\n", appendLF = FALSE)
            if (min.pval.adj < alpha) {
                bestcut <- single.group(data, fixed, best.gvar, 
                  minbucket)
                best.cutoff <- bestcut$xcut
                improve <- bestcut$improve
            }
        }
        return(list(node = node.name, gvar = best.gvar, cutoff = best.cutoff, 
            improve = improve, pval = min.pval.adj))
    }
    coeff.Estimate <- function(LMEobj, coef.digits = 2) {
        ct <- LMEobj$tTable
        p <- nrow(ct)
        ct.names <- row.names(ct)
        yval <- " "
        for (ct.i in 1:p) yval <- paste0(yval, "+", round(ct[ct.i, 
            1], coef.digits), "*", ct.names[ct.i])
        yval <- gsub(pattern = "(Intercept)", replacement = "", 
            x = yval, fixed = TRUE)
        yval <- gsub(pattern = "+-", replacement = "-", 
            yval, fixed = TRUE)
        yval <- gsub(pattern = " +", replacement = "", 
            yval, fixed = TRUE)
        yval <- gsub(pattern = " ", replacement = "", 
            yval, fixed = TRUE)
        yval <- gsub(pattern = "*+", replacement = "+", 
            yval, fixed = TRUE)
        yval <- gsub(pattern = "*-", replacement = "-", 
            yval, fixed = TRUE)
        yval
    }
    rsplit <- function(data, fixed, gvars, tgvars, id, split, 
        alpha, minsplit, minbucket, Rate, loglik, env = parent.frame(), 
        coef.digits = 2) {
        s.var <- unlist(split[2])
        s.cut <- unlist(split[3])
        s.improve <- unlist(split[4])
        s.pval <- unlist(split[5])
        N <- length(unique(data$patid))
        if (id == 1) {
            env$Treeout <- data.frame(id = id, N = N, yval = Rate, 
                splitvar = s.var, cutoff = s.cut, pstab = s.pval, 
                loglik = loglik, improve = s.improve, stringsAsFactors = FALSE)
        }
        else {
            env$Treeout <- rbind(env$Treeout, c(id, N, Rate, 
                s.var, s.cut, s.pval, loglik, s.improve))
        }
        if (!is.na(s.var) && !is.na(s.cut)) {
            data <- data[!is.na(data[[s.var]]), ]
            id_l = id * 2
            message("---------------------------------------- \n", 
                appendLF = FALSE)
            message("NODE ", id_l, "- Rule:", s.var, 
                " <", s.cut, "\n", appendLF = FALSE)
            message("---------------------------------------- \n", 
                appendLF = FALSE)
            data_l <- subset(data, data[[s.var]] < s.cut)
            left.subj <- length(unique(data_l$patid))
            message("No. of individual in left node: ", 
                left.subj, " \n", appendLF = FALSE)
            tjj <- NULL
            tjj <- summary(lme(fixed, data = data_l, method = "REML", 
                random = ~1 | patid))
            if (print.lme) 
                cat("NODE ", id_l, "- Rule:", s.var, 
                  " <", s.cut, "\n")
            if (print.lme) 
                print(tjj$tTable)
            Rate <- coeff.Estimate(tjj, coef.digits = coef.digits)
            loglik <- logLik(lme(fixed, data = data_l, method = "REML", 
                random = ~1 | patid))
            if (left.subj >= minsplit) {
                message("\nDECISION: Go to the next level \n", 
                  appendLF = FALSE)
                split_l <- bestsplit(data = data_l, fixed = fixed, 
                  gvars = gvars, tgvars = tgvars, node.name = id_l, 
                  minbucket = minbucket, alpha = alpha)
                rsplit(data = data_l, fixed = fixed, gvars = gvars, 
                  tgvars = tgvars, id = id_l, split = split_l, 
                  alpha = alpha, minsplit = minsplit, minbucket = minbucket, 
                  Rate = Rate, loglik = loglik, env = env, coef.digits = coef.digits)
            }
            else {
                env$Treeout <- rbind(env$Treeout, c(id_l, left.subj, 
                  Rate, NA, NA, NA, loglik, NA))
            }
            id_r = id * 2 + 1
            message("---------------------------------------- \n", 
                appendLF = FALSE)
            message("NODE ", id_r, "- Rule:", s.var, 
                " >=", s.cut, "\n", appendLF = FALSE)
            message("---------------------------------------- \n", 
                appendLF = FALSE)
            data_r <- subset(data, data[[s.var]] >= s.cut)
            right.subj <- length(unique(data_r$patid))
            message("No. of individual in right node: ", 
                right.subj, " \n", appendLF = FALSE)
            tjj <- NULL
            tjj <- summary(lme(fixed, data = data_r, method = "REML", 
                random = ~1 | patid))
            if (print.lme) 
                cat("NODE ", id_r, "- Rule:", s.var, 
                  " >=", s.cut, "\n")
            if (print.lme) 
                print(tjj$tTable)
            Rate <- coeff.Estimate(tjj, coef.digits = coef.digits)
            loglik <- logLik(lme(fixed, data = data_r, method = "REML", 
                random = ~1 | patid))
            if (right.subj >= minsplit) {
                message("\nDECISION: Go to the next level \n", 
                  appendLF = FALSE)
                split_r <- bestsplit(data = data_r, fixed = fixed, 
                  gvars = gvars, tgvars = tgvars, node.name = id_r, 
                  minbucket = minbucket, alpha = alpha)
                rsplit(data = data_r, fixed = fixed, gvars = gvars, 
                  tgvars = tgvars, id = id_r, split = split_r, 
                  alpha = alpha, minsplit = minsplit, minbucket = minbucket, 
                  Rate = Rate, loglik = loglik, env = env, coef.digits = coef.digits)
            }
            else {
                env$Treeout <- rbind(env$Treeout, c(id_r, right.subj, 
                  Rate, NA, NA, NA, loglik, NA))
            }
        }
        else message("\nDECISION: NO more splitting required \n", 
            appendLF = FALSE)
    }
    if (!exists(as.character(substitute(data)))) 
        stop("Dataset does not exist\n")
    if (!is.data.frame(data)) 
        stop("Dataset does not exist\n")
    if (!patid %in% colnames(data)) 
        stop("The column ", patid, " containing subjects id is missing in dataset.\n")
    data$patid <- data[[patid]]
    data <- data[!is.na(data[["patid"]]), ]
    Y.name <- as.character(attr(as.Formula(fixed), "lhs"))
    if (!Y.name %in% colnames(data)) 
        stop("The column ", Y.name, " containing subjects id is missing in dataset.\n")
    data <- data[!is.na(data[[Y.name]]), ]
    if (length(gvars) != length(tgvars)) 
        stop("gvars and tgvars are not of equal length. \n")
    if (any(is.na(tgvars))) 
        stop("tgvars cannot have NA value. \n")
    for (var.i in 1:length(gvars)) {
        if (!gvars[var.i] %in% colnames(data)) 
            stop("The column ", gvars[var.i], " is missing in dataset.\n")
    }
    for (var.i in 1:length(gvars)) {
        temp <- as.data.frame(list(patid = cbind(data["patid"], 
            gvar = data[gvars[var.i]])))
        colnames(temp) = c("patid", "gvar")
        temp <- unique(temp)
        temp <- temp[!is.na("gvar"), ]
        n.gvar.pat <- tapply(temp$gvar, temp$patid, function(x) length(unique(x)))
        if (any(n.gvar.pat != 1)) 
            stop("One subject is associated with more than one value of ", 
                gvars[var.i], ".\n")
        rm(temp, n.gvar.pat)
    }
    LongCART.env <- new.env()
    message("------------------------------------------ \n", 
        appendLF = FALSE)
    message("           ROOT NODE: NODE 1               \n", 
        appendLF = FALSE)
    message("------------------------------------------ \n", 
        appendLF = FALSE)
    tjj <- NULL
    tjj <- summary(lme(fixed, data = data, method = "REML", 
        random = ~1 | patid))
    if (print.lme) 
        cat("ROOT NODE: NODE 1\n")
    if (print.lme) 
        print(tjj$tTable)
    Rate <- coeff.Estimate(tjj, coef.digits = coef.digits)
    loglik <- logLik(lme(fixed, data = data, method = "REML", 
        random = ~1 | patid))
    split <- bestsplit(data = data, fixed = fixed, gvars = gvars, 
        tgvars = tgvars, node.name = 1, minbucket = minbucket, 
        alpha = alpha)
    rsplit(data = data, fixed = fixed, gvars = gvars, tgvars = tgvars, 
        id = 1, split = split, alpha = alpha, minsplit = minsplit, 
        minbucket = minbucket, Rate = Rate, loglik = loglik, 
        env = LongCART.env, coef.digits = coef.digits)
    Treeout <- LongCART.env$Treeout
    colnames(Treeout) <- c("ID", "n", "yval", 
        "var", "index", "p (Instability)", 
        "loglik", "improve")
    row.names(Treeout) <- NULL
    Treeout[, 1] <- as.numeric(Treeout[, 1])
    Treeout[, 2] <- as.numeric(Treeout[, 2])
    Treeout[, 5] <- round(as.numeric(Treeout[, 5]), digits = 2)
    Treeout[, 6] <- round(as.numeric(Treeout[, 6]), digits = 3)
    Treeout[, 7] <- round(as.numeric(Treeout[, 7]), digits = 0)
    Treeout[, 8] <- round(as.numeric(Treeout[, 8]), digits = 0)
    Treeout$Terminal <- ifelse(is.na(Treeout[, 5]), TRUE, FALSE)
    print(Treeout)
    form <- Formula(fixed)
    Y.name <- as.character(attr(form, "lhs"))
    X.name <- as.character(attr(form, "rhs"))
    X.name <- gsub(pattern = " ", replacement = "", 
        x = X.name)
    X.name <- unlist(strsplit(X.name, "+", fixed = TRUE))
    p <- length(X.name) + 1
    AIC.tree <- 2 * sum(Treeout[, 7] * Treeout[, 9]) - 2 * (p + 
        2) * sum(Treeout[, 9])
    AIC.root <- 2 * Treeout[1, 7] - 2 * (p + 2)
    improve.AIC <- AIC.tree - AIC.root
    logLik.tree <- sum(Treeout[, 7] * Treeout[, 9])
    logLik.root <- Treeout[1, 7]
    Deviance <- 2 * (logLik.tree - logLik.root)
    LRT.df <- p * sum(Treeout[, 9]) - p
    LRT.p <- 1 - pchisq(Deviance, LRT.df)
    message("AIC(tree)=", AIC.tree, "   AIC(Root)=", 
        AIC.root, "\n", appendLF = FALSE)
    message("logLikelihood (tree)=", logLik.tree, "   logLikelihood (Root)=", 
        logLik.root, "\n", appendLF = FALSE)
    message("Deviance=", Deviance, " (df=", LRT.df, 
        ", p-val=", LRT.p, ") \n", appendLF = FALSE)
    sel <- as.numeric(rownames(Treeout[Treeout$Terminal, ]))
    Treeout1 <- as.data.frame(Treeout[, c("ID", "var", 
        "n", "yval", "loglik", "Terminal", 
        "index", "improve")])
    row.names(Treeout1) <- Treeout1$ID
    Treeout1$var = ifelse(Treeout1$Terminal, "<leaf>", 
        Treeout1$var)
    Treeout1$dev = -Treeout1$loglik
    Treeout1$wt <- Treeout1$count <- Treeout1$n
    Treeout1$ncompete <- Treeout1$nsurrogate <- 0
    Treeout1$ncat = -1
    frame <- Treeout1[c("var", "n", "wt", "dev", 
        "yval", "ncompete", "nsurrogate")]
    splits <- Treeout1[!Treeout1$Terminal, ]
    splits <- splits[c("count", "ncat", "improve", 
        "index")]
    splits <- as.matrix(splits)
    temp <- frame[frame$var == "<leaf>", ]
    cptable <- 0:(dim(temp)[1] - 1)
    functions <- NULL
    functions$text <- function(yval, dev, wt, ylevel, digits, 
        n, use.n) {
        if (use.n) 
            paste0(yval, "\nn=", n)
        else yval
    }

    nodelab<- node.labels(Treeout)

    varnam<- unique(frame$var)
    varnam<- varnam[varnam!="<leaf>" ]

    ret <- list(Treeout, frame, splits, cptable, functions, p, 
        AIC.tree, AIC.root, improve.AIC, logLik.tree, logLik.root, 
        Deviance, LRT.df, LRT.p, data, patid, fixed,
        nodelab, varnam)
    names(ret) <- c("Treeout", "frame", "splits", 
        "cptable", "functions", "p", "AIC.tree", 
        "AIC.root", "improve.AIC", "logLik.tree", 
        "logLik.root", "Deviance", "LRT.df", 
        "LRT.pval", "data", "patid", "fixed",
        "nodelab", "varnam")
    class(ret) <- "LongCART"
    ret
}
Any scripts or data that you put into this service are public.
LongCART documentation built on May 18, 2022, 1:06 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
LongCART
Recursive Partitioning for Longitudinal Data and Right Censored Data Using Baseline Covariates

R/LongCART_08May2022.R
In LongCART: Recursive Partitioning for Longitudinal Data and Right Censored Data Using Baseline Covariates

Try the LongCART package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

LongCART Recursive Partitioning for Longitudinal Data and Right Censored Data Using Baseline Covariates

R/LongCART_08May2022.R In LongCART: Recursive Partitioning for Longitudinal Data and Right Censored Data Using Baseline Covariates

Try the LongCART package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

LongCART
Recursive Partitioning for Longitudinal Data and Right Censored Data Using Baseline Covariates

R/LongCART_08May2022.R
In LongCART: Recursive Partitioning for Longitudinal Data and Right Censored Data Using Baseline Covariates