Nothing
## * Documentation initialization functions called by BuyseTest
#' @title internal functions for BuyseTest - initialization
#' @name BuyseTest-initialization
#' @description Functions called by \code{\link{BuyseTest}} to initialize the arguments.
#' @noRd
#'
#' @details
#'
#' \code{initializeArgs}: Normalize the argument
#' \itemize{
#' \item scoring.rule, pool.strata, neutral.as.uninf, add.halfNeutral, keep.pairScore, n.resampling, seed, cpus, trace: set to default value when not specified.
#' \item formula: call \code{initializeFormula} to extract arguments.
#' \item type: convert to numeric.
#' \item status: only keep status relative to TTE endpoint. Set to \code{NULL} if no TTE endpoint.
#' \item threshold: set default threshold to 1e-12.
#' the rational being we consider a pair favorable if X>Y ie X>=Y+1e-12.
#' When using a threshold e.g. 5 we want X>=Y+5 and not X>Y+5, especially when the measurement is discrete. \cr
#' \item data: convert to data.table object.
#' \item scoring.rule: convert to numeric.
#' }
#'
#' \code{initializeFormula}: extract \code{treatment}, \code{type}, \code{endpoint}, \code{threshold}, \code{status}, \code{operator}, and \code{strata}
#' from the formula. \cr \cr
#'
#' \code{initializeData}: Divide the dataset into two, one relative to the treatment group and the other relative to the control group.
#' Merge the strata into one with the interaction variable.
#' Extract for each strata the index of the observations within each group.
#'
#' @author Brice Ozenne
## * initializeArgs
initializeArgs <- function(status,
correction.uninf = NULL,
cpus = NULL,
data,
endpoint,
formula,
hierarchical = NULL,
keep.pairScore = NULL,
method.inference = NULL,
scoring.rule = NULL,
pool.strata = NULL,
model.tte,
n.resampling = NULL,
strata.resampling = NULL,
call,
neutral.as.uninf = NULL,
add.halfNeutral = NULL,
operator = NULL,
censoring,
restriction,
option,
seed = NULL,
strata,
threshold,
trace = NULL,
treatment,
type,
weightEndpoint = NULL,
weightObs = NULL,
envir){
name.call <- names(call)
## ** apply default options
if(is.null(cpus)){ cpus <- option$cpus }
if(is.null(keep.pairScore)){ keep.pairScore <- option$keep.pairScore }
if(is.null(scoring.rule)){ scoring.rule <- option$scoring.rule }
if(is.null(hierarchical)){ hierarchical <- option$hierarchical }
if(is.null(correction.uninf)){ correction.uninf <- option$correction.uninf }
if(is.null(method.inference)){ method.inference <- option$method.inference }
if(is.null(n.resampling)){ n.resampling <- option$n.resampling }
if(is.null(strata.resampling)){ strata.resampling <- option$strata.resampling }
if(is.null(neutral.as.uninf)){ neutral.as.uninf <- option$neutral.as.uninf }
if(is.null(add.halfNeutral)){ add.halfNeutral <- option$add.halfNeutral }
if(is.null(trace)){ trace <- option$trace }
fitter.model.tte <- option$fitter.model.tte
engine <- option$engine
alternative <- option$alternative
precompute <- option$precompute
## ** convert formula into separate arguments
if(!missing(formula)){
## the missing is for BuysePower where the arguments are not necessarily specified
test.null <- c(status = !missing(status) && !is.null(status),
endpoint = !missing(endpoint) && !is.null(endpoint),
operator = !missing(operator) && !is.null(operator),
censoring = !missing(censoring) && !is.null(censoring),
restriction = !missing(restriction) && !is.null(restriction),
strata = !missing(strata) && !is.null(strata),
threshold = !missing(threshold) && !is.null(threshold),
treatment = !missing(treatment) && !is.null(treatment),
type = !missing(type) && !is.null(type),
weightEndpoint = !missing(weightEndpoint) && !is.null(weightEndpoint)
)
if(any(test.null)){
txt <- names(test.null)[test.null]
warning("Argument",if(sum(test.null)>1){"s"}," \'",paste(txt, collpase="\' \'"),if(sum(test.null)>1){" are "}else{" is "}," ignored when argument \'formula\' has been specified\n")
}
resFormula <- initializeFormula(formula, hierarchical = hierarchical, envir = envir)
treatment <- resFormula$treatment
type <- resFormula$type
endpoint <- resFormula$endpoint
threshold <- resFormula$threshold
status <- resFormula$status
weightEndpoint <- resFormula$weightEndpoint
operator <- resFormula$operator
censoring <- resFormula$censoring
restriction <- resFormula$restriction
strata <- resFormula$strata
}else{
formula <- NULL
}
## ** type
validType1 <- paste0("^",c("b","bin","binary"),"$")
validType2 <- paste0("^",c("c","cont","continuous"),"$")
validType3 <- paste0("^",c("t","tte","time","timetoevent"),"$") ## [if modified, remember to change the corresponding vector in initFormula]
validType4 <- paste0("^",c("g","gaus","gaussian"),"$") ## [if modified, remember to change the corresponding vector in initFormula]
type <- tolower(type)
type[grep(paste(validType1,collapse="|"), type)] <- "bin"
type[grep(paste(validType2,collapse="|"), type)] <- "cont"
type[grep(paste(validType3,collapse="|"), type)] <- "tte"
type[grep(paste(validType4,collapse="|"), type)] <- "gaus"
## ** endpoint
index.typeTTE <- which(type=="tte")
endpoint.TTE <- endpoint[index.typeTTE]
threshold.TTE <- threshold[index.typeTTE]
D <- length(endpoint)
D.TTE <- length(endpoint.TTE)
Uendpoint <- unique(endpoint)
Uendpoint.TTE <- unique(endpoint.TTE)
## ** default values
if(is.null(formula)){
if(is.null(threshold)){
threshold <- rep(10^{-12},D) # if no treshold is proposed all threshold are by default set to 10^{-12}
attr(threshold, "original") <- ifelse(type=="bin",NA,0)
}else{
attr(threshold, "original") <- threshold
}
if(is.null(restriction)){
restriction <- rep(as.numeric(NA),D)
}
if(is.null(operator)){
operator <- rep(">0",D)
}
if(is.null(weightEndpoint)){
if(hierarchical){
weightEndpoint <- rep(1,D)
}else{
weightEndpoint <- rep(1/D,D)
}
}
if(is.null(status)){
status <- rep("..NA..",D)
}else if(length(status) != D && length(status) == D.TTE){
status.save <- status
status <- rep("..NA..", D)
status[index.typeTTE] <- status.save
}
if(is.null(censoring)){
censoring <- rep("right",D)
}else if(length(status) != D && length(status) == D.TTE){
censoring.save <- status
censoring <- rep("right", D)
censoring[index.typeTTE] <- status.save
}
}else{
attr(threshold, "original") <- threshold
}
## ** status
Ustatus <- unique(status)
status.TTE <- status[index.typeTTE]
## from now, status contains for each endpoint the name of variable indicating status (0) or event (1) or NA
## ** censoring
## ## if(any(type %in% 1:2)){
## ## censoring[type %in% 1:2] <- as.character(NA)
## ## }
## if(!is.numeric(censoring)){
## censoring.save <- censoring
## censoring <- sapply(unname(censoring),function(iC){
## if(identical(iC,"NA")){
## return(0)
## }else if(identical(iC,"right")){
## return(1)
## }else if(identical(iC,"left")){
## return(2)
## }else{
## return(NA)
## }
## })
## attr(censoring,"original") <- censoring.save
## }
## ** scoring.rule
## WARNING: choices must be lower cases
## remember to update check scoring.rule (in BuyseTest-check.R)
if(is.character(scoring.rule)){
scoring.rule <- switch(tolower(scoring.rule),
"gehan" = 0,
"peron" = 1,
"efron" = 2,
NA
)
}
if (D.TTE == 0) {
scoring.rule <- 0
if ("scoring.rule" %in% name.call && trace > 0) {
message("NOTE : there is no survival endpoint, \'scoring.rule\' argument is ignored \n")
}
}
## ** pool.strata
if(is.null(strata)){
pool.strata <- 0
attr(pool.strata,"type") <- "none"
attr(pool.strata,"original") <- NA
}else if(is.null(pool.strata)){
pool.strata <- switch(tolower(option$pool.strata),
"buyse" = 0,
"cmh" = 1,
"equal" = 2,
"standardisation" = 3,
"standardization" = 3,
"var-favorable" = 4.1,
"var-unfavorable" = 4.2,
"var-netbenefit" = 4.3,
"var-winratio" = 4.4,
NA
)
if(tolower(option$pool.strata)=="standardisation"){
attr(pool.strata,"type") <- "standardization"
}else{
attr(pool.strata,"type") <- tolower(option$pool.strata)
}
attr(pool.strata,"original") <- NA
}else if(is.character(pool.strata)){
pool.strata_save <- tolower(pool.strata)
pool.strata <- switch(pool.strata_save,
"buyse" = 0,
"cmh" = 1,
"equal" = 2,
"standardisation" = 3,
"standardization" = 3,
"var-favorable" = 4.1,
"var-unfavorable" = 4.2,
"var-netbenefit" = 4.3,
"var-winratio" = 4.4,
NA
)
if(!is.na(pool.strata_save) && pool.strata_save=="standardisation"){
attr(pool.strata,"type") <- "standardization"
}else{
attr(pool.strata,"type") <- pool.strata_save
}
attr(pool.strata,"original") <- pool.strata_save
}else if(is.numeric(pool.strata)){
pool.strata_save <- switch(as.character(pool.strata),
"0" = "buyse",
"1" = "cmh",
"2" = "equal",
"3" = "standardization",
"4.1" = "var-favorable",
"4.2" = "var-unfavorable",
"4.3" = "var-netbenefit",
"4.4" = "var-winratio",
NA
)
attr(pool.strata,"type") <- pool.strata_save
attr(pool.strata,"original") <- pool.strata_save
}else{
pool.strata <- NA
}
## ** threshold
if(any(is.na(threshold))){
threshold[which(is.na(threshold))] <- 10^{-12}
}
if(any(abs(threshold)<10^{-12})){
threshold[which(abs(threshold)<10^{-12})] <- 10^{-12}
}
## ** method.inference
method.inference <- gsub("-"," ",tolower(method.inference),fixed = TRUE)
attr(method.inference,"permutation") <- grepl("permutation",method.inference)
attr(method.inference,"bootstrap") <- grepl("bootstrap",method.inference)
attr(method.inference,"studentized") <- grepl("studentized",method.inference)
attr(method.inference,"ustatistic") <- grepl("u statistic",method.inference)
if(all(is.na(strata.resampling)) || length(strata.resampling)== 0){
attr(method.inference,"resampling-strata") <- as.character(NA)
}else{
attr(method.inference,"resampling-strata") <- strata.resampling
}
if(method.inference == "varexact permutation"){
n.resampling <- Inf
}
## ** neutral.as.uninf
if(length(neutral.as.uninf)==1 && D>1){
neutral.as.uninf <- rep(neutral.as.uninf,D)
}
## ** correction.uninf
correction.uninf <- as.numeric(correction.uninf)
if(correction.uninf>0){
engine <- "GPC_cpp"
}
## ** model.tte
if(scoring.rule>0){
if((!is.null(model.tte))){
if((length(unique(endpoint.TTE)) == 1) && !inherits(model.tte, "list")){
attr.save <- attr(model.tte,"iidNuisance")
model.tte <- list(model.tte)
names(model.tte) <- unique(endpoint.TTE)
attr(model.tte,"iidNuisance") <- attr.save
}
attr(data,"model.tte_regressor") <- unique(unlist(lapply(model.tte, function(iM){
all.vars(stats::delete.response(terms(formula(iM))))
})))
}
}else{
model.tte <- NULL
}
if(!is.null(model.tte)){
fitter.model.tte <- unlist(lapply(model.tte, class))
}else{
fitter.model.tte <- setNames(rep(fitter.model.tte, length(Uendpoint.TTE)), Uendpoint.TTE)
}
## ** iid
iid <- attr(method.inference,"studentized") || (method.inference == "u statistic")
if(iid){
attr(method.inference,"hprojection") <- option$order.Hprojection
}else{
attr(method.inference,"hprojection") <- NA
}
if(iid && scoring.rule>0){ ## Peron/Efron scoring rule
if(is.null(model.tte)){
iidNuisance <- TRUE
}else if(!is.null(attr(model.tte,"iidNuisance"))){
iidNuisance <- attr(model.tte,"iidNuisance")
}else if(all(deparse(expr = call$data) == sapply(model.tte, function(iModel){deparse(expr=iModel$call$data)}))){
iidNuisance <- TRUE
}else{
iidNuisance <- FALSE
message("Uncertainty related to the estimation of the survival probabilities is ignored. \n",
"Consider adding an attribute \"iidNuisance\" to the argument \'model.tte\' taking value TRUE to change this default behavior. \n")
}
}else{
iidNuisance <- FALSE
}
## ** cpu
if (cpus == "all") {
cpus <- parallel::detectCores() # this function detect the number of CPU cores
}
## ** trace
if(is.logical(trace)){
trace <- as.numeric(trace)
}
## ** seed
if(!is.null(seed)){
attr(seed,"max") <- 10^(floor(log10(.Machine$integer.max))-1)
}
## ** operator
if(!is.numeric(operator)){
operator <- sapply(operator, switch, ">0"=1, "<0"=-1, NA)
}
## ** export
return(list(
name.call = name.call,
status = status,
status.TTE = status.TTE,
correction.uninf = correction.uninf,
cpus = cpus,
D = D,
D.TTE = D.TTE,
data = data,
endpoint = endpoint,
endpoint.TTE = endpoint.TTE,
engine = engine,
fitter.model.tte = fitter.model.tte,
formula = formula,
iid = iid,
iidNuisance = iidNuisance,
index.endpoint = match(endpoint, Uendpoint) - 1,
index.status = match(status, Ustatus) - 1,
keep.pairScore = keep.pairScore,
keep.survival = option$keep.survival,
scoring.rule = scoring.rule,
pool.strata = pool.strata,
model.tte = model.tte,
method.inference = method.inference,
n.resampling = n.resampling,
hierarchical = hierarchical,
neutral.as.uninf = neutral.as.uninf,
add.halfNeutral = add.halfNeutral,
operator = operator,
censoring = censoring,
restriction = restriction,
order.Hprojection = option$order.Hprojection,
precompute = precompute,
seed = seed,
strata = strata,
threshold = threshold,
trace = trace,
treatment = treatment,
type = type,
Uendpoint = Uendpoint,
Ustatus = Ustatus,
weightEndpoint = weightEndpoint,
weightObs = weightObs,
debug = option$debug
))
}
## * initializeData
initializeData <- function(data, type, endpoint, Uendpoint, D, scoring.rule, status, Ustatus, method.inference, censoring, strata, pool.strata, treatment, hierarchical, copy,
keep.pairScore, endpoint.TTE, status.TTE, iidNuisance, weightEndpoint, weightObs){
if (!data.table::is.data.table(data)) {
data <- data.table::as.data.table(data)
}else if(copy){
data <- data.table::copy(data)
}
## ** convert character/factor to numeric for binary endpoints
name.bin <- endpoint[which(type == "bin")]
if(length(name.bin)>0){
data.class <- sapply(data,class)
test.num <- (data.class %in% c("numeric","integer"))
if(any(test.num==FALSE)){
endpoint.char <- setdiff(names(data.class)[test.num==FALSE],c(treatment,strata))
for(iE in endpoint.char){
data[, c(iE) := as.double(as.factor(.SD[[1]]))-1.0, .SDcols = iE]
}
}
}
## ** n.obs
n.obs <- data[,.N]
## ** strata
if(!is.null(strata)){
data[ , c("..strata..") := interaction(.SD, drop = TRUE, lex.order = FALSE, sep = "."), .SDcols = strata]
level.strata <- levels(data[["..strata.."]])
data[ , c("..strata..") := as.numeric(.SD[["..strata.."]])] # convert to numeric
n.obsStrata <- data[,.N, by = "..strata.."][,stats::setNames(.SD[[1]],.SD[[2]]),.SD = c("N","..strata..")]
}else{
data[ , c("..strata..") := 1]
n.obsStrata <- n.obs
level.strata <- 1
}
nlevel.strata <- length(level.strata)
if(pool.strata==3){
grid.strata <- as.matrix(expand.grid(0:(nlevel.strata-1), 0:(nlevel.strata-1)))
rownames(grid.strata) <- ifelse(level.strata[grid.strata[,1]+1]==level.strata[grid.strata[,2]+1],
level.strata[grid.strata[,1]+1],
paste(level.strata[grid.strata[,1]+1],level.strata[grid.strata[,2]+1],sep="."))
}else{
grid.strata <- cbind(0:(nlevel.strata-1), 0:(nlevel.strata-1))
rownames(grid.strata) <- level.strata
}
n.strata <- NROW(grid.strata)
## ** convert treatment to binary indicator
level.treatment <- levels(as.factor(data[[treatment]]))
trt2bin <- stats::setNames(0:1,level.treatment)
data[ , c(treatment) := trt2bin[as.character(.SD[[1]])], .SDcols = treatment]
## ** rowIndex
data[,c("..rowIndex..") := 1:.N]
## ** unique status
if(any(status == "..NA..")){
data[,c("..NA..") := -100]
}
## ** TTE with status
if(scoring.rule>0){
test.status <- sapply(status.TTE, function(iC){any(data[[iC]]==0)})
if(all(test.status==FALSE)){
scoring.rule <- 0
iidNuisance <- FALSE
}
## distinct time to event endpoints
endpoint.UTTE <- unique(endpoint.TTE[test.status])
status.UTTE <- unique(status.TTE[test.status])
D.UTTE <- length(endpoint.UTTE)
## correspondance endpoint, TTE endpoint (non TTEe endpoint are set to -100)
index.UTTE <- match(endpoint, endpoint.UTTE, nomatch = -99) - 1
}else{
endpoint.UTTE <- numeric(0)
status.UTTE <- numeric(0)
D.UTTE <- 0
index.UTTE <- rep(-100, D)
}
## ** scoring method for each endpoint
## check if status
n.CR <- sapply(status, function(iC){max(data[[iC]])})
test.CR <- n.CR[status]>1
test.censoring <- sapply(Ustatus, function(iC){any(data[[iC]]==0)})[status]
method.score <- sapply(1:D, function(iE){ ## iE <- 1
if(type[iE] %in% c("bin","cont")){
return("continuous")
}else if(type[iE] == "gaus"){
return("gaussian")
}else if(type[iE] == "tte"){
if(test.censoring[iE]==FALSE && test.CR[iE]==FALSE){
return("continuous")
}else if(scoring.rule == 0){ ## 3/4 Gehan (right/left censoring)
return(switch(censoring[iE],
"left" = "TTEgehan2",
"right" = "TTEgehan"))
}else if(scoring.rule>0){
return(switch(as.character(test.CR[iE]),
"FALSE" = "SurvPeron",
"TRUE" = "CRPeron"))
}
}
})
attr(method.score,"test.censoring") <- test.censoring
attr(method.score,"test.CR") <- test.CR
paired <- all(n.obsStrata==2)
## ** previously analyzed distinct TTE endpoints
if(scoring.rule>0 && hierarchical){ ## only relevant when using Peron scoring rule with hierarchical GPC
## number of distinct, previously analyzed, TTE endpoints
nUTTE.analyzedPeron_M1 <- sapply(1:D, function(iE){
if(iE>1){
sum(endpoint.UTTE %in% endpoint[1:(iE-1)])
}else{
return(0)
}
})
}else{
nUTTE.analyzedPeron_M1 <- rep(0,D)
}
## ** number of observations per strata used when resampling
index.C <- which(data[[treatment]] == 0)
index.T <- which(data[[treatment]] == 1)
if(any(!is.na(attr(method.inference,"resampling-strata")))){
n.obsStrataResampling <- table(data[,interaction(.SD), .SDcols = attr(method.inference,"resampling-strata")])
}else{
n.obsStrataResampling <- n.obs
}
## ** skeleton for survival proba (only relevant for Peron scoring rule)
skeletonPeron <- list(survTimeC = lapply(1:D, function(iE){lapply(1:n.strata, function(iS){matrix(nrow=0,ncol=0)})}),
survTimeT = lapply(1:D, function(iE){lapply(1:n.strata, function(iS){matrix(nrow=0,ncol=0)})}),
survJumpC = lapply(1:D, function(iE){lapply(1:n.strata, function(iS){matrix(nrow=0,ncol=0)})}),
survJumpT = lapply(1:D, function(iE){lapply(1:n.strata, function(iS){matrix(nrow=0,ncol=0)})}),
lastSurv = lapply(1:D, function(iS){matrix(nrow = n.strata, ncol = 2*max(1,n.CR[iS]))}), ## 4 for competing risk setting, 2 is enough for survival
p.C = matrix(-100, nrow = n.strata, ncol = D),
p.T = matrix(-100, nrow = n.strata, ncol = D),
iid = list(survJumpC = lapply(1:D.UTTE, function(IE){lapply(1:n.strata, matrix, nrow = 0, ncol = 0)}),
survJumpT = lapply(1:D.UTTE, function(IE){lapply(1:n.strata, matrix, nrow = 0, ncol = 0)})
)
)
## ** iid for gaussian endpoints
n.endpoint <- length(endpoint)
index.gaussiid <- which(type == "gaus")
if(length(index.gaussiid)>0 && any(!is.na(censoring[index.gaussiid]))){
index.gaussiid2 <- intersect(which(!is.na(censoring)),index.gaussiid)
for(iE in index.gaussiid2){ ## iE <- 1
skeletonPeron$survTimeC[[iE]] <- data[index.C,list(list(do.call(cbind,.SD[[1]]))), .SDcols = censoring[iE], by = "..strata.."][[2]]
skeletonPeron$survTimeT[[iE]] <- data[index.T,list(list(do.call(cbind,.SD[[1]]))), .SDcols = censoring[iE], by = "..strata.."][[2]]
}
}
## ** weightEndpoint
if(missing(weightObs) || is.null(weightObs)){
data$..weight.. <- 1
weightObs <- data$..weight..
}else if( (length(weightObs)==1) && (weightObs %in% names(data)) ){
names(data)[names(data)==weightObs] <- "..weight.."
weightObs <- data$..weight..
}
## ** pool.strata
## set default pool.strata to Buyse for paired data
## otherwise pooling will do something strange
if(paired && pool.strata !=0){
if(is.na(attr(pool.strata,"original"))){
pool.strata[] <- 0
}else{
warning("Weights from the \"buyse\" pooling scheme (argument \'pool.strata\') are recommended for paired data. \n")
}
}
## ** keep.pairScore
if(identical(attr(method.inference,"hprojection"),2) && scoring.rule>0){
## need the detail of the score to perform the 2nd order projection
keep.pairScore <- TRUE
}else if(identical(attr(method.inference,"hprojection"),2) && pool.strata == 3){
table2x2.nobs <- table(strata = data[["..strata.."]],data[[treatment]])
dfStrata2 <- expand.grid(strata0 = 1:length(level.strata),
strata1 = 1:length(level.strata))
dfStrata2$N <- NROW(data)
dfStrata2$m <- length(index.C)
dfStrata2$n <- length(index.T)
dfStrata2$m.X0 <- table2x2.nobs[dfStrata2$strata0,1]
dfStrata2$n.X1 <- table2x2.nobs[dfStrata2$strata1,2]
dfStrata2$X0 <- table(data[["..strata.."]])[dfStrata2$strata0]
dfStrata2$X1 <- table(data[["..strata.."]])[dfStrata2$strata1]
dfStrata2$weight <- (dfStrata2$X0*dfStrata2$X1/dfStrata2$N^2)/(dfStrata2$m.X0*dfStrata2$n.X1/(dfStrata2$m*dfStrata2$n))
if(any(dfStrata2$weight %% 1 != 0)){
## if decimal weigths need the detail of the score to perform the 2nd order projection
keep.pairScore <- TRUE
}
}
## ** export
keep.cols <- union(union(c(treatment, strata, "..strata.."),
na.omit(attr(method.inference,"resampling-strata"))),
attr(data,"model.tte_regressor")) ## add regressor from survival models in case they do not match GPC strata variable (user-specific survival)
return(list(data = data[,.SD,.SDcols = keep.cols],
M.endpoint = as.matrix(data[, .SD, .SDcols = Uendpoint]),
M.status = as.matrix(data[, .SD, .SDcols = Ustatus]),
index.C = index.C,
index.T = index.T,
weightObs = weightObs,
index.strata = tapply(data[["..rowIndex.."]], data[["..strata.."]], list),
level.treatment = level.treatment,
level.strata = level.strata, pool.strata = pool.strata, ## distinct strata levels (e.g. M, F)
method.score = method.score, paired = paired,
grid.strata = grid.strata, ## strata (e.g. M, F, M.F, F.M) - different from level.strata when using standardisation
n.obs = n.obs,
n.obsStrata = n.obsStrata,
n.obsStrataResampling = n.obsStrataResampling,
cumn.obsStrataResampling = c(0,cumsum(n.obsStrataResampling)),
skeletonPeron = skeletonPeron,
scoring.rule = scoring.rule,
iidNuisance = iidNuisance,
nUTTE.analyzedPeron_M1 = nUTTE.analyzedPeron_M1,
endpoint.UTTE = endpoint.UTTE,
status.UTTE = status.UTTE,
D.UTTE = D.UTTE,
index.UTTE = index.UTTE,
keep.pairScore = keep.pairScore
))
}
## * initializeFormula
initializeFormula <- function(x, hierarchical, envir){
validClass(x, valid.class = "formula")
## ** extract treatment
treatment <- setdiff(all.vars(x), all.vars(stats::delete.response(stats::terms(x))))
if(length(treatment)!=1){
stop("initFormula: there must be exactly one response variable in formula\n",
"number of response variables founded: ",length(treatment),"\n")
}
if(length(as.character(x))!=3){
stop("initFormula: formula with unexpected length, as.character(x) should have length 3\n",
"length founded: ",length(as.character(x)),"\n")
}
## ** restrict to the right side of the formula
x.rhs <- as.character(x)[3]
## remove all blanks
x.rhs <- gsub("[[:blank:]]|\n", "", x.rhs)
## find endpoints
## https://stackoverflow.com/questions/35347537/using-strsplit-in-r-ignoring-anything-in-parentheses/35347645
## (*SKIP)(*FAIL): ignore
## \\( \\): inside brackets
## [^()]*: anything but ()
magic.formula <- "\\([^()]*\\)(*SKIP)(*FAIL)|\\h*\\+\\h*"
vec.x.rhs <- unlist(strsplit(x.rhs, split = magic.formula, perl = TRUE))
## find all element in the vector corresponding to endpoints (i.e. ...(...) )
## \\w* any letter/number
## [[:print:]]* any letter/number/punctuation/space
index.endpoint <- grep("\\w*\\([[:print:]]*\\)$", vec.x.rhs)
index.strata <- setdiff(1:length(vec.x.rhs), index.endpoint)
## ** strata variables
if(length(index.strata)==0){
strata <- NULL
}else{
strata <- vec.x.rhs[index.strata]
}
## ** number of endpoint variables
vec.x.endpoint <- vec.x.rhs[index.endpoint]
n.endpoint <- length(vec.x.endpoint)
if(n.endpoint==0){
stop("initFormula: x must contain endpoints \n",
"nothing of the form type(endpoint,threshold,status) found in the formula \n")
}
## ** extract endpoints and additional arguments
threshold <- NULL
status <- NULL
endpoint <- NULL
operator <- NULL
censoring <- NULL
restriction <- NULL
weightEndpoint <- NULL
type <- NULL
validArgs <- c("endpoint","mean",
"status", "std",
"iid",
"threshold","operator","weight","censoring","restriction")
## split around parentheses
ls.x.endpoint <- strsplit(vec.x.endpoint, split = "(", fixed = TRUE)
for(iE in 1:n.endpoint){
## extract type
candidate <- tolower(ls.x.endpoint[[iE]][1])
if(candidate %in% c("b","bin","binary")){
type <- c(type, candidate)
iValidArgs <- setdiff(validArgs,c("status","threshold","mean","std","iid","restriction"))
default.censoring <- "right"
}else if(candidate %in% c("c","cont","continuous")){
type <- c(type, candidate)
iValidArgs <- setdiff(validArgs,c("status","mean","std","iid"))
default.censoring <- "right"
}else if(candidate %in% c("t","tte","time","timetoevent")){
type <- c(type, candidate)
iValidArgs <- setdiff(validArgs,c("mean","std","iid"))
default.censoring <- "right"
}else if(candidate %in% c("g","gaus","gaussian")){
type <- c(type, candidate)
iValidArgs <- setdiff(validArgs, c("endpoint","status","censoring","restriction"))
default.censoring <- as.character(NA)
}else if(candidate %in% c("s","strat","strata")){
strata <- c(strata, gsub(")", replacement = "",ls.x.endpoint[[iE]][2]))
next
}else{
stop("initFormula: cannot convert the element ",paste(ls.x.endpoint[[iE]],collapse="(")," in the formula to a useful information.\n")
}
## get each argument
iVec.args <- strsplit(gsub(")", replacement = "",ls.x.endpoint[[iE]][2]),
split = ",", fixed = TRUE)[[1]]
n.args <- length(iVec.args)
## check size
if(n.args==0){
stop("initFormula: invalid formula \n",
vec.x.rhs[iE]," must contain the name of the endpoint between the parentheses \n"
)
}
if(n.args>4){
stop("initFormula: invalid formula \n",
x[iE]," has too many arguments (maximum 4: endpoint, threshold, status variable, operator) \n")
}
## extract name of each argument
iIndex.name <- grep("=",iVec.args)
iArg <- gsub("^[[:print:]]*=", replacement = "", iVec.args)
iName <- rep(as.character(NA),n.args)
## use existing names
if(length(iIndex.name)>0){
iiName <- gsub("=[[:print:]]*$","",iVec.args[iIndex.name])
iName[iIndex.name] <- iiName
if(any(iiName %in% iValidArgs == FALSE)){
stop("initFormula: invalid formula \n",
vec.x.rhs[iE]," contains arguments that are not \"",paste0(iValidArgs,sep = "\" \""),"\" \n")
}
if( any(duplicated(iiName)) ){
stop("initFormula: invalid formula \n",
vec.x.rhs[iE]," contains arguments with the same name \n")
}
}else{
iiName <- NULL
}
## add missing names
n.missingNames <- n.args - length(iiName)
if(n.missingNames>0){
iName[setdiff(1:n.args,iIndex.name)] <- setdiff(iValidArgs,iiName)[1:n.missingNames]
}
## rename
if("mean" %in% iName){
iName[iName=="mean"] <- "endpoint"
}
if("std" %in% iName){
iName[iName=="std"] <- "status"
}
if("iid" %in% iName){
iName[iName=="iid"] <- "censoring"
}
## extract arguments
endpoint <- c(endpoint, gsub("\"","",iArg[iName=="endpoint"]))
if("threshold" %in% iName){
thresholdTempo <- try(eval(expr = parse(text = iArg[iName=="threshold"])), silent = TRUE)
if(inherits(thresholdTempo,"try-error")){
thresholdTempo <- try(eval(expr = parse(text = iArg[iName=="threshold"]), envir = envir), silent = TRUE)
if(inherits(thresholdTempo,"try-error")){
stop(iArg[iName=="threshold"]," does not refer to a valid threshold \n",
"Should be numeric or the name of a variable in the global workspace \n")
}
}
if(inherits(thresholdTempo, "function")){
packageTempo <- environmentName(environment(thresholdTempo))
if(nchar(packageTempo)>0){
txt <- paste0("(package ",packageTempo,")")
}else{
txt <- ""
}
stop(iArg[iName=="threshold"]," is already defined as a function ",txt,"\n",
"cannot be used to specify the threshold \n")
}
threshold <- c(threshold, as.numeric(thresholdTempo))
}else{
threshold <- c(threshold, NA)
}
if("status" %in% iName){
status <- c(status, gsub("\"","",iArg[iName=="status"]))
}else{
status <- c(status, "..NA..")
}
if("operator" %in% iName){
operator <- c(operator, gsub("\"","",iArg[iName=="operator"]))
}else{
operator <- c(operator, ">0")
}
if("weight" %in% iName){
weightEndpoint <- c(weightEndpoint, as.numeric(eval(expr = parse(text = iArg[iName=="weight"]))))
}else if(hierarchical){
weightEndpoint <- c(weightEndpoint, 1)
}else{
weightEndpoint <- c(weightEndpoint, as.numeric(NA))
}
if("censoring" %in% iName){
censoring <- c(censoring, gsub("\"","",iArg[iName=="censoring"]))
}else{
censoring <- c(censoring, default.censoring)
}
if("restriction" %in% iName){
restrictionTempo <- try(eval(expr = parse(text = iArg[iName=="restriction"])), silent = TRUE)
if(inherits(restrictionTempo,"try-error")){
restrictionTempo <- try(eval(expr = parse(text = iArg[iName=="restriction"]), envir = envir), silent = TRUE)
if(inherits(restrictionTempo,"try-error")){
stop(iArg[iName=="restriction"]," does not refer to a valid restriction \n",
"Should be numeric or the name of a variable in the global workspace \n")
}
}
if(inherits(restrictionTempo, "function")){
packageTempo <- environmentName(environment(restrictionTempo))
if(nchar(packageTempo)>0){
txt <- paste0("(package ",packageTempo,")")
}else{
txt <- ""
}
stop(iArg[iName=="restriction"]," is already defined as a function ",txt,"\n",
"cannot be used to specify the restriction \n")
}
restriction <- c(restriction, as.numeric(restrictionTempo))
}else{
restriction <- c(restriction, as.numeric(NA))
}
}
## ** export
if(all(is.na(weightEndpoint))){
weightEndpoint <- rep(1/length(weightEndpoint), length(weightEndpoint))
}else if(sum(weightEndpoint, na.rm = TRUE)<1){
weightEndpoint[!is.na(weightEndpoint)] <- (1-sum(weightEndpoint, na.rm = TRUE))/sum(is.na(weightEndpoint))
}
out <- list(treatment = treatment,
type = type,
endpoint = endpoint,
threshold = threshold,
status = status,
operator = operator,
weightEndpoint = weightEndpoint,
censoring = censoring,
restriction = restriction,
strata = strata)
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.