# Default knitr chunk options for this document: collapse is switched on and
# printed output lines are prefixed with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library(SubgroupBoost)

Introduction

This tutorial guides the reader through identifying treatment-performing and non-performing subgroups using our package SubgroupBoost.

Data input and preprocessing

# Load the example data set and inspect its components.
data("simdata")
names(simdata)
str(simdata$train)
str(simdata$test)

# ----- Data manipulation -----
# Training data: the first five columns are kept aside as `info.train`,
# the remaining columns form the feature set `dat.train`.
dat.train <- simdata[[1]][, -(1:5)]
info.train <- simdata[[1]][, 1:5]
# Feature matrix for xgboost, labelled with the `trt01p` column.
dtrain <- xgb.DMatrix(
  as.matrix(dat.train),
  label = info.train$trt01p
)

# Testing data: same split as for the training data.
dat.test <- simdata[[2]][, -(1:5)]
info.test <- simdata[[2]][, 1:5]
dtest <- xgb.DMatrix(
  as.matrix(dat.test),
  label = info.test$trt01p
)

RMST

# ----- RMST death -----
# Assemble the modelling data: features plus the treatment column and the
# first event/time pair (`evnt1`, `aval1`) taken from `info.train`.
dat1 <- data.frame(
  dat.train,
  trt01p = info.train$trt01p,
  evnt = info.train$evnt1,
  aval = info.train$aval1
)
set.seed(123) # for reproducibility
model_RMST <- SubgroupBoost.RMST(dat1)

# Check which variables were selected.
importance <- xgb.importance(model_RMST$feature_names, model_RMST)$Feature
importance

# Predicted value for each sample in the training data.
pred_RMST <- predict(model_RMST, dtrain)
# Calculate probabilities of belonging to the treatment performing subgroup
# by sigmoid transformation.
prob_RMST <- 1 / (1 + exp(-pred_RMST))
# Plot the resulting probabilities.
hist(prob_RMST)

# Wrap the prediction steps into a function that turns model predictions into
# 0/1 subgroup labels.
#
# Arguments:
#   model  - a fitted model object accepted by `predict()`.
#   data   - the data passed to `predict()` as its second argument.
#   cutoff - probability threshold in [0, 1]; samples whose probability is
#            strictly greater than `cutoff` are labelled 1, all others 0.
#            Defaults to 0.5, the cutoff used throughout this tutorial.
#
# Returns:
#   A vector of 0/1 subgroup labels, one per predicted value.
SubgroupBoost.predict <- function(model, data, cutoff = 0.5) {
  # Fail early on an unusable threshold instead of silently returning
  # all-0 / all-1 / NA labels.
  stopifnot(
    is.numeric(cutoff),
    length(cutoff) == 1L,
    !is.na(cutoff),
    cutoff >= 0,
    cutoff <= 1
  )

  # Get predicted value of model
  pred <- predict(model, data)
  # Calculate probabilities by sigmoid transformation
  prob <- 1 / (1 + exp(-pred))
  # Separate samples into two subgroups at the cutoff. The result is not
  # stored in a variable called `assign`, which would shadow base::assign().
  ifelse(prob > cutoff, 1, 0)
}

# 2 by 2 tables of predicted versus true subgroup label.
# Training data:
RMST.predict.train <- SubgroupBoost.predict(model_RMST, dtrain)
table(
  Predicted = RMST.predict.train,
  True = simdata[[3]]
)

# Testing data:
RMST.predict.test <- SubgroupBoost.predict(model_RMST, dtest)
table(
  Predicted = RMST.predict.test,
  True = simdata[[4]]
)

Win-difference

# ----- win-difference -----
# Specify the type of outcome through the `comparison` argument. For two
# survival outcomes, set it to "survival survival".
# Note: for the two-survival-outcomes case, make sure the primary outcome is
# defined by columns evnt1 and aval1, and the secondary outcome by columns
# evnt2 and aval2, in the info.train object.
comparison <- "survival survival"
set.seed(123)
model_wd <- SubgroupBoost.wd(dat.train, info.train, comparison)

# Check which variables were selected.
importance <- xgb.importance(model_wd$feature_names, model_wd)$Feature
importance

# 2 by 2 tables of predicted versus true subgroup label.
# Training data:
wd.predict.train <- SubgroupBoost.predict(model_wd, dtrain)
table(
  Predicted = wd.predict.train,
  True = simdata[[3]]
)

# Testing data:
wd.predict.test <- SubgroupBoost.predict(model_wd, dtest)
table(
  Predicted = wd.predict.test,
  True = simdata[[4]]
)


liupeng2117/SubgroupBoost documentation built on Feb. 7, 2022, 1 p.m.