R/CrossInternalRisk.fun.R

#################################  This function develops the Risk score (step 1) - Cross Internal Method #########################################################
#############################################################################################################################################################


CrossInternalRisk.fun=function(dataset){
#load libraries
library(glmnet)
library(Hmisc)
####################random half RCTs from studies###############################
set.seed(1)
Advance<-dataset[which(dataset$STUDYID=="105MS301"),]
Advance.risk<-Advance[sample(nrow(Advance), nrow(Advance)/2),]
todrop<-c("STUDYID","USUBJID","RELAPSE2year")
Advance.risk<-Advance.risk[ , !(names(Advance.risk) %in% todrop)]
set.seed(1)
Define<-dataset[which(dataset$STUDYID=="109MS301"),]
Define.risk<-Define[sample(nrow(Define), nrow(Define)/2),]
Define.risk<-Define.risk[ , !(names(Define.risk) %in% todrop)]
set.seed(1)
Confirm<-dataset[which(dataset$STUDYID=="C-1801"),]
Confirm.risk<-Confirm[sample(nrow(Confirm), nrow(Confirm)/2),]
Confirm.risk<-Confirm.risk[ , !(names(Confirm.risk) %in% todrop)]
set.seed(1)
Affirm<-dataset[which(dataset$STUDYID=="C-1802"),]
Affirm.risk<-Affirm[sample(nrow(Affirm), nrow(Affirm)/2),]
Affirm.risk<-Affirm.risk[ , !(names(Affirm.risk) %in% todrop)]
set.seed(1)
Mscrg<-dataset[which(dataset$STUDYID=="NS26321"),]
Mscrg.risk<-Mscrg[sample(nrow(Mscrg), nrow(Mscrg)/2),]
Mscrg.risk<-Mscrg.risk[ , !(names(Mscrg.risk) %in% todrop)]
##all half studies together
mrg<-rbind(Advance.risk,Define.risk,Confirm.risk,Affirm.risk,Mscrg.risk)
#####################LASSO preparation####################

###blinded to treatment so drop variable TRT01A
todrop<-c("TRT01A")
mrg.both<-mrg[ , !(names(mrg) %in% todrop)]
### delete NA values (LASSO requierement)
mrg.both<-na.omit(mrg.both)

##logistic regression based on selected variables from LASSO
model<-lrm(RELAPSE1year~RLPS3YR+GDLESBL+SFPCSBL, data=mrg.both)

##check the assumption of linearity
a<-lrm(RELAPSE1year~rcs(RLPS3YR,5)+rcs(GDLESBL,5)+rcs(SFPCSBL,5), data=mrg.both)
anova(a) ##rlps3yr nonlinear
b<-lrm(RELAPSE1year~rcs(RLPS3YR,4)+rcs(GDLESBL,4)+rcs(SFPCSBL,4), data=mrg.both)
anova(b)##rlps3yr nonlinear
AIC(a)
AIC(b) ###best choice of knots is knots=4

###final model (as anova does not indicate nonlinear relationship)
finalmodel<-lrm(RELAPSE1year~rcs(RLPS3YR,4)+GDLESBL+SFPCSBL, data=mrg.both)
finalmodel
df<-finalmodel[["stats"]][["d.f."]]
events<- nrow(mrg.both[which(mrg.both$RELAPSE1year==1),])
####return back
cat("The selected variables are: RLPS3YR, GDLESBL, SFPCSBL ", fill = TRUE)
EPV<-events/df
cat("The EPV of the model is", EPV, fill=TRUE)
if (EPV>10){cat("The EPV of the model is >10, as recommended.", fill=TRUE)}
if (EPV<10){cat("The EPV of the model is <10, possibility of overfitting problem.", fill = TRUE)}

cat("The final model is:",fill=TRUE)
return(finalmodel)

}
htx-r/RiskModelNMApredictions documentation built on June 12, 2019, 9:52 a.m.