internalRisk2year.fun=function(dataset) {
##loading libraries
library(glmnet)
library(Hmisc)
################# LASSO preperation ##########################################
################################################################
MSrelapse<-dataset[which(MSrelapse$STUDYID!="105MS301"),]
dataset<-MSrelapse
#drop STUDYID, USIBJID (no needed for the matrix), drop TRT01A (blinded to treatment)
##### drop RELAPSE1year - we need only the analysis for 2 years
todrop<-c("STUDYID","USUBJID","TRT01A","RELAPSE1year")
X<-dataset[ , !(names(dataset) %in% todrop)]
### develop the matrix for the model
X.both<-model.matrix(X$RELAPSE2year~.,data=X)
#### delete NAs values (LASSO requirement)
X.both<-na.omit(X.both)
X<-na.omit(X)
set.seed(1)
#############################LASSO###########################################
########################################################################
###LASSO (alpha=1-default) with 10 cross validations
cv.fit.both<-cv.glmnet(x=X.both,y=X$RELAPSE2year,family="binomial")
###coefficients of variables via LASSO
cv.coef.both<-coef(cv.fit.both,s="lambda.1se")
###############RESULTS
### Non zero coefficients -> selected variables
cv.pf.em.both<-rownames(cv.coef.both)[as.numeric(cv.coef.both)!=0]
###selected variables
cv.pf.em.both
##plot lambda
plot(cv.fit.both)
##plot coeff
plot(coef(cv.fit.both))
###logistic model based on LASSO selected variables
model<-lrm(RELAPSE2year~AGE+WEIGHTBL+EDSSBL+RLPS3YR+TRELMOS+PRMSGR+REGION+NHPTDHBL+GDLESBL+SFPCSBL+SENSORBL+DISTWKBL,data=X)
model
##check the assumption of linearity
##best choice of knots and test for linearity
a<-lrm(RELAPSE2year~rcs(AGE,5)+rcs(WEIGHTBL,5)+rcs(EDSSBL,5)+rcs(RLPS3YR,5)+rcs(TRELMOS,5)+PRMSGR+REGION+rcs(NHPTDHBL,5)+rcs(GDLESBL,5)+rcs(SFPCSBL,5)+SENSORBL+DISTWKBL,data=X)
anova(a)##relapse3years
b<-lrm(RELAPSE2year~rcs(AGE,4)+rcs(WEIGHTBL,4)+rcs(EDSSBL,4)+rcs(RLPS3YR,4)+rcs(TRELMOS,4)+PRMSGR+REGION+rcs(NHPTDHBL,4)+rcs(GDLESBL,4)+rcs(SFPCSBL,4)+SENSORBL+DISTWKBL,data=X)
anova(b)##relapse3years non linear
AIC(a)
AIC(b) ###best choice of knots is knots=4
finalmodel<-lrm(RELAPSE2year~AGE+WEIGHTBL+EDSSBL+rcs(RLPS3YR,4)+TRELMOS+PRMSGR+REGION+NHPTDHBL+GDLESBL+SFPCSBL+SENSORBL+DISTWKBL,data=X)
df<-finalmodel[["stats"]][["d.f."]]
events<- nrow(X[which(X$RELAPSE2year==1),])
####return back
cat("The selected variables are:" , cv.pf.em.both, fill = TRUE)
EPV<-events/df
cat("The EPV of the model is", EPV, fill=TRUE)
if (EPV>10){cat("The EPV of the model is >10, as recommended.", fill=TRUE)}
if (EPV<10){cat("The EPV of the model is <10, possibility of overfitting problem.", fill = TRUE)}
cat("The final model is:",fill=TRUE)
return(finalmodel)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.