#######################################################################################################
### Selects variables via LASSO based on half RCTs ################################
### with 100 cross validations (100 different half-datasets)###########################
############# 2 YEARS RELAPSES ###################################
################################################################################################################
library(glmnet)
library(Hmisc)
####################random half RCTs from studies###############################
### Per-study subsets do not change between replications, so build them once.
### which() drops rows whose STUDYID is NA instead of returning NA rows.
Advance<-dataset[which(dataset$STUDYID=="105MS301"),]
Define<-dataset[which(dataset$STUDYID=="109MS301"),]
Confirm<-dataset[which(dataset$STUDYID=="C-1801"),]
Affirm<-dataset[which(dataset$STUDYID=="C-1802"),]
Mscrg<-dataset[which(dataset$STUDYID=="NS26321"),]

### Draws a random half of the rows of one study (without replacement) and
### removes the columns named in drop.cols.
###   study     : data frame holding the rows of a single study
###   drop.cols : character vector of column names to remove
### Returns the sampled rows with the remaining columns.
sample.half<-function(study,drop.cols) {
  half<-study[sample(nrow(study), nrow(study)/2),]
  half[ , !(names(half) %in% drop.cols)]
}

### 100 replications, each on a fresh random half of every study
for (i in 1:100) {
  ### identifiers and the 1-year outcome are not candidate predictors
  todrop<-c("STUDYID","USUBJID","RELAPSE1year")
  ### keep this order: each call consumes random numbers
  Advance.risk<-sample.half(Advance,todrop)
  Define.risk<-sample.half(Define,todrop)
  Confirm.risk<-sample.half(Confirm,todrop)
  Affirm.risk<-sample.half(Affirm,todrop)
  Mscrg.risk<-sample.half(Mscrg,todrop)
  ## all half studies together
  mrg<-rbind(Advance.risk,Define.risk,Confirm.risk,Affirm.risk,Mscrg.risk)
  ##################### LASSO preparation ####################
  ### blinded to treatment so drop variable TRT01A
  todrop<-c("TRT01A")
  mrg.both<-mrg[ , !(names(mrg) %in% todrop)]
  ### delete rows with NA values (LASSO requirement)
  mrg.both<-na.omit(mrg.both)
  ### model matrix needed for LASSO.
  ### The response must be written as a bare column name: with
  ### mrg.both$RELAPSE2year on the left-hand side, "." would not recognise the
  ### outcome as already used and would add RELAPSE2year to the predictors.
  ### No second na.omit() on the matrix: mrg.both is already complete, and
  ### dropping matrix rows would misalign x and y.
  half.matrix<-model.matrix(RELAPSE2year~.,data=mrg.both)
  ################################# LASSO ################################
  ######################################################################
  ## 10-fold cross-validation (cv.glmnet default number of folds)
  cv.fit.half<-cv.glmnet(x=half.matrix,y=mrg.both$RELAPSE2year,family="binomial")
  ### LASSO coefficients at the largest lambda within 1 SE of the minimum CV error
  cv.coef.half<-coef(cv.fit.half,s="lambda.1se")
  #### RESULTS
  ### non zero coefficients lead to selected variables
  cv.pf.em.half<-rownames(cv.coef.half)[as.numeric(cv.coef.half)!=0]
  print(cv.pf.em.half)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.