analWindow_Cox <- function(DataFile_cutData,DataFile_feature,DataFile_personal,idColName,labelColName,dataLengthColName,predictorColName=NULL,isDescription=F,testN=100,method="weighting"){
DataFile_cutData$window_N <- as.integer(DataFile_cutData$window_N)
if(is.na(DataFile_cutData$window_N)){
DataFile_cutData$window_N <- 0
N=1
}else{
N <- max(DataFile_cutData$window_N)
}
if(length(DataFile_feature) != 1 & length(DataFile_feature) != max(DataFile_cutData$window_N)){
stop("error feature length")
}
cutdata <- as.data.table(DataFile_cutData)
ccscategory <- unique(c(ccstable$CCS_CATEGORY))
cutdata_dataCol <- c(deparse(substitute(window_N)),deparse(substitute(ID)),deparse(substitute(periodAge)),ccscategory)
cutdata <- cutdata[, cutdata_dataCol,with=FALSE]
names(cutdata) <- c("window_N","ID","periodAge",ccscategory)
test_timeseriesauc_summary <- c()
test_windowauc_summary <- NULL
#如果用同樣的feature跳message
if(length(DataFile_feature)==1){
message("same features in all windows")
}
DataFile_personal <- as.data.table(DataFile_personal)
personal_dataCol <- c(deparse(substitute(idColName)),deparse(substitute(labelColName)),deparse(substitute(dataLengthColName)),predictorColName)
DataFile_personal <- DataFile_personal[, personal_dataCol, with=FALSE]
names(DataFile_personal) <- c("ID","label","dataLength",predictorColName)
DataFile_personal$futat <- DataFile_personal$label
DataFile_personal$futat <- as.numeric(DataFile_personal$futat)
DataFile_personal$dataLength <- as.numeric(DataFile_personal$dataLength)
set.seed(10)
for(j in 1:testN){
#切test train
test_indices <- sample(1:length(unique(cutdata$ID)),length(unique(cutdata$ID))/5)
#coxph
model <- list()
COXccs <- list()
testdata <- list()
traindata <- list()
for(i in 1:N){
if(length(DataFile_feature)==1){
COXccs_top <- DataFile_feature[[1]][selected==1, "CCS_CATEGORY"]
}else if(length(DataFile_feature)==N){
COXccs_top <- DataFile_feature[[i]][selected==1, "CCS_CATEGORY"]
}
setDT(COXccs_top)
if(N==1){
tcutdata_test <- cutdata[test_indices]
tcutdata_train <- cutdata[-test_indices]
}else{
tcutdata_test <- cutdata[window_N==i][test_indices]
tcutdata_train <- cutdata[window_N==i][-test_indices]
}
string <- paste(as.character(COXccs_top$CCS_CATEGORY), collapse = "+")
paste2 <- function(x, y, sep = "+") paste(x, y, sep = sep)
if(string == ""){
string2 <- "`SEX`+`periodAge`"
}else{
if(length(grep("periodAge",names(DataFile_cutData)))>0){
if(! is.null(predictorColName)){
predictorvector <- reduce(predictorColName,paste2)
predictorvector <- paste0(predictorvector,"+`periodAge`")
string2 <- paste0(string,"+",predictorvector,"")
}else{
string2 <- paste0(string,"+`periodAge`","")
}
}else{
if(! is.null(predictorColName)){
predictorvector <- reduce(predictorColName,paste2)
string2 <- paste0(string,"+",predictorvector,"")
}else{
string2 <- paste0(string,"")
}
}
}
finaldata <- merge(tcutdata_train,DataFile_personal,by="ID",all.x=T)
finaldata_test <- merge(tcutdata_test,DataFile_personal,by="ID",all.x=T)
f1 <- as.formula(paste("Surv(dataLength, futat) ~ ", string2))
ova.fit <- coxph(f1, data = finaldata)
ova.fit
COXccs_window <- summary(ova.fit)
temp <- cbind(COXccs_window$conf.int, "Pr(>|z|)" =c(COXccs_window$coefficients[,5]))
temp <- round(temp,3)
options(scipen = 999)
temp <- cbind("CCS_CATEGORY"=c(rownames(temp)), temp)
COXccs_window <- as.data.table(temp)
COXccs_window$`exp(-coef)` <- NULL
#加入文字敘述及數量
CCSWide_num <- setDT(tcutdata_train)[,lapply(tcutdata_train[,3:ncol(tcutdata_train)],sum),]
CCSWide_num <- melt(setDT(CCSWide_num), na.rm = TRUE)[,.(CCS_CATEGORY=variable,caseCount=value)]
CCSWide_num$CCS_CATEGORY <- paste0(CCSWide_num$CCS_CATEGORY)
COXccs_window <- merge(COXccs_window,CCSWide_num,by="CCS_CATEGORY",all.x=T)
if(isDescription==T){
COXccs_des <- unique(ccstable[,c("CCS_CATEGORY","CCS_CATEGORY_DESCRIPTION","CCS_LVL_2_LABEL","CCS_LVL_1_LABEL"),])
COXccs_window <- merge(COXccs_window,COXccs_des,by="CCS_CATEGORY",all.x=T)
}
setDT(COXccs_window)
setnames(COXccs_window,"CCS_CATEGORY","features")
model[[i]] <- ova.fit
COXccs[[i]] <- COXccs_window
traindata[[i]] <- tcutdata_train
testdata[[i]] <- tcutdata_test
}
#model evaluate
votesummary <- NULL
windowauc <- c()
for(i in 1:N){
windowcut_window <- merge(testdata[[i]], DataFile_personal, all.x = T,by="ID")
personal_test <- DataFile_personal[ID %in% windowcut_window$ID]
forROC <- data.frame(personal_test[,.(ID,label)],pre=predict(model[[i]], newdata = windowcut_window[,3:ncol(windowcut_window)], type="risk"))
rocobj <- roc(forROC$label,forROC$pre)
#
windowauc[i] <- as.numeric(auc(forROC$label,forROC$pre))
ROC <- coords(rocobj, x="best", input="threshold", best.method="youden")
if(method=="vote"){
threshold <- ROC$threshold
predictvalue <- ifelse(forROC$pre>threshold,1,0)
}else if(method=="weighting"){
predictvalue <- forROC$pre
}
votesummary <- cbind(votesummary,predictvalue)
}
test_windowauc_summary <- cbind(test_windowauc_summary,windowauc)
votesummary <- as.data.table(votesummary)
if(method=="vote"){
votesummary[,finalpredict:= ifelse(rowSums(.SD)>N/2,1,0),]
}else if(method=="weighting"){
getPredict <- function(data,x){
sum = 0
for(i in 1:length(data)){
sum <- sum+data[i]*(1/i)
}
return(sum)
}
votesummary$finalpredict <- apply(votesummary, 1, getPredict)
}
finalrocobj <- roc(personal_test$label,votesummary$finalpredict)
auc <- as.numeric(auc(personal_test$label,votesummary$finalpredict))
test_timeseriesauc_summary[j] <- auc
}
test_windowauc_summary <- as.data.table(test_windowauc_summary)
test_windowauc_summary_list <- list()
for(i in 1:N){
assign(paste0("window",i),as.numeric(test_windowauc_summary[i,]))
test_windowauc_summary_list[[i]] <- get(paste0("window",i))
}
windowname <- paste0("window", 1:N)
names(test_windowauc_summary_list) <- windowname
list.Cox <- list(model = model, summarytable = COXccs,evaluation_test=test_timeseriesauc_summary,evaluation_test_window=test_windowauc_summary_list)
return(list.Cox)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.