# Document-wide chunk option: echo the source of every chunk.
knitr::opts_chunk$set(echo = TRUE)

# Attached for fread(), fwrite() and data.table() used further down.
library(data.table)
# Load the training labels and the per-model prediction files.
#
# Every prediction file in `dirname` is expected to be named after a number
# (the file name minus ".csv" is parsed into `auclist`), and its second column
# is taken as that model's prediction vector.
dirname <- "data_cat_maxAUC_0.78"

# list.files() interprets `pattern` as a regular expression, not a glob, so
# the dot before the extension is escaped and both ends are anchored.
# NOTE(review): assumes prediction files start with "0" (e.g. "0.775.csv").
flist <- list.files(path = dirname, pattern = "^0.*\\.csv$")
stopifnot(length(flist) > 0)

# Strip only the trailing ".csv"; what is left is parsed as a number.
auclist <- as.numeric(sub("\\.csv$", "", flist))

train <- fread(file.path(dirname, "train.csv"))
y <- as.factor(train$target)

# Fraction of training rows whose target equals 1.
rho <- mean(y == 1)

# One column per prediction file. The row count comes from the files
# themselves instead of a hard-coded 400000, and a length mismatch between
# files stops the script rather than being silently recycled by cbind().
prediction_cols <- lapply(
  file.path(dirname, flist),
  function(path) fread(path)[[2]]
)
stopifnot(length(unique(lengths(prediction_cols))) == 1L)
predictions <- do.call(cbind, prediction_cols)

dim(predictions)
head(predictions)
hist(predictions[, 1])
# Run summa::summa() on the prediction matrix with the "rank" option, then
# plot a histogram of the `estimated_rank` slot of the result.
library(summa)

ksumma <- summa::summa(predictions, "rank")
hist(ksumma@estimated_rank)
# Build an fdensemble object from the first eight prediction columns.
fde1 <- fdensemble(predictions[, seq_len(8)])

# Supply the matching eight values parsed from the file names, together with
# the positive-class fraction `rho`.
fde1 <- predict_performance(fde1, auclist[seq_len(8)], rho, alpha = 1)

# Diagnostic plots: correlation plot for the "positive" class flag and a
# histogram of the fourth column of the object's `predictions` slot.
plot_cor(fde1, class_flag = "positive")
hist(fde1@predictions[, 4])
# Write the submission file: ids 600000..999999 paired with the ensemble's
# estimated rank. data.table() requires both columns to have equal length.
submit <- data.table(
  id = 600000:999999,
  target = fde1@estimated_rank
)
fwrite(submit, file = "submission.csv")
# Correlation plot with the legend enabled. `TRUE` is spelled out because `T`
# is an ordinary variable that can be reassigned.
plot_cor(fde1, legend_flag = TRUE)

# Pairwise correlations between the columns of the `rank_matrix` slot.
cor(fde1@rank_matrix)
# Simulated example: generate predictions with create_predictions(), run
# summa() on them in "rank" mode, score the result against the generated
# labels and plot it.
library(summa)

data_binary <- create_predictions(3000, 30, 0.3, "rank")
str(data_binary)

summ <- summa(data_binary$predictions, "rank")
summ <- calculate_performance(summ, data_binary$actual_labels)

summa_plot(summ)
# Read the compressed CSV into a data.table and recode the target column:
# 1 becomes "Yes", anything else becomes "No".
train <- as.data.table(readr::read_csv("data-CATII.csv.bz2"))
train$target <- ifelse(train$target == 1, "Yes", "No")

# Keep a random subset of at most 5000 rows. The size is capped at the number
# of available rows so that sample() does not fail on a smaller table.
n_rows <- nrow(train)
train <- train[sample(seq_len(n_rows), size = min(5000L, n_rows)), ]

# Class balance of the subsample.
table(train$target)
# Split the subsample 75/25 into training and testing rows.
# NOTE(review): createDataPartition() is not provided by any library() call
# visible in this file -- confirm which attached package supplies it.
inTraining0 <- createDataPartition(train$target, p = 0.75, list = FALSE)
training <- train[inTraining0, ]
testing <- train[-inTraining0, ]

# NOTE(review): the labels are built from the full `train` table rather than
# from `testing`, and `class1 = 1` is passed although the target was recoded
# to "Yes"/"No" -- confirm this is intended.
testingY <- as_label(train$target, class1 = 1)

# Candidate model names. Only "rda" and "knn" are passed to mtrainer() below;
# `model_list` itself is not referenced again in the visible code.
model_list <- c(
  "nnet", "rda", "svmLinear", "svmRadial", "pls", "knn", "earth",
  "avNNet", "mlp", "nb", "rf", "rpart", "ctree", "C5.0", "gbm",
  "bayesglm", "glm", "glmnet", "simpls"
)
t1 <- mtrainer(c("rda", "knn"), dataInfo = "CATII")

# `train` is also the name of the data table above; R skips non-function
# bindings when resolving a call, so this still calls the train() function.
t1 <- train(t1, target ~ ., training, update = TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.