# Legacy codes
if(F){
# cvFitOneModel
cvFitOneModel_legacy <- function(Xbin, Ybin,
params=list(
# xgb.cv only
nfold=5,
nrounds = 100,
# xgb.cv & xgboost
max_depth = 10,
eta = 0.5,
nthread = 5,
colsample_bytree = 1,
min_child_weight = 1
),
breakVec=c(0, 0.25, 0.5, 0.75, 1.0),
genes,
verbose = F){
# Test
if(F){
# Example
library(GSClassifier); library(xgboost)
testData <- readRDS(system.file("extdata", "testData.rds", package = "GSClassifier"))
expr <- testData$PanSTAD_expr_part
design <- testData$PanSTAD_phenotype_part
modelInfo <- modelData(
design,
id.col = "ID",
variable = c("platform", "PAD_subtype"),
Prop = 0.1,
seed = 145
)
Xs <- expr[,modelInfo$Data$Train$ID]
y <- modelInfo$Data$Train
y <- y[colnames(Xs),]
Ys <- ifelse(y$PAD_subtype == 'PAD-I',1,ifelse(y$PAD_subtype == 'PAD-II',2,ifelse(y$PAD_subtype == 'PAD-III',3,ifelse(y$PAD_subtype == 'PAD-IV',4,NA)))); table(Ys)/length(Ys)
PADi <- readRDS(system.file("extdata", paste0('PAD.train_20220916.rds'), package = "GSClassifier"))
geneSet <- PADi$geneSet
res <- trainDataProc(
Xmat = Xs,
Yvec = Ys,
geneSet = geneSet,
subtype = 2,
ptail = 0.5,
breakVec = c(0, 0.25, 0.5, 0.75, 1)
)
# Data
Xbin <- res$dat$Xbin
Ybin <- res$dat$Ybin
genes <- res$dat$Genes
params <- list(
# xgboost & xgb.cv
nfold = 5,
nrounds = 100,
# xgboost
max_depth = 10,
colsample_bytree = 1,
min_child_weight = 1,
eta = 0.5,
gamma = 0.25,
subsample = 0.7
)
breakVec=c(0, 0.25, 0.5, 0.75, 1.0)
verbose = T
}
dtrain <- xgb.DMatrix(Xbin, label = Ybin)
# xgb.cv
# 2022-09-15 : WARNING: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
# 2022-09-16: WARNING: If you are loading a serialized model (like pickle in Python, RDS in R) generated by
# older XGBoost, please export the model by calling `Booster.save_model` from that version
# first, then load it back in current version. See: https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html
# for more details about differences between saving model and serializing.
for(i in 1:10000){
x <- tryCatch(
cvRes <- xgb.cv(
params = params,
nrounds = params$nrounds,
nfold = params$nfold,
data = dtrain,
early_stopping_rounds=2,
metrics = list("logloss", "auc"),
objective = "binary:logistic",
verbose = verbose
),
error = function(e)e)
if('message' %in% names(x)){
if(verbose) LuckyVerbose('Attention! AUC: the dataset only contains pos or neg samples. Repeat xgb.cv')
x_error <- x
} else {
cvRes <- x
break
}
}
if(verbose) LuckyVerbose('Best interation: ',cvRes$best_iteration)
# xgboost via best interation
bst <- xgboost(params = params,
data = Xbin,
label = Ybin,
nrounds = cvRes$best_iteration,
objective = "binary:logistic",
verbose = ifelse(verbose,1,0))
return(list(bst=bst, breakVec=breakVec, genes=genes))
}
# nround不是xgb.cv选的。容易过拟合。
cvFitOneModel <- function(Xbin, Ybin,genes,
params = list(
# xgb.cv only
nfold=5,
nrounds = 15,
# xgb.cv & xgboost
max_depth = 10,
eta = 0.5,
nthread = 5,
colsample_bytree = 1,
min_child_weight = 1
),
breakVec=c(0, 0.25, 0.5, 0.75, 1.0),
seed = 102,
verbose = F){
# Test
if(F){
# Example
library(GSClassifier); library(xgboost)
testData <- readRDS(system.file("extdata", "testData.rds", package = "GSClassifier"))
expr <- testData$PanSTAD_expr_part
design <- testData$PanSTAD_phenotype_part
modelInfo <- modelData(
design,
id.col = "ID",
variable = c("platform", "PAD_subtype"),
Prop = 0.1,
seed = 145
)
Xs <- expr[,modelInfo$Data$Train$ID]
y <- modelInfo$Data$Train
y <- y[colnames(Xs),]
Ys <- ifelse(y$PAD_subtype == 'PAD-I',1,ifelse(y$PAD_subtype == 'PAD-II',2,ifelse(y$PAD_subtype == 'PAD-III',3,ifelse(y$PAD_subtype == 'PAD-IV',4,NA)))); table(Ys)/length(Ys)
PADi <- readRDS(system.file("extdata", paste0('PAD.train_20220916.rds'), package = "GSClassifier"))
geneSet <- PADi$geneSet
res <- trainDataProc(
Xmat = Xs,
Yvec = Ys,
geneSet = geneSet,
subtype = 2,
ptail = 0.5,
breakVec = c(0, 0.25, 0.5, 0.75, 1)
)
# Data
Xbin <- res$dat$Xbin
Ybin <- res$dat$Ybin
genes <- res$dat$Genes
params <- list(
# xgboost & xgb.cv
nrounds = 15,
# xgboost
max_depth = 10,
colsample_bytree = 1,
min_child_weight = 1,
eta = 0.5,
gamma = 0.25,
subsample = 0.7
)
breakVec=c(0, 0.25, 0.5, 0.75, 1.0)
verbose = T
seed = 102
}
# xgboost via best interation
set.seed(seed)
params_xg <- params[-match(c('nrounds'), names(params))]
bst <- xgboost(params = params_xg,
data = Xbin,
label = Ybin,
objective = "binary:logistic",
nrounds = params$nrounds,
verbose = ifelse(verbose,1,0))
return(list(bst=bst, breakVec=breakVec, genes=genes))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.