Nothing
knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(autoScorecard) #Quick Modeling ##step 1: Import Data accepts <- read.csv(system.file("extdata", "accepts.csv", package = "autoScorecard" )) ##step 2: Create scorecard ##Considering efficiency and readability, many parameters of this automatic modeling function ##are default, which requires high-precision modeling and needs to be manually established step by step auto_scorecard1 <- auto_scorecard( feature = accepts[1:2000,], key_var= "application_id", y_var = "bad_ind",sample_rate = 0.7, points0 = 600, odds0=1/20, pdo = 50, max_depth =3,tree_p = 0.1, missing_rate = 0, single_var_rate = 1, iv_set=0.02, char_to_number = TRUE , na.omit = TRUE) #Step-by-Step Modeling ##step 1: Import Data accepts <- read.csv(system.file("extdata", "accepts.csv", package = "autoScorecard" )) ##step 2: Data Description data_detect1 <- data_detect( df = accepts, key_var = c("application_id","account_number") , y_var = "bad_ind" ) head(data_detect1) ##step 3: Data Filtering feature<- filter_var( df = accepts , key_var = c("application_id","account_number"), y_var = "bad_ind" , missing_rate = 0 , single_var_rate = 1 , iv_set = 0.02, char_to_number = TRUE , na.omit = TRUE ) ##step 4: Select Training Sample d = sort( sample( nrow( feature ), nrow( feature )*0.7)) train <- feature[d,] test <- feature[-d,] ##step 5: Data Distribution Comparison comparison_two_data( df1 = train , df2 = test ,key_var = c("application_id","account_number"), y_var="bad_ind") ##step 6: Automatic binning of data ##Decision Tree Binning treebins_train <- bins_tree(df= train, key_var= c("application_id","account_number"), y_var="bad_ind", max_depth=3, p=0.1) ##Equal Frequency Binning binning_eqfreq1 <- binning_eqfreq( df= train, feat= 'tot_derog', label = 'bad_ind', nbins = 3) ##Equal Width Binning binning_eqwid1 <- binning_eqwid( df = train, feat = 'tot_derog', label = 'bad_ind', nbins = 3) ##The K-means Binning binning_kmean1 <- binning_kmean( df = train, feat= 'loan_term', label = 'bad_ind', nbins = 3) ##Chi-Square Binning bins_chim1 <- bins_chim( df = train[1:200,], key_var = "application_id", y_var = "bad_ind" , alpha=0.1 ) ##Unsupervised Automatic Binning Function f_1 <-bins_unsupervised( df = feature[1:200,] , id="application_id" , label="bad_ind" , methods = c("k_means", "equal_width","equal_freq") , bin_nums=5 ) ##The Combination of Two Bins Produces the Best Binning Result best1 <- best_iv( df=f_1 ,bin=c('bins') , method = c('method') ,variable= c( "variable" ) ,label_iv='miv' ) vs1 <- best_vs( df1 = treebins_train[,-c(3)], df2 = best1[,-c(1:2)] ,variable="variable" ,label_iv='miv' ) ##step 7: Replace Feature Data by Binning Template woe_train <- rep_woe( df= train ,key_var="application_id", y_var="bad_ind" , tool=treebins_train , var_label= "variable",col_woe='woe', lower='lower' ,upper ='upper' ) woe_test <- rep_woe( df= test ,key_var="application_id", y_var="bad_ind" , tool=treebins_train , var_label= "variable",col_woe='woe', lower='lower' ,upper ='upper' ) ##step 8: Modeling lg<-stats::glm(bad_ind~.,family=stats::binomial(link='logit'),data= woe_train) lg_both<-stats::step(lg,direction = "both") logit<-stats::predict(lg_both,woe_test) woe_test$lg_both_p<-exp(logit)/(1+exp(logit)) pred_both <- ROCR::prediction(woe_test$lg_both_p, woe_test$bad_ind) perf_both <- ROCR::performance(pred_both,"tpr","fpr") ##step 9: Correlation Diagram coe = (lg_both$coefficients) cor1<-stats::cor( Xvar_df<- woe_train[-which(colnames(feature) %in% c("application_id","account_number","bad_ind"))]) corrplot::corrplot(cor1) corrplot::corrplot(cor1,method = "number") ##step 10: Manually Input Parameters to Generate Scorecards ##scorecard Score<-noauto_scorecard( bins_card= woe_test, fit= lg_both,bins_woe=treebins_train ,points0 = 600, odds0 = 1/20, pdo = 50 ,k = 2) Score_2<-noauto_scorecard( bins_card= woe_train, fit= lg_both,bins_woe=treebins_train ,points0 = 600, odds0 = 1/20, pdo = 50 ,k = 3) ##scorecard2 Score<-noauto_scorecard2( bins_card= woe_test, fit= lg_both,bins_woe=treebins_train ,points0 = 600, odds0 = 1/20, pdo = 50 ,k = 2) Score_2<-noauto_scorecard2( bins_card= woe_train, fit= lg_both,bins_woe=treebins_train ,points0 = 600, odds0 = 1/20, pdo = 50 ,k = 3) ##step 11: PSI data_train <- Score_2$data_score data_test <- Score$data_score psi_1<-psi_cal( df_train = data_train , df_test = data_test,feat='Score',label='bad_ind' , nbins=10) ##step 12: Data Painter plot_board( label= woe_test$bad_ind, pred = woe_test$lg_both_p )
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.