# Reference date supplied through `params`; it selects the sample that is
# scored further down (see `sample_actual`).
params$actual_date

library("opensignauxfaibles")
library("dplyr")

# Connect to the project database and load the "wholesample" table.
database_signauxfaibles <- database_connect()
table_wholesample <- collect_wholesample(
  db = database_signauxfaibles,
  table = "wholesample"
)

# Three samples, one period each, all restricted to rows with effectif >= 10:
#   - sample_train : period 2015-01-01, used to fit the models
#   - sample_test  : period 2016-01-01, used to evaluate them
#   - sample_actual: period `params$actual_date`, the sample to score
sample_train <- filter(
  table_wholesample,
  periode == "2015-01-01",
  effectif >= 10
)
sample_test <- filter(
  table_wholesample,
  periode == "2016-01-01",
  effectif >= 10
)
sample_actual <- filter(
  table_wholesample,
  periode == params$actual_date,
  effectif >= 10
)

# Missing-value check on the full table (helper from the project package).
table_wholesample %>%
  detect_na()
# Candidate model specifications, keyed by a short label.
#
# Every formula has `outcome_0_12` as its response. Each entry extends the
# right-hand side of the previous one, with one exception noted below.
# NOTE(review): "dettecumulee" uses the `_12m` variants of the debt variables
# whereas the later specifications use the unsuffixed ones -- confirm this
# switch is intentional.
formulas_0_12 <- list(
  effectif = outcome_0_12 ~
    cut_effectif,

  # + growth rate class and missing-lag indicator
  growth_effectif = outcome_0_12 ~
    cut_effectif +
    cut_growthrate +
    lag_effectif_missing,

  # + the three `apart_*` variables
  apart = outcome_0_12 ~
    cut_effectif +
    cut_growthrate +
    lag_effectif_missing +
    apart_last12_months +
    apart_consommee +
    apart_share_heuresconsommees,

  # + log_cotisationdue_effectif
  cotisation_effectif = outcome_0_12 ~
    cut_effectif +
    cut_growthrate +
    lag_effectif_missing +
    apart_last12_months +
    apart_consommee +
    apart_share_heuresconsommees +
    log_cotisationdue_effectif,

  # + cumulated-debt variables (`_12m` variants)
  dettecumulee = outcome_0_12 ~
    cut_effectif +
    cut_growthrate +
    lag_effectif_missing +
    apart_last12_months +
    apart_consommee +
    apart_share_heuresconsommees +
    log_cotisationdue_effectif +
    log_ratio_dettecumulee_cotisation_12m +
    indicatrice_dettecumulee_12m,

  # Unsuffixed debt variables replace the `_12m` ones, plus
  # indicatrice_croissance_dettecumulee.
  croissancedettecumulee = outcome_0_12 ~
    cut_effectif +
    cut_growthrate +
    lag_effectif_missing +
    apart_last12_months +
    apart_consommee +
    apart_share_heuresconsommees +
    log_cotisationdue_effectif +
    log_ratio_dettecumulee_cotisation +
    indicatrice_dettecumulee +
    indicatrice_croissance_dettecumulee,

  # + nb_debits
  nb_debits = outcome_0_12 ~
    cut_effectif +
    cut_growthrate +
    lag_effectif_missing +
    apart_last12_months +
    apart_consommee +
    apart_share_heuresconsommees +
    log_cotisationdue_effectif +
    log_ratio_dettecumulee_cotisation +
    indicatrice_dettecumulee +
    indicatrice_croissance_dettecumulee +
    nb_debits,

  # + delai and delai_sup_6mois
  delais = outcome_0_12 ~
    cut_effectif +
    cut_growthrate +
    lag_effectif_missing +
    apart_last12_months +
    apart_consommee +
    apart_share_heuresconsommees +
    log_cotisationdue_effectif +
    log_ratio_dettecumulee_cotisation +
    indicatrice_dettecumulee +
    indicatrice_croissance_dettecumulee +
    nb_debits +
    delai +
    delai_sup_6mois,

  # + libelle_naf_niveau1
  codenaf = outcome_0_12 ~
    cut_effectif +
    cut_growthrate +
    lag_effectif_missing +
    apart_last12_months +
    apart_consommee +
    apart_share_heuresconsommees +
    log_cotisationdue_effectif +
    log_ratio_dettecumulee_cotisation +
    indicatrice_dettecumulee +
    indicatrice_croissance_dettecumulee +
    nb_debits +
    delai +
    delai_sup_6mois +
    libelle_naf_niveau1
)
# Out-of-sample comparison of the candidate specifications.
#
# For each formula in `formulas_0_12`: fit a binomial GLM on `sample_train`,
# predict response-scale probabilities on `sample_test`, build the ROC curve
# of `outcome_0_12` against those predictions, and keep its AUC. The
# per-formula results are combined by plyr::ldply(); the resulting table is
# not stored.
plyr::ldply(
  .data = formulas_0_12,
  .fun = function(model_formula) {
    fitted_model <- glm(
      formula = model_formula,
      family = "binomial",
      data = sample_train
    )

    # `.fitted` holds the predicted probability for each test row.
    scored_test <- broom::augment(
      fitted_model,
      newdata = sample_test,
      type.predict = "response"
    )

    roc_curve <- pROC::roc(
      outcome_0_12 ~ .fitted,
      data = scored_test,
      smooth = FALSE
    )

    tibble::as_tibble(roc_curve$auc)
  }
)
# Score the sample selected by `params$actual_date`.
#
# The "dettecumulee" specification is fitted as a binomial GLM on
# `sample_train`; `sample_actual` is then augmented with response-scale
# predictions, and the `.fitted` column is renamed to `prediction_0_12`.
output_prediction_0_12 <-
  glm(
    formula = formulas_0_12$dettecumulee,
    data = sample_train,
    family = "binomial"
  ) %>%
  broom::augment(
    newdata = sample_actual,
    type.predict = "response"
  ) %>%
  dplyr::rename(prediction_0_12 = .fitted)
# Export the ranked predictions to CSV.
#
# Rows whose `numero_compte` appears in the result of
# compute_filter_proccollectives() or compute_filter_ccsv() are removed
# (anti-joins), the rest is sorted by decreasing `prediction_0_12`, and rows
# without a prediction are dropped. The ranking is built once and reused for
# both files, so each filter helper is called once instead of twice.
#
# NOTE(review): the filter date and the date stamp in the file names are
# hard-coded, while the scored sample follows `params$actual_date` -- confirm
# whether they should be derived from it.
prediction_filter_date <- "2017-10-01"

ranked_prediction_0_12 <- output_prediction_0_12 %>%
  dplyr::anti_join(
    y = compute_filter_proccollectives(
      db = database_signauxfaibles,
      .date = prediction_filter_date
    ),
    by = "numero_compte",
    copy = TRUE
  ) %>%
  dplyr::anti_join(
    y = compute_filter_ccsv(
      db = database_signauxfaibles,
      .date = prediction_filter_date
    ),
    by = "numero_compte",
    copy = TRUE
  ) %>%
  dplyr::arrange(dplyr::desc(prediction_0_12)) %>%
  dplyr::filter(!is.na(prediction_0_12))

# Writing fails when the target directory is missing; create it up front so
# the export does not abort after the expensive steps above.
dir.create("output", showWarnings = FALSE, recursive = TRUE)

# The output path is passed positionally: the second argument of write_csv()
# was renamed from `path` to `file`, and naming it `path` is deprecated.

# File 1: the 100 highest predictions, key columns first.
ranked_prediction_0_12 %>%
  dplyr::select(
    raison_sociale,
    siret,
    libelle_naf_niveau1,
    code_departement,
    prediction_0_12,
    effectif,
    apart_last12_months,
    apart_consommee,
    apart_share_heuresconsommees,
    nb_debits,
    delai,
    delai_sup_6mois,
    dplyr::everything()
  ) %>%
  dplyr::slice(1:100) %>%
  readr::write_csv("output/table_predictions_2017_11_29.csv")

# File 2: the full ranking, original column order.
ranked_prediction_0_12 %>%
  readr::write_csv("output/table_predictions_2017_11_29_all.csv")
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.