library("opensignauxfaibles") library("dplyr") database_signauxfaibles <- database_connect() table_wholesample <- collect_wholesample(db = database_signauxfaibles, table = "wholesample") sample_train <- table_wholesample %>% filter(periode == "2014-01-01") sample_test <- table_wholesample %>% filter(periode == "2015-01-01") sample_actual <- table_wholesample %>% filter(periode == params$actual_date)
table_wholesample %>% detect_na()
formulas_6_18 <- list( "effectif" = outcome_6_18 ~ cut_effectif, "growth_effectif" = outcome_6_18 ~ cut_effectif + cut_growthrate + lag_effectif_missing, "apart" = outcome_6_18 ~ cut_effectif + cut_growthrate + lag_effectif_missing + apart_last12_months + apart_consommee + apart_share_heuresconsommees, "cotisation_effectif" = outcome_6_18 ~ cut_effectif + cut_growthrate + lag_effectif_missing + apart_last12_months + apart_consommee + apart_share_heuresconsommees + log_cotisationdue_effectif, "dettecumulee" = outcome_6_18 ~ cut_effectif + cut_growthrate + lag_effectif_missing + apart_last12_months + apart_consommee + apart_share_heuresconsommees + log_cotisationdue_effectif + log_ratio_dettecumulee_cotisation + indicatrice_dettecumulee, "croissancedettecumulee" = outcome_6_18 ~ cut_effectif + cut_growthrate + lag_effectif_missing + apart_last12_months + apart_consommee + apart_share_heuresconsommees + log_cotisationdue_effectif + log_ratio_dettecumulee_cotisation + indicatrice_dettecumulee + indicatrice_croissance_dettecumulee, "nb_debits" = outcome_6_18 ~ cut_effectif + cut_growthrate + lag_effectif_missing + apart_last12_months + apart_consommee + apart_share_heuresconsommees + log_cotisationdue_effectif + log_ratio_dettecumulee_cotisation + indicatrice_dettecumulee + indicatrice_croissance_dettecumulee + nb_debits, "delais" = outcome_6_18 ~ cut_effectif + cut_growthrate + lag_effectif_missing + apart_last12_months + apart_consommee + apart_share_heuresconsommees + log_cotisationdue_effectif + log_ratio_dettecumulee_cotisation + indicatrice_dettecumulee + indicatrice_croissance_dettecumulee + nb_debits + delai + delai_sup_6mois, "codenaf" = outcome_6_18 ~ cut_effectif + cut_growthrate + lag_effectif_missing + apart_last12_months + apart_consommee + apart_share_heuresconsommees + log_cotisationdue_effectif + log_ratio_dettecumulee_cotisation + indicatrice_dettecumulee + indicatrice_croissance_dettecumulee + nb_debits + delai + delai_sup_6mois + libelle_naf_niveau1 )
plyr::ldply( .data = formulas_6_18, .fun = function(x) { glm(formula = x, family = "binomial", data = sample_train) %>% broom::augment(newdata = sample_test, type.predict = "response") %>% pROC::roc(outcome_6_18 ~ .fitted, data = . , smooth = FALSE) %>% .$auc %>% tibble::as_tibble() } )
output_prediction_6_18 <- formulas_6_18$dettecumulee %>% glm( formula = ., data = sample_train, family = "binomial" ) %>% broom::augment( newdata = sample_actual, type.predict = "response" ) %>% dplyr::rename(prediction_6_18 = .fitted)
output_prediction_6_18 %>% dplyr::anti_join( y = compute_filter_proccollectives( db = database_signauxfaibles, .date = "2017-08-01"), by = "numero_compte", copy = TRUE ) %>% dplyr::anti_join( y = compute_filter_ccsv( db = database_signauxfaibles, .date = "2017-08-01"), by = c("numero_compte"), copy = TRUE ) %>% dplyr::arrange( dplyr::desc( prediction_6_18 ) ) %>% dplyr::filter(is.na(prediction_6_18) == FALSE) %>% dplyr::select(raison_sociale, siren, siret, libelle_naf_niveau1, code_departement, siege, prediction_6_18, effectif, apart_last12_months, apart_consommee, apart_share_heuresconsommees, nb_debits, delai, delai_sup_6mois ) %>% dplyr::slice(1:100) %>% readr::write_csv(path = "data/table_predictions_6_18_2017_09_18.csv")
output_prediction_6_18 %>% dplyr::anti_join( y = compute_filter_proccollectives( db = database_signauxfaibles, .date = "2017-08-01"), by = "numero_compte", copy = TRUE ) %>% dplyr::anti_join( y = compute_filter_ccsv( db = database_signauxfaibles, .date = "2017-08-01"), by = c("numero_compte"), copy = TRUE ) %>% dplyr::arrange( dplyr::desc( prediction_6_18 ) ) %>% dplyr::filter(is.na(prediction_6_18) == FALSE) %>% readr::write_csv(path = "data/table_predictions_6_18_2017_09_18_all.csv")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.