#' Analyze and correct categorie
#'
#' This function takes `comptage` and `enquete` data to compute categorie_corrigee field.
#'
#' @param comptage a data.frame that must contain the following columns: .....
#' @param enquete a data.frame that must contain the following columns: .....
#'
#' @import magrittr
#' @importFrom rlang .data
#'
#' @return a vector the same length as the number of rows in comptage with the new `categorie` value
#' @keywords internal
correct_categ <- function(comptage,
enquete) {
# Test input data.frames ----------------------------------------------------------------------
df_has_cols(comptage,
comptage_colnames)
df_has_cols(enquete,
enquete_colnames)
# Test input for unexpected values ------------------------------------------------------------
## TODO
# Combine result to the output -----------------------------------------------------------------
## Add `comptage` information to enquete
enquete <- enquete %>%
dplyr::select(.data$id_quest, .data$categorie, .data$categorie_corrige,
.data$type_sortie, .data$dms, dplyr::starts_with("iti_"),
.data$km_sortie, .data$type_trajet, # used for Case 5
.data$nb_vae, .data$nb_total_velo, .data$activites, ## Used for Case 6 11
.data$activite_motiv, .data$activites, .data$activites_aucune # USed for Case 9 12
) %>%
dplyr::mutate(main_id_quest = radical_quest(.data$id_quest)) %>% # Deal with multiple quest by group
dplyr::left_join(dplyr::select(comptage,
.data$id_quest, .data$categorie_visuelle_cycliste),
by = c("main_id_quest" = "id_quest"))
## Deal with differences in categorie and categorie_visuelle
cat_to_correct <- enquete %>%
##Delete enquete on non-cyclists (https://github.com/JMPivette/evavelo/discussions/3)
dplyr::filter(!is.na(.data$categorie_visuelle_cycliste)) %>%
dplyr::filter(.data$categorie != .data$categorie_visuelle_cycliste) %>%
## Apply case 1 2 3 4 7 10 algorithm
correct_itinerant() %>%
## Apply case 6 11 algorithm
correct_spor_lois() %>%
## Apply case 9 12 algorithm
correct_util_lois() %>%
## Apply case 5 8
correct_util_sport() %>%
dplyr::select(.data$main_id_quest,
.data$id_quest,
.data$categorie_corrige,
.data$categorie)
message("\nCorrection de cat\u00e9gories pour ", nrow(cat_to_correct), " questionnaires ....")
## Check where decision couldn't be made and replace with answer from enquete.
no_decision <- cat_to_correct %>%
dplyr::filter(is.na(.data$categorie_corrige)) %>%
dplyr::pull(.data$id_quest)
nb_no_decision <- length(no_decision)
if(nb_no_decision != 0){
message(
"Il n\'a pas \u00e9t\u00e9 possible de corriger les cat\u00e9gories de ",
nb_no_decision,
" questionnaire(s).\n\tLa cat\u00e9gorie du d\u00e9clarant sera utilis\u00e9e:\n\t",
paste(no_decision, collapse = ", ")
)
}
cat_to_correct <- cat_to_correct %>%
dplyr::mutate(categorie_corrige = dplyr::coalesce(.data$categorie_corrige,
.data$categorie)) %>%
dplyr::select(.data$main_id_quest,
.data$id_quest,
.data$categorie_corrige)
## Check for multiple "categorie_corrige" in the same group
quest_multiple_cat <- cat_to_correct %>%
dplyr::distinct(.data$main_id_quest, .data$categorie_corrige) %>%
dplyr::count(.data$main_id_quest) %>%
dplyr::filter(.data$n>1) %>%
dplyr::pull(.data$main_id_quest)
if(length(quest_multiple_cat) != 0){
warning(
"Les questionnaires multiples suivants ont plusieurs valeurs de cat\u00e9gorie corrig\u00e9es:\n\t",
quest_multiple_cat,
call. = FALSE
)
}
## Update comptage values (categorie_visuelle_cycliste_corrige)
comptage <- comptage %>%
dplyr::select(.data$id_quest, .data$categorie_visuelle_cycliste, .data$categorie_breve) %>%
dplyr::left_join(## use left join since we have NAs in id_quest
cat_to_correct %>%
dplyr::select(.data$main_id_quest, .data$categorie_corrige) %>%
dplyr::group_by(.data$main_id_quest) %>%
dplyr::slice_head(), ## Remove multiple cat per questionary to avoid adding rows
by = c("id_quest" = "main_id_quest")
) %>%
dplyr::transmute(
.data$id_quest,
categorie_visuelle_cycliste_corrige = dplyr::coalesce(
.data$categorie_corrige,
.data$categorie_breve, ## chapter 3.1.12.2 categorie_breve override categorie_visuelle_cycliste
.data$categorie_visuelle_cycliste)
) %>%
dplyr::mutate( ## Remove answers in case of divergent answers in multiple questionary
categorie_visuelle_cycliste_corrige = dplyr::if_else(
.data$id_quest %in% quest_multiple_cat,
NA_character_,
.data$categorie_visuelle_cycliste_corrige)
)
## Update enquete values (categorie_corrige)
enquete <- enquete %>%
dplyr::transmute(.data$id_quest,
categorie_corrige = .data$categorie) %>% ## initialize with response from cyclist
dplyr::rows_update(dplyr::select(cat_to_correct, -.data$main_id_quest),
by = "id_quest")
## Return a list with all information.
list(comptages_man_post_traitements = comptage,
enquetes_post_traitement = enquete)
}
# Individual cases functions ------------------------------------------------------------------
#' Apply categorie_corrige methodology when Itinerant is present in the answer
#'
#' In Chapter 3.1.11 , this corresponds to cases 1 2 3 4 7 10
#'
#' this function can be used inside pipe operator and is compatible with dplyr
#'
#' @param data a data.frame
#'
#' @importFrom rlang .data
#'
#' @return a data.frame the same size of data with updated categorie_corrige values.
#' @keywords internal
correct_itinerant <- function(data){
## Apply algorithm
rows_to_update <- data %>%
dplyr::filter(.data$categorie != .data$categorie_visuelle_cycliste) %>%
dplyr::filter(.data$categorie == "Itin\u00e9rant" |
.data$categorie_visuelle_cycliste == "Itin\u00e9rant") %>%
## check coherence of itinerant answers ('coherent' column)
add_coherence() %>%
dplyr::mutate(
## Other category than Itinerant
other_cat = dplyr::coalesce(
dplyr::na_if(.data$categorie, "Itin\u00e9rant"),
dplyr::na_if(.data$categorie_visuelle_cycliste, "Itin\u00e9rant"))
) %>%
dplyr::mutate(
categorie_corrige =
dplyr::case_when(
is.na(type_sortie) & is.na(dms) ~ NA_character_,
is.na(dms) & (type_sortie != "Plusieurs jours" | !coherent) ~ NA_character_,
is.na(type_sortie) & dms > 1 & !coherent ~ NA_character_,
type_sortie == "Plusieurs jours" & (dms > 1 | coherent) ~ "Itin\u00e9rant",
dms <= 1 ~ other_cat,
coherent ~ "Itin\u00e9rant",
iti_km_voyage/dms > 40 ~ "Itin\u00e9rant",
TRUE ~ other_cat
)
)
## Update rows
data %>%
dplyr::rows_update(dplyr::select(rows_to_update,
.data$id_quest, .data$categorie_corrige),
by = "id_quest")
}
#' Apply categorie_corrigee Methodology to decide between Sportif and Loisir
#'
#' In Chapter 3.1.11, this corresponds to cases 6 11
#'
#' this function can be used inside pipe operator and is compatible with dplyr
#'
#' @param data a data.frame
#'
#' @importFrom rlang .data
#'
#' @return a data.frame the same size of data with updated categorie_corrige values.
#' @keywords internal
correct_spor_lois <- function(data){
rows_to_update <- data %>%
dplyr::filter(.data$categorie == "Sportif" & .data$categorie_visuelle_cycliste =="Loisir" |
.data$categorie == "Loisir" & .data$categorie_visuelle_cycliste =="Sportif"
) %>%
dplyr::mutate(
autre_activite = .data$activites_aucune == 0,
## VAE definition might change in future version of Methodology
vae = .data$nb_vae == .data$nb_total_velo
) %>%
dplyr::mutate(
categorie_corrige = dplyr::case_when(
vae ~ "Loisir",
km_sortie > 50 & !autre_activite ~ "Sportif",
km_sortie > 50 & autre_activite ~ "Loisir",
km_sortie <= 50 ~ "Loisir",
TRUE ~ NA_character_)
)
data %>%
dplyr::rows_update(dplyr::select(rows_to_update,
.data$id_quest, .data$categorie_corrige),
by = "id_quest")
}
#' Apply categorie_corrigee Methodology to decide between Utilitaire and Loisir
#'
#' In Chapter 3.1.11, this corresponds to cases 9 12
#'
#' this function can be used inside pipe operator and is compatible with dplyr
#'
#' @param data a data.frame
#'
#' @importFrom rlang .data
#'
#' @return a data.frame the same size of data with updated categorie_corrige values.
#' @keywords internal
correct_util_lois <- function(data){
rows_to_update <- data %>%
dplyr::filter(.data$categorie == "Utilitaire" & .data$categorie_visuelle_cycliste == "Loisir" |
.data$categorie == "Loisir" & .data$categorie_visuelle_cycliste == "Utilitaire"
) %>%
dplyr::mutate(
no_activity = is.na(.data$activites) | .data$activites_aucune == 1
) %>%
dplyr::mutate(
categorie_corrige = dplyr::case_when(
no_activity ~ categorie,
stringr::str_detect(activite_motiv, "but") ~ "Utilitaire",
stringr::str_detect(activite_motiv, "occasion") ~ "Loisir",
TRUE ~ NA_character_
))
## Update rows
data %>%
dplyr::rows_update(dplyr::select(rows_to_update,
.data$id_quest, .data$categorie_corrige),
by = "id_quest")
}
#' Apply categorie_corrigee Methodology to decide between Utilitaire and Sportif
#'
#' In Chapter 3.1.11, this corresponds to cases 5 8
#'
#' this function can be used inside pipe operator and is compatible with dplyr
#'
#' @param data a data.frame
#'
#' @importFrom rlang .data
#'
#' @return a data.frame the same size of data with updated categorie_corrige values.
#' @keywords internal
correct_util_sport <- function(data){
# Cas 5
# https://github.com/JMPivette/evavelo/issues/48
cas_5 <- data %>%
dplyr::filter(.data$categorie == "Utilitaire" & .data$categorie_visuelle_cycliste =="Sportif") %>%
dplyr::mutate(
categorie_corrige =
dplyr::case_when(
is.na(km_sortie) ~ NA_character_,
km_sortie <= 30 & (is.na(type_trajet) | type_trajet == "Aller-retour") ~ "Utilitaire",
TRUE ~ "Sportif"
)
) %>%
dplyr::select(.data$id_quest, .data$categorie_corrige)
# Cas 8
# https://github.com/JMPivette/evavelo/issues/50
cas_8 <- data %>%
dplyr::filter(.data$categorie == "Sportif" & .data$categorie_visuelle_cycliste =="Utilitaire") %>%
dplyr::mutate(vae = !is.na(.data$nb_vae) & .data$nb_vae == .data$nb_total_velo) %>%
dplyr::mutate(
categorie_corrige = dplyr::case_when(
.data$km_sortie > 50 & .data$vae == FALSE ~ "Sportif",
.data$km_sortie <= 50 | .data$vae == TRUE ~ "Utilitaire",
TRUE ~ NA_character_
)
) %>%
dplyr::select(.data$id_quest, .data$categorie_corrige)
## Update rows
data %>%
dplyr::rows_update(rbind(cas_5, cas_8),
by = "id_quest")
}
#' Add a column with 'coherence' information to a data.frame
#'
#' Internal function that calls is_iti_coherent
#'
#' data should have the following columns: iti_km_voyage, iti_experience,
#' iti_depart_itineraire, iti_arrivee_itineraire,
#' iti_depart_initial, iti_arrivee_final
#'
#' @param data a data.frame. Some columns are mandatory. See details for more information
#' @param col_name name of the new column created
#'
#' @importFrom rlang .data :=
#'
#' @return 'data' data.frame with a new logical column name after 'col_name'
#' @keywords internal
add_coherence <- function(data,
col_name = "coherent") {
data %>%
dplyr::mutate(
## check coherence of itinerant answers
!!col_name := is_iti_coherent(iti_km_voyage = .data$iti_km_voyage,
iti_experience = .data$iti_experience,
iti_dep_iti_valide = .data$iti_dep_iti_valide,
iti_arr_iti_valide = .data$iti_arr_iti_valide,
iti_depart_initial = .data$iti_depart_initial,
iti_arrivee_final = .data$iti_arrivee_final)
)
}
#' Helper function to control "coherence" of specific "itinerance" answers:
#'
#' https://github.com/JMPivette/evavelo/discussions/7
#' https://github.com/JMPivette/evavelo/issues/39
#'
#' @param iti_km_voyage numeric vector
#' @param iti_experience character vector
#' @param iti_dep_iti_valide character vector
#' @param iti_arr_iti_valide character vector
#' @param iti_depart_initial character vector
#' @param iti_arrivee_final character vector
#'
#' @return a boolean vector indicating if answer is coherent
#' @keywords internal
is_iti_coherent <- function (iti_km_voyage,
iti_experience,
iti_dep_iti_valide,
iti_arr_iti_valide,
iti_depart_initial,
iti_arrivee_final){
## Check iti depart and arrivee
coher_commune <- (!is.na(iti_dep_iti_valide) | !is.na(iti_depart_initial)) &
(!is.na(iti_arr_iti_valide) | !is.na(iti_arrivee_final))
## Check that we have at least 2 answers out of three
coher_2_3 <- ((!is.na(iti_km_voyage)) + coher_commune + (!is.na(iti_experience))) >= 2
return(coher_2_3)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.