```{=html}

```r
# Set the global chunk option `echo = TRUE` so that chunk source code is shown
# alongside its output in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
library(etlTurtleNesting)
library(drake)
library(wastdr)
library(pointblank)
library(magrittr)
library(reactable)
library(gt)
# readd(w2_data)
# readd(w2_user_mapping)
# loadd(w2_data)
# loadd(w2_user_mapping)
# Validation agent for the W2 user mapping.
#
# The mapping is annotated, wrapped in a pointblank agent, and checked with a
# single validation step: every row must have `dist` strictly below 0.01.
# NOTE(review): `user_mapping` is not defined in this file (the commented-out
# loaders above refer to `w2_user_mapping`) -- confirm it is in scope.
a <- user_mapping %>%
  annotate_user_mapping_w2() %>%
  pointblank::create_agent() %>%
  pointblank::col_vals_lt(columns = "dist", value = 0.01) %>%
  pointblank::interrogate()

# Build the review table of user-name mismatches (`actions_table`).
#
# `actions_table` is a gt table of the rows that failed the first validation
# step of agent `a`, sorted by decreasing `dist`, or `NULL` when there is
# nothing to review.
#
# The guard tests whether the agent returned any data extracts rather than
# reading `a$has_intel`: pointblank flags an interrogated agent through its
# class, not a list element, so `a$has_intel` is `NULL` and the table would
# never be built. Testing the extracts also keeps `extract2(1)` from failing
# on an empty list when every row passed.
mismatch_extracts <- pointblank::get_data_extracts(a)

if (length(mismatch_extracts) > 0) {
  actions_table <- mismatch_extracts %>%
    # First extract = failing rows of the first validation step with failures.
    magrittr::extract2(1) %>%
    # Row-wise evaluation is only needed by the disabled mutate() below.
    dplyr::rowwise() %>%
    # dplyr::mutate(active_at = get_user_area(odkc_ex, odkc_username)) %>%
    # dplyr::select(-email, -phone) %>%
    # Worst matches (largest dissimilarity) first.
    dplyr::arrange(dplyr::desc(dist)) %>%
    gt::gt() %>%
    gt::fmt_markdown(columns = gt::everything()) %>%
    gt::cols_label(
      legacy_username = gt::html("<h5>They wrote</h5>\n<small>W2 username</small>"),
      legacy_userid = gt::html("<small>W2 legacy ID</small>"),
      # active_at = gt::html("<small>Active at</small>"),
      wastd_matched = gt::html("<h5>We matched</h5>\n<small>WAStD User</small>"),
      search_wastd = gt::html("<h5>Search</h5>\n<small>likely candidates</small>"),
      dist = gt::html("<h5>Dissimilarity</h5>\n<small>smaller = better</small>")
    ) %>%
    gt::tab_spanner(
      label = "Wamtram profile",
      columns = c(legacy_username, legacy_userid)
    ) %>%
    gt::tab_spanner(
      label = "WASTD User match",
      columns = c(wastd_matched, search_wastd, dist)
    ) %>%
    gt::tab_spanner(
      label = "WASTD User profile (chosen as most likely)",
      columns = c(role, pk, username, name, nickname, aliases, email, phone)
    )
} else {
  actions_table <- NULL
}

# User matching results {.tabset}

## Please review {.active}

Work through the issues directly in the table below.

Your actions:

# Show the review table when one was built; otherwise report that the
# validation found no mismatches.
if (!is.null(actions_table)) {
  actions_table
} else {
  wastdr::wastdr_msg_success("No mismatches found")
}

Data and Methods

During the data import from ODK Central to WAStD, wastdr maps each distinct ODK Collect "username" as written by data collectors to actual WAStD user profiles.

The matching is done by fuzzyjoin::stringdist_left_join using the Jaro-Winkler distance between the ODKC username and the WAStD name and aliases. Aliases are split up by comma. The Jaro-Winkler distance was chosen as it returns the highest number of correct matches.

Details about available distance measures can be found in the documentation of the stringdist package (help topic `stringdist-metrics`).

You can download a spreadsheet of mismatches from the CSV button:

# Auto-print the interrogated validation agent `a`.
# NOTE(review): presumably this renders the pointblank report that carries the
# CSV download button mentioned above -- confirm in the rendered output.
a

Note: updating WAStD will not refresh this report automatically - we have to re-run the data import.

When we re-run the user matching with fresh data, users with updated aliases should match up.

The QA validation will pick up all username matches with a dissimilarity of 0.01 or higher. Perfect matches will have a dissimilarity below 0.01.

Adding new data collectors to WAStD

Coordinators of data capture programs supply us with a spreadsheet in this exact format:

We add the following columns:

Create missing users in WAStD

Run manually when necessary.

# Create WAStD users for the legacy user IDs that failed the validation.
#
# Nothing is posted when the agent returned no data extracts.
validation_data_extracts <- pointblank::get_data_extracts(a)

if (length(validation_data_extracts) > 0) {
  # Failing rows of the first extract, reduced to distinct legacy user IDs and
  # enriched with the matching person record from `w2_data$persons`.
  missing_users <- validation_data_extracts[[1]] %>%
    dplyr::rowwise() %>%
    dplyr::select(legacy_userid) %>%
    dplyr::arrange(legacy_userid) %>%
    dplyr::distinct(legacy_userid) %>%
    dplyr::left_join(w2_data$persons, by = c("legacy_userid" = "person_id")) %>%
    dplyr::transmute(
      # Username: urlized clean name with dashes replaced by underscores.
      username = stringr::str_replace_all(
        wastdr::urlize(clean_name), "-", "_"
      ),
      name = clean_name,
      # phone = mobile, # NA
      # email = clean_email, # NA
      role = glue::glue("{specialty} {comments}")
    )

  # Send the new user records to the "users" endpoint.
  wastdr::wastd_bulk_post(missing_users, "users", verbose = TRUE)
}


dbca-wa/etlTurtleNesting documentation built on Nov. 18, 2022, 8:03 a.m.