R/logger_test.R

Defines functions logger_test

Documented in logger_test

#' logger_test
#'
#' Test the logger exports against the schema to see if the event name has the
#' correct parameters.
#'
#' Every row of `log_data` whose `type` is `'event'` and whose `event_name`
#' appears in the schema is compared against the parameters the schema expects
#' for that event (the event's extra parameters plus the OS-specific standard
#' parameters). All other rows are skipped. Progress is printed for every row.
#'
#' @param log_data The output of the logger
#' @param os The operating system, either `"android"` or `"ios"`
#'   (case-insensitive)
#' @param schema The schema DF. Its first column is used as the event name and
#'   it must contain an `extra_parameters` column holding a `;`-separated list
#'   of parameters.
#' @return A tibble with one row per tested log row: the non-NA columns of
#'   that row plus `test_expected_params`, `test_passed_params`,
#'   `test_pass_rate`, `test_perfect`, `test_source_fail_rate`,
#'   `test_schema_fail_rate`, `test_source_fails` and `test_schema_fails`.
#'   An empty tibble is returned when no row could be tested.
#' @export
logger_test <- function(log_data, os, schema) {

  # library() errors when the tidyverse is missing; require() would only
  # return FALSE and let the function fail later with an unrelated message.
  library(tidyverse)

  # Validate `os` before it is used in any scalar condition.
  os <- str_to_lower(os)

  if (length(os) != 1 || is.na(os) || !os %in% c('android', 'ios')) {
    stop('os needs to be either "android" or "ios"')
  }

  prep_schema <- schema %>%
    select(1, extra_parameters)

  if (os == 'android') {

    standard_params <- c("event_name", "time", "type", "tracker",
                         'app_package', 'app_version', 'device_model',
                         'operating_system', 'os_version', "screen_name",
                         "app_session_hit_count", "app_session_id", "date",
                         "timestamp")

    # On Android some schema parameter names are swapped for the names used
    # in the log, depending on the event name.
    prep_schema <- prep_schema %>%
      mutate(
        extra_parameters = ifelse(
          str_detect(firebase_event_name, 'biometric'),
          str_replace(extra_parameters, 'login_method', 'biometric_method'),
          extra_parameters
        ),
        extra_parameters = ifelse(
          str_detect(firebase_event_name, 'payment'),
          str_replace(extra_parameters, 'amount_band', 'payment_amount'),
          extra_parameters
        ),
        extra_parameters = ifelse(
          str_detect(firebase_event_name, 'transaction'),
          str_replace(extra_parameters, 'amount_band', 'payment_amount'),
          extra_parameters
        )
      )

  } else {

    # The iOS log gets its `type` column derived from the event name here.
    log_data <- log_data %>%
      mutate(type = ifelse(event_name == 'screen_view', 'screenView',
                           ifelse(event_name == 'User property',
                                  'userProperty', 'event')))

    standard_params <- c("event_name", "timestamp", "screen_name",
                         "app_session_hit_count", "app_session_id", "date",
                         "device_model")

  }

  # Split the ';'-separated parameter list into one column per parameter and
  # strip everything from the first ':' onwards. The columns are addressed by
  # name so that ALL of them are cleaned (positions 2:25 missed parameter25,
  # which sits in column 26).
  param_cols <- paste0('parameter', seq_len(25))

  prep_schema <- prep_schema %>%
    separate(extra_parameters, into = param_cols, sep = ';') %>%
    mutate_all(~ trimws(.)) %>%
    mutate_at(param_cols, ~ str_replace_all(., ':.*', '')) %>%
    remove_na_cols()

  # One-row table with a column per standard parameter, keyed by log_type so
  # it can be joined onto every schema row.
  params_tbl <- tibble(params = standard_params,
                       log_type = os) %>%
    mutate(row = row_number(),
           parameters = paste0('standard_parameter', row)) %>%
    select(-row)

  spread_params <- params_tbl %>%
    spread(parameters, params)

  final_schema <- prep_schema %>%
    mutate(log_type = os) %>%
    rename(event_name = 1) %>%
    left_join(spread_params, by = 'log_type')

  event_list <- quick_pull(final_schema, 'event_name')

  # Collect per-row results in a preallocated list and bind once at the end
  # instead of growing a tibble inside the loop. Skipped rows stay NULL and
  # are ignored by bind_rows().
  n_rows <- nrow(log_data)
  results <- vector('list', n_rows)

  # seq_len() yields an empty sequence for a 0-row log; 1:0 would not.
  for (i in seq_len(n_rows)) {

    x <- log_data %>%
      slice(i) %>%
      remove_na_cols()

    xname <- quick_pull(x, 'event_name')

    # Scalar-safe check: a missing or NA `type` counts as "not an event"
    # rather than raising an error inside if().
    is_event <- 'type' %in% names(x) && isTRUE(x$type == 'event')

    if (!is_event || !xname %in% event_list) {

      print(paste0(i, ': ', xname, ' skipped'))

      next

    }

    log_params <- tibble(logger_values = sort(names(x))) %>%
      mutate(log_type = os)

    if (os == 'ios') {

      # `type` was derived above for iOS and is not a logged parameter.
      log_params <- log_params %>%
        filter(!logger_values == 'type')

    }

    evname_left <- x$event_name

    y <- final_schema %>%
      filter(event_name == evname_left) %>%
      remove_na_cols() %>%
      select(-log_type, -event_name) %>%
      gather()

    # Drop the parameters that belong to the other platform.
    schema_params <- tibble(schema_values = sort(y$value)) %>%
      mutate(log_type = os) %>%
      filter(ifelse(log_type == 'android',
                    !schema_values == 'has_apple_pay',
                    !schema_values %in% c('has_google_pay', 'gpay_activation')))

    # After the full join, log_type.x is NA for names only present in the log
    # and log_type.y is NA for names only present in the schema.
    test_tbl <- schema_params %>%
      full_join(log_params, by = c('schema_values' = 'logger_values')) %>%
      mutate(log_type.x = ifelse(is.na(log_type.x), 'missing_data', log_type.x),
             log_type.y = ifelse(is.na(log_type.y), 'missing_data', log_type.y),
             matching_status = ifelse(log_type.x == log_type.y, 'full_match',
                                      ifelse(log_type.x == 'missing_data',
                                             'missing_from_schema',
                                             'missing_from_source'))) %>%
      rename(values = 1)

    n_checked <- nrow(test_tbl)
    pass_rate <- sum(test_tbl$matching_status == 'full_match') / n_checked
    source_fail_rate <- sum(test_tbl$matching_status == 'missing_from_source') / n_checked
    schema_fail_rate <- sum(test_tbl$matching_status == 'missing_from_schema') / n_checked

    source_fails <- test_tbl %>%
      filter(matching_status == 'missing_from_source') %>%
      pull(values) %>%
      paste(collapse = ', ')

    schema_fails <- test_tbl %>%
      filter(matching_status == 'missing_from_schema') %>%
      pull(values) %>%
      paste(collapse = ', ')

    tests_final <- tibble(test_expected_params = nrow(schema_params),
                          test_passed_params = nrow(log_params),
                          test_pass_rate = pass_rate,
                          test_perfect = pass_rate == 1,
                          test_source_fail_rate = source_fail_rate,
                          test_schema_fail_rate = schema_fail_rate,
                          test_source_fails = source_fails,
                          test_schema_fails = schema_fails)

    results[[i]] <- bind_cols(x, tests_final)

    print(paste0(i, ': ', xname, ' tested'))

  }

  new_test_tbl <- bind_rows(results)

  # Nothing was tested (empty log, or every row skipped): there are no
  # columns to reorder, so return the empty tibble as-is.
  if (ncol(new_test_tbl) == 0) {
    return(new_test_tbl)
  }

  # Put the key columns first. Only columns that actually exist are named,
  # because remove_na_cols() is applied to every row and may have left a
  # column out of all tested rows.
  # NOTE(review): assumes remove_na_cols() drops all-NA columns - confirm.
  lead_cols <- c('event_name', 'test_expected_params', 'test_passed_params',
                 'test_pass_rate', 'test_perfect', 'test_source_fail_rate',
                 'test_source_fails', 'test_schema_fails',
                 'screen_name', 'timestamp', 'app_session_id',
                 'app_session_hit_count', 'date', 'type')
  lead_cols <- intersect(lead_cols, names(new_test_tbl))

  new_test_tbl <- new_test_tbl %>%
    select(one_of(lead_cols), everything())

  return(new_test_tbl)

}
neugelb/neugelbtools documentation built on July 7, 2020, 1:17 a.m.