R/get_content_dic_eng.R

Defines functions get_content_dic_eng

Documented in get_content_dic_eng

#' Make a nicely recoded dictionary of the content of an open answer
#'
#' @description Used when an open-comments field is received, i.e. a field that requires
#' human reclassification. The function generates a nicely formatted table and also does
#' a simple classification based on the most common categories. This function is dedicated
#' to the English language.
#'
#' @param x a tbl() that contains the relevant field
#' @param variable_name the name of the relevant field
#' @param idvar the response id variable
#' @param ... additional variables to insert into the dictionary.
#' @param folder_name a string indicating the directory in which the file will be saved; the default is "data"
#' @param name a string indicating the file name. Note: it should end with .csv
#'
#'
#' @return Writes the content dictionary into the specified folder.
#'
#' @details
#'
#' @examples
#' open_comments <- tribble(~id, ~open, ~yr,
#'                         1, "appel", 2017,
#'                         2, "appel souce", 2017,
#'                         3, "pasta", 2018,
#'                         4, "pasti", 2019,
#'                         5, "pasta", 2019,
#'                         6, "mango fruit", 2017,
#'                         7, "kewee fruit", 2017,
#'                         8, "forbbiden fruit", 2018)
#'
#' # disregard the yr variable
#' open_comments %>%
#'    get_content_dic_eng(open, idvar = id, yr,folder_name="data",name="value_dic.csv")
#'
#' @seealso prop, add_prop
#' @importFrom magrittr %>%
#' @importFrom stringr str_detect
#' @importFrom stringr str_length
#' @importFrom stringr str_to_title
#' @importFrom stringr str_to_lower
#'
#' @export
get_content_dic_eng <- function(x, variable_name, idvar = `Response ID`, ...,
                                folder_name = "data", name = "value_dic.csv") {
  # Builds a hand-coding sheet for an open-text column: one row per distinct
  # (id, ..., answer) combination, empty `Irrelevant` / `other` columns, and
  # one "1"/"" flag column per answer that occurs more than once. The sheet is
  # written to `folder_name/name` and returned invisibly.
  #
  # Dependencies are namespace-qualified instead of attached through
  # `require(tidyverse)`: `require()` changes the caller's search path and
  # only returns FALSE (rather than failing) when the package is missing.

  vn <- rlang::enquo(variable_name)
  id <- rlang::enquo(idvar)

  # Candidate categories: answers (compared in title case) that occur more
  # than once, after dropping missing values and bare "no"/"none"/"not"/
  # "nope"/"NA"-style replies.
  appears_most_vec <- x %>%
    dplyr::select(!!vn) %>%
    dplyr::filter(!is.na(!!vn)) %>%
    dplyr::filter(!stringr::str_detect(!!vn, "^no$|^NO$|^No$|^none$|^NONE$|^None$|^Not$|^not$|^NOT$|^Nope$|^nope$")) %>%
    dplyr::filter(!stringr::str_detect(!!vn, "^NA$|^N[:punct:]A$")) %>%
    # `> 1` rather than `!= 1`: an empty string cannot become a column name
    # and would match every row.
    dplyr::filter(stringr::str_length(!!vn) > 1) %>%
    dplyr::mutate(!!vn := stringr::str_to_title(!!vn)) %>%
    dplyr::count(!!vn) %>%
    dplyr::filter(.data$n > 1) %>%
    dplyr::arrange(dplyr::desc(.data$n)) %>%
    dplyr::pull(!!vn)

  content_dic <- x %>%
    dplyr::select(!!id, ..., !!vn) %>%
    dplyr::filter(!is.na(!!vn)) %>%
    dplyr::arrange(!!vn) %>%
    dplyr::distinct() %>%
    dplyr::mutate(Irrelevant = "", other = "")

  # Lower-case the answers once; every category is matched against this copy.
  answers_lower <- stringr::str_to_lower(dplyr::pull(content_dic, !!vn))
  # Columns that must not be overwritten by a flag column of the same name.
  protected_cols <- names(content_dic)

  for (word in appears_most_vec) {
    if (word %in% protected_cols) {
      warning("Skipping category '", word,
              "': it would overwrite an existing column.", call. = FALSE)
      next
    }

    # Case-insensitive *literal* containment. The answer text is user input,
    # so it must not be interpreted as a regular expression ("C++", "Why?").
    is_match <- stringr::str_detect(
      answers_lower,
      stringr::fixed(stringr::str_to_lower(word))
    )

    content_dic <- content_dic %>%
      dplyr::mutate(!!word := ifelse(is_match, "1", ""))
  }

  # Create the target folder on demand so the write does not fail on a fresh
  # project.
  if (!dir.exists(folder_name)) {
    dir.create(folder_name, recursive = TRUE)
  }

  # The destination is passed positionally: readr 1.4.0 renamed the `path`
  # argument to `file`, and the positional form works with both.
  readr::write_excel_csv(content_dic, file.path(folder_name, name))

  invisible(content_dic)
}
sarid-ins/saridr documentation built on Nov. 10, 2020, 9:07 p.m.