# R/numeric_without_unit.R

#' Labelled numeric variable without unit name and missing label
#'
#' Converts a survey variable whose values are recorded as text, such as
#' \code{"25 years"}, to a numeric vector. The values are lower-cased, the
#' literal marker \code{"(spont.)"} is dropped, explicit missing labels are
#' turned into \code{NA}, unit names are removed, and the first number
#' found in each remaining string is returned.
#'
#' @param labelled_var A column from a survey data frame: a labelled
#' vector, a factor, or a character / numeric vector.
#' @param unit An explicit unit name to be removed, such as "years". It is
#' matched literally and case-insensitively, in addition to the built-in
#' units \code{"years"} and \code{"box"}.
#' @param na_label An explicit missing label to be removed such as
#' "refusal". It is matched case-insensitively against the whole value, in
#' addition to the built-in labels \code{"refusal"}, \code{"nt/nv"} and
#' \code{"dk"}.
#' @param max_value Optional maximum value, anything reaching this level
#' (\code{>= max_value}) will be replaced with \code{NA}.
#' @param min_value Optional minimum value, anything reaching this level
#' (\code{<= min_value}) will be replaced with \code{NA}.
#' @param comma If decimals are written with commas, such as 10,7 instead
#' of 10.7
#' @return A numeric vector of the same length as \code{labelled_var};
#' values without a recognisable number are \code{NA}.
#' @importFrom haven as_factor
#' @importFrom stringr str_trim
#' @examples
#' numeric_without_unit(c("25 years", "Refusal", "DK", "17"))
#' numeric_without_unit("10,7 years", comma = TRUE)
#' @export

numeric_without_unit <- function(labelled_var,
                                 unit = NULL,
                                 na_label = NULL,
                                 min_value = NULL,
                                 max_value = NULL,
                                 comma = FALSE) {
  # User-supplied units / missing labels extend the built-in ones. They are
  # lower-cased because the data is lower-cased before any matching.
  units <- unique(tolower(c(unit, "years", "box")))
  units <- units[!is.na(units) & nzchar(units)]
  na_labels <- unique(tolower(c(na_label, "refusal", "nt/nv", "dk")))

  # inherits() copes with multi-element class vectors and covers both the
  # old ("labelled") and the current ("haven_labelled") class name.
  if (inherits(labelled_var, c("labelled", "haven_labelled"))) {
    labelled_var <- haven::as_factor(labelled_var)
  }

  values <- tolower(as.character(labelled_var))
  # fixed = TRUE: the parentheses and the dot are literal characters here.
  values <- gsub("(spont.)", "", values, fixed = TRUE)
  # Trim before comparing, otherwise "refusal (spont.)" is left as
  # "refusal " and slips past the missing-label check.
  values <- trimws(values)
  values[values %in% na_labels] <- NA_character_

  # Strip the units one after the other from the same vector, so the
  # result keeps one element per input value.
  for (unit_name in units) {
    values <- gsub(unit_name, "", values, fixed = TRUE)
  }

  if (isTRUE(as.logical(comma))) {
    values <- gsub(",", ".", values, fixed = TRUE)
  }

  # First number in each string: optional sign, then digits with an
  # optional fraction, or a bare fraction such as ".5".
  number_pattern <- "[+-]?(\\d+(\\.\\d*)?|\\.\\d+)"
  hits <- regexpr(number_pattern, values, perl = TRUE)
  found <- !is.na(hits) & hits > 0L
  numbers <- rep(NA_real_, length(values))
  # regmatches() returns only the matched elements, in order.
  numbers[found] <- as.numeric(regmatches(values, hits))

  if (!is.null(max_value)) {
    numbers[!is.na(numbers) & numbers >= max_value] <- NA_real_
  }
  if (!is.null(min_value)) {
    numbers[!is.na(numbers) & numbers <= min_value] <- NA_real_
  }
  numbers
}

# Scratch checks of regular expressions for pulling a number out of a
# text value. The results are not assigned, so they are only visible when
# the lines are run interactively.

# The pattern is anchored with ^ and $, so it only matches a string that
# consists of nothing but a number; for this sentence it is a no-op.
gsub("^([0-9]+(\\.[0-9])?)$", "", "This is 190.78 atoms ")

# First number in the string, with "." and with "," as the decimal mark.
# The patterns contain no blanks: a blank inside a regular expression is a
# literal character and would become part of (or prevent) the match.
stringr::str_extract("This is 190.78 atoms ", "[+-]?(\\d+(\\.\\d*)?|\\.\\d+)")
stringr::str_extract("This is 190,78 atoms ", "[+-]?(\\d+(,\\d*)?|,\\d+)")

# Index of the elements that contain a number.
grep("[+-]?(\\d+(\\.\\d*)?|\\.\\d+)", "This is 190.78 atoms ")

# Decimal comma to decimal point; fixed = TRUE treats both as literals.
gsub(",", ".", "This is 190,78 atoms ", fixed = TRUE)
# antaldaniel/surveyreader documentation built on May 16, 2019, 2:29 a.m.