R/QC_clean.R
In eHDPrep: Quality Control and Semantic Enrichment of Datasets

Documented in encode_as_num_mat encode_binary_cats encode_bin_cat_vec encode_cats encode_genotypes encode_genotype_vec encode_ordinals onehot_vec ordinal_label_levels strings_to_NA warn_missing_dots

#    Copyright (C) 2022 Queens University Belfast
#    
#    This file is part of 'eHDPrep'
#
#    'eHDPrep' is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    'eHDPrep' is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with 'eHDPrep'.  If not, see <http://www.gnu.org/licenses/>.

# needed for "where" function
utils::globalVariables("where")

##' Clean missing values in a character vector
##'
##' Internal. Replaces values in character vectors representing missing values
##' with the reserved value \code{NA}. "Undetermined", "unknown", "missing",
##' "fail", "fail / unknown", "equivocal", "equivocal / unknown", "*". These can
##' be replaced with the \code{values_to_replace} parameter.
##'
##' @param x input vector
##' @param values_to_replace vector containing values to be replaced with
##'   \code{NA}
##' @param ... additional arguments to pass to \code{grepl}. Use
##'   \code{ignore.case = TRUE} to ignore case.
##' @return vector with default or specified values replaced with \code{NA}
##' @noRd
##' @examples
##' nums <- c(1,2,3,4,5,6,7,8,-99)
##' NA_clean(nums, -99)
NA_clean <- function(x, values_to_replace = NULL, ...) {

  pattern <- paste0("^", values_to_replace, "$")
  pattern <- paste0(pattern, collapse = "|")
  x[grepl(pattern = pattern, x = x, ...)] <- NA
  x
}

#' Replace values in non-numeric columns with NA
#'
#' Replaces specified or pre-defined strings in non-numeric columns with
#' \code{NA}. 
#'
#' @details Columns to process can be specified in custom arguments (\code{...}) or will
#' be applied to all non-numeric columns.
#' Default strings which will be replaced with \code{NA} are as follows:
#' "Undetermined", "unknown", "missing", "fail", "fail / unknown", 
#' "equivocal", "equivocal / unknown", "*".
#' String search is made using \code{\link[base]{grepl}} and supports
#' \code{\link[base]{regex}} so metacharacters (\code{. \ | ( ) [ ] {  } ^ $ * + ? $})
#' should be escaped with a "\code{\\}" prefix.
#' Matches are case sensitive by default but can ignore case with the parameter: 
#' \code{ignore.case = TRUE} in \code{...}).
#'
#' @param strings_to_replace character vector of values to be replaced with
#'   \code{NA}.
#' @inheritParams nums_to_NA
#'
#' @return data with specified values replaced with NA.
#' @export
#'
#' @examples
#' data(example_data)
#' 
#' # original unique values in diabetes column:
#' unique(example_data$diabetes)
#' # Using default values
#' res <- strings_to_NA(example_data)
#' unique(res$diabetes)
#' 
#' 
#' # original unique values in diabetes_type column:
#' unique(example_data$diabetes_type)
#' # Using custom values
#' res <- strings_to_NA(example_data, strings_to_replace = "Type I")
#' unique(res$diabetes_type)
#' 
strings_to_NA <- function(data, ..., strings_to_replace = NULL) {
  # use built in values if no strings are specified
  if(is.null(strings_to_replace)) {
    strings_to_replace <- c("Undetermined",
                                 "unknown",
                                 "missing",
                                 "fail",
                                 "fail / unknown",
                                 "equivocal",
                                 "equivocal / unknown",
                                 #"9",
                                 "*")
  } else {}

  if (missing(...)) {
    data %>%
      dplyr::mutate(dplyr::across(!where(is.numeric),
                                  ~ NA_clean(.x, strings_to_replace, ignore.case = TRUE)
                                  )
      )
    } else {
        vars <- dplyr::enquos(...)
        
        # stop if numeric vars are selected
        data %>%
          dplyr::select(!!! vars & where(is.numeric)) %>%
          names() -> 
          num_vars
        
        if(length(num_vars > 0 )) {
          stop("`...` must not contain numeric variables.\n",
               paste0("\u2716 `", num_vars, "` is a numeric variable\n"), call. = FALSE)
        } else {}
        
        data %>%
          dplyr::mutate(dplyr::across(!!! vars,
                                      ~ NA_clean(.x, strings_to_replace, ignore.case = TRUE)
          ))
      }

}

#' Replace numeric values in numeric columns with NA
#'
#' Replaces specified numbers in numeric columns with \code{NA}.
#' 
#' Columns to process can be specified in \code{...} or the function will be
#' applied to all numeric columns.
#' 
#' @param data A data frame, data frame extension (e.g. a tibble), or a lazy
#'   data frame (e.g. from dbplyr or dtplyr).
#' @param nums_to_replace numeric vector of values to be replaced with
#'   \code{NA}. Case is ignored.
#' @inheritParams dplyr::select
#' @return \code{data} with specified values replaced with NA
#' @export
#'
#' @examples
#' data(example_data)
#' 
#' # replace all 1,2, and 3 from tumoursize and patient_id with NA.
#' nums_to_NA(data = example_data, tumoursize, patient_id, nums_to_replace = c(1,2,3))
nums_to_NA <- function (data, ..., nums_to_replace = NULL) {
  if(is.null(nums_to_replace)) {
         warning("`nums_to_replace` is missing and no values will be replaced with NA",call.= FALSE)
  } else {}

  if (missing(...)) {
    data %>%
      dplyr::mutate(dplyr::across(where(is.numeric),
                                  function(x) NA_clean(x, nums_to_replace))
                    )
  } else {
    vars <- dplyr::enquos(...)

    data %>%
      dplyr::mutate(dplyr::across(.cols = c(!!! vars),
                                  .fns = function(x) NA_clean(x, nums_to_replace))
                    )
  }
}

##' Encode a categorical vector with binary categories
##'
##' In a character vector, converts binary categories to factor levels.
##'
##' Binary categories to convert can be specified with a named character vector,
##' specified in \code{values}. The syntax of the named vector is:
##' \code{negative_finding = positive_finding}.  If \code{values} is not
##' provided, the default list will be used: \code{"No"="Yes", "No/unknown" =
##' "Yes", "no/unknown" = "Yes", "Non-user" = "User", "Never" = "Ever", "WT" =
##' "MT"}.
##'
##' @param x non-numeric input vector
##' @param values Optional named vector of user-defined values for binary values
##'   using \code{binary_label_1 = binary_label_2} syntax (e.g. \code{c("No" =
##'   "Yes")} would assign level 1 to "No" and 2 to "Yes").
##' @param numeric_out If true, numeric vector is returned. If false, factor is
##'   returned.
##' @return Factor with false finding encoded as 1 and true finding encoded as
##'   2. Alternatively, numeric vector if \code{numeric_out} parameter is
##'   \code{TRUE}.
##' @importFrom forcats fct_relevel
encode_bin_cat_vec <- function(x, values = NULL, numeric_out = FALSE) {
  if(is.factor(x)){ x <- as.character(x)} else{x} # convert factor inputs to character

  # if(length(unique(x)) != 2)
  #   stop("Input vector should have 2 unique values", call. = FALSE)

  # default values to clean
  # first is negative finding, second is positive finding
  default_values <- c("No"="Yes", "No/unknown" = "Yes", "no/unknown" = "Yes",
                      "Non-user" = "User", "Never" = "Ever", "WT" = "MT")

  # use default values if no others are supplied
  if (is.null(values)) {values <- default_values} else{values}

  for (i in 1:length(values)) {
      if (all(unique(x) %in% c(names(values)[[i]],values[[i]], NA))) {
        out <- forcats::fct_relevel(factor(x), c(names(values)[[i]],values[[i]]))
      if (numeric_out) {out <- sapply(out, function(x) as.double(x)-1)}
        return(out)
      }
  }

   return(x)

}

#' Encode categorical variables as binary factors
#' 
#' In a data frame, converts binary categories to factors. Ordering of levels is
#' standardised to: \code{negative_finding, positive_finding}. This embeds a
#' standardised numeric relationship between the binary categories while
#' preserving value labels.
#' 
#' Binary categories to convert can be specified with a named character vector,
#' specified in \code{values}. The syntax of the named vector is:
#' \code{negative_finding = positive_finding}. If \code{values} is not
#' provided, the default list will be used: \code{"No"="Yes", "No/unknown" =
#' "Yes", "no/unknown" = "Yes", "Non-user" = "User", "Never" = "Ever", "WT" =
#' "MT"}.
#'
#' @inheritParams encode_bin_cat_vec
#' @inheritParams nums_to_NA
#' @inherit nums_to_NA
#'
#' @return dataset with specified binary categories converted to factors.
#' @export
#'
#' @examples
#' # use built-in values. Note: rural_urban is not modified
#' # Note: diabetes is not modified because "missing" is interpreted as a third category.
#' # strings_to_NA() should be applied first
#' encode_binary_cats(example_data, hypertension, rural_urban)
#'
#' # use custom values. Note: rural_urban is now modified as well.
#' encoded_data <- encode_binary_cats(example_data, hypertension, rural_urban,
#'                    values = c("No"= "Yes", "rural" = "urban"))
#' 
#' # to demonstrate the new numeric encoding:
#' dplyr::mutate(encoded_data, hypertension_num = as.numeric(hypertension), .keep = "used") 

encode_binary_cats <- function(data, ..., values = NULL) {
  warn_missing_dots(missing(...))
  vars <- dplyr::enquos(...)

  data %>%
    dplyr::mutate(dplyr::across(c(!!! vars),
                                function(x) encode_bin_cat_vec(x, values = values)))
}

##' Encode ordinal variables
##'
##' Converts character or factor variables in the input data frame to ordered factors
##' embedding numeric relationship between values while preserving value labels.
##' 
##' @param ord_levels character vector containing values in desired order
##'   (lowest to highest).
##' @param strict_levels logical constant. If \code{TRUE}, variables in
##'   \code{...} which contain values other than \code{ord_levels} (including
##'   \code{NA}) are not modified and a warning is given. If \code{FALSE},
##'   values not in \code{ord_levels} are converted to \code{NA}.
##' @inheritParams nums_to_NA
##' @return dataframe with specified variables encoded as ordered factors.
##' @importFrom dplyr mutate across enquos summarise everything as_label
##' @importFrom tidyr pivot_longer
##' @importFrom rlang .data
##' @export
##' @examples
##' data(example_data)
##' require(dplyr)
##' require(magrittr)
##' encode_ordinals(example_data, ord_levels = c("N0","N1","N2"), n_stage)
##'
##' # Note: "unequivocal" is present in  t_stage but not in `ord_levels`.
##' # with `strict_levels` TRUE, t_stage is unmodified and a warning message is given:
##' 
##' encode_ordinals(example_data,
##'    ord_levels = c("T1","T2","T3a", "T3b", "T4"), strict_levels = TRUE, t_stage) %>%
##'    select(t_stage)
##'    
##' # with `strict_levels` FALSE, it is replaced with NA:
##' 
##' encode_ordinals(example_data,
##'    ord_levels = c("T1","T2","T3a", "T3b", "T4"), strict_levels = FALSE, t_stage) %>%
##'    select(t_stage)
encode_ordinals <- function(data, ord_levels, ..., strict_levels = TRUE) {

  if (missing(...)) {
    warn_missing_dots(TRUE)
    return(invisible(data))
  } else {
    vars <- dplyr::enquos(...)

    #check for missing levels
    data %>%
      dplyr::select(!!! vars) %>%
      dplyr::summarise(dplyr::across(everything(), ~!.x %in% ord_levels)) %>%
      # sum number of missing levels
      dplyr::summarise(dplyr::across(c(!!! vars), sum)) %>%
      tidyr::pivot_longer(dplyr::everything()) %>%
      # filter variables with missing levels
      dplyr::filter(.data$value > 0) ->
      missing_levels
    
    # warn if any missing levels
    if(nrow(missing_levels) > 0 & strict_levels) {
      warning("When parameter `strict_levels` is TRUE, all levels must be present in `ord_levels` for variables specified with `...`.\nThe following variables have not been modified:\n",
              paste0("\u2716 `", missing_levels$name, "` contains ", missing_levels$value, " problematic values.\n"),
              "\n\u2139 Ensure missing values (i.e. `NA`) have been included in `ord_levels` if it should be considered.",  call. = FALSE)
      
      # remove variables containing missing levels from `vars` and don't modify
      vars <- vars[-which(sapply(vars, function(x) dplyr::as_label(x) %in% missing_levels$name))]
    } else {}
    
    data %>%
      dplyr::mutate(dplyr::across(c(!!! vars),
                                  ~ factor(as.character(.x),
                                           levels = ord_levels,
                                           ordered=T)
                                  )
                    )
  }
}


##' Encode a genotype/SNP vector
##'
#' Standardises homozygous SNP alleles (e.g. recorded as 'A') to two character
#' form (e.g. 'A/A') and orders heterozygous SNP alleles alphabetically (e.g.
#' "GA" becomes "A/G"). The SNP values are then converted from a character
#' vector to an ordered factor, ordered by SNP allele frequency (e.g. most
#' frequent SNP allele is 1, second most frequent value is 2, and least frequent
#' values is 3). This method embeds the numeric relationship between the SNP
#' allele frequencies while preserving value labels.
##'
##' @param x input vector containing genotype data
##' @return Ordered factor, ordered by allele frequency in variable
##' @importFrom magrittr %>%
##' @importFrom dplyr recode
##' @importFrom forcats fct_infreq
encode_genotype_vec <- function(x) {
  
  # make uppercase
  x <- toupper(x)
  
  # replace single values for homozygous with double
  x <- sapply(as.character(x),
              function(x) ifelse(nchar(x) == 1, paste0(x, x), x)
              )
  # order SNP allele pairs alphabettically

  x <- sapply(as.character(x),
              function(x){ ifelse(is.na(x),
                                 NA,
                                  paste0(sort(c(substr(x, 1, 1), substr(x, 2, 2))),
                                  collapse = "")) }
              )
  
  # Add "/" between each SNP's alleles
  x <- sapply(as.character(x),
              function(x){ ifelse(is.na(x),
                                 NA,
                                 paste0(substr(x, 1, 1), "/", substr(x, 2, 2))) }
              )
  
  # order SNP alleles by cohort frequency (in x)
  unname(forcats::fct_infreq(x, ordered = TRUE))
}

#' Encode genotype/SNP variables in data frame
#'
#' Standardises homozygous SNPs (e.g. recorded as "A") to two character
#' form (e.g. "A/A") and orders heterozygous SNPs alphabetically (e.g.
#' "GA" becomes "A/G"). The SNP values are then converted from a character
#' vector to an ordered factor, ordered by observed allele frequency (in the supplied cohort). The most
#' frequent allele is assigned level 1, the second most frequent value is assigned level 2, and the least frequent
#' values is assigned level 3). This method embeds the numeric relationship between the
#' allele frequencies while preserving value labels.
#'
#' @inheritParams nums_to_NA
#' @return `data` with variables (\code{...}) encoded as standardised genotypes
#' @export
#'
#' @examples
#' data(example_data)
#' require(dplyr)
#' require(magrittr)
#' 
#' # one variable
#' encode_genotypes(example_data, SNP_a) %>%
#' select(SNP_a)
#' 
#' # multiple variables
#' encode_genotypes(example_data, SNP_a, SNP_b) %>%
#' select(SNP_a, SNP_b)
#' 
#' # using tidyselect helpers
#' encode_genotypes(example_data, dplyr::starts_with("SNP")) %>%
#' select(starts_with("SNP"))
#' 
encode_genotypes <- function(data, ...) {
  warn_missing_dots(test = missing(...))

  vars <- dplyr::enquos(...)
  data %>%
    dplyr::mutate(dplyr::across(c(!!! vars), encode_genotype_vec))
}


#' One hot encode a vector
#' 
#' Uses one-hot encoding to convert nominal vectors to a tibble containing
#' variables for each of the unique values in input vector.
#'
#' @param x non-numeric vector
#' @param prefix prefix to append to output variable names
#' @importFrom dplyr as_tibble group_by add_tally ungroup select
#' @importFrom rlang .data
#' @importFrom tidyr pivot_wider
#' @importFrom  tibble rownames_to_column
#' @return tibble
onehot_vec <- function(x, prefix) {
  prefix <- ifelse(missing(prefix),deparse(substitute(x)),prefix)

  x %>% 
    dplyr::as_tibble() %>%
    tibble::rownames_to_column() %>%
    dplyr::group_by(.data$rowname) %>%
    dplyr::add_tally() %>%
    dplyr::ungroup() %>%
    tidyr::pivot_wider(names_from = .data$value,
                       values_from = n,
                       values_fill = 0,
                       names_prefix = paste0(prefix,"_"),
                       names_repair = "universal") %>%
    suppressMessages() %>%
    dplyr::select(-"rowname")
}


##' Encode categorical variables using one-hot encoding.
##'
##' Variables specified in \code{...} are replaced with new variables describing
##' the presence of each unique category. Generated variable names have space
##' characters replaced with "_" and commas are removed.
##'
##' @inheritParams nums_to_NA
##' @inheritParams encode_as_num_mat
##'
##' @return Tibble with converted variables.
##' @importFrom magrittr %>%
##' @importFrom rlang .data
##' @importFrom dplyr enquo enquos mutate ungroup group_by all_of select n_distinct pull filter
##' @importFrom tibble add_column
##' @importFrom tidyr pivot_longer pivot_wider
##' @export
##' @examples
##' require(magrittr)
##' require(dplyr)
##' 
##' data(example_data)
##' 
##' # encode one variable
##' encode_cats(example_data, marital_status) %>%
##' select(starts_with("marital_status"))
##' 
##' # encode multiple variables
##' encoded <- encode_cats(example_data, diabetes, marital_status)
##' 
##' select(encoded, starts_with("marital_status"))
##' # diabetes_type included below but was not modified:
##' select(encoded, starts_with("diabetes")) 
encode_cats <- function(data, ...) {
  if (missing(...)) {
    warn_missing_dots(TRUE)
    return(invisible(data))
  } else {
    # pivot wont accept a direct unquoting so is done here
    vars <- names(dplyr::select(data, !!! dplyr::enquos(...) ))
  }
  
  # warn of and ignore variables with only two or fewer categories, excluding NA. they
  # don't benefit from this function's encoding and would be better encoded
  # using encode_binary_cats()
  data %>%
    # count distinct values
    dplyr::summarise(dplyr::across(dplyr::all_of(vars), dplyr::n_distinct, na.rm = T)) %>%
    # are there more than 2 distinct values (per variable)?
    dplyr::summarise(dplyr::across(dplyr::all_of(vars), ~ .x <= 2)) %>%
    pivot_longer(dplyr::everything(), names_to = "var", values_to = "lessthaneq_2_unique_vals") %>%
    dplyr::filter(.data$lessthaneq_2_unique_vals) %>%
    dplyr::pull(.data$var) ->
    unchanged_vars
    
  if(length(unchanged_vars) > 0) {
    # remove vars with 2 or fewer distinct non-missing values
    vars <- vars[which(!vars %in% unchanged_vars)]
    # warn user
    warning("The following variables specified in `...`, contain 2 or fewer distinct non-missing values:\n- ",
            paste0(unchanged_vars, collapse = "\n- "), "\nThese variables may be more suitable for other functionality such as eHDPrep::encode_binary_cats(). They will not be modified here.", call. = FALSE)
  } else {}
  
  # one hot encoding
  data %>%
    dplyr::mutate(.OHE_n = 1) %>%
    tidyr::pivot_longer(cols = dplyr::all_of(vars),
                        values_to = ".OHE_value",
                        names_to = ".OHE_name") %>%
    tidyr::pivot_wider(names_from = c(.data$.OHE_name,
                                      .data$.OHE_value),
                       values_from = .data$.OHE_n,
                       values_fill = 0,
                       names_sort = TRUE,
                       names_repair = "universal")
}

##' Convert data frame to numeric matrix
##'
##' Converts all columns to numeric and uses the row identifier column
##' (\code{id_var}) as row names.
##'
##' @param id_var An unquoted  expression which corresponds to a variable in
##'   \code{data} which identifies each row.
##' @inheritParams nums_to_NA
##' @return Numeric matrix with \code{id_var} values as row names
##' @importFrom magrittr %>%
##' @importFrom dplyr mutate enquo as_label across
##' @importFrom tibble column_to_rownames
##' @export
##' @examples
##' require(dplyr)
##' require(magrittr)
##' mtcars %>%
##'   dplyr::as_tibble(rownames = "id") %>%
##'   encode_as_num_mat(id)
encode_as_num_mat <- function(data, id_var) {
  # `id_var` presence check
  if(missing(id_var)) {
    stop("`id_var` must be supplied.", call. = FALSE)
    } else {id_var <- enquo(id_var)}

  tryCatch({
    data %>%
      tibble::column_to_rownames(dplyr::as_label(id_var)) %>%
      dplyr::mutate(dplyr::across(dplyr::everything(), as.numeric))
  }, warning = function(w) {
    stop("There was a problem with converting `data` to a numeric matrix. 
         This is most likely because some columns were not prepared correctly. 
         Please see the vignette for information on how to process the dataset for 
         full numeric conversion: `vignette('Introduction_to_eHDPrep', package = 'eHDPrep')`
         ", call. = FALSE)
    return(NULL)
  }
  )
    
}

#' Missing dots warning
#' 
#' Internal function. Warns if dots (...) argument have not been supplied
#' @param test expression to test.
#'
#' @return warning to user that no values were modified
warn_missing_dots <-function(test) {
  if(test) {
    warning("No argument was given for `...` so no values were modified", call. = FALSE)
  } else {}
}

#' Extract labels and levels of ordinal variables in a dataset
#'
#' This function enables preservation of the text labels for ordinal variables in
#'  a dataset in preparation for conversion to a numeric matrix. A table is produced
#'  which retains the mappings between the text labels and the
#' numerical labels for future reference.
#'
#' @param data data frame with ordinal variables with labels and levels to be
#'   extracted.
#' @param out_path Optional string. Path to write output to. If not supplied, \R
#'   object will be returned.
#'
#' @return Tibble of text label and (numerical) level mappings
#' @importFrom rlang .data
#' @export
#'
#' @examples
#' require(magrittr)  # for %>%
#' 
#' # create an example class_tbl object
#' # note that diabetes_type is classed as ordinal yet is not modified as its
#' # levels are not pre-coded. It should instead be encoded with encode_ordinals().
#' tibble::tribble(~"var", ~"datatype",
#' "patient_id", "id",
#' "tumoursize", "numeric",
#' "t_stage", "ordinal_tstage",
#' "n_stage", "ordinal_nstage",
#' "diabetes", "factor",
#' "diabetes_type", "ordinal",
#' "hypertension", "factor",
#' "rural_urban", "factor",
#' "marital_status", "factor",
#' "SNP_a", "genotype",
#' "SNP_b", "genotype",
#' "free_text", "freetext") -> data_types
#' 
#' # show unqiue values for t_stage in pre-QC example_data 
#' unique(example_data$t_stage)
#' 
#' # apply quality control to example_data
#' apply_quality_ctrl(example_data, patient_id, data_types,
#' bin_cats =c("No" = "Yes", "rural" = "urban"),  min_freq = 0.6) %>%
#' ordinal_label_levels -> res
#' 
#' # examine the labels and levels of t_stage in post-QC example_data
#' dplyr::filter(res, variable == "t_stage")
#' 
ordinal_label_levels <- function(data, out_path = NULL) {
  
  data %>%
    dplyr::select(where(is.ordered)) ->
    data_ord
  
  data_ord %>%
    dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) %>%
    tidyr::pivot_longer(everything(), names_to = "variable", values_to = "label") ->
    labels
   
  data_ord %>%
    dplyr::mutate(dplyr::across(dplyr::everything(), as.numeric)) %>%
    tidyr::pivot_longer(dplyr::everything(), names_to = "variable", values_to = "level") %>%
    dplyr::select("level") ->
    levels 
  
  dplyr::bind_cols(labels, levels) %>%
    dplyr::distinct(dplyr::across(dplyr::everything())) %>%
    dplyr::arrange(.data$variable, .data$level) ->
    out
  
  # optionally return output
  if(!is.null(out_path)) {
    readr::write_csv(x = out, file = out_path)
  } else {return(out)}

}
Any scripts or data that you put into this service are public.
eHDPrep documentation built on Sept. 9, 2025, 5:56 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
eHDPrep
Quality Control and Semantic Enrichment of Datasets

R/QC_clean.R
In eHDPrep: Quality Control and Semantic Enrichment of Datasets

Defines functions ordinal_label_levels warn_missing_dots encode_as_num_mat encode_cats onehot_vec encode_genotypes encode_genotype_vec encode_ordinals encode_binary_cats encode_bin_cat_vec strings_to_NA NA_clean

Documented in encode_as_num_mat encode_binary_cats encode_bin_cat_vec encode_cats encode_genotypes encode_genotype_vec encode_ordinals onehot_vec ordinal_label_levels strings_to_NA warn_missing_dots

Try the eHDPrep package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

eHDPrep Quality Control and Semantic Enrichment of Datasets

R/QC_clean.R In eHDPrep: Quality Control and Semantic Enrichment of Datasets

Defines functions ordinal_label_levels warn_missing_dots encode_as_num_mat encode_cats onehot_vec encode_genotypes encode_genotype_vec encode_ordinals encode_binary_cats encode_bin_cat_vec strings_to_NA NA_clean

Documented in encode_as_num_mat encode_binary_cats encode_bin_cat_vec encode_cats encode_genotypes encode_genotype_vec encode_ordinals onehot_vec ordinal_label_levels strings_to_NA warn_missing_dots

Try the eHDPrep package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

eHDPrep
Quality Control and Semantic Enrichment of Datasets

R/QC_clean.R
In eHDPrep: Quality Control and Semantic Enrichment of Datasets