R/hcv_utils.R

Defines functions merge_hcv_genotype

Documented in merge_hcv_genotype

#' HCV genotype cleaner
#'
#' @param x data frame with specimen specific HCV genotype data
#' recorded as a character string
#' @param drop_levels flag indicating whether to drop empty genotype
#' levels - defaults to TRUE
#'
#' @importFrom assertthat assert_that 
#' @importFrom stringr str_extract str_detect

merge_hcv_genotype <- function(x,
                               drop_levels = TRUE) {
  
  # check args
  assertthat::assert_that(is.data.frame(x),
                          is.character(x$hcv_genotype),
                          is.character(x$hcv_genotype_sub_type),
                          is.logical(drop_levels))
  
  # extract relevent genotype data
  x$geno_temp <- stringr::str_extract(x$hcv_genotype, 
                                      "\\d{1}$")
  
  x$sub_temp <- stringr::str_extract(x$hcv_genotype_sub_type, 
                                     "\\b\\w{1}$")
 
  # concat data
  x$hcv_full_genotype <- ifelse(is.na(x$sub_temp), 
                            paste0("Genotype ", x$geno_temp),
                            paste0("Genotype ", 
                            x$geno_temp,
                            x$sub_temp)) 
  
  # convert missing data to NA
  x$hcv_full_genotype[stringr::str_detect(x$hcv_full_genotype,
                                      "NA")] <- NA_character_
  
  # define all cominbations of genotype and sub_genotype for factor levels
  sub_string <- c("", letters[1:5])
  gen_levels <- paste0("Genotype ", 
                       rep(1:6, each = length(sub_string)), 
                       sub_string)
  
  # convert output to factor
  x$hcv_full_genotype <- factor(x$hcv_full_genotype, 
                                levels = gen_levels)
  
  if (drop_levels) {
    x$hcv_full_genotype <- droplevels(x$hcv_full_genotype)  
  }
  
  # remove temporary variables
  x$geno_temp <- NULL
  x$sub_temp <- NULL
  
x   
}
JayAchar/hisreportr documentation built on March 18, 2020, 5:57 a.m.