R/clean_init_num.R

# ---------------------Below: generic function getInitNum-----------------------
#' Get Initial Numeric Part from An Object
#'
#' Get initial numeric part from an object regardless vector, matrix or data
#' frame. Elements beginning with numeric part will be extracted, else will be
#' dropped.
#'
#' @author Yiying Wang, \email{wangy@@aetna.com}
#' @param x Object. vector, matrix or data frame.
#' @param ... Further arguments to be passed to the next method.
#'
#' @return An object of the same structure with \code{x}
#' @export
#'
#' @examples
#' \dontrun{
#' getInitNum(c("33d", "3.2%", "2d2", "d", "$43", "\uFFE543"))
#' # return c(33.000, 0.032, 2.000, NA, 43.000, 43.000)
#' }
getInitNum <- function(x, ...) {
    UseMethod(".getInitNum", x)
}

#' @export
.getInitNum.default <- function(x, ...){
    if (!is.null(dim(x))) {
        out <- .convVec2Num(as.character(x))
    }else{
        out <- sapply(x, cmpfun(.convVec2Num))
    }
    attributes(out) <- attributes(x)
    return(out)
}

#' @export
.getInitNum.vector <- function(x, ...){
    out <- .convVec2Num(as.character(x))
    return(out)
}

#' @export
.getInitNum.character <- .getInitNum.vector

#' @export
.getInitNum.integer <- .getInitNum.vector

#' @export
.getInitNum.numeric <- .getInitNum.vector
    
#' @export	
.getInitNum.double <- .getInitNum.vector

#' @export
.getInitNum.list <- function(x, ...){
    out <- lapply(x, .convVec2Num)
    if(!is.null(names(x))) names(out) <- names(x)
    return(out)
}

#' @export
.getInitNum.matrix <- function(x, ...){
    dim <- dim(x)
    out <- matrix(.convVec2Num(x), nrow=dim[1])
    return(out)
}

#' @export
.getInitNum.data.frame <- function(x, ...){
    dim <- dim(x)
    vname <- names(x)
    out <- matrix(.convVec2Num(x), nrow=dim[1])
    out <- as.data.frame(out)
    names(out) <- vname
    return(out)
}

#' @importFrom stringr str_detect str_replace_all str_replace
.convVec2Num <- function(x){
    # Convert a vector to numeric
    # This is the base function for methods("getInitNum")
    # Args:
    #   x: a vector
    # Return:
    #   A vector extracting the initial numeric part
    x <- lapply(x, as.character)
    x <- unlist(x)
    v <- vector(length=length(x))
    v[! str_detect(x, "^[$\uffe5\u20AC]*(\\d|-\\d)")] <- NA
    x <- str_replace_all(x, ",", "")
    pattern <- "^([$\uffe5\u20AC]* *)(-* *)(\\d+|\\d+\\.\\d*)(%|e-*\\d+)*.*$"
    v[str_detect(x, "^[$\uffe5\u20AC]*(\\d|-\\d)")] <-
        str_replace_all(x[str_detect(x, "^[$\uffe5\u20AC]*(\\d|-\\d)")],
                        pattern, "\\2\\3\\4")
    v <- str_replace_all(v, "%$","e-2")
    return(as.numeric(v))
}
madlogos/aseshms documentation built on May 21, 2019, 11:03 a.m.