R/find_pattern.R

Defines functions find_pattern find_tailspace find_multiplespace find_leadspace

Documented in find_leadspace find_multiplespace find_pattern find_tailspace

#!/usr/bin/Rscript
# -*- coding: utf-8 -*-

################################ Description ###################################
# Title: Function definitions for pattern finding
# Purpose: These functions find simple patterns in a common way. They all call the find_pattern function so that their output has the same form.
# Created on 2014-12-29
# by Joris Muller <joris.muller@jom.link>
# Licence: GPLv3 <http://www.gnu.org/licenses/>
################################################################################

#' @title Find pattern
#' @description Helper function. Searches a character vector for a simple pattern and returns the result in a standardized way, for use by dfcheck.
#' @param pattern character The regular expression to look for.
#' @param x character A character vector to test.
#' @param return_logical logical If \code{TRUE}, return a logical vector. If \code{FALSE} (default), return an integer vector.
#' @return An integer vector with the indices of the elements that match the pattern. If \code{return_logical} is \code{TRUE}, a logical vector with one value per element of \code{x} is returned instead.
#' @author Joris Muller
#' @seealso \code{find_leadspace()}, \code{find_tailspace()}, \code{find_multiplespace()}

find_pattern <- function(pattern, x, return_logical = FALSE) {
    # Logical output requested: one TRUE/FALSE flag per element of x.
    if (return_logical) {
        return(grepl(pattern = pattern, x = x))
    }

    # Default output: positions of the elements of x that match.
    grep(pattern = pattern, x = x)
} # End of function "find_pattern" definition


#' @title Find trailing spaces
#' @description Find trailing spaces in a character vector. This is a common error when data is typed in by hand.
#' @param x character A character vector to test.
#' @param return_logical logical If \code{TRUE}, return a logical vector. If \code{FALSE} (default), return an integer vector.
#' @return An integer vector with the indices of the elements that end with a space. If \code{return_logical} is \code{TRUE}, a logical vector is returned instead.
#' @author Joris Muller
#' @seealso \code{find_leadspace()}
#' @export

find_tailspace <- function(x, return_logical = FALSE) {
    # A single space anchored at the end of the string.
    trailing_space_regex <- " $"

    find_pattern(
        pattern = trailing_space_regex,
        x = x,
        return_logical = return_logical
    )
} # End of function "find_tailspace" definition


#' @title Find multiple spaces
#' @description Find runs of multiple spaces in a character vector. This is a common error when data is typed in by hand.
#' @param x character A character vector to test.
#' @param return_logical logical If \code{TRUE}, return a logical vector. If \code{FALSE} (default), return an integer vector.
#' @return An integer vector with the indices of the elements that contain two or more consecutive spaces. If \code{return_logical} is \code{TRUE}, a logical vector is returned instead.
#' @author Joris Muller
#' @export

# TODO: Write unit tests (by jomuller the 2014-12-30)

find_multiplespace <- function(x, return_logical = FALSE) {
    # Two or more spaces in a row, anywhere in the string.
    repeated_space_regex <- " {2,}"

    find_pattern(
        pattern = repeated_space_regex,
        x = x,
        return_logical = return_logical
    )
} # End of function "find_multiplespace" definition


#' @title Find leading spaces
#' @description Find leading spaces in a character vector. This is a common error when data is typed in by hand.
#' @param x character A character vector to test.
#' @param return_logical logical If \code{TRUE}, return a logical vector. If \code{FALSE} (default), return an integer vector.
#' @return An integer vector with the indices of the elements that start with a space. If \code{return_logical} is \code{TRUE}, a logical vector is returned instead.
#' @author Joris Muller
#' @export

find_leadspace <- function(x, return_logical = FALSE) {
    # A single space anchored at the start of the string.
    leading_space_regex <- "^ "

    find_pattern(
        pattern = leading_space_regex,
        x = x,
        return_logical = return_logical
    )
} # End of function "find_leadspace" definition
jomuller/dfcheck documentation built on May 19, 2019, 7:26 p.m.