#' runMopper
#' @name runMopper
#' @description runMopper
#' @param s string or dataframe
#' @param pos string, "first" or "last"
#' @return string
#' @export
#' @examples \dontrun{
#' }
runMopper <- function(dp, cleanId, opts, colIds = NULL, appendCol = FALSE){
availableCleanerDp1 <- c("mopWord","mopSplitFixedPattern","mopWhiteSpace","mopStrChop","mopAccents","mopDates")
availableCleanerDp2 <- c("mopDictionaryMatch")
if(cleanId %in% availableCleanerDp1){
dp <- getDpSelection(dp, cols = colIds)
df <- getDataframe(dp)
# opts <- list(pos="first", tro = "fda")
cleanIdOptNames <- getCleanIdOpts(cleanId)
ids <- sapply(names(opts), function(s){s %in% cleanIdOptNames})
opts <- opts[ids]
params <- c(list(df), opts, cols = colIds)
#cleanId <- "mopWord"
dfout <- do.call(cleanId, params)
out <- newDatapkg(dfout, name = "output")
} else{
stop("no clean id found")
}
out
}
getCleanIdOpts <- function(cleanId){
out <- c()
if (cleanId == "mopWord") out <- "pos"
if (cleanId == "mopSplitFixedPattern") out <- c("pattern","splitLength")
if (cleanId == "mopWhiteSpace") out <- out <- c()
if (cleanId == "mopStrChop") out <- c("start","end")
if (cleanId == "mopAccents") out <- c()
out
}
#' mopWord
#' @name mopWord
#' @description mopWord
#' @param s string or dataframe
#' @param pos string, "first" or "last"
#' @return string
#' @export
#' @examples \dontrun{
#' }
mopWord <- function(s, pos ="first",cols=NULL){
getWord <- function(pos){
#str <- "Caserío La Mesa-Veredas La Mesa, La Danta y Mulatos"
#stringi:::stri_extract_last(str, regex="\\w+")
##stringi:::stri_extract_last(s, regex="[:alnum:]+$")
#str_extract(str, '\\w+$')
function(str){
if(pos=="first"){
out <- word(str,1)
}
else if(pos=="last"){
out <- word(str,-1)
}
# else if(!pos %in% c("first","last") ){
# stop("Specify pos='first' or pos='last'")
# }
out
}
}
f <- getWord(pos = pos)
if(class(s) =="factor") s <- as.character(s)
if(class(s) == "character"){
out <- f(s)
}
if("data.frame" %in% class(s)){
colNums <- match(cols,names(s)) %||% 1:ncol(s)
df <- as.data.frame(s[,colNums])
names(df) <- names(s)[colNums]
out <- as.data.frame(sapply(df, f))
# df <- tbl_df(s)
# #cols <- c("fuente","implicado")
# colNums <- match(cols,names(df)) %||% 1:ncol(df)
# out <- df %>%
# select(colNums) %>%
# rowwise() %>%
# mutate_each(funs(f))
}
out
}
#' mopSplitFixedPattern
#' @name mopSplitFixedPattern
#' @description mopSplitFixedPattern
#' @param string
#' @return string
#' @export
#' @examples \dontrun{
#' }
mopSplitFixedPattern <- function(s, pattern, splitLength = 2, cols=NULL){
stringSplitFun <- function(pattern, splitLength){
function(str){
as.data.frame(stringr::str_split_fixed(str,pattern,splitLength))
}
}
f <- stringSplitFun(pattern = pattern, splitLength = splitLength)
#library(pryr)
#unenclose(f)
if(class(s) =="factor") s <- as.character(s)
if(class(s)=="character"){
out <- f(s)
names(out) <- paste("COL",1:splitLength, sep=".")
}
if("data.frame" %in% class(s)){
colNums <- match(cols,names(s)) %||% 1:ncol(s)
if(length(colNums)==1){
out <- f(s[,colNums])
names(out) <- paste(names(s)[colNums],1:splitLength, sep=".")
} else{
df <- as.data.frame(s[,colNums])
names(df) <- names(s)[colNums]
l <- lapply(df, f)
out <- do.call(cbind,l)
}
}
out
}
#' mopWhiteSpace
#' @name mopWhiteSpace
#' @description mopWhiteSpace
#' @param string
#' @return string
#' @export
#' @examples \dontrun{
#' }
mopWhiteSpace <- function(s, cols=NULL){
trimWhite <- function(str){
#str <- " Caserío La Mesa-Veredas La Mesa, La Danta y Mulatos "
stringr:::str_trim(str)
}
f <- trimWhite
if(class(s) =="factor") s <- as.character(s)
if(class(s)=="character"){
out <- f(s)
}
if("data.frame" %in% class(s)){
colNums <- match(cols,names(s)) %||% 1:ncol(s)
df <- as.data.frame(s[,colNums])
names(df) <- names(s)[colNums]
out <- as.data.frame(sapply(df, f))
#df <- tbl_df(s)
#cols <- c("fuente","implicado")
#colNums <- match(cols,names(df)) %||% 1:ncol(df)
#out <- df %>%
# select(colNums) %>%
# rowwise() %>%
# mutate_each(funs(f))
}
out
}
#' mopStrChop
#' @name mopStrChop
#' @description mopStrChop
#' @param string
#' @return string
#' @export
#' @examples \dontrun{
#' }
mopStrChop <- function(s, start = 1, end = 2, cols=NULL){
strChop <- function(start,end){
function(str){
#str <- "Caserío La Mesa-Veredas La Mesa, La Danta y Mulatos "
substr(str, start = start, stop = end)
}
}
f <- strChop(start = start, end = end)
if(class(s) =="factor") s <- as.character(s)
if(class(s)=="character"){
out <- f(s)
}
if("data.frame" %in% class(s)){
colNums <- match(cols,names(s)) %||% 1:ncol(s)
df <- as.data.frame(s[,colNums])
names(df) <- names(s)[colNums]
out <- as.data.frame(sapply(df, f))
#df <- tbl_df(s)
#cols <- c("fuente","implicado")
#colNums <- match(cols,names(df)) %||% 1:ncol(df)
#out <- df %>%
# select(colNums) %>%
# rowwise() %>%
# mutate_each(funs(f))
}
out
}
#' mopAccents
#' @name mopAccents
#' @description mopAccents
#' @param string
#' @return string
#' @export
#' @examples \dontrun{
#' }
mopAccents <- function(s, cols=NULL){
f <- removeAccents
if(class(s) =="factor") s <- as.character(s)
if(class(s)=="character"){
out <- f(s)
}
if("data.frame" %in% class(s)){
colNums <- match(cols,names(s)) %||% 1:ncol(s)
df <- as.data.frame(s[,colNums])
names(df) <- names(s)[colNums]
out <- as.data.frame(sapply(df, f))
#df <- tbl_df(s)
##cols <- c("fuente","implicado")
#colNums <- match(cols,names(df)) %||% 1:ncol(df)
#out <- df %>%
# select(colNums) %>%
# rowwise() %>%
# mutate_each(funs(f))
}
out
}
#' mopDictionaryMatch
#' @name mopDictionaryMatch
#' @description mopDictionaryMatch
#' @param string
#' @return string
#' @export
#' @examples \dontrun{
#' }
mopDictionaryMatch <- function(s, dict, cols=NULL){
dicMatch <- function(dict){
function(str){
#str <- "Caserío La Mesa-Veredas La Mesa, La Danta y Mulatos "
dictionaryMatch(str,dict)
}
}
f <- dicMatch(dict)
if(class(s) =="factor") s <- as.character(s)
if(class(s)=="character"){
out <- f(s)
}
if("data.frame" %in% class(s)){
colNums <- match(cols,names(s)) %||% 1:ncol(s)
df <- as.data.frame(s[,colNums])
names(df) <- names(s)[colNums]
out <- as.data.frame(sapply(df, f))
#dd <- tbl_df(s)
##cols <- c("fuente","implicado")
#colNums <- match(cols,names(dd)) %||% 1:ncol(df)
#out <- dd %>%
# select(colNums) %>%
# rowwise() %>%
# mutate_each(funs(f))
}
out
}
#' mopDates
#' @name mopDates
#' @description mopDates
#' @param string
#' @return string
#' @export
#' @examples \dontrun{
#' }
mopDates <- function(s, from, to = NULL, cols=NULL){
transDate <- function(from, to){
function(str){
transformDate(str,from, to)
}
}
f <- transDate(from = from, to = to)
#unenclose(f)
if(class(s) =="factor") s <- as.character(s)
if(class(s)=="character"){
out <- f(s)
}
if("data.frame" %in% class(s)){
colNums <- match(cols,names(s)) %||% 1:ncol(s)
df <- as.data.frame(s[,colNums])
names(df) <- names(s)[colNums]
out <- as.data.frame(sapply(df, f))
#dd <- tbl_df(s)
##cols <- c("fuente","implicado")
#colNums <- match(cols,names(dd)) %||% 1:ncol(df)
#out <- dd %>%
# select(colNums) %>%
# rowwise() %>%
# mutate_each(funs(f))
}
out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.