#' @title Function to clean training and test data
#'
#' @description \code{clean} Function to clean training and test data.
#'
#' @details Function to clean training and test data.
#'
#' @param \code{x} Input dataframe
#'
#' @return Cleaned dataframe
#'
#' @examples
#'
#'\dontrun{
#' library(catR)
#' library(dplyr)
#' library(readr)
#'
#' train <- "train.csv" %>%
#' read_csv %>%
#' clean
#'
#'}
#'
#' @export
clean <- function(
x,
dummy_variables = NULL
) {
out <- tryCatch(
{
x[["Name_length"]] <- nchar(x[["Name"]])
x[["Year"]] <- lubridate::year(x[["DateTime"]])
x[["Month"]] <- lubridate::month(x[["DateTime"]])
x[["Day"]] <- lubridate::day(x[["DateTime"]])
x[["Wday"]] <- lubridate::wday(x[["DateTime"]])
x[["Hour"]] <- lubridate::hour(x[["DateTime"]])
x[["Weekend"]] <- ifelse(x[["Wday"]] %in% c(1,7), 1, 0)
x[["TimeOfDay"]] <- "Unknown"
x[which(x$Hour >= 5 & x$Hour < 12),"TimeOfDay"] <- "Morning"
x[which(x$Hour >= 12 & x$Hour < 13),"TimeOfDay"] <- "Noon"
x[which(x$Hour >= 13 & x$Hour < 18),"TimeOfDay"] <- "Afternoon"
x[which(x$Hour >= 18 & x$Hour < 21),"TimeOfDay"] <- "Evening"
x[which(x$Hour >= 21 & x$Hour <= 23),"TimeOfDay"] <- "Night"
x[which(x$Hour >= 0 & x$Hour < 5),"TimeOfDay"] <- "Night"
x <- tidyr::separate_(x, "SexuponOutcome", c("Fertility","Gender"), fill = "right")
x[["Gender"]] <- ifelse(is.na(x[["Gender"]]),"Unknown", x[["Gender"]])
# Convert age into num of days
x[["Age_days"]] <- sapply(x[["AgeuponOutcome"]], FUN = convert_age)
# Convert
x[is.na(x)] <- 0 # Fill NA with 0
# Simple Breed
x[["IsMix"]] <- 0
x[stringr::str_detect(x$Breed, "Mix"), "IsMix"] <- 1
# Simple Color
x[["Simple_Colour"]] <- sapply(x[["Color"]], function(x) strsplit(x, split = '/| ')[[1]][1])
# Dummy variables
if (!is.null(dummy_variables)) {
x <- dummy_variables %>%
purrr::map(~dummy_vars(train,variable = .x)) %>%
bind_cols %>%
bind_cols(x, .)
}
return(x)
},
warning = function(w) {
w
},
error = function(e) {
e
},
finally = {}
)
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.