This package was created to provide analysts with functions that are useful for working with the data within the enterprise.

changeclass

Dummycode

kfold

imputer

preprocess

SummaryStats

StaticMissingVars

parallelApply

trim

# Attach MASS with library() so a missing package stops the script here;
# require() would only return FALSE and the failure would surface later,
# when the Boston data set is first used.
library(MASS)

# Directory holding the package's R source files, defined once so the
# listing and the sourcing cannot drift apart.
r_dir <- "F:\\R_Project_wec-preprocess\\R\\"

# Source every file found in r_dir except the model wrapper script.
for (f in list.files(r_dir)) {
  if (f %in% c("Model Wrapper.r")) {
    next
  }
  source(paste0(r_dir, f))
}

changeclass

This function was designed to convert variables to their most likely column class.

# Build a 1000-row demo frame from four normal draws, all with mean 1
# (point1 uses sd 0, the remaining columns sd 100).
df <- data.frame(
  point1 = rnorm(1000, mean = 1, sd = 0),
  point2 = rnorm(1000, mean = 1, sd = 100),
  point3 = rnorm(1000, mean = 1, sd = 100),
  point4 = rnorm(1000, mean = 1, sd = 100)
)

# Store the numeric draws under a mix of classes (character, numeric,
# factor, character) so there is something for changeclass to convert.
df[["point1"]] <- as.character(df[["point1"]])
df[["point2"]] <- as.numeric(df[["point2"]])
df[["point3"]] <- as.factor(df[["point3"]])
df[["point4"]] <- as.character(df[["point4"]])

# Show the starting column classes and the first rows.
str(df)
head(df)

Now we can call our changeclass function

# Run changeclass over the demo frame and keep the result in df.
df <- changeclass(df)
# Inspect the column classes and the first rows of the returned frame.
str(df)
head(df)

If the data frame has fewer than 100 rows, the function will exit.

# A 31-row frame of four normal draws (mean 1; point1 with sd 0, the
# others with sd 100), used to call changeclass on a small input.
df <- data.frame(
  point1 = rnorm(31, mean = 1, sd = 0),
  point2 = rnorm(31, mean = 1, sd = 100),
  point3 = rnorm(31, mean = 1, sd = 100),
  point4 = rnorm(31, mean = 1, sd = 100)
)
df <- changeclass(df)

Dummycode

# Call DummyCode on the `carb` and `am` columns of mtcars twice, once with
# the third argument FALSE and once with it TRUE, and preview each result.
# NOTE(review): what the third argument toggles is defined in DummyCode
# itself — confirm there.
# TRUE/FALSE are spelled out because T and F are ordinary variables that
# can be reassigned.
df <- DummyCode(mtcars, c("carb", "am"), FALSE)
head(df)
df <- DummyCode(mtcars, c("carb", "am"), TRUE)
head(df)

kfold

# Pass iris and the value 3 to kfold with messages silenced.
# NOTE(review): 3 is presumably the number of folds — confirm in kfold.
# The result is bound to the name `iris`, masking the built-in data set
# for the rest of the session.
iris <- suppressMessages(kfold(iris, 3))

# Cross-tabulate Species against the `folds` column of the result.
table(iris$Species, iris$folds)

imputer

# Work on a copy of the Boston data and blank out nine values in its
# first column so there is something to impute.
df <- Boston
df[c(1, 6, 8, 12, 66, 77, 88, 101, 303), 1] <- NA

# Three impute calls on column 1, each with exactly one of the last three
# flags switched on.
# NOTE(review): the flags presumably select the imputation method —
# confirm against impute's definition.
# Every call overwrites imputed.df, so only the last result is kept.
# TRUE/FALSE are spelled out because T and F can be reassigned.
imputed.df <- impute(df, 1, FALSE, FALSE, TRUE)
imputed.df <- impute(df, 1, FALSE, TRUE, FALSE)
imputed.df <- impute(df, 1, TRUE, FALSE, FALSE)

SummaryStats

# Call df_stats on the Boston data with any messages suppressed, store
# what it returns in `data`, then display it.
data <- suppressMessages(df_stats(Boston))
data 

StaticMissingVars

# Demo frame with 100 rows: an ID, four random columns and three columns
# that start out constant (D3 = 2, Output = 1, Volume = 4).
df <- data.frame(
  ID = 1:100,
  X = rnorm(100),
  X2 = rnorm(100),
  X3 = rnorm(100),
  D3 = 2,
  Y = rnorm(100),
  Output = 1,
  Volume = 4
)

# Make X mostly missing (75 of 100 rows) and give Output and D3 a second
# value in most rows (96 and 85 rows respectively); Volume stays constant.
df[1:75, "X"] <- NA
df[1:96, "Output"] <- 3
df[1:85, "D3"] <- 3

# Hand the frame to Static_Missing_Vars and keep whatever it returns.
df <- Static_Missing_Vars(df)

parallelApply

# Label a single value: "big" when it exceeds 2, "Thats random" when it
# equals exactly 1, and "Hello" for everything else.
p.func <- function(x) {
  if (x > 2) {
    return("big")
  }
  if (x == 1) {
    return("Thats random")
  }
  "Hello"
}

# One-column frame of 10000 standard-normal draws to label with p.func.
df <- data.frame(id = rnorm(10000))
# Hand the frame, the value 1, p.func and another 1 to parallelApply with
# messages silenced.
# NOTE(review): the 1s are presumably the column index and the parameter
# count — confirm against parallelApply's definition.
df <- suppressMessages(parallelApply(df,1,p.func,1))
# Count how often each label occurs in the ParRow column of the result.
table(df$ParRow)

Example with two parameters

# Label a pair of scalar values:
#   "big"           when x > 20 and y > 200,
#   "Thats random"  when x < 20 and y > 188,
#   "Hello"         otherwise (including x exactly 20).
# The conditions use the scalar operator && because `if` needs a single
# TRUE/FALSE; the elementwise & is meant for building vector masks.
m.func <- function(x, y) {
  if (x > 20 && y > 200) {
    "big"
  } else if (x < 20 && y > 188) {
    "Thats random"
  } else {
    "Hello"
  }
}

# Three columns of 10000 normal draws each; id and y are given explicit
# means and standard deviations, x is standard normal.
df <- data.frame(
  id = rnorm(10000, mean = 18, sd = 10),
  x = rnorm(10000),
  y = rnorm(10000, mean = 200, sd = 60)
)

# Call parallelApply with columns 1 and 3, m.func and the value 2,
# silencing messages.
# NOTE(review): 2 presumably is the number of parameters m.func takes —
# confirm against parallelApply's definition.
df <- suppressMessages(parallelApply(df, c(1, 3), m.func, 2))

# Count how often each label occurs in the ParRow column of the result.
table(df$ParRow)

trim

# Sample text containing runs of repeated spaces and a trailing space.
sentence <- "It    is a beautiful    day, let us go to the        park for a    picinic "
# Pass the sample through trim.
# NOTE(review): presumably this strips or collapses the extra whitespace —
# confirm against trim's definition.
trim(sentence)


moone009/wec-preprocess documentation built on May 23, 2019, 6:10 a.m.