# vignettes/Examples.R

# Example 1 - Predict sales prices

# Setup:
# https://www.kaggle.com/c/house-prices-advanced-regression-techniques
# House Prices: Advanced Regression Techniques
# Load the house-price table and keep only the columns used in this example
# (original column order is preserved).
dt_house <- read.csv("./data/data_house.csv")
house_cols <- c(
  "MSSubClass", "MSZoning", "LotArea", "LotShape",
  "Alley", "LandContour", "LotConfig", "LandSlope",
  "Neighborhood", "BldgType", "WoodDeckSF", "OpenPorchSF",
  "HouseStyle", "OverallQual", "OverallCond", "SaleType",
  "SaleCondition", "LotFrontage", "MoSold", "SalePrice"
)
dt_house <- dt_house[, house_cols]

# Columns coerced to numeric; "SalePrice" is the response passed to select().
house_numeric_cols <- c(
  "LotArea", "WoodDeckSF", "OpenPorchSF", "OverallQual", "OverallCond",
  "LotFrontage", "SalePrice"
)

# Every remaining kept column is coerced to a factor. Deriving the list this
# way guarantees no kept column is skipped: previously "MoSold" was converted
# twice while "MSZoning" was never converted, which leaves it as character
# when read.csv() does not create factors itself (the default since R 4.0).
house_factor_cols <- setdiff(house_cols, house_numeric_cols)

dt_house[house_factor_cols] <- lapply(dt_house[house_factor_cols], as.factor)
dt_house[house_numeric_cols] <- lapply(dt_house[house_numeric_cols], as.numeric)

# Execution
select(dt_house, "SalePrice", fit_method = "lm", metric = "aic")



# Example 2 - Red Wine Quality

# Setup:
# https://www.kaggle.com/uciml/red-wine-quality-cortez-et-al-2009
# Read the red-wine table and coerce the response column to numeric.
dt_wine <- read.csv("./data/data_wine.csv")
dt_wine[["quality"]] <- as.numeric(dt_wine[["quality"]])

# Execution:
select(dt_wine, "quality", fit_method = "lm", metric = "aic")



# Example 3 - Life Expectancy (WHO)

# Setup:
# https://www.kaggle.com/kumarajarshi/life-expectancy-who
# Statistical Analysis on factors influencing Life Expectancy
# Read the life-expectancy table.
dt_life <- read.csv("./data/data_life.csv")

# Named columns: Country and Status become factors, Year and the response
# (Life.expectancy) become numeric.
dt_life[["Country"]] <- as.factor(dt_life[["Country"]])
dt_life[["Year"]] <- as.numeric(dt_life[["Year"]])
dt_life[["Status"]] <- as.factor(dt_life[["Status"]])
dt_life[["Life.expectancy"]] <- as.numeric(dt_life[["Life.expectancy"]])

# Coerce every column from the fifth onward to numeric. The index is built
# from seq_len() rather than `5:ncol`, because `5:n` counts downward
# (5, 4, ...) when the table has fewer than five columns; here the loop is
# simply skipped in that case.
life_numeric_idx <- setdiff(seq_len(ncol(dt_life)), seq_len(4L))
for (i in life_numeric_idx) {
  dt_life[[i]] <- as.numeric(dt_life[[i]])
}

# Execution:
select(dt_life, "Life.expectancy", fit_method = "lm", metric = "aic")


# Example 4 -  Bike sharing dataset

# Setup:
# Bike sharing dataset
# Read the bike-sharing table.
dt_bike <- read.csv("./data/data_bike.csv")

# Remove the instant, registered and casual columns.
bike_drop_cols <- c("instant", "registered", "casual")
dt_bike <- dt_bike[setdiff(names(dt_bike), bike_drop_cols)]

# Represent the date as the numeric value of its Date conversion.
dt_bike[["dteday"]] <- as.numeric(as.Date(dt_bike[["dteday"]]))

# Coerce the listed columns to factors.
for (bike_col in c("yr", "mnth", "holiday", "workingday", "weathersit")) {
  dt_bike[[bike_col]] <- as.factor(dt_bike[[bike_col]])
}

# Execution:
select(dt_bike, "cnt", fit_method = "lm", metric = "aic")


# Example 5

# Setup:
# Basic data set loading and a test of the lm() fit method.
# Load a built-in data set: BostonHousing.
# Attach mlbench, then load the BostonHousing data set into the workspace.
library(mlbench)
data("BostonHousing")

# Execution:
select(BostonHousing, "medv", fit_method = "lm", metric = "aic")

# NOTE(review): `repetition`, `k` and `temp` are not defined anywhere in the
# visible part of this file, so these lines will error unless they are defined
# elsewhere. Presumably they were lifted from a loop in another script --
# confirm before running.
# Build a vector of zeros via rep(0, repetition).
score_value <- rep(0,repetition)
# Store the second element of `temp` at position `k`.
score_value[k] <- temp[[2]]
# Plot the stored values.
plot(score_value)
# jakemanderson/GA documentation built on Jan. 1, 2020, 1:03 p.m.