In this example, a generalized boosted model is developed to predict the prices of homes sold in Iowa City, IA during 2005-2008.
## Summary statistics table for the ICHomes data
library(MachineShop)

## Specification of the table rows: each named entry groups one or more
## labelled one-sided formulas written in terms of ICHomes column names.
## NOTE(review): median_range(), n_perc() and summary_kbl() are helpers defined
## outside this snippet; presumably the formulas are evaluated against the
## data passed to summary_kbl() -- confirm against their definitions.
stats <- list(
  list("Number of homes" = ~ length(sale_amount)),
  "sale_amount" = list(
    "Median (Range)" = ~ median_range(sale_amount, "$", big.mark = ",",
                                      big.interval = 3)
  ),
  "sale_year" = list("Median (Range)" = ~ median_range(sale_year)),
  "sale_month" = list("Median (Range)" = ~ median_range(sale_month)),
  "built" = list("Median (Range)" = ~ median_range(built)),
  "style" = list(
    "Home" = ~ n_perc(style == "Home"),
    "Condo" = ~ n_perc(style == "Condo")
  ),
  "construction" = list(
    "1 1/2 Story Frame" = ~ n_perc(construction == "1 1/2 Story Frame"),
    "1 Story Brick" = ~ n_perc(construction == "1 Story Brick"),
    "1 Story Condo" = ~ n_perc(construction == "1 Story Condo"),
    "1 Story Frame" = ~ n_perc(construction == "1 Story Frame"),
    "2 Story Brick" = ~ n_perc(construction == "2 Story Brick"),
    "2 Story Condo" = ~ n_perc(construction == "2 Story Condo"),
    "2 Story Frame" = ~ n_perc(construction == "2 Story Frame"),
    "Split Foyer Frame" = ~ n_perc(construction == "Split Foyer Frame"),
    "Split Level Frame" = ~ n_perc(construction == "Split Level Frame")
  ),
  "base_size (sq ft)" = list("Median (Range)" = ~ median_range(base_size)),
  "garage1_size (sq ft)" = list(
    "Median (Range)" = ~ median_range(garage1_size)
  ),
  "garage2_size (sq ft)" = list(
    "Median (Range)" = ~ median_range(garage2_size)
  ),
  "lot_size (sq ft)" = list("Median (Range)" = ~ median_range(lot_size)),
  "bedrooms" = list(
    "1-2" = ~ n_perc(bedrooms %in% 1:2),
    "3" = ~ n_perc(bedrooms == 3),
    "4" = ~ n_perc(bedrooms == 4),
    "5+" = ~ n_perc(bedrooms > 4)
  ),
  "basement" = list(
    "Yes" = ~ n_perc(basement == "Yes"),
    "No" = ~ n_perc(basement == "No")
  ),
  "ac" = list(
    "Yes" = ~ n_perc(ac == "Yes"),
    "No" = ~ n_perc(ac == "No")
  ),
  "attic" = list(
    "Yes" = ~ n_perc(attic == "Yes"),
    "No" = ~ n_perc(attic == "No")
  ),
  "lon" = list("Median (Range)" = ~ median_range(lon)),
  "lat" = list("Median (Range)" = ~ median_range(lat))
)

## Render the table from the specification and the data set
summary_kbl(stats, ICHomes)
## Analysis libraries
library(MachineShop)
library(ggplot2)

## Training and test sets
## The seed is fixed so that the random split below is reproducible.
set.seed(123)
## Two thirds of the rows are sampled for training; the rest form the test set.
train_indices <- sample(nrow(ICHomes), nrow(ICHomes) * 2 / 3)
trainset <- ICHomes[train_indices, ]
testset <- ICHomes[-train_indices, ]

## Model formula: sale_amount modelled on all remaining variables
fo <- sale_amount ~ .

## Boosted regression model tuned with the training set
model_fit <- TunedModel(GBMModel, grid = 5) %>%
  fit(fo, data = trainset)

## Variable importance
vi <- varimp(model_fit)
plot(vi)
## Performance plotted over the grid points
## The outer parentheses print the converted model in addition to assigning it.
(tuned_model <- as.MLModel(model_fit))
plot(tuned_model, type = "line")
## Test set observed and predicted sale amounts
obs <- response(model_fit, newdata = testset)
pred <- predict(model_fit, newdata = testset)

## Test set performance
performance(obs, pred)
## Calibration of the predicted against the observed test set responses
## (obs and pred are the test set values computed earlier in the script).
cal <- calibration(obs, pred, breaks = NULL)
plot(cal, se = TRUE)
## Marginal predictor effects
## Partial dependence is computed for the three selected predictors only.
pd <- dependence(model_fit, select = c(base_size, built, basement))
plot(pd)
## Spatial distribution
## Joint dependence on longitude and latitude (interaction = TRUE, n = 25).
pd <- dependence(model_fit, select = c(lon, lat), interaction = TRUE, n = 25)

## Combine the predictor values with the dependence values for plotting
df <- cbind(pd$Predictors, sale_amount = pd$Value)

## Binned summary of sale_amount over lon/lat, with the training set
## locations overlaid as points
ggplot(df) +
  stat_summary_2d(aes(lon, lat, z = sale_amount), binwidth = 0.006) +
  geom_point(aes(lon, lat), data = trainset) +
  labs(x = "Longitude", y = "Latitude", fill = "Sale Amount")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.