Load the libraries and the Mio Affitto dataset:
# Attach the packages used by this analysis; suppressMessages() hides their
# start-up messages.
suppressMessages({
  library(data.table)
  library(ggplot2)
  library(scales)
  library(glmnet)
  library(sqldf)
  library(reshape2)
  library(romeHousePrices)
})

# NOTE(review): assignDirectory() is presumably what defines `savingDir`,
# which is read on the next line -- confirm against romeHousePrices.
assignDirectory()
# Restore the saved Mio Affitto snapshot. NOTE(review): the rest of the script
# uses a table called `d`, presumably restored by this load() -- verify.
load(file.path(savingDir, "detail_Mio_2015.10.05.06.28.32_cleaned.RData"))

## When filtering to top zones, quartieres, etc. how many should be kept?
maxToPlot <- 10
Set some options:
## Keep data.table from printing all the time
# NOTE(review): `.global` is reached through `:::`, i.e. it is not part of
# data.table's exported interface, so this line may stop working with other
# data.table versions.
assign("depthtrigger", 100, data.table:::.global)
# Bucket `superficie` to the nearest multiple of 25 (added to `d` by reference).
d[, roundSuper := round(superficie / 25) * 25]

# For each price, show the share of listings falling in each surface bucket.
# position = "fill" normalises every bar to 1, hence the percent y scale.
# print() is explicit so the plot is drawn even when the script is source()d.
print(
  ggplot(d, aes(x = prezzo, fill = factor(roundSuper))) +
    geom_bar(position = "fill") +
    xlim(c(0, 3001)) +
    labs(y = "", fill = "superficie") +
    scale_y_continuous(labels = percent)
)
Not surprisingly, price increases with number of rooms. However, it's interesting to see that the increase seems to be faster than linear: prices really jump after 4 or 5 rooms. But, that may just mean that we've moved into a group of really nice houses, whereas the 1-4 locali houses are a mix of fancy and not so fancy.
# Price against number of rooms: raw points with a smoothed trend.
ggplot(data = d, mapping = aes(locali, prezzo)) +
  geom_point() +
  geom_smooth()

# Price against number of rooms: one box per room count.
ggplot(data = d, mapping = aes(locali, prezzo)) +
  geom_boxplot(aes(group = locali))

# Price against surface area: raw points with a smoothed trend.
ggplot(data = d, mapping = aes(superficie, prezzo)) +
  geom_point() +
  geom_smooth()

# Price per unit of surface against surface area.
ggplot(data = d, mapping = aes(superficie, prezzo / superficie)) +
  geom_point() +
  geom_smooth()

# Price against number of bathrooms: raw points.
ggplot(data = d, mapping = aes(bagni, prezzo)) +
  geom_point()

# Price against number of bathrooms: one box per bathroom count.
ggplot(data = d, mapping = aes(bagni, prezzo)) +
  geom_boxplot(aes(group = bagni))
# Box plots of price for the most frequent zones.
# Temporary per-row count of listings sharing the same zona (dropped below).
d[, zonaCnt := .N, by = zona]

# Cutoff = the maxToPlot-th largest distinct group size. If there are fewer
# distinct sizes than maxToPlot, use the smallest one instead: indexing past
# the end would give NA, and `zonaCnt >= NA` keeps no rows at all.
cutoff <- local({
  sizes <- sort(unique(d$zonaCnt), decreasing = TRUE)
  sizes[min(maxToPlot, length(sizes))]
})

# Labels are truncated to 30 characters; grouping still uses the full zona.
ggplot(d[zonaCnt >= cutoff, ],
       aes(x = substr(zona, 1, 30), y = prezzo, group = zona)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90))

# Remove the helper column so `d` is left as it was.
d[, zonaCnt := NULL]
# Box plots of price for the most frequent quartieri.
# Temporary per-row count of listings sharing the same quartiere (dropped below).
d[, quartiereCnt := .N, by = quartiere]

# Cutoff = the maxToPlot-th largest distinct group size. If there are fewer
# distinct sizes than maxToPlot, use the smallest one instead: indexing past
# the end would give NA, and `quartiereCnt >= NA` keeps no rows at all.
cutoff <- local({
  sizes <- sort(unique(d$quartiereCnt), decreasing = TRUE)
  sizes[min(maxToPlot, length(sizes))]
})

ggplot(d[quartiereCnt >= cutoff, ],
       aes(x = quartiere, y = prezzo, group = quartiere)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90))

# Remove the helper column so `d` is left as it was.
d[, quartiereCnt := NULL]
# Box plots of price for the most frequent CAP values.
# Temporary per-row count of listings sharing the same CAP (dropped below).
d[, capCnt := .N, by = CAP]

# Cutoff = the maxToPlot-th largest distinct group size. If there are fewer
# distinct sizes than maxToPlot, use the smallest one instead: indexing past
# the end would give NA, and `capCnt >= NA` keeps no rows at all.
cutoff <- local({
  sizes <- sort(unique(d$capCnt), decreasing = TRUE)
  sizes[min(maxToPlot, length(sizes))]
})

ggplot(d[capCnt >= cutoff, ],
       aes(x = CAP, y = prezzo, group = CAP)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90))

# Remove the helper column so `d` is left as it was.
d[, capCnt := NULL]
Partition data by:

- CAP
- Zona
- Quartiere
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.