Introduction

library("knitr")
library("plyr")
library("dplyr")
library("data.table")
library("treelapse")

Data Preparation

# Download the Zillow neighborhood table (Neighborhood_Zhvi_AllHomes.csv) to a
# temporary file.
tmp <- tempfile()
zillow_url <- "http://files.zillowstatic.com/research/public/Neighborhood/Neighborhood_Zhvi_AllHomes.csv"
download.file(zillow_url, tmp)

# Keep only the California rows. as.data.table() guarantees the result is a
# data.table, which the `with = FALSE` column indexing below relies on.
zillow <- fread(tmp) %>%
  filter(State == "CA") %>%
  as.data.table()

# fread() has already loaded the file into memory, so the download can go.
unlink(tmp)

# Append the region name to each RegionID.
zillow$RegionID <- paste0(zillow$RegionID, "_", zillow$RegionName)

# Columns 8 onward are the ones that get imputed.
# NOTE(review): assumes the first seven columns are identifiers / metadata and
# everything after them is numeric -- confirm against the current file layout.
value_cols <- 8:ncol(zillow)

# Fill each row's missing entries with the next observed value to its right
# (fromLast = TRUE). na.rm = FALSE keeps entries that have nothing after them
# as NA instead of dropping them; dropping would give rows of different
# lengths, so apply() would return a list and t() would fail.
# apply() over rows returns one column per row, hence the transpose.
zillow_impute <- zillow[, value_cols, with = FALSE] %>%
  as.matrix() %>%
  apply(1, function(x) {
    zoo::na.locf(x, fromLast = TRUE, na.rm = FALSE)
  }) %>%
  t()

# Write the imputed values back over the original columns.
zillow[, value_cols] <- data.table(zillow_impute)
# Columns used as the levels of the hierarchy, in the order they appear in
# each path (presumably coarsest to finest).
region_scales <- c("State", "Metro", "CountyName", "City", "RegionID")

# One row per region, one column per level.
paths <- zillow[, region_scales, with = FALSE] %>%
  as.matrix()

head(paths)

# Prefix the county, metro and city labels with a level tag (presumably so
# that equal names at different levels do not collide in the tree).
paths[, "CountyName"] <- paste0("Cn_", paths[, "CountyName"])
paths[, "Metro"] <- paste0("M_", paths[, "Metro"])
paths[, "City"] <- paste0("Ci_", paths[, "City"])

# Fill any empty level with the label immediately before the first empty
# level in that row.
# NOTE(review): the prefixing above turns a blank Metro / CountyName / City
# into "M_" / "Cn_" / "Ci_", so those cells never compare equal to "" here and
# are not filled; only State or RegionID could still be empty. If blanks in
# the prefixed columns were meant to be filled, the prefixes need to be
# applied after this loop (or only to non-empty cells) -- confirm intent.
for (i in seq_len(nrow(paths))) {
  is_empty <- paths[i, ] == ""
  if (any(is_empty)) {
    last_nonempty <- min(which(is_empty)) - 1
    paths[i, is_empty] <- paths[i, last_nonempty]
  }
}

# Build the parent/child edge list from the paths, then attach every node that
# appears as a parent but never as a child to a single "root" node.
edges <- taxa_edgelist(paths)
top_level_nodes <- setdiff(edges[, 1], edges[, 2])
edges <- rbind(
  edges,
  cbind("parent" = "root", "child" = top_level_nodes)
)
# Keep RegionID plus every column whose name starts with "19" or "20".
month_table <- select(zillow, RegionID, starts_with("19"), starts_with("20"))

# Reshape to long form (RegionID, month, value), then spread again so that
# each row is one month and each remaining column is one RegionID.
long_values <- melt.data.table(
  month_table,
  id.vars = "RegionID",
  variable.name = "month"
)
tip_values <- dcast.data.table(long_values, month ~ RegionID)

# Drop the leading `month` column and take base-10 logs of the values.
log_tip_matrix <- log(
  as.matrix(tip_values[, -1, with = FALSE]),
  base = 10
)

# Apply tree_mean over the edge list for every row of the tip matrix.
# NOTE(review): presumably this averages tip values up to each internal node,
# one row per month -- confirm against the treelapse documentation.
values <- tree_fun_multi(edges, log_tip_matrix, tree_mean)

# Expose the `row` column under the name `time`, which is selected for the
# plots later on.
values$time <- values$row

Visualization

# Both views take the same inputs: the (unit, time, value) columns of
# `values`, the edge list, and the same display options.
plot_values <- values %>% select(unit, time, value)
plot_opts <- list("size_max" = 4)

# Draw the two treelapse views from the shared inputs.
timebox_tree(plot_values, edges, display_opts = plot_opts)
treebox(plot_values, edges, display_opts = plot_opts)

Interpretation



krisrs1128/treelapse documentation built on Jan. 3, 2020, 6:29 p.m.