appraiseR: R Package For Real Estate Appraisals

library(magrittr)

centro_2015 <- read.csv2("./inst/centro_2015.csv", row.names = 1)

## Tidying data

# centro_2015
colnames(centro_2015) <- c("valor", "area_total", "quartos", "suites",
                           "garagens", "dist_b_mar", "padrao", "E", "N")

# centro_2015$padrao <-
#   stringi::stri_trans_general(centro_2015$padrao, "latin-ascii")


centro_2015 <- within(centro_2015, {
  PU <- valor/area_total
  padrao <- factor(padrao, levels = c("baixo", "médio", "alto"))
})

# padrao_levels <- c("baixo", "medio", "alto")
# centro_2015$padrao %<>% readr::parse_factor(padrao_levels)

## Transform data.frames to sf

centro_2015 <- sf::st_as_sf(centro_2015,
                            coords = c("E", "N"), crs = 31982)

#' Prices of 50 Florianopolis' downtown apartaments
#'
#' A SpatialPointsDataFrame containing a sample of 50 apartaments with
#' prices and other attributes in Florianopolis' downtown
#'
#' @format A tibble with 53 rows (50 samples and 3 apartments to be
#'   appraised) and 7 variables:
#' \itemize{
#'   \item valor: price, in brazilian Reais
#'   \item area_total: Total Area, in squared meters
#'   \item quartos: Rooms
#'   \item suites: Ensuites
#'   \item garagens: Garages
#'   \item dist_b_mar: Distance to the beach
#'   \item padrao: Building Standard - baixo, medio, alto
#'   (i.e. low, normal, high)
#' }
#' @source \strong{HOCHHEIM, Norberto}. \emph{Engenharia de avaliacoes
#' imobiliarias: Modulo Basico}. Florianopolis: IBAPE/SC, 2015, p.21-22
#' @examples
#' data(centro_2015)
#' centro_2015$padrao <- as.numeric(centro_2015$padrao)
#' fit <- lm(log(valor) ~ area_total + quartos + suites + garagens +
#'               log(dist_b_mar) + I(1/padrao), data = centro_2015)
#' # Look for outliers
#' library(car)
#' qqPlot(fit)
#' fit1 <- update(fit, subset = -c(31, 39))
#' qqPlot(fit1)
#' summary(fit1)
"centro_2015"

#zilli_2020 <- suppressWarnings(readr::read_csv2("./inst/zilli.csv"))
zilli_2020 <- read.csv2("./inst/zilli.csv")

zilli_2020 <- within(zilli_2020, {
  PSN <- factor(PSN)
  CH <- factor(CH)
  MO <- factor(MO, levels = c("N", "SM", "MO"))
  PC <- factor(PC, levels = c("B", "M", "A"))
  BRO <- factor(BRO, levels = c("Centro", "Agronomica", "Trindade"))
  })

zilli_2020 <- sf::st_as_sf(zilli_2020,
                           coords = c("COORD_E", "COORD_N"), crs = 31982)

#' Prices of 225 Florianopolis' apartaments in 3 neighborhoods
#'
#' A SpatialPointsDataFrame containing a sample of 225 apartaments with
#' prices and other attributes in 3 different Florianopolis' neighbourhoods.
#'
#' @format A tibble with 53 rows (50 samples and 3 apartments to be
#'   appraised) and 7 variables:
#' \itemize{
#'   \item VT: price, in brazilian Reais
#'   \item VU: price per sq. meter
#'   \item AP: Private Area, in squared meters
#'   \item DPXV: Distance to Praça XV
#'   \item DSBM: Distance to Beira Mar Mall
#'   \item DSIG: Distance to Iguatemi Mall
#'   \item DCTC: Distance to  CTC/UFSC
#'   \item DABM: Distance to Beira Mar Avenue
#'   \item ND: Number of rooms
#'   \item NB: Number of bathrooms
#'   \item NS: Number of ensuites
#'   \item NG: Number of garages
#'   \item MO: Furnishes - N (none), SM (some), MO (full)
#'   \item PSN: Swimming pool?
#'   \item CH: Barbecue grill?
#'   \item PC: Building Standard - B, M, A
#'   (i.e. low, normal, high)
#'   \item BRO: Neighborhood
#' }
#' @source \strong{ZILLI, Carlos Augusto}. \emph{Regressão geograficamente
#' ponderada aplicada na avaliação em massa de imóveis urbanos.}. 2020.
#' Dissertação de Mestrado em Engenharia de Transportes e Gestão Territorial.
#' Centro Tecnológico da UFSC. Florianópolis/SC.
#' @examples
#' data(zilli_2020)
#' zilli_2020$PC <- as.numeric(zilli_2020$PC)
#' fit <- lm(log(VU) ~ log(AP) + log(DABM) + ND + NB + NG + PSN + PC,
#' data = zilli_2020[1:190, ], subset = -c(86, 115))
#' summary(fit)
#'
#' fefit <- lm(log(VU) ~ log(AP) + log(DABM) + ND + NB + NG + PSN + PC + BRO,
#' data = zilli_2020[1:190, ], subset = -c(86, 115))
#' summary(fefit)
#'
"zilli_2020"

trindade <- data.frame(PU = c(427, 458, 510, 511, 528, 545, 564, 574, 574, 590,
                              601, 602, 602, 609, 620))

#' Prices of 15 Florianopolis' apartaments with 2 rooms in Trindade neighbourhood
#'
#' A tibble containing a sample of 15 unitary values of apartment sales prices
#' per squared meters.
#' @format a tibble with 15 rows and 1 variable:
#' \itemize{
#'   \item PU unitary price per squared meter.
#' }
#' @source \strong{HOCHHEIM, Norberto}. \emph{Engenharia de Avaliacoes I}.
#' Florianopolis: IBAPE/SC, 2005, p.18
"trindade"

jungles <- tibble::tribble(
  ~Meses,  ~a, ~b, ~c, ~d, ~e, ~f, ~g, ~h, ~i, ~j, ~k, ~l, ~m, ~n, ~o, ~p, ~q, ~r, ~s, ~t, ~u, ~v, ~x, ~y,
  1, 100, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  2,  47, 53, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  3,  23, 50, 27, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  4,  14, 33, 37, 16, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  5,   9, 24, 30, 28,  9, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  6,   7, 17, 23, 26, 21,  6, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  7,   5, 12, 19, 23, 20 ,17,  4, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  8,   4, 10, 15, 17, 20, 18, 13,  3, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  9,   3,  8, 13, 16, 16, 18, 14,  9,  3, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  10,   2,  6, 11, 14, 14, 15, 14, 14,  8,  2, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  11,   2,  5,  8, 12, 13, 13, 14, 15, 12,  4,  2, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  12,   2,  5,  6, 11, 11, 11, 14, 12, 11, 11,  4,  2, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  13,   2,  4,  6,  8, 11, 11, 11, 11, 13,  9,  8,  5,  1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  14,   2,  3,  5,  6, 10, 10, 11, 11, 12,  9,  9,  8,  3,  1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  15,   2,  2,  4,  7,  8, 10, 10, 10, 10, 10,  9,  8,  6,  3,  1, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  16,   2,  2,  3,  6,  7,  8,  9,  9, 10, 10,  9,  9,  8,  4,  3,  1, NA, NA, NA, NA, NA, NA, NA, NA,
  17,   2,  2,  3,  5,  5,  8,  9,  9,  9, 10, 10,  8,  7,  6,  4,  2,  1, NA, NA, NA, NA, NA, NA, NA,
  18,   2,  2,  3,  4,  4,  8,  8,  8,  8,  9,  9,  9,  8,  6,  6,  3,  2,  1, NA, NA, NA, NA, NA, NA,
  19,   1,  2,  3,  4,  4,  7,  7,  8,  8,  8,  7,  7,  7,  7,  6,  6,  4,  3,  1, NA, NA, NA, NA, NA,
  20,   1,  2,  3,  3,  3,  4,  6,  7,  9,  9,  8,  8,  7,  6,  6,  6,  6,  3,  2,  1, NA, NA, NA, NA,
  21,   1,  1,  2,  2,  4,  6,  6,  6,  6,  7,  7,  8,  7,  7,  6,  6,  6,  5,  4,  2,  1, NA, NA, NA,
  22,   1,  2,  2,  2,  4,  4,  6,  6,  6,  6,  7,  8,  8,  7,  6,  6,  5,  5,  4,  2,  2,  1, NA, NA,
  23,   1,  2,  2,  2,  4,  4,  5,  5,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  5,  4,  2,  2,  1, NA,
  24,   1,  1,  2,  2,  3,  3,  4,  6,  6,  6,  6,  6,  7,  7,  6,  6,  6,  5,  5,  5,  3,  2,  1,  1
)

#' Parametrized optimals costs percentages per period according to Jungles and
#' Avila (2009)
#'
#' A tibble containing optimal costs percentages for each period for projects
#' with 1 to 24 periods.
#' @format a tibble with 24 rows and 25 columns
#'
#' @source \strong{JUNGLES, A. E.; AVILA, A. V.}. \emph{Gestao do controle e
#' planejamento de empreendimentos}. 2009.
"jungles"

loteamento <- read.csv("./inst/loteamento_residencial.csv")

#' Land division data
#'
#' A tibble containing a sample of 20 plots in subdivision in Florianopolis.
#' Paradigm situation: dry, flat, 15m front width and 30~60m length.
#'
#' @format A tibble with 20 rows and 8 variables:
#' \itemize{
#'   \item preco: price, in brazilian Reais
#'   \item area: land area in squared meters
#'   \item tipo: type: offer or sale -
#'   venda, oferta (i.e. sale, offer)
#'   \item frente: front width of the land in meters
#'   \item profundidade: length of the land in meters
#'   \item topo: topography -
#'   plano, aclive (i.e. flat, slope)
#'   \item inclinacao: slope
#'   \item pedologia: pedology -
#'   seco, pantanoso (i.e. dry, marshy)
#' }
#' @source \strong{HOCHHEIM, Norberto}. \emph{Engenharia de Avaliacoes I}.
#' Florianopolis: IBAPE/SC, 2005, p.74
#' @examples
#' data(loteamento)
#'
#' # 1. Fatores do IBAPE/SP 2005 (aditivo), cf. Hochheim (2005 , p.82)
#'
#' loteamento <- within(loteamento, {
#'                Coferta <- ifelse(tipo == "oferta", 1.11, 1)
#'                Cfrente <- (frente/15)^0.15
#'                Ctopo <- ifelse(topo == "plano", 1,
#'                           ifelse(inclinacao/100 >= .20, 0.85,
#'                             ifelse(inclinacao/100 > .10,  0.90,
#'                               ifelse(inclinacao/100 > 0, .95,
#'                                 ifelse(inclinacao/100 >= -.05, .95,
#'                                   ifelse(inclinacao/100 >= -.10, .9,
#'                                     ifelse(inclinacao/100 >= -.20, .80, .70)))))))
#'                Cpedo <- ifelse(pedologia == "seco", 1, .6)
#'                Chom <-  (1 + ((Coferta - 1) + (Cfrente - 1) +
#'                                       (Ctopo - 1) + (Cpedo - 1)))
#'                PU <- preco/area
#'                PUhom <- PU/Chom
#' }
#' )
#'
#' # 1.1 Saneamento da amostra
#'
#' outlier_analysis(loteamento$PUhom)
#' outlier_analysis(loteamento$PUhom, "2_sd")
#' outlier_analysis(loteamento$PUhom, "chauvenet")
#'
#' # 1.2 Avaliacao final
#' Valor <- mean(loteamento$PUhom[-c(7, 19)])
#' sdValor <- sd(loteamento$PUhom[-c(7, 19)])
#'
#' # 1.3 Poder de predicao
#' loteamento <- within(loteamento, P <- Valor*area*Chom)
#' with(loteamento, powerPlot(y = preco[-c(7, 19)], yhat = P[-c(7, 19)],
#'                            axis = "inverted"))
#'
#' # 2. Fatores IBAPE/SP 2011 (misto)
#'
#' loteamento <- within(loteamento, {
#'                Coferta <- ifelse(tipo == "oferta", 1.11, 1)
#'                Cfrente <- (frente/15)^0.15
#'                Ctopo <- ifelse(topo == "plano", 1,
#'                           ifelse(inclinacao/100 >= .20, 0.85,
#'                             ifelse(inclinacao/100 > .10,  0.90,
#'                               ifelse(inclinacao/100 > 0, .95,
#'                                 ifelse(inclinacao/100 >= -.05, .95,
#'                                   ifelse(inclinacao/100 >= -.10, .9,
#'                                     ifelse(inclinacao/100 >= -.20, .80, .70)))))))
#'                Cpedo <- ifelse(pedologia == "seco", 1, .6)
#'                Chom <-  Coferta*(1 + ((Cfrente - 1) + (Ctopo - 1) +
#'                                   (Cpedo - 1)))
#'                PU <- preco/area
#'                PUhom <- PU/Chom
#' }
#' )
#'
#' # 2.1 Saneamento da amostra
#'
#' outlier_analysis(loteamento$PUhom)
#' outlier_analysis(loteamento$PUhom, "2_sd")
#' outlier_analysis(loteamento$PUhom, "chauvenet")
#'
#' # 2.2 Avaliacao final
#' Valor <- mean(loteamento$PUhom[-c(7, 19)])
#' sdValor <- sd(loteamento$PUhom[-c(7, 19)])
#'
#' # 2.3 Poder de predicao
#' loteamento <- within(loteamento, P <- Valor*area*Chom)
#' with(loteamento, powerPlot(y = preco[-c(7, 19)], yhat = P[-c(7, 19)],
#'            axis = "inverted"))
#'
#' # 3. Fatores multiplicativos
#'
#' loteamento <- within(loteamento, {
#'                Coferta <- ifelse(tipo == "oferta", 1.11, 1)
#'                Cfrente <- (frente/15)^0.25
#'                Chom <-  Coferta*Cfrente*Ctopo*Cpedo
#'                PU <- preco/area
#'                PUhom <- PU/Chom
#' }
#' )
#'
#' # 3.1 Saneamento da amostra
#'
#' outlier_analysis(loteamento$PUhom)
#' outlier_analysis(loteamento$PUhom, "2_sd")
#' outlier_analysis(loteamento$PUhom, "chauvenet")
#'
#' # 3.2 Avaliacao final
#'
#' Valor <- mean(loteamento$PUhom[-c(7, 19)])
#' sdValor <- sd(loteamento$PUhom[-c(7, 19)])
#'
#' # 3.3 Poder de predicao
#'
#' loteamento <- within(loteamento, P <- Valor*area*Chom)
#' with(loteamento, powerPlot(y = preco[-c(7, 19)], yhat = P[-c(7, 19)],
#'            axis = "inverted"))
#'
#' # 4. Regressao Linear
#'
#' fit <- lm(log(PU) ~ log(frente/15) + tipo + poly(inclinacao, 2) + pedologia,
#'            data = loteamento, subset = -c(7, 19))
#' powerPlot(fit, axis = "inverted", scale = "original", FUN = "log")
#' p <- predict(fit, newdata = list(frente = 15, tipo = "venda", inclinacao = 0,
#'                               pedologia = "seco"))
#' exp(p)

"loteamento"

jurere_2017 <- read.csv2("./inst/jurere.csv")

jurere_2017 <- within(jurere_2017, {
  ZONEAMENTO <- factor(ZONEAMENTO)
  PAVIMENTOS <- factor(PAVIMENTOS)
  TOPOGRAFIA <- factor(TOPOGRAFIA)
  ESQUINA <- factor(ESQUINA, levels = c(0,1), labels = c("Sim", "Nao"))
  CONDOMINIO_FECHADO <- factor(CONDOMINIO_FECHADO, levels = c(0,1),
                               labels = c("Sim", "Nao"))
  FRENTES <- as.integer(FRENTES)
})

jurere_2017 <- sf::st_as_sf(jurere_2017,
                            coords = c("E", "N"), crs = 31982)

#' Land division data
#'
#' A tibble containing a sample of 20 plots in subdivision in Florianopolis.
#' Paradigm situation: dry, flat, 15m front width and 30~60m length.
#'
#' @format A tibble with 20 rows and 8 variables:
#' \itemize{
#'   \item VALOR TOTAL: price, in brazilian Reais
#'   \item VU: unitary value per sq. meters
#'   \item AREA: area, in sq. meters
#'   \item TESTADA: front width of the land in meters
#'   \item ESQUINA: corner?
#'   0 = no; 1 = yes
#'   \item FRENTES: number of lot fronts
#'   \item DIST_MAR: distance to the sea
#'   \item PAVIMENTOS: number of floors
#'   \item DIST_MAR: distance to the sea
#'   \item CONDOMINIO_FECHADO: gated community?
#'   0 = no, 1 = yes.
#'   \item PAVIMENTO: paved?
#'   NAO = no, SIM = yes.
#'   \item TOPOGRAFIA: topography
#'   PLANO = flat
#'   \item DATA: date
#'   \item FONTE: source
#' }
#' @examples
#' data(jurere_2017)
#' fit <- lm(log(VU) ~ log(AREA)*log(TESTADA) + log(DIST_MAR) + PAVIMENTOS,
#'           data = jurere_2017)
#' library(effects)
#' plot(predictorEffects(fit, residuals = T), multiline = T, main = NULL)
#'
#' # Centering
#'
#' library(dplyr)
#' fit1 <- lm(log(VU) ~ log(AREA/450) + log(TESTADA/15) + log(DIST_MAR/33) + PAVIMENTOS,
#'           data = jurere_2017, subset = -c(11, 27, 29))
#'
"jurere_2017"

trivelloni_2005 <- read.csv2("./inst/trivelloni_2005.csv")

# trivelloni_2005
colnames(trivelloni_2005) <- c("Obs", "E", "N", "valor", "tipo", "area_total",
                               "area_terreno", "garagens", "novo",
                               "P_2", "P_3", "P_4")

trivelloni_2005 <- within(trivelloni_2005, {
  novo <- factor(novo, levels = c(0,1), labels = c("Nao", "Sim"))
  garagens <- factor(garagens, levels = c(0,1), labels = c("Nao", "Sim"))
  padrao <- ifelse(P_2 == 1, "alto",
                   ifelse(P_3 == 1, "medio",
                          ifelse(P_4 == 1, "baixo", NA)))
  tipo <- factor(tipo, levels = c("Apartame", "Kitinete", "Comercia",
                                  "terreno", "casa"))
})

trivelloni_2005 <- trivelloni_2005[, -1]

trivelloni_2005 <- sf::st_as_sf(trivelloni_2005,
                                coords = c("E", "N"),
                                crs = 5527)

#' Urban large parcels data
#'
#' A tibble containing a sample of 19 large parcels.
#'
#' @format A tibble with 19 rows and 4 variables:
#' \itemize{
#'   \item R: id
#'   \item Ficha: another id (unused)
#'   \item VU: unitary value per sq. meters
#'   \item Area: area, in sq. meters
#' }
#' @examples
#' data(glebas)
#' fit <- lm(rec(VU) ~ log(Area), data = glebas)
#' library(effects)
#' plot(predictorEffects(fit, residuals = T), id = T,
#'       axes = list(
#'         grid = TRUE,
#'         y=list(transform=list(trans=rec, inverse=rec), lab = "VU")
#'         )
#'      )
#' # Issue: Influential Point 4 (see also plot(fit))
#'
#' # Solution:
#' fit1 <- update(fit, rec(VU)~log(Area), subset = -4)
#' plot(predictorEffects(fit1, residuals = T), id = T,
#'       axes = list(
#'         grid = TRUE,
#'         y=list(transform=list(trans=rec, inverse=rec), lab = "VU")
#'       )
#'     )
"glebas"

glebas <- read.csv2("./inst/glebas.csv")

#' Urban large parcels with built area data
#'
#' A tibble containing a sample of 20 large parcels with different built areas.
#'
#' @format A tibble with 20 rows and 5 variables:
#' \itemize{
#'   \item R: id
#'   \item Ficha: another id (unused)
#'   \item VI: sale price
#'   \item AT: land area, in sq. meters
#'   \item AC: Built area, in sq. meters
#' }
#' @examples
#' data(glebas2)
#' fit <- lm(VI ~ log(AT) + AC, data = glebas2)
#' library(effects)
#' plot(predictorEffects(fit, residuals = T), id = T,
#'       axes = list(
#'         grid = TRUE,
#'         x = list(rotate=30)
#' ))
#' powerPlot(fit, axis="inverted", smooth = TRUE, methods = c("lm", "loess"))
#' # Issue: Influential Points 5 and 10 (see also plot(fit))
#'
#' # Solution 1 (better to interpret):
#' fit1 <- update(fit, VI ~ AT + AC, subset = -c(2, 5,10))
#' plot(predictorEffects(fit1, residuals = T), id = T,
#'       axes = list(
#'         grid = TRUE,
#'         x = list(rotate=30)
#' ))
#' powerPlot(fit1, axis = "inverted", smooth = TRUE, methods = c("lm", "loess"))
#' predict(fit1, newdata = list(AT = 9123.50, AC = 2272.47),
#'              interval = 'confidence', level = .80)
#' # + 30% higher value than predicted with the original fit
#'
#' # Solution 2 (just to add some nonlinear relationship between the original
#'                variables)
#' fit2 <- update(fit, sqrt(VI) ~ sqrt(AT) + sqrt(AC), subset = -c(2, 10))
#' plot(predictorEffects(fit2, residuals = T), id = T,
#'       axes = list(
#'         grid = TRUE,
#'         x = list(rotate=30),
#'         y = list(transform=list(trans=sqrt, inverse=sqr), lab = "VI")
#' ))
#' powerPlot(fit2, axis = "inverted", smooth = TRUE, methods = c("lm", "loess"),
#'             func="sqrt") # note bias and nonlinearity
#'
#' predict(fit2, newdata = list(AT = 9123.50, AC = 2272.47),
#'              interval = 'confidence', level = .80)
#' # Almost 50% higher value than predicted with the original fit
"glebas2"

glebas2 <- read.csv2("./inst/glebas2.csv")

#' Urban large parcels in different urban contexts
#'
#' A tibble containing a sample of 17 large parcels within differents urban
#' contexts.
#'
#' @format A tibble with 17 rows and 5 variables:
#' \itemize{
#'   \item R: id
#'   \item VU: unitary value per sq. meters
#'   \item AT: land area, in sq. meters
#'   \item ACESSO: dummy variable that indicates if the area is direct reachble
#'   or not
#'   \item SUP: dummy variable that indicates if the area was landfilled
#' }
#' @examples
#' data(glebas3)
#' fit <- lm(log(VU) ~ I(AT^-1) + ACESSO + SUP, data = glebas3)
#' library(effects)
#' plot(predictorEffects(fit, residuals = T), id = T,
#'       axes = list(
#'         grid = TRUE,
#'         x = list(rotate=30),
#'         y = list(transform=list(trans=log, inverse=exp), lab = "VU")
#' ))
#' powerPlot(fit, axis="inverted", smooth = TRUE, methods = c("lm", "loess"))
#' p <- predict(fit, newdata = list(AT = 60000, ACESSO = factor(0),
#'                                   SUP = factor(1)),
#'                interval = "confidence", level = .80
#'             )
#' exp(p)
#' amplitude(exp(p)) # very good!

"glebas3"

glebas3 <- read.csv2("./inst/glebas3.csv")

glebas3 <- within(glebas3,{
  ACESSO <- factor(ACESSO)
  SUP <- factor(SUP)
})

#' Different relations between sales prices and listing prices
#'
#' A tibble containing a sample of 65 houses in Atibaia/SP with listing and
#' sales prices.
#'
#' @format A tibble with 65 rows and 17 variables:
#' \itemize{
#'   \item Id: id
#'   \item Endereco: house address
#'   \item Descricao: Advertising text
#'   \item Bairro: neighborhood
#'   \item AreaConstruida: building area
#'   \item DataTransacao: sales date
#'   \item Data: sales date, with convenient format for modelling
#'   \item Localizacao: indicative of house to be inside gated community or not
#'   \item IndiceFiscal: land price per square meter
#'   \item PadraoConstrutivo: construction
#'   \item OrigemComprador: buyer origin
#'   \item FinalidadeCompra: purpose of purchase
#'   \item ValorOfertado: List prices
#'   \item ValorVendido: Sale prices
#'   \item FatorOferta: relation between list and sales prices
#'   \item PUvenda: sale price per square meter
#'   \item PUoferta: list price per square meter
#' }
#' @examples
#' data(atibaia)
#' fitOferta <- lm(log(PUoferta) ~ log(IndiceFiscal) + log(AreaConstruida) +
#'                  PadraoConstrutivo, data = atibaia)
#' fitVenda <- lm(log(PUvenda) ~ log(IndiceFiscal) + log(AreaConstruida) +
#'                 PadraoConstrutivo, data = atibaia)
#' plotModel(fitOferta, ca = TRUE, residuals = TRUE, colour = PadraoConstrutivo,
#'           at = list(IndiceFiscal = 300, AreaConstruida = 300,
#'                     PadraoConstrutivo = "Médio"))
#' plotModel(fitVenda, ca = TRUE, residuals = TRUE, colour = PadraoConstrutivo,
#'           at = list(IndiceFiscal = 300, AreaConstruida = 300,
#'                     PadraoConstrutivo = "Médio"))
"atibaia"
atibaia <- read.csv2('inst/atibaia.csv')

atibaia <- within(atibaia, {
  PUoferta <- ValorOfertado/AreaConstruida
  PUvenda <- ValorVendido/AreaConstruida
  FatorOferta <- ValorVendido/ValorOfertado
  OrigemComprador <- factor(OrigemComprador)
  Localizacao <- factor(Localizacao)
  FinalidadeCompra <- factor(FinalidadeCompra)
  PadraoConstrutivo <- factor(PadraoConstrutivo,
                              levels = c("Simples", "Médio", "Superior", "Fino"))
})

#' Land lots in Campo Grande/MS
#'
#' @format A tibble with 2769 rows and 55 variables:
#' @examples
#' data(CampoGrande)
#'
#' # EXPLORATORY ANALYSIS
#'
#' library(vtable)
#' st(CampoGrande,
#'    vars = c("VU", "AREA", "FRENTE", "RENDA", "DISTCENTRO", "VISTA", "TESTADAS",
#'             "PAV", "VOCACAO", "COMERCIAL", "PADRAO", "ZONA", "EA", "ZEIA",
#'             "DATA"),
#'    add.median = T)
#'
#' CampoGrande <- CampoGrande[CampoGrande$DATA == 10, ]
#'
#' library(car)
#' Boxplot(log(PU) ~ VOCACAO, data = CampoGrande, subset = -630)
#' Boxplot(log(PU) ~ ZEIA, data = CampoGrande, subset = -630)
#' Boxplot(log(PU) ~ COMERCIAL, data = CampoGrande, subset = -630)
#' Boxplot(log(PU) ~ PADRAO, data = CampoGrande, subset = -630)
#' plotdf(log(PU) ~ log(AREA) + log(FRENTE) + log(RENDA) | VOCACAO, CampoGrande)
#' scatterplotMatrix(~ log(PU) + log(AREA) + log(FRENTE) + log(RENDA) | VOCACAO,
#'                   data = CampoGrande[-630, ],
#'                   subset = AREA < 100000 & FRENTE < 1000,
#'                   diagonal = TRUE,
#'                   regLine = TRUE,
#'                   smooth = FALSE,
#'                   ellipse = FALSE,
#'                   cex = .75,
#'                   id = F,
#'                   cex.labels = 1,
#'                   legend = list(cex = .7, pt.cex = .75),
#'                   las = 1)


"CampoGrande"
CampoGrande <- readxl::read_excel("./inst/ELEMENTOS_2769_DROUBI.xlsx")
colnames(CampoGrande) <- c("ID", "ENDERECO",   "BAIRRO",
                     "INFORMANTE", "TELEFONE", "FICHA",
                     "LATITUDE", "LONGITUDE", "ORIGEM",
                     "TIPO", "DATA", "MURADO",
                     "PAV", "ESQUINA", "ZEIA",
                     "VISTA", "SUPERFICIE", "TOPO",
                     "A_DEMOLIR", "A_DEMOLIR_M2", "COMERCIAL",
                     "ZONEAMENTO", "ZONA_1_5", "TO", "CA", "IE", "ZC",
                     "EA", "POLO", "VOCACAO",
                     "PADRAO", "DISTCENTRO", "FORMATO",
                     "TESTADAS", "RENDA", "PRECO", "AREA", "PU",
                     "FRENTE", "OBS"
                     #, "geometry"
)
CampoGrande <- within(CampoGrande,{
  PU <- 0.88*PRECO/AREA
  ORIGEM <- factor(ORIGEM)
  MURADO <- factor(MURADO)
  PAV <- factor(PAV)
  ESQUINA <- factor(ESQUINA)
  ZEIA <- factor(ZEIA, levels = 1:4,
                 labels = c("1", "2", "4", "Nao"))
  VISTA <- factor(VISTA)
  SUPERFICIE <- factor(SUPERFICIE)
  TOPO <- factor(TOPO)
  A_DEMOLIR <- factor(A_DEMOLIR)
  COMERCIAL <- factor(COMERCIAL)
  ZONA <- factor(ZONA_1_5, levels = 1:5,
                 labels =  c("Z5", "Z4", "Z3", "Z2", "Z1"),
                 ordered = F)
  TO <- factor(TO)
  CA <- factor(CA)
  IE <- factor(IE)
  ZC <- factor(ZC, levels = 1:2,
               labels = c("Nao", "Sim"))
  EA <- factor(EA, levels = 1:2,
               labels = c("Nao", "Sim"))
  VOCACAO <- factor(VOCACAO, levels = c("Chacara", "Industrial", "Unifamiliar",
                                        "Multifamiliar", "Comercial"))
  PADRAO <- factor(PADRAO, levels = c("Baixo", "Medio", "Alto"))
  FORMATO <- factor(FORMATO)
  TESTADAS <- factor(TESTADAS)
  DATA <- factor(DATA)
})
CampoGrande <- CampoGrande[-969, ] # Frente = 0!
CampoGrande$ID <- 1:nrow(CampoGrande)
CampoGrande$ZEIA <- relevel(CampoGrande$ZEIA, ref = "Nao")
CampoGrande <- sf::st_as_sf(CampoGrande, coords = c("LONGITUDE", "LATITUDE"),
                remove = FALSE)
sf::st_crs(CampoGrande) <- 4326
CampoGrande <- sf::st_transform(CampoGrande, 31981)
CampoGrande$E <- sf::st_coordinates(CampoGrande)[, "X"]
CampoGrande$N <- sf::st_coordinates(CampoGrande)[, "Y"]

RegressaoSimples <- readxl::read_excel("./inst/ExRegSimplesCurso.xlsx")
#' Simple Linear Regression Example Data
#'
#' A data.frame containing a sample of 10 lots with
#' prices and other attributes in Recife/PE
#'
#' @format A tibble with 10 rows and 5 variables:
#' \itemize{
#'   \item ID: index
#'   \item AREA: Total Area, in squared meters
#'   \item DISTPOLO: Distance to Road, in kilometers
#'   \item PU: Unitary Price, in Reais per squared meters
#'   \item PT: Total Price, in Reais
#' }
#' @examples
#' data(RegressaoSimples)
#' fit <- lm(PU ~ DISTPOLO, data = RegressaoSimples)
#' attach(RegressaoSimples)
#' library(TeachingDemos)
#' loess.demo(PU, DISTPOLO)
"RegressaoSimples"

usethis::use_data(centro_2015, zilli_2020, trindade, jungles, loteamento,
                  jurere_2017, trivelloni_2005, glebas, glebas2, glebas3,
                  atibaia, CampoGrande, RegressaoSimples,
                  overwrite = TRUE)