# R/Oikotie.R
#
# Defines functions: GetOikotie
#
# Documented in: GetOikotie

# This file is a part of the soRvi program (http://louhos.github.com/sorvi/)

# Copyright (C) 2010-2012 Louhos <louhos.github.com>. All rights reserved.

# This program is open source software; you can redistribute it and/or modify 
# it under the terms of the FreeBSD License (keep this notice): 
# http://en.wikipedia.org/wiki/BSD_licenses

# This program is distributed in the hope that it will be useful, 
# but WITHOUT ANY WARRANTY; without even the implied warranty of 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.



#' Preprocess Oikotie data
#'
#' Preprocess data about Finnish apartment prices from Oikotie in years 2010-2011.
#' The raw data must first be downloaded and unzipped from
#' http://www2.hs.fi/extrat/hsnext/oikotie-data.zip
#'
#' Processing steps: numeric columns are parsed (decimal commas accepted for
#' size and price), rows whose size or floor cannot be parsed are dropped,
#' price per square meter is computed, zip codes of 2-4 characters are
#' left-padded with zeros to 5 characters, implausible prices/sizes are
#' filtered out, and a street name is extracted from the location string.
#'
#' @param file Path to the semicolon-separated \code{myynnit.csv} file.
#'   Defaults to \code{"data/myynnit.csv"} relative to the working directory.
#'
#' @return list with Oikotie myynnit data: \code{myynnit} holds all rows that
#'   pass the filters, \code{hr.myynnit} the subset whose zip code begins with
#'   00, 01 or 02 (Helsinki region)
#'
#' @author Juuso Parkkinen \email{sorvi-commits@@lists.r-forge.r-project.org}
#' @export
GetOikotie <- function(file = "data/myynnit.csv") {

  if (!file.exists(file)) {
    stop("Oikotie data file not found: ", file,
         " (download and unzip it from",
         " http://www2.hs.fi/extrat/hsnext/oikotie-data.zip)",
         call. = FALSE)
  }

  message("Loading Oikotie data...")
  myynnit <- read.csv(file, sep = ";", quote = "",
                      fileEncoding = "ISO-8859-1")

  required.cols <- c("Size", "Price", "Floor", "Apartment.condition",
                     "Zip.code", "Location", "Room.configuration")
  missing.cols <- setdiff(required.cols, names(myynnit))
  if (length(missing.cols) > 0) {
    stop("Oikotie data is missing column(s): ",
         paste(missing.cols, collapse = ", "), call. = FALSE)
  }

  # Unparseable values are expected on malformed lines (additional ';'s);
  # they become NA here and the affected rows are removed below, so the
  # coercion warning carries no information.
  as.number <- function(x) suppressWarnings(as.numeric(x))
  decimal.point <- function(x) {
    gsub(pattern = ",", replacement = ".", as.vector(x))
  }

  # Fix formats, and remove lines with errors (additional ';'s)
  myynnit$Size <- as.number(decimal.point(myynnit$Size))
  myynnit$Price <- as.number(decimal.point(myynnit$Price))
  myynnit$Floor <- as.number(as.vector(myynnit$Floor))
  myynnit$Apartment.condition <-
    as.number(as.vector(myynnit$Apartment.condition))
  # Logical mask instead of df[-which(...), ]: a negative index built from an
  # empty which() would select zero rows and wipe the whole data set.
  has.error <- is.na(myynnit$Size) | is.na(myynnit$Floor)
  myynnit <- myynnit[!has.error, , drop = FALSE]

  # Compute price per square meter and fix zip codes
  myynnit$Price.per.square <- myynnit$Price / myynnit$Size
  zip <- as.character(myynnit$Zip.code)
  zip.len <- nchar(zip)
  # which() skips missing zip codes (nchar is NA for them), which a logical
  # index could not do in a subscripted assignment.
  short <- which(zip.len %in% 2:4)
  zip[short] <- paste0(substring("000", 1L, 5L - zip.len[short]), zip[short])
  myynnit$Zip.code <- zip

  # Filter data based on price and size. A row is dropped only when a
  # condition is definitely TRUE; NA comparisons keep the row.
  implausible <- myynnit$Price <= 3 |
    myynnit$Price > 1000000 |
    myynnit$Size < 10 |
    myynnit$Size > 500 |
    myynnit$Price.per.square < 500
  myynnit <- myynnit[!(implausible %in% TRUE), , drop = FALSE]

  # Extract street names: by default everything except the last
  # space-separated token of the location.
  tokens <- strsplit(as.character(myynnit$Location), split = " ")
  n.tokens <- vapply(tokens, length, integer(1))
  street <- vapply(tokens, function(x) {
    # Locations with zero or one token are kept as they are
    if (length(x) > 1) {
      x <- x[-length(x)]
    }
    paste(x, collapse = " ")
  }, character(1))
  street[grep("Hennalankuja", street)] <- "Hennalankuja"
  # Locations with 5 or 6 tokens: only the first token is used as the street
  long.location <- n.tokens %in% c(5, 6)
  street[long.location] <- vapply(tokens[long.location],
                                  function(x) x[1], character(1))
  myynnit$Street <- street

  # Take only Helsinki region data (zip code begins with 00, 01, 02)
  in.hr <- substr(myynnit$Zip.code, 1, 2) %in% c("00", "01", "02")
  hr.myynnit <- myynnit[in.hr, , drop = FALSE]

  # Fix encoding
  # NOTE(review): read.csv() is already given fileEncoding="ISO-8859-1", so
  # this second conversion may double-encode non-ASCII text on a UTF-8
  # locale -- confirm on the target platform before changing it.
  text.cols <- c("Location", "Street", "Room.configuration")
  fix.encoding <- function(df) {
    for (col in text.cols) {
      df[[col]] <- factor(iconv(df[[col]], from = "ISO-8859-1", to = "UTF-8"))
    }
    df
  }
  myynnit <- fix.encoding(myynnit)
  hr.myynnit <- fix.encoding(hr.myynnit)

  message("DONE")
  list(myynnit = myynnit, hr.myynnit = hr.myynnit)
}

# Try the sorvi package in your browser
#
# Any scripts or data that you put into this service are public.
#
# sorvi documentation built on May 2, 2019, 6:16 p.m.