# draft/getWatsons.R

#' Scrape the Watsons Taiwan store-finder page into a data frame
#'
#' Downloads the store-finder page, reads each store's name, address and
#' "area, town, zip" line from the `addressDe fl` blocks, and pulls the
#' latitude/longitude out of the inline `arrayStoresAddress[...]` JavaScript
#' assignments found in the page's `<script>` elements.
#'
#' @return A data.frame with one row per store and the columns `brand_nm`,
#'   `store_nm`, `addr`, `gisX` (taken from `lng`), `gisY` (taken from
#'   `lat`), `area`, `town`, `zip` and `zipChar` (the raw line that `area`,
#'   `town` and `zip` are extracted from). Coordinates and zip are returned
#'   as character strings.
getWatsons <- function(){
  dataUrl <- GET('http://www.watsons.com.tw/store-finder')
  # Fail on 4xx/5xx instead of parsing an error page as if it were the list.
  httr::stop_for_status(dataUrl)
  dataUrlParse <- htmlParse(content(dataUrl, 'text', encoding='utf8'))

  store_nm <- xpathSApply(dataUrlParse, '//div[@class="addressDe fl"]/a',xmlValue)
  addr <- xpathSApply(dataUrlParse, '//div[@class="addressDe fl"]/p[1]',xmlValue)
  zipChar <- xpathSApply(dataUrlParse, '//div[@class="addressDe fl"]/p[2]',xmlValue)

  # Keep only the script elements that carry a store coordinate assignment.
  storeGis <- xpathSApply(dataUrlParse, '//script',xmlValue)
  storeGis <- storeGis[str_detect(storeGis, 'arrayStoresAddress\\[[0-9]+\\] = \\{lat:.+lng:.+\\}')]

  # The four vectors are scraped independently; data.frame() would silently
  # recycle a shorter one when the lengths divide evenly, pairing stores with
  # the wrong coordinates. Refuse to build the table unless they line up.
  counts <- c(store_nm = length(store_nm), addr = length(addr),
              zipChar = length(zipChar), storeGis = length(storeGis))
  if (counts[['store_nm']] == 0L || length(unique(counts)) != 1L) {
    stop('Unexpected store-finder page layout; scraped element counts: ',
         paste(names(counts), counts, sep = ' = ', collapse = ', '),
         call. = FALSE)
  }

  # Two-step extraction: isolate the text after the key, then take the first
  # decimal number inside it.
  gisY <- str_extract(str_extract(storeGis, '(?<=lat:).+(?=,)'),'[0-9]+\\.[0-9]+')
  gisX <- str_extract(str_extract(storeGis, '(?<=lng:).+(?=\\})'),'[0-9]+\\.[0-9]+')

  # zipChar is split on its commas: leading word(s) -> area, the part between
  # the commas -> town, the digits following a comma -> zip.
  area <- str_extract(zipChar, '(^[:alpha:]+-?[:alpha:]+(?=,))')
  town <- str_extract(str_extract(zipChar, ',.+,'), '[:alnum:]+-?[:alnum:]+')
  zip <- str_extract(zipChar, '(?<=,.)[0-9]+')

  # Last expression is the value itself (not an assignment) so the result is
  # returned visibly.
  data.frame(brand_nm='屈臣氏', store_nm, addr, gisX, gisY, area, town, zip, zipChar, stringsAsFactors = FALSE)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.