# draft/getTaiHo.R

#' Scrape the store list for the brand "太和工房"
#'
#' Downloads the store-locator page, extracts the text of the nested table
#' cells, and derives one address and one phone number per store heading.
#'
#' @return A data.frame with character columns `brand_nm`, `store_nm`,
#'   `addr` and `tel_no`, one row per store heading found on the page.
#'   Raises an error when no store heading is found or when the number of
#'   address/phone cells does not match the number of store headings.
getTaiHo <- function() {
  # Number of leading table cells dropped before pairing cells with stores.
  # NOTE(review): presumably header / non-store cells of the page -- confirm
  # against the live page whenever its layout changes.
  n_skip <- 7L

  url      <- "http://www.0800290290.com.tw/ion/place.htm"
  raw_html <- content(GET(url, encoding = "utf8"), as = "text", encoding = "utf8")
  doc      <- htmlParse(raw_html, encoding = "utf8")

  # Text of every candidate cell, with runs of spaces/newlines collapsed.
  cells <- cssApply(doc, " table.main >tr>td>table>tr>td ", cssCharacter)
  cells <- str_replace_all(cells, "( |\n)+", " ")

  # Store names are the <h1> headings nested inside the same cells.
  store_nm <- str_trim(
    cssApply(doc, " table.main >tr>td>table>tr>td>a>h1 ", cssCharacter)
  )
  if (length(store_nm) == 0L) {
    stop(
      "No store headings found at ", url,
      "; the page may be unavailable or its layout may have changed.",
      call. = FALSE
    )
  }

  # Address: strip the store name, everything from "TEL" onwards, a
  # parenthesised-area-code phone number, and the business-hours text
  # ("營業時間"), then remove all remaining spaces.
  # NOTE(review): the store names are used as a regex alternation without
  # escaping; names containing regex metacharacters would not match
  # literally -- confirm none do.
  addr <- str_replace_all(cells, paste0(store_nm, collapse = "|"), "")
  addr <- str_trim(str_replace_all(addr, "(TEL(.| )+$)", ""))
  # NOTE(review): `[!\\(]` matches a literal "!" or "(" -- confirm that "!"
  # is intended.
  addr <- str_replace(addr, "[!\\(][0-9]+\\)[0-9]+-?[0-9]+", "")
  addr <- str_replace(addr, "營業時間.+$", "")
  addr <- str_replace_all(addr, " ", "")
  addr <- addr[-seq_len(n_skip)]

  # Phone number: first match of any of the supported layouts in each cell.
  tel_pattern <- paste(
    c(
      "[0-9]{2}(-| )[0-9]{3,4}-[0-9]{3,4}",
      "[0-9]{2}-[0-9]{8,9}",
      "\\([0-9]{2}\\)[0-9]{3}-[0-9]{4}",
      "[0-9]{2}-[0-9]{7}",
      "[0-9]{2}[0-9]{3}-[0-9]{4}"
    ),
    collapse = "|"
  )
  tel_no <- str_extract(cells, tel_pattern)
  tel_no <- tel_no[-seq_len(n_skip)]

  # data.frame() silently recycles vectors whose lengths are exact multiples,
  # which would pair stores with the wrong address; fail loudly instead.
  if (length(addr) != length(store_nm) || length(tel_no) != length(store_nm)) {
    stop(
      "Scraped ", length(store_nm), " store name(s) but ", length(addr),
      " address cell(s) and ", length(tel_no), " phone cell(s) from ", url,
      "; the page layout may have changed.",
      call. = FALSE
    )
  }

  # stringsAsFactors = FALSE keeps the columns character on every R version.
  data.frame(
    brand_nm = "太和工房",
    store_nm = store_nm,
    addr     = addr,
    tel_no   = tel_no,
    stringsAsFactors = FALSE
  )
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.