# draft/get50lan.R

#' Scrape the 50 Lan (五十嵐) store directory
#'
#' Downloads the shop index page, follows every city link found under
#' `#ct-left-t1`, and parses each store entry on the city pages into a store
#' name, a telephone number and an address.
#'
#' @return A data.frame with character columns `brand_nm` (always '五十嵐'),
#'   `city`, `store_nm`, `tel_no` and `addr`, one row per store entry. If no
#'   city link or no store entry is found, a zero-row data.frame with the
#'   same columns is returned instead of raising an error.
get50lan <- function(){
  res <- GET('http://www.50lan.com.tw/shops_c1.php')
  res <- content(res, encoding = 'utf8')

  # The same anchors provide both the link target and the city label.
  visitLink <- cssApply(res,'#ct-left-t1 > ul > li > a', cssLink)
  visitCity <- cssApply(res,'#ct-left-t1 > ul > li > a', cssCharacter)
  # str_replace() removes only the first whitespace character of each label.
  visitCity <- str_replace(visitCity, '[:space:]', '')
  visitLink <- sprintf('http://www.50lan.com.tw/%s',visitLink )

  # seq_along() (rather than 1:length()) keeps the iteration empty when the
  # index page yields no links, instead of requesting an NA URL.
  pages <- lapply(seq_along(visitLink), function(i) {
    page <- GET(visitLink[i])
    page <- content(page, encoding = 'utf8')
    wantData <- xpathSApply(page,'//div[@class="news-box"]/ul/li', xmlValue)
    wantData <- str_replace(wantData, '< 觀看地圖 >','')
    # Drops the first run of spaces only; the address pattern below still
    # relies on a space preceding the address.
    wantData <- str_replace(wantData, ' +','')
    # Keep only entries that contain at least one alphanumeric character.
    wantData <- wantData[str_detect(wantData,'[:alnum:]')]

    # Store name: the leading run of letters.
    store_nm <- str_extract(wantData,'(^[:alpha:]+)')
    # Telephone: "(digits) digits-digits", then stripped down to the digits.
    tel_no <- str_extract(wantData,'\\([0-9]+\\) ?[0-9]+-[0-9]+')
    tel_no <- str_replace_all(tel_no, ' |\\(|\\)|-','')
    # Address: the trailing token after a space, made of alphanumerics,
    # '-' and parentheses; parenthesised groups and spaces are then removed.
    # NOTE(review): the two alternatives in `( | )` / `| | ` look identical
    # here; presumably one was a non-ASCII space - confirm against the
    # original file.
    addr <- str_extract(wantData,' ([:alnum:]|-|\\(|\\))+( | )?$')
    addr <- str_replace_all(addr, '(\\([:alnum:]+\\))| | ','')

    # rep() keeps `city` the same length as the parsed columns, so a page
    # without store entries produces a zero-row frame rather than an error.
    data.frame(city = rep(visitCity[i], length(wantData)),
               store_nm, tel_no, addr, stringsAsFactors = FALSE)
  })

  result <- do.call(rbind, pages)
  if (is.null(result)) {
    # No city pages at all: build an empty frame with the expected columns.
    result <- data.frame(city = character(0), store_nm = character(0),
                         tel_no = character(0), addr = character(0),
                         stringsAsFactors = FALSE)
  }

  # Returned visibly; rep() keeps the brand column valid for zero rows.
  data.frame(brand_nm = rep('五十嵐', nrow(result)), result,
             stringsAsFactors = FALSE)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.