# draft/getKobayaShi.R

#' Scrape the Kobayashi store directory from www.kobayashi.com.tw
#'
#' Reads the store-map index page, follows every city link to that city's
#' store list, then visits each store page and collects its details.
#'
#' Calls `GET`/`content`, `htmlParse`, `cssApply`/`cssLink`/`cssCharacter`
#' and `str_extract`/`str_replace_all`, which must already be attached
#' (presumably from httr, XML, CSS and stringr -- confirm against the
#' package imports).
#'
#' @return A data.frame with one row per store and the columns `brand_nm`,
#'   `store_nm`, `addr`, `tel_no` and `fax`; `NULL` when no store page
#'   could be scraped. Store pages without a name cell are skipped with a
#'   warning.
getKobayaShi <- function() {
  base_url <- "http://www.kobayashi.com.tw"

  # Download one page and parse it as UTF-8 HTML. `asText = TRUE` stops the
  # parser from trying to interpret the downloaded markup as a file name.
  fetch_html <- function(URL) {
    res <- GET(URL)
    htmlParse(
      content(res, "text", encoding = "UTF-8"),
      asText = TRUE,
      encoding = "UTF-8"
    )
  }

  # Collect the absolute store-page URLs listed on one city page.
  getkoblink <- function(URL) {
    link <- cssApply(fetch_html(URL), "tr > td > a", cssLink)
    # sprintf() (unlike paste0()) yields character(0) for an empty `link`.
    sprintf("%s%s", base_url, link)
  }

  # Scrape one store page into a one-row data.frame (NULL if unusable).
  getkobinfo <- function(URL) {
    page <- fetch_html(URL)
    store_nm <- cssApply(page, "tr > td.city_title4", cssCharacter)
    if (length(store_nm) == 0L) {
      # Without a name data.frame() would fail on differing row counts and
      # abort the whole scrape; skip the page instead.
      warning("No store name found at ", URL, "; page skipped", call. = FALSE)
      return(NULL)
    }
    store_info <- cssApply(page, "tr > td.city_text3", cssCharacter)
    # Strip the leading "label:" part of every detail cell.
    store_info <- str_replace_all(store_info, ".+:", "")
    # Detail cells are read positionally: 1 = phone, 2 = fax, 3 = address.
    data.frame(
      brand_nm = "小林眼鏡",
      store_nm = store_nm,
      addr = store_info[3],
      tel_no = store_info[1],
      fax = store_info[2],
      stringsAsFactors = FALSE
    )
  }

  # Each city link carries a numeric id that selects the city's store list.
  city_links <- cssApply(
    fetch_html(sprintf("%s/map_list/", base_url)),
    "td.city_text tr > td > a",
    cssLink
  )
  city_ids <- str_extract(city_links, "[0-9]+")
  # Links without an id would otherwise request ".../load_map/NA".
  city_ids <- city_ids[!is.na(city_ids)]
  city_urls <- sprintf("%s/load_map/%s", base_url, city_ids)

  store_urls <- unlist(lapply(city_urls, getkoblink))
  stores <- Filter(Negate(is.null), lapply(store_urls, getkobinfo))

  # Returned visibly (the original ended on an assignment, hiding the value).
  do.call(rbind, stores)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.