# draft/getYeschain.R

#' Scrape the store list of the 躍獅藥局 chain from dms.yeschain.com.tw
#'
#' Reads the city names from the store-locator landing page, builds one page
#' URL per store found in each city (plus the plain city URL), and extracts
#' the store details from the table on every one of those pages.
#'
#' The helpers `GET()`, `content()`, `htmlParse()`, `cssApply()`,
#' `cssCharacter` and the `str_*` functions must already be attached by the
#' caller (presumably httr, XML, CSS and stringr -- confirm against the
#' package imports).
#'
#' @return A data.frame with one row per fetched page and the character
#'   columns `brand_nm`, `store_nm`, `addr`, `tel_no` and `time`. Pages whose
#'   detail table has fewer than seven cells are skipped with a warning.
#'   `NULL` when no page produced a row.
getYeschain <- function() {
  base_url <- "http://dms.yeschain.com.tw/locationData.php"
  # The store details are read from the last seven <td> cells of a page.
  n_cells <- 7L

  # Download a page as text. Decoding is done by content(); GET() itself
  # has no documented `encoding` argument, so none is passed.
  fetch_text <- function(url) {
    content(GET(url), "text", encoding = "utf8")
  }

  # Download a page and parse it into an HTML document.
  fetch_html <- function(url) {
    htmlParse(fetch_text(url), encoding = "utf8")
  }

  # Return the page URLs for one city: one `&no=<id>` URL per store id found
  # in the page's onChangeCity(...) links, followed by the plain city URL.
  # When no such link is found, only the city URL is returned.
  getyeslink <- function(city) {
    city_url <- sprintf("%s?city=%s", base_url, URLencode(city))
    # Collapse all whitespace so the regexes below can match across lines.
    page <- str_replace_all(fetch_text(city_url), "[:space:]", " ")
    links <- unlist(
      str_extract_all(page, "onChangeCity\\(.+[0-9].+\\).+</a><img")
    )
    if (length(links) == 0) {
      return(city_url)
    }

    # Narrow the matched markup down to the quoted numeric store ids.
    links <- unlist(str_extract_all(links, "\\|.+"))
    links <- unlist(strsplit(links, "</a>"))
    links <- unlist(str_extract_all(links, "onChangeCity.+"))
    store_no <- unlist(str_extract_all(links, "\\'[0-9]+\\'"))
    store_no <- str_replace_all(store_no, "\\'", "")

    c(sprintf("%s&no=%s", city_url, store_no), city_url)
  }

  # Extract one store record from a page; NULL (with a warning) when the
  # page does not contain the expected number of table cells.
  getyesinfo <- function(URL) {
    cells <- cssApply(
      fetch_html(URL),
      "div > div.RightStore > table tr > td",
      cssCharacter
    )
    if (length(cells) < n_cells) {
      # Indexing from the end would otherwise fail or shift the fields.
      warning(
        "Skipping ", URL, ": expected at least ", n_cells,
        " table cells, found ", length(cells),
        call. = FALSE
      )
      return(NULL)
    }
    cells <- str_replace_all(utils::tail(cells, n_cells), "[:space:]", "")

    data.frame(
      brand_nm = "躍獅藥局",
      store_nm = cells[1],
      addr = cells[5],
      tel_no = cells[3],
      time = cells[7],
      stringsAsFactors = FALSE
    )
  }

  cities <- cssApply(
    fetch_html(base_url),
    "div.LeftStore > a > div",
    cssCharacter
  )
  page_urls <- unlist(lapply(cities, getyeslink))
  # rbind() ignores the NULLs returned for skipped pages.
  do.call(rbind, lapply(page_urls, getyesinfo))
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.