# draft/get12HotPot.R

#' Scrape the store directory of www.12hotpot.com.tw
#'
#' Downloads the store-locator page, reads the area `<option>` list and the
#' per-area city names embedded in the page's JavaScript `case ... break;`
#' blocks, requests the store listing for every (area, city) pair, follows
#' each store link and collects the store details into one data frame.
#'
#' Performs one HTTP request for the landing page, one per (area, city) pair
#' and one per store link.
#'
#' @return A data.frame with the columns `brand_nm`, `store_nm`, `addr`,
#'   `tel_no`, `time`, `park` and `traffic`, built by row-binding one data
#'   frame per store page. A zero-row data.frame with the same columns is
#'   returned when no city or no store link could be extracted.
get12HotPot <- function() {
  site_url <- 'http://www.12hotpot.com.tw/store.aspx'

  # Download a page and decode its body as UTF-8 text.
  # NOTE(review): `encoding` is not a documented argument of GET(); the call
  # is kept exactly as in the original code because dropping it may change
  # how httr rebuilds (and escapes) the URL -- confirm before removing.
  fetch_text <- function(url) {
    content(GET(url, encoding = 'utf8'), "text", encoding = "utf8")
  }

  # Download a page and parse it into an HTML document.
  fetch_html <- function(url) {
    htmlParse(fetch_text(url), encoding = "utf8")
  }

  # Result used when nothing could be scraped, so callers always get the
  # same columns back.
  empty_result <- data.frame(
    brand_nm = character(0), store_nm = character(0), addr = character(0),
    tel_no = character(0), time = character(0), park = character(0),
    traffic = character(0),
    stringsAsFactors = FALSE
  )

  # Extract the city names of one JavaScript `case` chunk. Always returns a
  # plain character vector (possibly empty), never NA entries.
  parse_cities <- function(chunk) {
    option_js <- unlist(str_extract_all(chunk, 'new Option.+\\)\\);'))
    pieces <- unlist(strsplit(option_js, 'form1.city.options.add'))
    if (length(pieces) == 0) {
      return(character(0))
    }
    quoted <- str_extract(pieces, '\\".+\\",')
    unquoted <- str_replace_all(quoted, '\\"', '')
    cities <- unlist(strsplit(unquoted, ','))
    cities[!is.na(cities)]
  }

  # Landing page: decoded once and reused for both the DOM and the raw text.
  page_text <- fetch_text(site_url)
  page_doc <- htmlParse(page_text, encoding = "utf8")

  # Values of the area <option> elements; the first one is dropped
  # (presumably a placeholder entry -- verify against the page).
  area <- cssApply(page_doc, "#area > option", cssValue)
  area <- area[-1]

  # Flatten whitespace so the greedy regex can span what were several lines,
  # then cut the script into one chunk per `case ... break;`.
  flat_text <- str_replace_all(page_text, '[:space:]', ' ')
  case_blob <- unlist(str_extract_all(flat_text, 'case.+break'))
  case_chunks <- unlist(strsplit(case_blob, 'break;'))
  # Keep as many chunks as there are areas; seq_along() keeps this empty
  # when `area` is empty (1:length(area) would select the first chunk).
  case_chunks <- case_chunks[seq_along(area)]

  # One character vector of cities per area, kept as a list so the area
  # index stays correct regardless of how many cities each area has.
  cities_by_area <- lapply(case_chunks, parse_cities)
  city_names <- as.character(unlist(cities_by_area, use.names = FALSE))
  # The `area` query parameter is the 1-based position of the case chunk,
  # as in the original code; the <option> values are only used for counting.
  # NOTE(review): confirm that positions equal the option values.
  area_idx <- rep(seq_along(cities_by_area), lengths(cities_by_area))

  if (length(city_names) == 0) {
    return(empty_result)
  }

  listing_urls <- sprintf(
    'http://www.12hotpot.com.tw/store.aspx?area=%s&city=%s',
    area_idx, city_names
  )

  # Links to the individual store pages found on one listing page.
  # Example listing URL:
  #   http://www.12hotpot.com.tw/store.aspx?area=1&city=台北市
  get_store_links <- function(listing_url) {
    cssApply(fetch_html(listing_url), "tr > td.store > a", cssLink)
  }

  # Details of one store, given its link relative to the site root.
  # Example store URL:
  #   http://www.12hotpot.com.tw/store_print.aspx?id=11215
  get_store_info <- function(store_link) {
    store_doc <- fetch_html(
      sprintf('http://www.12hotpot.com.tw/%s', store_link)
    )
    store_nm <- cssApply(store_doc, "tr > td> span > strong", cssCharacter)
    store_info <- cssApply(store_doc, "tr> td.t12", cssCharacter)
    # The first five `td.t12` cells are mapped positionally to the columns.
    data.frame(
      brand_nm = '石二鍋', store_nm = store_nm,
      addr = store_info[1], tel_no = store_info[2],
      time = store_info[3], park = store_info[4], traffic = store_info[5],
      stringsAsFactors = FALSE
    )
  }

  store_links <- unlist(lapply(listing_urls, get_store_links))
  if (length(store_links) == 0) {
    return(empty_result)
  }

  stores <- lapply(store_links, get_store_info)
  # Returned visibly (the original ended on an assignment, which made the
  # result invisible at the console).
  do.call(rbind, stores)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.