draft/getNet.R

#' Scrape the NET store directory into one data frame
#'
#' Parses the index page at `NET_URL`, collects the pagination links matched
#' by the CSS selectors `a.shoppage_select` and `a.shoppage`, then parses the
#' table with id `color_table` on every linked page and stacks the rows.
#'
#' Relies on `htmlParse()`, `xpathSApply()`, `xmlValue()`, `cssApply()`,
#' `cssLink()`, `str_replace_all()` and `str_trim()` already being attached by
#' the caller (presumably from the XML, CSS and stringr packages -- confirm).
#'
#' @return A data frame with the columns `brand_nm` (always `"NET"`), `area`,
#'   `store_nm`, `addr`, `tel_no`, `open_time` (table columns 1, 2, 4, 5 and 6)
#'   and `data_dt` (today's date as `YYYYMMDD`). Rows whose trimmed `store_nm`
#'   is empty are dropped. When nothing could be scraped, a zero-row data
#'   frame with the same columns is returned.
getNet <- function() {
  NET_URL <- 'http://www.net-fashion.net/content/shopdata1/'
  cell_xpath <- '//*[@id="color_table"]/tr/td[%d]'

  # Text of every cell in one table column, minus the first cell
  # (presumably the header row -- TODO confirm against the live page).
  read_column <- function(doc, column) {
    cells <- xpathSApply(doc, sprintf(cell_xpath, column), xmlValue)
    cells[-1]
  }

  # Only spaces are removed; hyphens in phone numbers are kept on purpose.
  strip_spaces <- function(x) {
    str_replace_all(x, '([ ])+', '')
  }

  index_doc <- htmlParse(NET_URL, encoding = 'utf8')
  page_urls <- c(
    cssApply(index_doc, "a.shoppage_select", cssLink),
    cssApply(index_doc, "a.shoppage", cssLink)
  )

  # Computed once so every row carries the same stamp, even if the scrape
  # runs across midnight.
  data_dt <- format(Sys.Date(), "%Y%m%d")

  # seq_along() (not 1:length()) so that zero pagination links means zero
  # iterations instead of a failed attempt to parse NA.
  pages <- vector("list", length(page_urls))
  for (i in seq_along(page_urls)) {
    doc <- htmlParse(page_urls[i], encoding = 'utf8')

    store_nm <- read_column(doc, 2L)
    if (length(store_nm) == 0L) {
      # No data rows on this page: data.frame() would fail trying to recycle
      # the scalar brand name against zero-length columns, so skip the page.
      next
    }

    pages[[i]] <- data.frame(
      brand_nm = "NET",
      area = read_column(doc, 1L),
      store_nm = store_nm,
      addr = read_column(doc, 4L),
      tel_no = strip_spaces(read_column(doc, 5L)),
      open_time = strip_spaces(read_column(doc, 6L)),
      data_dt = data_dt
    )
    flush.console()
  }

  # rbind() ignores the NULL slots left by skipped pages.
  stores <- do.call(rbind, pages)
  if (is.null(stores)) {
    return(data.frame(
      brand_nm = character(0),
      area = character(0),
      store_nm = character(0),
      addr = character(0),
      tel_no = character(0),
      open_time = character(0),
      data_dt = character(0)
    ))
  }

  # as.character() guards against store_nm being a factor (the data.frame()
  # default before R 4.0).
  has_name <- nchar(str_trim(as.character(stores$store_nm))) > 0
  stores[has_name, ]
}
leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.