# draft/getHangTen.R

#' Scrape the HANG TEN store list into a data frame
#'
#' Downloads the store-locator index page, collects every `a` link on it and
#' visits each linked area page except the last one. On every area page the
#' third HTML table is read and rows 2-25, columns 1, 3 and 6 are kept
#' (duplicates and rows containing `NA` are dropped). Pagination is followed
#' by taking the last link of the current page for as long as the resulting
#' URL compares greater than the current one.
#'
#' Relies on `GET()`/`content()`, `htmlParse()`/`readHTMLTable()` and
#' `cssApply()`/`cssLink()` being available in the calling environment
#' (presumably from httr, XML and CSS -- confirm against the package
#' imports). Performs network requests; no status checking is done.
#'
#' @return A data.frame with columns `brand_nm` (always "HANG TEN"),
#'   `store_nm` (table column V1), `addr` (V6), `tel_no` (V3) and `data_dt`
#'   (today's date as "YYYYMMDD"). Rows whose store name is an empty string
#'   are removed. A zero-row data frame with the same columns is returned
#'   when no page could be scraped.
getHangTen <- function() {
  base_url <- "http://www.hangten.com.tw/_shop_info/store/include/"

  index_resp <- GET(paste0(base_url, "map_bbg_1.html"))
  index_doc <- htmlParse(index_resp, encoding = "utf8")
  area_links <- cssApply(index_doc, "a", cssLink)

  # Collect one table per visited page and bind once at the end instead of
  # growing a data frame with rbind() on every iteration.
  page_tables <- list()

  # The last link of the index page is deliberately skipped. seq_len() keeps
  # the loop empty when there are zero or one links (1:0 would count down).
  for (i in seq_len(max(length(area_links) - 1L, 0L))) {
    page_url <- paste0(base_url, area_links[i])

    repeat {
      resp <- GET(page_url)
      doc <- htmlParse(resp, encoding = "utf8")
      page_links <- cssApply(doc, "a", cssLink)

      html_text <- content(resp, as = "text", encoding = "utf8")
      tables <- readHTMLTable(html_text)
      # Rows past the end of the table come back as all-NA rows and are
      # removed again by na.omit(); row 1 is skipped (presumably a header).
      rows <- tables[[3]][2:25, c(1, 3, 6)]
      page_tables[[length(page_tables) + 1L]] <- na.omit(unique(rows))

      # The last link on the page is treated as the "next page" link; stop
      # as soon as it no longer sorts after the current URL, which also
      # guarantees termination.
      if (length(page_links) == 0L) {
        break
      }
      next_url <- paste0(base_url, page_links[length(page_links)])
      if (!isTRUE(next_url > page_url)) {
        break
      }
      page_url <- next_url
    }
  }

  if (length(page_tables) > 0L) {
    info <- do.call(rbind, page_tables)
    # Logical mask instead of -which(...): with no blank names, -integer(0)
    # would select zero rows and silently discard every store.
    first_col <- info[, 1]
    is_blank <- !is.na(first_col) & as.character(first_col) == ""
    info <- info[!is_blank, , drop = FALSE]
    store_nm <- info[["V1"]]
    addr <- info[["V6"]]
    tel_no <- info[["V3"]]
  } else {
    store_nm <- character(0)
    addr <- character(0)
    tel_no <- character(0)
  }

  # rep() keeps the constant columns consistent with a zero-row result.
  n_rows <- length(store_nm)
  data.frame(
    brand_nm = rep("HANG TEN", n_rows),
    store_nm = store_nm,
    addr = addr,
    tel_no = tel_no,
    data_dt = rep(format(Sys.Date(), "%Y%m%d"), n_rows)
  )
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.