# draft/get8way.R

#' Scrape the 8way (Bafang Yunji dumpling chain) store locator
#'
#' Requests the paginated store-search results from www.8way.com.tw, reads
#' the first HTML table of every page, and splits each entry into a store
#' name, an address and a phone number.
#'
#' Relies on `GET()`/`content()`, `htmlParse()`/`readHTMLTable()` and
#' `str_extract()` being visible to this function (presumably from httr, XML
#' and stringr -- confirm against the package imports).
#'
#' @param pages Vector of result-page numbers to request. Defaults to `1:37`,
#'   the page range that was previously hard-coded.
#' @return A data frame with character columns `brand_nm`, `store_nm`, `addr`
#'   and `tel_no`; one row per table entry whose phone cell is non-empty.
#'   `store_nm` is `NA` when no text ending in "店" could be extracted; in
#'   that case `addr` keeps the whole cleaned entry instead of becoming `NA`.
get8way <- function(pages = 1:37) {
  base_url <- "http://www.8way.com.tw/index_down.php?sele=searchRS&searchname=&Submit2=%E6%90%9C%E5%B0%8B%E5%BA%97%E5%AE%B6&Page="

  # One small data frame per page, bound together after the loop.
  page_rows <- vector("list", length(pages))
  for (k in seq_along(pages)) {
    resp <- GET(paste0(base_url, pages[k]))
    doc <- htmlParse(content(resp, "text", encoding = "utf8"), encoding = "utf8")
    tables <- readHTMLTable(doc)

    # Columns 2 and 3 of the first table carry the store text and the phone
    # text. Rows containing NA are dropped, then the first remaining row
    # (presumably the table header -- confirm against the live page).
    cells <- na.exclude(tables[[1]][, 2:3])[-1, ]
    keep <- which(cells[, 2] != "")
    page_rows[[k]] <- data.frame(
      info = as.character(cells[keep, 1]),
      tel = as.character(cells[keep, 2]),
      stringsAsFactors = FALSE
    )
  }
  stores <- do.call(rbind, page_rows)

  # Strip the "noodle products available" / "noodle specialty shop" notes,
  # parentheses and whitespace from the store text.
  # NOTE(review): the pattern is kept verbatim. The `\\|` and the trailing
  # unmatched `)` look like typos, and the whitespace alternatives may have
  # been distinct characters (e.g. full-width or no-break spaces) originally.
  info <- gsub("\\(有提供麵類產品\\)| \\(有提供麵類產品\\)|  \\(本店為麵類商品專賣店\\| \\(本店為麵類商品專賣店\\)  |本店為麵類商品專賣店|\\(|\\)| |   | |  )", "", as.character(stores[, 1]))

  # The store name is the first run of up to 12 characters ending in "店".
  store_nm <- str_extract(info, ".\\S{0,10}店")

  # The address is what is left once the store name is removed. The name is
  # matched literally (fixed = TRUE) so regex metacharacters in a name cannot
  # be misinterpreted, and a missing name leaves the text untouched.
  addr <- vapply(seq_along(info), function(k) {
    if (is.na(store_nm[k])) {
      info[k]
    } else {
      gsub(store_nm[k], "", info[k], fixed = TRUE)
    }
  }, character(1))

  # Drop the "order hotline" label and whitespace from the phone text.
  tel_no <- gsub("訂餐專線:|訂餐專線| |   | |  )", "", as.character(stores[, 2]))

  data.frame(
    brand_nm = rep("八方雲集", length(store_nm)),
    store_nm = store_nm,
    addr = addr,
    tel_no = tel_no,
    stringsAsFactors = FALSE
  )
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.