# draft/getTheNorthFace.R

#' Scrape The North Face Taiwan store listings.
#'
#' Downloads the store-locator pages of thenorthface.com.tw for two store
#' categories (category 1: directly operated counters/stores, category 2:
#' dealer stores), extracts the store name, phone number and address from
#' each listing table, and stacks everything into one data frame.
#'
#' Relies on `GET`/`content` (httr), `htmlParse`/`xpathSApply`/`xmlValue`
#' (XML) and `str_replace_all` (stringr) being available in the calling
#' environment, exactly as the original implementation did.
#'
#' @return A data.frame with character columns `brand_nm`, `type`,
#'   `store_nm`, `addr`, `tel_no` and `data_dt` (today's date as
#'   "YYYYMMDD"), one row per store. Categories or pages without any
#'   listing rows contribute zero rows instead of raising an error.
getTheNorthFace <- function() {
  base_url <- "http://thenorthface.com.tw/zh-tw/shop/category"
  # Same value as gsub('-', '', Sys.Date()), computed once for all rows.
  data_dt <- format(Sys.Date(), "%Y%m%d")

  # Download a URL and parse the body as an HTML document.
  fetch_doc <- function(url) {
    res <- GET(url)
    htmlParse(content(res, 'text', encoding = 'utf8'), encoding = 'utf8')
  }

  # Text of every node matching `xpath`, always as a character vector.
  # xpathSApply() returns an empty list when nothing matches, which would
  # otherwise break data.frame() construction further down.
  xpath_text <- function(doc, xpath) {
    as.character(unlist(xpathSApply(doc, xpath, xmlValue)))
  }

  # Turn one parsed listing page into a data frame of stores.
  parse_page <- function(doc, type) {
    cell_xpath <- '//*[@class="list_block"]/table/tr/td[%d]'
    store_nm <- xpath_text(doc, sprintf(cell_xpath, 2L))
    tel_no <- xpath_text(doc, sprintf(cell_xpath, 3L))
    addr <- xpath_text(doc, sprintf(cell_xpath, 4L))

    # Strip all whitespace from names and addresses.
    store_nm <- str_replace_all(store_nm, '(\r|\n| |\t)+', '')
    addr <- str_replace_all(addr, '(\r|\n| |\t)+', '')
    # Drop everything from the first "(" to the end of the address.
    addr <- str_replace_all(addr, '([(]).+$', '')

    n <- length(store_nm)
    # data.frame() would silently recycle columns whose lengths happen to
    # divide evenly, producing misaligned rows; fail loudly instead.
    if (length(tel_no) != n || length(addr) != n) {
      stop(
        "Unexpected listing table layout: ", n, " store names, ",
        length(tel_no), " phone numbers, ", length(addr), " addresses",
        call. = FALSE
      )
    }

    # rep() keeps the constant columns valid even when a page has no rows.
    data.frame(
      brand_nm = rep('The North Face', n),
      type = rep(type, n),
      store_nm = store_nm,
      addr = addr,
      tel_no = tel_no,
      data_dt = rep(data_dt, n),
      stringsAsFactors = FALSE
    )
  }

  # Scrape every page of one store category.
  scrape_category <- function(category_id, type) {
    landing <- fetch_doc(sprintf("%s/%s", base_url, category_id))

    page_labels <- xpath_text(landing, '//*[@class="page_block"]/ul/li/a')
    # The last pagination link is dropped, as in the original code
    # (presumably a "next"/"last" link rather than a page number -- TODO
    # confirm against the live site).
    keep <- seq_len(max(length(page_labels) - 1L, 0L))
    page_urls <- sprintf("%s/%s/%s", base_url, category_id, page_labels[keep])

    if (length(page_urls) == 0L) {
      # No pagination links found: the original looped over 1:0 and
      # requested an NA URL. Fall back to the landing page itself, which is
      # assumed to carry the (only) listing table -- TODO confirm.
      docs <- list(landing)
    } else {
      docs <- lapply(page_urls, fetch_doc)
    }

    do.call(rbind, lapply(docs, parse_page, type = type))
  }

  # Directly operated counters / stores
  direct_stores <- scrape_category(1L, '直營櫃點門市')

  # Dealer stores
  dealer_stores <- scrape_category(2L, '經銷門市')

  rbind(direct_stores, dealer_stores)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.