# draft/getMaytag.R

# Scrape the Maytag Taiwan service-location listing into a data frame.
#
# Fetches http://www.maytag.com.tw/Service/, counts the pager links to find
# out how many result pages exist, downloads every page and reads the first
# three cells of each row of the `ctl00_ContentPlaceHolder1_Datagrid1` table.
#
# Uses GET()/content() from httr, xpathSApply()/xmlValue() from XML and
# str_replace_all()/str_replace() from stringr.
#
# Returns a data.frame with character columns:
#   brand_nm - always '美泰克'
#   area     - text of the 1st table cell
#   store_nm - text of the 2nd table cell
#   addr     - area + 3rd cell, whitespace removed, anything from the first
#              ':' onwards dropped
#   data_dt  - date of the scrape formatted as YYYYMMDD
# Raises an error if any HTTP request returns an error status.
getMaytag <- function() {
  landing_url <- 'http://www.maytag.com.tw/Service/'

  # Whitespace stripped from addresses: CR, LF, ASCII space, plus no-break
  # (U+00A0) and ideographic (U+3000) spaces. The non-ASCII spaces are written
  # as escapes so an editor cannot silently normalise them away.
  ws_pattern <- "(\r|\n| |\u00a0|\u3000)+"

  # Download one URL and return it as an XML-package HTML document.
  fetch_doc <- function(url) {
    # NOTE(review): `encoding` is not a documented argument of httr::GET; it
    # is left untouched because named extras take part in httr's URL handling.
    # Confirm before removing it.
    res <- GET(url, encoding = 'utf8')
    httr::stop_for_status(res)
    # Parse explicitly with the XML package: newer httr versions return an
    # xml2 document from content(), which xpathSApply() cannot query.
    html <- content(res, as = 'text', encoding = 'utf8')
    XML::htmlParse(html, asText = TRUE, encoding = 'UTF-8')
  }

  # Text of the `col`-th cell of every row in the result grid, always as a
  # character vector (xpathSApply() returns list() when nothing matches).
  cell_text <- function(doc, col) {
    xpath <- sprintf(
      '//*[@id="ctl00_ContentPlaceHolder1_Datagrid1"]/tr/td[%d]', col
    )
    as.character(unlist(xpathSApply(doc, xpath, xmlValue)))
  }

  # The pager shows one "PageUnSelected" element per page other than the
  # current one, so total pages = unselected + 1 (1 when there is no pager).
  pager <- xpathSApply(
    fetch_doc(landing_url), '//*[@class="PageUnSelected"]', xmlValue
  )
  page_num <- length(pager) + 1L

  # Build the URL of every result page.
  page_urls <- sprintf(
    'http://www.maytag.com.tw/Service/index.aspx?ctl00$ContentPlaceHolder1$Datagrid1=%s,1,0,30',
    seq_len(page_num)
  )

  data_dt <- format(Sys.Date(), '%Y%m%d')

  # Extract the rows of a single result page.
  scrape_page <- function(url) {
    doc <- fetch_doc(url)
    area <- cell_text(doc, 1L)
    # Drop the last first-column cell; presumably it belongs to the pager
    # row rather than to a store - TODO confirm against the live page.
    area <- area[-length(area)]
    store_nm <- cell_text(doc, 2L)
    addr <- paste(area, cell_text(doc, 3L))
    addr <- str_replace_all(addr, ws_pattern, '')
    n_rows <- length(store_nm)
    data.frame(
      brand_nm = rep('美泰克', n_rows),
      area = area,
      store_nm = store_nm,
      addr = addr,
      data_dt = rep(data_dt, n_rows),
      stringsAsFactors = FALSE
    )
  }

  stores <- do.call(rbind, lapply(page_urls, scrape_page))

  # 20150911 fix addr: cut everything from the first ':' to the end.
  stores$addr <- str_replace(stores$addr, ':.+$', '')
  stores
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.