#' Scrape the branch list published on je.joy.com.tw (mobile site).
#'
#' Works in three steps:
#'   1. read the city drop-down (`#select`) from the branch-list page,
#'   2. read each city's sub-area drop-down (`#select2`),
#'   3. download every city/sub-area page and pull the store name, address
#'      and phone number out of the tables under the `wrap` class.
#'
#' Relies on `GET()`/`content()`, `htmlParse()`/`xpathSApply()`/`xmlValue()`/
#' `xmlAttrs()` and `str_replace_all()` already being attached by the caller
#' (presumably httr, XML and stringr -- confirm against the file's library
#' calls).
#'
#' @param delay Seconds to pause after each branch-page request. The site
#'   starts returning errors when it is hit too frequently; increase this
#'   value if requests begin to fail.
#' @return A data.frame with character columns `brand_nm`, `store_nm`,
#'   `addr`, `tel_no` and `data_dt` (run date as "YYYYMMDD"), one row per
#'   store. A zero-row data.frame with the same columns is returned when
#'   nothing could be scraped.
getJeJoy <- function(delay = 3) {
  stopifnot(is.numeric(delay), length(delay) == 1, !is.na(delay), delay >= 0)

  base_url <- "http://je.joy.com.tw/mobile/branch_list.php"

  # Download a page and parse it as UTF-8 HTML.
  fetch_page <- function(url) {
    htmlParse(content(GET(url), "text", encoding = "utf8"), encoding = "utf8")
  }

  # Run an XPath query and always return a plain character vector
  # (xpathSApply() yields an empty list when no node matches).
  node_text <- function(doc, path) {
    as.character(xpathSApply(doc, path, xmlValue))
  }

  # City names from the first drop-down; the first option is dropped
  # (placeholder entry), then each name is URL-encoded for use in a query.
  cities <- node_text(fetch_page(base_url), '//*[@id="select"]/option')[-1]
  cities <- vapply(cities, URLencode, character(1), USE.NAMES = FALSE)

  # One listing URL per (city, sub-area) pair. sprintf() is vectorised over
  # the sub-area codes and returns character(0) when a city has none, so no
  # bogus "bcode=NA" URL is produced for empty drop-downs.
  page_urls <- as.character(unlist(lapply(cities, function(city) {
    doc <- fetch_page(sprintf("%s?city=%s", base_url, city))
    codes <- xpathSApply(doc, '//*[@id="select2"]/option', xmlAttrs)
    codes <- as.character(codes[-1])
    sprintf("%s?city=%s&bcode=%s", base_url, city, codes)
  })))

  # Drop links of cities without any branch: their only option contributes
  # its "selected" attribute instead of a real branch code.
  page_urls <- page_urls[!grepl("bcode=selected", page_urls, fixed = TRUE)]

  run_date <- format(Sys.Date(), "%Y%m%d")
  empty_result <- data.frame(
    brand_nm = character(0),
    store_nm = character(0),
    addr = character(0),
    tel_no = character(0),
    data_dt = character(0),
    stringsAsFactors = FALSE
  )

  pages <- vector("list", length(page_urls))
  for (i in seq_along(page_urls)) {
    doc <- fetch_page(page_urls[i])
    store_nm <- node_text(doc, '//*[@class="wrap"]/table/tr[1]/td[2]')
    addr <- node_text(doc, '//*[@class="wrap"]/table/tr[2]/td[2]')
    tel_no <- node_text(doc, '//*[@class="wrap"]/table/tr[3]/td[2]')

    # Throttle: frequent requests make the site return errors.
    Sys.sleep(delay)

    # A page without stores would make data.frame() fail; skip it instead.
    if (length(store_nm) == 0) {
      next
    }
    # Refuse to silently recycle columns when the three fields do not line up.
    if (length(addr) != length(store_nm) ||
        length(tel_no) != length(store_nm)) {
      warning("Skipping page with mismatched fields: ", page_urls[i],
              call. = FALSE)
      next
    }

    # Strip line breaks, spaces and the field label (ASCII or full-width
    # colon), then cut the address at the first bracketed remark. The three
    # bracket characters are in a character class, so the pattern stays a
    # valid regex.
    addr <- str_replace_all(addr, "(\r|\n| |地址[:：])+", "")
    addr <- str_replace_all(addr, "[(（【].+$", "")
    tel_no <- str_replace_all(tel_no, "(\r|\n| |電話[:：])+", "")

    pages[[i]] <- data.frame(
      brand_nm = "佳音英語",
      store_nm = store_nm,
      addr = addr,
      tel_no = tel_no,
      data_dt = run_date,
      stringsAsFactors = FALSE
    )
  }

  pages <- Filter(Negate(is.null), pages)
  if (length(pages) == 0) {
    return(empty_result)
  }
  do.call(rbind, pages)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.