#' Scrape the Subway (Taiwan) store locator into a data frame
#'
#' Reads the total store count from the first locator page, then requests
#' every result page and pulls the store name, address and telephone number
#' out of each list item with regular expressions.
#'
#' Relies on `GET()`/`content()`, `htmlParse()`, `cssApply()`/`cssCharacter`
#' and `str_extract_all()`/`str_replace_all()` already being attached
#' (presumably from httr, XML, CSS and stringr -- confirm against the
#' library() calls of the file).
#'
#' @return A data frame with one row per store and the columns `brand_nm`
#'   (always "subway"), `store_nm`, `addr` and `tel_no`. A data frame with
#'   zero rows is returned when the site reports zero stores.
getSubway <- function() {
  first_page_url <- "http://www.twsubway.com/www/include/index.php?Page=4"
  page_url_fmt <- "http://www.twsubway.com/www/include/index.php?pageNum_content01=%s&totalRows_content01=%s&Page=4"
  # The page arithmetic divides the total by 10, i.e. the locator is
  # presumably paginated 10 stores per page.
  page_size <- 10L

  # Download one URL and parse it into an HTML document.
  fetch_html <- function(url) {
    res <- GET(url, encoding = 'utf8')
    htmlParse(content(res, "text", encoding = 'utf8'), encoding = "utf8")
  }

  # The summary block contains a "共 N 項" ("N items in total") string.
  summary_txt <- cssApply(
    fetch_html(first_page_url), "#pane_locator > div ", cssCharacter
  )
  total <- as.integer(str_replace_all(
    unlist(str_extract_all(summary_txt, '共.+項')),
    '(共|項|[:space:])', ""
  ))
  if (length(total) == 0L || is.na(total[1L])) {
    stop(
      "Could not read the total store count from the locator page.",
      call. = FALSE
    )
  }
  total <- total[1L]

  # ceiling() avoids requesting an extra, empty page when `total` is an
  # exact multiple of the page size.
  n_pages <- ceiling(total / page_size)

  # Parse a single result page (1-based index) into a data frame.
  parse_page <- function(i) {
    # The site numbers its pages from 0.
    html <- fetch_html(sprintf(page_url_fmt, i - 1L, total))
    items <- cssApply(html, "#pane_locator > ul > ul li", cssCharacter)

    # Store name: the tab-indented line, stripped of whitespace and of the
    # "BEYOND PLAZA" mall label.
    store_nm <- str_replace_all(
      unlist(str_extract_all(items, "\t.+\n")),
      '(\t|\n|[:space:]|BEYONDPLAZA PLAZA)', ""
    )
    store_nm <- str_replace_all(store_nm, 'BEYONDPLAZA', "")

    # Address: the text between the store name and the phone number, with
    # site-specific building/floor annotations stripped out.
    addr <- str_replace_all(
      unlist(str_extract_all(items, "(店|醫院|PLAZA)\n.+\n.+(\\)|09)")),
      '\\(.+\\)|\\(|店|\n|[:space:]|BEYOND PLAZA|頂樓 (國道三號-清水服務區)|\\(|09|[0-9]{2}\\)', ""
    )
    addr <- str_replace_all(
      addr,
      '(\\-靜宜大學宜園餐廳1F|副樓|1\\-2/F|01011櫃位|1F&B1|2620\\-28|2277\\-|(林口長庚紀念醫院美食街))', "-"
    )
    addr <- str_replace_all(addr, '\\-|^[0-9]{3}|頂樓|PLAZA|第.+中心', "")

    # Telephone: "(0X)NNNN-NNNN"-like text rewritten as "0X-NNNN-NNNN".
    tel_no <- str_replace_all(
      unlist(str_extract_all(items, "\n.+([0-9]{2}\\)|[0-9]{4}-).+\n")),
      '\n|[:space:]|\\(', ""
    )
    tel_no <- str_replace_all(tel_no, '\\)', "-")

    # data.frame() silently recycles vectors whose lengths divide each
    # other, which would pair names with the wrong address or phone number.
    n <- length(store_nm)
    if (length(addr) != n || length(tel_no) != n) {
      stop(
        sprintf(
          "Page %d: extracted %d names, %d addresses and %d phone numbers.",
          i, n, length(addr), length(tel_no)
        ),
        call. = FALSE
      )
    }

    data.frame(
      brand_nm = rep("subway", n),
      store_nm = store_nm,
      addr = addr,
      tel_no = tel_no,
      stringsAsFactors = FALSE
    )
  }

  pages <- lapply(seq_len(n_pages), parse_page)
  if (length(pages) == 0L) {
    return(data.frame(
      brand_nm = character(0),
      store_nm = character(0),
      addr = character(0),
      tel_no = character(0),
      stringsAsFactors = FALSE
    ))
  }

  # Bind however many pages the site reports instead of a fixed count.
  stores <- do.call(rbind, pages)
  colnames(stores) <- c("brand_nm", "store_nm", "addr", "tel_no")
  stores
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.