#' Scrape the Maxxis dealer directory from www.cst.com.tw.
#'
#' Starts at the store index page, follows every link on it whose href starts
#' with "store" (one page per county/city), collects the store links found on
#' each of those pages, and parses every store page into one row.
#'
#' Requires network access. Relies on `htmlParse`, `cssApply`, `cssLink`,
#' `cssCharacter`, `str_extract_all`, `str_extract`, `str_replace` and
#' `str_replace_all` being available on the search path, plus the httr package.
#'
#' @return A data.frame with the character columns `brand_nm`, `store_nm`,
#'   `store_owner`, `addr` and `tel_no`, one row per store page visited;
#'   `NULL` when no store page was found.
getMaxxis <- function() {
  # Every link on the site is relative to this directory.
  baseUrl <- 'http://www.cst.com.tw/pubtw/store/'

  # Download URL and return it as a parsed HTML document.
  # The body is decoded as UTF-8 by httr::content(); GET() itself takes no
  # encoding argument, so none is passed.
  fetchPage <- function(URL) {
    res <- httr::GET(URL)
    txt <- httr::content(res, "text", encoding = "utf8")
    htmlParse(txt, encoding = "utf8")
  }

  # Return the absolute URLs of all `li > a` links on one county/city page,
  # e.g. 'http://www.cst.com.tw/pubtw/store/store_Taipei_2.html'.
  getPages <- function(URL) {
    links <- unlist(cssApply(fetchPage(URL), 'li > a', cssLink))
    # Drop anchors with a missing or empty link target.
    links <- links[!is.na(links) & nzchar(links)]
    # sprintf (unlike paste0) yields a zero-length result for zero links.
    sprintf('%s%s', baseUrl, links)
  }

  # Parse one store page, e.g.
  # 'http://www.cst.com.tw/pubtw/store/tp/store_tp_004.html',
  # into a one-row data.frame.
  getInfo <- function(URL) {
    cells <- unlist(cssApply(fetchPage(URL), 'tr > td', cssCharacter))
    cells <- str_replace_all(cells, '[:space:]', '')
    # Table cells are read pairwise: column 1 = label, column 2 = value.
    pairs <- matrix(cells, ncol = 2, byrow = TRUE)
    # Skip the "所屬經銷商:" row and take the value column in the same
    # subscript, so a single remaining row is still handled (subsetting rows
    # first would drop the matrix to a vector and break the column lookup).
    keep <- pairs[, 1] != '所屬經銷商:'
    values <- pairs[keep, 2]
    # Remaining values are used positionally: 1 = store name, 2 = owner,
    # 3 = address, 4 = telephone.
    # Keep the leading part of the address that ends at "(一廠" or "(" and
    # then strip that marker.
    # NOTE(review): str_extract yields NA when the address contains no "(",
    # so such addresses end up as NA -- confirm this is intended.
    addr <- str_extract(values[3], '^.+\\(一廠|^.+\\(')
    addr <- str_replace(addr, '\\(一廠|\\(', '')
    data.frame(
      brand_nm = '瑪吉斯',
      store_nm = values[1],
      store_owner = values[2],
      addr = addr,
      tel_no = values[4],
      stringsAsFactors = FALSE
    )
  }

  # Links to the per-county/city pages.
  indexLinks <- cssApply(fetchPage(sprintf('%s%s', baseUrl, 'store_2011-2-2.html')),
                         'div#left > p > a', cssLink)
  indexLinks <- unlist(str_extract_all(indexLinks, '^store.+'))
  wantPages <- sprintf('%s%s', baseUrl, indexLinks)

  storeUrls <- unlist(lapply(wantPages, getPages))
  allList <- lapply(storeUrls, getInfo)
  # Returned as a plain expression so the result is visible to the caller.
  do.call(rbind, allList)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.