# draft/getTybio.R

#' Scrape the store list from the tybio.com.tw service pages
#'
#' Reads the total page count from the first listing page, then downloads
#' every listing page and collects the store names
#' (`table > tr > td > strong`) and the detail cells (`.style_gray3`,
#' laid out as four cells per store).
#'
#' Relies on `GET()`/`content()`, `htmlParse()` and
#' `cssApply()`/`cssCharacter` already being attached by the caller
#' (presumably from httr, XML and CSS -- confirm against the package setup).
#'
#' @return A data.frame with columns `brand_nm` (constant "台鹽"),
#'   `store_nm`, `addr`, `tel_no` and `tm`, one row per store.
getTybio <- function() {
  url_template <-
    "http://www.tybio.com.tw/webc/html/service/index.aspx?Page=%s"

  # Download one listing page and return it as a parsed HTML document.
  fetch_doc <- function(page_no) {
    page_url <- sprintf(url_template, page_no)
    html_txt <- content(GET(page_url, encoding = "utf8"),
                        as = "text", encoding = "utf8")
    htmlParse(html_txt, encoding = "utf8")
  }

  # Total number of pages: text of span.style04_about with "/" stripped.
  page_txt <- gsub("/", "",
                   cssApply(fetch_doc(1), "span.style04_about", cssCharacter))
  page <- suppressWarnings(as.integer(page_txt[1L]))
  if (is.na(page) || page < 1L) {
    stop("getTybio: could not read the total page count from the first page",
         call. = FALSE)
  }

  # Walk every listing page, collecting names and detail cells per page.
  name_pages   <- vector("list", page)
  detail_pages <- vector("list", page)
  for (i in seq_len(page)) {
    doc <- fetch_doc(i)

    store_nm <- gsub(">", "",
                     cssApply(doc, " table > tr > td > strong", cssCharacter))
    # Drop blank entries with a logical mask. The former
    # `x[-which(x == "")]` returned an empty vector whenever there was
    # nothing to drop, silently discarding every store on that page.
    name_pages[[i]] <- data.frame(store_nm = store_nm[nzchar(store_nm)])

    cells <- cssApply(doc, ".style_gray3", cssCharacter)
    detail_pages[[i]] <- as.data.frame(matrix(cells, ncol = 4, byrow = TRUE))
  }

  store_names <- do.call(rbind, name_pages)$store_nm
  details     <- do.call(rbind, detail_pages)
  # Only the first three of the four scraped columns are used.
  names(details)[1:3] <- c("addr", "tel_no", "tm")

  # Refuse to pair names with details when the counts disagree; otherwise
  # data.frame() may recycle and produce misaligned rows.
  n_rows <- nrow(details)
  if (length(store_names) != n_rows) {
    stop(sprintf("getTybio: scraped %d store names but %d detail rows",
                 length(store_names), n_rows),
         call. = FALSE)
  }

  data.frame(
    brand_nm = rep("台鹽", n_rows),
    store_nm = store_names,
    addr     = details$addr,
    tel_no   = details$tel_no,
    tm       = details$tm
  )
}
# leoluyi/address_crawler documentation built on May 21, 2019, 5:09 a.m.