# draft/getPostATM.R

# Scrape the post.gov.tw location listing (query ID=190105) and return every
# row of the paginated result table as a data.frame.
#
# Takes no arguments.
#
# Returns a data.frame with three columns:
#   brand_nm - the constant '郵局ATM'
#   store_nm - column V2 of the scraped tables
#   addr     - column V3 of the scraped tables
#
# Stops with an error when the record count cannot be read from the page
# heading, when a result page contains no table, or when the combined table
# lacks the V2 / V3 columns.
#
# Uses GET / content, htmlParse / readHTMLTable, cssApply / cssCharacter and
# str_extract, which must already be attached by the caller.
getPostATM <- function() {
  rows_per_page <- 10  # must agree with PreRowDatas=10 in the query string
  url_template  <- 'http://www.post.gov.tw/post/internet/I_location/index.jsp?topage=%s%s'
  query_tail    <- '&PreRowDatas=10&city=&input2=&st_7=&st_6=&is_night=&st_1_5=&zip5=&prsb_no=&city_area=&ID=190105&post_address=&style=1&keyword=&Page_Load=1'

  # Download result page `i` and parse it into an HTML document.
  # NOTE(review): `encoding='utf8'` is forwarded to GET() exactly as the
  # original code did; confirm the installed httr version accepts it.
  fetch_page <- function(i) {
    page_url <- sprintf(url_template, i, query_tail)
    html     <- content(GET(page_url, encoding='utf8'), as='text', encoding='utf8')
    htmlParse(html, encoding='utf8')
  }

  # Work out the number of pages from the record count shown in the heading
  # of page 1.
  first_doc <- fetch_page(1)
  heading   <- cssApply(first_doc, "#pagemain > h1 ", cssCharacter)
  # NOTE(review): the pattern only matches a run of exactly four digits, so
  # the heading is presumably expected to show a 4-digit total; confirm
  # against the live page before relaxing it.
  total <- as.numeric(str_extract(heading, '[0-9]{4}'))[1]
  if (is.na(total) || total < 1) {
    stop("getPostATM: could not read the record count from the page heading",
         call. = FALSE)
  }
  n_pages <- ceiling(total / rows_per_page)

  # Collect the first table of every page. Page 1 was already downloaded for
  # the record count, so it is reused instead of being requested again.
  tables <- lapply(seq_len(n_pages), function(i) {
    doc   <- if (i == 1) first_doc else fetch_page(i)
    found <- readHTMLTable(doc)
    if (length(found) == 0) {
      stop("getPostATM: no table found on result page ", i, call. = FALSE)
    }
    found[[1]]
  })

  info1 <- do.call(rbind, tables)
  if (!all(c('V2', 'V3') %in% names(info1))) {
    stop("getPostATM: scraped table does not contain columns V2 and V3",
         call. = FALSE)
  }

  info2 <- data.frame('郵局ATM', info1$V2, info1$V3)
  names(info2) <- c('brand_nm', 'store_nm', 'addr')
  info2
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.