# draft/getDante.R

#' Scrape the Dante Coffee store list
#'
#' Downloads the store overview page, reads the first HTML table found on it
#' and returns the name, address and phone number of every store listed.
#'
#' @return A data frame with one row per store and the columns `brand_nm`
#'   (the constant "丹堤"), `store_nm`, `addr` (with any parenthesised text
#'   removed) and `tel_no`.
#' @details Signals an error when the HTTP request fails or when the page
#'   contains no HTML table.
getDante <- function() {
  # Store overview page
  url <- "http://www.dante.com.tw/store_10_2_1.php"

  # GET() defines no `encoding` argument; decoding is handled by content().
  resp <- GET(url)
  # Fail with a clear HTTP error instead of parsing an error page.
  httr::stop_for_status(resp)

  html_text <- content(resp, as = "text", encoding = "utf8")
  html_doc  <- htmlParse(html_text, encoding = "utf8")
  tables    <- readHTMLTable(html_doc)
  if (length(tables) == 0L) {
    stop("No HTML table found at ", url, call. = FALSE)
  }

  # Columns 2-4 of the first table are store name, address and phone number.
  stores <- data.frame(tables[[1]][2:4])

  # The column names of `stores` are treated as one more store record.
  # NOTE(review): presumably the first table row was read as the header and
  # data.frame() then sanitised the names (check.names) -- confirm against
  # the live page.
  first_store <- names(stores)

  # Rebuild the phone number of that record from its digit groups, formatted
  # as "(area)prefix-line".
  digits   <- str_extract_all(first_store[3], "[0-9]{2,4}")[[1]]
  first_tel <- paste0("(", digits[1], ")", digits[2], "-", digits[3])

  first_row <- data.frame(
    store_nm = first_store[1],
    addr     = first_store[2],
    tel_no   = first_tel
  )
  colnames(stores) <- c("store_nm", "addr", "tel_no")

  all_stores <- rbind(first_row, stores)

  # Strip parenthesised text from the addresses.
  clean_addr <- str_replace_all(all_stores$addr, "\\(.+\\)", "")

  data.frame(
    brand_nm = "丹堤",
    store_nm = all_stores$store_nm,
    addr     = clean_addr,
    tel_no   = all_stores$tel_no
  )
}
# leoluyi/address_crawler documentation built on May 21, 2019, 5:09 a.m.