# draft/getYourchance.R

#' Scrape the store list of the Yourchance (佑全藥局) pharmacy chain
#'
#' Downloads each store-listing page of www.yourchance.com.tw, extracts the
#' store name, address, zip code, telephone number and opening hours of every
#' listed store, and flags whether the store title carries the `icon_h.png`
#' (recorded as health-insurance, `nhi`) and `icon_p.png` (recorded as
#' parking, `car`) icons.
#'
#' @param ids Listing page ids substituted into the `id=` query parameter of
#'   the listing URL. Defaults to `1:4`, the pages this function has always
#'   scraped.
#' @return A data.frame with the character columns `brand_nm`, `store_nm`,
#'   `addr`, `zip`, `tel_no`, `open`, `nhi` and `car` (one row per store), or
#'   `NULL` when no page yields any store.
getYourchance <- function(ids = 1:4) {
  allLink <- sprintf(
    "http://www.yourchance.com.tw/index.php?option=module&lang=cht&task=showlist&id=%s",
    ids
  )
  iconBase <- "http://www.yourchance.com.tw/site/themes/default/cht/images/"

  # Text of every <div class="cls"> found inside a store list entry.
  fieldText <- function(doc, cls) {
    xpathSApply(
      doc,
      sprintf('//*[@id="in_content"]/div/ul/li/a/div/div[@class="%s"]', cls),
      xmlValue
    )
  }

  # For list entries 1..n, TRUE when the entry's title contains the icon image.
  hasIcon <- function(doc, n, icon) {
    paths <- sprintf(
      '//*[@id="in_content"]/div/ul[1]/li[%s]/a/div/div[@class="title "]/span/img[@src="%s%s"]',
      seq_len(n), iconBase, icon
    )
    vapply(
      paths,
      # xpathSApply yields a zero-length list when the image is absent.
      function(p) length(xpathSApply(doc, p, xmlValue)) > 0,
      logical(1),
      USE.NAMES = FALSE
    )
  }

  # Scrape one listing page; returns a data.frame, or NULL if it lists nothing.
  getYourchanceData <- function(URL) {
    res <- GET(URL)
    # Parse explicitly with the XML package: since httr 1.0, content() parses
    # HTML with xml2, and the resulting document cannot be queried with
    # xpathSApply().
    html <- content(res, as = "text", encoding = "UTF-8")
    if (is.null(html) || is.na(html) || !nzchar(html)) return(NULL)
    res2 <- XML::htmlParse(html, asText = TRUE, encoding = "UTF-8")

    store_nm <- fieldText(res2, "title ")
    # An unmatched XPath gives an empty list, not NULL, so test the length.
    if (length(store_nm) == 0) return(NULL)
    store_nm <- str_replace_all(store_nm, '[:space:]', '')
    store_nm <- store_nm[str_detect(store_nm, '[:alnum:]')]
    if (length(store_nm) == 0) return(NULL)

    addr <- fieldText(res2, "ad")
    addr <- str_replace(addr, '地 址:', '')
    zip <- str_extract(addr, '^[0-9]+')
    addr <- str_replace_all(addr, '^[0-9]+| ', '')

    # The "te" blocks alternate: telephone, opening hours, telephone, ...
    te <- fieldText(res2, "te")
    isTel <- seq_along(te) %% 2 == 1
    open <- str_replace_all(te[!isTel], '營業時間:', '')
    tel_no <- str_replace_all(te[isTel], '(電 話: )|[:space:]', '')
    tel_no <- str_replace_all(tel_no, '-', '')

    n <- length(store_nm)
    nhi <- ifelse(hasIcon(res2, n, "icon_h.png"), '有健保', '無健保')
    car <- ifelse(hasIcon(res2, n, "icon_p.png"), '有停車場', '無停車場')

    data.frame(
      brand_nm = '佑全藥局', store_nm, addr, zip, tel_no, open,
      nhi = nhi, car = car,
      stringsAsFactors = FALSE
    )
  }

  # rbind() drops the NULLs produced by empty pages.
  do.call(rbind, lapply(allLink, getYourchanceData))
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.