# draft/getTw16.R

#' Scrape the institution directory published on organ.tw16.net
#'
#' For every requested zone the first listing page is fetched to read the
#' total page count, then every listing page of that zone is fetched and the
#' name, address and telephone number of each listing are extracted.
#'
#' Relies on `GET`/`content`, `htmlParse`/`xpathSApply`/`xmlValue`,
#' `cssApply`/`cssCharacter` and `str_extract`/`str_replace_all` already being
#' attached by the caller (presumably httr, XML, CSS and stringr).
#'
#' @param zones Values for the site's `zNo` query parameter. The defaults are
#'   0 = northern Taiwan, 1 = central Taiwan, 2 = southern Taiwan,
#'   3 = eastern Taiwan, 4 = outlying islands.
#' @return A data frame with the columns `brand_nm`, `store_nm`, `addr` and
#'   `tel_no`, one row per listing, ordered by zone and then by page.
getTw16 <- function(zones = 0:4) {
  # Listing URL template: first placeholder is the page, second is the zone.
  listUrl <- "http://organ.tw16.net/organList.asp?Page=%s&zNo=%s&cNo=&aNo="
  brand <- "全國醫療網"

  # A full listing page carries 10 entries; the address/telephone selector
  # yields 3 <font> nodes per entry, of which the 2nd is the address and the
  # 3rd is the telephone number.
  perPage <- 10L
  nameIdx <- seq_len(perPage)
  addrIdx <- seq(2L, by = 3L, length.out = perPage)
  telIdx <- seq(3L, by = 3L, length.out = perPage)

  # Build the URL of every listing page of one zone.
  getPageUrls <- function(zone) {
    res <- GET(sprintf(listUrl, 1L, zone))
    html <- htmlParse(content(res, "text", encoding = "big5"), encoding = "utf8")
    pagerText <- as.character(unlist(xpathSApply(
      html,
      '//*[@id="table3"]/tr[11]/td/table[2]/tr[1]/td/p/font/font',
      xmlValue
    )))
    # The pager text contains "共有 <n> 頁" ("<n> pages in total").
    pageCount <- as.integer(
      str_extract(str_extract(pagerText, "共有.[0-9]+ 頁"), "[0-9]+")
    )
    pageCount <- pageCount[!is.na(pageCount)]
    if (length(pageCount) == 0L) {
      stop("Could not read the page count for zone ", zone, call. = FALSE)
    }
    # seq_len() yields no pages for a count of 0, where 1:0 would count down.
    sprintf(listUrl, seq_len(pageCount[1L]), zone)
  }

  # Extract name, address and telephone number from one listing page.
  getStores <- function(url) {
    res <- GET(url)
    # The body is Big5-encoded; hand the raw text to the parser as Big5.
    html <- htmlParse(rawToChar(res$content, multiple = FALSE), encoding = "Big5")
    name <- as.character(unlist(cssApply(
      html, "#table8 > tr:nth-child(1) > td:nth-child(2) > a > font", cssCharacter
    )))
    data <- as.character(unlist(cssApply(
      html, "#table8 > tr:nth-child(3) > td:nth-child(2) > font", cssCharacter
    )))
    name <- name[nameIdx]
    addrs <- str_replace_all(data[addrIdx], " ", "")
    tel <- data[telIdx]
    # Indexing past the end of a vector yields NA, so a page with fewer than
    # 10 listings would otherwise contribute all-NA padding rows.
    keep <- !(is.na(name) & is.na(addrs) & is.na(tel))
    data.frame(name = name[keep], addrs = addrs[keep], tel = tel[keep])
  }

  pageUrls <- unlist(lapply(zones, getPageUrls))
  stores <- do.call(rbind, lapply(pageUrls, getStores))
  if (is.null(stores)) {
    # No pages at all (e.g. empty `zones`): return an empty, well-formed frame.
    stores <- data.frame(
      name = character(0), addrs = character(0), tel = character(0)
    )
  }

  data.frame(
    brand_nm = rep(brand, nrow(stores)),
    store_nm = stores$name,
    addr = stores$addrs,
    tel_no = stores$tel
  )
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.