# draft/getTwGod.R

#' Scrape the temple listing from twgod.com
#'
#' Downloads the index page, collects the links matched by the CSS selector
#' `div > div > a`, fetches every linked page and stacks the table cells found
#' on each of them into a single data frame.
#'
#' @return A data frame with the character columns `brand_nm` (the constant
#'   '台灣寺廟'), `store_nm`, `god_nm` and `addr`, or `NULL` when no page
#'   yields any cells.
getTwGod <- function(){
  # Download one page and parse it as UTF-8 HTML.
  # NOTE(review): GET/content look like httr and htmlParse like XML; confirm
  # against the packages the caller attaches.
  fetchPage <- function(pageURL){
    res <- GET(pageURL, encoding='utf8')
    htmlParse(content(res, "text", encoding = "utf8"), encoding = "utf8")
  }

  indexDoc <- fetchPage('http://www.twgod.com/CwP/P/P11567.html')
  areaURL <- cssApply(indexDoc,'div > div > a',cssLink)

  # Links to the per-county/city pages
  wantPages <- sprintf('http://www.twgod.com/CwP/P/%s',areaURL)

  # Scrape a single listing page, e.g. 'http://www.twgod.com/CwP/P/P29.html'.
  # Returns NULL when the page has no matching cells or the data frame cannot
  # be built, so that rbind() below simply skips it.
  getInfo <- function(URL){
    cells <- cssApply(fetchPage(URL),'tr.TableLine1 > td > div',cssCharacter)
    if(length(cells)<1) return(NULL)

    # Every four consecutive cells form one record; column 1 is not used.
    cells <- matrix(cells, ncol = 4, byrow = TRUE)

    tryCatch(
      data.frame(brand_nm='台灣寺廟',
                 store_nm=cells[,2],
                 god_nm=cells[,3],
                 addr=cells[,4],
                 stringsAsFactors=FALSE),
      error = function(e) NULL
    )
  }

  allList <- lapply(wantPages, getInfo)

  # Return the combined table visibly instead of ending on an assignment.
  do.call(rbind, allList)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.