# draft/getKidCastle.R

# Scrape the Kid Castle (brand_nm '吉的堡') school listing from
# www.kidcastle.com.
#
# Steps:
#   1. Download the first search page and read the attributes of the anchor
#      elements found at a fixed XPath (presumably the pagination links).
#   2. Turn each of those values into a URL under
#      http://www.kidcastle.com/web_3/.
#   3. Download every such page and extract the name, address and telephone
#      cells at fixed XPaths.
#   4. Row-bind the per-page data frames.
#
# NOTE(review): GET, content, xpathSApply, xmlAttrs and xmlValue are not
# defined in this file; presumably they come from httr and XML -- confirm
# the package imports.
#
# Takes no arguments.
#
# Returns a data frame with character columns brand_nm, store_nm, addr and
# tel_no, one row per extracted entry. When no page links are found the
# result of rbind() over an empty list (NULL) is returned.
#
# Stops with an error when a page yields a different number of names,
# addresses and telephone cells.
getKidCastle <- function(){
  # Build the list of listing-page URLs to visit.
  getsidurl <- function(){
    wantURL <- 'http://www.kidcastle.com/web_3/school_search.php?select1=21'
    res <- GET(wantURL, encoding='utf8')
    res2 <- content(res, encoding='utf8')

    # xmlAttrs returns every attribute of each matched anchor.
    # NOTE(review): this assumes the anchors carry a single attribute (href)
    # -- confirm against the live page.
    maxPage <- xpathSApply(res2, '/html/body/table[2]/tr[2]/td/table/tr[1]/td[1]/table/tr[1]/td/a', xmlAttrs)

    # vapply keeps the result a character vector (and keeps the element
    # names) even when nothing matched.
    vapply(maxPage,
           function(wantPage) sprintf('http://www.kidcastle.com/web_3/%s', wantPage),
           character(1))
  }

  # Download one listing page and return its entries as a data frame.
  getAddr <- function(URL){
    res <- GET(URL, encoding='utf8')
    res2 <- content(res, encoding='utf8')

    # The XPath lookup may hand back an empty list when nothing matches;
    # flatten and coerce so every column is always a character vector.
    cellText <- function(xpath) {
      as.character(unlist(xpathSApply(res2, xpath, xmlValue)))
    }

    name <- cellText('/html/body/table[2]/tr[2]/td/table/tr[1]/td[2]/table/tr[7]/td/table/tr[3]/td/table/tr[1]/td/a')
    addr <- cellText('/html/body/table[2]/tr[2]/td/table/tr[1]/td[2]/table/tr[7]/td/table/tr[3]/td/table/tr[1]/th[1]')
    tel  <- cellText('/html/body/table[2]/tr[2]/td/table/tr[1]/td[2]/table/tr[7]/td/table/tr[3]/td/table/tr[1]/th[2]')

    # data.frame() silently recycles columns whose lengths divide evenly,
    # which would pair names with the wrong address or phone number.
    if (length(addr) != length(name) || length(tel) != length(name)) {
      stop(sprintf('getKidCastle: %s yielded %d names, %d addresses and %d phone numbers',
                   URL, length(name), length(addr), length(tel)),
           call. = FALSE)
    }

    # Repeat the brand explicitly so a page without entries gives a
    # zero-row data frame instead of a row-count error.
    data.frame(brand_nm=rep('吉的堡', length(name)), store_nm=name, addr=addr, tel_no=tel, stringsAsFactors = FALSE)
  }

  UrlList <- lapply(getsidurl(), getAddr)
  # Return the combined table visibly rather than through a trailing
  # assignment.
  do.call(rbind, UrlList)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.