# R/sungki_quant.R

# Defines functions: get_code, naver_discussion, naver_news, analyst_consensus

# Documented in: analyst_consensus, get_code, naver_discussion, naver_news

### SUNGKI BAE ###
### SUNGKI QUANT ###

#' Look up a stock's name and code on Daum Finance
#'
#' Sends `stock_name` to the Daum Finance search page and scrapes the stock
#' code from the returned HTML. When the page has no `.stockCode` element, the
#' first `.txt` element of the page is used instead for both the name and the
#' code.
#'
#' @param stock_name A single, non-missing stock name or stock code to search
#'   for.
#' @return A character vector: the stock name followed by the stock code(s)
#'   scraped from the page. Callers in this file use element 1 as the name and
#'   element 2 as the code. A warning is raised when no code could be scraped.
#' @keywords stock code, stock name
#' @export
#' @examples
#' \dontrun{
#' get_code("005930")
#' }

get_code <- function(stock_name) {
  if (length(stock_name) != 1L || is.na(stock_name)) {
    stop("`stock_name` must be a single, non-missing stock name or code.",
         call. = FALSE)
  }

  # The packages are attached (not just loaded) on purpose: other functions in
  # this file call rvest/xts functions unqualified and may rely on this side
  # effect. Only rvest is actually needed by this function, so only its
  # absence is fatal here.
  has_rvest <- require(rvest)
  require(stringr)
  require(xts)
  if (!has_rvest) {
    stop("Package 'rvest' is required by get_code() but could not be loaded.",
         call. = FALSE)
  }

  # Percent-encode the search term so that non-ASCII names, spaces and
  # reserved characters such as '&' still produce a valid query string.
  query <- utils::URLencode(enc2utf8(as.character(stock_name)),
                            reserved = TRUE)
  url <- paste0("http://finance.daum.net/search/search.daum?name=",
                query,
                "&nil_profile=vsearch&nil_src=stock")

  html_page <- read_html(url, encoding = "UTF-8")

  name <- stock_name
  code <- html_text(html_nodes(html_page, ".stockCode"))

  if (length(code) == 0) {
    # No `.stockCode` element: fall back to the first `.txt` element.
    # NOTE(review): the code is cut from fixed character positions 47-52 of
    # the element's markup, which presumably matches the search-result layout
    # at the time of writing - confirm against the live page.
    candidates <- html_nodes(html_page, ".txt")
    name <- html_text(candidates)[1]
    code <- substring(as.character(candidates[1]), 47, 52)
  }

  if (length(code) == 0 || all(is.na(code) | !nzchar(code))) {
    warning("No stock code could be found for '", stock_name, "'.",
            call. = FALSE)
  }

  c(name, code)
}

#-------------------------------------------------------------------------------#
#-------------------------------------------------------------------------------#
#-------------------------------------------------------------------------------#
#-------------------------------------------------------------------------------#



#' Crawl the Naver Finance discussion board of a stock
#'
#' Resolves `stock` to a stock code with `get_code()` and then reads the
#' discussion board page by page, collecting the time, title and view count of
#' every post. Reading stops at the first page that holds fewer than 20 posts,
#' or when the page returned by the site is not the page that was requested.
#'
#' @param stock A single, non-missing stock name or stock code.
#' @return An `xts` object with the columns `title` and `views`, indexed by
#'   post time. Within the result, later pages come first and each page is in
#'   reverse page order.
#' @keywords social data, naver stock discussion
#' @export
#' @examples
#' \dontrun{
#' naver_discussion("005930")
#' }

naver_discussion <- function(stock) {
  if (length(stock) != 1L || is.na(stock)) {
    stop("`stock` must be a single, non-missing stock name or code.",
         call. = FALSE)
  }

  # rvest and xts are needed below. stringr is not used in this function; it
  # is still attached because attaching it was part of the original behavior.
  has_rvest <- require(rvest)
  require(stringr)
  has_xts <- require(xts)
  if (!has_rvest || !has_xts) {
    stop("Packages 'rvest' and 'xts' are required by naver_discussion().",
         call. = FALSE)
  }

  stock_code <- get_code(stock)[2]

  # A page with fewer posts than this is treated as the last page; at most
  # this many posts are taken from any one page.
  rows_per_page <- 20L

  # Cut the value of the title="..." attribute out of one node's markup.
  extract_title <- function(node) {
    markup <- substring(as.character(node), 20)
    substring(markup,
              regexpr("title=\"", markup) + 7,
              regexpr("\">", markup) - 1)
  }

  # Parse one board page into a character matrix (time, title, views).
  get_data <- function(html_page) {
    # The matching <span> elements alternate: post time, then view count.
    spans <- html_nodes(html_page, ".tah.p10.gray03")
    spans <- html_text(spans[html_name(spans) == "span"])
    is_views <- seq_along(spans) %% 2L == 0L
    time <- spans[!is_views]
    views <- spans[is_views]

    title <- unlist(lapply(html_nodes(html_page, ".title"), extract_title))

    if (length(time) == 0L || length(views) == 0L || length(title) == 0L) {
      # Nothing usable on this page: return an empty, well-formed matrix.
      return(matrix(character(0), nrow = 0L, ncol = 3L,
                    dimnames = list(NULL, c("time", "title", "views"))))
    }
    cbind(time, title, views)
  }

  pages <- list()
  page <- 1

  repeat {
    url <- paste0("http://finance.naver.com/item/board.nhn?code=",
                  stock_code,
                  "&page=",
                  page)

    html_page <- read_html(url, encoding = "euc-kr")

    # The third `.on` element holds the page number the site actually served.
    # If it differs from the requested number, there are no more pages.
    on_nodes <- html_nodes(html_page, ".on")
    current <- if (length(on_nodes) >= 3L) {
      html_text(html_children(on_nodes[3]))
    } else {
      character(0)
    }
    if (length(current) == 0L) {
      stop("Could not find the current-page marker on board page ", page,
           " for stock code '", stock_code, "'.", call. = FALSE)
    }
    if (page != current[1]) {
      cat("--- read", page - 1, "page ")
      break
    }

    rows <- get_data(html_page)
    n_rows <- nrow(rows)
    if (n_rows == 0L) {
      cat("--- read", page - 1, "page ")
      break
    }

    # Reverse the page; drop = FALSE keeps a single-row page a matrix.
    keep <- min(n_rows, rows_per_page)
    pages[[length(pages) + 1L]] <- rows[rev(seq_len(keep)), , drop = FALSE]
    last_page <- n_rows < rows_per_page

    # Progress is reported with the number of the page just read.
    if (page == 10) {
      cat("read", 10, "page ")
    } else if (page %% 10 == 0 || last_page) {
      cat("--- read", page, "page ")
    }

    if (last_page) {
      break
    }
    page <- page + 1
  }

  if (length(pages) == 0L) {
    stop("No posts were found for stock code '", stock_code, "'.",
         call. = FALSE)
  }

  # Later pages go first, matching the order the original prepending produced.
  DATA <- do.call(rbind, rev(pages))
  # The time column, with '.' replaced by '-', becomes the xts index.
  rownames(DATA) <- gsub(".", "-", DATA[, 1], fixed = TRUE)
  DATA <- as.xts(DATA)

  DATA[, 2:3]
}


#-------------------------------------------------------------------------------#
#-------------------------------------------------------------------------------#
#-------------------------------------------------------------------------------#
#-------------------------------------------------------------------------------#


#' Crawl Naver Finance news for a stock
#'
#' Resolves `stock` to a stock code with `get_code()` and then reads the news
#' list page by page until the page returned by the site is no longer the page
#' that was requested. With `contents = "on"` every linked article is
#' downloaded as well and its text is added as an extra column.
#'
#' @param stock A single, non-missing stock name or stock code.
#' @param contents `"on"` to also download the article text; any other value
#'   (default `"off"`) collects list entries only.
#' @return An `xts` object indexed by the scraped date, with the columns
#'   `info` and `title`, plus `news` when `contents = "on"`. Within the
#'   result, later pages come first and each page is in reverse page order.
#' @keywords social data, naver news
#' @export
#' @examples
#' \dontrun{
#' naver_news("005930")
#' naver_news("005930", contents = "on")
#' }

naver_news <- function(stock, contents = "off") {
  if (length(stock) != 1L || is.na(stock)) {
    stop("`stock` must be a single, non-missing stock name or code.",
         call. = FALSE)
  }

  # Load the scraping dependencies here instead of relying on get_code()
  # having attached them as a side effect.
  has_rvest <- require(rvest)
  has_xts <- require(xts)
  if (!has_rvest || !has_xts) {
    stop("Packages 'rvest' and 'xts' are required by naver_news().",
         call. = FALSE)
  }

  stock_code <- get_code(stock)[2]
  with_contents <- isTRUE(contents == "on")

  # Download one article and return its text as a single string.
  read_article <- function(article_url) {
    news_page <- read_html(article_url, encoding = "euc-kr")
    text <- html_text(html_nodes(
      news_page,
      xpath = '//div[@class="scr01"]/node()[not(self::div)]'
    ))
    # Drop the last 10 nodes (presumably trailing boilerplate - confirm).
    # head() with a negative n also copes with articles shorter than that.
    paste(utils::head(text, -10L), collapse = "")
  }

  # Parse one list page into a character matrix (date, info, title[, news]).
  get_data <- function(html_page) {
    title <- html_text(html_nodes(html_page, ".tit"))
    info <- html_text(html_nodes(html_page, ".info"))
    date <- html_text(html_nodes(html_page, ".date"))
    if (!with_contents) {
      return(cbind(date, info, title))
    }

    links <- html_attr(html_nodes(html_page, ".title a"), name = "href")
    news <- if (length(links) > 0L) {
      vapply(paste0("http://finance.naver.com/", links),
             read_article, character(1), USE.NAMES = FALSE)
    } else {
      # No article links on this page: do not fetch the bare site root.
      rep(NA_character_, length(title))
    }
    cbind(date, info, title, news)
  }

  pages <- list()
  page <- 1

  repeat {
    url <- paste0("http://finance.naver.com/item/news_news.nhn?code=",
                  stock_code,
                  "&page=",
                  page,
                  "&sm=entity_id.basic&clusterId=")

    html_page <- read_html(url, encoding = "euc-kr")

    # The second `.on` element holds the page number the site actually served.
    # If it differs from the requested number, there are no more pages.
    on_nodes <- html_nodes(html_page, ".on")
    current <- if (length(on_nodes) >= 2L) {
      html_text(html_children(on_nodes[2]))
    } else {
      character(0)
    }
    if (length(current) == 0L) {
      stop("Could not find the current-page marker on news page ", page,
           " for stock code '", stock_code, "'.", call. = FALSE)
    }
    if (page != current[1]) {
      cat("--- end")
      break
    }

    rows <- get_data(html_page)
    # Reverse the page; drop = FALSE keeps 0- and 1-row pages as matrices.
    pages[[length(pages) + 1L]] <- rows[rev(seq_len(nrow(rows))), ,
                                        drop = FALSE]

    # Progress is reported with the number of the page just read.
    if (page == 10) {
      cat("read", 10, "page ")
    } else if (page %% 10 == 0) {
      cat("--- read", page, "page ")
    }

    page <- page + 1
  }

  # Later pages go first, matching the order the original prepending produced.
  DATA <- do.call(rbind, rev(pages))
  if (is.null(DATA) || nrow(DATA) == 0L) {
    stop("No news items were found for stock code '", stock_code, "'.",
         call. = FALSE)
  }

  # The date column, with '.' replaced by '-', becomes the xts index.
  rownames(DATA) <- gsub(".", "-", DATA[, 1], fixed = TRUE)
  DATA <- as.xts(DATA)

  if (with_contents) {
    DATA[, 2:4]
  } else {
    DATA[, 2:3]
  }
}


#-------------------------------------------------------------------------------#
#-------------------------------------------------------------------------------#
#-------------------------------------------------------------------------------#
#-------------------------------------------------------------------------------#




#' Crawl analyst reports for a stock from Naver Finance research
#'
#' Resolves `stock` to a name and code with `get_code()`, walks the company
#' research list page by page, opens every report found there and collects its
#' title, source, target price and comment. Reading stops when the page
#' returned by the site is no longer the page that was requested.
#'
#' @param stock A single, non-missing stock name or stock code.
#' @return An `xts` object indexed by the scraped report date, with the
#'   columns `title`, `from`, `target price` and `consensus`. A field that is
#'   missing on a report page is `NA`. Within the result, later pages come
#'   first and each page is in reverse page order.
#' @keywords social data, naver news
#' @export
#' @examples
#' \dontrun{
#' analyst_consensus("005930")
#' }

analyst_consensus <- function(stock) {
  if (length(stock) != 1L || is.na(stock)) {
    stop("`stock` must be a single, non-missing stock name or code.",
         call. = FALSE)
  }

  # rvest and xts are needed below. stringr is not used in this function; it
  # is still attached because attaching it was part of the original behavior.
  has_rvest <- require(rvest)
  require(stringr)
  has_xts <- require(xts)
  if (!has_rvest || !has_xts) {
    stop("Packages 'rvest' and 'xts' are required by analyst_consensus().",
         call. = FALSE)
  }

  # One lookup supplies both values (each get_code() call is a web request).
  stock_info <- get_code(stock)
  stock_name <- stock_info[1]
  stock_code <- stock_info[2]

  # First element of x, or NA when x is empty, so every field has length 1.
  first_or_na <- function(x) {
    if (length(x) > 0L) x[1] else NA_character_
  }

  # Build the report URLs from an already-parsed list page.
  paper_url <- function(html_page) {
    links <- as.character(html_nodes(html_page, ".type_1 td a"))
    if (length(links) < 2L) {
      return(character(0))
    }
    # Every third anchor, starting with the second, is taken as a report link.
    links <- links[seq(2, length(links), 3)]
    # The id sits between ".nhn?nid=" (9 characters) and the next "&amp".
    nid <- substring(links,
                     regexpr(".nhn", links, fixed = TRUE) + 9,
                     regexpr("&amp", links, fixed = TRUE) - 1)

    paste0("http://finance.naver.com/research/company_read.nhn?nid=",
           nid,
           "&page=&searchType=itemCode&itemCode=",
           stock_code)
  }

  # Read one report page; always returns exactly five strings:
  # date, title, source, target price, comment.
  get_data <- function(p_url) {
    html_page <- read_html(p_url, encoding = "euc-kr")

    con_links <- html_nodes(html_page, ".con_link")
    title <- if (length(con_links) >= 2L) {
      sub(".pdf", "", html_text(con_links[2]), fixed = TRUE)
    } else {
      NA_character_
    }

    # The `.source` text is split on '|': first part source, second part date.
    source_text <- html_text(html_nodes(html_page, ".source"))
    source_parts <- if (length(source_text) > 0L) {
      strsplit(source_text[1], "|", fixed = TRUE)[[1]]
    } else {
      character(0)
    }
    from <- source_parts[1]
    date <- source_parts[2]

    tp <- first_or_na(html_text(html_nodes(html_page, ".money")))
    coment <- first_or_na(html_text(html_nodes(html_page, ".coment")))

    c(date, first_or_na(title), from, tp, coment)
  }

  pages <- list()
  page <- 1

  repeat {
    url <- paste0("http://finance.naver.com/research/company_list.nhn?keyword=&brokerCode=&writeFromDate=&writeToDate=&searchType=itemCode&itemName=",
                  stock_name,
                  "&itemCode=",
                  stock_code,
                  "&page=",
                  page)

    html_page <- read_html(url, encoding = "euc-kr")

    # The second `.on` element holds the page number the site actually served.
    # If it differs from the requested number, there are no more pages.
    on_nodes <- html_nodes(html_page, ".on")
    current <- if (length(on_nodes) >= 2L) {
      html_text(html_children(on_nodes[2]))
    } else {
      character(0)
    }
    if (length(current) == 0L) {
      stop("Could not find the current-page marker on research page ", page,
           " for stock code '", stock_code, "'.", call. = FALSE)
    }
    if (page != current[1]) {
      cat("--- end")
      break
    }

    # Reuse the page that was just parsed rather than downloading it again.
    p_url <- paper_url(html_page)

    if (length(p_url) > 0L) {
      reports <- t(vapply(p_url, get_data, character(5), USE.NAMES = FALSE))
      # Reverse the page; drop = FALSE keeps a single report a matrix.
      pages[[length(pages) + 1L]] <- reports[rev(seq_len(nrow(reports))), ,
                                             drop = FALSE]
    }

    if (page == 1) {
      cat("read", page, "page ")
    } else {
      cat("--- read", page, "page ")
    }

    page <- page + 1
  }

  # Later pages go first, matching the order the original prepending produced.
  DATA <- do.call(rbind, rev(pages))
  if (is.null(DATA) || nrow(DATA) == 0L) {
    stop("No analyst reports were found for stock code '", stock_code, "'.",
         call. = FALSE)
  }

  # The date column, with '.' replaced by '-', becomes the xts index.
  rownames(DATA) <- gsub(".", "-", DATA[, 1], fixed = TRUE)
  DATA <- as.xts(DATA)
  colnames(DATA) <- c("del", "title", "from", "target price", "consensus")

  DATA[, 2:5]
}
# opop4615/sungkiquant documentation built on May 22, 2019, 8:52 p.m.