# R/find_jelcode.R
#
# Defines functions: tabNetwork_jelcode, order_jelcode, dominant_jelcode,
# ratio_jelcode, count_jelcode, find_jelcode
#
# Documented in: count_jelcode, dominant_jelcode, find_jelcode, order_jelcode,
# ratio_jelcode

# Find the JEL codes whose keyword patterns occur in a text.
#
# Arguments:
#   x  A single string to scan.
#
# Returns a single string: the "Code" values of the rows of
# jelclassification::df_jel whose "Keywords" regular expression matches `x`
# (case-insensitively), joined by ",". One entry is produced per matching
# row, in table order. Returns "" when nothing matches.
find_jelcode <- function(x) {
  jel_table <- jelclassification::df_jel

  # stri_detect_regex() is vectorised over `pattern`, so one call tests every
  # keyword pattern against `x`. Unlike a `1:nrow` loop this also behaves
  # sensibly for an empty table (where `1:0` would iterate over c(1, 0)).
  matched <- stringi::stri_detect_regex(
    x,
    pattern = jel_table[["Keywords"]],
    case_insensitive = TRUE
  )

  codes <- as.character(jel_table[["Code"]])

  # which() discards rows whose match result is NA (e.g. a missing pattern);
  # rows whose code itself is NA are discarded as well.
  hits <- codes[which(matched)]
  hits <- hits[!is.na(hits)]

  paste(hits, collapse = ",")
}


######################
# Count how often each JEL code is detected in a text.
#
# Arguments:
#   x  A single string to scan.
#
# Returns a tibble with the columns Code, Theme and Freq, where Freq is the
# number of times a code appears in the output of find_jelcode(x). Only codes
# that are also present in jel_keywords_count() are kept (inner join on
# "Code"), so a text without any match yields zero rows.
count_jelcode <- function(x) {
  # find_jelcode() returns one comma-separated string; split it back into the
  # individual codes and tabulate them.
  detected <- stringr::str_split(find_jelcode(x), pattern = ",")
  counts <- tibble::as_tibble(data.frame(table(detected)))
  colnames(counts) <- c("Code", "Freq")

  # Theme comes from jel_keywords_count(); presumably that table has one row
  # per code -- confirm against its definition.
  joined <- dplyr::inner_join(counts, jel_keywords_count(), by = "Code")

  # dplyr verbs are namespace-qualified so the function does not depend on
  # dplyr/magrittr being attached or on `select` not being masked.
  dplyr::select(joined, Code, Theme, Freq)
}


######################
# Frequency and relative frequency of each JEL code detected in a text.
#
# Arguments:
#   x  A single string to scan.
#
# Returns a tibble with the columns Code, Theme, Freq and Ratio. Freq is the
# number of times a code appears in the output of find_jelcode(x); Ratio is
# Freq divided by the column `n` supplied by jel_keywords_count()
# (presumably the number of keywords registered for the code -- confirm).
# Only codes present in jel_keywords_count() are kept, so a text without any
# match yields zero rows.
ratio_jelcode <- function(x) {
  # find_jelcode() returns one comma-separated string; split it back into the
  # individual codes and tabulate them.
  detected <- stringr::str_split(find_jelcode(x), pattern = ",")
  counts <- tibble::as_tibble(data.frame(table(detected)))
  colnames(counts) <- c("Code", "Freq")

  joined <- dplyr::inner_join(counts, jel_keywords_count(), by = "Code")

  # dplyr verbs are namespace-qualified so the function does not depend on
  # dplyr/magrittr being attached or on `select`/`mutate` not being masked.
  with_ratio <- dplyr::mutate(joined, Ratio = Freq / n)
  dplyr::select(with_ratio, Code, Theme, Freq, Ratio)
}


###################################
# Return the JEL code(s) with the highest score in a text.
#
# Arguments:
#   x       A single string to scan.
#   method  Column of ratio_jelcode(x) used as the score: "Freq" (default)
#           or "Ratio".
#
# Returns the Code values of every row whose score equals the maximum score,
# so ties produce several codes. When no code is detected the (empty) Code
# column is returned.
dominant_jelcode <- function(x, method = "Freq") {
  method <- match.arg(method, c("Freq", "Ratio"))

  scored <- ratio_jelcode(x)
  scores <- scored[[method]]

  # max() on a zero-length vector warns and returns -Inf; short-circuit the
  # "nothing detected" case instead.
  if (length(scores) == 0L) {
    return(scored$Code)
  }

  scored$Code[scores %in% max(scores)]
}

##############################
# Rank the JEL codes detected in a text and return the code(s) at the
# requested rank(s) together with their weight.
#
# Arguments:
#   x         A single string to scan.
#   method    Score used for ranking and weighting: "Freq" (default) or
#             "Ratio". Any other value is rejected by match.arg().
#   position  Row index (or indices) into the ranking, 1 being the code with
#             the highest score.
#
# Returns a data.frame with the columns Code and Weight, where Weight is the
# code's score divided by the sum of the scores of all detected codes. A
# position beyond the number of detected codes yields NA values.
order_jelcode <- function(x, method = "Freq", position = 1) {
  method <- match.arg(method, c("Freq", "Ratio"))

  scored <- ratio_jelcode(x)

  # Add the normalised weight, then sort by descending score.
  ranked <- switch(method,
    Freq = dplyr::arrange(
      dplyr::mutate(scored, Weight = Freq / sum(Freq, na.rm = TRUE)),
      dplyr::desc(Freq)
    ),
    Ratio = dplyr::arrange(
      dplyr::mutate(scored, Weight = Ratio / sum(Ratio, na.rm = TRUE)),
      dplyr::desc(Ratio)
    )
  )

  # Name the columns explicitly: tabNetwork_jelcode() reads `$Code` and
  # `$Weight` from this result.
  data.frame(
    Code = ranked$Code[position],
    Weight = ranked$Weight[position]
  )
}
# Example call of order_jelcode() on a sample sentence. This is a top-level
# statement, so it is evaluated every time this file is sourced.
# NOTE(review): looks like a leftover from interactive testing -- confirm
# whether it should live in the examples/tests instead.
order_jelcode("bank regression model financial.")
################################
# Apply order_jelcode() to every row of a data frame and stack the results.
#
# Arguments:
#   data     A data frame holding the texts to classify.
#   colname  Name of the column of `data` that contains the text.
#   ...      Further arguments forwarded to order_jelcode() (e.g. `method`,
#            `position`).
#
# Returns a data.frame with the columns Code and Weight, containing the
# results of order_jelcode() for each row of `data`, in row order. A zero-row
# `data` yields a zero-row result.
tabNetwork_jelcode <- function(data, colname, ...) {
  n_rows <- nrow(data)

  # `1:n_rows` would iterate over c(1, 0) here; return an empty table instead.
  if (n_rows == 0L) {
    return(data.frame(Code = character(0), Weight = numeric(0)))
  }

  per_row <- lapply(seq_len(n_rows), function(i) {
    order_jelcode(data[i, colname], ...)
  })

  data.frame(
    Code = unlist(lapply(per_row, function(res) res$Code)),
    Weight = unlist(lapply(per_row, function(res) res$Weight))
  )
}

# Example: build a two-row data frame of sample sentences and run
# tabNetwork_jelcode() on its "text" column, keeping the top-ranked code of
# each row. These are top-level statements, so they are evaluated (and `dat`
# is created) every time this file is sourced.
# NOTE(review): looks like a leftover from interactive testing -- confirm
# whether it should live in the examples/tests instead.
dat=data.frame(text=c("bank regression model financial.","The study consider financial deposit bank"))
tabNetwork_jelcode(dat,colname ="text",position = 1)
# Source: raulincadet/jelclassification, documentation built on Dec. 22, 2021, 1 p.m.