# R/gReek.R

# Defines functions: greek_transformation, greek_stop_words

# Documented in: greek_stop_words, greek_transformation

# Remove the tonos/dialytika marks from lowercase Greek vowels.
#
# The input is flattened with unlist() and every accented lowercase vowel
# is replaced by its plain counterpart:
#   ά -> α, έ -> ε, ή -> η, ί -> ι, ό -> ο, ύ -> υ, ώ -> ω, ϋ -> υ, ϊ -> ι
# All other characters (including uppercase accented letters) are left as-is.
#
# Arguments:
#   x  A character vector, or a list of character vectors (e.g. tokenised
#      text). List elements may have different lengths. Non-character input
#      is coerced with as.character().
#
# Value:
#   A character vector with one element per element of unlist(x), in the
#   same order. Empty or NULL input yields character(0).
greek_transformation <- function(x) {
  tokens <- unlist(x)
  if (!is.character(tokens)) {
    # Covers NULL (-> character(0)), factors (-> labels) and other atomics;
    # character input is left untouched so that any names survive.
    tokens <- as.character(tokens)
  }

  # Strings that are not valid UTF-8 are assumed to be in ISO-8859-7 and are
  # re-encoded one by one, so already-valid UTF-8 elements are never mangled.
  needs_recode <- !validUTF8(tokens)
  if (any(needs_recode)) {
    tokens[needs_recode] <- iconv(
      tokens[needs_recode],
      from = "ISO-8859-7",
      to = "UTF-8"
    )
  }

  # Parallel vectors: accented[i] is replaced by plain[i].
  accented <- c("ά", "έ", "ή", "ί", "ό", "ύ", "ώ", "ϋ", "ϊ")
  plain <- c("α", "ε", "η", "ι", "ο", "υ", "ω", "υ", "ι")
  for (i in seq_along(accented)) {
    tokens <- gsub(accented[i], plain[i], tokens)
  }

  tokens
}

# Return the built-in Greek stop-word list.
#
# Takes no arguments. The result is a character vector of common Greek
# words (in accented and unaccented spellings), single Greek and Latin
# letters, and a few web/Twitter artefacts such as "https", "t.co" and "amp".
# Some entries occur more than once; the order and the duplicates are kept
# exactly as listed here.
greek_stop_words <- function() {
  c(
    "εκει", "https", "κάποια", "εσυ", "αυτος", "αυτη", "εμεις", "πάνω",
    "κάτω", "t.co", "u", "0001f92a", "εχεις", "αλλα", "άλλα", "τι", "κατά",
    "γιατι", "γιατί", "αλλά", "ως", "μέσα", "ειχε", "όπως", "όλο", "ο",
    "α", "β", "γ", "δ", "ε", "ζ", "η", "θ", "ι", "κ", "λ", "μ", "ν", "ξ",
    "ο", "π", "ρ", "σ", "τ", "υ", "φ", "χ", "ψ", "ω",
    "a", "b", "c", "d", "e", "f",
    "να", "ναι", "μας", "τετοιες", "ήταν", "ηταν", "αυτο", "ας", "εγω",
    "εχει", "ή", "η", "εκεί", "και", "λίγο", "λιγο", "πάλι", "μονο", "απ",
    "μόνο", "αυτά", "αυτή", "αυτα", "αυτη", "εγώ", "ούτε", "υπάρχει", "-",
    "κάνει", "στους", "κάθε", "πρέπει", "τώρα", "λέει", "όχι", "ήταν",
    "amp", "δύο", "σαν", "το", "να", "για", "του", "είναι", "ειναι",
    "στις", "έχω", "μετά", "μη", "κάτι", "είσαι", "πολύ", "σήμερα",
    "καλημέρα", "όλα", "ολα", "όλοι", "ολοι", "όλες", "ολες", "πολυ",
    "πολλή", "πολλά", "πολλη", "πολλα", "την", "με", "του", "της", "τα",
    "που", "δεν", "στο", "είναι", "θα", "τον", "σε", "από", "απο", "μου",
    "στην", "οι", "τους", "μας", "τη", "των", "στη", "στα", "τις", "ότι",
    "οτι", "σου", "στον", "αλλά", "μια", "τι", "αν", "σας", "έχει", "ένα",
    "αυτό", "δε", "όταν", "κι", "γιατί", "πως", "πιο", "μην", "έχουν",
    "ρε", "μόνο"
  )
}
# NKryst/gReek documentation built on Dec. 27, 2019, 12:12 a.m.