R/remove.emptyTweets.R

#' @title Remove empty entries
#'
#' @description Removes empty entries from a character vector. Aggressive cleaning often leaves some entries empty (e.g. tweets that consisted only of a URL). Depending on your downstream analyses, you may want to remove these (e.g. empty entries are useless for training a word embeddings model).
#'
#' @param text.clean character vector
#' @return vector text.clean with empty entries removed
#' @export

remove.empty = function(text.clean){
  # Drop zero-length strings ("") from a character vector.
  #
  # text.clean: character vector (e.g. cleaned tweets).
  # Returns:    text.clean without its "" entries; NA entries are kept,
  #             since a missing value is not an empty string.
  # Errors:     stops if text.clean is not of type character.
  # Side effect: prints how many entries were found to be empty.

  # is.character() always yields a single TRUE/FALSE, whereas comparing
  # class() against a string breaks for objects carrying several classes.
  if (!is.character(text.clean)){
    stop('class is not \'character\'')}

  # nzchar() is vectorised and returns logical(0) for zero-length input,
  # so an empty vector is handled without error. It is TRUE for NA.
  is.empty = !nzchar(text.clean)
  print(paste0('this many tweets are now empty: ', sum(is.empty)))

  if (any(is.empty)){
    # A plain vector takes exactly one index: the former trailing comma
    # (matrix-style indexing) raised "incorrect number of dimensions".
    text.clean = text.clean[!is.empty] # remove all of the empty tweets
  }

  return(text.clean)
}
bvidgen/tc documentation built on May 9, 2019, 2:21 a.m.