Nothing
      #' @title Simulates the opinion expectation distribution
#' of a digital text document.
#' @description This function simulates the expectation distribution of the
#' observed opinion score (computed using the `opi_score` function).
#' The resulting tidy-format dataframe can be described as the
#' `expected sentiment document (ESD)` (Adepeju and Jimoh, 2021).
#' @param osd_data A list (dataframe). An \code{n} x \code{3}
#' OSD, in which \code{n} represents the length of the
#' text records that have been successfully classified as
#' expressing positive, negative or a neutral sentiment.
#' Column \code{1} of the OSD (named \code{ID}) is the text record ID,
#' column \code{2} (named \code{sentiment}) holds the sentiment class
#' of each record (i.e. positive, negative, or neutral), while
#' column \code{3} (named \code{keywords}) takes one of two values,
#' `present` or `absent`, indicating records that include and records
#' that do not include any of the specified theme keywords, respectively.
#' @param nsim (an integer) Number of replicas (ESD) to simulate.
#' Must be between 99 and 9999 (inclusive); values outside this range
#' raise an error. Recommended values are: 99, 999, 9999. Since the run
#' time is proportional to the number of replicas, a moderate number of
#' simulations, such as 999, is recommended. Default: \code{99}.
#' @param metric (an integer) Specify the metric to utilize for the
#' calculation of the opinion score. Default: \code{1}. See
#' details in the documentation of \code{opi_score} function.
#' The input argument here must correspond to that of \code{opi_score}
#' function in order to compute a statistical significance value (p-value).
#' @param fun A user-defined function given that parameter
#' \code{metric} is set equal to \code{5}. See details in the
#' documentation of the \code{opi_score} function.
#' @param quiet (TRUE or FALSE) To suppress processing
#' messages. Default: \code{TRUE}.
#' @usage opi_sim(osd_data, nsim=99, metric = 1, fun = NULL, quiet=TRUE)
#' @examples
#'
#' #Prepare an osd data from the output
#' #of `opi_score` function.
#'
#' score <- opi_score(textdoc = policing_dtd,
#'                      metric = 1, fun = NULL)
#' #extract OSD
#' OSD <- score$OSD
#' #note that `OSD` is shorter in length
#' #than `policing_dtd`, meaning that some
#' #text records were not classified
#'
#' #Bind a fictitious indicator column
#' osd_data2 <- data.frame(cbind(OSD,
#'            keywords = sample(c("present","absent"), nrow(OSD),
#'            replace=TRUE, c(0.35, 0.65))))
#'
#' #generate expected distribution
#' exp_score <- opi_sim(osd_data2, nsim=99, metric = 1,
#'                                  fun = NULL, quiet=TRUE)
#' #preview the distribution
#' hist(exp_score)
#'
#' @details Employs non-parametric randomization testing approach in
#' order to generate the expectation distribution of the observed
#' opinion scores (see details in Adepeju and Jimoh 2021).
#' @return Returns a numeric vector of expected opinion scores with length
#' equal to the number of simulations (\code{nsim}) specified.
#' @references (1) Adepeju, M. and Jimoh, F. (2021). An Analytical
#' Framework for Measuring Inequality in the Public Opinions on
#' Policing – Assessing the impacts of COVID-19 Pandemic using
#' Twitter Data. https://doi.org/10.31235/osf.io/c32qh
#' @importFrom tidytext unnest_tokens
#' @importFrom tibble tibble
#' @importFrom magrittr %>%
#' @importFrom dplyr filter group_by mutate
#' ungroup distinct select summarise bind_rows rename
#'
#' @export
#'
opi_sim <- function(osd_data, nsim = 99, metric = 1, fun = NULL,
                    quiet = TRUE) {
  # Simulates `nsim` expected opinion scores by re-drawing the sentiment
  # label of every non-neutral "present" record from the empirical
  # positive/negative distribution of the non-neutral "absent" records.
  # Neutral records and "absent" records keep their observed labels.
  # Returns a numeric vector holding one score per replica.

  # Bound locally only so that R CMD check does not report `flush.console`
  # as an undefined global. The call further down still reaches the real
  # function, because R skips non-function bindings when resolving a call.
  flush.console <- NULL

  # ---- argument checks ----
  if (nsim < 99) {
    stop("Number of simulation (nsim) is too small!!")
  }
  if (nsim > 9999) {
    stop(paste("Consider specifying a smaller",
               "number of simulations (nsim)!!", sep = " "))
  }
  if (!metric %in% c(1:5)) {
    stop(paste(" 'Metric' argument can only assume values from",
               "1, 2,..., 5", sep = " "))
  }
  if (metric == 5 && is.null(fun)) {
    stop("A function (equation) is required in the parameter 'fun'")
  }
  if (metric %in% c(1:4) && !is.null(fun)) {
    warning(paste("`fun` parameter will not be used!!",
                  "Otherwise, set `metric = 5`", sep = " "),
            call. = FALSE)
  }

  # ---- input data checks ----
  absent_cols <- setdiff(c("sentiment", "keywords"), names(osd_data))
  if (length(absent_cols) > 0) {
    stop(paste("'osd_data' must contain the column(s):",
               paste(absent_cols, collapse = ", "), sep = " "))
  }

  # as.character() so that factor columns behave like character columns
  sent_all <- as.character(osd_data[["sentiment"]])
  key_all <- as.character(osd_data[["keywords"]])

  # ---- loop-invariant quantities (computed once, not per replica) ----
  # Neutral records are never relabelled; only their count is needed.
  n_neutral <- sum(sent_all == "neutral", na.rm = TRUE)

  # Records with a missing sentiment are ignored altogether.
  is_non_neutral <- !is.na(sent_all) & sent_all != "neutral"
  sent_nn <- sent_all[is_non_neutral]
  key_nn <- key_all[is_non_neutral]

  if (!all(key_nn %in% c("present", "absent"))) {
    stop(paste("Column 'keywords' of 'osd_data' may only contain",
               "\"present\" or \"absent\"", sep = " "))
  }

  absent_sent <- sent_nn[key_nn == "absent"]
  n_present <- sum(key_nn == "present")

  if (n_present > 0 && length(absent_sent) == 0) {
    stop(paste("'osd_data' has no non-neutral \"absent\" records",
               "to derive the sampling probabilities from", sep = " "))
  }

  # Empirical label distribution of the "absent" group, with the labels
  # kept in order of first appearance.
  absent_labels <- unique(absent_sent)
  absent_probs <- tabulate(match(absent_sent, absent_labels),
                           nbins = length(absent_labels)) /
    length(absent_sent)

  # Contribution of the (unchanged) "absent" records to each count.
  fixed_pos <- sum(absent_sent == "positive")
  fixed_neg <- sum(absent_sent == "negative")

  # ---- simulation ----
  sim_ids <- seq_len(nsim)
  # A list rather than a numeric vector, so that whatever `fun` returns
  # is concatenated exactly as given.
  exp_scores <- vector("list", length(sim_ids))

  for (m in sim_ids) {
    # Re-draw the labels of the "present" records under the null
    # distribution of the "absent" group.
    drawn <- sample(absent_labels, n_present, replace = TRUE,
                    prob = absent_probs)

    # Plain numeric counts of positive, negative and neutral records.
    P <- as.numeric(fixed_pos + sum(drawn == "positive"))
    N <- as.numeric(fixed_neg + sum(drawn == "negative"))
    O <- as.numeric(n_neutral)

    if (metric == 1) {
      PD <- round(((P - N) / (P + N)) * 100, digits = 2)
    } else if (metric == 2) {
      PD <- round((abs(P - N) / (P + N + O)) * 100, digits = 2)
    } else if (metric == 3) {
      PD <- round((P / (P + N + O)) * 100, digits = 2)
    } else if (metric == 4) {
      PD <- round((N / (P + N + O)) * 100, digits = 2)
    } else {
      # metric == 5: user-supplied equation
      PD <- as.numeric(fun(P, N, O))
    }
    exp_scores[[m]] <- PD

    if (!quiet) {
      flush.console()
      print(paste("No. of simulations completed:", m, sep = " "))
    }
  }

  unlist(exp_scores, use.names = FALSE)
}
#}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.