# library(stringr)
# Highlight palette: six base colours repeated ten times (60 entries).
# Entries are looked up by a term's position in `terms_highlight`.
my_colours <- rep(
  c("red", "blue", "green", "purple", "orange", "gray"),
  times = 10
)

# Set the knitr chunk option `echo` to TRUE.
knitr::opts_chunk$set(echo = TRUE)

# Unpack the render parameters into top-level variables.
utvalg <- params$dataset
INFO_COLUMNS <- params$INFO_COLUMNS
DATE_BASED_CORPUS <- params$DATE_BASED_CORPUS
GROUPING_VARIABLE <- params$GROUPING_VARIABLE
case_sensitive <- params$case_sensitive
terms_highlight <- params$terms_highlight

# Normalise the search terms so later code can compare against "":
# NA entries become "", and a zero-length input becomes a single "".
terms_highlight <- replace(terms_highlight, is.na(terms_highlight), "")
terms_highlight <- replace(terms_highlight, length(terms_highlight) == 0, "")
# ---- Highlight search terms, count hits, assemble document info ----------
# Inputs (set earlier in the file): `utvalg`, `terms_highlight`,
# `case_sensitive`, `my_colours`, `INFO_COLUMNS`, `DATE_BASED_CORPUS`.
# Outputs: `utvalg$Text` rewritten as display HTML, plus `hit_count_text`
# and `doc_info_text` (one element per row of `utvalg`).

# Keep the text as it was before any markup is inserted. Hit counts and the
# word count are computed on this copy, so <span>/<br>/<b> markup and the
# prepended title never leak into the numbers.
plain_text <- utvalg$Text

# Positions of the terms that can actually be searched for. Empty strings
# (what NA terms are turned into) are skipped because an empty pattern
# matches at every position. Working with positions rather than a filtered
# vector keeps each term paired with its own colour.
active_terms <- which(!is.na(terms_highlight) & nzchar(terms_highlight))

# One colour per term position; wraps around instead of yielding NA when
# there are more terms than palette entries.
term_colours <-
  my_colours[(seq_along(terms_highlight) - 1L) %% length(my_colours) + 1L]

# Wrap every match of every term in a coloured <span>. Terms are treated as
# regular expressions; the outer group added here is what `\\1` refers to.
# NOTE(review): terms are applied one after another, so a later term is
# matched against text that already contains the markup inserted for
# earlier terms (e.g. a term such as "span" or "red") -- confirm whether
# that needs handling.
for (i in active_terms) {
  utvalg$Text <- stringr::str_replace_all(
    utvalg$Text,
    stringr::regex(
      paste0("(", terms_highlight[i], ")"),
      ignore_case = !case_sensitive
    ),
    sprintf('<span style="color:%s">\\1</span>', term_colours[i])
  )
}

# Line breaks in the text become HTML line breaks.
utvalg$Text <- stringr::str_replace_all(utvalg$Text, "\n", "<br>")

# Show the title in bold above the text when the corpus has a Title column.
if ("Title" %in% INFO_COLUMNS) {
  utvalg$Text <- paste0(
    "<b>", "Title: ", utvalg$Title, "</b>",
    "<br><br>",
    utvalg$Text
  )
}

# One "'term' is found N time(s) ..." line per active term, joined by <br>.
# Stays NULL when there is nothing to search for.
hit_count_text <- NULL

if (length(active_terms) > 0) {
  search_type <-
    if (isTRUE(case_sensitive)) "case sensitive" else "case insensitive"

  for (i in active_terms) {
    # Counted on the untouched text, see `plain_text` above.
    n_hits <- stringr::str_count(
      plain_text,
      stringr::regex(terms_highlight[i], ignore_case = !case_sensitive)
    )
    hit_line <- sprintf(
      "'%s' is found %s %s in the document
                                    (%s search)",
      sprintf(
        '<span style="color:%s">%s</span>',
        term_colours[i],
        terms_highlight[i]
      ),
      n_hits,
      # Singular only for exactly one hit; `%in%` keeps NA counts plural
      # instead of propagating NA into the wording.
      ifelse(n_hits %in% 1L, "time", "times"),
      search_type
    )
    hit_count_text <- paste(sep = "<br>", hit_count_text, hit_line)
  }
  hit_count_text <- paste0("<br>", hit_count_text)
}

# Document metadata shown in the Info tab, built up field by field with
# <br> between fields.
doc_info_text <- ""

if (DATE_BASED_CORPUS == TRUE) {
  doc_info_text <- paste0(
    doc_info_text,
    tags$b("Date: "),
    format(utvalg$Date, "%A %d %B %Y")
  )
}

if ("Title" %in% INFO_COLUMNS) {
  doc_info_text <- paste(
    sep = "<br>",
    doc_info_text,
    paste0(tags$b("Title: "), utvalg$Title)
  )
}

if ("URL" %in% INFO_COLUMNS) {
  doc_info_text <- paste(
    sep = "<br>",
    doc_info_text,
    paste0(
      tags$b("URL: "),
      "<a href='",
      utvalg$URL,
      # Leading space: HTML attributes must be separated by whitespace.
      "' target='_blank'>",
      utvalg$URL,
      "</a>"
    )
  )
}

# Any remaining info columns are listed as "<b>name: </b>value".
other_columns <-
  INFO_COLUMNS[!INFO_COLUMNS %in% c("Date", "Title", "URL")]

for (column_name in other_columns) {
  doc_info_text <- paste(
    doc_info_text,
    sep = "<br>",
    paste0(
      tags$b(column_name),
      tags$b(": "),
      utvalg[[column_name]]
    )
  )
}

# Word count of the document itself (markup and title prefix excluded).
doc_info_text <- paste(
  doc_info_text,
  sep = "<br>",
  paste0(
    tags$b("Word count: "),
    stringi::stri_count_words(plain_text)
  )
)

# The first field is joined onto an initially empty string, which leaves a
# leading <br> when no date line was written; drop it.
doc_info_text <- stringr::str_replace(doc_info_text, "^<br>", "")

# ---- Assemble the per-document markdown and emit it -----------------------

# Info tab: document metadata followed by the hit-count lines (if any).
info_tab <- paste0("\n## Info\n\n", doc_info_text, hit_count_text)

# Pieces shared by both heading variants: a running number in parentheses
# in front, and the tabset attributes at the end.
heading_prefix <- paste0("# (", seq_len(nrow(utvalg)), ") ")
tabset_suffix <- ' {.tabset .tabset-fade}'

if (DATE_BASED_CORPUS == TRUE) {
  # Date-based corpus: "# (n) <date>--<title>".
  utvalg_title <- paste0(
    heading_prefix,
    format(utvalg$Date, "%d %B %Y"),
    "--",
    utvalg$Title,
    tabset_suffix
  )
} else if (DATE_BASED_CORPUS == FALSE) {
  # Otherwise: "# (n) [<group>--]<seq>", with the group part present only
  # when a grouping variable is set.
  group_label <- if (is.na(GROUPING_VARIABLE)) {
    ""
  } else {
    paste0(utvalg[[GROUPING_VARIABLE]], "--")
  }
  utvalg_title <- paste0(
    heading_prefix,
    group_label,
    utvalg$Seq,
    tabset_suffix
  )
}

# Per document: heading, Text tab, Info tab and a horizontal rule, separated
# by newlines.
text_tab <- paste0("\n\n## Text\n", utvalg$Text)
utvalg_tekst <- paste(
  utvalg_title,
  text_tab,
  info_tab,
  "\n___\n",
  sep = "\n"
)

rmarkdown::html_notebook_output_html(utvalg_tekst)

# kgjerde/corporaexplorer documentation built on July 3, 2023, 7:02 p.m.