R/rt_fund.R
In rtransparency: Identifies Indicators of Transparency

Documented in rt_fund

# TODO: First find the paragraphs that contain interesting words and then
#    apply the obliteration and the rest of the functions, and test whether
#    this saves time!



#' Identify mentions of support
#'
#' Identifies mentions of "This work was funded by ..." and of "This work was
#'     completed using funds from ...". A second pattern, built from the
#'     "These"/"researches"/"are" synonym groups, is only tried when the first
#'     one matches nothing.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_support_1 <- function(article) {

  synonyms <- .create_synonyms()

  # Select synonym groups by name and run each through .bound() (with any
  # extra arguments) and then .encase().
  as_fragments <- function(keys, ...) {
    lapply(lapply(synonyms[keys], .bound, ...), .encase)
  }

  singular_words <- c(
    "This_singular", "research_singular", "is_singular", "funded_funding", "by"
  )

  # The first two groups may be separated by at most 15 characters; all other
  # groups are joined by synonyms$txt.
  this_research <- paste(as_fragments(singular_words[1:2]), collapse = ".{0,15}")
  was_funded_by <- paste(
    as_fragments(singular_words[3:5]),
    collapse = synonyms$txt
  )
  singular_regex <- paste(
    c(this_research, was_funded_by),
    collapse = synonyms$txt
  )

  singular <- grep(singular_regex, article, perl = TRUE)
  if (length(singular) > 0) {
    return(singular)
  }

  # Fallback pattern, only reached when the first one found nothing.
  plural_words <- c("These", "researches", "are", "funded_funding", "by")
  plural_regex <- paste(
    as_fragments(plural_words, location = "end"),
    collapse = synonyms$txt
  )
  grep(plural_regex, article, perl = TRUE)
}


#' Identify mentions of support
#'
#' Searches for the "funded_funding", "this_singular" and "research_singular"
#'     synonym groups in that order. When nothing matches, the same search is
#'     repeated with the "funded", "these" and "researches" groups.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_support_2 <- function(article) {

  synonyms <- .create_synonyms()

  # Build a single regex from the named synonym groups (joined by
  # synonyms$txt) and return the indices of the matching paragraphs.
  find_matches <- function(keys) {
    fragments <- lapply(synonyms[keys], .bound, location = "end")
    fragments <- lapply(fragments, .encase)
    grep(paste(fragments, collapse = synonyms$txt), article, perl = TRUE)
  }

  singular <- find_matches(
    c("funded_funding", "this_singular", "research_singular")
  )
  if (length(singular) > 0) {
    return(singular)
  }

  find_matches(c("funded", "these", "researches"))
}


#' Identify mentions of support
#'
#' Searches for the "research" and "is_have" synonym groups separated by a
#'     single space, followed (via `synonyms$txt`) by the "funded" and "by"
#'     groups.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_support_3 <- function(article) {

  synonyms <- .create_synonyms()
  words <- c("research", "is_have", "funded", "by")

  to_fragments <- function(keys) {
    lapply(lapply(synonyms[keys], .bound, location = "end"), .encase)
  }

  # "research is": the two groups must be separated by exactly one space.
  research_is <- paste(to_fragments(words[1:2]), collapse = " ")
  funded_by <- paste(to_fragments(words[3:4]), collapse = synonyms$txt)

  regex <- paste(research_is, funded_by, sep = synonyms$txt)
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of support
#'
#' Searches for the "funded", "by" and "award" synonym groups, in that order,
#'     joined by `synonyms$txt`.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_support_4 <- function(article) {

  # TODO: I have now made this function much more general than before, by
  #    removing the requirement for "is" from the start. It seems that this now
  #    became more sensitive with no loss in specificity. If it remains so in
  #    further testing, remove all previous functions, which are basically more
  #    specific versions of this!

  synonyms <- .create_synonyms()

  fragments <- lapply(synonyms[c("funded", "by", "award")], .bound)
  fragments <- lapply(fragments, .encase)

  regex <- paste(fragments, collapse = synonyms$txt)
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of support near the start of a sentence
#'
#' Searches for the "funded", "by" and "foundation" synonym groups, preceded by
#'     a sentence-start prefix that is wrapped by `.max_words()`.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_support_5 <- function(article) {

  # TODO: This is trying to capture some phrases missed by get_support_4 b/c of
  #    the requirement for is, e.g. "Project supported by X". I am introducing
  #    a 3 word limit at the start to make it stricter - upon tests, there were
  #    very very few mistakes (1/200 FP) without introducing this restriction.
  # NOTE(review): the comment above mentions a 3 word limit, but the call
  #    below passes n_max = 4 - confirm which one is intended.

  synonyms <- .create_synonyms()

  fragments <- lapply(synonyms[c("funded", "by", "foundation")], .bound)
  fragments <- lapply(fragments, .encase)
  funded_by_foundation <- paste(fragments, collapse = synonyms$txt)

  # Start of text, a colon/period, or a capitalised word.
  start_of_sentence <- "(^\\s*|(:|\\.)\\s*|[A-Z][a-zA-Z]+\\s*)"
  prefix <- .max_words(start_of_sentence, n_max = 4, space_first = FALSE)

  grep(paste0(prefix, funded_by_foundation), article, perl = TRUE)
}


#' Identify mentions of support
#'
#' Return the index of support statements such as: We gratefully acknowledge
#'     support from the UK Engineering and Physical Sciences Research Council.
#'     Two patterns are searched and their hits are merged: (a) "acknowledge"
#'     followed by "support_only" and "foundation_award", and (b)
#'     "support_only", "foundation_award" and "acknowledged" in that order.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Vector with the unique indices of the elements of `article` that
#'     match either pattern.
#' @noRd
get_support_6 <- function(article) {

  synonyms <- .create_synonyms()
  words_1 <- c("acknowledge", "support_only", "foundation_award")
  words_2 <- c("support_only", "foundation_award", "acknowledged")

  # Select synonym groups by name and run each through .bound() and .encase().
  to_fragments <- function(keys) {
    lapply(lapply(synonyms[keys], .bound), .encase)
  }

  # Pattern a: the "acknowledge" group (wrapped by .max_words()), a space,
  # then the support and foundation/award groups joined by synonyms$txt.
  # (A stray `.max_words()` call whose result was discarded used to sit here;
  # it had no effect on the result and was removed.)
  acknowledge <- lapply(to_fragments(words_1[1]), .max_words)
  support_foundation <- paste(
    to_fragments(words_1[2:3]),
    collapse = synonyms$txt
  )
  regex_a <- paste(c(acknowledge, support_foundation), collapse = " ")
  a <- grep(regex_a, article, perl = TRUE)

  # Pattern b: the three words_2 groups joined by synonyms$txt.
  regex_b <- paste(to_fragments(words_2), collapse = synonyms$txt)
  b <- grep(regex_b, article, perl = TRUE)

  unique(c(a, b))
}


#' Identify mentions of support
#'
#' Return the index of support statements such as: Support was provided by the
#'     U.S. Department of Agriculture (USDA) Forest Service. The pattern is the
#'     literal "Support", then the "received" synonyms, then the "by" synonyms.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_support_7 <- function(article) {

  synonyms <- .create_synonyms()

  received <- .encase(synonyms$received)
  by <- .encase(synonyms$by)

  regex <- paste("Support", received, by, sep = synonyms$txt)
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of support
#'
#' Return the index of support statements such as: We are grateful to the
#'     National Institute of Mental Health (MH091070: PI's Dante Cicchetti and
#'     Sheree L. Toth) for support of this work. The pattern is the
#'     "foundation", "provide", "funding_financial", "for_of" and "research"
#'     synonym groups, in that order, joined by `synonyms$txt`.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_support_8 <- function(article) {

  # TODO: This function was onboarded to capture a tiny proportion of
  #    statements. Consider (1) activating this function only if no hit by the
  #    more prevalent functions, (2) placing it in Acknowledgements (all missed
  #    article that prompted this are in the acknowledgements) and (3) to
  #    change this function into processing sentence by sentence to cover for
  #    all permutations of these terms.

  synonyms <- .create_synonyms()
  words <- c("foundation", "provide", "funding_financial", "for_of", "research")

  # The unused `max_words` local and the commented-out experiments that used
  # it were removed; the regex below never referenced them.
  fragments <- lapply(lapply(synonyms[words], .bound), .encase)

  regex <- paste(fragments, collapse = synonyms$txt)
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of support
#'
#' Return the index of support statements such as: The US Environmental
#'     Protection Agency's (USEPA) Office of Research and Development funded and
#'     managed the research described in the present study.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_support_9 <- function(article) {

  synonyms <- .create_synonyms()

  foundation <- lapply(lapply(synonyms["foundation"], .bound), .encase)

  # Only done for words other than "supported" b/c this is very generic:
  # every "funded" synonym containing "upport" is dropped.
  funded_terms <- lapply(
    synonyms["funded"],
    function(terms) grep("upport", terms, value = TRUE, invert = TRUE)
  )
  funding <- lapply(lapply(funded_terms, .bound), .encase)

  # Only match research words that are at most 20 characters away from a
  # period, i.e. close to the end of the sentence.
  research <- lapply(lapply(synonyms["research"], .bound), .encase)
  research <- paste0(research, ".{0,20}\\.")

  regex <- paste(c(foundation, funding, research), collapse = synonyms$txt)
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of support
#'
#' Return the index of support statements such as: We are grateful to the
#'     National Institute of Mental Health (MH091070: PI's Dante Cicchetti and
#'     Sheree L. Toth) for support of this work. The pattern is the "thank",
#'     "foundation", "funding_financial" and "research" synonym groups, in that
#'     order, joined by `synonyms$txt`.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_support_10 <- function(article) {

  # TODO: This function was onboarded to capture a tiny proportion of
  #    statements. Consider (1) activating this function only if no hit by the
  #    more prevalent functions, (2) placing it in Acknowledgements (all missed
  #    article that prompted this are in the acknowledgements) and (3) to
  #    change this function into processing sentence by sentence to cover for
  #    all permutations of these terms.

  synonyms <- .create_synonyms()

  keys <- c("thank", "foundation", "funding_financial", "research")
  fragments <- lapply(lapply(synonyms[keys], .bound), .encase)

  regex <- paste(fragments, collapse = synonyms$txt)
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of developed
#'
#' Return the index of statements such as: This publication was developed
#'     under Assistance Agreement No. 83563701-0 awarded by the U.S.
#'     Environmental Protection Agency to the University of Michigan.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_developed_1 <- function(article) {

  # TODO: This function was onboarded to capture a tiny proportion of
  #    statements. Consider (1) activating this function only if no hit by the
  #    more prevalent functions, (2) placing it in Acknowledgements (all missed
  #    article that prompted this are in the acknowledgements) and (3) to
  #    change this function into processing sentence by sentence to cover for
  #    all permutations of these terms.

  synonyms <- .create_synonyms()
  words <- c("This", "research", "is", "developed", "by", "foundation")

  # Select synonym groups by name, run each through .bound() and .encase(),
  # and join them with synonyms$txt.
  join_groups <- function(keys) {
    fragments <- lapply(lapply(synonyms[keys], .bound), .encase)
    paste(fragments, collapse = synonyms$txt)
  }

  this_research_is <- join_groups(words[1:3])
  by_foundation <- join_groups(words[5:6])

  # "developed" is used as a literal term, not looked up in the synonyms.
  developed <- words[4]

  regex <- paste(
    c(this_research_is, developed, by_foundation),
    collapse = synonyms$txt
  )
  grep(regex, article, perl = TRUE)
}

#' Identify mentions of received
#'
#' Searches for the "received", "funds_award_financial" and "by" synonym
#'     groups, in that order, each separated by a single space.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_received_1 <- function(article) {

  synonyms <- .create_synonyms()

  keys <- c("received", "funds_award_financial", "by")
  fragments <- lapply(lapply(synonyms[keys], .bound), .encase)

  # Groups must be adjacent: they are joined by one space only.
  regex <- paste(fragments, collapse = " ")
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of received
#'
#' Searches for the "received", "funding_financial", "by" and
#'     "foundation_award" synonym groups, in that order, joined by
#'     `synonyms$txt`.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_received_2 <- function(article) {

  synonyms <- .create_synonyms()

  keys <- c("received", "funding_financial", "by", "foundation_award")
  fragments <- lapply(lapply(synonyms[keys], .bound), .encase)

  regex <- paste(fragments, collapse = synonyms$txt)
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of recipient
#'
#' Returns the index of mentions such as: "Recipient of National Institutes of
#'     Health Grants AG044552, AI121621, and DK112365."
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_recipient_1 <- function(article) {

  synonyms <- .create_synonyms()

  fragments <- lapply(synonyms[c("recipient", "award")], .bound)
  fragments <- lapply(fragments, .encase)

  regex <- paste(fragments, collapse = synonyms$txt)
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of "the authors ... financial support"
#'
#' Searches for the "This", "author" and "funds_award_financial" synonym
#'     groups, in that order, joined by `synonyms$txt`.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_authors_1 <- function(article) {

  # Notes kept from the original implementation (candidate terms and example
  # phrases):
  #
  # "[Ss]upport",
  # "[Ff]inancial assistance",
  # "\\b[Aa]id\\b",
  #
  # The authors thank X for funding
  # The authors received no/did not receive financial support
  # The authors acknowledge the/disclosed receipt of support\\b/financial support of
  # The authors are supported by
  #
  # The authors received no funds
  # The authors have no support or funding to report

  synonyms <- .create_synonyms()

  keys <- c("This", "author", "funds_award_financial")
  fragments <- lapply(lapply(synonyms[keys], .bound), .encase)

  regex <- paste(fragments, collapse = synonyms$txt)
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of "the authors have no funding ..."
#'
#' Searches for the "This", "author", "have", "no" and
#'     "funding_financial_award" synonym groups, in that order, joined by
#'     `synonyms$txt`.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_authors_2 <- function(article) {

  synonyms <- .create_synonyms()

  keys <- c("This", "author", "have", "no", "funding_financial_award")
  fragments <- lapply(lapply(synonyms[keys], .bound), .encase)

  regex <- paste(fragments, collapse = synonyms$txt)
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of "thank ... financial support"
#'
#' Searches for the "We", "thank" and "financial" synonym groups, in that
#'     order, joined by `synonyms$txt`. For this search only, "for supporting"
#'     is added to the "financial" synonyms.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_thank_1 <- function(article) {

  synonyms <- .create_synonyms()

  # Local extension of the "financial" group; the change is confined to this
  # function's copy of the synonyms.
  synonyms$financial <- c(synonyms$financial, "for supporting")

  fragments <- lapply(synonyms[c("We", "thank", "financial")], .bound)
  fragments <- lapply(fragments, .encase)

  regex <- paste(fragments, collapse = synonyms$txt)
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of thank you statements
#'
#' Returns the index with the elements related to "The authors acknowledge
#'     Worldwide Cancer Research (AICR) 15-1002; Blueprint 282510; ...", i.e. a
#'     "thank" synonym followed, after permitted filler characters, by five
#'     consecutive digits.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_thank_2 <- function(article) {

  synonyms <- .create_synonyms()

  # Characters allowed between the thank-you term and the number.
  filler <- "[a-zA-Z0-9\\s,()/:;-]*"

  fragments <- lapply(lapply(synonyms[c("thank")], .bound), .encase)
  thank <- paste(fragments, collapse = synonyms$txt)

  regex <- paste(thank, "[0-9]{5}", sep = filler)
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of "funding for this study was..."
#'
#' Searches for the "funding_financial_award" and "for" synonym groups
#'     separated by a single space, followed (via `synonyms$txt`) by the
#'     "research" and "received" groups.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_fund_1 <- function(article) {

  synonyms <- .create_synonyms()
  words <- c("funding_financial_award", "for", "research", "received")

  to_fragments <- function(keys) {
    lapply(lapply(synonyms[keys], .bound), .encase)
  }

  # "funding for": the two groups must be separated by exactly one space.
  funding_for <- paste(to_fragments(words[1:2]), collapse = " ")
  research_received <- paste(to_fragments(words[3:4]), collapse = synonyms$txt)

  regex <- paste(funding_for, research_received, sep = synonyms$txt)
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of Funding titles
#'
#' Looks for paragraphs matching the "funding_title" synonyms as processed by
#'     `.title()`. For every such title the index of the paragraph that follows
#'     is returned as well; if that paragraph is empty, the one after it is
#'     returned instead. When no title is found, the search is repeated with
#'     `.title(within_text = TRUE)` and only the matching indices are returned.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Vector of indices into `article`: all title indices first, then the
#'     indices of the paragraphs following them. A title that is the last
#'     element of `article` contributes only its own index.
#' @noRd
get_fund_2 <- function(article) {

  synonyms <- .create_synonyms()
  words <- c("funding_title")

  title_regex <- paste(lapply(lapply(synonyms[words], .title), .encase))
  a <- grep(title_regex, article, perl = TRUE)

  if (length(a) == 0) {
    # No stand-alone title: fall back to titles embedded within the text.
    inline_regex <- paste(
      lapply(lapply(synonyms[words], .title, within_text = TRUE), .encase)
    )
    return(grep(inline_regex, article, perl = TRUE))
  }

  # Paragraph after each title; NA when the title is the last element. The
  # previous scalar `if (nchar(article[a + 1]) == 0)` failed in that case and
  # whenever more than one title matched, so the check is vectorised here.
  following <- article[a + 1]
  has_next <- !is.na(following)
  if (!any(has_next)) {
    return(a)
  }

  # Skip one empty paragraph between a title and its statement.
  is_blank <- has_next & nchar(following) == 0
  statement <- a + ifelse(is_blank, 2, 1)

  c(a, statement[has_next])
}


#' Identify mentions of Funding titles
#'
#' Searches for the "any_title" synonyms (passed through `.encase()`) followed
#'     by a space and an upper-case letter.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_fund_3 <- function(article) {

  synonyms <- .create_synonyms()

  titles <- lapply(synonyms[c("any_title")], .encase)

  # paste() inserts the single space between the title and the capital letter.
  regex <- paste(titles, "[A-Z]")
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of funds in acknowledgements
#'
#' Case-insensitive search for any of: the "funded" synonyms, the
#'     "funds_award_financial" synonyms, an "NIH R01"-style identifier, or the
#'     phrase "awarded by".
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_fund_acknow <- function(article) {

  synonyms <- .create_synonyms()

  keys <- c("funded", "funds_award_financial")
  funded_synonyms <- unlist(lapply(synonyms[keys], .bound))

  candidates <- c(funded_synonyms, "NIH (|\\()(R|P)[0-9]{2}", "awarded by")
  regex <- .encase(candidates)

  grep(regex, article, perl = TRUE, ignore.case = TRUE)
}


#' Identify mentions of funds in acknowledgements
#'
#' Detects funding acknowledged in prose. Explicit funding language is
#'     required; a bare mention of an institution, the word "support", or
#'     "acknowledge" is not enough, because those also appear in
#'     competing-interest statements, generic thanks and author affiliations.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_fund_acknow_new <- function(article) {

  # A funding verb directed at a funder: "supported/funded/financed by ...".
  funded_by <-
    "\\b(fund|support|financ|sponsor)(ed|ing)?\\b(\\s+\\w+){0,3}\\s+(by|through)\\b"

  # Institutional "support/funding of/from the ...".
  support_from <- "\\b(support|funding|grant|grants|funds)\\s+(of|from)\\s+the\\b"

  # A grant or award identifier.
  grant_id <-
    "\\bgrant(s|ed)?\\b(\\s+\\w+){0,3}\\s+(no\\.?|number|#|from|award|agreement|id|ref)\\b"

  # A named award.
  named_award <-
    "\\b(fellowship|scholarship|studentship|bursary|stipend|endowment)\\b"

  regex <- .encase(c(funded_by, support_from, grant_id, named_award))
  grep(regex, article, perl = TRUE, ignore.case = TRUE)
}


# get_fund_acknow_new <- function(article) {
#
#   synonyms <- .create_synonyms()
#   words <- c("acknowledge")
#
#   a <- synonyms %>%
#     magrittr::extract(words) %>%
#     lapply(.bound) %>%
#     lapply(.encase)
#
#   grep(paste0(a, synonyms$txt, "[0-9]{3,10}"), article, perl = TRUE)
#
# }


#' Identify mentions of "Supported by ..."
#'
#' Searches for the "Supported" and "by" synonym groups separated by a single
#'     space, followed by a space and at least one letter.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_supported_1 <- function(article) {

  synonyms <- .create_synonyms()

  fragments <- lapply(synonyms[c("Supported", "by")], .bound)
  fragments <- lapply(fragments, .encase)
  supported_by <- paste(fragments, collapse = " ")

  # Require a word after "by"; paste() adds the separating space.
  regex <- paste(supported_by, "[a-zA-Z]+")
  grep(regex, article, perl = TRUE)
}

#' Identify mentions of Financial support titles
#'
#' Looks for paragraphs matching the "financial_title" synonyms as processed
#'     by `.title()`. For every such title the index of the paragraph that
#'     follows is returned as well; if that paragraph is empty, the one after
#'     it is returned instead. When no title is found, the search is repeated
#'     with `.title(within_text = TRUE)` and only the matching indices are
#'     returned.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Vector of indices into `article`: all title indices first, then the
#'     indices of the paragraphs following them. A title that is the last
#'     element of `article` contributes only its own index.
#' @noRd
get_financial_1 <- function(article) {

  synonyms <- .create_synonyms()
  words <- c("financial_title")

  title_regex <- paste(lapply(lapply(synonyms[words], .title), .encase))
  a <- grep(title_regex, article, perl = TRUE)

  if (length(a) == 0) {
    # No stand-alone title: fall back to titles embedded within the text.
    inline_regex <- paste(
      lapply(lapply(synonyms[words], .title, within_text = TRUE), .encase)
    )
    return(grep(inline_regex, article, perl = TRUE))
  }

  # Paragraph after each title; NA when the title is the last element. The
  # previous scalar `if (nchar(article[a + 1]) == 0)` failed in that case and
  # whenever more than one title matched, so the check is vectorised here.
  following <- article[a + 1]
  has_next <- !is.na(following)
  if (!any(has_next)) {
    return(a)
  }

  # Skip one empty paragraph between a title and its statement.
  is_blank <- has_next & nchar(following) == 0
  statement <- a + ifelse(is_blank, 2, 1)

  c(a, statement[has_next])
}


#' Identify mentions of Financial support titles followed by specific text
#'
#' Searches for the "financial_title" synonyms followed, after a single space,
#'     by the "No" synonyms.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_financial_2 <- function(article) {

  synonyms <- .create_synonyms()

  fragments <- lapply(synonyms[c("financial_title", "No")], .bound)
  fragments <- lapply(fragments, .encase)

  # The two groups must be adjacent: they are joined by one space only.
  regex <- paste(fragments, collapse = " ")
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of Financial support titles followed by specific text
#'
#' Searches for the "financial_title", "this" and "research" synonym groups,
#'     in that order, joined by `synonyms$txt`.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_financial_3 <- function(article) {

  synonyms <- .create_synonyms()

  keys <- c("financial_title", "this", "research")
  fragments <- lapply(lapply(synonyms[keys], .bound), .encase)

  regex <- paste(fragments, collapse = synonyms$txt)
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of Disclosure statements
#'
#' Looks for paragraphs matching the "disclosure_title" synonyms as processed
#'     by `.title()`. A title is only reported when the paragraph that follows
#'     it (skipping one empty paragraph) matches one of the
#'     "funding_financial_award" synonyms.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Vector of indices into `article`: for every reported title, its own
#'     index followed by the index of the paragraph that was checked. An empty
#'     integer vector when nothing qualifies.
#' @noRd
get_disclosure_1 <- function(article) {

  synonyms <- .create_synonyms()
  words <- c("disclosure_title")

  titles <- lapply(lapply(synonyms[words], .title), .encase)
  a <- grep(paste(titles, collapse = "|"), article, perl = TRUE)

  if (length(a) == 0) {
    return(integer())
  }

  # The funding regex does not depend on the title, so build it only once.
  funding_regex <- paste0(
    lapply(synonyms$funding_financial_award, .bound),
    collapse = "|"
  )

  hits <- lapply(a, function(title_idx) {

    following <- article[title_idx + 1]

    # `following` is NA when the title is the last element; previously this
    # made `nchar(...) == 0` evaluate to NA and the `if` fail.
    if (!is.na(following) && nchar(following) == 0) {
      statement_idx <- title_idx + 2
    } else {
      statement_idx <- title_idx + 1
    }

    # An out-of-range statement index yields NA, which grep() never matches.
    found <- grep(funding_regex, article[statement_idx], perl = TRUE)
    if (length(found) > 0) c(title_idx, statement_idx) else NULL
  })

  out <- unlist(hits)
  if (is.null(out)) {
    return(integer())
  }
  out
}


#' Identify mentions of Disclosure statements
#'
#' Searches for the "disclosure_title" synonyms, processed by
#'     `.title(within_text = TRUE)`, followed (via `synonyms$txt`) by the
#'     "funding_financial_award" synonyms.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_disclosure_2 <- function(article) {

  synonyms <- .create_synonyms()

  disclosure <- lapply(
    synonyms["disclosure_title"],
    .title,
    within_text = TRUE
  )
  funding <- lapply(synonyms["funding_financial_award"], .bound)

  fragments <- lapply(c(disclosure, funding), .encase)

  regex <- paste(fragments, collapse = synonyms$txt)
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of Grant titles
#'
#' Looks for paragraphs matching the "grant_title" synonyms as processed by
#'     `.title()`. For every such title the index of the paragraph that follows
#'     is returned as well; if that paragraph is empty, the one after it is
#'     returned instead. When no title is found, a set of "Grant ..." prefixes
#'     combined with the "support" and "funder" synonyms is searched instead.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Vector of indices into `article`. With titles: all title indices
#'     first, then the indices of the paragraphs following them (a title that
#'     is the last element contributes only its own index). Without titles:
#'     the indices matching the fallback pattern.
#' @noRd
get_grant_1 <- function(article) {

  # TODO: This whole function takes a LOT of time to run, but it is ONLY
  #    activated for "(Grant [Ss]ponsor(|s):)|)Contract [Gg]rant
  #    [Ss]ponsor(|s):). Consider replacing it with just this!!!

  synonyms <- .create_synonyms()
  words <- c("grant_title")

  title_regex <- paste(lapply(lapply(synonyms[words], .title), .encase))
  a <- grep(title_regex, article, perl = TRUE)

  if (length(a) == 0) {

    # This is done to avoid mentions such as "Grant AK, Brown AZ, ..."
    grant <- c("G(?i)rant ", "^[A-Z](?i)\\w+ grant ", "Contract grant ")

    # "(?-i)" is appended to every synonym before it is given to .title().
    support <- lapply(synonyms[c("support", "funder")], paste0, "(?-i)")
    support <- unlist(lapply(support, .title, within_text = TRUE))

    # Every grant prefix combined with every support/funder term.
    combined <- unlist(lapply(grant, paste0, support))
    return(grep(.encase(combined), article, perl = TRUE))
  }

  # Paragraph after each title; NA when the title is the last element. The
  # previous scalar `if (nchar(article[a + 1]) == 0)` failed in that case and
  # whenever more than one title matched, so the check is vectorised here.
  following <- article[a + 1]
  has_next <- !is.na(following)
  if (!any(has_next)) {
    return(a)
  }

  # Skip one empty paragraph between a title and its statement.
  is_blank <- has_next & nchar(following) == 0
  statement <- a + ifelse(is_blank, 2, 1)

  c(a, statement[has_next])
}


#' Identify French funding statements
#'
#' Returns the index of statements of the form "Cette etude ... financ ...
#'     par" ("This study was financed by ..."), ignoring case.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_french_1 <- function(article) {

  # Accented letters are deliberately left out of the pattern (".*tude",
  # "financ.*") to avoid depending on specific UTF-8 characters.
  french_regex <- "Cette.*tude.*financ.*par"

  grep(french_regex, article, ignore.case = TRUE, perl = TRUE)
}


#' Identify mentions of project numbers.
#'
#' Returns the index of mentions such as: "NIH Project no. AB123.", i.e. the
#'     word "project" followed by a space and "no" or "num", ignoring case.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_project_acknow <- function(article) {

  project_regex <- "project (no|num)"

  grep(project_regex, article, ignore.case = TRUE, perl = TRUE)
}


#' Get common phrases
#'
#' Searches for the "no" and "funding_financial_award" synonym groups
#'     separated by a single space, followed (via `synonyms$txt`) by the "is"
#'     and "received" groups, also separated by a single space.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_common_1 <- function(article) {

  synonyms <- .create_synonyms()
  words <- c("no", "funding_financial_award", "is", "received")

  # Select synonym groups by name, run each through .bound() and .encase(),
  # and require them to be adjacent (one space apart).
  adjacent <- function(keys) {
    fragments <- lapply(lapply(synonyms[keys], .bound), .encase)
    paste(fragments, collapse = " ")
  }

  no_funding <- adjacent(words[1:2])
  was_received <- adjacent(words[3:4])

  regex <- paste(no_funding, was_received, sep = synonyms$txt)
  grep(regex, article, perl = TRUE)
}



#' Get common phrases
#'
#' Searches for the "No", "funding_financial_award" and "received" synonym
#'     groups, in that order, joined by `synonyms$txt`.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_common_2 <- function(article) {

  synonyms <- .create_synonyms()

  keys <- c("No", "funding_financial_award", "received")
  fragments <- lapply(lapply(synonyms[keys], .bound), .encase)

  regex <- paste(fragments, collapse = synonyms$txt)
  grep(regex, article, perl = TRUE)
}


#' Get common phrases
#'
#' Identify statements of the following type: "All authors are required to
#'     disclose all affiliations, funding sources and financial or management
#'     relationships that could be perceived as potential sources of bias. The
#'     authors disclosed none."
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_common_3 <- function(article) {

  # Both phrases must occur, in this order, within the same paragraph.
  disclosed_none_regex <- "required to disclose.*disclosed none"

  grep(disclosed_none_regex, article)
}


#' Get common phrases
#'
#' Identify statements of the following type: "There were no external funding
#'     sources for this study"
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_common_4 <- function(article) {

  synonyms <- .create_synonyms()
  words <- c("no", "funding_financial_award", "for", "this", "research")

  to_fragments <- function(keys) {
    lapply(lapply(synonyms[keys], .bound), .encase)
  }

  no_funding <- paste(to_fragments(words[1:2]), collapse = synonyms$txt)

  # "for this": the two groups must be separated by exactly one space.
  for_this <- paste(to_fragments(words[3:4]), collapse = " ")

  research <- to_fragments(words[5])

  regex <- paste(
    c(no_funding, for_this, research),
    collapse = synonyms$txt
  )
  grep(regex, article, perl = TRUE)
}


#' Get common phrases
#'
#' Identify statements of the following type: "No specific sources of funding."
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_common_5 <- function(article) {

  synonyms <- .create_synonyms()

  keys <- c("no", "sources", "funding_financial")
  fragments <- lapply(lapply(synonyms[keys], .bound), .encase)

  # Groups are joined by the pattern .max_words() builds from a single space.
  separator <- .max_words(" ", space_first = FALSE)

  regex <- paste(fragments, collapse = separator)
  grep(regex, article, perl = TRUE)
}


#' Identify mentions of "Acknowledgement and"
#'
#' Looks for paragraphs that start with "Acknowledg(e)ment(s) of/and" followed
#'     by "support ", "funding" or "financial" (ignoring case). For every such
#'     title the index of the paragraph that follows is returned as well; if
#'     that paragraph is empty, the one after it is returned instead.
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Vector of indices into `article`: all title indices first, then the
#'     indices of the paragraphs following them. A title that is the last
#'     element of `article` contributes only its own index; an empty integer
#'     vector is returned when there is no title.
#' @noRd
get_acknow_1 <- function(article) {

  indicator_regex <- paste(
    "^Acknowledg(|e)ment(|s)",
    "(of|and)",
    "([Ss]upport |\\b[Ff]unding|\\b[Ff]inancial)"
  )

  a <- grep(indicator_regex, article, perl = TRUE, ignore.case = TRUE)

  # Paragraph after each title; NA when the title is the last element. The
  # checks are vectorised because the previous scalar `if` conditions failed
  # whenever more than one title matched.
  following <- article[a + 1]
  has_next <- !is.na(following)
  if (!any(has_next)) {
    # No title at all, or no title with a paragraph after it.
    return(a)
  }

  # Skip one empty paragraph between a title and its statement.
  is_blank <- has_next & nchar(following) == 0
  statement <- a + ifelse(is_blank, 2, 1)

  c(a, statement[has_next])
}


#' Identify acknowledgements
#'
#' Returns the index of paragraphs that start with an acknowledgement title:
#'     either a capital "A" followed by "cknowledg(e)ment(s)" in any case, or
#'     "Acknowledg(e)ment(s) of/and" followed by "support ", "funding" or
#'     "financial".
#'
#' @param article Paragraphs of the article to search (passed to `grep()`).
#' @return Integer vector with the indices of the matching elements of
#'     `article`.
#' @noRd
get_acknow_2 <- function(article) {

  # Only the leading "A" is case-sensitive; the rest of the word is matched
  # case-insensitively via the inline (?i) ... (?-i) switches.
  bare_title <- "(^A(?i)cknowledg(|e)ment(|s)(?-i))"

  # paste() joins the three parts with single spaces.
  combined_title <- paste(
    "(^Acknowledg(|e)ment(|s)",
    "(of|and)",
    "([Ss]upport |\\b[Ff]unding|\\b[Ff]inancial))"
  )

  indicator_regex <- paste0(bare_title, "|", combined_title)
  grep(indicator_regex, article, perl = TRUE)
}


#' Avoid disclosures that are in fact COI statements
#'
#' Flags paragraphs in which a disclosure is followed by a conflict-of-interest
#'     phrase and a "... and not funded" style remark. If no paragraph matches
#'     that form, it falls back to flagging paragraphs in which a disclosure is
#'     followed by a funding term and then a conflict-of-interest phrase.
#'
#' @param article A List with paragraphs of interest.
#' @return A logical vector, one value per paragraph; TRUE marks a match.
#' @noRd
negate_disclosure_1 <- function(article) {

  synonyms <- .create_synonyms()

  # Filler allowed between components. The hyphen has to stay last inside the
  # character class so that it is read literally.
  gap <- "[a-zA-Z0-9\\s,()-]*"

  funded_terms <- c(
    "\\bfunded",
    "\\bfinanced",
    "\\bsupported",
    "\\bsponsored",
    "\\bresourced"
  )

  disclose <- .encase(c(
    "[Dd]isclose(|s)(|:|\\.)",
    "[Dd]isclosure(|s)(|:|\\.)"
  ))

  # Conflict phrases first, competing-interest phrases after.
  conflict <- .encase(c(
    "conflict(|s) of interest",
    "conflicting interest",
    "conflicting financial interest",
    "conflicting of interest",
    "conflits d'int",
    "conflictos de Inter",
    "competing interest",
    "competing of interest",
    "competing financial interest"
  ))

  and_rx <- .encase(c("and", "&", "or"))
  not_rx <- .encase(c("not"))
  funded_rx <- .encase(funded_terms)

  # First form: disclosure ... conflict ... and ... not ... funded
  primary <- paste(disclose, conflict, and_rx, not_rx, funded_rx, sep = gap)
  primary_hits <- grepl(primary, article, perl = TRUE)

  if (any(primary_hits)) {
    return(primary_hits)
  }

  # Fallback form: disclosure ... funded/funding ... conflict
  funding_rx <- .encase(c(funded_terms, synonyms$funding))
  fallback <- paste(disclose, funding_rx, conflict, sep = gap)

  grepl(fallback, article, perl = TRUE)
}


#' Identify "Funding disclosure. Nothing to declare" statements
#'
#' Flags paragraphs in which a disclosure/declaration heading is followed by a
#'     statement that there is nothing to disclose.
#'
#' @param article A List with paragraphs of interest.
#' @return A logical vector, one value per paragraph; TRUE marks a match.
#' @noRd
negate_disclosure_2 <- function(article) {

  synonyms <- .create_synonyms()

  # Heading that opens the statement; its first letter is case-sensitive.
  heading <- .encase(c(
    "F(?i)inancial disclosure(|s)(?-i)(|:|\\.)",
    "F(?i)inancial declaration(|s)(?-i)(|:|\\.)",
    "Disclosure(|:|\\.)",
    "Declaration(|:|\\.)"
  ))

  # The same notion used as a noun later in the sentence.
  noun <- .encase(c(
    "financial disclosure(|s)",
    "financial declaration(|s)",
    "disclosure",
    "declaration"
  ))

  verb <- .encase(c(
    "to disclose",
    "to declare",
    "to report"
  ))

  none <- .encase(synonyms$No)
  gap <- synonyms$txt

  # Financial disclosure: Nothing to declare
  direct <- .encase(paste(heading, none))

  # Financial disclosure: The authors have no financial disclosures
  no_noun <- .encase(
    paste(heading, paste("(no|not have any)", noun), sep = gap)
  )

  # Financial disclosure: The author has nothing to declare
  nothing_verb <- .encase(
    paste(heading, paste("(nil|nothing)", verb), sep = gap)
  )

  pattern <- paste(direct, no_noun, nothing_verb, sep = "|")

  grepl(pattern, article, perl = TRUE)
}


#' Avoid financial that is part of COI statements
#'
#' Flags paragraphs that match the "conflict_title" synonyms after these have
#'     been passed through `.title()` and `.encase()`.
#'
#' @param article A List with paragraphs of interest.
#' @return A logical vector, one value per paragraph; TRUE marks a match.
#' @noRd
negate_conflict_1 <- function(article) {

  synonyms <- .create_synonyms()

  # Kept as a one-element list so that paste() collapses it exactly as the
  # piped lapply() chain would.
  title_patterns <- lapply(
    synonyms["conflict_title"],
    function(x) .encase(.title(x))
  )

  grepl(paste(title_patterns), article, perl = TRUE)
}


#' Avoid mentions that no funding information was provided
#'
#' Flags paragraphs that state the absence of funding (or that use funding
#'     vocabulary in an unrelated sense). Two detectors are combined with OR: a
#'     synonym-driven pattern of the form "No information of funding is
#'     received" and a case-insensitive list of fixed phrases.
#'
#' @param article A List with paragraphs of interest.
#' @return A logical vector, one value per paragraph; TRUE marks a match.
#' @noRd
negate_absence_1 <- function(article) {

  synonyms <- .create_synonyms()
  keys <- c("No", "info", "of", "funding_financial_award", "is", "received")

  # Detector 1: synonym sets chained in order, separated by synonyms$txt.
  fragments <- lapply(synonyms[keys], function(x) .encase(.bound(x)))
  synonym_hit <- grepl(
    paste(fragments, collapse = synonyms$txt),
    article,
    perl = TRUE
  )

  # Detector 2: fixed phrases, matched without regard to case.
  phrases <- c(
    "\\bfunding\\s*:?\\s*none\\b",
    "\\bno (external |specific |direct )?(funding|financial support|grant support)\\b",
    "\\bno funding and conflicts? of interest\\b",
    "\\bfinancial disclosures?\\s*:?\\s*none\\b",
    "\\bgrant support\\s*&\\s*financial disclosures?\\s*:?\\s*none\\b",
    "\\bno financial support was received\\b",
    "\\breceived no (specific )?(funding|financial support|grant support)\\b",
    "\\bdid not receive any specific grant\\b",
    "\\b(funding\\s*)?(nil|none|n/?a|not applicable)\\.?\\s*$",
    "\\bfunding\\s+(the )?(authors?|author) (have|has) nothing to report\\b",
    "\\b(the )?(authors?|author) (have|has) nothing to report\\b",
    "\\bfunding\\s+nothing to (declare|report)\\b",
    "\\bnothing to (declare|report)\\b",
    # A bare heading with nothing after it.
    "^\\s*(funding|financial support|sources of support)\\s*:?\\s*$",
    "\\bfunding acquisition\\b",
    "\\b(study|case report|paper|article|work|research) (was|is) not funded\\b",
    "\\bnot funded\\b",
    "\\bnot receive any funding\\b",
    "\\bdid not receive (any )?funding\\b",
    "\\bhas not been funded\\b",
    "\\bwithout any funding or sponsorship\\b",
    "\\bno financial support is available\\b",
    "\\bno specific funding sources? for this study\\b",
    "\\bfunding/support\\s*none\\b",
    "\\bno relevant sources? of funding\\b",
    "\\brelevant sources? of funding pertaining\\b",
    # "supported by" in the evidential, not the financial, sense.
    "\\bsupported by (the |this |available |clinical |previous |prior |published )?(evidence|data|analyses|analysis|results|findings|studies|literature|guidelines|imaging|histopathologic|framework|method|model|theory|hypothesis|observations|physiological compatibility|systematic review)\\b",
    "\\b(findings|results|observations) .{0,80}supported by the work of\\b",
    "\\bstructurally supported by\\b",
    "\\bno grants? (were )?involved( in supporting)?\\b",
    "\\bdeclared that no grants? were involved\\b",
    # NOTE(review): "finding" may be deliberate, to catch a typo of "funding"
    # seen in source texts - confirm before changing.
    "\\bno external finding\\b"
  )
  phrase_hit <- grepl(
    paste(phrases, collapse = "|"),
    article,
    perl = TRUE,
    ignore.case = TRUE
  )

  synonym_hit | phrase_hit
}


#' Remove mentions of COIs that may cause FPs
#'
#' Returns the text with potentially misleading mentions of financial COIs
#'     deleted. Only the matched span is removed; the rest of each paragraph is
#'     kept.
#'
#' @param article A List with paragraphs of interest.
#' @return The list of paragraphs without mentions of financial COIs.
#' @noRd
obliterate_conflict_1 <- function(article) {

  synonyms <- .create_synonyms()
  gap <- synonyms$txt

  relationship <- .encase(.bound(synonyms$relationship))
  conflict <- .encase(.bound(synonyms$conflict))

  # Both phrase orders are spelled out explicitly. A positive lookahead would
  # make them interchangeable for detection, but it is of no use for
  # substitution.
  candidates <- c(
    # funding/financial/support ... relationship ... conflict
    paste("(funding|financial|support)", relationship, conflict, sep = gap),
    # conflict ... relationship ... financial/support
    paste(conflict, relationship, "(financial|support)", sep = gap),
    # relationship ... financial <up to three words> interest
    paste(relationship, "(financial(?:\\s+\\w+){0,3} interest)", sep = gap)
  )

  pattern <- paste(lapply(candidates, .encase), collapse = "|")

  gsub(pattern, "", article, perl = TRUE)
}


#' Remove disclosures with inappropriate sentences
#'
#' Returns the text without potentially misleading disclosure mentions. This
#'     is intended to solve problems with disclosures, such as: Disclosure.
#'     Authors have no conflict of interests, and the work was not supported
#'     or funded by any drug company. This project was funded by the Deanship
#'     of Scientific Research, King Abdulaziz University, Jeddah, Saudi Arabia
#'     (Grant No. 4/165/1431);
#'
#' @param article A List with paragraphs of interest.
#' @return The list of paragraphs with the matched "conflict ... and ... not
#'     ... funded" spans deleted.
#' @noRd
obliterate_disclosure_1 <- function(article) {

  synonyms <- .create_synonyms()
  gap <- synonyms$txt

  # The disclosure title itself is not part of the pattern; only the
  # "conflict ... and ... not ... funded" sequence is targeted.
  keys <- c("conflict", "and", "not", "funded")

  fragments <- lapply(synonyms[keys], function(x) .encase(.bound(x)))
  core <- paste(fragments, collapse = gap)

  # Extend the match with filler on both sides, then end of string or one
  # more character.
  # NOTE(review): the "." in "($|.)" is unescaped and so matches any single
  # character, not only a full stop - confirm this is intended.
  pattern <- paste0(gap, core, gap, "($|.)")

  gsub(pattern, "", article, perl = TRUE)
}


#' Remove fullstops that are unlikely to represent end of sentence
#'
#' Applies a fixed sequence of substitutions that drop, or turn into spaces,
#'     full stops found in initials, inside tokens and between digits. The
#'     rules are applied strictly in the order listed, each on the output of
#'     the previous one.
#'
#' @param article A List with paragraphs of interest.
#' @return The list of paragraphs without misleading fullstops.
#' @noRd
obliterate_fullstop_1 <- function(article) {

  # Each rule is c(pattern, replacement).
  rules <- list(
    # Three initials, e.g. "J.P.A." -> "J P A"
    c("([A-Z])(\\.)\\s*([A-Z])(\\.)\\s*([A-Z])(\\.)", "\\1 \\3 \\5"),
    # Two initials, e.g. "J.P." -> "J P"
    c("([A-Z])(\\.)\\s*([A-Z])(\\.)", "\\1 \\3"),
    # Single initial before a capitalised word, e.g. " J. Smith" -> " J Smith"
    c("(\\s[A-Z])(\\.) ([A-Z][a-z]+)", "\\1 \\3"),
    # Full stop (plus any whitespace) before a lowercase letter or a digit
    c("\\.\\s*([a-z0-9])", " \\1"),
    # Full stop immediately before a capital letter
    c("\\.([A-Z])", " \\1"),
    # Full stop (plus any whitespace) before capitals followed by a digit
    c("\\.\\s*([A-Z]+[0-9])", " \\1"),
    # Full stop before anything other than whitespace, a digit or "["
    c("\\.([^\\s0-9\\[])", "\\1"),
    # Full stop between two digits
    c("([0-9])\\.([0-9])", "\\1\\2")
  )

  for (rule in rules) {
    article <- stringr::str_replace_all(article, rule[[1]], rule[[2]])
  }

  article
}


#' Remove references
#'
#' Overwrites every paragraph that looks like a bibliographic reference with
#'     the placeholder "References". A paragraph qualifies if it contains a
#'     four-digit number in parentheses, e.g. "(2010)", or the text " et al.".
#'
#' @param article A List with paragraphs of interest.
#' @return The paragraphs, with reference-like ones replaced by "References".
#' @noRd
obliterate_refs_1 <- function(article) {

  # Matching paragraphs are replaced as a whole rather than dropped; the
  # original author built it like this to avoid destabilising the algorithm.
  reference_cues <- c(
    "^.*\\([0-9]{4}\\).*$",
    "^.* et al\\..*$"
  )

  for (cue in reference_cues) {
    article <- gsub(cue, "References", article)
  }

  article
}


#' Identify and extract Funding statements in TXT files.
#'
#' Takes a TXT file and returns data related to the presence of a Funding
#'     statement, including whether a Funding statement exists. If a Funding
#'     statement exists, it extracts it.
#'
#' @param filename The name of the TXT file as a string.
#' @return A tibble with the columns `article` (the file's base name up to its
#'     first full stop), `pmid` (the digits following "PMID" in `filename`),
#'     `is_funded_pred` (whether a funding statement was found) and
#'     `funding_text` (the statement that was found).
#' @examples
#' \donttest{
#' # Write a short example article to a temporary text file.
#' filepath <- file.path(tempdir(), "PMID00000000-PMC0000000.txt")
#' writeLines(c(
#'   "To our knowledge, this is the first study of its kind.",
#'   "Conflicts of interest: none declared.",
#'   "This work was supported by the National Institutes of Health (R01-000000).",
#'   "The protocol was registered at ClinicalTrials.gov (NCT00000000).",
#'   "All data and code are available at https://github.com/example/repo.",
#'   "We independently replicated the original analysis."
#' ), filepath)
#'
#' # Identify and extract the funding statement.
#' results_table <- rt_fund(filepath)
#' }
#' @export
rt_fund <- function(filename) {

  # Common PDF-to-text artifacts: two lowercase runs split by a hyphen, or by
  # an optional comma/semicolon, each followed by any number of newlines.
  hyphen_break <- "([a-z]+)-\n*([a-z]+)"
  punct_break <- "([a-z]+)(|,|;)\n*([a-z]+)"

  # Rejoin the broken words of one chunk of text, then split it into
  # paragraphs at newlines and at " *".
  to_paragraphs <- function(txt) {
    txt <- gsub(hyphen_break, "\\1\\2", txt)
    txt <- gsub(punct_break, "\\1\\3", txt)
    strsplit(txt, "\n| \\*")
  }

  paragraphs <- purrr::map(.read_txt(filename), to_paragraphs)
  paragraphs <- utf8::utf8_encode(unlist(paragraphs))

  # Drop paragraphs that are empty or whitespace only.
  paragraphs <- paragraphs[nzchar(trimws(paragraphs))]

  # A TXT file carries no XML structure, so every paragraph is passed as body
  # text and the XML-derived result is a neutral "nothing found" placeholder.
  # Detection itself is delegated to .rt_fund_pmc(), which per the original
  # note also applies its own conflict/disclosure obliteration internally.
  res <- .rt_fund_pmc(
    list(
      ack = character(0),
      body = paragraphs,
      footnotes = character(0)
    ),
    list(
      is_fund_pred = FALSE,
      fund_text = "",
      is_fund_pmc_title = NA
    )
  )

  article_id <- stringr::word(basename(filename), sep = "\\.")

  # NOTE(review): when `filename` contains no "PMID<digits>", this leaves the
  # full `filename` in `pmid` - confirm that this fallback is intended.
  pmid <- gsub("^.*PMID([0-9]+).*$", "\\1", filename)

  tibble::tibble(
    article = article_id,
    pmid = pmid,
    is_funded_pred = res$is_fund_pred,
    funding_text = res$fund_text
  )
}