R/calcul_sigaps.R

Defines functions calc_paper_score calc_author_score merge_publi_sigaps nearest_year_available calc_sigaps_rank calc_sigaps_score get_sigaps_by_paper sum_sigaps

Documented in calc_author_score calc_paper_score merge_publi_sigaps

#' Calculate a paper score based on it's rank
#'
#' @param paper_rank A character vector with the rank of each paper
#' @return An integer vector with the scores
calc_paper_score <- function(paper_rank) {
  # deal with NA
  paper_rank <- as.character(paper_rank)

  # Vectorised version of the switch statement in order to deal with
  # vector of paper_rank larger than one
  scores <- sapply(X = paper_rank, FUN = switch,
                   "A" = 8,
                   "B" = 6,
                   "C" = 4,
                   "D" = 3,
                   "E" = 2,
                   "NC" = 1,
                   `NA` = NA,
                   NA # Default value
  )

  as.integer(scores)
}


#' Compute the author score
#' @details Compute the author score based on his sigaps' rank
#' @param sigaps_rank A character vector with sigaps' rank (1, 2, 3, Other or Last)
#' @return An integer vector with the author score for each paper
calc_author_score <- function(sigaps_rank) {
  sigaps_rank <- as.character(sigaps_rank)
  scores <- sapply(X = sigaps_rank, FUN = switch,
                   "1" = 4,
                   "2" = 3,
                   "3" = 2,
                   "Other" = 1,
                   "Last" = 4,
                   `NA` = NA,
                   NA # Default value
  )

  as.integer(scores)
}
## Crado

#' Merge authors publications and publications ranks
#'
#' @import dplyr
#' @export
merge_publi_sigaps <- function(df_author, df_sigaps = df_sigaps_publi) {

  years_available <- as.integer(unique(df_sigaps$year))
  df_author$nearest_year <- as.character(sapply(df_author$year, FUN = nearest_year_available, years_available))
  # find the nearest year available
  merged_df <- df_author %>%
    left_join(df_sigaps, by = c("journal_title_iso" = "iso_title", "nearest_year" = "year"))

  merged_df
}

nearest_year_available <- function(year, years_available) {
  if (is.na(year))
    return(max(years_available))

  diff_years <- abs(years_available - year)
  minimal_diff <- min(diff_years, na.rm = T)
  year_index <- which(diff_years == minimal_diff)
  years_available[year_index][1]
}

calc_sigaps_rank <- function(rank, n_author) {

  mapply(rank, n_author, FUN = function(rank, n_author) {
    if (rank == n_author & rank > 1) {
      return("Last")
    } else if (rank <= 3) {
      return(as.character(rank))
    } else {
      return("Other")
    }
  }, SIMPLIFY = TRUE
  )
}

#' @export
#' @import dplyr
calc_sigaps_score <- function(df_publi) {
  df_publi %>%
    mutate(paper_score = calc_paper_score(paper_rank),
           sigaps_author_rank = calc_sigaps_rank(author_rank, n_authors),
           author_score = calc_author_score(sigaps_author_rank),
           sigaps_score = paper_score * author_score)
}

#' @export
get_sigaps_by_paper <- function(author_lastname, author_firstname = NULL,
                                affiliation, year_end = NULL, year_start = NULL,
                                xml_path = NULL, other_query = NULL,
                                publication_type = c("Journal Article",
                                                     "Review", "Editorial"),
                                df_sigaps = df_sigaps_publi) {

  # If no file selected, retrieve data in pubmed
  if (is.null(xml_path)) {
    pubmed_publi <- import_author_pubmed(
      author_lastname, author_firstname, affiliation,
      year_end, year_start, other_query
      )
  } else {
    pubmed_publi <- import_xml_pubmed(
      xml_path = xml_path, lastname = author_lastname
    )
  }

  df_merged_publi_sigaps <- merge_publi_sigaps(pubmed_publi, df_sigaps = df_sigaps)
  calc_sigaps_score(df_merged_publi_sigaps)
}

#' @export
sum_sigaps <- function(df_publi)
  sum(df_publi$sigaps_score, na.rm = T)
jomuller/rigaps documentation built on May 29, 2019, 12:39 p.m.