#' Characterize authors found within papers' titles
#'
#' \code{characterize_au} calculates several author related metrics from a
#' scimeetr object. The results are returned in a list of data frame. The
#' metrics in the table are: title-words frequency, title-words relative
#' frequency, title-words relevance.
#'
#' @seealso \code{\link{characterize_jo}} for journal characterization,
#' \code{\link{characterize_ab}} for abstract-word characterization,
#' \code{\link{characterize_kw}} for keyword characterization,
#' \code{\link{characterize_ti}} for title characterization,
#' \code{\link{characterize_un}} for university characterization,
#' \code{\link{characterize_co}} for country characterization
#' @param scimeetr_data An object of class scimeetr.
#' @param lambda A number from 0 to 1. 0 for relative frequency 1 for total
#' occurence only
#' @examples
#' # Example with an object of class scimeetr (see import_wos_files() or
#' # import_scopus_files()) already in the workspace
#' author_list <- characterize_au(scimeetr_list)
#' # Since this example shows how to load WOS from your system we need to run
#' # the following line to find the path to the exemple file
#' fpath <- system.file("extdata", package="scimeetr")
#' fpath <- paste(fpath, "/wos_folder/", sep = "")
#' # Then we can run the actual example
#' example_scimeetr_object <- import_wos_files(files_directory = fpath)
#' characterize_au(example_scimeetr_object)
#'
#' @return A list of dataframe. The list length matchs the number of communities
#' that the scimeetr object contains.
#' @import dplyr
#' @export
characterize_au <- function(scimeetr_data, lambda = 0.7) {
splitted_cr <- split_cr(scimeetr_data)
splt_cr_freq <- map2(map(scimeetr_data, 'cr'), rep(list(splitted_cr), length(scimeetr_data)), full_join, by = c('ID'))
hold <- map2(map(scimeetr_data, 'dfsci'), splt_cr_freq, full_join, by = c('RECID')) %>%
map(function(dfsci_mod){
au_list <- strsplit(dfsci_mod$AU, split="; ")
lengthlist <- map(au_list, length)
position <- map2(as.list(rep(1, length(au_list))), lengthlist, function(x,y){
if(y >= 3){
z <- c(x,3:y, 2)
} else if(y != 0) {
z <- c(x:y)
} else {
z <- NA
}
return(z)
}) %>%
unlist()
# find top 10 cut off
tc_tresh <- quantile(dfsci_mod$TC, c(0.90), na.rm = T)
lc_tresh <- quantile(dfsci_mod$Frequency.x, c(0.90), na.rm = T)
# ----------------
audf <- data.frame('AU' = toupper(unlist(au_list)),
'UT' = rep(dfsci_mod$UT, sapply(au_list, length)),
'TC' = rep(dfsci_mod$TC, sapply(au_list, length)),
'RECID' = rep(dfsci_mod$RECID, sapply(au_list, length)),
'PY' = rep(dfsci_mod$PY, sapply(au_list, length)),
'LC' = rep(dfsci_mod$Frequency.x, sapply(au_list, length)),
'PO' = na.omit(position),
'LG' = rep(sapply(au_list, length), sapply(au_list, length)),
'tc_tresh' = as.numeric(tc_tresh[1]),
'lc_tresh' = as.numeric(lc_tresh[1]),
stringsAsFactors=F)
audf$LC[is.na(audf$LC)] <- 0
audf1 <- audf %>%
mutate(worth = suppressWarnings(((1/PO)/(sum(1/c(1:LG))))/((1/1)/(sum(1/c(1:LG))))),
LCC = LC * worth,
TCC = TC * worth,
is_lc_highly_cited = LC > lc_tresh,
is_tc_highly_cited = TC > tc_tresh
) %>%
group_by(AU) %>%
mutate(res_n = n()) %>%
#filter(res_n >= 1) %>%
mutate(rank_LCC = rank(desc(LCC)),
HHL = LCC > rank_LCC,
rank_TCC = rank(desc(TCC)),
HH = TCC > rank_TCC,
rank_LC = rank(desc(LC)),
HL = LC > rank_LC,
rank_TC = rank(desc(TC)),
H = TC > rank_TC) %>%
#filter(!is.na(HL) & !is.na(H)) %>%
summarise(HHL = sum(HHL, na.rm = T),
HH = sum(HH, na.rm = T),
HL = sum(HL, na.rm = T),
H = sum(H, na.rm = T),
Local_cit = sum(LC, na.rm = T),
Global_cit = sum(TC, na.rm = T),
nb_papers = n(),
nb_highly_cited_papers_TC = sum(is_tc_highly_cited),
nb_highly_cited_papers_LC = sum(is_lc_highly_cited)) %>%
#filter(H>=1) %>%
ungroup() %>%
mutate(local2global = Local_cit / Global_cit) %>%
arrange(desc(HHL))
audf1 <- audf1[!is.na(audf1$AU),]
audf1$AU <- stringr::str_replace_all(audf1$AU, ',', '')
first_au <- group_by(dfsci_mod, author) %>%
summarise(fa_nb_paper_cited = n(),
fa_total_cit = sum(Frequency.x, na.rm = T))
df <- full_join(audf1, first_au, by = c('AU' = 'author'))
return(df)
})
# If it's a sub_community, table of relative frequency
tmp <- purrr::map(scimeetr_data, "parent_com") %>%
purrr::compact()
hold_relative <- purrr::map2(hold[names(tmp)], hold[as.character(tmp)], function(child, parent, lambda) {
tst <- left_join(child, parent, by = "AU") %>%
mutate(relevance_based_on_local_cit = lambda * log(Local_cit.x/sum(Local_cit.x,na.rm = T), base = 10) + (1 - lambda) * log((Local_cit.x/sum(Local_cit.x,na.rm = T))/(Local_cit.y/sum(Local_cit.y,na.rm = T)), base = 10),
relevance_based_on_nb_papers = lambda * log(nb_papers.x/sum(nb_papers.x,na.rm = T), base = 10) + (1 - lambda) * log((nb_papers.x/sum(nb_papers.x,na.rm = T))/(nb_papers.y/sum(nb_papers.y,na.rm = T)), base = 10)) %>%
select(AU,starts_with('relevance'))
}, lambda)
au_df <- list()
for(x in 1:length(hold)){
subh <- hold_relative[[names(hold)[x]]]
if(!is.null(subh)) {
au_df[[x]] <- left_join(hold[[names(hold)[x]]], hold_relative[[names(hold)[x]]], 'AU')
} else {
au_df[[x]] <- hold[[x]]
}
}
names(au_df) <- names(scimeetr_data)
return(au_df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.