# importing ris data and changing to bibtex structure
#makes ready for citr package
#Anthony
#feb2020

#set uni library correctly
# myPaths <- .libPaths("C:/Program Files/R/R-3.6.2/library")
# myPaths <- c(myPaths)
# .libPaths(myPaths)  # add new path
# .libPaths()

################################ demo ########
## To cite bibliometrix in publications, please use:
## Aria, M. & Cuccurullo, C. (2017) bibliometrix: An R-tool for comprehensive science mapping analysis, Journal of Informetrics, 11(4), pp 959-975, Elsevier. 
## http:\\www.bibliometrix.org                        
## To start with the shiny web-interface, please digit:

#import library

Sorting reference data {#dataref}

There are several packages I use in this analysis (bibliometrix, revtools, RefManageR). The overall workflow is focused on developing each reference/resource in the overall database, taking it from raw data entry to a unique bib entry with all of its associated metadata and other details. The general steps are:

Step 1.

Import metadata into the Mendeley database using one of these methods: the web clipper, a bib entry, or manual entry.

Step 2.

# library() stops immediately if tidyverse is missing; require() would only
# warn and leave `%>%`, mutate(), etc. undefined further down.
library(tidyverse)

## My data
# The easiest import route seems to be revtools -- just an idea:
# revtools::read_bibliography(filename = "./data/mendeley/")
# For now the bib file was imported through the shiny app and saved as csv.
dat_bib_beech <- read.csv("C://PhD/beech-publication-wr/_references/data/Bibliometrix-Export-File-2020-02-22.csv")

# length(names(M))
names(dat_bib_beech)
str(dat_bib_beech)

# Manually relabel the columns (by position) to match the example data used
# by the shiny app. Quick fix: columns whose meaning is not yet known carry
# an "_unknown" suffix.
new_names <- c("AU", "TI", "DE_unknown", "DT", "DT2", "DI", "BE_unknown", "BN_unknown", "SN",
               "PN", "PP", "PU", "VL", "BO", "DB", "PY", "TC", "CR", "C1_unknown", "AU_UN", "SR_FULL", "SR")
fullNames_new <- c("Author", "TI", "DE_unknown", "DT", "DT2", "DI", "BE_unknown", "BN_unknown", "SN",
                   "PN", "PP", "PU", "VL", "BO", "DB", "PY", "TC", "CR", "C1_unknown", "AU_UN", "SR_FULL", "SR")

# The relabelling is positional, so refuse to continue if the export does not
# have exactly one column per label (a short export would otherwise end up
# with NA column names).
stopifnot(ncol(dat_bib_beech) == length(new_names))
names(dat_bib_beech) <- new_names

## Full number of references
# ...

# Data wrangling: drop the unidentified columns, derive readable fields, then
# drop the raw columns that were copied into those fields.
dat2 <- dat_bib_beech %>%
  dplyr::select(-c(BN_unknown, BE_unknown, C1_unknown)) %>%
  mutate(
    # seq_len() yields an empty id vector for an empty export instead of c(1, 0)
    Publication_ID = seq_len(nrow(dat_bib_beech)),
    PrePrint = ifelse(DT == "PREPRINT", "Yes", "No"),
    Tags_general = as.character(DE_unknown),
    Author1 = as.character(AU),
    Title = as.character(TI),
    # Publication year comes from PY (previously AU was copied here by mistake)
    Publish_year = PY,
    Document_type = DT2,
    # Everything before the first comma of the short reference
    Contact_auth = gsub(",.*", "", SR),
    doi = DI,
    # Go through character first: as.numeric() on a factor returns the level
    # codes rather than the page values. Non-numeric entries become NA.
    pages = as.numeric(as.character(PP))
  ) %>%
  dplyr::select(-c(TI, DE_unknown, DT, DT2, SR, DI)) %>%
  as_tibble()

# The first 13 columns of dat2 are the remaining raw export columns; dat3
# keeps only the derived fields created above.
final_remove <- names(dat2)[1:13]

dat3 <- dat2[, setdiff(names(dat2), final_remove)]

Resources

Video tutorials

Basic Data

bibliometrix package

require(bibliometrix)

bibliometrix: An R-Tool for Comprehensive Science Mapping Analysis (Aria & Cuccurullo, 2017, Journal of Informetrics, 11(4), pp. 959-975).

## My data

# The easiest import route seems to be revtools -- just an idea:
# revtools::read_bibliography(filename = "./data/mendeley/")
# For now the bib file was imported through the shiny app and saved as csv.
dat_bib_beech <- read.csv("C://PhD/beech-publication-wr/_references/data/Bibliometrix-Export-File-2020-02-22.csv")

# length(names(M))
names(dat_bib_beech)
str(dat_bib_beech)

# Manually relabel the columns (by position) to match the example data used
# by the shiny app. Quick fix: columns whose meaning is not yet known carry
# an "_unknown" suffix.
new_names <- c("AU", "TI", "DE_unknown", "DT", "DT2", "DI", "BE_unknown", "BN_unknown", "SN",
               "PN", "PP", "PU", "VL", "BO", "DB", "PY", "TC", "CR", "C1_unknown", "AU_UN", "SR_FULL", "SR")
fullNames_new <- c("Author", "TI", "DE_unknown", "DT", "DT2", "DI", "BE_unknown", "BN_unknown", "SN",
                   "PN", "PP", "PU", "VL", "BO", "DB", "PY", "TC", "CR", "C1_unknown", "AU_UN", "SR_FULL", "SR")

# The relabelling is positional, so refuse to continue if the export does not
# have exactly one column per label (a short export would otherwise end up
# with NA column names).
stopifnot(ncol(dat_bib_beech) == length(new_names))
names(dat_bib_beech) <- new_names

## Full number of references
# ...

# Data wrangling: drop the unidentified columns, derive readable fields, then
# drop the raw columns that were copied into those fields.
# select() is namespaced so another attached package cannot mask it.
dat2 <- dat_bib_beech %>%
  dplyr::select(-c(BN_unknown, BE_unknown, C1_unknown)) %>%
  mutate(
    # seq_len() yields an empty id vector for an empty export instead of c(1, 0)
    Publication_ID = seq_len(nrow(dat_bib_beech)),
    PrePrint = ifelse(DT == "PREPRINT", "Yes", "No"),
    Tags_general = as.character(DE_unknown),
    Author1 = as.character(AU),
    Title = as.character(TI),
    # Publication year comes from PY (previously AU was copied here by mistake)
    Publish_year = PY,
    Document_type = DT2,
    # Everything before the first comma of the short reference
    Contact_auth = gsub(",.*", "", SR),
    doi = DI,
    # Go through character first: as.numeric() on a factor returns the level
    # codes rather than the page values. Non-numeric entries become NA.
    pages = as.numeric(as.character(PP))
  ) %>%
  dplyr::select(-c(TI, DE_unknown, DT, DT2, SR, DI)) %>%
  as_tibble()

# The first 13 columns of dat2 are the remaining raw export columns; dat3
# keeps only the derived fields created above.
final_remove <- names(dat2)[1:13]

dat3 <- dat2[, setdiff(names(dat2), final_remove)]

Needs to be sorted... this is the respondent, I think...

# feb2020
# dat2$Contact_auth <- gsub(",.*","",dat2$SR)

# str(dat2)
# names(dat2)

# group_by(Title)
# %>%
# summarise_all()
# unique(dat2$Title)
# unique id
# max(dat2$Publication_ID)

# Number of referenced pre-prints
table(dat2$PrePrint)

# What are these???
# unique(dat2$Tags_general)

# Save the modified data
write.csv(dat2, "C://PhD/beech-publication-wr/_references/data/v2_RCode_output.csv")
# Run the bib shiny app again and import the new v2 csv file
# bibliometrix::
# ?biblioshiny()

# dat2 == data_setLocations
# data from locations etc
# data_setLocations <- revtools::read_bibliography("./data/v1_RCode_output.csv")

# Bibliography from the data resource, joined to the wrangled export.
# Publication_ID is the row position within the .bib file; row_number()
# follows the actual number of entries instead of a hard-coded 174, which
# made mutate() fail whenever the library grew or shrank.
# NOTE(review): the join pairs records purely by position, so it presumes the
# .bib file and the csv export list the references in the same order -- confirm.
dat_full_tidy_approach <- revtools::read_bibliography("C://PhD/beech-publication-wr/_references/Beech_forests.bib") %>%
  mutate(Publication_ID = dplyr::row_number()) %>%
  full_join(dat2, by = "Publication_ID")


# amazing?
# revtools::as.bibliography(dat_full_tidy_approach)

# glimpse(dat_full_tidy_approach)

# Manual correction for now
dat_full_tidy_approach$date[1] <- "2017"

# For all dates with the year first: drop everything from the first "-" on
dat_full_tidy_approach$date <- gsub("-.*", "", dat_full_tidy_approach$date)

dat_tidy1 <- dat_full_tidy_approach %>%
  mutate(year = as.factor(date))
Modified data
# Reload the wrangled bibliography that was written out as the v2 csv
dat2 <- utils::read.csv(
  file = "C://PhD/beech-publication-wr/_references/data/v2_RCode_output.csv"
)
# kableExtra::kable(head(dat2))

revtools package

require(revtools)

Simple example

# Not my data, but a worked example taken from the help notes
library(revtools)

# Locate the example bibliography shipped with revtools and import it
file_location <- system.file(
  file.path("extdata", "avian_ecology_bibliography.ris"),
  package = "revtools"
)

x <- read_bibliography(filename = file_location)

# Append copies of the first five records to create some known 'fake'
# duplicates, then look for them
x_duplicated <- rbind(x, x[seq_len(5), ])
x_check <- find_duplicates(data = x_duplicated)
# Vector of potential matches
x_check
revtools::extract_unique_references(
  data = x_duplicated,
  matches = find_duplicates(data = x_duplicated)
)

# ?find_duplicates
# Same duplicate search on the beech bibliography
revtools::find_duplicates(dat_tidy1)

# List for referencing short keys
# revtools::print.bibliography(dat_tidy1)

Using shiny too

# Export the tidy bibliography once per format (the RIS export used to be
# issued twice in a row with identical arguments).
write_bibliography(dat_tidy1, "C://PhD/beech-publication-wr/_references/data/test_out2.ris", format = "ris")

write_bibliography(dat_tidy1, "C://PhD/beech-publication-wr/_references/data/test_out2.bib", format = "bib")
# Help lookup kept as a comment, like the other lookups in this script, so
# running the file does not open a help page.
# ?write_bibliography

# Page count per reference year
ggplot(data = dat_tidy1, aes(x = year, y = pages.x)) +
  geom_point()

# Reference dates
table(dat_tidy1$year)

write.csv(dat_tidy1, "C://PhD/beech-publication-wr/_references/data/revtools_dataframe_structure.csv")

# demo
# vignette("revtools")

# Print titles to check first
# Manual changing in case of accident
# feb2020

# What sort of grammar is actually important?
# feb2020
# NOTE(review): these corrections change dat_full_tidy_approach only.
# dat_tidy1 and the files written above were created before this point, so
# they still carry the uncorrected titles -- confirm whether that is intended.
dat_full_tidy_approach$title[1] <- "Confronting the risks of large-scale invasive species control"
dat_full_tidy_approach$title[2] <- "Unexpected consequences of control: Competitive vs. Predator release in a four-species assemblage of invasive mammals"

RefManageR package

########################## Old way I was working with it, but not anymore ####################
# readFiles() and convert2df() below are bibliometrix functions, so that is
# the package this section needs; revtools stays attached as before.
# library() stops with a clear error when a package is missing, where
# require() would only warn and let the calls below fail later.
# NOTE(review): readFiles() appears to have been dropped from later
# bibliometrix releases (convert2df() then takes the file path directly) --
# confirm against the installed version before re-running this section.
library(bibliometrix)
library(revtools)

## Raw import
# should be bib but don't have one
dat <- readFiles("C://PhD/beech-publication-wr/Beech-forests.bib")
dat_ris <- readFiles("C://PhD/beech-publication-wr/_references/data/Beech_forests.ris")
# working with ris
# str(dat)
#
# Convert the raw records to a bibliometrix data frame, trying several
# source/format combinations to compare the results.
M <- convert2df(dat, dbsource = "scopus", format = "plaintext")

names(M)
head(M)

M_scop <- convert2df(dat, dbsource = "scopus", format = "bibtex")
#
M_webof_Sci <- convert2df(dat, dbsource = "isi", format = "bibtex")

library(tidyverse)
glimpse(M_webof_Sci)
# Column 22 only
head(M_webof_Sci[22])
glimpse(M_scop)

summary(M_webof_Sci)
# dat_lits <-
library(RefManageR)
# Run the shiny app locally

# needs other packages above too
# bibliometrix::biblioshiny()


# demo
# vignette("bibliometrix-vignette")


davan690/beech-publication-wr documentation built on March 29, 2020, 11:09 a.m.