# _references/Code/dat_lit_import.R

# importing ris data and changing to bibtex structure
#makes ready for citr package
#Anthony
#feb2020

#set uni library correctly
# myPaths <- .libPaths("C:/Program Files/R/R-3.6.2/library")
# myPaths <- c(myPaths)
# .libPaths(myPaths)  # add new path
# .libPaths()

################################ demo ########
## To cite bibliometrix in publications, please use:
## Aria, M. & Cuccurullo, C. (2017) bibliometrix: An R-tool for comprehensive science mapping analysis, Journal of Informetrics, 11(4), pp 959-975, Elsevier. 
## http:\\www.bibliometrix.org                        
## To start with the shiny web-interface, please digit:

#import libraries
# library() stops with an error when a package is missing (require() only
# returns FALSE); tidyverse supplies %>%, the dplyr verbs and ggplot used below
library(bibliometrix)
library(tidyverse)

# import data
# test: read the demo .bib file from the bibliometrix site and convert it
# to a data frame so its column tags can be compared with my own export
dat <- bibliometrix::readFiles(
  "https://www.bibliometrix.org/datasets/savedrecs.bib"
)

# str(dat)
M <- bibliometrix::convert2df(dat, dbsource = "isi", format = "bibtex")

# column tags of the converted demo data
colnames(M)

## my data
# for now the bib file was imported in the shiny app and saved as csv,
# so the csv export is what gets read here
dat_bib_beech <- read.csv(
  file = "C://PhD/beech-publication-wr/_references/data/Bibliometrix-Export-File-2020-02-22.csv"
)

# compare the demo column count with the columns and types of the export
ncol(M)
colnames(dat_bib_beech)
str(dat_bib_beech)
# column tags, manually reordered
# to match the example for the shiny app

# quick fix: tags typed out by hand
new_names <- c(
  "AU", "TI", "DE_unknown", "DT", "DT2", "DI", "BE_unknown", "BN_unknown",
  "SN", "PN", "PP", "PU", "VL", "BO", "DB", "PY", "TC", "CR", "C1_unknown",
  "AU_UN", "SR_FULL", "SR"
)
# same tags, except the first one is spelled out as "Author"
fullNames_new <- c("Author", new_names[-1])

## Full number of references
# ...
# Data wrangling: derive the analysis columns from the bibliometrix csv
# export. The result holds one row per reference and only the derived columns.
dat2 <- dat_bib_beech %>%
  as_tibble() %>%
  # transmute() keeps just the columns created here, so the result does not
  # depend on column positions or on a `dat2` left over from an earlier run
  # (the pipeline used to read names(dat2) while dat2 was still being built)
  transmute(
    # running record number; seq_len() is also safe for an empty table
    Publication_ID = seq_len(n()),
    PrePrint = ifelse(DT == "PREPRINT", "Yes", "No"),
    Tags_general = as.character(DE_unknown),
    Author1 = as.character(AU),
    Title = as.character(TI),
    # PY is the bibliometrix publication-year tag; AU holds the authors
    Publish_year = PY,
    Document_type = DT2,
    # everything before the first comma of the short reference
    Contact_auth = gsub(",.*", "", SR),
    doi = DI,
    # convert via character so a factor column yields the page values
    # rather than its internal level codes
    pages = as.numeric(as.character(PP))
  )

# needs to be sorted...
# this is the respondent I think...
# feb2020
# dat2$Contact_auth <- gsub(",.*","",dat2$SR)

# structure and column names of the wrangled table
str(dat2)
colnames(dat2)

# group_by(Title)
# %>%
# summarise_all()
# unique(dat2$Title)
# unique id
# max(dat2$Publication_ID)

# number of referenced pre-prints
table(dat2[["PrePrint"]])

# what are these???
# unique(dat2$Tags_general)




# save modified data
# Write the wrangled table (dat2) rather than the untouched import: the notes
# below treat this file as the dat2 / data_setLocations data.
write.csv(dat2, "./data/v1_RCode_output.csv")

# run bib shiny again and import new v2 csv file
# bibliometrix::
# ?biblioshiny()

# dat2 == data_setLocations
# data from locations etc
# data_setLocations <- revtools::read_bibliography("./data/v1_RCode_output.csv")

# bibliography from data resource
# Number the records by row so the join key follows the size of the .bib file
# instead of a fixed count of 174, then attach the columns derived in dat2.
dat_full_tidy_approach <-
  revtools::read_bibliography("./Beech_forests.bib") %>%
  mutate(Publication_ID = row_number()) %>%
  full_join(dat2, by = "Publication_ID")


# amazing?
# revtools::as.bibliography(dat_full_tidy_approach)

glimpse(dat_full_tidy_approach)

# manual for now: the first record's date is set by hand
dat_full_tidy_approach$date[1] <- "2017"

# for all dates with year first: keep only the part before the first "-"
dat_full_tidy_approach$date <- gsub("-.*", "", dat_full_tidy_approach$date)

# year as a factor, for tabulating and plotting
dat_tidy1 <- dat_full_tidy_approach %>%
  mutate(year = as.factor(date))

# revtools is not attached anywhere in the visible script, so the writer is
# called through its namespace; the export is written once (the call was
# duplicated, rewriting the same file)
revtools::write_bibliography(dat_tidy1, "./data/test_out2.ris", format = "ris")

# page count by year...
ggplot(dat_tidy1, aes(year, pages.x)) +
  geom_point()

# reference dates: number of records per year
table(dat_tidy1[["year"]])

# NOTE(review): the ")" in the file name looks accidental - confirm nothing
# reads this file before renaming it
write.csv(x = dat_tidy1, file = "./data/revtools_dataframe_)structure.csv")

# demo
vignette("revtools")

# print titles to check first
# manual changing in case of accident
# feb2020

# what sort of grammar is actually important?
# feb2020
# NOTE(review): these corrections go into dat_full_tidy_approach only;
# dat_tidy1 was derived from it before this point and is not updated here
dat_full_tidy_approach$title[1:2] <- c(
  "Confronting the risks of large-scale invasive species control",
  "Unexpected consequences of control: Competitive vs. Predator release in a four-species assemblage of invasive mammals"
)


#nah but help notes
# library(revtools)
# # import data
# file_location <- system.file(
#   "extdata",
#   "avian_ecology_bibliography.ris",
#   package = "revtools")
# 
# x <- read_bibliography(file_location)
# 
# # generate then locate some 'fake' duplicates
# x_duplicated <- rbind(x, x[1:5,])
# x_check <- find_duplicates(x_duplicated)
# # returns a vector of potential matches
# x_check
# revtools::extract_unique_references(x_duplicated, find_duplicates(x_duplicated))
# 
# ?find_duplicates
# revtools::find_duplicates(data = dat_tidy1,match_variable = "doi")

#list for referencing short keys
# Calls the bibliography print method directly instead of going through
# print() dispatch.
# NOTE(review): dat_tidy1 is built with dplyr verbs from read_bibliography()
# output - confirm it has the class this method expects.
revtools::print.bibliography(dat_tidy1)




##########################ON old way I was working with it but not anymore.....####################
## raw import
dat <- readFiles("./Beech_forests.bib")
dat_ris <- readFiles("./data/Beech_forests.ris")

# run shiny app locally
# needs other packages above too
bibliometrix::biblioshiny()


# demo
# vignette("bibliometrix-vignette")

# working with ris
# str(dat)
#
M <- convert2df(dat, dbsource = "scopus", format = "plaintext")

names(M)
head(M)

# The inspection calls below read M_scop and M_webof_Sci, so both conversions
# are run here; they were commented out, which left the objects undefined and
# made glimpse()/head()/summary() fail with "object not found".
M_scop <- convert2df(dat, dbsource = "scopus", format = "bibtex")

M_webof_Sci <- convert2df(dat, dbsource = "isi", format = "bibtex")

# glimpse() comes from the tidyverse; library() errors if it is not installed
library(tidyverse)
glimpse(M_webof_Sci)
# column 22 only, kept as a one-column data frame
head(M_webof_Sci[22])
glimpse(M_scop)

summary(M_webof_Sci)
# dat_lits <-
# davan690/beech-publication-wr documentation built on March 29, 2020, 11:09 a.m.