This package implements various metadata processing tasks performed at the Hoover Institution Library & Archives.
This function (fromXMLtoCSV) converts data from .xml to .csv format using XPath expressions. To extract the data, define the item nodes (i.e. files, items, etc.) and the collection nodes (information about the collection).
Example
# Load the package that provides fromXMLtoCSV().
library(HooverArchives)

# Item-level specification: an XPath selecting the component nodes, the
# sub-nodes to read from each, and one entry in `types` per entry in `nodes`.
item_nodes <- list(
  path = "(//c)|(//c01)|(//c02)|(//c03)",
  nodes = c("primarynode", ".//unittitle", "./did//unitdate"),
  types = c("attrs", "text", "text")
)

# Collection-level specification, built the same way as item_nodes.
collection_nodes <- list(
  path = "archdesc[@level='collection']",
  nodes = c(".//unittitle", "./did//unitdate"),
  types = c("text", "text")
)

# Convert the sample XML file shipped with the package.
filedata <- fromXMLtoCSV(
  system.file("rusdata.xml", package = "HooverArchives"),
  item_nodes,
  collection_nodes
)
This function (fromFILEStoSERIES) helps to organize the Belgium data by adding an extra "Series" row and reformatting the data into the proper format.
Example
library(HooverArchives)
# NOTE(review): nothing below appears to call readxl; read.xlsx() and
# write.xlsx() presumably come from xlsx. Confirm before dropping this line.
library(readxl)
library(xlsx)

# Load data and create indices
# Both sheets live in the same sample workbook shipped with the package.
belgium_xlsx <- system.file("BelgiumData.xlsx", package = "HooverArchives")

# Open Sheet 1 (read without a header; column names are taken from row 3)
dat2.1 <- read.xlsx(belgium_xlsx,
                    sheetIndex = 1, header = FALSE, encoding = "utf-8")
dat2.1[] <- lapply(dat2.1, as.character)
colnames(dat2.1) <- as.character(dat2.1[3, ])
# Drop rows 1-3 (everything up to and including the header row) and
# columns 1 and 14.
dat2.1 <- dat2.1[-(1:3), -c(1, 14)]
dat2.1$indexW <- dat2.1$`Item title`

# Open Sheet 2
dat2.2 <- read.xlsx(belgium_xlsx,
                    sheetIndex = 2, header = TRUE, encoding = "utf-8")
dat2.2$indexW <- dat2.2$`Packet.Catalog.Title`

# Merge the two data frames using the buildIndex() and mergeData() functions
index_matches <- buildIndex(dat2.1$indexW, dat2.2$indexW,
                            index_simplify = TRUE,
                            fuzzy_matching = TRUE,
                            index_hashing = FALSE)
mdat <- mergeData(dat2.1, dat2.2, index_matches)

# Use fromFILEStoSERIES() to add the Series row
# NOTE(review): series_scope_note and problems_notes both point at the
# "Series scope note" column - confirm this is intended.
converted.dat <- fromFILEStoSERIES(dat = mdat,
                                   series_title = "Series title",
                                   files = "index",
                                   series_scope_note = "Series scope note",
                                   series_date_range = "Hoover date range",
                                   scope_and_content = "Scope.and.content",
                                   problems_notes = "Series scope note",
                                   box_barcode = "Box_Barcode",
                                   ckey = "Ckey.x",
                                   top_container = "Final.Box..")
converted.dat$Date <- dateReformatter(converted.dat$Date)

# Keep only the columns needed for the ArchivesSpace import.
# (The former `value = TRUE` argument was dropped: subset() has no such
# parameter, so it was silently ignored.)
convertedtoArchivesSpace <- subset(converted.dat, select = c(
  "Title", "Hierarchical_Relationship",
  "Processing_Information", "CkeyV", "Description_Level", "Date",
  "Top_Container_[indicator]",
  "Box_Barcode", "Scope_and_content"))

# Save file in xlsx to preserve diacritic characters
#write.xlsx(convertedtoArchivesSpace, "convertedtoArchivesSpace.xlsx", sheetName = "ArchivesSpace", col.names = TRUE)
This function (fromSERIEStoFILES) helps to obtain the "Files" variable by splitting issue dates into separate rows.
Example
library(HooverArchives)

# Item-level specification. `nodes` and `types` have the same length (15);
# presumably they are matched position by position - confirm against
# fromXMLtoCSV().
item_nodes <- list(
  path = "(//c)|(//c01)|(//c02)|(//c03)|(//c04)|(//c05)|(//c06)|(//c07)|(//c08)",
  nodes = c(
    "primarynode",
    ".//unittitle",
    "./did//unitdate",
    "./did//unitdate",
    "./did//language",
    "./did//abstract",
    ".//container",
    "./did//container",
    "./scopecontent[@id]",
    ".//scopecontent/head",
    ".//scopecontent/p",
    "./accessrestrict[@id]",
    ".//accessrestrict/head",
    ".//accessrestrict/p",
    ".//note"
  ),
  types = c(
    "attrs",
    "text",
    "text",
    "attrs",
    "attrs",
    "text",
    "text",
    "attrs",
    "attrs",
    "text",
    "text",
    "attrs",
    "text",
    "text",
    "text"
  )
)

# Collection-level specification, built the same way as item_nodes.
collection_nodes <- list(
  path = "archdesc[@level='collection']",
  nodes = c(
    ".//unittitle", "./did//unitdate", "./did//language", "./did//abstract"
  ),
  types = c("text", "text", "attrs", "text")
)

# Convert the sample newspaper XML shipped with the package, then split the
# issue dates held in the "note.text" column.
file_transf <- fromXMLtoCSV(
  system.file("rusnewspapers.xml", package = "HooverArchives"),
  item_nodes,
  collection_nodes
)
convdata <- fromSERIEStoFILES(
  file_transf,
  issueDates = "note.text",
  locale = "Russian"
)
#write.csv(convdata, "convdata_2012C30.csv")
This function (fromLATtoCYR) helps to convert transliterated Cyrillic back to the original Cyrillic script (e.g. Russian or Ukrainian).
Example
library(HooverArchives)

# Conversion to Russian: three transliterated sample titles
dat <- c(
  "Mezhdunarodnaia gazeta. Gl. redaktor: Iu. Zarechkin. Moscow, Russia. Semiweekly. 199?",
  "DEN' UCHITELIA komissiia po obrazovaniiu ob''edineniia Iabloko",
  "III-ii RIM vestnik Rossiiskogo patrioticheskogo dvizheniia. Redaktory: M. Artem'ev, V. Rugich. Moscow, Russia."
)
converteddata_ru <- fromLATtoCYR(
  dat,
  LAOR = TRUE,
  OROR = FALSE,
  EnglishDetection = TRUE
)

# Conversion to Ukrainian: read the sample CSV shipped with the package and
# convert its FIELD.245 column
dat <- read.csv(
  system.file("Ukraine_microform.csv", package = "HooverArchives"),
  sep = ",",
  encoding = "UTF-8",
  stringsAsFactors = FALSE
)
converteddata_uk <- fromLATtoCYR(dat$FIELD.245, tolanguage = "Ukrainian")
This function (dateReformatter) helps to standardize date entries and correct misspellings in them.
Example
library(HooverArchives)

# Sample date entries with irregular spacing, capitalization and month forms
datesV <- c(
  "1914:Aug 2 - 20; 1918:NOv 18",
  "1941:August-September 5, 1943 :Aug-1944:July"
)
reformated.data <- dateReformatter(datesV)

# Show the reformatted entries
reformated.data
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.