fromFILEStoSERIES: fromFILEStoSERIES function
In kkalininMI/HooverArchives: HooverArchives

Description Usage Arguments Value Examples

View source: R/fromFILEStoSERIES.R

This function helps to organize the Belgium data by adding an extra "Series" row and reformatting the data into the proper format.

fromFILEStoSERIES(
  dat = NULL,
  series_title = NULL,
  files = NULL,
  series_scope_note = NULL,
  series_date_range = NULL,
  scope_and_content = NULL,
  box_barcode = NULL,
  top_container = NULL,
  processing_information = NULL,
  ckey = NULL,
  lang_encoding = "English",
  add_articles = NULL,
  remove_special_characters = TRUE,
  alphabetizewithinbrackets = FALSE,
  diacriticslatinization = TRUE,
  USextension = FALSE,
  ...
)

`dat`	data frame
`series_title`	variable name for "Series title"
`files`	variable name for "Files"
`series_scope_note`	variable name for "Series scope note"
`series_date_range`	variable name for "Hoover date range"
`scope_and_content`	variable name for "Scope and content"
`box_barcode`	variable name for "Box Barcode"
`top_container`	variable name for "Top container"
`processing_information`	variable name for "Processing information"
`ckey`	variable name for "Ckey"
`lang_encoding`	sets the system locale to a specific language ("English" by default)
`add_articles`	vector of extra articles to be excluded from alphabetical ordering
`remove_special_characters`	remove special characters, i.e. horizontal brackets (TRUE by default)
`alphabetizewithinbrackets`	alphabetize by a string within square brackets (FALSE by default)
`diacriticslatinization`	transform characters with diacritics to Latin for alphabetization (TRUE by default)
`USextension`	file name change to reflect specifics of US data
`...`	other parameters

Returns the altered data frame.

library(HooverArchives)
library(readxl)
library(xlsx)

# Load data and create indices ----

# Path to the example workbook shipped with the HooverArchives package
belgium_file <- system.file("BelgiumData.xlsx", package = "HooverArchives")

# Open Sheet 1: read without a header, coerce every column to character,
# take the column names from row 3, then drop rows 1-3 and columns 1 and 14.
# NOTE(review): R encoding marks are normally spelled "UTF-8"; confirm that
# read.xlsx() honours the lower-case "utf-8" used here.
dat2.1 <- read.xlsx(belgium_file, sheetIndex = 1, header = FALSE,
                    encoding = "utf-8")
dat2.1[] <- lapply(dat2.1, as.character)
colnames(dat2.1) <- as.character(dat2.1[3, ])
dat2.1 <- dat2.1[-(1:3), -c(1, 14)]
# Matching key for sheet 1
dat2.1$indexW <- dat2.1$`Item title`

# Open Sheet 2: the first row is used as the header
dat2.2 <- read.xlsx(belgium_file, sheetIndex = 2, header = TRUE,
                    encoding = "utf-8")
# Matching key for sheet 2
dat2.2$indexW <- dat2.2$`Packet.Catalog.Title`

# Merge the two data frames using the buildIndex() and mergeData() functions
index_matches <- buildIndex(dat2.1$indexW, dat2.2$indexW,
                            index_simplify = TRUE,
                            fuzzy_matching = TRUE,
                            index_hashing = FALSE)
mdat <- mergeData(dat2.1, dat2.2, index_matches)

# Use fromFILEStoSERIES() to add the Series row.
# `problems_notes` is not one of the named arguments in the usage shown for
# fromFILEStoSERIES(); it is passed through `...`.
converted.dat <- fromFILEStoSERIES(dat = mdat,
                                   series_title = "Series title",
                                   files = "index",
                                   series_scope_note = "Series scope note",
                                   series_date_range = "Hoover date range",
                                   scope_and_content = "Scope.and.content",
                                   problems_notes = "Series scope note",
                                   box_barcode = "Box_Barcode",
                                   ckey = "Ckey.x",
                                   top_container = "Final.Box..")
converted.dat$Date <- dateReformatter(converted.dat$Date)

# Keep only the columns needed for the ArchivesSpace export, in this order
archivesspace_columns <- c("Title", "Hierarchical_Relationship",
                           "Processing_Information", "CkeyV",
                           "Description_Level", "Date",
                           "Top_Container_[indicator]", "Box_Barcode",
                           "Scope_and_content")
convertedtoArchivesSpace <- subset(converted.dat,
                                   select = archivesspace_columns)

# Save file in xlsx to preserve diacritic characters
#write.xlsx(convertedtoArchivesSpace, "convertedtoArchivesSpace.xlsx", sheetName = "ArchivesSpace", col.names = TRUE)