#'Import from legacy excel files
#'@description Import legacy data from Excel files
#'@param excel.file An Excel of a blank DIMA template
#'@param format Format type, as identified in the reference lookup table
#'@param reference Reference lookup table filepath which identifies excel file formats and corresponding field map
#'@name legacy.format
#'@return A \code {tbl} of all method data in a directory formatted for joining to the DIMA.template.
#'@export
#'@rdname legacy.format
#'
#Make reference tall format
reference.tall<-function(reference){
#Read reference file
reference<-read.csv(reference, stringsAsFactors = FALSE)
#Gather Reference tall
reference.tall<-reference %>% tidyr::gather(key=format, value=cell.ref, -Table, -FieldName)
return(reference.tall)
}
#'@export
#'@rdname legacy.format
#Map excel references to a given field name, given a specific reference table
map.excel<-function(excel.file, reference.tall, field, format){
cell.ref<-reference.tall$cell.ref[reference.tall$FieldName==field &reference.tall$format==format] %>% as.character() %>% strsplit(.,split=",")%>% unlist() %>% gsub("\ ", "", .)
#Check to see if it is a valid excel reference
if(!any((grepl(pattern="[[:digit:]]", cell.ref)&grepl(pattern="[[:alpha:]]", cell.ref)))){
data<-cell.ref
} else if(any(is.na(cell.ref))){
data<-NA
} else if (field=="Date"){
data<-tryCatch(readxl::read_excel(path=excel.file,sheet =format,range=cell.ref, col_types = "date", col_names=FALSE)[[1]]%>% as.character, error=function(e) return(NA))
} else{
#Define rows
rows<-gsub(pattern="[[:alpha:]]", "", x=cell.ref)%>% unique()%>%
strsplit(., split = ":")%>% unlist()%>% as.numeric
rows<-seq(min(rows), max(rows))
#Define cells
cols<-strsplit(cell.ref, split = ":")%>% unlist() %>% openxlsx::convertFromExcelRef() %>% unique()
#Read in row/column specific data from R
data<-tryCatch(openxlsx::read.xlsx(excel.file, sheet=format, rows=rows, cols=cols, colNames=FALSE, skipEmptyRows=FALSE)%>% unlist(),
warning=function(w) return(NA))
}
return(data)
}
#'@export
#'@rdname legacy.format
build.header<-function(excel.file, reference.tall, format){
#Build Header Data Frame
header.fields<-reference.tall$FieldName[reference.tall$Table=="Header"&reference.tall$format==format]
#Build Header Data Frame
header.data<-data.frame(FieldName=header.fields,
value=sapply(X=header.fields, FUN=function(f=X){
print(f)
data<-map.excel(excel.file = excel.file, format = format, reference.tall=reference.tall,field=f)
assign(paste(f), data)
})) %>%
#Spread so that the response values are a row
tidyr::spread(key = FieldName, value=value)%>% dplyr::mutate(excel.file=excel.file)
#Return Header Data
return(header.data)
}
#'@export
#'@rdname legacy.format
#Build Detail Table
build.detail<-function(excel.file, reference.tall, format){
detail.fields<-reference.tall$FieldName[reference.tall$Table=="Detail"&reference.tall$format==format]
#Build data frame
detail.data<-lapply(X=detail.fields, FUN=function(f=X){
print(f)
df=data.frame(data=map.excel(excel.file = excel.file, format = format, reference.tall=reference.tall,field=f)%>% as.character(),
FieldName=f) %>% dplyr::mutate(id=1:n(), excel.file=excel.file)
df})%>%
do.call(rbind, .) %>%
#Spread so that the response values are a row
tidyr::spread(key = FieldName, value=data)%>% dplyr::select(-id)
}
#'@export
#'@rdname legacy.format
#'
# join.header.detail<-function(excel.file, reference.tall, format){
# lapply(X=formats, FUN=function(X){
# #Gather header and detail file
# header<-build.header(excel.file=excel.file, reference.tall=reference.tall, format = X)
# detail<-build.detail(excel.file=excel.file, reference.tall=reference.tall, format = X)
# #Join header and detail
# dplyr::left_join(header, detail)
#
# })%>% do.call(rbind, .)
# }
#Put it all together for an import function
xlsx2R<-function(folder, reference, out){
#Build reference tall data frame
reference.tall<-reference.tall(reference=reference)
files<-list.files(folder, full.names = TRUE, recursive=TRUE)%>% subset(grepl(pattern = ".xlsx$", x=.)&!grepl(pattern = "~", x=.))
if(file.exists(out)){
read.files<-read.csv(out) %>% dplyr::select(excel.file)%>% unique()
#Remove files that have already been read
files<-files[!files %in% read.files$excel.file]
}
#For each excel file in the folder, pull the relevant data
lapply(X=files, FUN=function(X){
format<-openxlsx::getSheetNames(X) %>% subset(. %in% unique(reference.tall$format))
#If the result of format is a character string, then gather header and detail file
if(length(format)>0){
print(X)
#Gather header and detail file
header<-build.header(excel.file=X, reference.tall=reference.tall, format = format)
detail<-build.detail(excel.file=X, reference.tall=reference.tall, format = format)
#Join header and detail
header.detail<-dplyr::left_join(header, detail) %>% dplyr::mutate(Format=format)
#save the output to a csv
if(!file.exists(out)){
write.table(header.detail, out, append=FALSE, col.names = TRUE, row.names = FALSE, sep=",")
} else {
write.table(header.detail, out, append=TRUE, col.names = FALSE, row.names = FALSE, sep=",")
}
} else{
warning(paste("No valid data for import.", paste(X), "will be ignored", sep=" "))
print(X)
}})
#Return the completed file from the compiled csv
compiled.csv<-read.csv(out, colClasses = "character" ) %>% unique()
#Fix Shrubshape into a single field (if they exist)
if("ShrubShape1" %in% colnames(compiled.csv)){
compiled.csv$ShrubShape<-paste(compiled.csv$ShrubShape1, compiled.csv$ShrubShape2, compiled.csv$ShrubShape3, compiled.csv$ShrubShape4, sep="")%>%
stringr::str_replace_all("NA|Na", "")
compiled.csv<-dplyr::select(compiled.csv, -c(ShrubShape1:ShrubShape4))
}
#Return
return(compiled.csv)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.