readXML4R <- function(file){
# CHECK IF FILE EXISTS
if(!file.exists(file)) stop(paste0("'", file, "' does not exist."))
# READ IN FILE
read_lines <- readLines(file)
# COLLAPSE AT LINE BREAKS IN FILE
read_lines <- paste(read_lines, collapse="\n")
# ADD LINE BREAKS WHERE NOT IN ORIGINAL FILE
read_lines <- gsub('(>)([[:print:]])', '\\1\n\\2', read_lines)
read_lines <- gsub('([[:print:]])(</)', '\\1\n\\2', read_lines)
# SPLIT AT LINE BREAKS
lines <- strsplit(x=read_lines, split="\n")[[1]]
lines <- gsub("^[\t]*", "", lines)
# DEFAULT INITIAL OBJECT TYPE
object_type <- 'vector'
# SET RETURN LIST
read_xml_lines <- readXMLLines(lines)
rlist <- list(read_xml_lines$rlist)
names(rlist)[1] = read_xml_lines$obj.name
rlist
}
readXMLLines <- function(lines, line.init=1){
rlist <- list()
prev_line_type <- ''
# READ EACH LINE
i <- line.init
while(i <= length(lines)){
# SET LINE
line <- lines[i]
#cat(i, ':', line, '\n')
# SKIP EMPTY LINES
if(line == ""){
i <- i + 1
next
}
# CHECK FOR OBJECT OPEN/CLOSE TAG
reg_expr_open <- regexpr(pattern='<[[:alnum:]|_.]+[ |>]', text=line)
reg_expr_close <- regexpr(pattern='</[ ]*[[:alnum:]|_.]+[ ]*>', text=line)
# SET LINE TYPE
line_type <- 'fill'
if(reg_expr_open > 0) line_type <- 'object_open'
if(reg_expr_close > 0) line_type <- 'object_close'
# GET OBJECT DATA
if(line_type == 'object_open'){
if(prev_line_type == 'object_open') {
read_xml_2_list <- readXMLLines(lines, line.init=i)
if(is.null(rlist)) rlist <- list()
if(is.matrix(rlist) && nrow(rlist) == 0) rlist <- list()
rlist[[read_xml_2_list$obj.name]] <- read_xml_2_list$rlist
#print(rlist)
i <- read_xml_2_list$line.init
next
}
# GET OBJECT NAME
object_name <- gsub("<|[\t]", "", substr(line, reg_expr_open, reg_expr_open+attr(reg_expr_open,"match.length")-2))
# CHECK FOR OBJECT TYPE
reg_expr <- regexpr(pattern='type=[\'|\"]?[[:alnum:]|.]+[\'|\"]?', text=line)
if(reg_expr > 0){
object_type <- gsub('[\'|\"]', "", substr(line, reg_expr+5, reg_expr+attr(reg_expr,"match.length")-1))
}else{
object_type <- 'vector'
}
# CHECK FOR AS NUMERIC
if(grepl(pattern='as[.]?numeric=[\'|\"]?true[\'|\"]?', x=line, ignore.case=TRUE)){is_numeric <- TRUE}else{is_numeric <- FALSE}
# CREATE EMTPY VECTOR
if(object_type == 'vector') rlist <- c()
if(object_type == 'matrix'){
# GET NUMBER OF TABS IN FIRST ROW
ntab <- length(strsplit(x=lines[i+1], split='\t')[[1]]) - 1
# FIND IF INCLUDES ROWNAMES
if(grepl(pattern='rownames=[\'|\"]?true[\'|\"]?', x=line, ignore.case=TRUE)){add_rownames <- TRUE}else{add_rownames <- FALSE}
if(add_rownames){
# CREATE EMPTY VECTOR FOR ROWNAMES
row_names <- c()
ncol <- ntab
}else{
ncol <- ntab + 1
}
# CREATE EMTPY MATRIX
rlist <- matrix(NA, nrow=0, ncol=ncol)
}
if(object_type == 'list'){
# FIND IF INCLUDES NAMES
if(grepl(pattern='names=[\'|\"]?true[\'|\"]?', x=line, ignore.case=TRUE)){add_names <- TRUE}else{add_names <- FALSE}
# CREATE EMTPY LIST
rlist <- list()
}
}else if(line_type == 'fill'){
if(object_type == 'vector'){
data_values <- line
# SET AS NUMERIC
if(is_numeric) data_values <- suppressWarnings(as.numeric(data_values))
rlist <- c(rlist, data_values)
}else if(object_type == 'matrix'){
line_split <- strsplit(x=line, split='\t')[[1]]
if(add_rownames){
# ADD ROWNAMES
row_names <- c(row_names, line_split[1])
# GET DATA VALUES
data_values <- line_split[2:length(line_split)]
}else{
# GET DATA VALUES
data_values <- line_split[1:length(line_split)]
}
# SET AS NUMERIC
if(is_numeric) data_values <- suppressWarnings(as.numeric(data_values))
# CHECK THAT NUMBER OF DATA VALUES MATCHES NUMBER OF COLUMNS IN MATRIX
if(length(data_values) != ncol) stop(paste0("Inconsistent number of columns in row (line ", i, ")."))
rlist <- rbind(rlist, data_values)
}else if(object_type == 'list'){
line_split <- strsplit(x=line, split='\t')[[1]]
if(add_names){
# GET NAME
name <- line_split[1]
# GET DATA VALUES
data_values <- line_split[2:length(line_split)]
}else{
# GET DATA VALUES
data_values <- line_split[1:length(line_split)]
}
# SET AS NUMERIC
if(is_numeric) data_values <- suppressWarnings(as.numeric(data_values))
if(add_names){
rlist[[name]] <- data_values
}else{
rlist[[length(rlist[[object_name]])+1]] <- data_values
}
}
}else if(line_type == 'object_close'){
if(is.matrix(rlist)){
# ADD ROWNAMES
if(add_rownames){
rownames(rlist) <- row_names
}else{
rownames(rlist) <- NULL
}
if(nrow(rlist) == 0) rlist <- NULL
}
return(list(rlist=rlist, obj.name=object_name, line.init=i+1))
}
i <- i + 1
prev_line_type <- line_type
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.