#' Detailed R code parser
#'
#' This is a parser for R code using the same grammar as the usual parser
#' (\code{\link{parse}}) but structuring the information differently.
#'
#'
#' @param file Source file to parse
#' @param encoding Encoding for the file
#' @param text If \code{text} is given, then its content is sent to a temporary
#' file, which is then used as the \code{file} argument.
#' @return The output is a list of expressions similar to the output of
#' \code{\link{parse}} except that expressions are not associated with an
#' inline \code{srcref} information. Instead, each expression has an \code{id}
#' attribute that refers to the data.
#'
#' The \code{data} attribute is a data frame with the following columns:
#' \item{line1,col1,byte1}{First line, column, and byte of the symbol (token or
#' expression)} \item{line2,col2,byte2}{Last line, column, and byte of the
#' symbol (token or expression)} \item{token}{Token type.} \item{id}{Unique
#' identifier given to this token or expression} \item{parent}{identifier for
#' the expression that contains this token or expression} \item{token.desc}{A
#' description of this token type. This is read from the output generated by
#' bison}
#' @note The location information is different to that of R's
#' \code{\link{parse}} function, \code{\link{parse}} structures columns and
#' bytes information to comply with the \code{\link{substring}} function
#' whereas this function returns the information as an offset to the start of
#' the line.
#' @author Romain Francois <romain@@r-enthusiasts.com>
#' @seealso The usual R parser \code{\link{parse}}
#' @references This function is largely inspired from the R core
#' \code{\link{parse}} function. The C code uses the same grammar as the one
#' used by \code{\link{parse}}.
#'
#' The Gnu bison parser generator (\url{http://www.gnu.org/software/bison/}) is
#' used to create the C code from the grammar.
#' @keywords manip
#' @examples
#'
#'
#' f <- system.file( "example", "f.R", package = "parser" )
#' out <- parser( f )
#'
#'
#' @export
parser <- function( file, encoding = "unknown", text ){
# go to a temporary file to deal with the text argument,
# functions and connections (I know it is ugly)
if( !missing( text ) ){
file <- tempfile( );
cat( text , file = file, sep = "\n" )
} else if( inherits(file,"function") ){
source <- attr( file, "source" )
if( is.null(source ) ){
source <- deparse( file )
}
file <- tempfile( )
writeLines( source, file )
} else if( inherits( file, "connection" ) ){
sc <- summary( file )
text <- readLines( file )
file <- tempfile()
writeLines( text, file )
}
# this is where the actual work takes place
p <- .External( "do_parser", file = file, encoding = encoding )
# formatting the results a bit
data <- as.data.frame( t(attr(p,"data")) )
colnames( data ) <- c( "line1", "col1", "byte1",
"line2", "col2", "byte2", "token", "id", "parent" )
data[["top_level"]] <- .Call( "top_level", data$parent, PACKAGE = "parser" )
# populate token.desc and terminal
m <- match( data$token, grammar_symbols$token )
data$token.desc <- as.character(grammar_symbols$desc)[ m ]
data$terminal <- grammar_symbols$terminal[m]
# go back to C to grab the actual tokens
data$text <- character( nrow(data) )
toks <- getTokens( data= data[ data$terminal, , drop = FALSE],
encoding = encoding, file = file,
sort = FALSE )
data$text[ match( toks$id, data$id) ] <- toks$text
attr( p, "data" ) <- data
attr( p, "file" ) <- file
attr( p, "encoding") <- encoding
oldClass( p ) <- "parser"
p
}
#' Get the child expression of an expression in the parse tree
#'
#'
#' @param x Output from the \code{\link{parser}} function
#' @param i index of the top-level expressions we want the childs of. The
#' default value (0) means all top level expressions
#' @param parent id of the parent expressions as found in the id column
#' @return An integer vector giving all the ids of symbols that are childs of
#' one of the parent expression
#' @author Romain Francois <romain@@r-enthusiasts.com>
#' @seealso \code{\link{parser}}
#' @keywords manip
#' @export
getChilds <- function( x, i = 0,
parent = sapply( x[i], function(.) attr(.,"id" ) ) ){
if(missing(parent) && ( missing(i) || is.null(i) || i==0 ) ){
return( attr(x, "data")[,'id'] )
}
all.childs <- c()
data <- attr( x, "data" )
parents <- abs( data[, "parent"] )
id <- data[, "id" ]
childs <- function( index ){
kids <- id[ parents %in% index ]
if( length(kids) ){
all.childs <<- c( all.childs, kids )
childs( kids )
}
}
childs( parent )
sort( all.childs )
}
getDirectChilds <- function( x, parent = 0, data = attr(x, "data") ){
parents <- abs( data[, "parent"] )
id <- data[, "id" ]
id[ parents %in% parent ]
}
isTerminal <- function(x, id = 0, data = attr(x, "data") ){
terminal <- data[["terminal"]]
ids <- data[["id"]]
if( ! id %in% ids ){
FALSE
} else{
terminal[ ids %in% id ]
}
}
#' Gets the terminal tokens
#'
#' Retrieve the terminal nodes from a parser object.
#'
#' @param x the parser object.
#' @param data the data attribute of x
#' @param encoding The encoding used on the file
#' @param file the file x came from.
#' @param sort Does the data need to be sorted?
#'
#' @export
getTokens <- function( x,
data = attr( x, "data" )[ attr(x, "data")[["terminal"]], ],
encoding = attr( x, "encoding"),
file = attr( x, "file" ),
sort = TRUE ){
if( sort ){
data <- data[ do.call( order, data[, c("line1", "col1") ] ), ]
} else{
if( is.unsorted(data[["line1"]] ) ){
stop( "data is not in increasing order of line1" )
}
}
data.frame( id = data[, "id"],
text = .External( "do_getTokens",
file = file,
encoding = encoding,
line1 = data[, "line1" ],
col1 = data[, "byte1" ],
line2 = data[, "line2" ],
col2 = data[, "byte2" ],
PACKAGE="parser"),
stringsAsFactors = FALSE
)
}
#' @name grammar_symbols
#' @title Parser Grammar Symbols
#' @docType data
#' @aliases grammar_symbols
#' @description
#' A data table with the symbols used in the parser.
#' @usage
#' grammar_symbols
#' @format
#' A data frame with \code{desc}, the description;
#' \code{token}, the number identifier of the class;
#' and \code{terminal} if the class constitutes terminal nodes.
#' @keywords datasets
NULL
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.