R/parser.R

Defines functions parser getChilds

Documented in getChilds parser

parser <- function( file, encoding = "unknown", text ){
	# go to a temporary file to deal with the text argument, 
	# functions and connections  (I know it is ugly)
	if( !missing( text ) ){
		file <- tempfile( );
		cat( text , file = file, sep = "\n" )
	} else if( inherits(file,"function") ){
		source <- attr( file, "source" )
		if( is.null(source ) ){
			source <- deparse( file )
		}
		file <- tempfile( )
		writeLines( source, file )
	} else if( inherits( file, "connection" ) ){
		sc <- summary( file )
		text <- readLines( file )
		file <- tempfile()
		writeLines( text, file )
	}
	
	# this is where the actual work takes place
	p <- .External( "do_parser", file = file, encoding = encoding )
	
	# formatting the results a bit
	data <- as.data.frame( t(attr(p,"data")) )
	colnames( data ) <- c( "line1", "col1", "byte1", 
		 	"line2", "col2", "byte2", "token", "id", "parent" )
	
	data[["top_level"]] <- .Call( "top_level", data$parent, PACKAGE = "parser" )
	
	# populate token.desc and terminal
	m <- match( data$token, grammar_symbols$token )
	data$token.desc <- as.character(grammar_symbols$desc)[ m ]
	data$terminal <- grammar_symbols$terminal[m]
	
	# go back to C to grab the actual tokens
	data$text     <- character( nrow(data) )
	toks <- getTokens( data= data[ data$terminal, , drop = FALSE], 
		encoding = encoding, file = file, 
		sort = FALSE )
	data$text[ match( toks$id, data$id) ] <- toks$text
	attr( p, "data" ) <- data
	attr( p, "file" ) <- file
	attr( p, "encoding") <- encoding
	oldClass( p ) <- "parser"
	p
}

getChilds <- function( x, i = 0, 
	parent = sapply( x[i], function(.) attr(.,"id" ) ) ){
	
	if(missing(parent) && ( missing(i) || is.null(i) || i==0 ) ){
		return( attr(x, "data")[,'id'] )
	}
	all.childs <- c()
	data    <- attr( x, "data" )
	parents <- abs( data[, "parent"] )
	id      <- data[, "id" ]
	childs  <- function( index ){
		kids <- id[ parents %in% index ]
		if( length(kids) ){
			all.childs <<- c( all.childs, kids )
			childs( kids )
		}
	}
	childs( parent )
	sort( all.childs )
}

getDirectChilds <- function( x, parent = 0, data = attr(x, "data") ){
	parents <- abs( data[, "parent"] )
	id      <- data[, "id" ]
	id[ parents %in% parent ]
}

isTerminal <- function(x, id = 0, data = attr(x, "data") ){
	terminal  <- data[["terminal"]]
	ids       <- data[["id"]]
	if( ! id %in% ids ){
		FALSE
	} else{
		terminal[ ids %in% id ]
	}
}


#' Gets the terminal tokens
getTokens <- function( x, 
	data = attr( x, "data" )[ attr(x, "data")[["terminal"]], ], 
	encoding = attr( x, "encoding"), 
	file = attr( x, "file" ), 
	sort = TRUE ){
	
	if( sort ){
		data <- data[ do.call( order, data[, c("line1", "col1") ] ), ]
	} else{
		if( is.unsorted(data[["line1"]] ) ){
			stop( "data is not in increasing order of line1" )
		}
	}
	data.frame( id = data[, "id"], 
		text = .External( "do_getTokens", 
			file = file, 
			encoding = encoding, 
			line1 = data[, "line1" ], 
			col1  = data[, "byte1"  ], 
			line2 = data[, "line2" ], 
			col2  = data[, "byte2"  ] ), 
		stringsAsFactors = FALSE
		)
	
}


#' parses the grammar.output file that is generated by bison
grammar.symbols <- function(  ){
	# the output file from bison
	gram.output.file <- system.file( "grammar", "gram.output", package = "parser" )
	rl <- readLines( gram.output.file ) 
	
	.extract <- function( start.rx, end.rx, type = "terminal" ){
		start <- grep( start.rx, rl )[1] + 1L
		end <- grep( end.rx, rl)[1] - 1L
		
		rl <- rl[ start:end ]
		rx <- "(^.*) \\((\\d+)\\).*"
		rl <- grep( rx, rl, perl = T, value = T )
		desc   <- gsub( rx, "\\1", rl, perl = TRUE )
		token  <- as.integer( gsub( rx, "\\2", rl, perl = TRUE ) )
		data.frame( desc = desc, token = token, 
			terminal = rep.int( type , length(token) ), 
			stringsAsFactors = FALSE )
	}
	rbind( 
		.extract( 
			"^Terminals, with rules where they appear",
			"^Nonterminals, with rules where they appear", 
			TRUE ), 
		.extract( 
			"^Nonterminals, with rules where they appear",
			"^state 0", 
			FALSE )
		)
}

grammar_symbols <- grammar.symbols()

Try the parser package in your browser

Any scripts or data that you put into this service are public.

parser documentation built on May 2, 2019, 6:49 p.m.