R/CmdInterpret.R
In rFocal: Run 'FOCAL' Language Source Code

# basic command interpreters
# not needed: MODIFY is an interactive ; ERASE clears program memory
# BE IT DECLARED:  each func sends back the next idx to be used.
# fIXED BUG  in calls below top-level, "=" does not bind the object, so replace with "<-"

# notice that the code produced is ready to save result into fenv
 makeSET <- function(theidx, thecmd, crunch, strnum,fenv) {
 	thecmd <- trimws(thecmd)
 	# the preparsing makes the existing 'thecmd' good R code already
 	thecmd <- gsub("=", " <<- ",thecmd ) 
 	#  get ready for eval(parse), 
	thecmd <- paste0(thecmd,collapse='')
 	thevar <- trimws(gsub('<<-.{1,}$','', thecmd) )
	#initialize it if necessary,  and remove any vector brackets before doing so
	varname <- gsub('[[].{1,}[]]', '', thevar)
	if(!exists(varname,envir=fenv,inherits=FALSE)) 	assign(varname,NULL, envir=fenv)
 	return( list(rcode = thecmd, newidx = theidx+1))
 }

# TODO: make sure an IF() 4,RETURN,5 will work
# since the prepareCode regex fouls up brackets in IF(...) ,  fix it here
makeIF  <- function(theidx, thecmd, crunch, strnum, fenv) {
# IF(Value) a,b,c  (less than, equal, greater than)
# IF(Value) a,b;  means ignore Value > 0 and so on
#  Note: RETURN is an allowed action
#  a line number means GOTO 
# BUGFIX - replacing " [ ]" with "( )" in prepareCode was a regex nightmare, so do it here
#BUT AGAIN, don't do it globally
foof <- unlist(strsplit(thecmd,''))
	fop <- grep('\\[',foof)
	fcl <- grep('\\]',foof)
# always replace first fop with'(' 
#then that sum thing
plen <- length(fop) # if they're different, something's really bad
if(plen > 1) {
	getit <- fcl[1:(plen-1)] <= fop[2:plen]
	# grab first "TRUE"; if none, grab last parencl
	# first parenop is always the start of the "IF(...)"
	if(sum(getit)) theclose <- fcl[getit][1] else theclose <-fcl[plen]
} else theclose <- fcl
foof[fop[1]] <- sub('\\[','(',foof[fop[1]])
foof[theclose] <- sub('\\]',')',foof[theclose])
thecmd <- paste0(foof,collapse='')
#BUGFIX2: get rid of the global "1+" 
thecmd <- gsub('\\(1[+]','(',thecmd)
# put in spaces so that parenop/parencl work right
 thecmd <- gsub('[(]', ' ( ', thecmd)
thecmd <- gsub('[)]', ' ) ', thecmd)
tmp <- unlist(strsplit(thecmd, ' ')  ) 
# if there's parens inside the if Value Have to do that 'count up; count down'
# thing to match the first "(" is always what we want.  but the ")" we want requires counting.
	parenop <- grep('\\(',tmp)
	parencl <- grep('\\)',tmp)
	plen <- length(parenop) # if they're different, something's really bad
	if(plen > 1) {
		getit <- parencl[1:(plen-1)] <= parenop[2:plen]
		# grab first "TRUE"; if none, grab last parencl
		# first parenop is always the start of the "IF(...)"
		if(sum(getit)) theclose <- parencl[getit][1] else theclose <-parencl[plen]
	} else theclose <- parencl[plen]
	focif <- paste0(tmp[parenop[1]:theclose], collapse = ' ' )
# run parsefoc on tmp[(parencl+1):length(tmp)], which requires recombining and splitting on ',',
# then if tmp is shorter than 4, do tmp <- c(tmp,'' ,'') as needed
# ops to perform:
# 1 if no alpha chars, convert the 'x.y' to a numeric pair and prepend a 'goto'
# 2 send each cmd thru parsefoc 
# 3 follow design in makeGOTO to make the result of makeIF be a jump to desired idx
ifcmd <- tmp[-(parenop[1]:theclose)]
tricmd <- trimws(unlist(strsplit(paste0(c(ifcmd),collapse=' '),',')))
ifops <- list()
oplen <- length(tricmd)
for(jc in 1:oplen){
	if(!length(grep('[a-z]',tricmd[jc]))) {
# it's a x.y for a goto
		ifops[[jc]] <- c('goto',tricmd[[jc]])		
	} else ifops[[jc]] <- unlist(strsplit(tricmd[jc] ,' ') )
	# parse the commands so switch-builder's output is 
# R code (string) that I can evalparse. 	
	tmpparse <- parsefoc(theidx,ifops[[jc]],crunch,strnum, fenv )
	ifops[[jc]] <- tmpparse$rcode
}
# all I want from a goto is to set the nextidx, 
		ifout <- paste0(c('val <- sign(',tmp[parenop[1]:theclose],') + 2 ;'),collapse=' ')	
switchout <- 'switch(val  '
for(jcmd in 1:length(ifops) ) {
	switchout <- c(switchout,', ', ifops[[jcmd]])
} 
switchout <- c(switchout, ')')
rcode <- paste0(c(ifout,switchout) ,collapse = '  ')
return(list(rcode = rcode, newidx = theidx+1) )
}  # end of makeIF

# ask in focal may or may not have a quote string
# readline coerces to string
makeASK   <- function(theidx, thecmd, crunch, strnum, fenv) {
# 
quotloc <- gregexpr('"',thecmd)[[1]]
comloc <- gregexpr(',',thecmd)[[1]]
# replace quoted commas with a fixit string
qlen <- length(quotloc)
if(as.logical(qlen)){
	splitc <- unlist(strsplit(thecmd,'') )
	for(jq in seq(1,qlen,by=2)){
		comtmp <- comloc[comloc > quotloc[jq] & comloc < quotloc[jq+1]]
		if(length(comtmp)) splitc[comtmp] <- 'ZZrealCommaZZ'
	}
	thecmd <- paste0(splitc,collapse='')
}	
# replace '\"' with ' ZQUOTEZ ' so a single el't of thecmd which is an entire quote gets handled correctly. 
#  fix the '!' as linefeed,so it gets the ZQUOTEZ treatment as well. 
# # look for all typtmp[j] for which grep('"') fails AND grep('!') passes.
allwants <- gsub('\\\"',' ZQUOTEZ ',thecmd)
allwants <-  unlist(strsplit(allwants,' ')) #will be class character
# BUG (see also makeTYPE)- don't want to lose commas inside quoted string.

allwants <- unlist(strsplit(allwants,',')) # get rid of commas but keep vars separate
splats <- trimws(allwants) == '!'
allwants[splats] <- ' ZQUOTEZ \\n ZQUOTEZ '
# do the split on space again
allwants <-  unlist(strsplit(allwants,' ')) #will be class character
# remove "" items from allwants
allwants <-  allwants[nchar(allwants)>0]
# don't prepend the variable name, but make it the first arg to runASK()
pstart <- 'runASK( '
pend <- ': " )'
bar <- '\n' 
pbar <- ';' #force end of command indicator on every readline()
wantout <- NULL
wanttmp <-NULL
# check for quote string(s)
gotquote <- grep('ZQUOTEZ',allwants)
if(length(gotquote)) {
	theprompt <- NULL
#  need to catch gotquote[1] ==1; this if() catches an input without prompt string
	if(gotquote[1] > 1) 	wanttmp <- allwants[1:(gotquote[1]-1)]
	maxq <- (length(gotquote)/2)
	for(jq in 1:maxq) {
		theprompt[jq] <- paste0(allwants[gotquote[2*jq -1]:gotquote[2*jq] ],collapse = ' ')
#h add a ';' to separate prompt from the readline command line
		wantout[jq] <- paste0('cat (',theprompt[jq],bar,');')
# now grab everything up to next quote
# don't do this if we're out of gotquote.
		if(jq < maxq) {
# rbind won't work - dimensioning
			if((gotquote[2*jq+1]) - (gotquote[2*jq]) != 1) {
				tmpchunk <- allwants[(gotquote[2*jq]+1):(gotquote[(2*jq)+1]-1)]
				wanttmp <- c(wanttmp, tmpchunk )
			}
		}
	}
	# grab leftovers
	if (length(allwants) > max(gotquote)) {
		wanttmp <- c(wanttmp,allwants[(max(gotquote)+1):length(allwants)])
	}
	allwants <- wanttmp
} #end of if length(gotquote)
# remove "" items from allwants
allwants <-  allwants[nchar(allwants)>0]
# some lines will be text to cat(), others will simply call runASK with a single  variable
# initialize each var in fenv envir
for(jw in 1: length(allwants)) {
# extra prompt for each item	
# make CMD check happy,- pass fenv
		ltmp <- paste0(pstart, '"',allwants[jw], '",fenv=fenv);',collapse =  ' ')
		wantout <- c(wantout,ltmp)
#   first see if it exists, and if not, create it
# zeroth: parse out  "k[j]" since i do NOT want to creat a var named "k[j]" !!
	varname <- gsub('[[].{1,}[]]', '', allwants[jw])
	if(!exists(varname,envir=fenv,inherits=FALSE)) 	assign(varname,NULL, envir=fenv)
}
#finally, replace ZQUOTEZ 
wantout <- gsub('ZQUOTEZ','\\\"',wantout)
wantout <- gsub('ZZrealCommaZZ',',',wantout)
 return(  list(rcode = wantout, newidx = theidx + 1))
}

# GOTO will operate solely on 'x.yy'
#theidx, thecmd[-1], crunch, strnum, fenv=fenv
makeGOTO <- function(theidx, thecmd, crunch, strnum, fenv) {
if(!grepl('\\.',thecmd)) stop('Illegal line number ' , thecmd)
 gnum <- (unlist(strsplit(thecmd,'\\.')))
# check for "short" minor number
 str2 <- gnum[2]
str1 <- as.numeric(gnum[1])
if(nchar(str2) < 2) str2 <- paste0(str2,'0',collapse= '')
gnum <- as.numeric(c(str1,str2))
# manual: "The numbers 1.00, 2.00,etc., are illegal line numbers; they are used to indicate the entire group. "  
# manual: GOTO must be to a specific line number, so not GOTO 3 either
if(length(gnum) < 2) {
	gnum[2] <- strnum[which(strnum[,1] == gnum)[1],2]
}
gidx <- intersect(which(strnum[,1]== gnum[1]), which(strnum[,2] ==gnum[2]))
if (!length(gidx)) stop('GOTO directed to nonexistent line.')
seqidx <- gidx[1] # there can be multiple matches. 
	#then return the new seqidx to the 'main' loop or whatever to continue writing
	#  just returning a string to set the new idx
rcode <- paste0('nextidx <- ', seqidx, collapse= ' ')
return(list(rcode = rcode, newidx = seqidx))
}

# 'push' loop index into an fenv object, and update it every cycle
makeDO  <- function(theidx, thecmd, crunch, strnum, fenv) {
# similar to goto, except just grab the designated index but do NOT change seqidx
# if 'y' is zero or doesn't exist, must do all of section x .  
#a GOTO or IF inside the line that DO specified then that's a 'jump' and the rest
# of that group is executed. BUT BUT if GOTO IF specifies Outside the group, then 
# the remainder of the DO-group is NOT executed.
 dnum <- (unlist(strsplit(thecmd,'\\.')))
# check for "short" minor number; there may not be any such
if(length(dnum) > 1) {
	 str2 <- dnum[2]
	str1 <- as.numeric(dnum[1])
	if(nchar(str2) < 2) str2 <- paste0(str2,'0',collapse= '')
	dnum <- as.numeric(c(str1,str2))
}
	# check for section
	doall <- FALSE
	if(length(dnum) == 1 || dnum[2] ==0) doall = TRUE  # no .y exists
	if(doall) {
		didx <- which(strnum[,1] == dnum)  # keep all rows in this group
	} else {
	didx <- intersect(which(strnum[,1]== dnum[1]), which(strnum[,2] ==dnum[2]))
	}			
	if (!length(didx)) stop('DO directed to nonexistent line.')
#TODO: to be able to use as.numeric in runDO, want c('1','3','5') form
# or can I just use didx as-is?
	idxargs <- paste0(didx, collapse = ', ')
# note: need strnum to handle multiple commands in a called GOTO line
	result <- paste0(c('runDO(c(',idxargs,'), retcode, strnum, fenv)'), collapse = '') 		
return(list(rcode = result, newidx = theidx + 1))
}		


makeFOR <- function(theidx, thecmd, crunch, strnum, fenv) {
# F0R A=B,C ,D ; COMMAND     # if  no 'C,'  then increment by one . 
# F0R A=B,C ,D ; COMMAND_one ; COMMAND_two     is legal.
#  for (a in seq(B,D, by= C ))
 #  if length(foogrep[[1]]) >0 we grab the first value and
# process , so recursive calls to makeFOR will work their way to the final
# FOR in the original line, then feed code backwards 
foogrep <- grep('f[or]{0,2}',thecmd)
# if we found a FOR, start recursing
foundfor <- length(foogrep)
forbits <- NULL # to be vector of inner loops
if(foundfor) {
	allfor <- thecmd  # just to avoid early clobbering
	for(jf in foundfor:1) {
	barf <- allfor[foogrep[jf]:length(allfor)]
	#reduce the original string
	allfor <- allfor[1:(foogrep[jf] -1)]
	# parsefoc will remove the "FOR " element of barf
	# deliberately fake idx
	forbits[jf] <- parsefoc(-10,barf, crunch, strnum ,fenv)$rcode
	} #  end jf loop
thecmd <- allfor
#  "insert" each forbits[jf] into the next forbits , 
# look for the closing "}"
# if forbits is length 1 I go too far. ; simply if foundfor ==1 I get an NA here. 
#just avoid the for when foundfor ==1 
if(foundfor >= 2){
	for(jc in foundfor:2){
			forbits[jc-1] <- gsub('}', paste0(forbits[jc],' }', collapse = ''), forbits[jc-1])
		}
	}
# so now forbits[1] contains all sub-for-loops 	
 } # end if foundfor

foo <- paste0(thecmd, collapse= ' ')
splitit <- unlist(strsplit(foo,';'))
#input may not have spaces around "=", so fix that here
splitit <- gsub( '=',' = ',splitit)
splitone <- unlist(strsplit(splitit[1],' '))
# safety check:
splitone <- splitone[!(splitone =='')]
#Collapse things
# Note: this leaves cruft in splitone[4:n] but we never call it.
splitone[3] <- paste0(splitone[3:length(splitone)],collapse='')
splitforidx <- unlist(strsplit(splitone[3],','))
thefor <- 'for('
idx <- splitone[1] 
thefor <- c(thefor,idx, 'in seq(', splitforidx[1],  ',')
inext <- splitforidx[2]
if (length(splitforidx) ==2 ){
	thefor <- c(thefor,inext,') ){')
} else {
	thefor <- c(thefor,splitforidx[3],',by = ',inext,') ){ ')
}
# recombine
thefor <- unlist(paste0(thefor, collapse=' '))
# push idx into fenv . because the index in a FOR loop is a funky object,
# must update value every time thru loop
# Note: keep this on the off chance that some code line follwing a FOR
#  makes use of the index value .
assign(idx,NULL,envir=fenv) 
thefor <- paste0(thefor,idx,"<<-",idx,";")
# submit each command to the parsing subroutine, and stack them
thecmds <- trimws(splitit[-1])
# safety catch: if no commands exist, skip this loop
if(length(thecmds)){
	for(jcmd in 1:length(thecmds)) {
	#need to split up the command string again
		splitcmd <- trimws(strsplit(thecmds[jcmd],'\\s{1,}',perl=TRUE)[[1]] )  
		thisLine <- parsefoc(theidx,splitcmd,crunch,strnum ,fenv) 
		thefor <- paste0(thefor,' ',thisLine$rcode,'; ')
	}
}
# depending on source of a parsed command, may have extra ';' so
thefor <- gsub('[;]\\s{0,}[;]',';',thefor)
# NOW, append the 'forbits' to 'thefor' if there are any.  
# remember I collapsed forbits
if(foundfor){	
	thefor <- paste0(c(thefor, forbits[1]),collapse = ' ' )
}
forout <- paste0(thefor, ' }')   # finished off the FOR)
 #note:  paste0 will foul up unless always switch between "  and  ' at 
 # every level of  quote encapsulation
return(list(rcode = forout, newidx = theidx + 1))
} # end of makeFOR


#RETURN only works in conjunction with DO
#just send back a magic word that tells runDO() to finish off
 makeRETURN <- function(theidx, thecmd, crunch,strnum, fenv){
# equivalent to break inside a blob called from DO
# return code that is acceptable to parsefoc() as that's how runDO works
#  newidx here is fake, because makeRETURN has no knowledge of where to go next.
return(list(rcode ="retcheck <- TRUE",  newidx = -10) )
}
#in FOCAL,  Only double quote (") is allowed to indicate text, tho' the text string itself
# can contain a single quote (') .   
makeTYPE <- function(theidx, thecmd, crunch , strnum,fenv) { 
	
#if a comma is inside a quoted string, I don't want to lose it.
qtmp <- paste0(thecmd,collapse='zzspacezz')
quotloc <- gregexpr('"',qtmp)[[1]]
comloc <- gregexpr(',',qtmp)[[1]]
# now replace quoted commas with a fixit string
qlen <- length(quotloc)
if(as.logical(qlen)){
	splitc <- unlist(strsplit(qtmp,''))
	for(jq in seq(1,qlen,by=2)){
		comtmp <- comloc[comloc > quotloc[jq] & comloc < quotloc[jq+1]]
		if(length(comtmp)) splitc[comtmp] <- 'ZZrealCommaZZ'
	}
	qtmp <- paste0(splitc,collapse='')
	thecmd <- unlist(strsplit(qtmp,'zzspacezz'))
}	
typtmp <- unlist(strsplit(thecmd,',') )
# get rid of empty items.
tmpidx <- which(typtmp =='')
if(length(tmpidx)) typtmp <- typtmp[-tmpidx]   # that just removes them
tmplen <- length(typtmp)
# need a space for times when the output of makeTYPE shows up in parsefoc
typinit <- 'cat ('
# format(x, dig=N) is basically sprintf('%k.N',x) where k is expanded as needed,
# just like sprintf() does 
 	percent <- grep('[%]', typtmp)
	if(length(percent) ){
# so skip the format items
# if there's no variable immediately following, this may lead to a blank "TYPE" command.
# make it a space char
		typtmp[percent] <- ' '
#		typtmp <- typtmp[-c(percent)]
	} 
# get rid of empty items
typtmp <- typtmp[which(nchar(typtmp)>0)]
#  find consecutive '\"' and merge them, must do before swapping \n for ! 
# But: they can be consecutive and still contain text! 
# hence the fix to the inner if also:  looping by pairs
# also need a comma after every closing \", which won't happen if there's
# no whitespace between \" and the next item. THus I add spaces everwhere
barw <- grep('\\\"', typtmp)
if (length(barw)>1){
# Hairy bug: the \" can interfere with encapsulated quotes in runXXX. fixed.
# there cannot be a  "\'" in any input string. 
	typtmp <- gsub("\\\"","\" ", typtmp)
#  strsplit on whitespace too
	typtmp <- unlist(strsplit(typtmp, '\\s{1,}',perl=TRUE ))
	barw <- grep('\\\"', typtmp)  # need to recount
	if(length(barw)){
		for(jw in seq(2,length(barw),by=2) ){
			if(barw[jw] -barw[jw-1] == 1) {
				typtmp[barw[jw]] <- paste0('   ',typtmp[barw[jw]],collapse='')
				} 
			}
		}
# while 'loop' here to merge strings
		newtmp <- NULL   #typtmp[1]
		idxm <- 1
		while(idxm <= length(typtmp))	{
#  grep(2,...) will find "2" and "12", so don't do that			
			wstart <- which(idxm == barw)
#  if the last element of typtmp is something like  ' \"foo\"' there is no following
# barw[wstart+1]
			if(length(wstart) && wstart < length(barw)) {
				newtmp <- c(newtmp, paste0(c(typtmp[barw[wstart]:barw[wstart+1]]), collapse=' ') )
				idxm <- barw[wstart+1] +1
			} else{
# not part of string, just pass it along				
				newtmp <- c(newtmp, typtmp[idxm])
				idxm <- idxm +1 
			}
		}	 #end of while	
# finally,
	typtmp <- newtmp	
} #end of if length(barw)
# look for all typtmp[j] for which grep('"') fails AND grep('!') passes.
barc <- grep('[a-z0-9]', typtmp)
bars <- grep('[!]', typtmp)
if(length(bars)) {
	barg <- setdiff(bars,barc)
	typtmp[barg] <- gsub('[!]','\"\n\"', typtmp[barg])
}
#  add the \n here 
#BUG: if this is the 'end' of the command, I do not want a CR/LF unless there was a '!' 
# typtmp <- c(typtmp, '" \n"')
 # clean up again
typtmp <- typtmp[nchar(typtmp)>0]
# intelligent fix, finally....put typinit here
typout <- paste0(c(typinit,typtmp[1]), collapse=' ')
# need commas after 1st item but NOT after penultimate (ultimate is ")"
if(length(typtmp) > 1) {
	typout <- paste0(c(typout ,typtmp[2:(length(typtmp))]), collapse = ' , ')
	}
typout <- paste0(typout, ')', collapse = ' ')
# replace commas
typout <- gsub('ZZrealCommaZZ',',', typout)
return(list(rcode = typout, newidx = theidx+1))
} #end of makeTYPE

# sending a 'return' doesn't  exit far enough, so just send a 'magic word'
makeQUIT<- function(theidx, thecmd, crunch ,strnum, fenv) {
	cmdstr <- 'quitquit'
	# ooops,  to avoid infinite loop, return a big value. as of this version,
	# not actually needed, as runRcode triggers on 'quitquit'
#	newidx <- max(retidx[,1] + 1)
	return( list(rcode = cmdstr, newidx  = theidx) )
}