R/compile.R

## compile.R - compile R functions
# Some of this was inspired by Tierney's compiler package.

MathOps = c("+", "-", "*", "/", "%/%", "^")
LogicOps = c("<", ">", "<=", ">=", "!=", "==", "!")

#??? Kill off
XXXX.assignHandler =
function(call, env, ir)
{
   args = call[-1]

   val = compile(args[[2]], env, ir)

          # CreateLocalVariable, with a reference in the
          # environment, and then CreateStore of the value.
   checkArgs(args, list(c('character', 'symbol'), 'ANY'), '<-')
     # XXX may not be a variable
   if(is.name(var))
      var <- as.character(args[1])
   
   val <- args[[2]]
   if (is.na(findVar(var, env))) {
        # Create new local store, TODO remove the type here and infer it
     type = getDataType(val, env)
     assign(var, createFunctionVariable(type, var, env, ir), envir = env) ## Todo fix type
     env$.types[[var]] = type
   }
   
   ref <- get(var, envir = env)

      # Now, create store. TODO: how does this work *without*
      # constants? Where is evaluation handled... probably not
      # here?
   createStore(ir, val, ref)
 }

getBasicType =
function(call)
{
   id =  as.character(call[[1]])
   switch(id,
            integer = Int32Type,
            string = ,
            character = StringType,
            numeric = DoubleType,
            logical = Int1Type,
            float = FloatType)
}

isPrimitiveConstructor =
function(call)
{
  if(is.call(call) && as.character(call[[1]])  %in% c("integer", "string", "character", "numeric", "logical", "float"))
    TRUE
  else
    FALSE
}

isSubsettingAssignment =
function(call)
{  
  length(call) > 1 &&
   is.call(tmp <- call[[2]]) &&
    as.character(tmp[[1]]) %in% c("[", "[[")
}

assignHandler = `compile.=` =   # `compile.<-` 
  # Second version here so I don't mess the other one up.
  #
  # XXX   This is now getting to long. Break it up and streamline.
  #
function(call, env, ir, ..., .useHandler = TRUE)
{

#   if(.useHandler && !is.na(i <- match(as.character(call[[1]]), names(env$.compilerHandlers)))) 
#       return(env$.compilerHandlers[[i]](call, env, ir, ...))
   
   args = call[-1]  # drop the = or <-
   stringLiteral = FALSE
   type = NULL


#XXX may not need to do this but maybe can compile the RHS as usual.
   if(isSubsettingAssignment(call) && is(ty <- getElementAssignmentContainerType(call[[2]], env), "STRSXPType")) {
     return(assignToSEXPElement(call[[2]], call[[3]], env, ir, type = ty))
   }
   
       # look at the RHS
   if(isLiteral(args[[2]])) {  #!! these are the args, not the call - so first element is not = or <-, but the LHS.
         # Do we need to eval() this. If it is really literal, no.
      tmp = val = eval(args[[2]])
      ctx = getContext(env$.module)

      lhs.type = getDataType(args[[1]], env)

      if( !is.character(tmp) && (is.null(lhs.type) || !sameType(DoubleType, lhs.type))  && env$.integerLiterals  && val == floor(val) ){
          tmp = val = as.integer(val)
      }
      if(!is.null(lhs.type))
          type = lhs.type
      else
           type = getDataType(I(val), env)
      
      val = makeConstant(ir, val, type, ctx)
      type = getDataType(val, env)
      if(is.character(tmp)) 
         stringLiteral = TRUE

   } else if(FALSE && isPrimitiveConstructor(args[[2]])) {  # what does skipping this break? any code where we have an integer() or whatever and use it as int *
       # so this is probably just defining a variable.
       # Use the type of the RHS to create the variable.
       # Perhaps just change compile.call to handle these functions
       # specially  and return a val.
       #
       #  Need to know if we need to create a SEXP or just the corresponding native type
       # i.e. an INTSXP or an int [n]

       type = getBasicType(args[[2]])
       val = NULL
    } else
      val = compile(args[[2]], env, ir)

   

   if(is.name(args[[1]])) {
         var = as.character(args[[1]])

      # We don't search parameters for the var name, since we don't
      # want to try to assign over a parameter name.
      #XXX  I think we do want to mimic that behaviour but understand which local variable that corresponds to.
         ref <- getVariable(var, env, ir, load = FALSE, search.params = FALSE)
         if(is.null(ref)) {

                 # No existing variable; detect type and create one.
          if(is.null(type)) 
            type = env$.localVarTypes[[var]]

          if(is.null(type)) 
             type = getDataType(var, env)

                 # didn't get a type from the variable, so look at the RHS.
          if(is.null(type)) 
             type = getDataType(val, env, call[[3]])

        
        if (is.null(type)) {
                   # Variable not found in env or global environments; get type via Rllvm
          if (is(val, "StoreInst")) {
            # This is from the val = compile(); probably from a
            # statement like: y <- 4L; x <- y. When args[[2]] is
            # compiled above, getVariable returns an object of class
            # StoreInst. We ignore the current val, and instead query
            # the type from the variable.
            type = getDataType(args[[2]], env)
          }

          if(is(val, "Value"))
            type = getDataType(val, env)
      }
          #XXXX Merge with compile.character
         if(stringLiteral) {  # isStringType(type)) 
           gvar = createGlobalVariable(sprintf(".%s", var), env$.module, type, val, TRUE, PrivateLinkage)
           val = getGetElementPtr(gvar, ctx = ctx)
           type = StringType
         }

         assign(var, ref <- createFunctionVariable(type, var, env, ir), envir = env) ## Todo fix type and put into env$.types
         env$.types[[var]] = type
       }
   } else {

      expr = args[[1]]

         # XXX  have to be a lot more general here, but okay to be simple for now (Apr 26 2013).
      if(is(ty <- getElementAssignmentContainerType(expr, env), "SEXPType")
          && is.null(expr <- assignToSEXPElement(expr, val, env, ir, ty)))
            return(val)

      #XXXX What does removing load = FALSE affect. Find examples of where this breaks matters.
      # fuseLoops?
        ref = compile(expr, env, ir, ..., load = FALSE)
   }

   if(!is.null(val)) {
#browser()
      if(!sameType(getType(val), getElementType(getType(ref)))) {
#XXX
#cat("fix this cast\n")
#         val = Rllvm::createCast(ir, "SIToFP", val, getElementType(getType(ref)))
	  val = createCast(env, ir, getElementType(getType(ref)), getType(val), val)
      }
        
      store = ir$createStore(val, ref)
      if(!is.null(tmp <- attr(val, "zeroBasedCounting"))) {
         attr(ref, "zeroBasedCounting") = tmp
         if(is.name(call[[2]]))
           env$.zeroBased[as.character(call[[2]])] = TRUE
      }
   }

   setVarType(env, args[[1]], val)


   val  # return value - I (Vince) changed this to val from ans (the createStore() return).
        # There seems to be very little we can do with the object of class
        # StoreInst. Note: this seems to be the way it's done here
        # too: http://llvm.org/docs/tutorial/LangImpl7.html
}


setVarType =
    #
    # This adds the type to 
    #
function(env, lhs, rhs, meta = FALSE)
{
   if(is.name(lhs)) {
       lhs = as.character(lhs)
       ty = getType(rhs)
       if(!is.null(tmp <- attr(rhs, "RType")))
           ty = get(tmp, globalenv(), inherits = TRUE)
       env$.localVarTypes[[ lhs ]] = ty

       if(meta)
           setMetadata(env$.module, lhs, class(ty))
   }
}

getElementAssignmentContainerType =
  #
  #  called from just above for x[.....]
  #
function(call, env)
{
   if(is.name(call))
      var = call
   else
      var = call[[2]]

   getDataType(var, env)
}

createFunctionVariable =
  #
  # create a local variable, but put it in the entry block of the function
  # rather than in the current block.  The idea is that we don't
  # want to allocate variables in a loop and so end up repeating that instruction.
  #
function(type, id, env, ir)
{
  cur = getInsertBlock(ir)
  setInsertPoint(ir, env$.entryBlock)
  on.exit(setInsertPoint(ir, cur))
  createLocalVariable(ir, type, id, TRUE)  # to insert before terminator.
}


compile <-
function(e, env, ir, ..., fun = env$.fun, name = getName(fun), .targetType = NULL, .useHandler = TRUE)
{
   if(is(e, "RC++Reference")) # for already compiled objects, i.e. Value.
      return(e)

   if(.useHandler && is.call(e)) {
      tmp = dispatchCompilerHandlers(e, env$.compilerHandlers, env, ir, ...)
      if(!is.null(tmp))
        return(tmp)
   }
   
    # This doesn't always seem to dispatch on <-, i.e. in the code generated by rewriteSApply(). That is just a call.
  if(is.call(e) && as.character(e[[1]]) %in% c("<-", "=", "<<-"))
    `compile.=`(e, env, ir, ...)
  else
     UseMethod("compile")
}

compile.character =
  # See varargs.R which is a call to printf() with a constant format
function(e, env, ir, ..., .targetType = NULL)
{
   ctxt = getContext(env$.module)
   ty = arrayType(Int8Type, nchar(e))
   strVar = createGlobalVariable(".tmpString", env$.module, val = e, constant = TRUE, linkage = PrivateLinkage)
   return(getGetElementPtr(strVar, ctx = ctxt))
   
   ptrVar = createFunctionVariable(StringType, ".tmpStringPtr", env, ir) 
   setInitializer(ptrVar, strVar)
   createLoad(ir, ptrVar)
}


`compile.{` = compileExpressions =
  #
  # This compiles a group of expressions.
  # It handles moving from block to block with a block for
  # each expression. <Is this still true? or is it more sophisticated about blocks now?>
function(exprs, env, ir, fun = env$.fun, name = getName(fun), .targetType = NULL, ..., afterBlock = NULL, nextBlock = NULL)
{
  #insertReturn(exprs)
   given.afterBlock = !missing(afterBlock)
   
  if(as.character(exprs[[1]]) != "{")
      compile(exprs, env, ir, fun = fun, name = name)
  else {
     oldVals = env$.remainingExpressions
     on.exit(env$.remainingExpressions <- oldVals)
      
    exprs = exprs[-1]

    idx = seq_along(exprs)

    for (i in idx) {
        cur = ir$getInsertBlock()
        if(length(getTerminator(cur))) 
            break

        env$.remainingExpressions = exprs[ - (1:i) ]

        pop = FALSE
        if(is.call(exprs[[i]]) && (is(exprs[[i]], "if") || is(exprs[[i]], "for") || is(exprs[[i]], "while")) ) {

            if(i < length(idx))  # length(afterBlock) == 0 &&
                afterBlock = if(length(afterBlock)) afterBlock else Block(env$.fun, sprintf("after.%s", deparse(exprs[[i]])))
            else { 
                afterBlock = nextBlock
               if(is(exprs[[i]], "if")) {
                    # THIS SEEMS ugly. It handles the case of a while() { } where the last expression in the {}
                    #  is an if() expr with no else.
                    #  We need to return to the while() condition, not jump to nextBlock. 
                   if(length(env$.loopStack) && env$.loopStack[1] == "while") {
                      tmp = list(...)$nextIterBlock
                      if(!is.null(tmp)) 
                         afterBlock = tmp
                      else
                         stop("probably something wrong!!!")
                   }
               }
            }

            pop = TRUE                
            #pushNextBlock(env, afterBlock)
        }
        
        compile(exprs[[i]], env, ir, fun = fun, name = name, nextBlock = afterBlock, ...)

        if(pop) {
             # Do we setInsertBlock() for this next block?
            #popNextBlock(env)  # popping the wrong thing!
            b = afterBlock
            afterBlock = NULL
            if(!is.null(b))
                setInsertBlock(ir, b)
        }
#        # One approach to handling the lack of an explicit return is to
#        # create the return instruction ourselves, or to add a return
#        # around the call before we compile. The advantage of the latter
#        # is that any code generation that we write to ensure the correct
#        # return type on the expressions e.g. return(x + 1) will do the correct
#        # thing on the x + 1 part, not later converting the value.
#      if(i == idx[length(idx)] && !is.call(exprs[[i]]) || exprs[[i]][[1]] != as.name('return')) { # last one
#        ir$createReturn(val)
#      } else
#        val
      }
  }
}


compile.name <-
function(e, env, ir, ..., fun = env$.fun, name = getName(fun), .targetType = NULL)  
{
   getVariable(e, env, ir, searchR = TRUE, ...)
}

compile.integer <-
function(e, env, ir, ..., fun = env$.fun, name = getName(fun), .targetType = NULL)  
{
   if(length(e) == 1)
      createIntegerConstant(e)
   else
     stop("not compiling integer vector for now")
}

compile.logical <-
function(e, env, ir, ..., fun = env$.fun, name = getName(fun), .targetType = NULL)  
{
 # compile(as(e, "integer"), env, ir, ..., fun = fun, name = name)
  createLogicalConstant(e)
}


compile.numeric <-
function(e, env, ir, ..., fun = env$.fun, name = getName(fun), .targetType = NULL)  
{

  if(length(e) == 1) {
     if(length(.targetType))
       createConstant(val = e, type = .targetType)    
     else
       createDoubleConstant(e)
  }
  else
     stop("not compiling numeric vector for now")
}


compile.Value <-
  # This is just an LLVM value
function(e, env, ir, ..., fun = env$.fun, name = getName(fun), .targetType = NULL)  
  e

compile.default <-
function(e, env, ir, ..., fun = env$.fun, name = getName(fun), .targetType = NULL)
{
    if(is(e, "("))
       return(compile(e[[2]], env, ir, .targetType = .targetType))
    
    if(is(e, "Value") || is(e, "Instruction"))
      return(e)
    

    if (is.call(e)) {
      dispatchCompilerHandlers(e, env$.compilerHandlers, env, ir, ...)
    } else if (is.symbol(e)) {
      var <- as.character(e)
      return(var) ## TODO: lookup here, or in OP function?
    } else if(is.character(e)) 
       return(compile.character(e, env, ir, ...))
    else
      stop("can't compile objects of class ", class(e))
}

dispatchCompilerHandlers =
    #XXX Dispatch across lists.
function(e, handlers, env, ir, ...)
{
           # Recursively compile arguments
    call.op <- findCall(e[[1]], env$.compilerHandlers)
    if(is.null(call.op))
        return(NULL)
      
    if(is.list(call.op)) {
        for(f in call.op) {
            tmp = f(e, env, ir, ...)
            if(!is.null(tmp))
                return(tmp)
        }
    } else {
         if (typeof(call.op) != "closure" && is.na(call.op)) 
           call.op = findCall("call", env$.compilerHandlers)

         if(!is.list(call.op) && !is.function(call.op) && is.na(call.op))
            return(NULL)

          # XXX Dispatch across list here if we get back a list.
         call.op(e, env, ir, ...)
     }
}    


findGlobals=
function(fun, merge = FALSE, ignoreDefaultArgs = TRUE)
{
  ans = codetools::findGlobals(fun, merge)
  if(!merge && ignoreDefaultArgs) {
     formals(fun) = lapply(formals(fun), function(x) NULL)
     ans$functions = codetools::findGlobals(fun, FALSE)$functions
  }
  ans
}


addArgSEXPTypeMetadata =
function(className, id, module)
{
  name = sprintf("%s.SEXPType", id)
  setMetadata(module, name, list("SEXPType", className))
}


addSEXPTypeMetadata =
function(module, argTypes)
{
  k = sapply(argTypes, class)
  i = (k != "SEXPType")
  if(any(i)) 
      mapply(addArgSEXPTypeMetadata,  k[i], names(argTypes)[i], MoreArgs = list(module = module))
  
  any(i)
}




compileFunction <-
function(fun, returnType, types = list(), module = Module(name), name = NULL,
         compiler = makeCompileEnv(),
         NAs = FALSE,
         asFunction = FALSE, asList = FALSE,
         optimize = TRUE, ...,
         .functionInfo = list(...),
         .routineInfo = list(),
         .compilerHandlers = getCompilerHandlers(),
         .globals = getGlobals(fun, names(.CallableRFunctions), .ignoreDefaultArgs, .assert = .assert, .debug = .debug), #  would like to avoid processing default arguments.
                                 # findGlobals(fun, merge = FALSE, .ignoreDefaultArgs), 
         .insertReturn = !identical(returnType, VoidType),
         .builtInRoutines = getBuiltInRoutines(),
         .constants = getConstants(),
         .vectorize = character(), .execEngine = NULL,
         structInfo = list(), .ignoreDefaultArgs = TRUE,
         .useFloat = FALSE, .zeroBased = logical(),
         .localVarTypes = list(), .fixIfAssign = TRUE,
         .CallableRFunctions = list(), 
         .RGlobalVariables = character(),
         .debug = TRUE, .assert = TRUE, .addSymbolMetaData = TRUE,
         .readOnly = constInputs(fun),
         .integerLiterals = TRUE,
         .loadExternalRoutines = TRUE
         )  # .duplicateParams = TRUE
{
   if(missing(name))
      name = deparse(substitute(fun))

   if(is.logical(.assert))
      .assert = if(.assert) ".assert" else character()

   if(!missing(types) && !is.list(types))
      types = structure(list(types), names = names(formals(fun))[1])

   if(is.logical(.execEngine) && .execEngine)
      .execEngine = ExecutionEngine(module)
      
   if(.fixIfAssign)
     fun = fixIfAssign(fun)
  
  ftype <- typeof(fun)
  if (ftype == "closure") {

       #if there is no type information but the author put the type information on the function itself, use that.
     .typeInfo = attr(fun, "llvmTypes")
     if( (missing(returnType) || missing(types)) && !is.null(.typeInfo)) {
       if(missing(types))
          types = .typeInfo$parms
       if(missing(returnType))
          returnType = .typeInfo$returnType
     }
     
    args <- formals(fun) # for checking against types; TODO

    if(length(types) == 0 && length(args) > 0) {
        types = getTypeInfo(fun)
        returnType = types[[1]]
        types =  types[[2]]
    }

    
    if(length(args)  > length(types)) {
       stop("need to specify the types for all of the arguments for the ", name, " function")
    } else if(length(types) > length(args))
       warning("more types specified than parameters for the new routine")

    if(length(names(types)) == 0)
      names(types) = names(args)
    
    # Find the name of the function if not provided
    if (is.null(name))
      name <- deparse(substitute(fun))

      # See if we have some SEXP types for which we may need to the length.
      # This might go as we can call Rf_length().  nrow()
    rVecTypes = sapply(types, isRVectorType)
    if(any(rVecTypes)) {
       lengthVars = sprintf("_%s_length", names(types)[rVecTypes])
       types[lengthVars] = replicate(length(lengthVars), Int32Type)
    } else
       lengthVars = character()

     
    # Grab types, including return. Set up Function, block, and params.
    isDimensionedType = sapply(types, is, "DimensionedType")

       # The dimTypes need to have names or we need to be able to map them back to the particular arguments. They do!
    if(any(isDimensionedType)) {
       dimTypes = types[isDimensionedType]
       types[isDimensionedType] = replicate(sum(isDimensionedType), SEXPType) #XXXX Not necessarily a SEXPType anymore.
    } else
       dimTypes = list()
     
    argTypes <- types
    llvm.fun <- Function(name, returnType, argTypes, module)




    if(any( i <- sapply(argTypes, is, "SEXPType")))
       addSEXPTypeMetadata(module, argTypes[i])

      # if we picked up any .R() expressions in the function, add the resulting types
      # to the .CallableRFunctions.
    if(length(.globals$skippedExpressions) &&
                (i <- names(.globals$skippedExpressions) == ".R")) {
        
        z = structure(lapply(.globals$skippedExpressions[i],
                               function(x)
                                  eval(x[[3]])),
                       names = sapply(.globals$skippedExpressions[i], function(x) as.character(x[[2]][[1]]))) # have to deal with obj$f() in x[[2]][1]]
        .CallableRFunctions = c(.CallableRFunctions, z)
    }
        
    
    if(any(.globals$functions %in% names(.builtInRoutines))) {
       i = match(.globals$functions, names(.builtInRoutines), 0)
       .routineInfo = .builtInRoutines[ i ]
       .globals$functions = .globals$functions[i == 0]
    }

    if(length(.CallableRFunctions)) {
       i = match(names(.CallableRFunctions), .globals$functions, 0)
       .globals$functions = .globals$functions[ i == 0]
    }
     

    if(name %in% .globals$functions && !(name %in% names(.functionInfo)))
       .functionInfo[[name]] = list(returnType = returnType, params = types)


    block <- Block(llvm.fun, "entry")
    params <- getParameters(llvm.fun)  # TODO need to load these into nenv
    ir <- IRBuilder(block)


#XXX temporary to see if we should declare and load these individually when we encounter them
# Really need the user to specify the DLL not just the name in case of ambiguities, so often easier to do this separately.
    if(.loadExternalRoutines && length(.routineInfo))
        processExternalRoutines(module, .funcs = .routineInfo, .addMetaData = .addSymbolMetaData)

    if(length(.globals$functions)) 
       compileCalledFuncs(.globals, module, .functionInfo, optimize = FALSE)

    if(length(.globals$variables)) {

       .globals$variables = setdiff(.globals$variables, c(ExcludeGlobalVariables, .RGlobalVariables))


       i = .globals$variables %in% names(module)
       if(any(i)) {
              #XXX should check that they are actual variables and not functions.
          .globals$variables = .globals$variables[!i]
       }

#XXX     nenv & ir are not yet defined. What do we want here?
       compileGlobalVariables(.globals$variables, module, nenv, ir)
    }
    

    compiler = compiler(.compilerHandlers, NAs, .builtInRoutines, .functionInfo, structInfo, .zeroBased,
                        .integerLiterals, .useFloat, .debug, .assert, .addSymbolMetaData, .CallableRFunctions,
                        compiler = compiler)
    nenv = compiler
    nenv$.fun = llvm.fun
    nenv$.params = params
    nenv$.types = types
    nenv$.returnType = returnType
    nenv$.entryBlock = block
    nenv$.funName = name  # name of the routine being compiled.     

    nenv$.ExecEngine = .execEngine
    nenv$.module = module

    nenv$.localVarTypes = .localVarTypes
    nenv$.Constants = .constants
    nenv$.dimensionedTypes = dimTypes     
     

    if(.insertReturn)
       fun = insertReturn(fun, env = nenv)        
    fbody <- body(fun)

   last = fbody[[length(fbody)]]
   if(sameType(VoidType, returnType) && is.call(last) && as.character(last[[1]]) == "if" && length(last) == 3)
      fbody[[ length(fbody) + 1L ]] = quote(return( ))
   
     
    nenv$.Rfun = fun

    if(length(.readOnly)) {
       k = .readOnly  
#       mayMutate = setdiff(names(formals(fun)), k)
       if(length(k)) {
          idx =  match(k, names(argTypes))
           if(any(is.na(idx)))
               stop("mismatch in parameter names and types")
          mapply(function(type, arg) {
                      if(isPointerType(type))
                         setParamAttributes(arg, LLVMAttributes["ReadOnly"]) # 28L, TRUE) 
                   }, argTypes[k], getFunctionArgs(llvm.fun)[idx])

       }
   }
     
    compileExpressions(fbody, nenv, ir, llvm.fun, name)

       # the second condition occurs when we have an if() with no else as the last expression
       # in the function. We add a return() so the compiler handlers work, and then we don't
       # add the return here.
       # Could also check for a terminator with:  getTerminator(getInsertBlock(ir))
    if(identical(returnType, VoidType) && !identical(fbody[[length(fbody)]], quote(return())))
       ir$createReturn()


     if(length(nenv$.SetCallFuns)) {
           # This is for the callbacks to R. We have to get the expressions for the callback to the module.
           # We have a way to set them, another way to create them (although this doesn't handle complex arguments)
           # We might eliminate lots of these and just go with  serializing the expression and deserializing
           # when it is needed.
         lapply(nenv$.SetCallFuns,
                function(x)
                  compileSetCall(x$var, x$name, module))

         lapply(nenv$.SetCallFuns,
                function(x)         
                  compileCreateCallRoutine(nenv, ir, x$call, sprintf("create_%s", x$var), x$var))

         if(!is.null(nenv$.ExecEngine))   # don't use .ee as this field in the compiler(env) may have been set as a side effect of compile()
             lapply(nenv$.SetCallFuns,
                function(x)  {
                   .llvm( module[[x$name]],  x$call, .ee = nenv$.ExecEngine) 
                })

          lapply(nenv$.SetCallFuns,
                 function(x)
                   createDeserializeCall(nenv, ir, x$call, x$deserializeCallFun))

     }

    ## This may ungracefully cause R to exit, but it's still
    ## preferably to the crash Optimize() on an unverified module
    ## creates.
    if(optimize && verifyModule(module))
       Optimize(module, execEngine = .execEngine)

     
    if(asFunction) 
       makeFunction(fun, llvm.fun, .vectorize = .vectorize, .execEngine = .execEngine, .lengthVars = lengthVars)
    else if (asList)
       list(mod = module, fun = llvm.fun, env = nenv)
    else
       llvm.fun
  } else if(!isIntrinsic(name))
     stop("compileFunction can currently only handle closures. Failing on ", name)
}


compilerStartFunction =
function(env, ir, name, retType, paramTypes = list())
{
  if(name %in% names(env$.module))
      f = env$.module[[name]]
  else
      f = Function(name, retType, paramTypes, module = env$.module)
  
  b = Block(f, "createCallEntry")
  ir$setInsertBlock(b)
  env$.localVarTypes = list()
  env$.returnType = if(missing(retType)) getReturnType(f)  else retType  # want the user to specify this to be able to distinguish different classes of SEXP types.
  env$.fun = f
  
  env$.entryBlock = b  # vital to set this so that the local variables go into this block.
                       # otherwise go into the entry block of the original function being compiled
                       # Need to generalize, e.g. add a method to the compiler to create a new Function

  TRUE

}


Rf_routines = c("length")
RewrittenRoutineNames = c("numeric", "integer", "logical", "character", "list", "double")

mapRoutineName =
function(name)
{
  w = name %in% Rf_routines
  name[w] = sprintf("Rf_%s", name[w])
  name
}

makeCompileEnv =
function()
{
   nenv <- new.env( parent = emptyenv())
   nenv$.continueBlock = list()
   nenv$.nextBlock = list()

   nenv$declFunction = function(name) 
        declareFunction(nenv$.builtInRoutines[[name]], name, nenv$.module)

   nenv$.funCalls = list()
   nenv$addCallInfo = function(name, retType = NULL, types = NULL) {
        i = length(nenv$.funCalls)
        nenv$.funCalls[[i + 1L]] <<- list(name, returnType = retType, params = types)
        names(nenv$.funCalls)[i + 1L] <<- name
        TRUE
   }

   nenv
}


isRVectorType =
  # This is transient and intended to identify if the
  # type corresponds to an R vector, rather than an R scalar.
  # For now this is an arrayType(, 0), i.e. with zero elements
function(type)
{
  is(type, "ArrayType") && getNumElements(type) == 0
}


processExternalRoutines =
function(mod, ..., .funcs = list(...), .lookup = TRUE, .addMetaData = TRUE)
{
  names(.funcs) = mapRoutineName(names(.funcs))

  w = !duplicated(names(.funcs))
  .funcs = .funcs[w]

  .funcs = .funcs[setdiff(names(.funcs) , RewrittenRoutineNames)]
  
  ans = mapply(declareFunction, .funcs, names(.funcs), MoreArgs = list(mod))

  if(.lookup) {
    syms = lapply(names(.funcs),
                    function(x) {
                       info = getNativeSymbolInfo(x)
                       if(.addMetaData) {
                           pkg = info$package
                             # do we need the path? yes if it is not part of a package.
                             # Some of these will be "wrong", i.e. too specific
                             # e.g. finding printf in RLLVMCompile since libc is linked to RLLVMCompile.so.
                             # But that is R's problem, i.e. should be fixed there or we should specify
                             # where it is in the registration information in this package for known
                             # external routines
                           setMetadata(mod, sprintf("symbolInfo.%s", info$name),
                                            list("package", pkg[["name"]], "path", pkg[["path"]]))
                       }
                       info$address
                    })
    llvmAddSymbol(.syms = structure( syms, names = names(.funcs)))
  }
  ans
}


getSymbolInfoMetadata =
function(module, id = character())
{
  if(length(id) == 0) {
     # get all the metadata
     # get the names of all metadata, find those named symbolInfo\\..* and then call this function in an lapply()      
     all = getMetadata(module)
     i = grepl("^symbolInfo\\.", names(all))
     return(lapply(all[i], function(node) getSymbolInfoMetadata(module, node)))
  }

  if(length(id) > 1) {
      ans = lapply(id, function(x) getSymbolInfoMetadata(module, x))
      if(is.character(id))
          names(ans) = i
      return(ans)
  }


  md = if(is.character(id))
          md = getMetadata(module, sprintf("symbolInfo.%s", id))
       else
          id
  
  if(is.null(md))
      stop("no symbolInfo metadata for ", id)

  a = md[[1]]
  vals = names(a[])
  i = seq(1, length(vals)-1, by = 2)
  structure(vals[i+1], names = vals[i])
}


getConstants =
function(..., .defaults = ConstantInfo)
{
  vals = list(...)
  .defaults[names(vals)] = vals
  .defaults
}

getBuiltInRoutines =
  #
  # See FunctionTypeInfo also 
  #
function(..., env = NULL, useFloat = FALSE)
{

  if(!is.null(env) && exists(".builtInRoutines", env))
    return(get(".builtInRoutines", env))

  
  SEXPType = getSEXPType()

  
  basic = if(useFloat)
             list(exp = list(FloatType, FloatType),
                  log = list(FloatType, FloatType),       
                  pow = list(FloatType, FloatType, FloatType),
                  sqrt = list(FloatType, FloatType))    
          else
             list(exp = list(DoubleType, DoubleType),
                  log = list(DoubleType, DoubleType),       
                  pow = list(DoubleType, DoubleType, DoubleType),
                  sqrt = list(DoubleType, DoubleType))      


 ans =  list(
       length = list(Int32Type, getSEXPType("REAL")),
       Rf_length = list(Int32Type, getSEXPType("REAL")),        # same as length. Should rewrite name length to Rf_length.
       INTEGER = list(Int32PtrType, getSEXPType("INT")),
       REAL = list(DoublePtrType, getSEXPType("REAL")),
       Rf_allocVector = list(SEXPType, Int32Type, Int32Type),
       Rf_protect = list(VoidType, SEXPType),
       Rf_unprotect = list(VoidType, Int32Type),
       Rf_unprotect_ptr = list(VoidType, SEXPType),     
       R_PreserveObject = list(VoidType, SEXPType),
       R_ReleaseObject = list(VoidType, SEXPType),     
       Rf_mkChar = list(getSEXPType("CHAR"), StringType),
       Rf_PrintValue = list(VoidType, SEXPType),
       STRING_ELT = list(getSEXPType("CHAR"), getSEXPType("STR"), Int32Type), # long vectors?
       SET_STRING_ELT = list(SEXPType, getSEXPType("STR"), Int32Type, getSEXPType("CHAR")), # XXX may need different type for the index for long vector support.       
       SET_VECTOR_ELT = list(SEXPType, getSEXPType("VEC"), Int32Type, SEXPType), # XXX may need different type for the index for long vector support.
       VECTOR_ELT = list(SEXPType, getSEXPType("VEC"), Int32Type),
       SETCAR = list(SEXPType, SEXPType, SEXPType),
       SETCDR = list(SEXPType, SEXPType, SEXPType),
       SET_TAG = list(VoidType, SEXPType, SEXPType),
       Rf_install = list(SEXPType, StringType),
       CDR = list(SEXPType, SEXPType),
     
     
       Rf_nrows = list(Int32Type, SEXPType),
       Rf_ncols = list(Int32Type, SEXPType),
     
       numeric = list(REALSXPType, Int32Type),
       integer = list(INTSXPType, Int32Type),
       logical = list(LGLSXPType, Int32Type),
       character = list(LGLSXPType, Int32Type),

       Rf_ScalarInteger = list(SEXPType, Int32Type),
       Rf_ScalarReal = list(SEXPType, DoubleType),
       Rf_ScalarLogical = list(SEXPType, Int32Type),
       Rf_mkString = list(SEXPType, StringType),

       Rprintf = list(VoidType, StringType, "..." = TRUE),
       printf = list(Int32Type, StringType, "..." = TRUE),

       Rf_eval = list(SEXPType, SEXPType, SEXPType),
       Rf_asInteger = list(Int32Type, SEXPType),
     
#XXX the following are not correct and need some thinking.       
       nrow = list(Int32Type, c("matrix", "data.frame")),
       ncol = list(Int32Type, c("matrix", "data.frame")),
       dim = list(quote(matrix(Int32Type, 2)), c("matrix", "data.frame")),

       strdup = list(StringType, StringType),
       R_CHAR = list(StringType, SEXPType),

       R_loadRObjectFromString = list(SEXPType, StringType),

       Rf_error = list(VoidType, StringType, "..." = TRUE),
       R_raiseStructuredError = list(VoidType, StringType, pointerType(StringType), Int32Type),
       R_va_raiseStructuredError = list(VoidType, StringType, Int32Type, "..." = TRUE) ,

       memcpy = list(pointerType(VoidType), pointerType(VoidType), pointerType(VoidType), Int32Type),

       strcmp = list(Int32Type, StringType, StringType),
       puts = list(Int32Type, StringType),
       printf = list(Int32Type, StringType, "..." = TRUE)
     )

  ans[names(basic)] = basic

  others = list(...)
  ans[names(others)] = others
  
  ans
}

# Should this just be names of CompilerHandlers? No, need more than those.
# Although we could add these items to CompilerHandlers and have them map
# to the existing handlers, e.g., sapply = ...
# But this is not a good idea. printf is just a regular call.
ExcludeCompileFuncs = c("{", "sqrt", "return", MathOps,
                        LogicOps, "||", "&&", # add more here &, |
                        ":", "=", "<-", "<<-", "[<-", '[', "[[", "for", "if", "while",
                        "repeat", "(", "!", "^", "$", "$<-",
                        "sapply", "lapply",
                        "printf",
                        "break", "next",
                        ".R", ".typeInfo", ".signature", ".varDecl", ".pragma",
                        ".assert", ".debug",
                        "stop", "warning"
    
                       )  # for now


compileCalledFuncs =
  #
  #  The .functionInfo
  #
function(globalInfo, mod, .functionInfo = list(), ...)
{
  funs = setdiff(globalInfo$functions, ExcludeCompileFuncs)

     # Skip the ones we already have in the module.
     # Possibly have different types!
  funs = funs[!(funs %in% names(getModuleFunctions(mod))) ]
  funs = funs[!(sapply(funs, isIntrinsic))]  
  
  funs = structure(lapply(funs, get), names = funs)


  lapply(names(funs),
           function(id) {
             if(id %in% names(.functionInfo)) {
               types = .functionInfo[[id]]
               compileFunction(funs[[id]],
                               types$returnType,
                               types = types$params,
                               module = mod, name = id,
                               ...
                               )
             } else
               compileFunction(funs[[id]], module = mod, name = id)
           })
}


makeFunction =
function(fun, compiledFun, .vectorize = character(),  .execEngine = NULL, .lengthVars = character())
{
  e = new.env()
  e$.fun = compiledFun
  e$.irCode = showModule(compiledFun, TRUE)
  
  if(is.null(.execEngine))  
     .execEngine = ExecutionEngine(as(compiledFun, "Module"))  # evaluate this now or quote it.
  
  args = c(as.name('.fun'), lapply(names(formals(fun)), as.name), as.name("..."))
  k = call('.llvm')
  k[2:(length(args) + 1)] = args

  if(length(.lengthVars))
     k[.lengthVars] = lapply(gsub("_(.*)_length", "\\1", .lengthVars),
                             function(id)
                               substitute(length(x), list(x = as.name(id))))
  
  k[[".ee"]] = as.name('.ee')
  
  formals(fun)$.ee = .execEngine
  formals(fun) = c(formals(fun), formals(function(...){}))
  body(fun) = k
  environment(fun) = e
  fun
}


isMutableRObject =
function(var)
{
   where = sapply(var, function(x) find(x)[1])
   if(any(is.na(where)))
     stop("Cannot find variable ", if(sum(is.na(where) > 1)) "s", " ", paste(var[is.na(where)], collapse = ", "))

   var %in% ".GlobalEnv"
}

compileGlobalVariables =
function(varNames, mod, env, ir,
          mutable = sapply(varNames, isMutableRObject))
{
   ctx = getGlobalContext()
   sapply(varNames[!mutable],
           function(var) {
             val = createConstant(ir, get(var), context = ctx)   
             createGlobalVariable(var, val = val, mod, constant = TRUE)
           })

#
   #XX create variables for the mutable ones.
}


getTypeInfo =
function(fun)
{
  b = body(fun)
  if(is(b, "{"))
      e = b[[2]]
  else
      e = b

  if(is.call(e) && as.character(e[[1]]) == ".typeInfo")
      eval(e) # , globalenv())
  else
      stop("no .typeInfo() call")
}

.typeInfo = .signature =
    # We might process this as a unlisted collection and regroup them into list(returnType = ,  params = )
function(..., .types = list(...))
{
  i = sapply(.types, function(x) is(x, "externalptr") || is(x, "Type"))

  if(all(i))
      .types = list(.types[[1]], .types[-1])
  else
      .types
}

.varDecl =
function(..., .types = list(...))
{
  .types
}
duncantl/RLLVMCompile documentation built on May 15, 2019, 5:31 p.m.