#' Optimizer: Memory Allocation.
#'
#' Performs one memory allocation pass.
#' Carefully examine the results after running this function!
#'
#' @param code A list of character vectors with the code to optimize.
#'
#' @examples
#' code <- paste(
#' "v <- NULL",
#' "for (i in 1:5) {",
#' " v[i] <- i^2",
#' "}",
#' sep = "\n"
#' )
#' cat(opt_memory_alloc(list(code))$codes[[1]])
#' @export
opt_memory_alloc <- function(code) {
res <- list()
res$codes <- lapply(code, ma_one_file)
return(res)
}
# Executes memory allocation on one text of code.
#
# @param code A character vector with code to optimize.
#
ma_one_file <- function(code) {
parsed_dataset <- parse_text(code)
flatten_pd <- flatten_leaves(parsed_dataset)
if(nrow(parsed_dataset) == 0) {
return (deparse_data(flatten_pd))
}
result_flatten_pd <- ma_one_flatten_pd(parsed_dataset, flatten_pd)
return(deparse_data(result_flatten_pd))
}
# Restricts the operations of the optimizer to the body of a single `FOR` loop in code snippets containing nested `FOR` loops
#
# @param body_fpd A parsed data.frame containing the `body` of a `FOR` loop
#
restrict_body_to_one_loop <- function(body_fpd) {
list_of_fors <- body_fpd[body_fpd$token == "FOR", "pos_id"]
if(length(list_of_fors) > 1) {
body_fpd <- body_fpd[body_fpd$pos_id < list_of_fors[[2]], ]
}
body_fpd
}
# Restricts the operations of the optimizer to the condition of a single `FOR` loop in code snippets containing nested `FOR` loops
#
# @param cond_fpd A parsed data.frame containing the `condition` of a `FOR` loop
#
restrict_condition_to_one_loop <- function(cond_fpd) {
list_of_conds <- cond_fpd[cond_fpd$token == "forcond", "pos_id"]
if(length(list_of_conds) > 1) {
cond_fpd <- cond_fpd[cond_fpd$pos_id < list_of_conds[[2]], ]
}
cond_fpd
}
# Returns the name of the vector being assigned in the given expression. Returns FALSE if no vector is being assigned.
#
# @param node_id A numeral indicating the node ID of the expression from which the vector's name is to be extracted
# @param fpd A flattened parsed data.frame of the original code snippet given
#
extract_vector_name <- function(fpd, node_id) {
assignment_pd <- get_children(fpd, node_id)
assignment_index <- which(assignment_pd$token %in% assigns)
if(length(assignment_index) > 0) {
assignment_type <- assignment_pd[assignment_index, "token"]
if(assignment_type == "EQ_ASSIGN" || assignment_type == "LEFT_ASSIGN") {
vector_name <- assignment_pd[(assignment_index - 1), "text"]
} else {
vector_name <- assignment_pd[(assignment_index + 1), "text"]
}
return (vector_name)
} else {
return (FALSE)
}
}
# Checks whether a number is explicitly mentioned in the `condition` of a `FOR` loop.
# Returns the number mentioned if present, else returns `FALSE`
#
# @param cond_fpd A parsed data.frame containing the `condition` of a `FOR` loop
# @param index_name A string representing the name of the iterator of the `FOR` loop
# @param in_pos A numeric indicating the `pos_id` of `in` from the condition of a `FOR` loop
#
check_forcond_num_declaration <- function(cond_fpd, index_name, in_pos) {
forcond_number_flag <- FALSE
if("':'" %in% cond_fpd[cond_fpd$pos_id > in_pos, "token"]) {
colon_pos <- which(cond_fpd$token == "':'", arr.ind = TRUE)
colon_pos_id <- cond_fpd[colon_pos, "pos_id"]
if("NUM_CONST" %in% cond_fpd[cond_fpd$pos_id > colon_pos_id, "token"]) {
forcond_number_flag <- TRUE
cond_fpd <- cond_fpd[cond_fpd$pos_id > colon_pos_id, ]
memory_alloc_number <- as.integer(cond_fpd[cond_fpd$token == "NUM_CONST", "text"])
}
}
else if("SYMBOL_FUNCTION_CALL" %in% cond_fpd[cond_fpd$pos_id > in_pos &
cond_fpd$text == "seq_len", "token"]) {
seqLen_pos_id <- cond_fpd[cond_fpd$pos_id > in_pos &
cond_fpd$token == "SYMBOL_FUNCTION_CALL" &
cond_fpd$text == "seq_len", "pos_id"]
if("NUM_CONST" %in% cond_fpd[cond_fpd$pos_id > seqLen_pos_id, "token"]) {
forcond_number_flag <- TRUE
cond_fpd <- cond_fpd[cond_fpd$pos_id > seqLen_pos_id, ]
memory_alloc_number <- as.integer(cond_fpd[cond_fpd$token == "NUM_CONST", "text"])
}
forcond_number_flag <- FALSE
}
else if("SYMBOL_FUNCTION_CALL" %in% cond_fpd[cond_fpd$pos_id > in_pos &
cond_fpd$text == "c", "token"]) {
c_pos_id <- cond_fpd[cond_fpd$pos_id > in_pos &
cond_fpd$token == "SYMBOL_FUNCTION_CALL" &
cond_fpd$text == "c", "pos_id"]
if("NUM_CONST" %in% cond_fpd[cond_fpd$pos_id > c_pos_id, "token"]) {
forcond_number_flag <- TRUE
cond_fpd <- cond_fpd[cond_fpd$pos_id > c_pos_id, ]
memory_alloc_number <- 0
for(i in cond_fpd$token) {
if(i == "NUM_CONST") {
memory_alloc_number = memory_alloc_number + 1
}
}
}
forcond_number_flag <- FALSE
}
if(forcond_number_flag == TRUE) {
return (memory_alloc_number)
} else {
return (FALSE)
}
}
# Checks for presence of functions in the body of the `FOR` loop
#
# @param body_fpd A parsed data.frame containing the `body` of a `FOR` loop
#
check_fun_call <- function(body_fpd) {
return ("SYMBOL_FUNCTION_CALL" %in% body_fpd$token)
}
# Checks whether the index is explicitly mentioned at the time of assignment inside the loop
# Returns list of vectors that have the index, if none are present empty list is returned
#
# @param body_fpd A parsed data.frame containing the `body` of a `FOR` loop
# @param index_name A string representing the name of the iterator of the `FOR` loop
#
check_index_assignment <- function(body_fpd, index_name) {
assign_operator_list <- list()
assign_operator_list <- body_fpd[body_fpd$token %in% assigns, ]
check_index_assignment_list <- failed_list <- NULL
for(i in seq_len(length(assign_operator_list$id))) {
check_pos_id <- assign_operator_list[i, "pos_id"]
check_assign_op <- assign_operator_list[i, "token"]
check_parent_id <- assign_operator_list[i, "parent"]
open_bracket_flag <- close_bracket_flag <- FALSE
name_of_vector <- vector(mode = "character", length = 1)
##If = or <- is used, then we will search the expr before the assignment operator
if(check_assign_op == "EQ_ASSIGN" | check_assign_op == "LEFT_ASSIGN") {
assigned_vector_node_id <- body_fpd[body_fpd$token == "expr" &
body_fpd$parent == check_parent_id &
body_fpd$pos_id < check_pos_id, "id"]
if(length(assigned_vector_node_id) == 0) {
return (check_index_assignment_list)
}
parsed_assigned_expr <- get_children(body_fpd, assigned_vector_node_id)
index_name_idx <- which(parsed_assigned_expr$text == index_name &
parsed_assigned_expr$token == "SYMBOL", arr.ind = TRUE)
open_bracket_flag <- (parsed_assigned_expr[(index_name_idx - 1), "text"] == "[")
close_bracket_flag <- (parsed_assigned_expr[(index_name_idx + 1), "text"] == "]")
open_bracket_index <- which(parsed_assigned_expr$token == "'['", arr.ind = TRUE)
name_of_vector <- parsed_assigned_expr[(open_bracket_index-1), "text"]
} else {
assigned_vector_node_id <- body_fpd[body_fpd$token == "expr" &
body_fpd$parent == check_parent_id &
body_fpd$pos_id > check_pos_id, "id"]
if(length(assigned_vector_node_id) == 0) {
return (check_index_assignment_list)
}
parsed_assigned_expr <- get_children(body_fpd, assigned_vector_node_id)
index_name_idx <- which(parsed_assigned_expr$text == index_name &
parsed_assigned_expr$token == "SYMBOL", arr.ind = TRUE)
open_bracket_flag <- (parsed_assigned_expr[(index_name_idx - 1), "text"] == "[")
close_bracket_flag <- (parsed_assigned_expr[(index_name_idx + 1), "text"] == "]")
open_bracket_index <- which(parsed_assigned_expr$token == "'['", arr.ind = TRUE)
name_of_vector <- parsed_assigned_expr[(open_bracket_index-1), "text"]
}
if(open_bracket_flag & close_bracket_flag) {
check_index_assignment_list <- c(check_index_assignment_list, name_of_vector)
} else {
check_index_assignment_list <- c(check_index_assignment_list, name_of_vector)
failed_list <- c(failed_list, name_of_vector)
}
}
return (unique(check_index_assignment_list[!(check_index_assignment_list %in% failed_list)]))
}
ma_one_flatten_pd <- function(parsed_dataset, flatten_pd) {
pd <- parsed_dataset
fpd <- flatten_pd
fpd <- eq_assign_to_expr(fpd) ## eq_assign_to_expr helps to convert the `EQ_ASSIGN` token to `expr`
## taking a different approach from simply calling get_children(fpd) and including all `expr`.
exam_nodes <- fpd[fpd$token == "expr", ]
exam_nodes <- rbind(exam_nodes, get_roots(fpd))
exam_nodes <- unique(exam_nodes)
## custom_exam_nodes
## custom_exam_nodes will contain only loops and the assignment nodes from the entire exam_nodes
null_parents <- fpd[fpd$token == "NULL_CONST", "parent"]
na_parents <- fpd[fpd$token == "NUM_CONST" & fpd$text == "NA", "parent"]
init_list <- c("c()", "numeric()", "logical()", "double()", "factor()", "integer()")
alternate_initialization_parents <- fpd[fpd$token == "expr" & fpd$text %in% init_list, "parent"]
null_parents <- c(null_parents, c(alternate_initialization_parents, na_parents))
edit_nodes_list <- NULL
for(i in seq_len(length(alternate_initialization_parents))) {
alt_init_pd <- get_children(fpd, alternate_initialization_parents[i], FALSE)
expr_nodes <- alt_init_pd[alt_init_pd$token == "expr", "id"]
edit_nodes_list <- c(edit_nodes_list, expr_nodes)
}
custom_exam_nodes <- NULL
for(i in null_parents) {
custom_exam_nodes <- rbind(custom_exam_nodes, fpd[fpd$id == i, ])
}
for(i in exam_nodes$id) {
if(nrow(get_children(fpd, i)) > 1) {
if(get_children(fpd, i, FALSE)[1, "token"] == "FOR") {
custom_exam_nodes <- rbind(custom_exam_nodes, fpd[fpd$id == i, ])
}
}
}
# Only order a *custom_exam_nodes* that is not NULL
if(!(is.null(custom_exam_nodes))) {
custom_exam_nodes <- custom_exam_nodes[order(custom_exam_nodes$pos_id), ]
}
## Here we analyse all the nodes of custom_exam_nodes that consists of assignemnts and FOR loops
vector_initialization_list <- list()
initializations_to_change <- NULL
vectors_memory_list <- list()
## if condition handles the loops from custom_exam_nodes and the else part handles the vector assignment from custom_exam_nodes
for(i in seq_len(length(custom_exam_nodes$id))) {
if(get_children(fpd, custom_exam_nodes[i, "id"], F)[1, "token"] == "FOR") {
i <- custom_exam_nodes[i, "id"]
sub_fpd <- get_children(fpd, i)
cond_fpd <- get_children(sub_fpd, sub_fpd[sub_fpd$token=="forcond", "id"])
cond_fpd <- restrict_condition_to_one_loop(cond_fpd)
body_fpd <- sub_fpd[!(sub_fpd$id %in% cond_fpd$id), ]
body_fpd <- restrict_body_to_one_loop(body_fpd)
in_pos <- cond_fpd[cond_fpd$token == "IN", "pos_id"]
index_name <- cond_fpd[cond_fpd$pos_id < in_pos &
cond_fpd$token == "SYMBOL", "text"]
## Checking condition 1: No function calls inside the loop, to counter the possibility of use of `<<-` in that particular function
fun_call_flag <- check_fun_call(body_fpd)
## Checking condition 2: A number being explicitly mentioned in the for condition, and isolating that number if condition is true
forcond_number_flag <- check_forcond_num_declaration(cond_fpd, index_name, in_pos)
if(typeof(forcond_number_flag) != "logical") {
memory_alloc_num <- forcond_number_flag
}
## Checking condition 3: The index should be mentioned specifically of the vector to which something is being assigned
passing_vectors <- check_index_assignment(body_fpd, index_name)
index_assignment_flag <- (length(passing_vectors) == 0)
if(fun_call_flag | typeof(forcond_number_flag) == "logical" | index_assignment_flag) {
next
} else {
initializations_to_change <- c(initializations_to_change, passing_vectors)
initializations_to_change <- unique(initializations_to_change)
for(vec in passing_vectors) {
if(vec %in% names(vectors_memory_list)) {
if(vectors_memory_list[[vec]] < memory_alloc_num) {
vectors_memory_list[[vec]] <- memory_alloc_num
} else {
next
}
} else {
vectors_memory_list[[vec]] <- memory_alloc_num
}
}
}
} else{
# Here we associate all the vectors (of the same name too) with the IDs with which they were initialized.
initialized_vector_name <- extract_vector_name(fpd, custom_exam_nodes[i, "id"])
if(initialized_vector_name %in% names(vector_initialization_list)) {
vector_initialization_list[[initialized_vector_name]] <- c(vector_initialization_list[[initialized_vector_name]], custom_exam_nodes[i, "id"])
} else {
vector_initialization_list[[initialized_vector_name]] <- custom_exam_nodes[i, "id"]
}
}
}
# Now the id of the nodes that have to be edited is stored in vector_initialization_list, and when we remove those nodes from exam_nodes we have the not_to_edit fpd
for(i in seq_len(length(vector_initialization_list))) {
if(names(vector_initialization_list)[i] %in% names(vectors_memory_list)) {
edit_nodes_list <- c(edit_nodes_list, vector_initialization_list[[i]])
}
}
not_to_edit <- exam_nodes[!(exam_nodes$id %in% edit_nodes_list), ]
not_to_edit_copy <- not_to_edit
for(i in seq_len(length(not_to_edit_copy$id))) {
not_to_edit <- rbind(not_to_edit, get_children(fpd, not_to_edit_copy[i, "id"]))
}
not_to_edit <- not_to_edit[order(not_to_edit$pos_id), ]
not_to_edit <- unique(not_to_edit)
# Creating the final_exam_nodes list from the `vector_initialization_list` and `vectors_memory_list` ####
final_exam_nodes <- exam_nodes[exam_nodes$id %in% edit_nodes_list, ]
to_remove_indices <- NULL
## Now we iterate over all the entries of final_exam_nodes and change their text
for(i in seq_len(length(final_exam_nodes$id))) {
name_of_vector <- extract_vector_name(fpd, final_exam_nodes[i, "id"])
if(typeof(name_of_vector) == "logical") {
to_remove_indices <- c(to_remove_indices, i)
} else {
final_exam_nodes[i, "text"] <- sprintf("%s <- vector(length = %d)", name_of_vector, vectors_memory_list[[name_of_vector]])
}
}
if(!(is.null(to_remove_indices))) {
final_exam_nodes <- final_exam_nodes[-(to_remove_indices), ]
}
# Merging the final_exam_nodes and the not_to_edit and subsequent cleaning operations ####
final_exam_nodes_ids <- final_exam_nodes$id
new_fpd <- NULL
for(itr in seq_len(length(final_exam_nodes$id))) {
act_fpd <- final_exam_nodes[itr, ]
new_act_fpd <- parse_text(act_fpd$text)
#Setting new ids for the newly edited and parsed codes
new_act_fpd$id <- paste0(act_fpd$id, "_", new_act_fpd$id)
#Keeping old parents for new fpd
new_act_fpd$parent[new_act_fpd$parent != 0] <- paste0(act_fpd$id, "_", new_act_fpd$parent[new_act_fpd$parent != 0])
new_act_fpd$parent[new_act_fpd$parent == 0] <- act_fpd$parent
#Calling a pre-wriiten rco::function....
new_act_fpd$pos_id <- create_new_pos_id(act_fpd, nrow(new_act_fpd), act_fpd$id)
#Fixing the next_spaces section of new_fpd
new_act_fpd$next_spaces[nrow(new_act_fpd)] <- act_fpd$next_spaces
#Fixing the next_lines section of new_fpd
new_act_fpd$next_lines[nrow(new_act_fpd)] <- act_fpd$next_lines
#Fixing the prev_spaces section of new_fpd
new_act_fpd$prev_spaces[which(new_act_fpd$terminal)[[1]]] <- act_fpd$prev_spaces
#Merging the new_fpd and the act_fpd(obtained upon iteration)
new_fpd <- rbind(new_fpd, new_act_fpd)
#Ordering the new_fpd according to the pos_id
new_fpd <- new_fpd[order(new_fpd$pos_id), ]
}
# Final Steps of molding the not_to_edit and final_exam_nodes into the same fpd ####
resultant_fpd <- rbind(not_to_edit, new_fpd)
resultant_fpd <- resultant_fpd[order(resultant_fpd$pos_id), ]
test_fpd <- flatten_leaves(resultant_fpd)
comments_ids <- curr_new_line_ids <- next_line_ids <- correction_nodes <- correction_ids <- NULL
correction_nodes <- test_fpd[test_fpd$parent == 0 & is.na(test_fpd$next_spaces) & is.na(test_fpd$next_lines) & is.na(test_fpd$prev_spaces), ]
for(i in correction_nodes$id){
correction_ids <- append(correction_ids, which(test_fpd$id == i, arr.ind = TRUE))
}
test_fpd[which(is.na(test_fpd$next_lines)), "next_lines"] <- 0
test_fpd[which(is.na(test_fpd$next_spaces)), "next_spaces"] <- 0
test_fpd[which(is.na(test_fpd$prev_spaces)), "prev_spaces"] <- 0
comments_ids <- which(test_fpd$parent < 0)
correction_ids <- append(correction_ids, comments_ids)
for(i in correction_ids){
if((i-1) > 0){
test_fpd[i-1, "next_lines"] <- 1
}else{
next
}
}
test_fpd <- test_fpd[!(test_fpd$parent == 0 & test_fpd$terminal == FALSE), ]
deletion_nodes <- NULL
for(i in final_exam_nodes_ids){
if(i %in% test_fpd$id){
deletion_nodes <- rbind(deletion_nodes, get_children(test_fpd, i))
}
}
next_line_ids <- as.double(test_fpd[test_fpd$id %in% deletion_nodes$id & test_fpd$next_lines == 1, "pos_id"])
test_fpd <- test_fpd[!(test_fpd$id %in% deletion_nodes$id), ]
for(i in next_line_ids){
curr_new_line_ids <- append(curr_new_line_ids,
max(which(test_fpd$pos_id < i)))
}
for (i in curr_new_line_ids) {
test_fpd[i, ]$next_lines <- 1
}
#Copying the behaviour of "';'" from fpd to test_fpd
if("';'" %in% test_fpd$token) {
semiColon_indices <- which("';'" == fpd$token)
for(i in seq_len(length(semiColon_indices))) {
fpd_nextSpaces <- fpd[semiColon_indices[i], "next_spaces"]
fpd_nextLines <- fpd[semiColon_indices[i], "next_lines"]
fpd_prevSpaces <- fpd[semiColon_indices[i], "prev_spaces"]
fpd_id <- fpd[semiColon_indices[i], "id"]
testFpd_index <- which(fpd_id == test_fpd$id)
test_fpd[testFpd_index, "next_spaces"] <- fpd_nextSpaces
test_fpd[testFpd_index, "next_lines"] <- fpd_nextLines
test_fpd[testFpd_index, "prev_spaces"] <- fpd_prevSpaces
}
}
return(test_fpd)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.