Data used

The following example data is being used for all knapsack solutions in this package.

Our package depends on dplyr and parallel.

library(dplyr)
library(parallel)

# devtools::install_github("poceviciute/AdvRprogr_lab6")
library(knapsack)

set.seed(42)
n <- 2000
knapsack_objects <-
    data.frame(
        w=sample(1:4000, size = n, replace = TRUE),
        v=runif(n = n, 0, 10000)
    )

Brute Force Search

The Brute Force funtion computes all different combination of the selected items, then computes the value and the weight for each combination and selects the higest value within the weight restriction.

The function have two modes, run the computation one or several cores. The function defaults to R standard one core, the parallel = TRUE runs on all cores. Note, that parallel = TRUE can only be run on UNIX platforms (Mac/Linux).

Parallelisation improves performance more than two times.

brute_force_knapsack <- function(x, W, parallel = FALSE) {
    stopifnot(is.data.frame(x),
              apply(x, c(1, 2), is.numeric),
              is.numeric(W))

    stopifnot(x > 0,
              length(W) == 1,
              W > 0,
              is.logical(parallel))

    powerset <- function(items) {
        len = length(items)
        l = vector(mode = "list", length = 2 ^ len)
        l[[1]] = numeric()
        counter = 1L
        for (x in 1L:length(items)) {
            for (subset in 1L:counter) {
                counter = counter + 1L
                l[[counter]] = c(l[[subset]], items[x])
            }
        }
        return(l)
    }

    if (parallel == FALSE) {
        # initiate variables
        n <- nrow(x)
        best_v <- 0
        chosen_items <- c()
        items <- 1:n
        sets <- powerset(items)

        # loop through all possible sets (ommit empty set)
        for (i in 2:length(sets)) {
            c_sets <- unlist(sets[i])
            set_w <- 0
            set_v <- 0
            j <- 1
            # loop through the elements in the set
            while (j <= length(c_sets) && set_w <= W) {
                row <- c_sets[j]
                set_w <- set_w + x[row, 1]
                set_v <- set_v + x[row, 2]
                j <- j + 1
            }

            # compare the value of this set to the previous best value
            if (set_v > best_v && set_w <= W) {
                best_v <- round(set_v, 0)
                chosen_items <- c_sets
            }
        }
        result <- list(value = round(best_v, 2), elements = chosen_items)
        return(result)

    } else{

        cores <- 1


        selection <- unlist(mclapply(1:nrow(x), FUN = function(y) {
                combn(rownames(x), y, paste, collapse = " ")
            }, mc.cores = cores
        ))

        sum_w <- unlist(mclapply(1:nrow(x), FUN = function(y) {
                (combn(x[, "w"], y, sum))
            }, mc.cores = cores
            ))

        sum_v <- unlist(mclapply(1:nrow(x), FUN = function(y) {
                (combn(x[, "v"], y, sum))
            }, mc.cores = cores
        ))

        max_value <- max(sum_v[which(sum_w < W)])
        max_weight <- max(sum_w[which(sum_w < W)])
        elements <- selection[which(sum_v == max_value & sum_w <= W)]

        return(list(
            value = round(max_value, 0),
            weight = max_weight,
            element = elements
        ))

    }
}

lapply1core <- system.time(brute_force_knapsack(x = knapsack_objects[1:16,], W = 3500, parallel = TRUE))

Running brute force knapsack with 16 rows takes (in seconds)

single_core <- system.time(brute_force_knapsack(x = knapsack_objects[1:16,], W = 3500))
all_core <- system.time(brute_force_knapsack(x = knapsack_objects[1:16,], W = 3500, parallel = TRUE))

print(list(single_core = single_core,
           all_core = all_core))

Optimisation - What performance gain could you get by trying to improving your code?

In all functions the main time consumption is caused by looping. Changing the loops to lapply could potentially make the code run twice as fast. This is visible from brute_force_knapsack function with parameter parallel = TRUE and cores number modified to 1: lapply is used instead of loops and profiling shows that performance is much better.

print(list(single_core = single_core,
           lapply1core = lapply1core))

Dynamic Programming

Knapsack_dynamic takes into two arguments: x - data frame with weights and corresponding values, and W - the maximum allowed weight. The algorithm is based on dynamic programming approach and pseudo-code provided on Wikipedia.

Running dynamic knapsack with 500 rows takes (in seconds)

dynamic <- system.time(knapsack_dynamic(x = knapsack_objects[1:500,], W = 3500))

print(dynamic)

Greedy Heuristic

The greedy_heuristic function takes two arguments, x and w. x is the data frame and w is the maximum allowed weight of the function. The function computes the $density = \frac{v}{w}$, sorts the data in in descending density order and pick out the objects with higest density within the allowed maximum weight, w.

Running greedy knapsack with 1000000 rows takes (in seconds)

set.seed(42)
m1 <-  1000000
knapsack_objects_greedy <-
    data.frame(w = sample(1:1000000, size = m1, replace = TRUE),
               v = runif(n = m1, 0,  1000000))

greedy <- system.time(greedy_knapsack(x = knapsack_objects_greedy, W = 3500))

print(greedy)


poceviciute/AdvRprogr_lab6 documentation built on May 7, 2019, 9:43 p.m.