RunNetSNE: Run Net-SNE.

Description Usage Arguments Examples

Description

Run Net-SNE in three possible modes: "t-SNE" for a t-SNE-like embedding, "Learn" for training Net-SNE on an embedding, and "Project" to project a new dataset onto an embedding previously learnt (with "Learn").
Input: Depends on the mode.
Output: Different text (.txt) files depending on the mode saved in a single folder.
Requirement: C++ implementation of Net-SNE installed and functional.

Usage

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
RunNetSNE(utilis = c("t-SNE", "Learn", "Project"), out.dims = 2L,
          max.iter = 1000L, theta = 0.5, step.method = "adam",
          NN.layers = 2L, NN.units = 50L, NN.function = "relu", l2.reg.param = 0,
          mom.init = 0.5, mom.final = 0.8, mom.switch.iter = 250L,
          early.exag.iter = 250L, learn.rate = 0.02,
          local.sample = 20L, batch.frac = 0.1, min.sample.Z = 0.1,
          sgd = TRUE, seed = -1, verbose = TRUE,
          permute.after.iters = NULL, save.iters.cache = NULL,
          path.netSNE.dir = path.netSNE.dir,
          path.to.bin.train.file = path.to.bin.file, path.to.bin.test.file = NULL,
          path.to.simil.file = NULL, path.output.dir, path.ref.embedding = NULL,
          path.to.model.dir = NULL, model.prefix = "model_final")

Arguments

utilis

Character; Vector to choose the use of Net-SNE (default: c("t-SNE", "Learn", "Project") for running the three)

out.dims

Integer; Specifies the output dimensionality (default: 2)

max.iter

Integer; Specifies the number of iterations (default: 1e3)

theta

Double; Bounded by 0 and 1, controls the accuracy-efficiency tradeoff in SPTree for gradient computation; 0 means exact (default: 0.5)

step.method

Character; Specifies the gradient step schedule. Possible values: 'adam', 'mom' (momentum), 'mom_gain' (momentum with gains) or 'fixed' (default: adam)

NN.layers

Integer: Number of layers in the Neural Network (default: 2)

NN.units

Integer: Number of units for each layer in the Neural Network (default: 50)

NN.function

Character: Specifies the activation function of the Neural Network. Possible values: 'sigmoid' or 'relu' (default: relu)

l2.reg.param

Numerical; L2 regularization parameter for introducing sparsity in the Neural Network and avoiding overfitting of the training data (default: 0, i.e. no sparsity)

mom.init

Double; Bounded by 0 and 1, defines the momentum used before n=mom.switch.iter iterations (default: 0.5)

mom.final

Double; Bounded by 0 and 1, defines the momentum used after n=mom.switch.iter iterations (default: 0.8)

mom.switch.iter

Integer; Number of iterations before switching the value of momentum (default: 250)

early.exag.iter

Integer; Number of iterations of early exaggeration (default: 250)

learn.rate

Double; Learning rate used for gradient steps (default: 0.02)

local.sample

Integer: Number of local samples for each data point in the mini-batch (default: 20)

batch.frac

Double; Fraction of data to sample for mini-batch (default: 0.1, i.e. 10%)

min.sample.Z

Double; Minimum fraction of data to use for approximating the normalization factor Z in the gradient (default: 0.1, i.e. 10%)

sgd

Logical; Set to TRUE to use SGD acceleration. If set to FALSE (effective for small datasets), equivalent to t-SNE with an additional backpropagation step to train a neural network (default: TRUE)

seed

Integer; Equivalent to set.seed (default: -1, i.e. use current time as seed)

verbose

Logical; Should the outputs be printed to the console? (default: TRUE)

permute.after.iters

Integer; Number of iterations after which the ordering of data points is repeatedly permuted for fast mini-batching (default: NULL, i.e. permute.after.iters = max.iter)

save.iters.cache

Integer; Number of iterations after which an intermediary embedding is repeatedly recorded. The final embedding (Y_final.txt) is always saved (default: NULL, i.e. no intermediary embedding is recorded, only the definitive one after max.iter)

path.netSNE.dir

Character; The path to the directory containing the executables created after Net-SNE installation (usually: path/to/netsne-master/bin). Must end with the name of the directory holding the executables (here: '/bin')

path.to.bin.train.file

Character; The path to the binary file of the train data matrix obtained with Write_binary_file (same as parameter path.to.bin.file in RunBhtSNE)

path.to.bin.test.file

Character; The path to the binary file of the test data matrix obtained with Write_binary_file

path.to.simil.file

Character; The path to the binary file obtained with Compute_similarities

path.output.dir

Character; The path to the output directory where the .txt files are saved

path.ref.embedding

Character; The path to the Y_final.txt file produced by RunBhtSNE (or RunNetSNE in 't-SNE' mode) used as a reference embedding to learn

path.to.model.dir

Character; The path to the directory obtained with RunNetSNE in 'Learn' mode, containing 'model_final*.txt' files used to project new data.

model.prefix

Character; Prefix of the text files used as model for RunNetSNE 'Project' mode (default: 'model_final')

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
## RunNetSNE: drive the external Net-SNE C++ executable in up to three modes:
##   - "t-SNE":   compute a t-SNE-like embedding from similarities (--input-P)
##   - "Learn":   train the neural network against a reference embedding (--input-Y)
##   - "Project": map new data through a previously learnt model (--test-model)
## The binary writes its .txt outputs into `path.output.dir`; this wrapper only
## validates arguments, builds the command lines and dispatches them via system().
##
## NOTE(review): the defaults `path.netSNE.dir = path.netSNE.dir` and
## `path.to.bin.train.file = path.to.bin.file` are self-/globally-referential
## promises inherited from the original signature — they error when the caller
## omits them, so both arguments are effectively required. Kept unchanged for
## interface compatibility.
RunNetSNE <- function (utilis = c("t-SNE", "Learn", "Project"), out.dims = 2L,
    max.iter = 1000L, theta = 0.5, step.method = "adam", NN.layers = 2L,
    NN.units = 50L, NN.function = "relu", mom.init = 0.5, mom.final = 0.8,
    mom.switch.iter = 250L, early.exag.iter = 250L, learn.rate = 0.02,
    local.sample = 20L, min.sample.Z = 0.1, l2.reg.param = 0,
    sgd = TRUE, seed = -1, verbose = TRUE, batch.frac = 0.1,
    random.init = TRUE, permute.after.iters = NULL, save.iters.cache = NULL,
    path.netSNE.dir = path.netSNE.dir, path.to.bin.train.file = path.to.bin.file,
    path.to.bin.test.file = NULL, path.to.simil.file = NULL,
    path.output.dir, path.ref.embedding = NULL, path.to.model.dir = NULL,
    model.prefix = "model_final")
{
    # --- Argument validation (runs before any external command is built) ---
    utilis <- tolower(utilis)
    known.uses <- c("t-sne", "tsne", "learn", "train",
                    "project", "proj", "projection")
    # setdiff() is empty-safe, unlike the 1:length() loop it replaces.
    unknown <- setdiff(utilis, known.uses)
    if (length(unknown) > 0) {
        stop(paste(unknown[1], ": Unknown use of Net-SNE (param: utilis).\nPossible values:\n\t- 't-SNE': t-SNE like embedding\n\t- 'Learn': train Net-SNE on a training dataset to later project new data on the embedding\n\t- 'Project': project a new dataset on a prior embedding (obtained with utilis = Train)"),
            call. = FALSE)
    }
    if (!step.method %in% c("adam", "mom", "mom_gain", "fixed")) {
        stop(paste(step.method, ": Unknown gradient step schedule (param: step.method).\nPossible values:'adam', 'mom' (momentum), 'mom_gain' (momentum with gains) or 'fixed'"),
            call. = FALSE)
    }
    if (!NN.function %in% c("relu", "sigmoid")) {
        stop(paste(NN.function, ": Unknown activation function of the neural network (param: NN.function).\nPossible values:'relu'or 'sigmoid'"),
            call. = FALSE)
    }

    # Full path to the RunNetsne executable inside the installation directory.
    netSNE.bin <- file.path(path.netSNE.dir, "RunNetsne")

    # Append the gradient/NN options shared by the "t-SNE" and "Learn" modes.
    add_common_opts <- function(command) {
        command <- paste(command, "--out-dim", out.dims, "--max-iter",
            max.iter, "--rand-seed", seed, "--theta", theta,
            "--learn-rate", learn.rate, "--mom-init", mom.init,
            "--mom-final", mom.final, "--mom-switch-iter", mom.switch.iter,
            "--early-exag-iter", early.exag.iter, "--num-local-sample",
            local.sample, "--batch-frac", batch.frac, "--min-sample-Z",
            min.sample.Z, "--l2-reg", l2.reg.param, "--step-method",
            step.method, "--num-layers", NN.layers, "--num-units",
            NN.units, "--act-fn", NN.function)
        if (!random.init) {
            command <- paste(command, "--skip-random-init")
        }
        if (!is.null(permute.after.iters)) {
            command <- paste(command, "--perm-iter", permute.after.iters)
        }
        if (!is.null(save.iters.cache)) {
            command <- paste(command, "--cache-iter", save.iters.cache)
        }
        if (!sgd) {
            command <- paste(command, "--no-sgd")
        }
        command
    }

    # Announce the mode (in colour when crayon is available), optionally
    # silence the binary's stdout, then run the assembled command line.
    run_mode <- function(command, banner) {
        if (!verbose) {
            command <- paste(command, "> /dev/null")
        }
        # requireNamespace() is the idiomatic (and fast) availability check;
        # installed.packages() rescans every library path on each call.
        if (requireNamespace("crayon", quietly = TRUE)) {
            cat(crayon::bold(crayon::red(banner)))
        }
        else {
            cat(banner)
        }
        system(command)
    }

    # --- "t-SNE" mode: embed from precomputed similarities ---
    if (any(c("t-sne", "tsne") %in% utilis)) {
        command <- paste(netSNE.bin, "--input-P", path.to.simil.file,
            "--input-X", path.to.bin.train.file, "--out-dir",
            path.output.dir)
        run_mode(add_common_opts(command),
                 "\n\nRunning NetSNE !  (t-SNE like)\n\n")
    }
    # --- "Learn" mode: train the network against a reference embedding ---
    if (any(c("learn", "train") %in% utilis)) {
        command <- paste(netSNE.bin, "--input-Y", path.ref.embedding,
            "--input-X", path.to.bin.train.file, "--out-dir",
            path.output.dir)
        run_mode(add_common_opts(command),
                 "\n\nRunning NetSNE !  (learning embedding)\n\n")
    }
    # --- "Project" mode: push new data through a saved model ---
    if (any(c("project", "proj", "projection") %in% utilis)) {
        command <- paste(netSNE.bin, "--input-X", path.to.bin.test.file,
            "--init-model-prefix", file.path(path.to.model.dir, model.prefix),
            "--test-model", "--no-target", "--out-dir",
            path.output.dir)
        run_mode(command, "\n\nRunning NetSNE !  (projection)\n\n")
    }
  }

schwikowskilab/rNetSNE documentation built on May 4, 2019, 6:40 p.m.