Generate linear discriminant scores from random data, after selection

Share:

Description

Simulates the effect of generating scores from random data, possibly with predicted scores also calculated for additional 'observations'

Usage

1
2
simulateScores(nrows = 7129, cl = rep(1:3, c(19, 10, 2)), x = NULL, cl.other = NULL,
               x.other = NULL, nfeatures = 15, dimen=2, seed = NULL)

Arguments

nrows

number of rows of random data matrix

cl

classifying factor

x

data matrix (rows are features, columns are observations), by default randomly generated

cl.other

classifying factor for additional observations

x.other

additional observations

nfeatures

number of features to select (by default uses aov F-statistic)

dimen

number of sets of discriminant scores to retain (at most one less than number of levels of cl)

seed

set, if required, so that calculations can be reproduced

Value

scores

matrix of scores

cl

classifying factor

other

matrix of 'other' scores

cl.other

classifying factor for the 'other' scores

nfeatures

number of features used in generating the scores

Note

NB: Prior to version 0.53, this function (wrongly) made a random selection of features.

Author(s)

John Maindonald

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
## Discriminant scores computed from 500 rows of randomly generated data for
## three groups of sizes 19, 10 and 2; points are coloured by group.
scorelist <- simulateScores(
  nrows = 500,
  cl = rep(1:3, times = c(19, 10, 2))
)
plot(scorelist[["scores"]], col = unclass(scorelist[["cl"]]), pch = 16)


## The function is currently defined as
## Generate linear discriminant scores from (by default) random data, after
## selecting the top-ranked features.
##
## Arguments:
##   nrows      number of rows (features) of the random data matrix; replaced
##              by the number of rows of x when x is supplied
##   cl         classifying factor, one entry per column of x; numeric values
##              are turned into the labels "Gp1", "Gp2", ...
##   x          data matrix; standard normal values are generated when NULL
##   cl.other   classifying factor for additional observations, or NULL
##   x.other    data matrix of additional observations; standard normal
##              values are generated when NULL
##   nfeatures  number of top-ranked features (in the order returned by
##              orderFeatures()) to use; capped at the number of rows of x
##   dimen      number of sets of discriminant scores to retain; capped at one
##              less than the number of levels of cl
##   seed       if not NULL, passed to set.seed() so results can be reproduced
##
## Value (returned invisibly): a list with elements
##   scores     matrix of discriminant scores for the columns of x
##   cl         classifying factor
##   other      matrix of scores for x.other, or NULL when cl.other is NULL
##   cl.other   classifying factor for 'other', or NULL
##   nfeatures  number of features actually used
simulateScores <-
  function (nrows = 7129, cl = rep(1:3, c(19, 10, 2)), x = NULL,
            cl.other = NULL, x.other = NULL, nfeatures = 15, dimen = 2,
            seed = NULL)
{
  if (!is.null(seed))
    set.seed(seed)
  m <- length(cl)
  m.other <- length(cl.other)
  ## x is generated before x.other so that results for a given seed are stable
  if (is.null(x)) {
    x <- matrix(rnorm(nrows * m), nrow = nrows)
    rownames(x) <- paste(seq_len(nrows))
  } else {
    nrows <- dim(x)[1]
  }
  if (is.null(x.other)) {
    x.other <- matrix(rnorm(nrows * m.other), nrow = nrows)
    rownames(x.other) <- paste(seq_len(nrows))
  }
  if (is.numeric(cl))
    cl <- paste0("Gp", cl)
  cl <- factor(cl)
  if (!is.null(cl.other)) {
    if (is.numeric(cl.other))
      cl.other <- paste0("Gp", cl.other)
    cl.other <- factor(cl.other)
  }
  ## At most (number of groups - 1) discriminant axes exist
  if (dimen > length(levels(cl)) - 1)
    dimen <- length(levels(cl)) - 1
  ## Cannot select more features than there are rows; without this cap the
  ## index vector below would contain NA
  if (nfeatures > nrows)
    nfeatures <- nrows
  ordfeatures <- orderFeatures(x, cl = cl, values = TRUE)
  ord.use <- ordfeatures$ord[seq_len(nfeatures)]
  ## drop = FALSE keeps a matrix when a single feature is selected, so that
  ## after transposing, rows are still observations and columns features
  xUse.ord <- data.frame(t(x[ord.use, , drop = FALSE]))
  ordUse.lda <- lda(xUse.ord, grouping = cl)
  scores <- predict(ordUse.lda, dimen = dimen)$x
  if (is.null(cl.other)) {
    scores.other <- NULL
  } else {
    xUseOther.ord <- data.frame(t(x.other[ord.use, , drop = FALSE]))
    scores.other <- predict(ordUse.lda, newdata = xUseOther.ord,
                            dimen = dimen)$x
  }
  invisible(list(scores = scores, cl = cl, other = scores.other,
                 cl.other = cl.other, nfeatures = nfeatures))
}