FeatureSelection: Selects important genes.

Description Usage Arguments Examples

Description

Selects important genes by a combination of LASSO, Elastic Net and Random Forest, and ranks the genes in order of importance.

Usage

FeatureSelection(d)

Arguments

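d: A data frame whose first column is ignored, whose remaining columns are the gene predictors, and whose last column, named `Status`, holds the class label.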

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
function (d) 
{
    # Ranks the predictors of `d` by random-forest importance, then measures
    # by repeated 80/20 resampling how classification accuracy changes as
    # predictors are added one at a time in importance order.
    #
    # Args:
    #   d: data frame. The first column is dropped; the remaining columns are
    #      the predictors, followed by a final column named `Status` that is
    #      used (as a factor) as the response.
    #
    # Returns:
    #   A list with `Order` (predictor positions sorted by decreasing
    #   importance) and `ImportantGenes` (the matching predictor names).
    #
    # Side effects: writes "Gene_Importance_Plot.pdf",
    #   "Random_Forest_Accuracy_With_Importance_Ordering.csv" and
    #   "Accuracy_Plot_With_Model_Size.pdf" into the working directory, and
    #   attaches the randomForest package.
    if (!is.data.frame(d)) {
        stop("`d` must be a data frame.", call. = FALSE)
    }
    if (ncol(d) < 3 || !identical(names(d)[ncol(d)], "Status")) {
        stop("`d` needs a leading column (ignored), at least one predictor ",
            "column and a final column named `Status`.", call. = FALSE)
    }

    # drop = FALSE keeps a data frame even when few columns remain.
    d1 <- d[, -1, drop = FALSE]
    n_col <- ncol(d1)

    # Convert factor-coded predictors to numeric. The source must be the same
    # column of `d1`; reading from `d` would pick up the neighbouring column
    # because `d1` is shifted by one relative to `d`. The last column
    # (`Status`) is left alone.
    for (i in seq_len(n_col - 1)) {
        if (is.factor(d1[[i]])) {
            d1[[i]] <- as.numeric(as.character(d1[[i]]))
        }
    }

    # Drop incomplete rows once, so the ranking forest and every resampling
    # forest below see the same observations (the resampling forests use the
    # default NA handling and would otherwise stop on missing values).
    d1 <- na.omit(d1)

    library(randomForest)
    rf <- randomForest(as.factor(Status) ~ ., ntree = 500, data = d1, 
        replace = TRUE, na.action = na.omit)
    importanceOrder <- order(-rf$importance)

    # `finally` closes the PDF device even if plotting fails.
    pdf("Gene_Importance_Plot.pdf")
    tryCatch(varImpPlot(rf), finally = dev.off())

    n <- nrow(d1)
    n_train <- floor(0.8 * n)
    Boot <- 100
    n_genes <- length(importanceOrder)

    # One row per resample, one column per model size.
    acc_mat <- matrix(NA_real_, nrow = Boot, ncol = n_genes)
    for (i in seq_len(n_genes)) {
        # Top-i predictors plus the response (last column of `d1`).
        dt <- d1[, c(importanceOrder[seq_len(i)], n_col)]
        for (k in seq_len(Boot)) {
            index <- sample.int(n, size = n_train)
            train <- dt[index, ]
            test <- dt[-index, ]
            r1 <- randomForest(as.factor(Status) ~ ., ntree = 1000, 
                data = train)
            p1 <- predict(r1, newdata = test, type = "response")
            # `acc` is defined elsewhere in the package; presumably it returns
            # a 2 x 2 table of predicted vs. observed classes -- TODO confirm.
            a1 <- acc(p1, test$Status)
            acc_mat[k, i] <- (a1[1, 1] + a1[2, 2])/(a1[1, 1] + 
                a1[1, 2] + a1[2, 1] + a1[2, 2])
        }
    }

    RFAcc <- as.data.frame(acc_mat)
    colnames(RFAcc) <- seq_len(n_genes)
    write.csv(RFAcc, "Random_Forest_Accuracy_With_Importance_Ordering.csv", 
        quote = FALSE, row.names = FALSE)

    # Per-model-size mean and spread of the resampled accuracies.
    Mean <- vapply(RFAcc, mean, numeric(1))
    SD <- vapply(RFAcc, sd, numeric(1))

    pdf("Accuracy_Plot_With_Model_Size.pdf")
    tryCatch({
        plot(seq(1, length(Mean), 1), Mean, type = "b", ylim = c(0.8, 
            1))
        lines(seq_along(Mean), Mean - SD, type = "l", col = 4)
        lines(seq_along(Mean), Mean + SD, type = "l", col = 4)
    }, finally = dev.off())

    list(Order = importanceOrder, ImportantGenes = rownames(rf$importance)[importanceOrder])
}

bvnlab/SCATTome documentation built on May 13, 2019, 9:05 a.m.