Test for Outlier

Description

This function is used internally by the function 'plot.pca'. It performs a test for outliers based on the geometric distance of each point in the PCA score plot from a 95% confidence ellipse.

Usage

1
outlier(pcx, pcy, scaling)

Arguments

pcx

an integer indicating the principal component to be plotted in x.

pcy

an integer indicating the principal component to be plotted in y.

scaling

a character string indicating the name of the scaling previously specified in the function 'explore.data'.

Details

For details see ?plot.pca.

Author(s)

Edoardo Gaude, Dimitrios Spiliotopoulos, Francesca Chignola, Silvia Mari, Andrea Spitaleri and Michela Ghitti

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
## The function is currently defined as
# Flag observations that fall outside an ellipse fitted to two PCA score
# columns.
#
# Reads "PCA_Data_<scaling>/PCA_ScoreMatrix.csv" from the current working
# directory (first column = observation names, remaining columns = scores),
# fits an ellipse to the (pcx, pcy) score pair and flags every observation
# whose summed distance to the two foci exceeds twice the semi-major axis.
#
# Arguments:
#   pcx, pcy  integer indices of the score columns used as x and y.
#   scaling   character string naming the scaling; selects the data folder.
#
# Side effects: writes "PCA_Data_<scaling>/Outliers_PC<pcx>vs<pcy>.csv"
# (one header line, then one flagged observation name per line) and prints
# either the flagged observations or "No outliers are detected".
#
# Returns (invisibly, via print) the data frame read back from the output
# file, or the "No outliers are detected" string.
outlier <- function(pcx, pcy, scaling) {
    data_dir <- file.path(getwd(), paste0("PCA_Data_", scaling))
    score_path <- file.path(data_dir, "PCA_ScoreMatrix.csv")
    out_path <- file.path(
        data_dir, paste0("Outliers_PC", pcx, "vs", pcy, ".csv")
    )

    score <- read.csv(score_path, sep = ",", header = TRUE)
    obs_names <- as.character(score[, 1])
    # drop = FALSE keeps a data frame even when only one score column exists.
    score_x <- score[, -1, drop = FALSE]

    px <- score_x[[pcx]]
    py <- score_x[[pcy]]
    if (!is.numeric(px) || !is.numeric(py) || anyNA(px) || anyNA(py)) {
        stop("score columns must be numeric and free of missing values",
             call. = FALSE)
    }
    if (length(px) < 2L) {
        stop("at least two observations are required", call. = FALSE)
    }

    # Centre both score vectors.
    dx <- px - mean(px)
    dy <- py - mean(py)
    sxx <- sum(dx * dx)
    syy <- sum(dy * dy)
    sxy <- sum(dx * dy)

    # Rotation angle that decorrelates the two centred score vectors.
    theta <- 0.5 * atan((2 * sxy) / (syy - sxx))
    if (is.nan(theta)) {
        # 0/0: equal spread and no covariance, so any orientation works.
        theta <- 0
    }
    cos_t <- cos(theta)
    sin_t <- sin(theta)
    rot_x <- (cos_t * dx) - (sin_t * dy)
    rot_y <- (sin_t * dx) + (cos_t * dy)

    # Semi-axes of the ellipse. The multiplier is kept unchanged from the
    # original implementation.
    # NOTE(review): the help page describes a 95% ellipse -- confirm that
    # this constant matches the intended confidence level.
    axis_mult <- 3.03315
    semi_x <- axis_mult * sd(rot_x)
    semi_y <- axis_mult * sd(rot_y)
    focal <- sqrt(abs(semi_x^2 - semi_y^2))

    # Distances are evaluated in the rotated frame (rotation preserves
    # Euclidean distance), with the foci placed on whichever rotated axis
    # has the larger spread. The previous code always used the x axis and
    # 2 * semi_x as threshold, which flagged every observation whenever the
    # y spread was the larger one.
    if (semi_x >= semi_y) {
        dist_sum <- sqrt((rot_x - focal)^2 + rot_y^2) +
            sqrt((rot_x + focal)^2 + rot_y^2)
    } else {
        dist_sum <- sqrt(rot_x^2 + (rot_y - focal)^2) +
            sqrt(rot_x^2 + (rot_y + focal)^2)
    }
    threshold <- 2 * max(semi_x, semi_y)
    flagged <- obs_names[dist_sum > threshold]

    # Same on-disk format as before: header line, then "<name>  " per line.
    # sprintf() yields character(0) when nothing is flagged, so only the
    # header is written in that case.
    cat("The following observations are calculated as outliers \n",
        sprintf("%s  \n", flagged),
        file = out_path, sep = "")

    if (length(flagged) == 0L) {
        print("No outliers are detected")
    } else {
        print(read.csv(out_path, header = TRUE))
    }
}