checkData: Checks consistence of data and corrects for missingness. Maps...

Description Usage Arguments Examples

Description

Checks the consistency of the data and corrects for missingness. Maps the missing genes and deletes genes that are unusable.

Usage

1
checkData(d, Type = c("Train", "Test"))

Arguments

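d: A data frame. The first column holds the cell IDs (character, no missing or repeated values), the following columns hold numeric gene expression values, and, for training data, the last column holds the class label (0 or 1).
Type: Either "Train" (the last column of d is the class label) or "Test" (d has no class label column).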

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
# Validate the layout of an expression data frame before training/testing.
#
# Layout enforced by the checks below:
#   column 1      : cell IDs - character, no NAs, no duplicates
#   columns 2..k  : gene expression - numeric; a column that is entirely NA
#                   only triggers a printed warning
#   last column   : class label with exactly the two values 0 and 1
#                   (checked only when Type is "Train")
#
# Arguments:
#   d    : data frame to validate.
#   Type : "Train" (last column is the class label) or "Test" (no label
#          column). Resolved with match.arg(), so the default is "Train".
#
# Returns 1 when every check passes and 0 as soon as one fails. Diagnostics
# are print()ed, not signalled as warnings/errors.
checkData <- function(d, Type = c("Train", "Test")) {
    # Collapse the length-2 default to a single choice; comparing the raw
    # default inside if() is an error in current R.
    Type <- match.arg(Type)
    is_train <- Type == "Train"

    if (!is.data.frame(d)) {
        print("Data is not a data frame. Kindly check data format")
        return(0)
    }
    n_col <- ncol(d)

    # ---- Cell IDs (first column) ----
    # A data frame without columns has no ID column; NULL fails the
    # character test below and is reported as an invalid cell ID.
    ids <- if (n_col >= 1L) d[[1L]] else NULL
    if (!is.character(ids)) {
        print("Cell ID not valid character. Check cell ID format")
        return(0)
    }
    if (anyNA(ids)) {
        print("Cell ID Missing. Missing cell ID not allowed.")
        return(0)
    }
    if (anyDuplicated(ids) > 0L) {
        print("Repetitive cell ID not allowed")
        return(0)
    }

    # ---- Class labels (last column, training data only) ----
    if (is_train) {
        classes <- na.omit(unique(d[[n_col]]))
        if (length(classes) != 2L) {
            print("Two classes are required. Data does not have two classes.")
            return(0)
        }
        if (!any(classes == 1) || !any(classes == 0)) {
            print("Class labels need to be 0 and 1. Please check data")
            return(0)
        }
    }

    # ---- Gene expression columns ----
    last_gene <- if (is_train) n_col - 1L else n_col
    if (last_gene < 2L) {
        # Without this guard 2:last_gene would count backwards and pick up
        # the ID column.
        print("No gene expression columns found. Please check data")
        return(0)
    }
    gene_idx <- 2:last_gene

    # Look at names(d) directly: subsetting a data frame with `[` makes
    # duplicated column names unique, which would hide the problem.
    if (anyDuplicated(names(d)[gene_idx]) > 0L) {
        print("Repetitive variable names not allowed")
        return(0)
    }

    # Columns are accessed one at a time with `[[`, so a single gene column
    # is handled the same way as many.
    for (i in gene_idx) {
        values <- d[[i]]
        if (all(is.na(values))) {
            print(paste0("Warning: ", names(d)[i], " contains all NAs."))
        } else if (!is.numeric(values)) {
            print("Non-numeric data in gene expression")
            print(paste0("Check the following gene: ", names(d)[i]))
            return(0)
        }
    }

    print("Cell ID check OK")
    if (is_train) {
        # Only claim the class check when it was actually performed.
        print("Class Status check OK")
    }
    print("Gene expression data type check OK")
    1
}

bvnlab/SCATTome documentation built on May 13, 2019, 9:05 a.m.