chi2fisher: chi2fisher

Usage Arguments Examples

View source: R/r_standard_chi2_test.R

Usage

1
chi2fisher(inputfilepath, outputfilepath, th)

Arguments

inputfilepath
outputfilepath
th

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
function (inputfilepath, outputfilepath, th) 
{
    library(gdsfmt)
    library(foreach)
    library(parallel)
    library(doParallel)
    f <- openfn.gds(filename = inputfilepath, readonly = T)
    phenotype <- as.factor(as.numeric(as.factor(as.character(read.gdsn(index.gdsn(f, 
        "phenotype"))))))
    phen_levels_number <- nlevels(phenotype)
    phenotype_length <- length(phenotype)
    phenotype <- as.numeric(phenotype)
    genotype <- read.gdsn(index.gdsn(f, "genotype"), c(1, 1), 
        c(-1, -1))
    genotypes_num_cols <- ncol(genotype)
    closefn.gds(f)
    start.time <- Sys.time()
    genotype_d <- matrix(genotype, ncol = genotypes_num_cols)
    genotype_d[genotype_d == 0 | genotype_d == 1] <- 0
    genotype_d[genotype_d == 2] <- 1
    genotype_r <- matrix(genotype, ncol = genotypes_num_cols)
    genotype_r[genotype_r == 1 | genotype_r == 2] <- 1
    genotype_allele1 <- matrix(as.numeric(genotype == 2) + 3 * 
        as.numeric(genotype == 3), nrow = phenotype_length)
    genotype_allele2 <- matrix(as.numeric(genotype == 2 | genotype == 
        1) + 3 * as.numeric(genotype == 3), nrow = phenotype_length)
    genotype_allele <- rbind(genotype_allele2, genotype_allele1)
    phenotype_allele <- c(phenotype, phenotype)
    cores <- th
    cl <- makePSOCKcluster(cores)
    registerDoParallel(cl)
    res <- data.frame(matrix(ncol = genotypes_num_cols, nrow = phenotype_length))
    res <- foreach(i = icount(genotypes_num_cols), .combine = rbind) %dopar% 
        {
            cur_genotype_cd <- genotype[, i]
            cd_table <- table(phenotype, cur_genotype_cd[cur_genotype_cd != 
                3])
            cd_stat <- chisq.test(cd_table, correct = F)
            stats_cd <- cd_stat$statistic
            degrees_freedom_cd <- cd_stat$parameter
            p_values_cd <- cd_stat$p.value
            corr_cd <- cor(phenotype, cur_genotype_cd[cur_genotype_cd != 
                3])
            cur_genotype_d <- genotype_d[, i]
            d_table <- table(phenotype, factor(cur_genotype_d[cur_genotype_d != 
                3], levels = 0:1))
            expected_table_d <- as.array(margin.table(d_table, 
                1)) %*% t(as.array(margin.table(d_table, 2)))/margin.table(d_table)
            min_d <- min(expected_table_d, na.rm = T)
            corr_d <- cor(phenotype, cur_genotype_d[cur_genotype_d != 
                3])
            if (min_d > 5) {
                d_stat <- chisq.test(d_table, correct = F)
                stats_d <- format(d_stat$statistic, scientific = F)
                degrees_freedom_d <- d_stat$parameter
                p_values_d <- d_stat$p.value
            }
            else {
                d_stat <- fisher.test(d_table)
                stats_d <- d_table[1, 1]
                p_values_d <- d_stat$p.value
                degrees_freedom_d <- -1
            }
            cur_genotype_r <- genotype_r[, i]
            r_table <- table(phenotype, factor(cur_genotype_r[cur_genotype_r != 
                3], levels = 0:1))
            expected_table_r <- as.array(margin.table(r_table, 
                1)) %*% t(as.array(margin.table(r_table, 2)))/margin.table(r_table)
            min_r <- min(expected_table_r, na.rm = T)
            corr_r <- cor(phenotype, cur_genotype_r[cur_genotype_r != 
                3])
            if (min_r > 5) {
                r_stat <- chisq.test(r_table, correct = F)
                stats_r <- r_stat$statistic
                degrees_freedom_r <- r_stat$parameter
                p_values_r <- r_stat$p.value
            }
            else {
                r_stat <- fisher.test(r_table)
                stats_r <- r_table[1, 1]
                p_values_r <- r_stat$p.value
                degrees_freedom_r <- -1
            }
            cur_genotype_allele <- genotype_allele[, i]
            allele_table <- table(phenotype_allele, factor(cur_genotype_allele[cur_genotype_allele != 
                3], levels = 0:1))
            corr_allele <- cor(phenotype_allele, cur_genotype_allele[cur_genotype_allele != 
                3])
            expected_table_allele <- as.array(margin.table(allele_table, 
                1)) %*% t(as.array(margin.table(allele_table, 
                2)))/margin.table(allele_table)
            min_allele <- min(expected_table_allele, na.rm = T)
            if (min_allele > 5) {
                allele_stat <- chisq.test(allele_table, correct = F)
                stats_allele <- allele_stat$statistic
                degrees_freedom_allele <- allele_stat$parameter
                p_values_allele <- allele_stat$p.value
            }
            else {
                allele_stat <- fisher.test(allele_table)
                stats_allele <- allele_table[1, 1]
                p_values_allele <- allele_stat$p.value
                degrees_freedom_allele <- -1
            }
            return(c(cd = stats_cd, df_cd = degrees_freedom_cd, 
                p_values_cd = p_values_cd, corr_cd = corr_cd, 
                d = stats_d, df_d = degrees_freedom_d, p_values_d = p_values_d, 
                corr_d = corr_d, r = stats_r, df_r = degrees_freedom_r, 
                p_values_r = p_values_r, corr_r = corr_r, allele = stats_allele, 
                df_allele = degrees_freedom_allele, p_values_allele = p_values_allele, 
                corr_allele = corr_allele))
        }
    stopImplicitCluster()
    end.time <- Sys.time()
    time.taken <- end.time - start.time
    filename <- "chi2fisherOutput.csv"
    dir.create(outputfilepath, showWarnings = F)
    outcsv <- file.path(outputfilepath, filename)
    write.csv(res, file = outcsv)
    print(time.taken)
  }

masterSplinterIO/chi2fisher documentation built on May 29, 2019, 1:19 p.m.