homework.R

setwd("~/Desktop/systems biology/")
snp = read.table("data.txt", header = T, sep ="\t", row.names = NULL)
# Now you have genotype data for 9 SNPs in 83 unrelated subjects.
#Let's assume that you need to test all the 9 SNPs for Hardy-Weinberg equilibrium (HWE). 
#Assess whether the first SNP (named SNP1) is statistically under HWE by using chi-square tests.
#You should make your decision after correcting multiple testing by Bonferroni-correction and Benjamini-Hochberg false discovery rate (FDR) methods.
pval_ls = c()
for (i in 2:length(names(snp))) {
    print(i)
    obs = summary(factor(snp[,i], levels =c("AA","AB","BB")))
    print(obs)
    snpge <- as.character(snp[,i])
    snpge_ <- strsplit(snpge, "")
    snpge_sep <- setNames(do.call(rbind.data.frame, snpge_), c("allele1","allele2"))
    hwal = summary(factor(c(as.character(snpge_sep$allele1), as.character(snpge_sep$allele2))), levels =c("A","B"))
    hwal = hwal/ sum(hwal)
    exp = c(hwal[1]^2, 2*hwal[1]*hwal[2], hwal[2]^2)*sum(obs)
    names(exp) = c("AA","AB","BB")
    xtot = sum((abs(obs -exp) - c(0.5,1.0,0.5))^2/exp)
    pval = 1-pchisq(xtot,2)
    print(pval)
    pval_ls <- c(pval_ls,pval)
}
pval_ls
pval_df = data.frame(name = names(snp)[2:10], pvalue = pval_ls)
pval_df_order = pval_df[order(pval_df$pvalue, pval_df$name),]
pval_df_order = cbind(pval_df_order, rank = c(1:nrow(pval_df_order)))
pval_df_order = cbind(pval_df_order, FDR_threshold = pval_df_order$rank / nrow(pval_df_order) * 0.05)
pval_df_order = cbind(pval_df_order, Bonferroni_correction_threshold = 0.001 / 9)
pval_df_order
pval_df_order = pval_df_order[order(pval_df_order$name),]
pval_df_order
results <- pval_df_order[,c(2,4,5)]
row.names(results) <- pval_df_order[,1]
results
#SNP1은 pvalue는 cv보다 크지만 FDR control 결과, FDR값보다 작은 largest pvalue이고 즉, HWE 라고 할 수 없다고 보여진다. (FDR = 0.05, set HWc.v as 10e-4)

library(fdrtool)
data(pvalues)
pvalues
fdr = fdrtool(pvalues, statistic = "pvalue")
fdr$qval
fdr = fdrtool(pval_ls, statistic = "pvalue")
fdr$qval
pval_ls
faarseer/systems_biology documentation built on May 20, 2019, 5:20 p.m.