pDNM: A function to create features from autosomes in a (human) VCF...

Usage Arguments Examples

Usage

1
pDNM(vcf, pat.col = 1, mat.col = 2, child.col = 3)

Arguments

vcf
pat.col
mat.col
child.col

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
function (vcf, pat.col = 1, mat.col = 2, child.col = 3) 
{
    sn = seqnames(vcf)
    if (!all(sn %in% c(1:22, paste("chr", c(1:22), sep = "")))) 
        stop("this function intended for autosomes only")
    st = start(vcf)
    g = geno(vcf)$GT
    p1 = g[, pat.col] == g[, mat.col]
    p2 = g[, pat.col] == "0/0" | g[, pat.col] == "1/1" | g[, 
        pat.col] == "2/2"
    c1 = g[, child.col] == "0/1" | g[, child.col] == "0/2" | 
        g[, child.col] == "1/2"
    ind = p1 & p2 & c1
    pa = as.integer(substr(g[, pat.col], 1, 1)) + 1
    ca1 = as.integer(substr(g[, child.col], 1, 1)) + 1
    ca2 = as.integer(substr(g[, child.col], 3, 3)) + 1
    mut = rep(NA, length(ca1))
    mut[pa != ca1] = ca1[pa != ca1]
    mut[pa != ca2] = ca2[pa != ca2]
    ind = ind & (pa == ca1 | pa == ca2)
    REF = ref(vcf)[ind]
    ALT = alt(vcf)[ind]
    mut = mut[ind]
    pa = pa[ind]
    ca1 = ca1[ind]
    ca2 = ca2[ind]
    sn = as.vector(sn[ind])
    st = st[ind]
    AD = geno(vcf)$AD[ind, ]
    PL = geno(vcf)$PL[ind, ]
    GQ = geno(vcf)$GQ[ind, ]
    GT = geno(vcf)$GT[ind, ]
    par_allele = rep("", length(REF))
    par_allele[pa == 1] = as.character(REF[pa == 1])
    par_allele[pa == 2] = sapply(ALT[pa == 2], function(x) as.character(x)[1])
    par_allele[pa == 3] = sapply(ALT[pa == 3], function(x) as.character(x)[2])
    mut_allele = rep("", length(REF))
    mut_allele[mut == 1] = as.character(REF[mut == 1])
    mut_allele[mut == 2] = sapply(ALT[mut == 2], function(x) as.character(x)[1])
    mut_allele[mut == 3] = sapply(ALT[mut == 3], function(x) as.character(x)[2])
    p_mut1 = mapply(function(x, y) x[y], AD[, pat.col], mut)
    p_mut2 = mapply(function(x, y) x[y], AD[, mat.col], mut)
    p_par1 = mapply(function(x, y) x[y], AD[, pat.col], pa) + 
        1
    p_par2 = mapply(function(x, y) x[y], AD[, mat.col], pa) + 
        1
    c_par = mapply(function(x, y) x[y], AD[, child.col], pa) + 
        1
    c_mut = mapply(function(x, y) x[y], AD[, child.col], mut)
    AD_X = data.frame(p_ar_max = pmax(p_mut1/p_par1, p_mut2/p_par2, 
        na.rm = T), p_ar_min = pmin(p_mut1/p_par1, p_mut2/p_par2, 
        na.rm = T), c_ar = c_mut/c_par)
    p_homo_PL1 = mapply(function(x, y) x[c(1, 3, 6)][y], PL[, 
        pat.col], pa)
    p_homo_PL2 = mapply(function(x, y) x[c(1, 3, 6)][y], PL[, 
        mat.col], pa)
    c_homo_PL = mapply(function(x, y) x[c(1, 3, 6)][y], PL[, 
        child.col], pa)
    c_gt = rep(NA, nrow(GT))
    c_gt[GT[, child.col] == "0/1"] = 1
    c_gt[GT[, child.col] == "0/2"] = 2
    c_gt[GT[, child.col] == "1/2"] = 3
    p_het_PL1 = mapply(function(x, y) x[c(2, 4, 5)][y], PL[, 
        pat.col], c_gt)
    p_het_PL2 = mapply(function(x, y) x[c(2, 4, 5)][y], PL[, 
        mat.col], c_gt)
    c_het_PL = mapply(function(x, y) x[c(2, 4, 5)][y], PL[, child.col], 
        c_gt)
    PL_X = data.frame(p_cg_max = pmax(p_het_PL1, p_het_PL2, na.rm = T), 
        p_cg_min = pmin(p_het_PL1, p_het_PL2, na.rm = T), c_cg = c_het_PL, 
        p_pg_max = pmax(p_homo_PL1, p_homo_PL2, na.rm = T), p_pg_min = pmin(p_homo_PL1, 
            p_homo_PL2, na.rm = T), c_pg = c_homo_PL)
    X = data.frame(chr = sn, pos = st, QL = fixed(vcf)$QUAL[ind], 
        VQ = info(vcf)$VQSLOD[ind], SB = info(vcf)$SB[ind], BZ = info(vcf)$BaseQRankSum[ind], 
        DL = info(vcf)$Dels[ind], FS = info(vcf)$FS[ind], HS = info(vcf)$HaplotypeScore[ind], 
        MQ = info(vcf)$MQ[ind], MZ = info(vcf)$MQRankSum[ind], 
        QD = info(vcf)$QD[ind], PZ = info(vcf)$ReadPosRankSum[ind], 
        GQ_p_min = pmin(geno(vcf)$GQ[ind, pat.col], geno(vcf)$GQ[ind, 
            mat.col], na.rm = T), GQ_p_max = pmax(geno(vcf)$GQ[ind, 
            pat.col], geno(vcf)$GQ[ind, mat.col], na.rm = T), 
        GQ_c = geno(vcf)$GQ[ind, child.col], N_alt = sapply(alt(vcf)[ind], 
            length), par_allele = par_allele, mut_allele = mut_allele)
    X = cbind(AD_X, PL_X, X)
    rownames(X) = paste(sn, st, sep = ":")
    return(X)
  }

mbelmadani/forestDNM-archive documentation built on May 30, 2019, 2:17 p.m.