RunShapeIt2: Run ShapeIt2

Usage Arguments Examples

Usage

1
RunShapeIt2(sampledir, tumourplatekey, imputeinfofile, is.male, chrom, filename.input, filename.output, filename.shapeit2, seed = as.integer(Sys.time()))

Arguments

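sampledir: Directory that contains the `D-GenerateImputeInputFromAlleleFrequencies` subdirectory holding the impute input files.
tumourplatekey: Prefix of the impute input file name (`<tumourplatekey>_impute_input_chr<chrom>.txt`).
imputeinfofile: Path to a table with eight columns, read as chrom, impute_legend, genetic_map, impute_hap, impute_sample, start, end, is_par.
is.male: If TRUE, only rows of imputeinfofile with is_par == 1 are kept.
chrom: Index into the unique chromosome names listed in imputeinfofile; also used in the impute input file name. The value 23 triggers filtering of the input to positions present in the reference legend.
filename.input: Prefix of the `.gen` and `.gen.missing` files that are written and passed to ShapeIt2 via `-G`.
filename.output: Output prefix passed to ShapeIt2 via `-O`; when several reference segments are processed, per-segment outputs are suffixed `.1`, `.2`, ... and merged afterwards.
filename.shapeit2: Command (path) used to invoke ShapeIt2.
seed: Value passed to ShapeIt2 via `--seed`; defaults to `as.integer(Sys.time())`.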

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
## Phase one chromosome with ShapeIt2.
##
## Reads the reference description table (imputeinfofile), keeps the rows
## belonging to the requested chromosome, splits the sample's impute input
## into SNPs (".gen") and multi-character alleles (".gen.missing"), and runs
## ShapeIt2 once per remaining reference row. When more than one row is
## processed the per-row ".haps" outputs are merged into
## "<filename.output>.haps" and the first ".sample" file is copied to
## "<filename.output>.sample".
##
## Arguments:
##   sampledir         directory containing the
##                     "D-GenerateImputeInputFromAlleleFrequencies" folder
##   tumourplatekey    prefix of the impute input file name
##   imputeinfofile    path to the eight-column reference description table
##   is.male           if TRUE, only rows with is_par == 1 are kept
##   chrom             index into the unique chromosome names of the table;
##                     also used in the impute input file name
##   filename.input    prefix of the .gen / .gen.missing files written
##   filename.output   prefix handed to ShapeIt2 via -O
##   filename.shapeit2 command used to invoke ShapeIt2
##   seed              value handed to ShapeIt2 via --seed
##
## Returns the exit status of the final "cp" call when several rows were
## merged, otherwise invisible NULL.
function (sampledir, tumourplatekey, imputeinfofile, is.male,
    chrom, filename.input, filename.output, filename.shapeit2,
    seed = as.integer(Sys.time()))
{
    # chrom selects both the reference rows and the input file name, so a
    # missing value cannot be processed; fail with a readable message
    # instead of "missing value where TRUE/FALSE needed" further down.
    if (length(chrom) != 1 || is.na(chrom)) {
        stop("chrom must be a single non-missing chromosome index",
            call. = FALSE)
    }

    impute.info <- read.table(imputeinfofile, stringsAsFactors = FALSE)
    colnames(impute.info) <- c("chrom", "impute_legend", "genetic_map",
        "impute_hap", "impute_sample", "start", "end", "is_par")
    if (is.male) {
        impute.info <- impute.info[impute.info$is_par == 1, ]
    }
    chr_names <- unique(impute.info$chrom)
    # which() turns an NA comparison (chrom out of range) into "no rows"
    # rather than a block of all-NA rows.
    impute.info <- impute.info[which(impute.info$chrom == chr_names[chrom]), ]

    n <- nrow(impute.info)
    if (n == 0) {
        stop("no reference rows found for chrom ", chrom, " in ",
            imputeinfofile, call. = FALSE)
    }

    # Split a table of impute input into SNPs and indels and write both.
    # Columns 4 and 5 hold the alleles; anything longer than one character
    # is written to the ".gen.missing" file instead of ".gen".
    write.gen.split <- function(impute, prefix) {
        is.indel <- nchar(impute[[4]]) > 1 | nchar(impute[[5]]) > 1
        is.indel[is.na(is.indel)] <- FALSE
        # A logical mask is used on purpose: impute[-which(...), ] would
        # drop every row when there are no indels at all.
        write.table(impute[!is.indel, ], file = paste0(prefix, ".gen"),
            quote = FALSE, sep = " ", row.names = FALSE,
            col.names = FALSE)
        write.table(impute[is.indel, ], file = paste0(prefix,
            ".gen.missing"), quote = FALSE, sep = " ", row.names = FALSE,
            col.names = FALSE)
    }

    # The sample's impute input does not depend on the reference row, so
    # it is read once up front.
    impute.all <- read.table(paste0(sampledir, "/D-GenerateImputeInputFromAlleleFrequencies/",
        tumourplatekey, "_impute_input_chr", chrom, ".txt"),
        stringsAsFactors = FALSE)
    is.chr23 <- chrom == 23

    for (r in seq_len(n)) {
        if (is.chr23) {
            # Chromosome 23 is processed per reference row: restrict the
            # input to the positions present in that row's legend file.
            filename.input.ok <- paste0(filename.input, ".filt", r)
            legend <- read.delim(impute.info$impute_legend[r],
                sep = " ", header = TRUE, stringsAsFactors = FALSE)
            impute <- impute.all[impute.all[[3]] %in% legend$position, ]
        }
        else {
            filename.input.ok <- filename.input
            impute <- impute.all
        }
        write.gen.split(impute, filename.input.ok)

        output.prefix <- if (n == 1) {
            filename.output
        } else {
            paste(filename.output, r, sep = ".")
        }
        # Rows flagged is_par == 0 get the --chrX switch.
        chrx.flag <- if (isTRUE(!impute.info$is_par[r])) "--chrX" else ""
        cmd <- paste(filename.shapeit2, "-G", filename.input.ok,
            "-M", impute.info$genetic_map[r], "-R", impute.info$impute_hap[r],
            impute.info$impute_legend[r], impute.info$impute_sample[r],
            "-O", output.prefix, chrx.flag, "--thread 1", "--seed", seed,
            "--force")
        status <- system(cmd, wait = TRUE)
        if (status != 0) {
            warning("ShapeIt2 exited with status ", status,
                " for reference row ", r, call. = FALSE)
        }
    }

    if (n > 1) {
        # Merge the per-row haplotype files: drop positions seen more than
        # once (column 3) and sort by position.
        filenames.haps <- paste(filename.output, seq_len(n), "haps",
            sep = ".")
        haps <- do.call("rbind", lapply(filenames.haps, read.delim,
            sep = " ", header = FALSE, stringsAsFactors = FALSE))
        haps <- haps[!duplicated(haps[[3]]), ]
        haps <- haps[order(haps[[3]]), ]
        write.table(haps, file = paste0(filename.output, ".haps"),
            quote = FALSE, sep = " ", row.names = FALSE, col.names = FALSE)
        system(paste0("cp ", filename.output, ".1.sample ", filename.output,
            ".sample"))
    }
}

afrangou/CleanCNA documentation built on Dec. 28, 2021, 8:21 p.m.