genFun-package: Functions for dealing with genetic data

Description Details Author(s) References See Also Examples

Description

This is a collection of functions which, if not for this package, I would have to rewrite or copy/paste for almost every R script dealing with genetic data that I write.

Details

Package: genFun
Type: Package
Version: 1.0
Date: 2015-03-04
License: GPL (>= 2)
Packaged: 2015-02-13 20:23:00 UTC; cb
Built: R 3.1.1; ; 2015-02-13 20:24:05 UTC; unix

Index:

chr               Extract chromosome number from file names
chr2int           Convert chromosome string to integer
chunk             Extract chunk number from file names
combine_snptest   Summarize and combine snptest output by chromosome
extract_snps      Extract snps from imputed chunk files
find_genes        Map SNPs to genes
int2chr           Convert chromosome from integer to string
region_plot       Create region plot as pdf
snptest           Run snptest gwas on multiple cores
unslash           Remove trailing slash from directory names

Author(s)

Clemens Baumbach

Maintainer: Clemens Baumbach <clemens.baumbach@helmholtz-muenchen.de>

References

none

See Also

match_intervals

Examples

# Attach the package so that the example calls below resolve.
library(genFun)

## chr
## Extract the chromosome number from file names.  Both paths contain
## "chr5" in the directory part and a different "chr<N>_<M>" in the file
## name part.
## NOTE(review): presumably only the file name part is used -- confirm
## against ?chr.
chr(c("chr5_path/to/the_chr1_3.txt", "chr5_path/to/the_chr2_4.txt"))

## chr2int
## Convert chromosome strings to integers, first for bare labels and then
## for labels carrying a "chr" prefix (announced via `prefix').
chr2int(c("1", "MT", "X", "Y"))
chr2int(c("chr1", "chrMT", "chrX", "chrY"), prefix = "chr")

## chunk
## Extract the chunk number from the same file names used for chr() above.
chunk(c("chr5_path/to/the_chr1_3.txt", "chr5_path/to/the_chr2_4.txt"))

## combine_snptest
## Not run: 
# Column renaming map handed to combine_snptest() below.
# Values are the NEW column names; the names attribute holds the matching
# OLD snptest column names, position by position.  "imputed" and
# "callrate" have an empty old name.
# NOTE(review): presumably these two columns have no snptest counterpart
# and are supplied by combine_snptest() itself -- confirm in
# ?combine_snptest.
old2new <- setNames(
    c("snp", "chr", "pos",
      "imputed", "callrate",
      "coding_freq", "beta", "se", "pval"),
    c("rsid", "chromosome", "position",
      "", "",
      "freq_alleleB", "frequentist_add_beta_1", "frequentist_add_se_1",
      "frequentist_add_pvalue"))

# Custom `hook' function: keep only the rows of `d' whose `pval' column
# is not NA.  `drop = FALSE' guarantees a data frame is returned even
# when `d' has a single column.
drop_snps_without_pvalue <- function(d)
{
    has_pvalue <- !is.na(d$pval)
    d[has_pvalue, , drop = FALSE]
}

# Summarize and combine snptest output by chromosome, using the renaming
# map `old2new' and the row filter `drop_snps_without_pvalue' defined
# above.
# NOTE(review): presumably "<CHROMOSOME>" in `template' is replaced by the
# chromosome when naming each output file, and `ncore' is the number of
# cores used -- confirm in ?combine_snptest.
combine_snptest(indir    = "path/to/snptest_output_files",
                outdir   = "dir/where/output/should_go",
                ncore    = 6L,
                old2new  = old2new,
                select   = TRUE,
                template = "cohort_consortium_date_chr<CHROMOSOME>.txt.gz",
                gzip     = TRUE,
                hook     = drop_snps_without_pvalue)

## End(Not run)

## extract_snps
## Not run: 
# Extract the SNPs rs123 and rs456 from imputed chunk files, using two
# chunk map files and an id file listing all person ids.
# NOTE(review): presumably `keep' restricts the output to the listed
# persons and `chunkmap_cols' selects the relevant columns of the chunk
# map files -- confirm in ?extract_snps.
extract_snps(
    snps      = c("rs123", "rs456"),
    indir     = "path/to/directory/with_chunk_files/",
    outdir    = "path/where/extracted/chunk/files/should/go",
    chunkmap  = c("path/to/chunkmap_part_1.txt", "path/to/chunkmap_part_2.txt"),
    idfile    = "path/to/file/with_all_person_ids_in_correct_order.txt",
    keep      = c("person_1", "person_2"),
    chunkmap_cols = c(1L, 3L, 4L),
    ncore     = 6L)

## End(Not run)

## find_genes
# Toy SNP table: one row per SNP with its chromosome and position.
# The rows are passed through read.table()'s `text' argument so that
# read.table() creates and closes the text connection itself; an explicit
# textConnection() would stay open after the call.
d <- read.table(text = c("snp chr pos",
                         "rs1 1 1",
                         "rs2 1 5",
                         "rs3 2 3",
                         "rs4 3 4"),
                header = TRUE, stringsAsFactors = FALSE)

# Toy gene table: one row per gene with its chromosome and start/end
# positions.  The rows are passed through read.table()'s `text' argument
# so that read.table() creates and closes the text connection itself; an
# explicit textConnection() would stay open after the call.
genes <- read.table(text = c("id chr start end",
                             "g1 1 1 3",
                             "g2 1 1 6",
                             "g3 2 1 4",
                             "g4 4 1 5"),
                    header = TRUE, stringsAsFactors = FALSE)

# Map the SNPs in `d' to the genes in `genes'.
# NOTE(review): presumably a SNP maps to a gene when the chromosomes agree
# and start <= pos <= end -- confirm in ?find_genes.
find_genes(d, genes)

## int2chr
## Convert chromosomes from integer to string.
## NOTE(review): 23:25 presumably correspond to the non-numeric labels
## used in the chr2int example ("X", "Y", "MT"); the exact mapping is not
## visible here -- confirm in ?int2chr.
int2chr(c(1:3, 23:25))

## snptest
## Not run: 
## Run snptest on men-only nonPAR chunks on chromosome X.
##
## Note: - The sample file must contain the same individuals as the
##         imputed chunk files.
##       - You have to specify "-assume_chromosome X" (done below via
##         `add_args').  The chunks are nonPAR chunks of chromosome X,
##         so the chromosome must be X and not XY.
##       - You have to craft your own regular expression for `chr_chunk'
##         because the nonPAR chunk files don't follow the default
##         naming convention where chromosome and chunk are specified as
##         in "chr3_7".
##       - You have to specify a custom `pattern' because you need to
##         restrict the set of chunk files to those that contain only
##         male subjects.
snptest(
    indir          = "path/to/directory/with/gz-compressed/input/files",
    sample_file    = "path/to/MEN_ONLY_phenotype_file",
    outdir         = "path/to/directory/for/snptest/output/files",
    pheno          = "blood_sugar",
    covs           = c("age", "bmi"),
    exclusion_file = "path/to/file/with/exclusion/ids",
    add_args       = c("-missing_code NA",
                       "-frequentist 1",
                       "-method expected",
                       "-hwe",
                       "-printids",
                       "-use_raw_covariates",
                       "-use_raw_phenotypes",
                       "-assume_chromosome X"),
    ncore          = 9L,
    pattern        = "males.*\\.gz$",
    chr_chunk      = ".*chr([^_]+)_(nonPAR_\\d+)",
    executable     = "/opt/snptest_v2.5_linux_x86_64_static/snptest_v2.5")

## End(Not run)

## unslash
## Remove the trailing slash from directory names; the input contains one
## path with and one without a trailing slash.
unslash(c("path/to/dir_with_slash/", "path/to/dir_without_slash"))

cbaumbach/genFun documentation built on May 13, 2019, 1:47 p.m.