knitr::opts_chunk$set(
  collapse = FALSE,
  error=FALSE,
  message=FALSE,
  warning=FALSE,
  comment = "#>"
)
library(gwhap)
library(data.table)
library(readr)

Genome-Wide HAPloptype analysis

PART 1 : Genetic Map

GWHAP needs to depermine the location the genetic map (i.e in cM) of the phased markers.

For every marker postion in bp, GWHAP will try find it in a reference genetic map. If the marker's position is not in the genetic map, its position in cM is computed using linear interpolation. GWHAP can use several reference maps format : Rutgers genetic map v3 , 1000 Genomes ... See vignette "Ressources" for details TO DO : code that check the download URL for all downloaders

Reading the the reference genetic map :

# give the list of files to consider
f1 = system.file("extdata", "chr1.interpolated_genetic_map.gz",
                          package="gwhap", mustWork=TRUE)
f2 = system.file("extdata", "chr2.interpolated_genetic_map.gz",
                          package="gwhap", mustWork=TRUE)
# build a NAMED list mapping the chrom information
chr = list(1, 2)
names(chr) = c(f1, f2)

# now build the parameter for Genetic_Map object. Inspect the file to
# assign the right column name for cM and position.
# Here the chrom info is not read from the file it is read from the
# NAMED list chr
filepaths = c(f1, f2)
encodings = list("cM"="cM", "position"="bp","chr"=chr, "format"="table")

# get an instance of Genetic_Map
genetic_map = Genetic_Map(filepaths=filepaths, encodings=encodings)

# finally read the corresponding data and have a quick look at it
genetic_map = readData(genetic_map)
print(head(genetic_map@gmapData))
# Inspect gwhapConfig global varaible and see the descriptions of 
# the various toy genetic map  
names(gwhap:::gwhapConfig)

# get the filepaths and encodings for 1000 genome interpolated
filepaths = gwhap:::gwhapConfig$genmap_toy_interpolated_1000$filepaths
encodings = gwhap:::gwhapConfig$genmap_toy_interpolated_1000$encodings

genetic_map = Genetic_Map(filepaths=filepaths, encodings=encodings)
genetic_map = readData(genetic_map)
print(head(genetic_map@gmapData))
# init filepaths and encodings parameters
filepaths = gwhap:::gwhapConfig$genmap_toy_rutger$filepaths
encodings = gwhap:::gwhapConfig$genmap_toy_rutger$encodings

# get an instance of Genetic_Map
genetic_map = Genetic_Map(filepaths=filepaths, encodings=encodings)

# finally read the corresponding data and have a quick look at it
genetic_map = readData(genetic_map)
print(head(genetic_map@gmapData))

GWHAP can read 1000 genomes maps:

# init filepaths and encodings parameters
filepaths = gwhap:::gwhapConfig$genmap_toy_reference_1000$filepaths
encodings = gwhap:::gwhapConfig$genmap_toy_reference_1000$encodings

# get an instance of Genetic_Map
genetic_map = Genetic_Map(filepaths=filepaths, encodings=encodings)

# finally read the corresponding data and have a quick look at it
genetic_map = readData(genetic_map)
head(genetic_map@gmapData)

Reading the list of SNPs to place on the map :

# filepaths and encoding are build like for the Genetic_Map object
f1 = system.file("extdata", "haplotypes.bgen.bgi",package="gwhap", mustWork=TRUE)
chr = list(1)
names(chr) = c(f1)
filepaths = c(f1)
encodings = list("snp"="snp", "position"="position","chr"=chr, "format"="bgen")

snp_bucket = Snp_Bucket(filepaths=filepaths, encodings=encodings)
snp_bucket = readData(snp_bucket)
print(snp_bucket@bucketData)
# get the snp list position filename
filepaths = gwhap:::gwhapConfig$snpbucket_toy_flat$filepaths
encodings = gwhap:::gwhapConfig$snpbucket_toy_flat$encodings
# inspect the parameters
print(filepaths)
print(encodings)

#
snp_bucket = Snp_Bucket(filepaths=filepaths, encodings=encodings)
snp_bucket = readData(snp_bucket)

#
head(snp_bucket@bucketData)

GWHAP provide build-in function to interpolate a list of SNPs using a reference map :

# get an instance of Genetic_Map
filepaths = gwhap:::gwhapConfig$genmap_toy_interpolated_1000$filepaths
encodings = gwhap:::gwhapConfig$genmap_toy_interpolated_1000$encodings
interp1000_genetic_map = Genetic_Map(filepaths=filepaths, encodings=encodings)
interp1000_genetic_map = readData(interp1000_genetic_map)

# get the snp list position filename
filepaths = gwhap:::gwhapConfig$snpbucket_toy_flat$filepaths
encodings = gwhap:::gwhapConfig$snpbucket_toy_flat$encodings
snp_bucket = Snp_Bucket(filepaths=filepaths, encodings=encodings)
snp_bucket = readData(snp_bucket)

# get the augmented genetic_map
interpolated_map=create_augmented_genetic_map(
                            snp_bucket=snp_bucket,
                            genetic_map=interp1000_genetic_map)
head(interpolated_map@gmapData)
# read 1000 genome reference genetic map
filepaths = gwhap:::gwhapConfig$genmap_toy_reference_1000$filepaths
encodings = gwhap:::gwhapConfig$genmap_toy_reference_1000$encodings
ref1000_genetic_map = Genetic_Map(filepaths=filepaths, encodings=encodings)
ref1000_genetic_map = readData(ref1000_genetic_map)

# read a Snp_bucket for some physical position
filepaths = gwhap:::gwhapConfig$snpbucket_toy_flat$filepaths
encodings = gwhap:::gwhapConfig$snpbucket_toy_flat$encodings
snp_bucket = Snp_Bucket(filepaths=filepaths, encodings=encodings)
snp_bucket = readData(snp_bucket)


interpolated_map=create_augmented_genetic_map(
                            snp_bucket=snp_bucket,
                            genetic_map=ref1000_genetic_map)

head(interpolated_map@gmapData)
# read rutger ref genetic map
filepaths = gwhap:::gwhapConfig$genmap_toy_rutger$filepaths
encodings = gwhap:::gwhapConfig$genmap_toy_rutger$encodings
rutger_genetic_map = Genetic_Map(filepaths=filepaths, encodings=encodings)
rutger_genetic_map = readData(rutger_genetic_map)

#snp_physical_positions
filepaths = gwhap:::gwhapConfig$snpbucket_toy_bgen$filepaths
encodings = gwhap:::gwhapConfig$snpbucket_toy_bgen$encodings
snp_bucket = Snp_Bucket(filepaths=filepaths, encodings=encodings)
snp_bucket = readData(snp_bucket)


interpolated_map=create_augmented_genetic_map(
                        snp_bucket=snp_bucket,
                        genetic_map=rutger_genetic_map,
                        save_genetic_map=FALSE)

head(interpolated_map@gmapData)


yasmina-mekki/gwhap documentation built on Dec. 31, 2021, 9:19 p.m.