knitr::opts_chunk$set(
  collapse = FALSE,
  error=FALSE,
  message=FALSE,
  warning=FALSE,
  comment = "#>"
)
library(gwhap)
library(data.table)
library(readr)

Genome-Wide HAPloptype analysis

PART 2 : Haplotype blocks

GWHAP will determine haplotypes blocks where neigboring markers are less that \delta apopart on the map (in cM)

See vignette "Part1-Maps" for computing genetic maps of markers

Haplotype blocks are defined on genetic maps :

TODO unify output format from create_ and get_

# get an instance of Genetic_Map and readData  interpolted 1000 genome
filepaths = gwhap:::gwhapConfig$genmap_toy_interpolated_1000$filepaths
encodings = gwhap:::gwhapConfig$genmap_toy_interpolated_1000$encodings
interp1000_genetic_map = Genetic_Map(filepaths=filepaths, encodings=encodings)
interp1000_genetic_map = readData(interp1000_genetic_map)

# get snp physical position to interpolate from the reference map
filepaths = gwhap:::gwhapConfig$snpbucket_toy_flat$filepaths
encodings = gwhap:::gwhapConfig$snpbucket_toy_flat$encodings
snp_bucket = Snp_Bucket(filepaths=filepaths, encodings=encodings)
snp_bucket = readData(snp_bucket)

# interpolate
genetic_map=create_augmented_genetic_map(
                                snp_bucket=snp_bucket,
                                genetic_map=interp1000_genetic_map,
                                save_genetic_map=FALSE)

# save the interpolated map in tmpFilepath
# the Genetic_Map instance will be saved in a single rds R format 
tmpFilepath = tempfile(tmpdir = tempdir(), fileext = ".rds")
create_augmented_genetic_map(
                                snp_bucket=snp_bucket,
                                genetic_map=interp1000_genetic_map,
                                save_genetic_map=TRUE,
                                outputfile = tmpFilepath)
# Re read it
# Specify filepaths and encodings.
filepaths = tmpFilepath
encodings = list("chr"="", "position"="", "cM"="", "format"="rds")
saved_augmented_map_df = Genetic_Map(filepaths=filepaths, encodings=encodings)
saved_augmented_map_df = readData(saved_augmented_map_df)
# This function to fix use readData instead TODO
#augmented_map_df = get_augmented_genetic_map(augmented_genetic_map_dir= tmpDir,
#                                             chromosomes=1:23)
# Compare the maps before and after save.
head(genetic_map@gmapData)
head(saved_augmented_map_df@gmapData)

TODO : error when no blocks were created on a given chromosome

#create_blocs(genetics_map=augmented_map_df, 
#             delta=1e-2,
#             save_blocs=TRUE,
#             output=tempdir())
#df_blocks=get_blocs(blocs_dir = tempdir(),
#          chromosomes = 1:23)
#head(df_blocks)
#deltas = c(1e-3, 1e-2)
#dfs_blocks = c()
#for(delta in deltas){
#  df_blocks_tmp = create_blocs(genetics_map=augmented_map_df,
#                               delta=delta,
#                               save_blocs=FALSE)
#  dfs_blocks <- rbind(dfs_blocks, df_blocks_tmp)
#}
#df_blocs=do.call(rbind,dfs_blocks)
#head(df_blocs)

see vignette "visualization"

TODO : rebuild vignette visualization with toy examples



yasmina-mekki/gwhap documentation built on Dec. 31, 2021, 9:19 p.m.