# helpers.R
# helper functions for hapmap.R
# Randall Johnson
# BSP CCR Genetics Core at Frederick National Laboratory
# SAIC-Frederick, Inc
# Created August 28, 2013
# Last Modified October 31, 2013
get.phased <- function(chr, pop, clean.duos = FALSE, dataDir = '../Data/')
{
# read in and merge phased data (some individuals did not pass QC,
# hence those that were duos or unrelated)
# trios
phased1 <- try(read.table(paste(dataDir, toupper(pop), '/phased/hapmap3_r2_b36_fwd.consensus.qc.poly.chr',
chr, '_', tolower(pop), '.unr.phased', sep = ''),
header = TRUE, stringsAsFactors = FALSE, na.strings = '-'))
# duos
phased2 <- try(read.table(paste(dataDir, toupper(pop), '/phased/hapmap3_r2_b36_fwd.consensus.qc.poly.chr',
chr, '_', tolower(pop), '.D.phased', sep = ''),
header = TRUE, stringsAsFactors = FALSE, na.strings = '-'))
if(class(phased2) == 'data.frame' & clean.duos)
{
todrop <- c(grep('_NA', names(phased2), fixed = TRUE), grep('_0_A', names(phased2), fixed = TRUE))
if(length(todrop) > 0)
phased2 <- phased2[,-todrop]
}
# unrelated
phased3 <- try(read.table(paste(dataDir, toupper(pop), '/phased/hapmap3_r2_b36_fwd.consensus.qc.poly.chr',
chr, '_', tolower(pop), '.phased', sep = ''),
header = TRUE, stringsAsFactors = FALSE, na.strings = '-'))
# merge...not all of these are defined in all populations :P
if(class(phased1) == 'data.frame' & class(phased2) == 'data.frame' & class(phased3) == 'data.frame')
{
phased <- merge(merge(phased3, phased2), phased1)
if(dim(phased)[1] != mean(dim(phased1)[1], dim(phased2)[1], dim(phased3)[1]))
stop("Differing number of rows in chromosome ", chr)
}else{
if(class(phased1) == 'try-error')
{
if(class(phased2) == 'data.frame' & class(phased3) == 'data.frame')
{
phased <- merge(phased2, phased3)
if(dim(phased)[1] != mean(dim(phased2)[1], dim(phased3)[1]))
stop("Differing number of rows in chromosome ", chr)
}else{
if(class(phased2) == 'try-error')
phased <- phased3
if(class(phased3) == 'try-error')
phased <- phased2
}
}
if(class(phased2) == 'try-error')
{
if(class(phased1) == 'data.frame' & class(phased3) == 'data.frame')
{
phased <- merge(phased1, phased3)
if(dim(phased)[1] != mean(dim(phased1)[1], dim(phased3)[1]))
stop("Differing number of rows in chromosome ", chr)
}else{
if(class(phased1) == 'try-error')
phased <- phased3
if(class(phased3) == 'try-error')
phased <- phased1
}
}
if(class(phased3) == 'try-error')
{
if(class(phased2) == 'data.frame' & class(phased1) == 'data.frame')
{
phased <- merge(phased2, phased1)
if(dim(phased)[1] != mean(dim(phased1)[1], dim(phased2)[1]))
stop("Differing number of rows in chromosome ", chr)
}else{
if(class(phased2) == 'try-error')
phased <- phased1
if(class(phased1) == 'try-error')
phased <- phased2
}
}
}
# clean up "AFFY" junk at the beginning of the string
phased$rsID[substr(phased$rsID, 1, 2) == 'AF'] <-
unlist(sapply(strsplit(phased$rsID[substr(phased$rsID, 1, 2) == 'AF'], '__', fixed = TRUE), `[`, 2))
attributes(phased)$chr <- chr
attributes(phased)$pop <- pop
return(phased)
}
get.LD <- function(chr, pop, keep)
{
# read in LD data
LD <- read.table(paste(dataDir, toupper(pop), '/LD/ld_chr', chr, '_', toupper(pop), '.txt', sep = ''),
stringsAsFactors = FALSE)
names(LD) <- c('pos1', 'pos2', 'pop', 'rs1', 'rs2', 'Dprime', 'r2', 'lod', 'fbin')
# only include those that are significant at 0.05 level and are in the phased data
LD <- subset(LD, lod >= 3 & rs1 %in% keep & rs2 %in% keep)
# clean up "AFFY" junk at the beginning of the string
LD$rs1[substr(LD$rs1, 1, 2) == 'AF'] <-
unlist(sapply(strsplit(LD$rs1[substr(LD$rs1, 1, 2) == 'AF'], '__', fixed = TRUE), `[`, 2))
LD$rs2[substr(LD$rs2, 1, 2) == 'AF'] <-
unlist(sapply(strsplit(LD$rs2[substr(LD$rs2, 1, 2) == 'AF'], '__', fixed = TRUE), `[`, 2))
attributes(LD)$chr <- chr
attributes(LD)$pop <- pop
return(LD)
}
# example of importing data into ALDdata (could be coded better, but this will do)
if(FALSE)
{
library(ALDdata)
library(magrittr)
data(hapmap)
for(i in 1:23)
{
cat("Processing chromosome", i, '\n')
phased_raw <- get.phased(i, 'ASW')
phased <- matrix(nrow = dim(phased_raw)[2] - 2, ncol = dim(phased_raw)[1],
dimnames = list(names(phased_raw)[-(1:2)], phased_raw$rsID))
tmp <- subset(hapmap, rs %in% phased_raw$rsID)
vart <- tmp$var
names(vart) <- tmp$rs
for(j in 1:dim(phased_raw)[1])
{
phased[,phased_raw$rsID[j]] <- (phased_raw[j,-(1:2)] == vart[phased_raw$rsID[j]]) %>%
as.numeric()
}
save(phased, file = paste0('../Data/ASW/bin/asw', i, '.RData'))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.