In syounkin/Trioconductor: Trioconductor

opts_chunk$set(fig.width=7, fig.height=10, comment = "", warning = FALSE, tidy = TRUE, highlight = TRUE, fig.path = 'figures/', dev = 'png', background = 'red', width = 100)

library("trioClasses")
library("GWASTools")
library("CleftCNVAssoc")

data("beaty.scanAnnot")
data("beaty.snpAnnot")
data("fe")
intensfile <- NcdfIntensityReader("/media/sgy/lockdown/GWASTools/beaty.intensity.nc")
genofile <- NcdfGenotypeReader("/media/sgy/lockdown/GWASTools/beaty.geno.nc")
xyfile <- NcdfIntensityReader("/media/sgy/lockdown/GWASTools/beaty.intensity.xy.nc")
scan.ids <- as.character( getScanID( beaty_scanAnnot ) )
scan.names <- as.character( getVariable( beaty_scanAnnot, "scanName" ) )
names(scan.names) <- scan.ids
names(scan.ids) <- as.character(scan.names)

fa.id <- as.character(completeTrios(fe.beaty)$fid)
ma.id <- as.character(completeTrios(fe.beaty)$mid)
names(fa.id) <- as.character(completeTrios(fe.beaty)$id)
names(ma.id) <- as.character(completeTrios(fe.beaty)$id)

First we create a vector of offspring IDs that we want plotted.

offspring.vec <- as.character(completeTrios(fe.beaty)$id)

Now we incorporate it into a GRange object with the ranges in this case being the chr6 region for each of the offspring.

gr <- GRanges( seqnames = rep('chr6',length(offspring.vec)), ranges = IRanges( start = 32611466, end =  32643872), id = offspring.vec )

Now we apply a function from CleftCNVAssoc to retrieve data from GWASTools objects. These data exist only on an encrypted hard drive and enigma.

raw.df.list <- getRaw( gr+1e6, intensfile = intensfile, snpAnnot = beaty_snpAnnot, scan.id = scan.ids, fa.id = fa.id, ma.id = ma.id, genofile = genofile, xyfile = xyfile )

Here are the data for the first offspring's first 5 markers.

head(raw.df.list[[1]],5)

pos.vec <- raw.df.list[[1]]$pos
logr.list <- lapply(raw.df.list, function(df) df$logr )
logr.fa.list <- lapply(raw.df.list, function(df) df$logr.fa )
logr.ma.list <- lapply(raw.df.list, function(df) df$logr.ma )
baf.list <- lapply(raw.df.list, function(df) df$baf )
baf.fa.list <- lapply(raw.df.list, function(df) df$baf.fa )
baf.ma.list <- lapply(raw.df.list, function(df) df$baf.ma )

Plot the logR values for everyone stratified by F,M,O. Purple is offspring, red is father, and blue is mother.

par(bg="lightyellow")
plot(1, type = 'n', xlim = range(pos.vec), ylim = c(-2,2) )
for( i in 1:length(logr.list) ){
  points(jitter(pos.vec,amount = 1e3), logr.list[[i]], pch = 20, col = "purple")
  }
plot(1, type = 'n', xlim = range(pos.vec), ylim = c(-2,2) )
for( i in 1:length(logr.fa.list) ){
  points(jitter(pos.vec,amount = 1e3), logr.fa.list[[i]], pch = 20, col = "red")
  }
plot(1, type = 'n', xlim = range(pos.vec), ylim = c(-2,2) )
for( i in 1:length(logr.ma.list) ){
  points(jitter(pos.vec,amount = 1e3), logr.ma.list[[i]], pch = 20, col = "blue")
  }

Not very informative so we turn to individual trios with an untransmitted deletion. First, we need to find a vector offspring IDs with an untransmitted deletion. This is a property of the CNVMatrix within the FamilyExperiment object and can be manipulated with the non-exported method TrioAssay. To begin we first subset the CNVMatrix on the chr6 region.

chr6.gr <- GRanges( seqnames = 'chr6', ranges = IRanges( start = 32611466, end =  32643872))

(fe.beaty.chr6 <- fe.beaty[queryHits(findOverlaps(rowData(fe.beaty),chr6.gr))])

Now with the smaller FE object we can easily construct the trio-states.

trioAssay.chr6 <- trioClasses:::TrioAssay(fe.beaty.chr6, type = "cnv")
trioStates.chr6 <- with(trioAssay.chr6, matrix( paste0(F,M,O), nrow = nrow(O), ncol = ncol(O)))
dimnames(trioStates.chr6) <- dimnames(trioAssay.chr6$O)
head(trioStates.chr6[,1:5],10)

Now we identify trio-cnv pairs with an untransmitted deletion, i.e., trio-states 100, 010, or 110. (This is not a complete list of trio-states with a non-transmission.)

untrans.mat <- matrix(trioStates.chr6 %in% c("100","010","110"),nrow=nrow(trioStates.chr6),ncol=ncol(trioStates.chr6),byrow=FALSE,dimnames=dimnames(trioStates.chr6))
head(untrans.mat[,1:10],10)

And finally we find the IDs of those with more than zero untransmitted deletions.

offspring.chr6 <- rownames(untrans.mat)[which(rowSums(untrans.mat)>0)]
length(offspring.chr6)
head(offspring.chr6)

Now we have what we need to dig deeper into the cause of the under-transmission.

par(bg="lightyellow")
for( i in which(names(logr.list) %in% offspring.chr6) ){
  plot(1, type = 'n', xlim = range(pos.vec), ylim = c(-1,11), axes = FALSE, main = paste0(names(logr.list)[i]), xlab = "MB", ylab = "")

  axis(1, at = at <- seq(min(pos.vec),max(pos.vec),length.out=5), labels = round(at/1e6,2) )

  lines( range(pos.vec), rep(0,2), lty = 2 )
  lines( range(pos.vec), rep(3,2), lty = 2 )
  lines( range(pos.vec), rep(6.5,2), lty = 2 )
  lines( range(pos.vec), rep(9.5,2), lty = 2 )

  points(pos.vec, logr.list[[i]], pch = 20, col = "purple")
  points(pos.vec, 3+logr.fa.list[[i]], pch = 20, col = "red")
  points(pos.vec, 3+logr.ma.list[[i]], pch = 20, col = "blue")
  points(pos.vec, 6 + baf.list[[i]], pch = 20, col = "purple")
  points(pos.vec, 9 + baf.fa.list[[i]], pch = 20, col = "red")
  points(pos.vec, 9 + baf.ma.list[[i]], pch = 20, col = "blue")

  foo <- reduce(rowData(fe.beaty.chr6)[untrans.mat[names(logr.list)[i],]])
  for( j in 1:length(foo) ) lines(c(start(foo[j]),end(foo[j])),rep(1.5,2), lwd = 5, col = "red")
  }

Now for the transmitted deletions in the chr6 region.

trans.mat <- matrix(trioStates.chr6 %in% c("101","011","112"),nrow=nrow(trioStates.chr6),ncol=ncol(trioStates.chr6),byrow=FALSE,dimnames=dimnames(trioStates.chr6))

offspring.chr6 <- rownames(trans.mat)[which(rowSums(trans.mat)>0)]
length(offspring.chr6)
head(offspring.chr6)

par(bg="lightyellow")
for( i in which(names(logr.list) %in% offspring.chr6) ){
  plot(1, type = 'n', xlim = range(pos.vec), ylim = c(-1,11), axes = FALSE, main = paste0(names(logr.list)[i]), xlab = "MB", ylab = "")

  axis(1, at = at <- seq(min(pos.vec),max(pos.vec),length.out=5), labels = round(at/1e6,2) )

  lines( range(pos.vec), rep(0,2), lty = 2 )
  lines( range(pos.vec), rep(3,2), lty = 2 )
  lines( range(pos.vec), rep(6.5,2), lty = 2 )
  lines( range(pos.vec), rep(9.5,2), lty = 2 )

  points(pos.vec, logr.list[[i]], pch = 20, col = "purple")
  points(pos.vec, 3+logr.fa.list[[i]], pch = 20, col = "red")
  points(pos.vec, 3+logr.ma.list[[i]], pch = 20, col = "blue")
  points(pos.vec, 6 + baf.list[[i]], pch = 20, col = "purple")
  points(pos.vec, 9 + baf.fa.list[[i]], pch = 20, col = "red")
  points(pos.vec, 9 + baf.ma.list[[i]], pch = 20, col = "blue")

  foo <- reduce(rowData(fe.beaty.chr6)[trans.mat[names(logr.list)[i],]])
  for( j in 1:length(foo) ) lines(c(start(foo[j]),end(foo[j])),rep(1.5,2), lwd = 5, col = "purple")
  }