# this function can be used for read count file for both versions
readSnpMatrixMSK <- function(filename, skip=0L, type=c("vpl", "ns1", "ns2")) {
# data file type
type <- match.arg(type)
# could have been generated by VES snp-pileup.pl code (vpl)
if (type == "vpl") {
rcmat <- scan(filename, what=list(Chrom="", Pos=0, NOR.DP=0, NOR.RD=0, TUM.DP=0, TUM.RD=0), skip=skip)
rcmat <- as.data.frame(rcmat, stringsAsFactors=FALSE)
# or the two counters developed by Nick Socci
} else {
if (type == "ns2") {
# cols: Chrom, Pos, TUM.DP, TUM.RD, TUM.AD, NOR.DP, NOR.RD, NOR.AD
colClasses <- c("numeric", "character")[c(2,1,1,1,1,1,1,1)]
} else {
# columns: Chrom, Pos, Ref, Alt, TUM.DP, TUM.Ap, TUM.Cp, TUM.Gp,
# TUM.Tp, TUM.An, TUM.Cn, TUM.Gn, TUM.Tn, NOR.DP, NOR.Ap,
# NOR.Cp, NOR.Gp, NOR.Tp, NOR.An, NOR.Cn, NOR.Gn, NOR.Tn
colClasses <- c("numeric", "character")[c(2,1,2,2,rep(1,18))]
}
# read in the file using fread from data.table
xx <- data.table::fread(paste("gunzip -c", filename), header=T, sep="\t", skip=skip, colClasses=colClasses)
# if using counter version v1 compute ref counts NOR.RD and TUM.RD
if (type == "ns1") {
ii <- 1:nrow(xx)
RefID <- match(xx$Ref, c("A","C","G","T"))
xx$NOR.RD <- (as.matrix(xx[,.(NOR.Ap,NOR.Cp,NOR.Gp,NOR.Tp)])[cbind(ii,RefID)] + as.matrix(xx[,.(NOR.An,NOR.Cn,NOR.Gn,NOR.Tn)])[cbind(ii,RefID)])
xx$TUM.RD <- (as.matrix(xx[,.(TUM.Ap,TUM.Cp,TUM.Gp,TUM.Tp)])[cbind(ii,RefID)] + as.matrix(xx[,.(TUM.An,TUM.Cn,TUM.Gn,TUM.Tn)])[cbind(ii,RefID)])
}
# create rcmat
rcmat <- xx[,.(Chrom, Pos, NOR.DP, NOR.RD, TUM.DP, TUM.RD)]
# remove chr if present in Chrom
if (rcmat$Chrom[1] == "chr1") rcmat$Chrom <- gsub("chr","",rcmat$Chrom)
}
names(rcmat)[1:2] <- c("Chromosome", "Position")
as.data.frame(rcmat, stringsAsFactors=FALSE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.