inst/extRfns/readSnpMatrixMSK.R

# this function can be used for read count file for both versions 
readSnpMatrixMSK <- function(filename, skip=0L, type=c("vpl", "ns1", "ns2")) {
    # data file type
    type <- match.arg(type)
    # could have been generated by VES snp-pileup.pl code (vpl)
    if (type == "vpl") {
        rcmat <- scan(filename, what=list(Chrom="", Pos=0, NOR.DP=0, NOR.RD=0, TUM.DP=0, TUM.RD=0), skip=skip)
        rcmat <- as.data.frame(rcmat, stringsAsFactors=FALSE)
        # or the two counters developed by Nick Socci
    } else {
        if (type == "ns2") {
            # cols: Chrom, Pos, TUM.DP, TUM.RD, TUM.AD, NOR.DP, NOR.RD, NOR.AD
            colClasses <- c("numeric", "character")[c(2,1,1,1,1,1,1,1)]
        } else {
            # columns: Chrom, Pos, Ref, Alt, TUM.DP, TUM.Ap, TUM.Cp, TUM.Gp,
            #          TUM.Tp, TUM.An, TUM.Cn, TUM.Gn, TUM.Tn, NOR.DP, NOR.Ap,
            #          NOR.Cp, NOR.Gp, NOR.Tp, NOR.An, NOR.Cn, NOR.Gn, NOR.Tn
            colClasses <- c("numeric", "character")[c(2,1,2,2,rep(1,18))]
        }
        # read in the file using fread from data.table
        xx <- data.table::fread(paste("gunzip -c", filename), header=T, sep="\t", skip=skip, colClasses=colClasses)
        # if using counter version v1 compute ref counts NOR.RD and TUM.RD
        if (type == "ns1") {
            ii <- 1:nrow(xx)
            RefID <- match(xx$Ref, c("A","C","G","T"))
            xx$NOR.RD <- (as.matrix(xx[,.(NOR.Ap,NOR.Cp,NOR.Gp,NOR.Tp)])[cbind(ii,RefID)] + as.matrix(xx[,.(NOR.An,NOR.Cn,NOR.Gn,NOR.Tn)])[cbind(ii,RefID)])
            xx$TUM.RD <- (as.matrix(xx[,.(TUM.Ap,TUM.Cp,TUM.Gp,TUM.Tp)])[cbind(ii,RefID)] + as.matrix(xx[,.(TUM.An,TUM.Cn,TUM.Gn,TUM.Tn)])[cbind(ii,RefID)])
        }
        # create rcmat
        rcmat <- xx[,.(Chrom, Pos, NOR.DP, NOR.RD, TUM.DP, TUM.RD)]
        # remove chr if present in Chrom
        if (rcmat$Chrom[1] == "chr1") rcmat$Chrom <- gsub("chr","",rcmat$Chrom)
    }
    names(rcmat)[1:2] <- c("Chromosome", "Position")
    as.data.frame(rcmat, stringsAsFactors=FALSE)
}
rptashkin/facets2n documentation built on May 11, 2022, 1:34 p.m.