Nothing
######################
## Filter VCF files ##
######################
filterVars <- function (args, filter, varcaller, organism) {
if (!class(args) %in% c("SYSargs", "SYSargs2"))
stop("Argument 'args' needs to be of class SYSargs or SYSargs2")
if (all(!c("gatk", "bcftools") %in% varcaller))
stop("Argument 'varcaller' needs to be assigned 'gatk' or 'bcftools'")
if (length(filter) != 1 | !is.character(filter))
stop("Argument 'filter' needs to character vector of length 1")
if (class(args) == "SYSargs"){
outfile <- outfile1(args)
}
if (class(args) == "SYSargs2"){
outfile <- gsub("\\.bgz", "", subsetWF(args, slot='output', subset = 1, index=1))
}
for (i in seq(along = args)) {
vcf <- readVcf(infile1(args)[i], organism)
vr <- as(vcf, "VRanges")
if (varcaller == "gatk") {
vrfilt <- vr[eval(parse(text = filter)), ]
}
if (varcaller == "bcftools") {
vrsambcf <- vr
vr <- unlist(values(vr)$DP4)
vr <- matrix(vr, ncol = 4, byrow = TRUE)
totalDepth(vrsambcf) <- as.integer(values(vrsambcf)$DP)
refDepth(vrsambcf) <- rowSums(vr[, 1:2])
altDepth(vrsambcf) <- rowSums(vr[, 3:4])
vrfilt <- vrsambcf[eval(parse(text = filter)), ]
}
vcffilt <- asVCF(vrfilt)
writeVcf(vcffilt, outfile[i], index = TRUE)
print(paste("Generated file", i, gsub(".*/", "", outfile[i])))
}
}
## Usage for GATK:
# filter <- "totalDepth(vr) >= 20 & (altDepth(vr) / totalDepth(vr) >= 0.8) & rowSums(softFilterMatrix(vr))==6"
# filterVars(args, filter, varcaller="gatk", organism="Pinfest")
## Usage for BCFtools:
# filter <- "rowSums(vr) >= 20 & (rowSums(vr[,3:4])/rowSums(vr[,1:4]) >= 0.8)"
# filterVars(args, filter, varcaller="bcftools", organism="Pinfest")
#################
## VAR Reports ##
#################
## Report for locatVariants() where data for each variant is collapsed to a single line.
.allAnnot <- function(x, vcf) {
rd <- rowRanges(vcf)
## Make variant calls in rd unique by collapsing duplicated ones
VARID <- VARID <- unique(names(rd))
REF <- tapply(as.character(values(rd)$REF), factor(names(rd)), function(i) paste(unique(i), collapse=" "))
ALT <- tapply(as.character(unlist(values(rd)$ALT)), factor(names(rd)), function(i) paste(unique(i), collapse=" "))
QUAL <- tapply(values(rd)$QUAL, factor(names(rd)), function(i) paste(unique(i), collapse=" "))
## fix names field in x if incomplete
if(any(names(x)=="")) {
index <- unique(names(rd)); names(index) <- gsub("_.*", "", index)
names(x) <- index[paste(as.character(seqnames(x)), ":", start(x), sep="")]
}
## Make annotated variant calls in x unique by collapsing duplicated ones
LOCATION <- tapply(as.character(values(x)$LOCATION), as.factor(names(x)), function(i) paste(i, collapse=" "))
GENEID <- tapply(values(x)$GENEID, factor(names(x)), function(i) paste(unique(i), collapse=" "))
## Assemble results in data.frame
df <- data.frame(VARID=VARID,
REF=REF[VARID],
ALT=ALT[VARID],
QUAL=QUAL[VARID],
LOCATION=LOCATION[VARID],
GENEID=GENEID[VARID])
df[,"LOCATION"] <- gsub("NA", "", df$LOCATION)
df[,"GENEID"] <- gsub("NA", "", df$GENEID)
return(df)
}
## Usage:
# allvar <- locateVariants(rd, txdb, AllVariants())
# varreport <- .allAnnot(allvar, vcf)
## Report for predictCoding() where data for each variant if collapsed to one line.
.codingReport <- function(x, txdb) {
txids <- values(transcripts(txdb))$tx_name; names(txids) <- values(transcripts(txdb))$tx_id
#myf <- as.factor(names(values(x)$CDSLOC))
myf <- as.factor(names(x))
if(length(myf)>0) {
df <- data.frame(VARID=tapply(as.character(myf), myf, unique),
Strand=tapply(as.character(strand(x)), myf, unique),
Consequence=tapply(as.character(values(x)$CONSEQUENCE), myf, function(i) paste(unique(i), collapse=" ")),
Codon=tapply(paste(start(values(x)$CDSLOC), "_", as.character(values(x)$REFCODON), "/", as.character(values(x)$VARCODON), sep=""), myf, paste, collapse=" "),
AA=tapply(paste(sapply(values(x)$PROTEINLOC, paste, collapse="_"), "_", as.character(values(x)$REFAA), "/", as.character(values(x)$VARAA), sep=""), myf, paste, collapse=" "),
TXIDs=tapply(txids[values(x)$TXID], myf, paste, collapse=" "),
GENEIDcode=tapply(values(x)$GENEID, myf, function(i) paste(unique(i), collapse=" ")))
} else {
df <- data.frame(VARID=NA, Strand=NA, Consequence=NA, Codon=NA, AA=NA, TXIDs=NA, GENEIDcode=NA)[FALSE,,drop=FALSE]
}
return(df)
}
## Usage:
# codereport <- predictCoding(vcf, txdb, seqSource=fa)
# codereport <- .codingReport(coderport, txdb)
####################
## Variant Report ##
####################
variantReport <- function (args, txdb, fa, organism) {
if (!class(args) %in% c("SYSargs", "SYSargs2"))
stop("Argument 'args' needs to be of class SYSargs or SYSargs2")
if (class(args) == "SYSargs"){
outfile <- outfile1(args)
}
if (class(args) == "SYSargs2"){
outfile <- subsetWF(args, slot='output', subset = 1, index=1)
}
for (i in seq(along = args)) {
vcf <- readVcf(infile1(args)[i], organism)
allvar <- locateVariants(vcf, txdb, AllVariants())
varreport <- .allAnnot(allvar, vcf)
coding <- predictCoding(vcf, txdb, seqSource = fa)
codereport <- .codingReport(coding, txdb)
vr <- as(vcf, "VRanges")
varid <- paste(as.character(seqnames(vr)), ":", start(vr),
"_", ref(vr), "/", alt(vr), sep = "")
vrdf <- data.frame(row.names = varid, as.data.frame(vr))
vrdf <- vrdf[, c("totalDepth", "refDepth", "altDepth")]
fullreport <- cbind(varreport, codereport[rownames(varreport),
-1])
fullreport <- cbind(VARID = as.character(fullreport[, 1]),
vrdf[as.character(rownames(fullreport)), ],
fullreport[, -1])
fullreport <- data.frame(lapply(fullreport, as.character),
stringsAsFactors = FALSE)
write.table(fullreport, file = outfile[i], row.names = FALSE,
quote = FALSE, sep = "\t", na = "")
print(paste("Generated file", i, gsub(".*/", "", outfile[i])))
}
}
## Usage:
# variantReport(args=args, txdb=txdb, fa=fa, organism="Pinfest")
#############################
## Combine Variant Reports ##
#############################
combineVarReports <- function(args, filtercol, ncol=15) {
if (!class(args) %in% c("SYSargs", "SYSargs2"))
stop("Argument 'args' needs to be of class SYSargs or SYSargs2")
if (class(args) == "SYSargs"){
outpaths <- outpaths(args)
}
if (class(args) == "SYSargs2"){
outpaths <- subsetWF(args, slot='output', subset = 1, index=1)
}
samples <- names(outpaths)
for(i in seq(along=samples)) {
if(i==1) {
varDF <- read.delim(outpaths[i], colClasses=rep("character", ncol))
varDF <- cbind(Sample=samples[i], varDF)
if(filtercol[1]!="All") varDF <- varDF[varDF[,names(filtercol)]==filtercol,]
} else {
tmpDF <- read.delim(outpaths[i])
tmpDF <- read.delim(outpaths[i], colClasses=rep("character", ncol))
tmpDF <- cbind(Sample=samples[i], tmpDF)
if(filtercol[1]!="All") tmpDF <- tmpDF[tmpDF[,names(filtercol)]==filtercol,]
varDF <- rbind(as.data.frame(as.matrix(varDF)), as.data.frame(as.matrix(tmpDF)))
}
varDF <- varDF[order(varDF$VARID),]
}
return(varDF)
}
## Usage:
# args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_gatk_filtered.txt")
# combineDF <- combineVarReports(args, filtercol=c(Consequence="nonsynonymous"))
###########################################
## Create summary statistics of variants ##
###########################################
varSummary <- function(args) {
if (!class(args) %in% c("SYSargs", "SYSargs2"))
stop("Argument 'args' needs to be of class SYSargs or SYSargs2")
if (class(args) == "SYSargs"){
outpaths <- outpaths(args)
}
if (class(args) == "SYSargs2"){
outpaths <- subsetWF(args, slot='output', subset = 1, index=1)
}
for(i in seq(along=args)) {
annotDF <- read.delim(outpaths[i])
count <- c(all=length(annotDF[,1]),
table(unlist(strsplit(as.character(annotDF$LOCATION), " "))),
table(unlist(strsplit(as.character(annotDF$Consequence), " "))))
if(i==1) {
countDF <- data.frame(count)
} else {
countDF <- cbind(countDF, count[rownames(countDF)])
}
}
countDF[is.na(countDF)] <- 0
colnames(countDF) <- names(outpaths)
return(countDF)
}
## Usage:
# args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_gatk_filtered.txt")
# varSummaryDF <- varSummary(args)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.