R/FindGenes.R
In DECIPHER: Tools for curating, analyzing, and manipulating biological sequences

Documented in FindGenes

.chainGenes <- function(allstarts,
	totScore,
	codScore,
	sameScores,
	oppoScores,
	maxOverlapSame,
	maxOverlapOpposite,
	minScore,
	includeLength) {
	
	maxFracOverlap <- 0.2 # max overlap with adjacent genes
	maxFracOverlapInclude <- 0.05 # max overlap for forcing inclusion
	ORFanCutoff <- 300L # length cutoff for ORFans
	ORFanDensity <- 0.2 # allowed density of ORFans
	ORFanScore <- seq(minScore[2],
		minScore[1],
		length.out=ORFanDensity*100) # score cutoffs for ORFans
	ORFanDist <- 10L # minimum distance from nearest non-ORFan
	minScore <- minScore[1]
	
	alllengths <- allstarts[, 4L] - allstarts[, 3L] + 1L
	w <- which(totScore >= minScore)
	topstarts <- allstarts[w,, drop=FALSE]
	o <- order(topstarts[, 1L], topstarts[, 3L])
	topstarts <- topstarts[o,]
	toplengths <- alllengths[w[o]]
	topScore <- totScore[w[o]]
	
	maxL <- (length(sameScores) - 1L)/2L
	if (maxL==maxOverlapSame &&
		maxL==maxOverlapOpposite) {
		scoreIntergenic <- TRUE
	} else {
		scoreIntergenic <- FALSE
	}
	
	res <- .Call("chainGenes",
		topstarts,
		topScore,
		toplengths,
		scoreIntergenic,
		maxOverlapSame,
		maxOverlapOpposite,
		maxFracOverlap,
		sameScores,
		oppoScores,
		PACKAGE="DECIPHER")
	
	pointers <- res[[1]]
	cumscore <- res[[2]]
	
	pointer <- which.max(cumscore)
	indices <- integer(pointer)
	position <- 1L
	indices[position] <- pointer
	repeat {
		if (pointer==pointers[pointer]) {
			break
		} else {
			pointer <- pointers[pointer]
			position <- position + 1L
			indices[position] <- pointer
		}
	}
	length(indices) <- position
	indices <- rev(indices)
	
	# eliminate ORFans (small genes in low density regions)
	remove <- logical(length(indices))
	currIndex <- 0L
	i <- 1L
	while (i < length(indices)) {
		if (currIndex != topstarts[indices[i], 1L]) {
			currIndex <- topstarts[indices[i], 1L]
			last <- 0L
		}
		if (toplengths[indices[i]] < ORFanCutoff &&
			topstarts[indices[i], 3L] - last >= ORFanDist) {
			j <- i + 1L
			while (j < length(indices) &&
				topstarts[indices[j], 1L]==topstarts[indices[i], 1L]) {
				if (toplengths[indices[j]] < ORFanCutoff) {
					j <- j + 1L
				} else {
					break
				}
			}
			if (topstarts[indices[j], 1L]==topstarts[indices[i], 1L]) {
				if (topstarts[indices[j], 3L] - topstarts[indices[j - 1L], 4L] < ORFanDist)
					j <- j - 1L
				if (j > i) {
					r <- i:(j - 1L)
					density <- sum(toplengths[indices[r]])/(topstarts[indices[j], 3L] - last)
					if (density < ORFanDensity)
						remove[r] <- topScore[indices[r]] < ORFanScore[ceiling(density*100)]
				}
			}
			i <- j
		}
		last <- topstarts[indices[i], 4L]
		i <- i + 1L
	}
	remove <- which(remove)
	if (length(remove) > 0)
		indices <- indices[-remove]
	
	indices <- w[o[indices]]
	
	missing <- which(alllengths >= includeLength &
		totScore < minScore)
	currIndex <- 0L
	while (length(missing) > 0) {
		if (currIndex != allstarts[missing[1L], 1L]) {
			currIndex <- allstarts[missing[1L], 1L]
			W <- which(allstarts[, 1L]==allstarts[missing[1L], 1L])
		}
		if (allstarts[missing[1], 2L]==1L) { # reverse strand
			w <- W[allstarts[W, 3L]==allstarts[missing[1L], 3L]]
		} else { # forward strand
			w <- W[allstarts[W, 4L]==allstarts[missing[1L], 4L]]
		}
		if (all(totScore[w] < minScore)) { # could not be included already
			i <- w[which.max(totScore[w] - codScore[w])]
			
			overlap <- which(allstarts[indices, 3L] <= allstarts[i, 3L] &
				allstarts[indices, 4L] >= allstarts[i, 4L] &
				allstarts[indices, 1L] == allstarts[i, 1L])
			if (length(overlap)==0) { # check beginning overlap
				overlap <- which(allstarts[indices, 3L] > allstarts[i, 3L] &
					allstarts[indices, 3L] < allstarts[i, 4L] &
					allstarts[indices, 1L] == allstarts[i, 1L])
				if (length(overlap) > 0) {
					delta <- allstarts[i, 4L] - allstarts[indices[overlap], 3L] + 1
					overlap <- which(delta > alllengths[i]*maxFracOverlapInclude |
						delta > alllengths[indices[overlap]]*maxFracOverlapInclude)
				}
			}
			
			if (length(overlap)==0) { # check ending overlap
				overlap <- which(allstarts[indices, 4L] > allstarts[i, 3L] &
					allstarts[indices, 4L] < allstarts[i, 4L] &
					allstarts[indices, 1L] == allstarts[i, 1L])
				if (length(overlap) > 0) {
					delta <- allstarts[indices[overlap], 4L] - allstarts[i, 3L] + 1
					overlap <- which(delta > alllengths[i]*maxFracOverlapInclude |
						delta > alllengths[indices[overlap]]*maxFracOverlapInclude)
				}
			}
			
			if (length(overlap)==0)
				indices <- c(indices, i)
		}
		missing <- missing[!(missing %in% w)]
	}
	
	return(sort(indices))
}

.extractRBS <- function(dna,
	deltaGrulesRNA,
	upstreamWidth,
	pattern=DNAString("AAGGAGG"),
	precision=2,
	spacing=3) {
	
	mapping <- diag(4) # don't penalize mismatches
	dimnames(mapping) <- list(DNA_BASES,
			DNA_BASES)
	mapping["G", "G"] <- 4
	mapping["C", "C"] <- 4
	
	w <- which(width(dna)==upstreamWidth)
	p <- pairwiseAlignment(dna[w],
		pattern,
		type="local-global",
		substitutionMatrix=mapping,
		gapOpening=10,
		gapExtension=4)
	pattern <- as.character(pattern(p))
	subject <- as.character(subject(p))
	
	dG <- .Call("calculateDeltaG",
		pattern,
		subject,
		deltaGrulesRNA,
		PACKAGE="DECIPHER")
	dG <- round(dG/precision)*precision
	
	pos <- (start(pattern(p)) + end(pattern(p)))/2
	pos <- round(pos/spacing)*spacing
	
	motif <- rep(NA_character_,
		length(dna))
	motif[w] <- paste(dG, pos)
	
	return(motif)
}

.performFolding <- function(foldLeft,
	foldRight,
	deltaGrulesRNA) {
	
	mapping <- diag(4)
	mapping[mapping==0] <- -4
	dimnames(mapping) <- list(DNA_BASES,
			DNA_BASES)
	mapping["G", "G"] <- 6
	mapping["C", "C"] <- 6
	
	p <- pairwiseAlignment(foldLeft,
		foldRight,
		type="overlap",
		substitutionMatrix=mapping,
		gapOpening=5,
		gapExtension=1)
	pattern <- as.character(pattern(p))
	subject <- as.character(subject(p))
	
	dG <- .Call("calculateDeltaG",
		pattern,
		subject,
		deltaGrulesRNA,
		PACKAGE="DECIPHER")
	
	return(dG)
}

.logisticRegression <- function(response,
	predictor,
	indices,
	remove,
	penalty,
	interactions=FALSE,
	bins=25) {
	
	other <- seq_len(length(response))[-c(indices, remove)]
	w <- c(indices,
		sample(other,
			min(length(other),
				1e4)))
	
	# remove any remaining NA values from indices
	sub_data <- cbind(response=response[w],
		predictor[w,, drop=FALSE])
	sub_data <- na.omit(sub_data)
	
	# fit a logistic regression model
	if (interactions) { # include pairwise interactions
		suppressWarnings(model <- glm(response ~ .^2,
			binomial,
			data=sub_data,
			control=glm.control(1e-6)))
	} else {
		suppressWarnings(model <- glm(response ~ .,
			binomial,
			data=sub_data,
			control=glm.control(1e-6)))
	}
	
	# perform stepwise feature selection
	suppressWarnings(model <- step(model,
		k=penalty,
		trace=0,
		direction="forward"))
	
	# convert probabilies into scores
	suppressWarnings(p <- predict(model,
		predictor,
		"response"))
	p <- .bincode(p,
		c(0,
			quantile(p[indices],
				seq(1/bins,
					1 - 1/bins,
					1/bins),
				na.rm=TRUE),
			1))
	fg <- tabulate(p[indices], bins)
	fg <- ifelse(fg==0, 1, fg)
	fg <- fg/sum(fg)
	bg <- tabulate(p[-indices], bins)
	bg <- ifelse(bg==0, 1, bg)
	bg <- bg/sum(bg)
	score <- log(fg/bg)
	score[p]
}

FindGenes <- function(myDNAStringSet,
	geneticCode=getGeneticCode("11"),
	minGeneLength=60,
	allowEdges=TRUE,
	allScores=FALSE,
	showPlot=FALSE,
	verbose=TRUE) {
	
	# set default parameters
	threshold <- c(0.995, 0.999) # lower/upper cutoff quantiles for background scoring
	noiseCutoff <- 150L # cutoff length for background scoring
	backgroundLength <- 200L # background for lenScr
	initialCodons <- 8L # initial codon frequencies
	terminationCodons <- 6L # termination codon frequencies
	maxL <- 1000L # maximum gene overlap
	maxOverlapSame <- 60L # region for spline fitting overlap
	maxOverlapOpposite <- 200L # region for spline fitting overlap
	upstreamWidth <- 20L # upstream nucleotides for motif search
	upstreamMotifs <- 30L # upstream of upstreamWidth
	upstreamKmerSize <- 4L # must be <= 8
	RBSk <- 6L # length of possible RBS motifs
	foldingWidth <- 35L # nucleotides either side of start
	offset <- c(-20L, 0L, 20L, 40L) # folding sites
	temp <- 37 # degrees Celsius
	ions <- 1 # Na+ equivalents
	penalty <- 7.879 # qchisq(0.005, 1, lower.tail=FALSE)
	length_params <- c(-4, 1400, 0.25, -10, 200) # initial parameters for sigmoid fitting the distribution of gene lengths
	includeMultiplier <- 1.5 # multiplier on x_intercept to seek inclusion despite negative score
	startCodonDist <- c("ATG"=80, "GTG"=14, "TTG"=5.995, "CTG"=0.002, "ATA"=0.001, "ATT"=0.001, "ATC"=0.001) # prior distribution of initial codon frequencies
	signals <- matrix(c(3, 8, 9, 1, 12, 10, 4, 0, 2, 6, 7, 11),
		nrow=3)
	codonFreqCutoff <- 0.3 # average distance of codon frequencies within models
	foldBetter <- 4 # fold-increase required to choose an alternate codon model
	lenScrMultiplier <- 2 # multiplier on positive length scores
	staScrMultiplier <- 2 # multiplier on start scores
	intergenicCount <- 50 # number of intergenic observations to use a precise score
	maxIterations <- 9L # maximum number of iterations (must be > 1)
	
	# error checking
	if (!is(myDNAStringSet, "DNAStringSet"))
		stop("myDNAStringSet must be a DNAStringSet.")
	if (sum(width(myDNAStringSet))==0)
		stop("myDNAStringSet must contain sequences.")
	a <- vcountPattern("-", myDNAStringSet)
	if (any(a > 0))
		stop("Gap characters ('-') must be removed from myDNAStringSet.")
	a <- vcountPattern(".", myDNAStringSet)
	if (any(a > 0))
		stop("Unknown characters ('.') must be removed from myDNAStringSet.")
	codons <- c('AAA', 'AAC', 'AAG', 'AAT',
		'ACA', 'ACC', 'ACG', 'ACT',
		'AGA', 'AGC', 'AGG', 'AGT',
		'ATA', 'ATC', 'ATG', 'ATT',
		'CAA', 'CAC', 'CAG', 'CAT',
		'CCA', 'CCC', 'CCG', 'CCT',
		'CGA', 'CGC', 'CGG', 'CGT',
		'CTA', 'CTC', 'CTG', 'CTT',
		'GAA', 'GAC', 'GAG', 'GAT',
		'GCA', 'GCC', 'GCG', 'GCT',
		'GGA', 'GGC', 'GGG', 'GGT',
		'GTA', 'GTC', 'GTG', 'GTT',
		'TAA', 'TAC', 'TAG', 'TAT',
		'TCA', 'TCC', 'TCG', 'TCT',
		'TGA', 'TGC', 'TGG', 'TGT',
		'TTA', 'TTC', 'TTG', 'TTT')
	if (!is.character(geneticCode))
		stop("geneticCode must be a character vector.")
	if (length(geneticCode) != 64)
		stop("geneticCode must contain 64 codons.")
	if (is.null(names(geneticCode)))
		stop("geneticCode must be a named character vector.")
	startCodons <- names(geneticCode[which(geneticCode=="M")])
	startCodons <- c(startCodons,
		attr(geneticCode, "alt_init_codons"))
	if (length(startCodons)==0)
		stop("geneticCode must contain at least one start codon.")
	if (length(startCodons) >= 10)
		stop("geneticCode must contain fewer than 10 start codons.")
	startIndex <- match(startCodons, codons) - 1L
	if (any(is.na(startIndex)))
		stop("geneticCode may only contain standard 3-letter start codons.")
	stopIndex <- names(geneticCode[which(geneticCode=="*")])
	stopIndex <- match(stopIndex, codons) - 1L
	if (length(stopIndex)==0)
		stop("No stop codons (*) found in the geneticCode.")
	if (length(stopIndex) >= 10)
		stop("geneticCode must contain fewer than 10 stop codons.")
	if (any(startIndex %in% stopIndex))
		stop("Start codons cannot also be stop codons in the geneticCode.")
	gC <- geneticCode
	geneticCode <- geneticCode[codons]
	if (any(is.na(geneticCode)))
		stop("geneticCode is missing one or more codons.")
	geneticCode <- match(geneticCode, AA_STANDARD) - 1L
	if (!is.numeric(minGeneLength))
		stop("minGeneLength must be a numeric.")
	if (length(minGeneLength) != 1)
		stop("minGeneLength must contain a single integer.")
	if (minGeneLength < (2 + initialCodons)*3)
		stop("minGeneLength must be at least ",
			(2 + max(initialCodons, terminationCodons))*3,
			".")
	if (minGeneLength > 0.9*noiseCutoff)
		stop("minGeneLength can be at most ",
			floor(0.9*noiseCutoff),
			".")
	if (minGeneLength != floor(minGeneLength))
		stop("minGeneLength must be a whole number.")
	minGeneLength <- as.integer(minGeneLength)
	if (!is.logical(allowEdges))
		stop("allowEdges must be a logical.")
	if (!is.logical(allScores))
		stop("allScores must be a logical.")
	if (!is.logical(showPlot))
		stop("showPlot must be a logical.")
	if (!is.logical(verbose))
		stop("verbose must be a logical.")
	
	if (verbose) {
		time.1 <- Sys.time()
		cat("Iter  Models Start Motif  Fold  Init UpsNt  Term   RBS  Auto  Stop Genes\n1          1")
		flush.console()
		iter <- 1L
	}
	
	# find all possible genes
	ORFs <- .Call("getORFs",
		myDNAStringSet,
		startIndex,
		stopIndex,
		minGeneLength,
		allowEdges,
		PACKAGE="DECIPHER")
	colnames(ORFs) <- c("Index", "Strand", "Begin", "End")
	
	if (nrow(ORFs)==0)
		stop("myDNAStringSet does not contain any open reading frames.")
	
	longest <- .Call("longestORFs",
		ORFs,
		PACKAGE="DECIPHER")
	l <- ORFs[, 4] - ORFs[, 3] + 1L
	
	# determine the background intergenic length distribution
	o <- longest[l[longest] >= noiseCutoff]
	o <- o[order(ORFs[o, 1], ORFs[o, 3])]
	intergenic <- ORFs[o[-1], 3] - ORFs[o[-length(o)], 4] + 1L
	sameStrand <- ORFs[o[-1], 2]==ORFs[o[-length(o)], 2]
	sameIndex <- ORFs[o[-1], 1]==ORFs[o[-length(o)], 1]
	r <- -maxL:maxL # symmetric length range of scores
	bg_same <- tabulate(intergenic[sameStrand & sameIndex] + maxL + 1L, length(r))
	first <- maxL - maxOverlapSame
	bg_same[seq_len(first)] <- 0L
	bg_same <- bg_same/sum(bg_same)
	bg_oppo <- tabulate(intergenic[!sameStrand & sameIndex] + maxL + 1L, length(r))
	bg_oppo[seq_len(first)] <- 0L
	bg_oppo <- bg_oppo/sum(bg_oppo)
	same_scores <- oppo_scores <- rep(0, length(r))
	
	# fit the background ORF length distribution
	t <- tabulate(l[longest])
	x <- which(t[seq_len(backgroundLength)] > 0)
	weight <- sqrt(t[x])
	logt <- log10(t)
	# fit logt = slope*x + y_intercept
	mx <- mean(x)
	mt <- mean(logt[x])
	slope <- sum(weight*(x - mx)*(logt[x] - mt))/sum(weight*(x - mx)^2)
	y_intercept <- mt - slope*mx
	x_intercept <- -y_intercept/slope
	x_intercept <- as.integer(x_intercept)
	
	.PDF2 <- function(x, p)
		-((exp(p[1]*log(x/p[2]))+1)^(-p[3]-1)*(exp(p[4]*log(x/p[5]))+1)^(-p[3]-1)*(((p[4]*p[3]+p[1]*p[3])*exp(p[1]*log(x/p[2]))+p[4]*p[3])*exp(p[4]*log(x/p[5]))+p[1]*p[3]*exp(p[1]*log(x/p[2]))))/x
	.CDF2 <- function(x, p) {
		sig1 <- (1 + exp(p[1]*log(x/p[2])))^-p[3]
		sig2 <- (1 + exp(p[4]*log(x/p[5])))^-p[3]
		sig1*sig2
	}
	.fitSigmoid <- function(x, y, ini) {
		weights <- diff(c(0, log(x)))
		.SSE <- function(p) {
				pdf <- .PDF2(x, p)
				if (any(is.nan(pdf)))
					return(sum(y^2))
				expected <- .CDF2(x, p)
			
			sum(weights*(y - expected)^2)
		}
		o <- suppressWarnings(optim(ini,
			.SSE,
			control=list(maxit=1e4)))
		
		return(o$par)
	}
	
	# fit the distribution of approximate gene lengths
	s <- seq(3, length(t), 3)
	bg <- 10^(slope*seq_along(t) + y_intercept)
	observed <- t[s] - bg[s] # expected number of genes
	observed[seq_len(backgroundLength/3)] <- 0
	observed[observed < 0] <- 0
	observed <- cumsum(observed)
	observed <- observed/observed[length(observed)]
	o <- .fitSigmoid(s,
		observed,
		length_params)
	
	# calculate log-odds model of gene length
	fg <- .PDF2(s, o)
	fg <- fg/sum(fg)
	bg <- bg[s]
	bg <- bg/sum(bg)
	lenScr <- log(fg/bg)
	w <- which(is.infinite(lenScr))
	if (length(w) > 0)
		lenScr[w] <- (lenScr[w[1L] - 1] - lenScr[w[1L] - 2])*seq_along(w) + lenScr[w[1L] - 1]
	lenScr <- lenScr[l/3]
	
	# compute single codon log-odds model
	codon_scores <- .Call("codonModel",
		myDNAStringSet,
		ORFs,
		stopIndex,
		x_intercept,
		PACKAGE="DECIPHER")
	codScr <- .Call("scoreCodonModel",
		myDNAStringSet,
		ORFs,
		codon_scores,
		PACKAGE="DECIPHER")
	orf_scores <- lenScr + codScr
	
	# add preliminary start codon scoring
	
	starts <- .Call("getRegion",
		myDNAStringSet,
		ORFs,
		3L,
		3L,
		PACKAGE="DECIPHER")
	bgStarts <- table(starts)[startCodons]
	bgStarts[is.na(bgStarts)] <- 0L
	bgStarts <- bgStarts/sum(bgStarts)
	fgStarts <- startCodonDist[startCodons]
	fgStarts[is.na(fgStarts)] <- 0.01 # add missing start codons
	fgStarts <- fgStarts/sum(fgStarts)
	start_scores <- log(fgStarts/bgStarts)
	staScr <- unname(start_scores[starts])
	if (allowEdges) # potential non-canonical start
		staScr[is.na(staScr)] <- 0
	orf_scores <- orf_scores + staScr
	
	# chain high-scoring ORFs into putative genes
	cutoff <- quantile(orf_scores[longest[which(l[longest] <= noiseCutoff)]],
		threshold)
	indices <- .chainGenes(ORFs,
		orf_scores,
		codScr,
		sameScores=same_scores,
		oppoScores=oppo_scores,
		maxOverlapSame=maxOverlapSame,
		maxOverlapOpposite=maxOverlapOpposite,
		minScore=cutoff,
		includeLength=includeMultiplier*x_intercept)
	
	if (verbose) {
		deltaSta <- mean(staScr[indices]) - mean(staScr[-indices])
		show <- formatC(deltaSta,
			width=6,
			digits=2,
			format="f")
		cat(show)
		cat("                                                ")
		show <- formatC(length(indices),
			width=6,
			digits=0,
			format="f")
		cat(show)
		flush.console()
	}
	
	# split the upstream region into k-mers
	upstream <- .Call("getRegion",
		myDNAStringSet,
		ORFs,
		upstreamWidth,
		0L,
		PACKAGE="DECIPHER")
	
	data("deltaHrulesRNA", envir=environment(), package="DECIPHER")
	data("deltaSrulesRNA", envir=environment(), package="DECIPHER")
	deltaSrulesRNA <- deltaSrulesRNA + 0.368*log(ions)/1000
	deltaGrulesRNA <- deltaHrulesRNA - (273.15 + temp)*deltaSrulesRNA
	
	# Shine-Delgarno free energy
	upstream <- DNAStringSet(upstream)
	dG_RBS <- .extractRBS(upstream,
		deltaGrulesRNA,
		upstreamWidth)
	
	# tile subsequences 3 bases apart
	foldLeft <- foldRight <- vector("list", length(offset))
	for(i in seq_along(offset)) {
		foldLeft[[i]] <- .Call("getRegion",
			myDNAStringSet,
			ORFs,
			foldingWidth,
			offset[i],
			PACKAGE="DECIPHER")
		
		foldRight[[i]] <- .Call("getRegion",
			myDNAStringSet,
			ORFs,
			-foldingWidth,
			offset[i],
			PACKAGE="DECIPHER")
	}
	foldLeft <- DNAStringSet(unlist(foldLeft))
	foldRight <- DNAStringSet(unlist(foldRight))
	foldRight <- reverseComplement(foldRight)
	
	# mRNA folding free energy
	w <- which(width(foldLeft)==foldingWidth &
		width(foldRight)==foldingWidth &
		l >= offset[length(offset)] + foldingWidth + 3L)
	dG_Fold <- matrix(NA_real_,
		nrow=nrow(ORFs),
		ncol=length(offset))
	dG_Fold[w] <- .performFolding(foldLeft[w],
		foldRight[w],
		deltaGrulesRNA)
	reject <- which(rowSums(is.na(dG_Fold)) > 0)
	dG_Fold <- as.data.frame(dG_Fold)
	names(dG_Fold) <- paste("p",
		offset,
		sep="")
	
	.getGenes <- function(indices,
		multiModel=FALSE,
		startCodonModel=FALSE,
		initialCodons=0L,
		terminationCodons=0L,
		scoreRBS=FALSE,
		scoreAutocorr=FALSE,
		upstreamMotifs=0L,
		scoreFolding=FALSE,
		scoreIntergenicLength=FALSE,
		runLengthModel=FALSE,
		stopCodonModel=FALSE,
		scoreUpstream=FALSE,
		allScores=FALSE,
		showPlot=FALSE) {
		
		genes <- ORFs[indices,, drop=FALSE]
		
		# compute gene length model
		observed <- tabulate(l[indices], s[length(s)])
		observed <- observed[s]
		observed <- cumsum(observed)
		observed <- observed/observed[length(observed)]
		# reset initial condition for quicker convergence
		length_params <<- .fitSigmoid(s,
			observed,
			length_params)
		fg <- .PDF2(s, length_params)
		fg <- fg/sum(fg)
		lenScr <- log(fg/bg)
		w <- which(is.infinite(lenScr))
		if (length(w) > 0)
			lenScr[w] <- (lenScr[w[1L] - 1] - lenScr[w[1L] - 2])*seq_along(w) + lenScr[w[1L] - 1]
		lenScr[lenScr > 0] <- lenScrMultiplier*lenScr[lenScr > 0]
		lenScr <- lenScr[l/3]
		
		if (showPlot) {
			layout(matrix(c(1, 3, 2, 4),
				nrow=2))
			plot(logt,
				xlab='ORF length (nucleotides)',
				ylab=expression('log'[10]*'(Frequency)'),
				xlim=c(minGeneLength, length(logt)),
				pch=46,
				log="x")
			abline(a=y_intercept,
				b=slope,
				h=0,
				v=x_intercept,
				col=c(6, 5, 5),
				lty=c(1, 2, 2),
				untf=TRUE)
			legend("topright",
				c("Background", "Intercepts", "ORF"),
				col=c(6, 5, 1),
				lty=c(1, 2, NA),
				pch=c(NA, NA, 15),
				bg="white")
			
			plot(s,
				100*diff(c(0, observed)),
				xlab="Length (nucleotides)",
				ylab="Frequency (%)",
				xlim=c(minGeneLength,
					max(1e4,
						s[length(s)])),
				pch=46,
				col="darkgreen",
				log="x")
			lines(s,
				100*fg,
				lwd=2,
				col="darkgreen")
			lines(s,
				100*bg,
				col="magenta")
			legend("topright",
				c("Background", "Estimated", "Gene"),
				col=c("magenta", "darkgreen", "darkgreen"),
				lty=c(1, 1, NA),
				lwd=c(1, 2, NA),
				pch=c(NA, NA, 15),
				bg="white")
		}
		
		# compute codon log-odds models
		codFreqs <- .Call("codonFrequencies",
			myDNAStringSet,
			ORFs,
			indices,
			PACKAGE="DECIPHER")
		freqs <- colMeans(codFreqs)
		minSize <- quantile(freqs[freqs > 0], 0.1)
		minSize <- c(90*minSize^-1, # rare codons ~30 times
			0.3*minSize^-2) # rare dicodons ~0.1 times
		if (multiModel) {
			if (nrow(codFreqs) > 10000) # use a subset
				codFreqs <- codFreqs[seq_len(10000),]
			dists <- dist(codFreqs)
			clusts <- IdClusters(dists,
				cutoff=codonFreqCutoff,
				verbose=FALSE)
			clusts <- clusts$cluster
			clusts <- lapply(seq_len(max(clusts)),
				function(x)
					which(clusts==x))
			size <- sapply(clusts,
				function(x)
					sum(l[indices[x]]))
			keep <- size >= minSize[1]
			clusts <- clusts[keep]
			size <- size[keep]
			if (length(clusts) > 1 ||
				length(clusts)==0 ||
				(length(clusts)==1 &&
				length(clusts[[1]]) < length(indices))) {
				clusts <- c(list(seq_along(indices)),
					clusts)
				size <- c(sum(l[indices]),
					size)
			}
		} else {
			clusts <- list(seq_along(indices))
			size <- sum(l[indices])
		}
		for (i in seq_along(clusts)) {
			if (size[i] >= minSize[2]) { # use dicodon model
				codon_scores <- .Call("dicodonModel",
					myDNAStringSet,
					genes[clusts[[i]],, drop=FALSE],
					stopIndex,
					PACKAGE="DECIPHER")
				#dimnames(codon_scores) <- list(codons, codons)
				# codon_scores matrix is [next, prev]
			} else { # use unicodon model
				codon_scores <- .Call("unicodonModel",
					myDNAStringSet,
					genes[clusts[[i]],, drop=FALSE],
					stopIndex,
					PACKAGE="DECIPHER")
			}
			tempScr <- .Call("scoreCodonModel",
				myDNAStringSet,
				ORFs,
				codon_scores,
				PACKAGE="DECIPHER")
			if (i==1L) {
				codScr <- tempScr
				
				# add run length into model
				if (runLengthModel) {
					run_scores <- .Call("runLengthModel",
						myDNAStringSet,
						genes,
						codon_scores,
						PACKAGE="DECIPHER")
					runScr <- .Call("scoreRunLengthModel",
						myDNAStringSet,
						ORFs,
						codon_scores,
						run_scores,
						PACKAGE="DECIPHER")
					orf_scores <- orf_scores + runScr
				}
			} else if (i==2) {
				altScr <- tempScr
			} else { # record the best alternative codScr
				altScr <- ifelse(tempScr > altScr,
					tempScr,
					altScr)
			}
		}
		if (length(clusts) > 1) {
			w <- which(altScr > 0 &
				altScr/foldBetter > codScr)
			if (length(w) > 0)
				codScr[w] <- altScr[w]
		}
		orf_scores <- lenScr + codScr
		
		# add start codon preferences into model
		if (startCodonModel) {
			start_scores <- .Call("startCodonModel",
				myDNAStringSet,
				ORFs,
				indices,
				startIndex,
				PACKAGE="DECIPHER")
			staScr <- .Call("scoreStartCodonModel",
				myDNAStringSet,
				ORFs,
				start_scores,
				PACKAGE="DECIPHER")
		}
		staScr <- staScrMultiplier*staScr
		deltaSta <- mean(staScr[indices]) - mean(staScr[-indices])
		if (deltaSta > 0) {
			orf_scores <- orf_scores + staScr
		} else if (verbose) {
			deltaSta <- "-"
		}
		
		# add initial codon bias into model
		if (initialCodons) {
			ini_codon_scores <- .Call("initialCodonModel",
				myDNAStringSet,
				ORFs,
				indices,
				initialCodons,
				PACKAGE="DECIPHER")
			iniScr <- .Call("scoreInitialCodonModel",
				myDNAStringSet,
				ORFs,
				ini_codon_scores,
				PACKAGE="DECIPHER")
			deltaIni <- mean(iniScr[indices]) - mean(iniScr[-indices])
			if (deltaIni < 0) {
				deltaIni <- "-"
			} else {
				orf_scores <- orf_scores + iniScr
			}
		}
		
		# add termination codon bias into model
		if (terminationCodons) {
			ter_codon_scores <- .Call("terminationCodonModel",
				myDNAStringSet,
				ORFs,
				indices,
				terminationCodons,
				PACKAGE="DECIPHER")
			terScr <- .Call("scoreTerminationCodonModel",
				myDNAStringSet,
				ORFs,
				ter_codon_scores,
				PACKAGE="DECIPHER")
			deltaTer <- mean(terScr[indices]) - mean(terScr[-indices])
			if (deltaTer < 0) {
				deltaTer <- "-"
			} else {
				orf_scores <- orf_scores + terScr
			}
		}
		
		# add stop codon preferences into model
		if (stopCodonModel) {
			stop_scores <- .Call("stopCodonModel",
				myDNAStringSet,
				ORFs,
				indices,
				stopIndex,
				PACKAGE="DECIPHER")
			stoScr <- .Call("scoreStopCodonModel",
				myDNAStringSet,
				ORFs,
				stop_scores,
				PACKAGE="DECIPHER")
			deltaSto <- mean(stoScr[indices]) - mean(stoScr[-indices])
			if (deltaSto < 0) {
				deltaSto <- "-"
			} else {
				orf_scores <- orf_scores + stoScr
			}
		}
		
		# add autocorrelation into model
		if (scoreAutocorr) {
			autocorr_scores <- .Call("autocorrelationModel",
				myDNAStringSet,
				ORFs,
				indices,
				geneticCode,
				PACKAGE="DECIPHER")
			autScr <- .Call("scoreAutocorrelationModel",
				myDNAStringSet,
				ORFs,
				autocorr_scores,
				geneticCode,
				PACKAGE="DECIPHER")
			deltaAut <- mean(autScr[indices]) - mean(autScr[-indices])
			if (deltaAut < 0) {
				deltaAut <- "-"
			} else {
				orf_scores <- orf_scores + autScr
			}
		}
		
		# add upstream nucleotide bias model
		if (scoreUpstream) {
			up_nuc_scores <- .Call("nucleotideBiasModel",
				myDNAStringSet,
				ORFs,
				indices,
				upstreamWidth,
				PACKAGE="DECIPHER")
			upsScr <- .Call("scoreNucleotideBiasModel",
				myDNAStringSet,
				ORFs,
				up_nuc_scores,
				PACKAGE="DECIPHER")
			deltaUps <- mean(upsScr[indices]) - mean(upsScr[-indices])
			if (deltaUps < 0) {
				deltaUps <- "-"
			} else {
				orf_scores <- orf_scores + upsScr
			}
		}
		
		# add upstream motifs into model
		if (upstreamMotifs) {
			upstream_motif_scores <- .Call("upstreamMotifModel",
				myDNAStringSet,
				ORFs,
				indices,
				upstreamWidth + 1L,
				upstreamMotifs,
				upstreamKmerSize,
				PACKAGE="DECIPHER")
			motScr <- .Call("scoreUpstreamMotifModel",
				myDNAStringSet,
				ORFs,
				upstream_motif_scores,
				upstreamWidth + 1L,
				upstreamMotifs,
				upstreamKmerSize,
				PACKAGE="DECIPHER")
			deltaMot <- mean(motScr[indices]) - mean(motScr[-indices])
			if (deltaMot < 0) {
				deltaMot <- "-"
			} else {
				orf_scores <- orf_scores + motScr
			}
		}
		
		if (scoreFolding ||
			scoreRBS) {
			response <- integer(nrow(ORFs))
			response[indices] <- 1L
		}
		
		# add folding around start into model
		if (scoreFolding) {
			folScr <- .logisticRegression(response,
				dG_Fold,
				indices,
				reject,
				penalty,
				interactions=TRUE)
			
			if (length(reject) > 0)
				folScr[reject] <- 0
			
			deltaFol <- mean(folScr[indices]) - mean(folScr[-indices])
			if (deltaFol < 0) {
				deltaFol <- "-"
			} else {
				orf_scores <- orf_scores + folScr
			}
		}
		
		# add ribosome binding sites into model
		if (scoreRBS) {
			t <- unique(dG_RBS)
			t <- setNames(rep(1,
					length(t)),
				t)
			
			obs <- t
			temp <- table(dG_RBS[indices])
			obs[names(temp)] <- temp
			obs[is.na(names(obs))] <- 0
			obs <- obs/sum(obs)
			
			bg <- t
			temp <- table(dG_RBS[-indices])
			bg[names(temp)] <- temp
			bg[is.na(names(bg))] <- 0
			bg <- bg/sum(bg)
			
			rbsScr <- log(obs/bg)
			rbsScr <- rbsScr[dG_RBS]
			rbsScr[is.na(dG_RBS)] <- 0
			
			# learn alternative RBS sites
			fg <- oligonucleotideFrequency(upstream[indices],
				RBSk,
				simplify.as="collapse")
			fg <- fg/sum(fg)
			bg <- oligonucleotideFrequency(upstream[-indices],
				RBSk,
				simplify.as="collapse")
			bg[bg==0] <- 1L # add pseudocounts
			bg <- bg/sum(bg)
			score <- log(fg/bg)
			score <- sort(score,
				decreasing=TRUE)
			score <- score[score >= 1]
			if (length(score) > 0) {
				if (length(score) > 1000)
					score <- score[1:1000]
				
				motif <- names(score)
				motif <- DNAStringSet(motif)
				
				dists <- DistanceMatrix(motif,
					verbose=FALSE)
				clusts <- IdClusters(dists,
					method="complete",
					cutoff=1/RBSk, # 1 mismatch
					verbose=FALSE)
				clusts <- clusts$cluster
				o <- tapply(score,
					clusts,
					sum)
				o <- order(o,
					decreasing=TRUE)
				if (length(o) > 20)
					o <- o[1:20]
				
				PWMs <- lapply(o,
					function(x) {
						w <- which(clusts==x)
						
						# weight motifs approximately by score
						m <- rep(motif[w],
							score[w])
						
						pwm <- PWM(m)
						
						ns <- ConsensusSequence(m)
						ns <- as.character(ns)
						
						list(pwm, ns)
					})
				
				hits <- vector("list",
					length(PWMs) + 1L)
				names(hits) <- c(sapply(PWMs,
						`[`,
						2L),
					"dG_SD")
				PWMs <- lapply(PWMs,
					`[[`,
					1L)
				
				begin <- cumsum(width(upstream)) - width(upstream)
				merged <- unlist(upstream)
				pos <- seq(0L,
					upstreamWidth - RBSk,
					1L)
				for (i in seq_along(PWMs)) {
					scores <- matrix(0,
						nrow=length(starts),
						ncol=length(pos))
					for (j in seq_along(pos)) {
						w <- which(pos[j] + RBSk - 1L <= width(upstream))
						scores[w, j] <- PWMscoreStartingAt(PWMs[[i]],
							merged,
							starting.at=begin[w] + pos[j] + 1L)
					}
					hits[[i]] <- apply(scores,
						1L,
						max)
				}
				hits[[length(hits)]] <- rbsScr
				hits <- data.frame(hits)
				
				rbsScr <- .logisticRegression(response,
					hits,
					indices,
					integer(),
					penalty)
			}
			
			deltaRbs <- mean(rbsScr[indices]) - mean(rbsScr[-indices])
			if (deltaRbs < 0) {
				deltaRbs <- "-"
			} else {
				orf_scores <- orf_scores + rbsScr
			}
		}
		
		# add intergenic length into model
		if (scoreIntergenicLength) {
			o <- order(ORFs[indices, 1], ORFs[indices, 3])
			intergenic <- ORFs[indices[o][-1], 3] - ORFs[indices[o][-length(o)], 4] + 1L
			sameStrand <- ORFs[indices[o][-1], 2]==ORFs[indices[o][-length(o)], 2]
			sameIndex <- ORFs[indices[o][-1], 1]==ORFs[indices[o][-length(o)], 1]
			obs_same <- tabulate(intergenic[sameStrand & sameIndex] + maxL + 1L, length(r))
			obs_same[seq_len(first)] <- 0L
			# accumulate background
			sum <- 0
			for (i in (maxL - 1):(first + 1)) {
				if (obs_same[i]==0) {
					sum <- sum + bg_same[i]
				} else if (sum > 0) {
					bg_same[i] <- bg_same[i] + sum
					sum <- 0
				}
			}
			obs_oppo <- tabulate(intergenic[!sameStrand & sameIndex] + maxL + 1L, length(r))
			obs_oppo[seq_len(first)] <- 0L
			# accumulate background
			sum <- 0
			for (i in (maxL - 1):(first + 1)) {
				if (obs_oppo[i]==0) {
					sum <- sum + bg_oppo[i]
				} else if (sum > 0) {
					bg_oppo[i] <- bg_oppo[i] + sum
					sum <- 0
				}
			}
			
			obs_sum <- sum(obs_same, obs_oppo)
			odds_same <- log(obs_same/obs_sum/bg_same)
			odds_same[is.nan(odds_same) | is.infinite(odds_same)] <- 0
			t <- try(same_scores <- predict(smooth.spline(r,
					odds_same,
					obs_same,
					df=20))[[2]],
				silent=TRUE)
			if (is(t, "try-error")) {
				same_scores <- numeric(length(odds_same))
			} else {
				sub <- which(obs_same >= intergenicCount)
				if (length(sub) > 0)
					same_scores[sub] <- odds_same[sub]
			}
			
			odds_oppo <- log(obs_oppo/obs_sum/bg_oppo)
			odds_oppo[is.nan(odds_oppo) | is.infinite(odds_oppo)] <- 0
			t <- try(oppo_scores <- predict(smooth.spline(r,
					odds_oppo,
					obs_oppo,
					df=20))[[2]],
				silent=TRUE)
			if (is(t, "try-error")) {
				oppo_scores <- numeric(length(odds_oppo))
			} else {
				sub <- which(obs_oppo >= intergenicCount)
				if (length(sub) > 0)
					oppo_scores[sub] <- odds_oppo[sub]
			}
			
			maxOverlapSame <- maxL
			maxOverlapOpposite <- maxL
			if (showPlot) {
				halfMaxObs <- max(obs_same, obs_oppo)/2
				plot(r,
					odds_same,
					xlab="Intergenic length (nucleotides)",
					ylab="Log-odds",
					col="darkblue",
					cex=obs_same/halfMaxObs,
					ylim=c(-5, 5),
					xlim=c(-200, 200))
				points(r,
					odds_oppo,
					cex=obs_oppo/halfMaxObs,
					pch=0,
					col="darkred")
				lines(r,
					same_scores,
					col="darkblue")
				lines(r,
					oppo_scores,
					lty=3,
					lwd=2,
					col="darkred")
				legend("topleft",
					c("Same", "Oppo."),
					col=c("darkblue", "darkred"),
					lty=c(1, 3),
					pch=c(1, 0),
					lwd=c(1, 2),
					title=expression(underline("Strand")),
					bg="white")
			}
		}
		
		cutoff <- quantile(orf_scores[l <= noiseCutoff],
			threshold)
		cutoff <- cutoff + same_scores[maxL + 1L] # add typical intergenic score
		
		# chain high-scoring ORFs into genes
		indices <- .chainGenes(ORFs,
			orf_scores,
			codScr,
			sameScores=same_scores,
			oppoScores=oppo_scores,
			maxOverlapSame=maxOverlapSame,
			maxOverlapOpposite=maxOverlapOpposite,
			minScore=cutoff,
			includeLength=includeMultiplier*x_intercept)
		
		if (showPlot) {
			plot(l[longest],
				orf_scores[longest],
				xlab="ORF length (nucleotides)",
				ylab="Total score",
				log="x",
				pch=46)
			points(l[indices],
				orf_scores[indices],
				col="darkgreen",
				pch=46)
			abline(h=cutoff[1],	
				lty=2,
				col="orange")
			legend("topleft",
				c("ORF", "Gene", "Threshold"),
				pch=c(15, 15, NA),
				lty=c(NA, NA, 2),
				col=c("black", "darkgreen", "orange"),
				bg="white")
		}
		
		if (verbose) {
			cat("\r")
			iter <<- iter + 1L
			show <- formatC(iter,
				width=-6,
				digits=0,
				format="f")
			cat(show)
			show <- formatC(length(size),
				width=6,
				digits=0,
				format="f")
			cat(show)
			show <- formatC(deltaSta,
				width=6,
				digits=2,
				format="f")
			cat(show)
			if (upstreamMotifs) {
				show <- formatC(deltaMot,
					width=6,
					digits=2,
					format="f")
				cat(show)
			} else {
				cat("      ")
			}
			if (scoreFolding) {
				show <- formatC(deltaFol,
					width=6,
					digits=2,
					format="f")
				cat(show)
			} else {
				cat("      ")
			}
			if (initialCodons) {
				show <- formatC(deltaIni,
					width=6,
					digits=2,
					format="f")
				cat(show)
			} else {
				cat("      ")
			}
			if (scoreUpstream) {
				show <- formatC(deltaUps,
					width=6,
					digits=2,
					format="f")
				cat(show)
			} else {
				cat("      ")
			}
			if (terminationCodons) {
				show <- formatC(deltaTer,
					width=6,
					digits=2,
					format="f")
				cat(show)
			} else {
				cat("      ")
			}
			if (scoreRBS) {
				show <- formatC(deltaRbs,
					width=6,
					digits=2,
					format="f")
				cat(show)
			} else {
				cat("      ")
			}
			if (scoreAutocorr) {
				show <- formatC(deltaAut,
					width=6,
					digits=2,
					format="f")
				cat(show)
			} else {
				cat("      ")
			}
			if (stopCodonModel) {
				show <- formatC(deltaSto,
					width=6,
					digits=2,
					format="f")
				cat(show)
			} else {
				cat("      ")
			}
			show <- formatC(length(indices),
				width=6,
				digits=0,
				format="f")
			cat(show)
			flush.console()
		}
		
		if (allScores) {
			ans <- cbind(ORFs,
				TotalScore=orf_scores,
				LengthScore=lenScr,
				CodingScore=codScr)
			if (startCodonModel)
				ans <- cbind(ans,
					StartScore=staScr)
			if (stopCodonModel)
				ans <- cbind(ans,
					StopScore=stoScr)
			if (initialCodons > 0)
				ans <- cbind(ans,
					InitialCodonScore=iniScr)
			if (terminationCodons > 0)
				ans <- cbind(ans,
					TerminationCodonScore=terScr)
			if (scoreRBS)
				ans <- cbind(ans,
					RibosomeBindingSiteScore=rbsScr)
			if (scoreAutocorr)
				ans <- cbind(ans,
					AutocorrelationScore=autScr)
			if (scoreUpstream)
				ans <-cbind(ans,
					UpstreamNucleotideScore=upsScr)
			if (upstreamMotifs)
				ans <- cbind(ans,
					UpstreamMotifScore=motScr)
			if (scoreFolding)
				ans <- cbind(ans,
					FoldingScore=folScr)
			if (runLengthModel)
				ans <- cbind(ans,
					RunLengthScore=runScr)
			return(list(ans, indices))
		} else {
			return(indices)
		}
	}
	
	params <- list()
	for (j in seq_len(ncol(signals))) {
		for (i in seq_len(nrow(signals))) {
			if (signals[i, j]==1) {
				params <- c(params,
					list(multiModel=TRUE))
			} else if (signals[i, j]==2) {
				params <- c(params,
					list(startCodonModel=TRUE))
			} else if (signals[i, j]==3) {
				params <- c(params,
					list(initialCodons=initialCodons))
			} else if (signals[i, j]==4) {
				params <- c(params,
					list(terminationCodons=terminationCodons))
			} else if (signals[i, j]==5) {
				params <- c(params,
					list(runLengthModel=TRUE))
			} else if (signals[i, j]==6) {
				params <- c(params,
					list(scoreRBS=TRUE))
			} else if (signals[i, j]==7) {
				params <- c(params,
					list(scoreAutocorr=TRUE))
			} else if (signals[i, j]==8) {
				params <- c(params,
					list(upstreamMotifs=upstreamMotifs))
			} else if (signals[i, j]==9) {
				params <- c(params,
					list(scoreFolding=TRUE))
			} else if (signals[i, j]==10) {
				params <- c(params,
					list(scoreIntergenicLength=TRUE))
			} else if (signals[i, j]==11) {
				params <- c(params,
					list(stopCodonModel=TRUE))
			} else if (signals[i, j]==12) {
				params <- c(params,
					list(scoreUpstream=TRUE))
			}
		}
		indices <- do.call(.getGenes,
			c(params, list(indices)))
	}
	
	bootstraps <- integer(nrow(ORFs))
	bootstraps[indices] <- 1
	prev_indices <- indices
	for (i in 2L:maxIterations) {
		indices <- do.call(.getGenes,
			c(params, list(indices)))
		bootstraps[indices] <- bootstraps[indices] + 1
		indices <- which(bootstraps==i)
		if (length(prev_indices)==length(indices) &&
			all(prev_indices==indices))
			break
		prev_indices <- indices
	}
	
	params <- c(params,
		list(indices=indices, # indices no longer changes
			allScores=TRUE,
			showPlot=showPlot))
	ans <- do.call(.getGenes,
		params)
	indices <- ans[[2]]
	bootstraps[indices] <- bootstraps[indices] + 1
	ans <- ans[[1]]
	if (allScores) {
		gene <- numeric(nrow(ans))
		gene[indices] <- 1
		ans <- cbind(ans,
			FractionReps=bootstraps/(i + 1),
			Gene=gene)
	} else {
		ans <- ans[indices,]
		ans <- cbind(ans,
			FractionReps=bootstraps[indices]/(i + 1),
			Gene=rep(1, nrow(ans)))
	}
	o <- order(ans[, "Index"], ans[, "Begin"])
	ans <- ans[o,]
	rownames(ans) <- seq_len(nrow(ans))
	
	class(ans) <- "Genes"
	attr(ans, "widths") <- setNames(width(myDNAStringSet),
		names(myDNAStringSet))
	attr(ans, "geneticCode") <- gC
	attr(ans, "minGeneLength") <- minGeneLength
	attr(ans, "allowEdges") <- allowEdges
	
	if (verbose) {
		cat("\n\n")
		time.2 <- Sys.time()
		print(round(difftime(time.2,
			time.1,
			units='secs'),
			digits=2))
		cat("\n")
	}
	
	return(ans)
}
Any scripts or data that you put into this service are public.
DECIPHER documentation built on Nov. 8, 2020, 8:30 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
DECIPHER
Tools for curating, analyzing, and manipulating biological sequences

R/FindGenes.R
In DECIPHER: Tools for curating, analyzing, and manipulating biological sequences

Defines functions FindGenes .logisticRegression .performFolding .extractRBS .chainGenes

Documented in FindGenes

Try the DECIPHER package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DECIPHER Tools for curating, analyzing, and manipulating biological sequences

R/FindGenes.R In DECIPHER: Tools for curating, analyzing, and manipulating biological sequences

Defines functions FindGenes .logisticRegression .performFolding .extractRBS .chainGenes

Documented in FindGenes

Try the DECIPHER package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DECIPHER
Tools for curating, analyzing, and manipulating biological sequences

R/FindGenes.R
In DECIPHER: Tools for curating, analyzing, and manipulating biological sequences