Nothing
#' Evaluate changes to ORFs caused by alternative splicing
#' @param orfsX orf information for 'normal' transcripts. Generated by getOrfs()
#' @param orfsY orf information for 'alternative' transcripts. Generated by getOrfs()
#' @param filterNMD filter orf information for transcripts not targeted by nmd first?
#' @param geneSimilarity compare orf to all orfs in gene?
#' @param compareUTR compare UTRs?
#' @param compareBy compare by 'transcript' isoforms or by 'gene' groups
#' @param allORFs orf information for all transcripts for novel sequence comparisons.
#' Generated by getOrfs()
#' @return data.frame with orf changes
#' @export
#' @import stringr
#' @importFrom rtracklayer import
#' @importFrom stats aggregate
#' @family transcript isoform comparisons
#' @author Beth Signal
#' @examples
#'
#' whippetFiles <- system.file("extdata","whippet/",
#' package = "GeneStructureTools")
#' wds <- readWhippetDataSet(whippetFiles)
#' wds <- filterWhippetEvents(wds)
#'
#' gtf <- rtracklayer::import(system.file("extdata","example_gtf.gtf",
#' package = "GeneStructureTools"))
#' exons <- gtf[gtf$type=="exon"]
#' transcripts <- gtf[gtf$type=="transcript"]
#' g <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
#'
#' orfsProteinCoding <- getOrfs(exons[exons$gene_name=="Prex2" &
#' exons$transcript_type=="protein_coding"], BSgenome = g)
#' orfsNMD <- getOrfs(exons[exons$gene_name=="Prex2" &
#' exons$transcript_type=="nonsense_mediated_decay"], BSgenome = g)
#' orfDiff(orfsProteinCoding, orfsNMD, filterNMD=FALSE)
#'
#' wds.exonSkip <- filterWhippetEvents(wds, eventTypes="CE",psiDelta = 0.2)
#' exons.exonSkip <- findExonContainingTranscripts(wds.exonSkip, exons,
#' variableWidth=0, findIntrons=FALSE, transcripts)
#' ExonSkippingTranscripts <- skipExonInTranscript(exons.exonSkip, exons, whippetDataSet=wds.exonSkip)
#'
#' orfsSkipped <- getOrfs(ExonSkippingTranscripts[ExonSkippingTranscripts$set=="skipped_exon"],
#' BSgenome = g)
#' orfsIncluded <- getOrfs(ExonSkippingTranscripts[ExonSkippingTranscripts$set=="included_exon"],
#' BSgenome = g)
#' orfDiff(orfsSkipped, orfsIncluded, filterNMD=FALSE)
orfDiff <- function(orfsX,
orfsY,
filterNMD = TRUE,
geneSimilarity = TRUE,
compareUTR = TRUE,
compareBy="gene",
allORFs=NULL){
if(filterNMD == TRUE){
orfChanges <- attrChangeAltSpliced(orfsX[which(orfsX$nmd_prob < 0.5),],
orfsY[which(orfsY$nmd_prob < 0.5),],
attribute = "orf_length",
compareBy = compareBy,
useMax = TRUE,
compareUTR = compareUTR)
orfChanges.filterx <- attrChangeAltSpliced(
orfsX[which(orfsX$nmd_prob < 0.5),],
orfsY,
attribute = "orf_length",
compareBy = compareBy,
useMax = TRUE,
compareUTR = compareUTR)
orfChanges.filtery <-
attrChangeAltSpliced(orfsX,
orfsY[which(orfsY$nmd_prob < 0.5),],
attribute = "orf_length",
compareBy = compareBy,
useMax = TRUE,
compareUTR = compareUTR)
orfChanges.nofilter <-
attrChangeAltSpliced(orfsX,
orfsY,
attribute = "orf_length",
compareBy = compareBy,
useMax = TRUE,
compareUTR = compareUTR)
if(nrow(orfChanges) > 0){
orfChanges$filtered <- "both"
}
if(nrow(orfChanges.filterx) > 0){
orfChanges.filterx$filtered <- "x"
}
if(nrow(orfChanges.filtery) > 0){
orfChanges.filtery$filtered <- "y"
}
if(nrow(orfChanges.nofilter) > 0){
orfChanges.nofilter$filtered <- "none"
}
add <- which(!(orfChanges.filterx$id %in% orfChanges$id))
orfChanges <- rbind(orfChanges, orfChanges.filterx[add,])
add <- which(!(orfChanges.filtery$id %in% orfChanges$id))
orfChanges <- rbind(orfChanges, orfChanges.filtery[add,])
add <- which(!(orfChanges.nofilter$id %in% orfChanges$id))
orfChanges <- rbind(orfChanges, orfChanges.nofilter[add,])
}else{
orfChanges <- attrChangeAltSpliced(orfsX,orfsY,attribute = "orf_length",
compareBy=compareBy,useMax = TRUE,
compareUTR = compareUTR)
orfChanges$filtered <- FALSE
}
hasASidX <- grep("AS", orfsX$id)
hasLeafIdX <- grep("dnre_", orfsX$id)
orfsX$spliced_id <- orfsX$gene_id
orfsX$transcript_id <- orfsX$id
orfsX$spliced_id[hasASidX] <-
unlist(lapply(str_split(orfsX$id[hasASidX], " "), '[[', 2))
orfsX$transcript_id[hasASidX] <-
unlist(lapply(str_split(orfsX$id[hasASidX], "[+]"), '[[', 1))
orfsX$spliced_id[hasLeafIdX] <- gsub("dnre_","",
orfsX$spliced_id[hasLeafIdX])
orfsX$spliced_id[hasLeafIdX] <-
stringr::str_sub(gsub("\\-[^]]\\:*", ":",
paste0(orfsX$spliced_id[hasLeafIdX], ":")), 1, -2)
hasASidY <- grep("AS", orfsY$id)
hasLeafIdY <- grep("upre_", orfsY$id)
orfsY$spliced_id <- orfsY$gene_id
orfsY$transcript_id <- orfsY$id
orfsY$spliced_id[hasASidY] <-
unlist(lapply(str_split(orfsY$id[hasASidY], " "), '[[', 2))
orfsY$transcript_id[hasASidY] <-
unlist(lapply(str_split(orfsY$id[hasASidY], "[+]"), '[[', 1))
orfsY$spliced_id[hasLeafIdY] <- gsub("upre_","",
orfsY$spliced_id[hasLeafIdY])
orfsY$spliced_id[hasLeafIdY] <-
stringr::str_sub(gsub("\\-[^]]\\:*", ":",
paste0(orfsY$spliced_id[hasLeafIdY], ":")), 1, -2)
m <- match(paste0(orfsX$spliced_id,"_", orfsX$frame),
paste0(orfsY$spliced_id,"_",orfsY$frame))
# check that there are matches if we ignore the frame (i.e 5'utr frame-shifts)
m.noFrame <- match(paste0(orfsX$spliced_id), paste0(orfsY$spliced_id))
m[which(is.na(m))] <- m.noFrame[which(is.na(m))]
if(any(is.na(m))){
if(!(any(grepl("clu", orfsX$id)) | any(grepl("clu", orfsY$id)))){
#hasASidX <- grep("[+]", orfsX$id)
if(length(hasASidX) > 0){
m2 <- match(paste0(orfsX$transcript_id[hasASidX],"_",
orfsX$frame),
paste0(orfsY$transcript_id,"_", orfsY$frame))
orfsY2 <- orfsY[m2,]
orfsY2$id <- orfsX$id[hasASidX]
orfsY2$spliced_id <- orfsX$spliced_id[hasASidX]
orfsY <- rbind(orfsY, orfsY2)
}
#hasASidY <- grep("[+]", orfsY$id)
if(length(hasASidY) > 0){
m2 <- match(paste0(orfsY$transcript_id[hasASidY],"_",
orfsY$frame),
paste0(orfsX$transcript_id,"_", orfsX$frame))
orfsX2 <- orfsX[m2,]
orfsX2$id <- orfsY$id[hasASidY]
orfsX2$spliced_id <- orfsY$spliced_id[hasASidY]
orfsX <- rbind(orfsX, orfsX2)
}
if(length(hasASidY) == 0 & length(hasASidX) == 0){
attributeX$id <- attributeX$gene_id
attributeY$id <- attributeY$gene_id
}else{
m2 <- match(paste0(orfsX$spliced_id,"_", orfsX$frame),
paste0(orfsY$spliced_id,"_",orfsY$frame))
orfsX <- orfsX[which(!is.na(m2)),]
orfsY <- orfsY[m2[which(!is.na(m2))],]
}
}
}
orfsY$id_with_len <- paste0(orfsY$spliced_id, "_", orfsY$orf_length)
orfChanges$id_orf_length_y <- paste0(orfChanges$id, "_",
orfChanges$orf_length_bygroup_y)
my <- match(orfChanges$id_orf_length_y, orfsY$id_with_len)
orfsX$id_with_len <- paste0(orfsX$spliced_id, "_", orfsX$orf_length)
orfChanges$id_orf_length_x <- paste0(orfChanges$id, "_",
orfChanges$orf_length_bygroup_x)
mx <- match(orfChanges$id_orf_length_x, orfsX$id_with_len)
x <- as.numeric(mapply(function(x,y) orfSimilarity(x,y),
orfsX$orf_sequence[mx],
orfsY$orf_sequence[my]))
orfChanges$percent_orf_shared <- x
maxLength <- apply(orfChanges[,c('orf_length_bygroup_y',
'orf_length_bygroup_x')], 1, max)
orfChanges$max_percent_orf_shared <-
(maxLength - abs(orfChanges$orf_length_bygroup_x -
orfChanges$orf_length_bygroup_y)) / maxLength
orfLengthShared <- orfChanges$percent_orf_shared * maxLength
orfChanges$orf_percent_kept_x <- orfLengthShared /
orfChanges$orf_length_bygroup_x
orfChanges$orf_percent_kept_y <- orfLengthShared /
orfChanges$orf_length_bygroup_y
if(geneSimilarity == TRUE & !is.null(allORFs)){
orfChanges$gene_id <- orfsX$gene_id[match(orfChanges$id,
orfsX$spliced_id)]
geneMatches <- lapply(orfChanges$gene_id,
function(x)
which(!is.na(match(allORFs$gene_id, x))))
geneMatches <- unlist(geneMatches)
idMatches <- mapply(function(x,y)
rep(x,length(y)) ,(1:length(orfChanges$gene_id)),
geneMatches)
idMatches.y <- match(orfChanges$id_orf_length_y[idMatches],
orfsY$id_with_len)
idMatches.x <- match(orfChanges$id_orf_length_x[idMatches],
orfsX$id_with_len)
orfSimBygene.y <- as.numeric(mapply(function(x,y) orfSimilarity(x,y),
allORFs$orf_sequence[geneMatches],
orfsY$orf_sequence[idMatches.y]))
orfSimBygene.x <- as.numeric(mapply(function(x,y) orfSimilarity(x,y),
allORFs$orf_sequence[geneMatches],
orfsX$orf_sequence[idMatches.x]))
orfSimilarity.bygene <-
data.frame(gene_id=allORFs$gene_id[geneMatches],
spliced_id_x=orfsX$spliced_id[idMatches.x],
spliced_id_y=orfsY$spliced_id[idMatches.y],
similarity_x=orfSimBygene.x,
similarity_y=orfSimBygene.y,
length_gene=allORFs$orf_length[geneMatches],
length_x=orfsX$orf_length[idMatches.x],
length_y=orfsY$orf_length[idMatches.y])
orfSimilarity.bygeneAggX <- aggregate(similarity_x ~ spliced_id_x,
orfSimilarity.bygene, max)
orfSimilarity.bygeneAggY <- aggregate(similarity_y ~ spliced_id_y,
orfSimilarity.bygene, max)
orfChanges$gene_similarity_x <-
orfSimilarity.bygeneAggX$similarity_x[
match(orfChanges$id, orfSimilarity.bygeneAggX$spliced_id_x)]
orfChanges$gene_similarity_y <-
orfSimilarity.bygeneAggY$similarity_y[
match(orfChanges$id, orfSimilarity.bygeneAggY$spliced_id_y)]
}
orfChanges$transcript_id <- NULL
orfChanges$gene_id <- NULL
orfChanges$id_orf_length_x <- NULL
orfChanges$id_orf_length_y <- NULL
return(orfChanges)
}
#' Evaluate the change in an attribute between a set of 'normal' transcripts and
#' 'alternative' transcripts
#' @param orfsX orf information for 'normal' transcripts. Generated by getOrfs()
#' @param orfsY orf information for 'alternative' transcripts. Generated by getOrfs()
#' @param attribute attribute to compare
#' @param compareBy compare by 'transcript' isoforms or by 'gene' groups
#' @param useMax use max as the summary function when multiple isoforms are aggregated?
#' If FALSE, will use min instead.
#' @param compareUTR compare the UTR lengths between transcripts?
#' Only runs if attribute = "orf_length"
#' @return data.frame with attribute changes
#' @export
#' @import stringr
#' @importFrom rtracklayer import
#' @importFrom stats aggregate
#' @family transcript isoform comparisons
#' @author Beth Signal
#' @examples
#' whippetFiles <- system.file("extdata","whippet/",
#' package = "GeneStructureTools")
#' wds <- readWhippetDataSet(whippetFiles)
#' wds <- filterWhippetEvents(wds)
#'
#' gtf <- rtracklayer::import(system.file("extdata","example_gtf.gtf",
#' package = "GeneStructureTools"))
#' exons <- gtf[gtf$type=="exon"]
#' transcripts <- gtf[gtf$type=="transcript"]
#' g <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
#'
#' wds.exonSkip <- filterWhippetEvents(wds, eventTypes="CE",psiDelta = 0.2)
#' exons.exonSkip <- findExonContainingTranscripts(wds.exonSkip, exons,
#' variableWidth=0, findIntrons=FALSE, transcripts)
#' ExonSkippingTranscripts <- skipExonInTranscript(exons.exonSkip, exons, whippetDataSet=wds.exonSkip)
#'
#' orfsSkipped <- getOrfs(ExonSkippingTranscripts[ExonSkippingTranscripts$set=="skipped_exon"],
#' BSgenome = g)
#' orfsIncluded <- getOrfs(ExonSkippingTranscripts[ExonSkippingTranscripts$set=="included_exon"],
#' BSgenome = g)
#' attrChangeAltSpliced(orfsSkipped, orfsIncluded,attribute = "orf_length")
attrChangeAltSpliced <- function(orfsX,
orfsY,
attribute = "orf_length",
compareBy = "gene",
useMax = TRUE,
compareUTR = FALSE){
if(nrow(orfsX) > 0 & nrow(orfsY) > 0){
if(useMax){
aggFun <- max
}else{
aggFun <- min
}
# fix ids so spliced isoforms have same id
if(all(grepl("[+]",c(orfsX$id, orfsY$id)))){
asTypes <- unique(unlist(lapply(stringr::str_split(
lapply(stringr::str_split(
c(orfsX$id, orfsY$id), "[+]"
), "[[",2), " "),
"[[",1)))
for(asT in asTypes){
orfsX$id <- gsub(asT, "AS", orfsX$id)
orfsY$id <- gsub(asT, "AS", orfsY$id)
}
}
m <- match(c('id','gene_id', attribute), colnames(orfsX))
orfsX.part <- orfsX[,m]
attributeX <- aggregate(. ~ id+gene_id, orfsX.part, aggFun)
hasASidX <- grep("AS", attributeX$id)
attributeX$as_group <- attributeX$gene_id
attributeX$transcript_id <- attributeX$id
hasLeafIdX <- grep("dnre_", attributeX$id)
attributeX$id[hasLeafIdX] <-
gsub("dnre_","",attributeX$id[hasLeafIdX])
attributeX$id[hasLeafIdX] <-
stringr::str_sub(gsub("\\-[^]]\\:*", ":",
paste0(attributeX$id[hasLeafIdX], ":")),
1, -2)
attributeX$as_group[hasASidX] <-
unlist(lapply(str_split(attributeX$id[hasASidX], " "), '[[', 2))
attributeX$transcript_id[hasASidX] <-
unlist(lapply(str_split(attributeX$id[hasASidX], "[+]"), '[[', 1))
m <- match(c('id','gene_id', attribute), colnames(orfsY))
orfsY.part <- orfsY[,m]
attributeY <- aggregate(. ~ id+gene_id, orfsY.part, aggFun)
hasASidY <- grep("AS", attributeY$id)
attributeY$as_group <- attributeY$gene_id
attributeY$transcript_id <- attributeY$id
hasLeafIdY <- grep("upre_", attributeY$id)
attributeY$id[hasLeafIdY] <-
gsub("upre_","",attributeY$id[hasLeafIdY])
attributeY$id[hasLeafIdY] <-
stringr::str_sub(gsub("\\-[^]]\\:*", ":",
paste0(attributeY$id[hasLeafIdY], ":")),
1, -2)
attributeY$as_group[hasASidY] <-
unlist(lapply(str_split(attributeY$id[hasASidY], " "), '[[', 2))
attributeY$transcript_id[hasASidY] <-
unlist(lapply(str_split(attributeY$id[hasASidY], "[+]"), '[[', 1))
m <- match(attributeX$id, attributeY$id)
if(any(is.na(m))){
#hasASidX <- grep("[+]", attributeX$id)
# if isoforms are not generated by leafcutter
if(!(any(grepl("clu", attributeX$id)) |
any(grepl("clu", attributeY$id)))){
if(length(hasASidX) > 0){
m2 <- match(attributeX$transcript_id[hasASidX],
attributeY$transcript_id)
attributeY2 <- attributeY[m2,]
attributeY2$id <- attributeX$id[hasASidX]
attributeY2$as_group <- attributeX$as_group[hasASidX]
attributeY <- rbind(attributeY, attributeY2)
}
#hasASidY <- grep("[+]", attributeY$id)
if(length(hasASidY) > 0){
m2 <- match(attributeY$transcript_id[hasASidY],
attributeX$transcript_id)
attributeX2 <- attributeX[m2,]
attributeX2$id <- attributeY$id[hasASidY]
attributeX2$as_group <- attributeY$as_group[hasASidY]
attributeX <- rbind(attributeX, attributeX2)
}
if(length(hasASidY) == 0 & length(hasASidX) == 0){
attributeX$id <- attributeX$gene_id
attributeY$id <- attributeY$gene_id
}else{
m2 <- match(attributeX$id, attributeY$id)
attributeX <- attributeX[which(!is.na(m2)),]
attributeY <- attributeY[m2[which(!is.na(m2))],]
m <- match(attributeX$id, attributeY$id)
}
}
}
colnames(attributeX)[3] <- "attr"
colnames(attributeY)[3] <- "attr"
if(compareBy == "transcript"){
m <- match(attributeX$id, attributeY$id)
attributeComparisons <-
data.frame(id=attributeX$id,
attr_bygroup_x=attributeX$attr,
attr_bygroup_y=attributeY$attr[m])
m <- match(attributeY$id, attributeX$id)
attributeComparisonsY <-
data.frame(id=attributeY$id,
attr_bygroup_x=attributeX$attr[m],
attr_bygroup_y=attributeY$attr)
add <-
which(!(attributeComparisonsY$id %in% attributeComparisons$id))
if(length(add) > 0){
attributeComparisons <-
rbind(attributeComparisons, attributeComparisonsY[add,])
}
}else if(compareBy=="gene"){
attributeX2 <- aggregate(attr ~ as_group, attributeX, aggFun)
attributeY2 <- aggregate(attr ~ as_group, attributeY, aggFun)
m <- match(attributeX2$as_group, attributeY2$as_group)
attributeComparisons <- data.frame(id=attributeX2[,1],
attr_bygroup_x=attributeX2[,-1],
attr_bygroup_y=attributeY2[m,-1])
attributeComparisons <- attributeComparisons[which(!is.na(m)),]
}
colnames(attributeComparisons) <- gsub("attr",
attribute,
colnames(attributeComparisons))
if(compareUTR == TRUE & attribute == "orf_length"){
if(all(grepl("AS", orfsX$id))){
id.x <- paste0(attributeComparisons$id,"_" ,
attributeComparisons$orf_length_bygroup_x)
hasLeafIdX <- grep("dnre_", orfsX$id)
orfsXid <- orfsX$id
orfsXid[hasLeafIdX] <-
stringr::str_sub(gsub("\\-[^]]\\:*", ":",
paste0(orfsXid[hasLeafIdX], ":")),
1, -2)
orfsXid[hasLeafIdX] <- gsub("dnre_","", orfsXid[hasLeafIdX])
if(compareBy == "gene"){
orfsXid <- unlist(lapply(str_split(orfsXid, " "), "[[", 2))
m1 <- match(id.x, paste0(orfsXid,
"_", orfsX$orf_length))
}else{
m1 <- match(id.x, paste0(orfsXid, "_", orfsX$orf_length))
}
}else{
matchToGene <- match(attributeComparisons$id,
attributeX$as_group)
id.x <- paste0(unlist(lapply(str_split(
attributeX$id[matchToGene], "[+]"),"[[",1)),"_" ,
attributeComparisons$orf_length_bygroup_x)
m1 <- match(id.x, paste0((orfsX$gene_id), "_",
orfsX$orf_length))
}
if(all(grepl("AS", orfsY$id))){
id.y <- paste0(attributeComparisons$id,"_" ,
attributeComparisons$orf_length_bygroup_y)
hasLeafIdY <- grep("upre_", orfsY$id)
orfsYid <- orfsY$id
orfsYid[hasLeafIdY] <-
stringr::str_sub(gsub("\\-[^]]\\:*", ":",
paste0(orfsYid[hasLeafIdY], ":")),
1, -2)
orfsYid[hasLeafIdY] <- gsub("upre_","", orfsYid[hasLeafIdY])
if(compareBy == "gene"){
orfsYid <- unlist(lapply(str_split(orfsYid, " "), "[[", 2))
m2 <- match(id.y, paste0(orfsYid,
"_", orfsY$orf_length))
}else{
m2 <- match(id.y, paste0(orfsYid, "_", orfsY$orf_length))
}
}else{
matchToGene <- match(attributeComparisons$id,
attributeY$as_group)
id.y <- paste0(unlist(lapply(str_split(
attributeY$id[matchToGene], "[+]"),"[[",1)),"_" ,
attributeComparisons$orf_length_bygroup_y)
m2 <- match(id.y, paste0((orfsY$gene_id), "_", orfsY$orf_length))
}
attributeComparisons$utr3_length_bygroup_x <- orfsX$utr3_length[m1]
attributeComparisons$utr3_length_bygroup_y <- orfsY$utr3_length[m2]
attributeComparisons$utr5_length_bygroup_x <-
orfsX$start_site_nt[m1]
attributeComparisons$utr5_length_bygroup_y <-
orfsY$start_site_nt[m2]
}
return(attributeComparisons)
}else{
blank <- data.frame(id=character(0),
attr_bygroup_x=numeric(0),
attr_bygroup_y=numeric(0))
colnames(blank) <- gsub("attr", attribute, colnames(blank))
if(compareUTR == TRUE){
blank$utr3_length_bygroup_x <- numeric(0)
blank$utr3_length_bygroup_y <- numeric(0)
blank$utr5_length_bygroup_x <- numeric(0)
blank$utr5_length_bygroup_y <- numeric(0)
}
return(blank)
}
}
#' calculate percentage of orfB contained in orfA
#' @param orfA character string of ORF amino acid sequence
#' @param orfB character string of ORF amino acid sequence
#' @param substitutionCost cost for substitutions in ORF sequences.
#' Set to 1 if substitutions should be weighted equally to insertions and deletions.
#' @return percentage of orfB contained in orfA
#' @export
#' @import stringdist
#' @import stringr
#' @importFrom utils adist
#' @family ORF annotation
#' @author Beth Signal
#' @examples
#' orfSimilarity("MFGLDIYAGTRSSFRQFSLT","MFGLDIYAGTRSSFRQFSLT")
#' orfSimilarity("MFGLDIYAGTRSSFRQFSLT","MFGLDIYAFRQFSLT")
#' orfSimilarity("MFGLDIYAFRQFSLT","MFGLDIYAGTRSSFRQFSLT")
#' orfSimilarity("MFGLDIYAGTRXXFRQFSLT","MFGLDIYAGTRSSFRQFSLT")
#' orfSimilarity("MFGLDIYAGTRXXFSLT","MFGLDIYAGTRSSFRQFSLT", 1)
orfSimilarity <- function(orfA, orfB, substitutionCost=100){
if(is.na(orfA) | is.na(orfB)){
return(NA)
}else{
if(nchar(orfA) > nchar(orfB)){
lcs <- utils::adist(orfA, orfB,
costs=list(ins=100,
del=1,
sub=substitutionCost))[1,1]
}else if(nchar(orfA) == nchar(orfB)){
lcs <- utils::adist(orfA, orfB,
costs=list(ins=1,
del=1,
sub=substitutionCost))[1,1]
}else{
lcs <- utils::adist(orfA, orfB,
costs=list(ins=1,
del=100,
sub=substitutionCost))[1,1]
}
if(lcs <= nchar(orfA) + nchar(orfB)){
return(((nchar(orfA) + nchar(orfB) - lcs)/2) /
max(nchar(orfA), nchar(orfB)))
}else{
return(0)
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.