##' The effect of a splice variant is predicted for individual
##' protein-coding transcripts.
##'
##' @title Predict the effect of splice variants on protein-coding transcripts
##' @param sgv \code{SGVariants} object
##' @param tx \code{TxDb} object, or \code{GRangesList} of exons
##'   grouped by transcript with metadata columns \code{txName},
##'   \code{geneName}, \code{cdsStart} and \code{cdsEnd}
##'   (by convention, cdsStart < cdsEnd for both strands).
##'   For import from GFF format, use function \code{importTranscripts}.
##' @param genome \code{BSgenome} object
##' @param fix_start_codon Logical indicating whether the annotated start
##'   codon should be considered fixed and the variant transcript should
##'   not be scanned for alternative start codons
##' @param output Character string indicating whether short results or
##'   full results (with additional columns) should be returned
##' @param cores Number of cores available for parallel processing
##' @return \code{data.frame} with rows corresponding to a
##'   variant-transcript pair. The output includes columns for variant
##'   identifier, transcript name, gene name, type of alteration at the
##'   RNA and protein level, and variant description at the RNA and
##'   protein level in HGVS notation. For \code{output = "full"}
##'   additional columns are returned. These include the full-length RNA
##'   and protein sequence for the reference and variant transcript.
##'   Event start and end coordinates in the full output are 0- and
##'   1-based, respectively (to allow for description of deletions).
##'   Coordinates for the last junction in a transcript refer to the
##'   last base of the second-to-last exon.
##' @examples
##' require(BSgenome.Hsapiens.UCSC.hg19)
##' seqlevelsStyle(Hsapiens) <- "NCBI"
##' predictVariantEffects(sgv_pred, tx, Hsapiens)
##' @author Leonard Goldstein
predictVariantEffects <- function(sgv, tx, genome, fix_start_codon = TRUE,
    output = c("short", "full"), cores = 1)
{
    if (missing(genome)) {
        stop("missing argument genome")
    }
    output <- match.arg(output)
    sgv <- updateObject(sgv, verbose = TRUE)
    if (is(tx, "TxDb")) {
        tx <- convertToTranscripts(tx)
    } else if (is(tx, "GRangesList")) {
        checkTranscriptFormat(tx)
    } else {
        stop("tx must be a TxDb object or GRangesList of exons
            grouped by transcripts")
    }
    common <- intersect(seqlevels(sgv), seqlevels(tx))
    sgv <- keepSeqlevels(sgv, common, "coarse")
    tx <- keepSeqlevels(tx, common, "coarse")
    
    sgv_vid <- variantID(sgv)
    excl <- grep("(", type(sgv), fixed = TRUE)
    if (length(excl) > 0) {
        sgv <- sgv[-excl]
        msg <- paste("Excluded", length(excl), "nested variants")
        warning(msg, call. = FALSE, immediate. = TRUE)
    }
    sgv_tx <- mapVariantsToTranscripts(sgv, tx, cores)
    tx <- tx[names(tx) %in% unlist(sgv_tx)]
    tx <- processTranscripts(tx, genome, cores)
    sgv_tx <- lapply(sgv_tx, intersect, names(tx))
    msg <- paste("Predicting effect of", length(sgv),
        "variants on", length(tx), "coding transcripts...")
    message(msg)
    expanded_sgv <- sgv[togroup0(sgv_tx)]
    expanded_tx <- tx[match(unlist(sgv_tx), names(tx))]
    list_expanded_sgv <- split(expanded_sgv, seq_along(expanded_sgv))
    list_expanded_tx <- split(expanded_tx, seq_along(expanded_tx))
    list_res <- mcmapply(
        predictVariantEffectPerVariantAndTranscript,
        list_expanded_sgv,
        list_expanded_tx,
        MoreArgs = list(
            genome = genome,
            fix_start_codon = fix_start_codon,
            output = output),
        SIMPLIFY = FALSE,
        USE.NAMES = FALSE,
        mc.preschedule = setPreschedule(cores),
        mc.cores = cores)
    items <- paste("variant", variantID(expanded_sgv),
        "and transcript", names(expanded_tx))
    checkApplyResultsForErrors(
        list_res,
        "predictVariantEffectPerVariantAndTranscript\n",
        items,
        "character")
    res <- do.call(rbindDfsWithoutRowNames, list_res)
    return(res)
}
mapVariantsToTranscripts <- function(sgv, tx, cores)
{
    gr_from <- pos2gr(sub("^(D|S):", "", from(sgv)))
    gr_to <- pos2gr(sub("^(A|E):", "", to(sgv)))
    list_D <- vector("list", length(sgv))
    i_D <- which(substr(from(sgv), 1, 1) == "D")
    list_D[i_D] <- as.list(findOverlaps(gr_from[i_D], tx))
    list_A <- vector("list", length(sgv))
    i_A <- which(substr(to(sgv), 1, 1) == "A")
    list_A[i_A] <- as.list(findOverlaps(gr_to[i_A], tx))
    list_i <- vector("list", length(sgv))
    i <- intersect(i_D, i_A)
    list_i[i] <- mcmapply(
        intersect,
        list_D[i],
        list_A[i],
        SIMPLIFY = FALSE,
        mc.cores = cores)
    i <- setdiff(i_D, i_A)
    list_i[i] <- list_D[i]
    i <- setdiff(i_A, i_D)
    list_i[i] <- list_A[i]
    inc <- tx[unique(unlist(list_i))]
    sgv <- annotate(sgv, convertToTxFeatures(inc))
    sgv_tx <- relist(names(tx)[unlist(list_i)], list_i)
    sgv_tx <- mcmapply(
        setdiff,
        sgv_tx,
        txName(sgv),
        SIMPLIFY = FALSE,
        mc.cores = cores)
    return(sgv_tx)
}
predictVariantEffectPerVariantAndTranscript <- function(sgv, ref, genome,
    fix_start_codon, output)
{
    ref_loc <- range(ref[[1]])
    ref_cds <- range(cds(ref))
    ref_utr_5p <- range(c(flank(ref_loc, -1, TRUE), flank(ref_cds, 1, TRUE)))
    ref_utr_3p <- range(c(flank(ref_cds, 1, FALSE), flank(ref_loc, -1, FALSE)))
    event <- getEventLocation(sgv, ref)
    event_start <- flank(event, -1, TRUE)
    event_end <- flank(event, -1, FALSE)
    del <- intersect(ref[[1]], event)
    ins <- reduce(granges(sgv[[1]][type(sgv[[1]]) == "E"]))
    var <- GRangesList(reduce(c(setdiff(ref[[1]], del), ins)))
    var_3p <- getVariant(var, ref, sgv, genome, fix = "start")
    var_5p <- getVariant(var, ref, sgv, genome, fix = "stop")
    alt_RNA <- getHGVSVariantRNA(var_3p, ref)
    if (event_start %over% ref_utr_5p && event_end %over% ref_utr_5p) {
        if (fix_start_codon) {
            var <- var_3p
            alt_protein <- getHGVSVariantProteinUnaffected()
        } else {
            var <- var_5p
            if (cdsStart(var) == cdsStart(ref)) {
                alt_protein <- getHGVSVariantProteinUnaffected()
            } else {
                alt_protein <- getHGVSVariantNTerminalExtension(var)
            }
        }
    } else if (event_start %over% ref_utr_5p && event_end %over% ref_cds) {
        var <- var_5p
        if (identical(gr2co(cds(var)), gr2co(cds(ref)))) {
            alt_protein <- getHGVSVariantProteinUnaffected()
        } else if (fix_start_codon || is.na(cdsStart(var))) {
            alt_protein <- getHGVSVariantNoProtein()
        } else {
            alt_protein <- getHGVSVariantNTerminalVariant(var)
        }
    } else if (event_start %over% ref_utr_5p && event_end %over% ref_utr_3p) {
        var <- var_3p
        cdsStart(var) <- NA_integer_
        alt_protein <- getHGVSVariantNoProtein()
    } else if (event_start %over% ref_cds && event_end %over% ref_cds) {
        if (sum(width(del)) %% 3 == sum(width(ins)) %% 3) {
            var <- var_3p
            alt_protein <- getHGVSVariantDeletionInsertion(var_3p)
            if (cdsEnd(var_3p) != cdsEnd(ref)
                && !fix_start_codon && !is.na(cdsStart(var_5p))) {
                var <- c(var, var_5p)
                alt_protein <- pc(alt_protein,
                    getHGVSVariantNTerminalVariant(var_5p))
            }
        } else {
            var <- var_3p
            alt_protein <- getHGVSVariantFrameshift(var_3p)
            if (!fix_start_codon && !is.na(cdsStart(var_5p))) {
                var <- c(var, var_5p)
                alt_protein <- pc(alt_protein,
                    getHGVSVariantNTerminalVariant(var_5p))
            }
        }
    } else if (event_start %over% ref_cds && event_end %over% ref_utr_3p) {
        var <- var_3p
        if (identical(gr2co(cds(var)), gr2co(cds(ref)))) {
            alt_protein <- getHGVSVariantProteinUnaffected()
        } else {
            alt_protein <- getHGVSVariantFrameshift(var)
        }
    } else if (event_start %over% ref_utr_3p && event_end %over% ref_utr_3p) {
        var <- var_3p
        alt_protein <- getHGVSVariantProteinUnaffected()
    }
    n <- length(var)
    res <- data.frame(
        variantID = rep(variantID(sgv), n),
        txName = rep(mcols(ref)$txName, n),
        geneName = rep(mcols(ref)$geneName, n),
        RNA_change = rep(alt_RNA$HGVS, n),
        RNA_variant_type = rep(alt_RNA$type, n),
        protein_change = alt_protein$HGVS,
        protein_variant_type = alt_protein$type,
        stringsAsFactors = FALSE)
    res <- cbind(res, as.data.frame(mcols(var)))
    if (output == "short") {
        res <- res[c(
            "variantID",
            "txName",
            "geneName",
            "RNA_change",
            "RNA_variant_type",
            "protein_change",
            "protein_variant_type")]
    } else if (output == "full") {
        res <- res[c(
            "variantID",
            "txName",
            "geneName",
            "RNA_change",
            "RNA_variant_type",
            "RNA_ref_length",
            "RNA_ref_cds_start",
            "RNA_ref_cds_end",
            "RNA_ref_last_junction",
            "RNA_ref_event_start",
            "RNA_ref_event_end",
            "RNA_var_length",
            "RNA_var_cds_start",
            "RNA_var_cds_end",
            "RNA_var_last_junction",
            "RNA_var_event_start",
            "RNA_var_event_end",
            "protein_change",
            "protein_variant_type",
            "protein_ref_length",
            "protein_ref_event_start",
            "protein_ref_event_end",
            "protein_var_length",
            "protein_var_event_start",
            "protein_var_event_end",
            "RNA_ref_seq",
            "RNA_var_seq",
            "protein_ref_seq",
            "protein_var_seq")]
    }
    return(res)
}
getHGVSVariantRNA <- function(var, ref)
{
    ref_start <- mcols(var)$RNA_ref_event_start
    ref_end <- mcols(var)$RNA_ref_event_end
    var_start <- mcols(var)$RNA_var_event_start
    var_end <- mcols(var)$RNA_var_event_end
    type <- c()
    if (ref_end > ref_start) {
        hgvs <- getHGVSDeletion(var, "RNA")
        type <- c(type, "deletion")
    }
    if (var_end > var_start) {
        if (ref_end == ref_start) {
            hgvs <- getHGVSFlanking(var, "RNA")
        }
        hgvs <- paste0(hgvs, getHGVSInsertion(var, "RNA", ref))
        type <- c(type, "insertion")
    }
    list(type = paste(type, collapse = "/"), HGVS = hgvs)
}
getHGVSVariantNTerminalExtension <- function(var)
{
    ext <- mcols(var)$protein_var_length - mcols(var)$protein_ref_length
    hgvs <- paste0("p.M1ext-", ext)
    list(type = "N-terminal_extension", HGVS = hgvs)
}
getHGVSVariantNTerminalVariant <- function(var) {
    hgvs <- getHGVSDeletion(var, "protein")
    type <- "N-terminal_deletion"
    var_l <- mcols(var)$protein_var_length
    ref_l <- mcols(var)$protein_ref_length
    del_l <- mcols(var)$protein_ref_event_end -
        mcols(var)$protein_ref_event_start
    ext <- var_l - (ref_l - del_l)
    if (ext > 0) {
        hgvs <- paste0(hgvs, "ext-", ext)
        type <- "N-terminal_variant"
    }
    list(type = type, HGVS = hgvs)
}
getHGVSVariantDeletionInsertion <- function(var)
{
    ref_start <- mcols(var)$protein_ref_event_start
    ref_end <- mcols(var)$protein_ref_event_end
    var_start <- mcols(var)$protein_var_event_start
    var_end <- mcols(var)$protein_var_event_end
    type <- c()
    if (ref_end > ref_start) {
        hgvs <- getHGVSDeletion(var, "protein")
        type <- c(type, "deletion")
    }
    if (var_end > var_start) {
        if (ref_end == ref_start) {
            hgvs <- getHGVSFlanking(var, "protein")
        }
        hgvs <- paste0(hgvs, getHGVSInsertion(var, "protein"))
        type <- c(type, "insertion")
    }
    out <- list(
        type = paste0("in-frame_", paste(type, collapse = "/")),
        HGVS = hgvs)
    return(out)
}
getHGVSVariantFrameshift <- function(var)
{
    ref_start <- mcols(var)$protein_ref_event_start
    var_start <- mcols(var)$protein_var_event_start
    res <- getHGVSRefPos(var, ref_start + 1, "protein")
    alt <- substr(mcols(var)$protein_var_seq, var_start + 1, var_start + 1)
    hgvs <- paste0("p.", res, alt, "fs*")
    if (!is.na(cdsEnd(var))) {
        ## need to add 1 since sequence does not include stop codon
        ext <- mcols(var)$protein_var_length - var_start + 1
        hgvs <- paste0(hgvs, ext)
    } else {
        hgvs <- paste0(hgvs, "?")
    }
    list(type = "frame_shift", HGVS = hgvs)
}
getHGVSVariantProteinUnaffected <- function()
{
    list(type = "no_change", HGVS = "p.=")
}
getHGVSVariantNoProtein <- function()
{
    list(type = "no_protein", HGVS = "p.0")
}
getHGVSDeletion <- function(var, type)
{
    suffix <- switch(type, RNA = "r", protein = "p")
    ref_start <- mcols(var)[[paste0(type, "_ref_event_start")]]
    ref_end <- mcols(var)[[paste0(type, "_ref_event_end")]]
    del_start <- getHGVSRefPos(var, ref_start + 1, type)
    del_end <- getHGVSRefPos(var, ref_end, type)
    paste0(suffix, ".", del_start, "_", del_end, "del")
}
getHGVSFlanking <- function(var, type)
{
    suffix <- switch(type, RNA = "r", protein = "p")
    ref_start <- mcols(var)[[paste0(type, "_ref_event_start")]]
    ref_end <- mcols(var)[[paste0(type, "_ref_event_end")]]
    flanking_start <- getHGVSRefPos(var, ref_start, type)
    flanking_end <- getHGVSRefPos(var, ref_end + 1, type)
    paste0(suffix, ".", flanking_start, "_", flanking_end)
}
getHGVSInsertion <- function(var, type, ref)
{
    if (type == "RNA") {
        strand <- as.character(strand(ref[[1]][1]))
        I <- setdiff(var[[1]], ref[[1]])
        I <- sort(I, decreasing = (strand == "-"))
        ins <- vector()
        for(i in seq_along(I)) {
            I5p <- start(flank(I[i], -1, TRUE))
            I3p <- start(flank(I[i], -1, FALSE))
            ins <- c(ins, paste0(
                getHGVSRefPosIntronic(I5p, ref, var), "_",
                getHGVSRefPosIntronic(I3p, ref, var)))
        }
        if (length(ins) > 1) {
            ins <- paste0("[", paste(ins, collapse = ";"), "]")
        }
    } else if (type == "protein") {
        ins <- substr(mcols(var)$protein_var_seq,
            mcols(var)$protein_var_event_start + 1,
            mcols(var)$protein_var_event_end)
    }
    ins <- paste0("ins", ins)
    return(ins)
}
getHGVSRefPos <- function(var, pos, type)
{
    if (type == "RNA") {
        cdsStart <- mcols(var)$RNA_ref_cds_start
        cdsEnd <- mcols(var)$RNA_ref_cds_end
        if (pos < cdsStart) {
            pos <- pos - cdsStart
        } else if (pos >= cdsStart && pos <= cdsEnd) {
            pos <- pos - cdsStart + 1
        } else {
            pos <- paste0("*", pos - cdsEnd)
        }
        pos <- as.character(pos)
    } else if (type == "protein") {
        pos <- paste0(substr(mcols(var)$protein_ref_seq, pos, pos), pos)
    }
    return(pos)
}
getHGVSRefPosIntronic <- function(pos, ref, var)
{
    names(ref) <- "1"
    strand <- as.character(strand(ref[[1]][1]))
    E <- ref[[1]]
    E <- sort(E, decreasing = (strand == "-"))
    E5p <- flank(E, -1, TRUE)
    E3p <- flank(E, -1, FALSE)
    R5p <- c(start(mapToTranscripts(E5p, ref)), NA)
    R3p <- c(NA, start(mapToTranscripts(E3p, ref)))
    E5p <- c(start(E5p), NA)
    E3p <- c(NA, start(E3p))
    d5p <- (pos - E3p) * ifelse(strand == "-", -1, 1)
    d3p <- (E5p - pos) * ifelse(strand == "-", -1, 1)
    i <- which((is.na(d5p) | d5p > 0) & (is.na(d3p) | d3p > 0))
    if (is.na(d3p[i]) ||
        (!is.na(d5p[i]) && !is.na(d3p[i]) && d5p[i] <= d3p[i])) {
        pos <- paste0(getHGVSRefPos(var, R3p[i], "RNA"), "+", d5p[i])
    } else {
        pos <- paste0(getHGVSRefPos(var, R5p[i], "RNA"), "-", d3p[i])
    }
    return(pos)
}
getVariant <- function(var, ref, sgv, genome, fix = c("start", "stop"))
{
    fix <- match.arg(fix)
    if (fix == "start") {
        var <- findCDS(var, cdsStart(ref), NA_integer_, genome)
    } else {
        var <- findCDS(var, NA_integer_, cdsEnd(ref), genome)
    }
    var <- getVariantRNA(var, ref, sgv, genome)
    var <- getVariantProtein(var, ref, sgv, genome)
    return(var)
}
findCDS <- function(tx, cdsStart, cdsEnd, genome)
{
    start_codons <- c("ATG")
    stop_codons <- c("TAG", "TAA", "TGA")
    chrom <- as.character(seqnames(tx[[1]][1]))
    strand <- as.character(strand(tx[[1]][1]))
    names(tx) <- "1"
    tx[[1]] <- sort(tx[[1]], decreasing = (strand == "-"))
    tx_seq <- as.character(do.call(c,
        suppressWarnings(getSeq(genome, tx[[1]]))))
    if (is.na(cdsEnd)) {
        cdsStart_gr <- GRanges(chrom, IRanges(cdsStart, cdsStart), strand)
        if (!cdsStart_gr %over% tx) {
            cdsStart <- NA_integer_
            cdsEnd <- NA_integer_
        } else {
            cdsStart_tx <- start(mapToTranscripts(cdsStart_gr, tx))
            p <- seq(from = cdsStart_tx, to = nchar(tx_seq), by = 3)
            codons <- mapply(substr, p, p + 2, MoreArgs = list(x = tx_seq))
            i_stop <- which(codons %in% stop_codons)
            if (length(i_stop) > 0) {
                cdsEnd_tx <- (p + 2)[min(i_stop)]
                x <- GRanges(1, IRanges(cdsEnd_tx, cdsEnd_tx), "*")
                cdsEnd_gr <- mapFromTranscripts(x, tx)
                cdsEnd <- start(cdsEnd_gr)
            } else {
                cdsStart <- NA_integer_
                cdsEnd <- NA_integer_
            }
        }
    } else {
        cdsEnd_gr <- GRanges(chrom, IRanges(cdsEnd, cdsEnd), strand)
        if (!cdsEnd_gr %over% tx) {
            cdsStart <- NA_integer_
            cdsEnd <- NA_integer_
        } else {
            cdsEnd_tx <- start(mapToTranscripts(cdsEnd_gr, tx))
            p <- seq(from = cdsEnd_tx %% 3 + 1, to = cdsEnd_tx - 2, by = 3)
            codons <- mapply(substr, p, p + 2, MoreArgs = list(x = tx_seq))
            i_start <- which(codons %in% start_codons)
            i_stop <- which(codons[-length(codons)] %in% stop_codons)
            if (length(i_start) > 0 && length(i_stop) > 0) {
                i_start <- i_start[i_start > max(i_stop)]
            }
            if (length(i_start) > 0) {
                cdsStart_tx <- p[min(i_start)]
                x <- GRanges(1, IRanges(cdsStart_tx, cdsStart_tx), "*")
                cdsStart_gr <- mapFromTranscripts(x, tx)
                cdsStart <- start(cdsStart_gr)
            } else {
                cdsStart <- NA_integer_
                cdsEnd <- NA_integer_
            }
        }
    }
    cdsStart(tx) <- cdsStart
    cdsEnd(tx) <- cdsEnd
    return(tx)
}
getEventLocation <- function(sgv, ref)
{
    start_type <- substr(from(sgv), 1, 1)
    end_type <- substr(to(sgv), 1, 1)
    if (start_type == "D") {
        start_gr <- pos2gr(sub("^D:", "", from(sgv)))
        start_gr <- flank(start_gr, 1, FALSE)
    } else if (start_type == "S") {
        start_gr <- flank(range(ref[[1]]), -1, TRUE)
    }
    if (end_type == "A") {
        end_gr <- pos2gr(sub("^A:", "", to(sgv)))
        end_gr <- flank(end_gr, 1, TRUE)
    } else if (end_type == "E") {
        end_gr <- flank(range(ref[[1]]), -1, FALSE)
    }
    event <- range(c(start_gr, end_gr))
    return(event)
}
getRNASeq <- function(tx, genome)
{
    tx <- tx[[1]]
    strand <- as.character(strand(tx[1]))
    tx <- sort(tx, decreasing = (strand == "-"))
    nt <- do.call(c, suppressWarnings(getSeq(genome, tx)))
    nt <- as.character(nt)
    nt <- gsub("A", "a", nt)
    nt <- gsub("C", "c", nt)
    nt <- gsub("G", "g", nt)
    nt <- gsub("T", "u", nt)
    nt <- gsub("N", "n", nt)
    return(nt)
}
getVariantRNA <- function(var, ref, sgv, genome)
{
    strand <- as.character(strand(ref[[1]][1]))
    names(ref) <- "1"
    names(var) <- "1"
    ref[[1]] <- sort(ref[[1]], decreasing = (strand == "-"))
    var[[1]] <- sort(var[[1]], decreasing = (strand == "-"))
    ref_seq <- getRNASeq(ref, genome)
    var_seq <- getRNASeq(var, genome)
    ref_cds <- reduce(mapToTranscripts(cds(ref), ref))
    var_cds <- reduce(mapToTranscripts(cds(var), var))
    ref_event <- getEventLocation(sgv, ref) + 1
    ref_event <- intersect(ref_event, ref[[1]])
    ref_event <- reduce(mapToTranscripts(ref_event, ref))
    var_event <- getEventLocation(sgv, var) + 1
    var_event <- intersect(var_event, var[[1]])
    var_event <- reduce(mapToTranscripts(var_event, var))
    if (length(ref_event) > 1 || length(var_event) > 1) stop()
    if (grepl("^S", from(sgv))) {
        ref_event_start <- 0L
        var_event_start <- 0L
    } else {
        ref_event_start <- start(ref_event)
        var_event_start <- start(var_event)
    }
    if (grepl("^E", to(sgv))) {
        ref_event_end <- nchar(ref_seq)
        var_event_end <- nchar(var_seq)
    } else {
        ref_event_end <- end(ref_event) - 1L
        var_event_end <- end(var_event) - 1L
    }
    ref_last_exon <- ref[[1]][length(ref[[1]])]
    var_last_exon <- var[[1]][length(var[[1]])]
    ref_last_junction <- start(mapToTranscripts(ref_last_exon, ref)) - 1L
    var_last_junction <- start(mapToTranscripts(var_last_exon, var)) - 1L
    if (length(ref[[1]]) == 1) ref_last_junction <- NA_integer_
    if (length(var[[1]]) == 1) var_last_junction <- NA_integer_
    df <- data.frame(
        RNA_ref_cds_start = start(ref_cds),
        RNA_ref_cds_end = end(ref_cds),
        RNA_ref_last_junction = ref_last_junction,
        RNA_ref_event_start = ref_event_start,
        RNA_ref_event_end = ref_event_end,
        RNA_ref_length = nchar(ref_seq),
        RNA_ref_seq = ref_seq,
        RNA_var_cds_start = start(var_cds),
        RNA_var_cds_end = end(var_cds),
        RNA_var_last_junction = var_last_junction,
        RNA_var_event_start = var_event_start,
        RNA_var_event_end = var_event_end,
        RNA_var_length = nchar(var_seq),
        RNA_var_seq = var_seq,
        stringsAsFactors = FALSE)
    mcols(var) <- cbind(mcols(var), DataFrame(df))
    return(var)
}
getProteinSeq <- function(tx, genome)
{
    if (is.na(cdsStart(tx)) || is.na(cdsEnd(tx))) {
        return(NA_character_)
    }
    cds_seq <- do.call(c, suppressWarnings(getSeq(genome, cds(tx))))
    protein <- as.character(translate(cds_seq))
    protein <- sub("\\*$", "", protein)
    return(protein)
}
getVariantProtein <- function(var, ref, sgv, genome)
{
    ref_seq <- getProteinSeq(ref, genome)
    if (is.na(cdsStart(var)) || is.na(cdsEnd(var))) {
        df <- data.frame(
            protein_ref_event_start = 0L,
            protein_ref_event_end = nchar(ref_seq),
            protein_ref_length = nchar(ref_seq),
            protein_ref_seq = ref_seq,
            protein_var_event_start = NA_integer_,
            protein_var_event_end = NA_integer_,
            protein_var_length = NA_integer_,
            protein_var_seq = NA_character_,
            stringsAsFactors = FALSE)
        mcols(var) <- cbind(mcols(var), DataFrame(df))
        return(var)
    }
    var_seq <- getProteinSeq(var, genome)
    ref_loc <- range(ref[[1]])
    ref_cds <- range(cds(ref))
    var_loc <- range(var[[1]])
    var_cds <- range(cds(var))
    ref_cds_start <- granges(flank(ref_cds, -1, TRUE))
    ref_cds_end <- granges(flank(ref_cds, -1, FALSE))
    var_cds_start <- granges(flank(var_cds, -1, TRUE))
    var_cds_end <- granges(flank(var_cds, -1, FALSE))
    event <- reduce(c(
        getEventLocation(sgv, ref),
        getEventLocation(sgv, var)))
    if (start(ref_cds_start) != start(var_cds_start)) {
        event <- range(c(event, ref_cds_start, var_cds_start))
    }
    if (start(ref_cds_end) != start(var_cds_end)) {
        event <- range(c(event, ref_cds_end, var_cds_end))
    }
    event_ref <- mapEventToProtein(ref, event)
    if (!is.na(event_ref$end) && event_ref$end == nchar(ref_seq) + 1)
        event_ref$end <- nchar(ref_seq)
    event_var <- mapEventToProtein(var, event)
    if (!is.na(event_var$end) && event_var$end == nchar(var_seq) + 1)
        event_var$end <- nchar(var_seq)
    if (!is.na(event_ref$start) && !is.na(event_ref$end) &&
        !is.na(event_var$start) && !is.na(event_var$end)) {
        if (event_ref$end > event_ref$start &&
            event_var$end > event_var$start &&
            start(ref_cds_start) == start(var_cds_start)) {
            del <- substr(ref_seq, event_ref$start + 1, event_ref$end)
            ins <- substr(var_seq, event_var$start + 1, event_var$end)
            D <- nchar(del)
            I <- nchar(ins)
            P <- min(D, I)
            p <- 0
            while (p < P &&
                substr(del, p + 1, p + 1) == substr(ins, p + 1, p + 1)) {
                p <- p + 1
            }
            event_ref$start <- event_ref$start + p
            event_var$start <- event_var$start + p
        }
        if (event_ref$end > event_ref$start &&
            event_var$end > event_var$start &&
            start(ref_cds_end) == start(var_cds_end)) {
            del <- substr(ref_seq, event_ref$start + 1, event_ref$end)
            ins <- substr(var_seq, event_var$start + 1, event_var$end)
            D <- nchar(del)
            I <- nchar(ins)
            P <- min(D, I)
            p <- 0
            while (p < P &&
                substr(del, D - p, D - p) == substr(ins, I - p, I - p)) {
                p <- p + 1
            }
            event_ref$end <- event_ref$end - p
            event_var$end <- event_var$end - p
        }
    }
    df <- data.frame(
        protein_ref_event_start = event_ref$start,
        protein_ref_event_end = event_ref$end,
        protein_ref_length = nchar(ref_seq),
        protein_ref_seq = ref_seq,
        protein_var_event_start = event_var$start,
        protein_var_event_end = event_var$end,
        protein_var_length = nchar(var_seq),
        protein_var_seq = var_seq,
        stringsAsFactors = FALSE)
    mcols(var) <- cbind(mcols(var), DataFrame(df))
    return(var)
}
mapEventToProtein <- function(tx, event)
{
    event_start_0 <- start(flank(event, -1, TRUE))
    event_end_0 <- start(flank(event, -1, FALSE))
    cds <- setNames(restrict(tx, cdsLeft(tx), cdsRight(tx)), "1")
    event <- intersect(event + 1, cds[[1]])
    event <- ranges(reduce(mapToTranscripts(event, cds)))
    if (length(event) == 0) {
        out <- data.frame(start = NA_integer_, end = NA_integer_)
    } else {
        if (event_start_0 == cdsStart(tx)) {
            event_start <- 0L
        } else {
            event_start <- start(event)
        }
        if (event_end_0 == cdsEnd(tx)) {
            event_end <- end(event)
        } else {
            event_end <- end(event) - 1L
        }
        out <- data.frame(
            start = as.integer(floor(event_start / 3)),
            end = as.integer(ceiling(event_end / 3)))
    }
    return(out)
}
## processTranscripts() removes transcripts with invalid CDS
## and ensures that CDS coordinates include the stop codon
processTranscripts <- function(tx, genome, cores)
{
    tx <- tx[!is.na(cdsStart(tx)) & !is.na(cdsEnd(tx))]
    list_tx <- split(tx, seq_along(tx))
    tx_protein <- unlist(mclapply(list_tx,
        getProteinSeq, genome, mc.cores = cores))
    list_processed <- mcmapply(
        findCDS,
        list_tx,
        cdsStart(tx),
        rep(NA_integer_, length(tx)),
        MoreArgs = list(genome = genome),
        SIMPLIFY = FALSE,
        mc.cores = cores)
    processed <- Reduce(c, list_processed)
    names(processed) <- names(tx)
    processed_protein <- unlist(mclapply(list_processed,
        getProteinSeq, genome, mc.cores = cores))
    invalid <- which(is.na(tx_protein) | is.na(processed_protein) |
        substr(tx_protein, 1, 1) != "M" | tx_protein != processed_protein)
    if (length(invalid) > 0) {
        processed <- processed[-invalid]
        msg <- paste0("Excluded ", length(invalid),
            " transcripts with invalid CDS\n",
            paste(names(tx)[invalid], collapse = "\n"))
        warning(msg, call. = FALSE, immediate. = TRUE)
    }
    return(processed)
}
cdsLeft <- function(tx)
{
    mcols(tx)$cdsStart
}
'cdsLeft<-' <- function(tx, value)
{
    mcols(tx)$cdsStart <- value; tx
}
cdsRight <- function(tx)
{
    mcols(tx)$cdsEnd
}
'cdsRight<-' <- function(tx, value)
{
    mcols(tx)$cdsEnd <- value; tx
}
cdsStart <- function(tx)
{
    st <- as.character(strand(unlist(range(tx))))
    ifelse(st == "+", cdsLeft(tx), cdsRight(tx))
}
'cdsStart<-' <- function(tx, value)
{
    st <- as.character(strand(unlist(range(tx))))
    i_pos <- which(st == "+")
    cdsLeft(tx)[i_pos] <- value[i_pos]
    i_neg <- which(st == "-")
    cdsRight(tx)[i_neg] <- value[i_neg]
    return(tx)
}
cdsEnd <- function(tx)
{
    st <- as.character(strand(unlist(range(tx))))
    ifelse(st == "+", cdsRight(tx), cdsLeft(tx))
}
'cdsEnd<-' <- function(tx, value)
{
    st <- as.character(strand(unlist(range(tx))))
    i_pos <- which(st == "+")
    cdsRight(tx)[i_pos] <- value[i_pos]
    i_neg <- which(st == "-")
    cdsLeft(tx)[i_neg] <- value[i_neg]
    return(tx)
}
cds <- function(tx)
{
    if (length(tx) > 1) stop("tx must have length 1")
    tx <- restrict(tx, cdsLeft(tx), cdsRight(tx))
    tx <- granges(tx[[1]])
    st <- as.character(strand(tx[1]))
    tx <- sort(tx, decreasing = (st == "-"))
    return(tx)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.