R/ucsc.R

Defines functions ucscGet ucscPost ucscShow ucscURL ucscGenomes .getOrganism parseFormatFromTrackLine trackLineClass .export_SimpleGRangesList_RTLFile UCSCFile splitUCSCData chooseGraphType UCSCData ucscParsePairs ucscPair resolveTrackIndex viewURL outputTruncated inconsistentFieldCounts ucscExport .uploadTableBrowserRanges firstTableName tableHasOutput getTableNames ucscTablePost dropCookie ucscTableGet normTableQueryRange normArgTrack normArgTable GRangesForUCSCGenome SeqinfoForUCSCGenome handleError normArgTrackData

Documented in GRangesForUCSCGenome SeqinfoForUCSCGenome UCSCFile ucscGenomes viewURL

# UCSC genome browser interface

# every UCSC session is identified by a 'hguid'
setClass("UCSCSession",
         representation(url = "character", hguid = "character",
                        views = "environment"),
         contains = "BrowserSession")

# gets an 'hgsid' to initialize the session
setMethod("initialize", "UCSCSession",
          function(.Object, url = "http://genome.ucsc.edu/cgi-bin/",
                   user = NULL, session = NULL, force = FALSE, ...)
          {
            .Object@url <- url
            .Object@views <- new.env()
            gwURL <- ucscURL(.Object, "gateway", force=force)
            gw <- httpGet(gwURL, cookiefile = tempfile(), header = TRUE,
                          .parse=FALSE, ...)
            if (grepl("redirectTd", gw)) {
                url <- sub(".*?a href=\"h([^[:space:]]+cgi-bin/).*", "h\\1", gw)
                return(initialize(.Object, url, user=user, session=session,
                                  force=TRUE, ...))
            }
            cookie <- grep("Set-[Cc]ookie: hguid[^=]*=", gw)
            if (!length(cookie))
              stop("Failed to obtain 'hguid' cookie")
            hguid <- sub(".*Set-Cookie: (hguid[^=]*=[^;]*);.*", "\\1", gw)
            .Object@hguid <- hguid
            if (!is.null(user) && !is.null(session)) { ## bring in other session
              ucscGet(.Object, "tracks",
                      list(hgS_doOtherUser = "submit", hgS_otherUserName = user,
                           hgS_otherUserSessionName = session))
            }
            .Object
          })

setMethod("seqlengths", "UCSCSession", function(x) {
  query <- ucscTableQuery(x, range = GRanges(), table = "chromInfo", check=FALSE)
  chromInfo <- getTable(query, check=FALSE)
  ans <- setNames(chromInfo$size, chromInfo$chrom)
  ans[sortSeqlevels(names(ans))]
})

setMethod("seqnames", "UCSCSession", function(x) names(seqlengths(x)))

setMethod("seqinfo", "UCSCSession", function(x) {
  sl <- seqlengths(x)
  Seqinfo(names(sl), sl, genome = genome(x)) # no circularity information
})

normArgTrackData <- function(value, session) {
  genomes <- vapply(value, function(x) singleGenome(genome(x)), character(1L))
  genomes[is.na(genomes)] <- ""
  tapply(value, unlist(genomes),
         function(tracks)
         {
           genome <- singleGenome(genome(tracks[[1]]))
           if (!is.na(genome))
             genome(session) <- genome
           spaces <- do.call(c, unname(lapply(tracks, seqnames)))
           badSpaces <- setdiff(spaces, seqnames(session))
           if (length(badSpaces) > 0L)
             stop("Invalid chromosomes for ", genome(session), ": ",
                  paste(badSpaces, collapse = ", "))
         })
  value
}

handleError <- function(response) {
    msg <- getNodeSet(response, "//span[text()='Error']/../text()")
    if (length(msg) == 2L)
        stop(sub(".*? - ", "", xmlValue(msg[[2L]])))
}

setReplaceMethod("track", c("UCSCSession", "SimpleGRangesList"),
          function(object, name = names(value),
                   format = c("auto", "bed", "wig", "gff1", "bed15",
                     "bedGraph"), ..., value)
          {
            format <- match.arg(format)
            if (length(value)) {
              ## upload values in blocks, one for each genome
              value <- normArgTrackData(value, object)
              names(value) <- name
              genomes <- sapply(value, function(x) singleGenome(genome(x)))
              genomes[is.na(genomes)] <- ""
              tapply(value, unlist(genomes),
                     function(tracks)
                     {
                       form <- ucscForm(tracks, format, ...)
                       response <- ucscPost(object, "custom", form)
                       handleError(response)
                     })
            }
            object
          })

setReplaceMethod("track", c("UCSCSession", "RTLFile"),
                 function(object, name = names(value), ..., value)
                 {
                   form <- ucscForm(value, genome(object), ...)
                   response <- ucscPost(object, "custom", form)
                   object
                 })

setMethod("browserViews", "UCSCSession",
          function(object) object@views$instances)

## get the list of track names
setMethod("trackNames", "UCSCSession",
          function(object) ucscTracks(object)@ids)

## get the current range
setMethod("range", "UCSCSession",
          function(x, ..., na.rm) range(ucscCart(x)))

setReplaceMethod("range", "UCSCSession",
                 function(x, value) {
                   ucscGet(x, "cart", ucscForm(normGenomeRange(value, x)))
                   x
                 })

setMethod("genome", "UCSCSession", function(x) {
  genome(ucscCart(x))
})

setReplaceMethod("genome", "UCSCSession",
                 function(x, value) {
                   if (!isSingleString(value))
                     stop("'genome' must be a single non-NA string")
                   ucscGet(x, "gateway", list(db = value))
                   if (genome(x) != value)
                     stop("Failed to set session genome to '", value, "'")
                   x
                 })

SeqinfoForUCSCGenome <- function(genome) {
  tryCatch({
    session <- browserSession("UCSC")
    genome(session) <- genome
    seqinfo(session)
  }, error = function(cond) NULL)
}

GRangesForUCSCGenome <- function(genome, chrom = NULL, ranges = NULL, ...)
{
  GRangesForGenome(genome, chrom = chrom, ranges = ranges, method = "UCSC",
                   seqinfo = NULL, ...)
}


## context for querying UCSC tables
setClass("UCSCTableQuery",
         representation(session = "UCSCSession",
                        track = "character_OR_NULL",
                        table = "character_OR_NULL",
                        range = "GRanges",
                        outputType = "character_OR_NULL",
                        NAMES = "character_OR_NULL",
                        intersectTrack = "character_OR_NULL"))

setMethod("show", "UCSCTableQuery",
          function(object) {
            cat("Get ")
            if (!is.null(tableName(object)))
              cat("table '", tableName(object), "' from ", sep = "")
            cat("track '", names(trackName(object)), "' within ", sep = "")
            range <- range(object)
            if (length(range) > 1)
              start <- end <- chrom <- "*"
            else {
              chrom <- as.character(seqnames(range))
              start <- start(range)
              end <- end(range)
            }
            cat(genome(range)[1], ":", chrom, ":", start, "-", end, sep="")
            for (itrack in names(intersectTrack(object)))
              cat(" &", itrack)
            cat("\n")
          })

setMethod("browserSession", "UCSCTableQuery", function(object) {
  object@session
})

setGeneric("browserSession<-",
           function(object, ..., value) standardGeneric("browserSession<-"))
setReplaceMethod("browserSession", c("UCSCTableQuery", "UCSCSession"),
                 function(object, value) {
                   object@session <- value
                   object
                 })

setMethod("range", "UCSCTableQuery", function(x, ..., na.rm) x@range)
setReplaceMethod("range", "UCSCTableQuery",
                 function(x, value) {
                   x@range <- normTableQueryRange(value, browserSession(x))
                   x
                 })

setGeneric("trackName", function(x, ...) standardGeneric("trackName"))
setMethod("trackName", "UCSCTableQuery", function(x) x@track)

setGeneric("trackName<-",
           function(x, ..., value) standardGeneric("trackName<-"))
setReplaceMethod("trackName", "UCSCTableQuery", function(x, value)
                 {
                   x@track <- normArgTrack(value, x)
                   x
                 })

setGeneric("tableName", function(x, ...) standardGeneric("tableName"))
setMethod("tableName", "UCSCTableQuery", function(x) x@table)

normArgTable <- function(name, query) {
  if (!is.null(name)) {
    if (!isSingleString(name))
      stop("table name must be a single string or NULL")
    if (!name %in% tableNames(query))
      stop("Table '", name, "' is unavailable")
  }
  name
}

setGeneric("tableName<-", function(x, ..., value)
           standardGeneric("tableName<-"))
### FIXME: we need '...' in the formals due to a bug in R 3.5, remove for 3.6
setReplaceMethod("tableName", "UCSCTableQuery",
                 function(x, check=TRUE, ..., value)
                 {
                   if (!missing(...))
                     warning("arguments in '...' ignored")
                   if (check)
                       value <- normArgTable(value, x)
                   x@table <- value
                   x
                 })

setMethod("names", "UCSCTableQuery", function(x) x@NAMES)
setReplaceMethod("names", "UCSCTableQuery", function(x, value) {
  x@NAMES <- value
  x
})

setGeneric("intersectTrack", function(x, ...)
           standardGeneric("intersectTrack"))
setMethod("intersectTrack", "UCSCTableQuery", function(x) x@intersectTrack)
setGeneric("intersectTrack<-", function(x, ..., value)
           standardGeneric("intersectTrack<-"))
setReplaceMethod("intersectTrack", "UCSCTableQuery", function(x, value) {
  x@intersectTrack <- normArgTrack(value, x)
  x
})

## not exported
setGeneric("outputType", function(x, ...) standardGeneric("outputType"))
setMethod("outputType", "UCSCTableQuery", function(x) x@outputType)
setGeneric("outputType<-",
           function(x, ..., value) standardGeneric("outputType<-"))
setReplaceMethod("outputType", "UCSCTableQuery",
                 function(x, value) {
                   x@outputType <- value
                   x
                 })

normArgTrack <- function(name, trackids) {
  if (is.null(name))
    return(name)
  if (!isSingleString(name))
    stop("'track' must be a single string")
  if (is(trackids, "UCSCTableQuery"))
    trackids <- trackNames(trackids)
  if (!(name %in% trackids)) {
    mapped_name <- trackids[name]
    if (is.na(mapped_name))
      stop("Unknown track: ", name)
    name <- mapped_name
  } else names(name) <- name
  name
}

normTableQueryRange <- function(range, x) {
  normGenomeRange(range, x, max.length = 1000L)
}

setGeneric("ucscTableQuery", function(x, ...) standardGeneric("ucscTableQuery"))
setMethod("ucscTableQuery", "UCSCSession",
          function(x, track = NULL, range = seqinfo(x), table = NULL,
                   names = NULL, intersectTrack = NULL, check = TRUE)
          {
            if (!is(names, "character_OR_NULL"))
              stop("'names' must be 'NULL' or a character vector")
            ## only inherit the genome from the session
            if (missing(range) || !check)
                range <- as(range, "GRanges")
            else range <- normTableQueryRange(range, x)
            query <- new("UCSCTableQuery", session = x, range = range,
                         NAMES = names)
            ## the following line must always happen to initialize the session
            ## otherwise stuff can go haywire
            trackids <- trackNames(query)
            if (!is.null(track) || !is.null(intersectTrack)) {
              query@track <- normArgTrack(track, trackids)
              query@intersectTrack <- normArgTrack(intersectTrack, trackids)
            }
            tableName(query, check=check) <- table
            query
          })

setMethod("ucscTableQuery", "character",
          function(x, ...) {
              stopifnot(isSingleString(x))
              session <- browserSession()
              genome(session) <- x
              ucscTableQuery(session, ...)
          })

ucscTableGet <- function(query, .parse = TRUE, tracks = FALSE, ...)
  ucscGet(browserSession(query), "tables",
          c(ucscForm(query, tracks = tracks), ...), .parse = .parse)

dropCookie <- function(object) {
    object@hguid <- character()
    object
}

ucscTablePost <- function(query, .parse = TRUE, tracks = FALSE, ...)
  ucscPost(dropCookie(browserSession(query)), "tables",
           c(ucscForm(query, tracks = tracks), list(...)), .parse = .parse)

## gets the track names available from the table browser

setMethod("trackNames", "UCSCTableQuery",
          function(object) {
            doc <- ucscTableGet(object, tracks = TRUE)
            track_path <- "//select[@name = 'hgta_track']/option/@value"
            tracks <- unlist(getNodeSet(doc, track_path))
            label_path <- "//select[@name = 'hgta_track']/option/text()"
            labels <- sub("\n.*$", "",
                          sapply(getNodeSet(doc, label_path), xmlValue))
            names(tracks) <- labels
            tracks
          })

## returns a character vector of table names for a given track name + range
setGeneric("tableNames", function(object, ...)
           standardGeneric("tableNames"))

getTableNames <- function(object) {
    doc <- ucscTableGet(object)
    table_path <- "//select[@name = 'hgta_table']/option/@value"
    unlist(getNodeSet(doc, table_path))
}

setMethod("tableNames", "UCSCTableQuery",
          function(object, trackOnly = FALSE)
          {
            tables <- getTableNames(object)
            outputType <- outputType(object)
            if (trackOnly) {
              trackOutputs <- c("wigData", "wigBed", "bed")
              if (!is.null(outputType))
                outputType <- intersect(trackOutputs, outputType)
              else outputType <- trackOutputs
            }
            if (!is.null(outputType)) {
              tables <- tables[sapply(tables, tableHasOutput, object=object)]
            }
            unname(tables)
          })

tableHasOutput <- function(object, table) {
    tableName(object, check=FALSE) <- table
    any(outputType(object) %in% ucscTableOutputs(object))
}

firstTableName <- function(object) {
    tables <- getTableNames(object)
    for (table in tables)
        if (tableHasOutput(object, table))
            return(table)
}

setGeneric("ucscTableOutputs",
           function(object, ...)
           standardGeneric("ucscTableOutputs"))

## returns a character vector of available output types for the table
## not exported
setMethod("ucscTableOutputs", "UCSCTableQuery",
          function(object) {
            doc <- ucscTableGet(object)
            output_path <- "//select[@name = 'hgta_outputType']/option/@value"
            unlist(getNodeSet(doc, output_path))
          })

setClass("UCSCSchema",
         representation(genome = "character",
                        tableName = "character",
                        rowCount = "integer",
                        formatDescription = "character"),
         contains = "DFrame")

setMethod("genome", "UCSCSchema", function(x) {
  x@genome
})

setMethod("tableName", "UCSCSchema", function(x) {
  x@tableName
})

setMethod("nrow", "UCSCSchema", function(x) {
  x@rowCount
})

setGeneric("formatDescription",
           function(x, ...) standardGeneric("formatDescription"))
setMethod("formatDescription", "UCSCSchema", function(x) {
  x@formatDescription
})

setClass("UCSCLinks",
         representation(genome = "character",
                        tableName = "character",
                        fieldName = "character",
                        viaName = "character"))

setClass("UCSCSchemaDescription",
         representation(schema = "UCSCSchema", links = "UCSCLinks",
                        sample = "DataFrame"))

setGeneric("ucscSchemaDescription",
           function(object, ...) standardGeneric("ucscSchemaDescription"))

## not currently exported, just ucscSchema() is public
setMethod("ucscSchemaDescription", "UCSCTableQuery", function(object)
{
  alphaNum <- function(x) gsub("^ *", "", gsub("[^a-zA-Z0-9()+,. _'-]", "", x))
  getBoldLabeledField <- function(name) {
    expr <- sprintf("//b[text() = '%s:']/following::text()[1]", name)
    alphaNum(xmlValue(getNodeSet(doc, expr)[[1]]))
  }
  getDataFrame <- function(tableNode) {
    getColumn <- function(ind) {
      ## FIXME: special treatment required for missing cells
      ## Is there a way to get child counts for every node in XPath?
      expr <- sprintf("tr/td[%d]", ind)
      children <- sapply(getNodeSet(tableNode, expr), xmlChildren)
      col <- rep(NA, length(children))
      col[elementNROWS(children) > 0] <-
        alphaNum(sapply(unlist(children), xmlValue))
      col
    }
    columnNames <- sapply(getNodeSet(tableNode, "tr[1]/th//text()"), xmlValue)
    columns <- lapply(seq_along(columnNames), getColumn)
    names(columns) <- columnNames
    columns <- columns[elementNROWS(columns) > 0]
    DataFrame(columns)
  }
  doc <- ucscTableGet(object, hgta_doSchema = "describe table schema")
  genome <- getBoldLabeledField("Database")
  tableName <- getBoldLabeledField("Primary Table")
  rowCount <- as.integer(gsub(",", "", getBoldLabeledField("Row Count")))
  format <- getBoldLabeledField("Format description")
  schemaNode <- getNodeSet(doc, "//table[tr[1]/th[3]/text() = 'SQL type']")[[1]]
  schema <- getDataFrame(schemaNode)
  schema$RType <- sapply(schema$example, function(x) class(type.convert(x)))
  schema$RType[!nzchar(schema$example)] <- "factor"
  linkNode <- getNodeSet(doc, "//div[@class = 'subheadingBar' and contains(text(), 'Connected Tables and Joining Fields')]/following::table[1]/tr[2]/td[2]")
  if (length(linkNode)) { ## this is apparently optional
    linkNode <- linkNode[[1]]
    linkTable <- sapply(getNodeSet(linkNode, "a/text()"), xmlValue)
    linkText <- sapply(getNodeSet(linkNode, "text()"), xmlValue)
    linkMat <- matrix(linkText, nrow=2)
    linkGenome <- alphaNum(linkMat[1,])
    linkGenome <- substring(linkGenome, 1, nchar(linkGenome)-1L)
    linkSplit <- matrix(unlist(strsplit(linkMat[2,], " ", fixed=TRUE)), 3)
    linkField <- substring(linkSplit[1,], 2)
    linkVia <- sub(".*?\\.(.*?)\\)", "\\1", linkSplit[3,])
    links <- new("UCSCLinks", genome = linkGenome, tableName = linkTable,
                 fieldName = linkField, viaName = linkVia)
  } else links <- new("UCSCLinks")
  sampNode <- getNodeSet(doc, "//div[contains(@class, 'subheadingBar') and contains(text(), 'Sample')]/following::table[1]//table//table")[[1]]
  sample <- getDataFrame(sampNode)
  schema <- new("UCSCSchema", schema, genome = genome, tableName = tableName,
                rowCount = rowCount, formatDescription = format)
  new("UCSCSchemaDescription", schema = schema, links = links, sample = sample)
})

setGeneric("ucscSchema",
           function(object, ...) standardGeneric("ucscSchema"))

setMethod("ucscSchema", "UCSCSchemaDescription", function(object) {
  object@schema
})

setMethod("ucscSchema", "UCSCTableQuery", function(object) {
  ucscSchema(ucscSchemaDescription(object))
})

.uploadTableBrowserRanges <- function(query, hgsid) {
  lines <- export(query@range, format = "bed", ignore.strand = TRUE)
  text <- paste(paste(lines, collapse = "\n"), "\n", sep = "")
  upload <- fileUpload("ranges.bed", text, "text/plain")
  ucscTablePost(query, hgta_enteredUserRegionFile = upload,
                hgta_enteredUserRegions = "",
                hgta_doSubmitUserRegions = "submit",
                hgsid = hgsid)
}

## export data from UCSC (internal utility)
ucscExport <- function(object)
{
  get_hgsid <- function(node)
    getNodeSet(node, "//input[@name = 'hgsid']/@value")[[1]]
  hgsid <- NULL
  if (!is.null(names(object))) { # filter by names
    text <- paste(names(object), collapse = "\n")
    output <- ucscTablePost(object, hgta_doPastedIdentiers = "submit",
                            hgta_pastedIdentifiers = text)
    error <- getNodeSet(output,
                        "//script[contains(text(), '{showWarnBox')]/text()")
    if (length(error))
      warning(sub(".*'<li>(.*?)'.*", "\\1", xmlValue(error[[1]])))
    hgsid <- get_hgsid(output)
  }
  if (!is.null(intersectTrack(object))) {
    itrack <- intersectTrack(object)
    iquery <- object
    iquery@track <- itrack
    itable <- tableNames(iquery, TRUE)
    if (!length(itable))
      stop("No table for intersection track: ", itrack)
    if (length(itable) > 1) # for now anyway
      itable <- itable[1]
    output <- ucscTableGet(object, hgta_nextIntersectGroup = "allTracks",
                           hgta_nextIntersectTrack = itrack,
                           hgta_nextIntersectTable = itable,
                           hgta_nextIntersectOp = "any",
                           hgta_doIntersectSubmit = "submit",
                           boolshad.hgta_nextInvertTable = "0",
                           boolshad.hgta_nextInvertTable2 = "0",
                           hgsid = hgsid)
    hgsid <- get_hgsid(output)
  }
  if (!spansGenome(object@range) && length(object@range) > 1L) {
    output <- .uploadTableBrowserRanges(object, hgsid)
    hgsid <- get_hgsid(output)
  }
  followup <- NULL
  if (outputType(object) == "bed") { ## some formats have extra pages
    followup <- list(hgta_doGetBed = "get BED",
                     hgta_printCustomTrackHeaders = "on",
                     boolshad.hgta_printCustomTrackHeaders = "1")
  }
  output <- ucscTableGet(object, !is.null(followup),
                         hgta_doTopSubmit = "get output",
                         hgsid = hgsid)
  if (!is.null(followup)) {
    hgsid <- get_hgsid(output)
    form <- c(followup, list(hgsid = hgsid))
    output <- ucscGet(browserSession(object), "tables", form, .parse = FALSE)
  }
  output
}

setMethod("track", "UCSCSession",
          function(object, name, ...)
          {
            track(ucscTableQuery(object, track=name, ...))
          })

inconsistentFieldCounts <- function(x) {
  con <- file()
  on.exit(close(con))
  writeLines(x, con)
  length(unique(count.fields(con, skip=2L, sep="\t")) > 1L)
}

outputTruncated <- function(x) {
  has.msg <- grepl("^-", tail(x, 1))
  has.msg #|| inconsistentFieldCounts(x)
}

## download a trackSet by name
setMethod("track", "UCSCTableQuery",
          function(object)
          {
            tables <- tableNames(object)
            table <- tableName(object)
            if (!is.null(table) && !(table %in% tables))
              stop("Unknown table: '", table, "'. Valid table names: ", tables)
            formats <- c("wigData", "wigBed", "bed")
            ## attempt to automatically determine the table
            if (!is.null(table))
              tables <- table
            for (table in tables) {
              object@table <- table
              outputs <- ucscTableOutputs(object)
              if (any(formats %in% outputs))
                break
            }
            if (!any(formats %in% outputs))
              stop("No supported output types")
            if ("wigData" %in% outputs) { # track stored as wig
              format <- "wig"
              output <- "wigData"
            } else {
              format <- output <- "bed"
              if ("wigBed" %in% outputs)
                output <- "wigBed"
            }
            outputType(object) <- output
            output <- ucscExport(object)
            if (outputTruncated(output))
              stop("Output is incomplete: ",
                   "track may have more than 100,000 elements. ",
                   "Try downloading the data via the UCSC FTP site.")
            import(text = output, format = format,
                   seqinfo = seqinfo(range(object)))
          })

## grab sequences for features in 'track' at 'range'
## setMethod("getSeq", "UCSCSession",
##           function(object, range, table = "gold")
##           {
##             followup <- list(hgta_doGenomicDna = "get sequence",
##                              hgSeq.casing = "upper",
##                              hgSeq.repMasking = "lower")
##             output <- ucscExport(object, range, "gold", table, "sequence",
##                                  followup)
##             con <- file()
##             writeLines(output, con)
##             set <- read.DNAStringSet(con, "fasta")
##             close(con)
##             set
##           })

## get a data.frame from a UCSC table
## think about taking specific columns
setGeneric("getTable",
           function(object, ...) standardGeneric("getTable"))
setMethod("getTable", "UCSCTableQuery",
          function(object, check = TRUE)
          {
            outputType(object) <- "primaryTable"
            if (is.null(tableName(object))) { # must specify a table name
                tableName(object, check=FALSE) <- firstTableName(object)
                if (is.null(tableName(object)))
                    stop("No valid table found")
            }
            else if (check && !outputType(object) %in% ucscTableOutputs(object))
                stop("tabular output format not available")
            output <- ucscExport(object)
            ## since '#' is not treated as a comment, we discard the
            ## error message, leaving only the header
            if (grepl("\\n# No results", output))
              output <- gsub("\\n.*", "", output)
            f <- file()
            writeLines(output, f)
            tab <- read.table(f, sep = "\t", header=TRUE, comment.char = "",
                              quote = "")
            ## strip off the '#' => 'X.' header prefix
            colnames(tab)[1L] <- substring(colnames(tab)[1L], 3L)
            close(f)
            tab
          })
setMethod("getTable", "UCSCSession",
          function(object, name, range = base::range(object), table = NULL) {
            getTable(ucscTableQuery(object, name, range, table))
          })

## UCSC genome view
setClass("UCSCView", representation(hgsid = "character", form = "list"),
         contains = "BrowserView")

## create a view for the given session, position and track visibility settings
## if 'tracks' is a character vector (but not a UCSCTrackModes instance) it is
## assumed to name the tracks that should be in the view. otherwise, an
## attempt is made to coerce it to a UCSCTrackModes instance.
setMethod("browserView", "UCSCSession",
          function(object, range, track, imagewidth = 800, browse = TRUE, ...)
          {
            stopifnot(isTRUEorFALSE(browse))
            form <- list()
            if (!missing(range)) {
              if (is(range, "IntegerRangesList"))
                range <- range[elementNROWS(range) > 0L]
              if (length(range) > 1) {
                ranges <- range
                views <- vector("list", length(ranges))
                for (i in seq(length(ranges))) {
                  range <- ranges[i]
                  views[[i]] <- callGeneric()
                }
                return(BrowserViewList(views))
              }
              range <- normGenomeRange(range, object)
              form <- c(form, ucscForm(range))
            }
            view <- new("UCSCView", session = object)
            ## new hgsid for each browser launch
            doc <- ucscGet(object, "gateway")
            hgsid <- sub(".*=", "",
                         grep("hgsid=", getNodeSet(doc, "//a/@href"),
                              value=TRUE)[1L])
            view@hgsid <- as.character(hgsid)
            ## figure out track modes
            origModes <- modes <- ucscTrackModes(view)
            if (!missing(track)) {
              if (class(track) == "character")
                trackNames(modes) <- track
              else {
                userModes <- as(track, "UCSCTrackModes")
                modes[names(userModes)] <- userModes
              }
            }
            argModes <- ucscTrackModes(...)
            modes[names(argModes)] <- argModes
            modes <- modes[modes != origModes]
            form <- c(form, ucscForm(modes), ucscForm(view))
            if (!missing(imagewidth))
                form <- c(form, pix = imagewidth)
            if (browse) {
                ## launch a web browser
                ucscShow(object, "tracks", form)
            }
            view@form <- form
            ## save this view
            object@views$instances <- c(object@views$instances, view)
            view
          })

viewURL <- function(x) {
    urlForm(ucscURL(browserSession(x), "tracks"), x@form)
}

# every view has a "mode" (hide, dense, pack, squish, full) for each track
### FIXME: probably should be merged with ucscTracks
### Or just leave it; ucscTracks might become more complex, while we
### need a simple way to manipulate track modes.
setClass("UCSCTrackModes", representation(labels = "character"),
         contains = "character")

# get/set track modes to/from e.g. a view
setGeneric("ucscTrackModes",
           function(object, ...) standardGeneric("ucscTrackModes"))

# convenience constructor for track mode object
setMethod("ucscTrackModes", "character",
          function(object, labels, hide = character(),
                   dense = character(), pack = character(),
                   squish = character(), full = character())
          {
            object[hide] <- "hide"
            object[dense] <- "dense"
            object[pack] <- "pack"
            object[squish] <- "squish"
            object[full] <- "full"
            if (missing(labels))
              labels <- names(object)
            new("UCSCTrackModes", object, labels = as.character(labels))
          })
setMethod("ucscTrackModes", "missing",
          function(object, ...) ucscTrackModes(character(), ...))

setMethod("ucscTrackModes", "UCSCView",
          function(object)
          {
            ucscTrackModes(ucscTracks(object))
          })

setMethod("ucscTrackModes", "UCSCSession",
          function(object)
          {
            ucscTrackModes(ucscTracks(object))
          })

setGeneric("ucscTrackModes<-",
           function(object, value) standardGeneric("ucscTrackModes<-"))
setReplaceMethod("ucscTrackModes", c("UCSCView", "UCSCTrackModes"),
                 function(object, value)
                 { # FIXME: needs to be more extensible
                   browserView(object@session, range(object), value)
                 })
setReplaceMethod("ucscTrackModes", c("UCSCView", "character"),
                 function(object, value)
                 {
                   ucscTrackModes(object) <- ucscTrackModes(value)
                   object
                 })

## subsetting UCSCTrackModes

## if not in ids, try labels
resolveTrackIndex <- function(object, i) {
  if (is.character(i)) {
    missing <- !(i %in% names(object))
    matching <- match(i[missing], object@labels)
    if (any(is.na(matching))) {
      unmatched <- i[missing][is.na(matching)]
      stop("Unknown track(s): ", paste(unmatched, collapse = ", "))
    }
    i[missing] <- names(object)[matching]
  }
  i
}

setMethod("[", "UCSCTrackModes", function(x, i, j, ..., drop=FALSE) {
  vec <- x@.Data
  names(vec) <- names(x)
  names(x@labels) <- names(x)
  ind <- resolveTrackIndex(x, i)
  initialize(x, vec[ind], labels = x@labels[ind])
})

setReplaceMethod("[", "UCSCTrackModes", function(x, i, j, ..., value) {
  vec <- x@.Data
  names(vec) <- names(x)
  vec[resolveTrackIndex(x, i)] <- value
  x@.Data <- as.vector(vec)
  x
})

# handle simple track show/hide

setMethod("trackNames", "UCSCTrackModes",
          function(object)
          {
            visible <- object != "hide"
            tracks <- names(object)[visible]
            names(tracks) <- object@labels[visible]
            tracks
          })
setReplaceMethod("trackNames", "UCSCTrackModes",
                 function(object, value)
                 {
                   value <- resolveTrackIndex(object, value)
                   spec <- names(object) %in% value
                   object[!spec] <- "hide"
                   object[spec & object == "hide"] <- "full"
                   object
                 })

setMethod("trackNames", "UCSCView",
          function(object)
          {
            tracks <- ucscTracks(object)
            modes <- ucscTrackModes(tracks)
            tracks@ids[tracks@ids %in% trackNames(modes)]
          })
setReplaceMethod("trackNames", "UCSCView",
                 function(object, value)
                 {
                   trackNames(ucscTrackModes(object)) <- value
                   object
                 })


setMethod("visible", "UCSCView", function(object) {
  modes <- ucscTrackModes(object)
  vis <- modes != "hide"
  names(vis) <- modes@labels
  vis
})
setReplaceMethod("visible", "UCSCView", function(object, value) {
  modes <- ucscTrackModes(object)
  modes[value & modes == "hide"] <- "full"
  modes[!value] <- "hide"
  ucscTrackModes(object) <- modes
  object
})


setMethod("range", "UCSCView",
          function(x, ..., na.rm) range(ucscCart(x)))
setReplaceMethod("range", "UCSCView",
                 function(x, value)
                 {
                   browserView(x@session, value, ucscTrackModes(x))
                 })

# only one view per session; a view is always active
setMethod("activeView", "UCSCView", function(object) TRUE)

# ucscTrackSet

# visual properties are specified by a "track line" for UCSC
setClass("TrackLine",
         representation(name = "character", description = "character",
                        visibility = "character", color = "integer",
                        priority = "numeric"),
         prototype(name = "R Track"))

setMethod("show", "TrackLine",
          function(object)
          {
            cat(as(object, "character"), "\n")
          })

setClass("BasicTrackLine",
         representation(itemRgb = "logical", useScore = "logical",
                        group = "character", db = "character",
                        offset = "numeric", url = "character",
                        htmlUrl = "character", colorByStrand = "matrix"),
         contains = "TrackLine")

ucscPair <- function(key, value) paste(key, value, sep = "=")

# to a text line
setAs("TrackLine", "character",
      function(from)
      {
        checkString <- function(str, len) {
          ## These are more annoying than useful
          ## if (nchar(gsub("[a-zA-Z0-9_ ]", "", str)))
          ##   warning("The string '", str,
          ##           "' contains non-standard characters.")
          ## if (nchar(str) > len) {
          ##   str <- substring(str, 1, len)
          ##   warning("The string '", str, "' must be less than ", len,
          ##           " characters; it has been truncated.")
          ## }
          if (regexpr(" ", str)[1] != -1)
            str <- paste("\"", str, "\"", sep="")
          str
        }
        str <- "track"
        name <- from@name
        if (length(name))
          str <- paste(str, " name=", checkString(name, 15), sep="")
        desc <- from@description
        if (length(desc))
          str <- paste(str, " description=", checkString(desc, 60), sep="")
        vis <- from@visibility
        if (length(vis))
          str <- paste(str, " visibility=", vis, sep="")
        color <- from@color
        if (length(color))
            str <- paste0(str, " color=\"", paste0(color, collapse=","), "\"")
        priority <- from@priority
        if (length(priority))
          str <- paste(str, " priority=", priority, sep="")
        str
      })

setAs("BasicTrackLine", "character",
      function(from)
      {
        str <- as(as(from, "TrackLine"), "character")
        itemRgb <- from@itemRgb
        if (length(itemRgb))
          str <- paste(str, " itemRgb=", if (itemRgb) "on" else "off", sep = "")
        useScore <- from@useScore
        if (length(useScore))
          str <- paste(str, " useScore=", if (useScore) "1" else "0", sep = "")
        group <- from@group
        if (length(group))
          str <- paste(str, " group=", group, sep="")
        db <- from@db
        if (length(db))
          str <- paste(str, " db=", db, sep="")
        offset <- from@offset
        if (length(offset))
          str <- paste(str, " offset=", offset, sep="")
        url <- from@url
        if (length(url))
          str <- paste(str, " url=", "\"", url, "\"", sep="")
        htmlUrl <- from@htmlUrl
        if (length(htmlUrl))
          str <- paste(str, " htmlUrl=", "\"", htmlUrl, "\"", sep="")
        colorByStrand <- from@colorByStrand
        if (length(colorByStrand)) {
          colors <- paste(colorByStrand[1,], colorByStrand[2,],
                          colorByStrand[3,], sep = ",", collapse = " ")
          str <- paste(str, " colorByStrand=\"", colors, "\"", sep = "")
        }
        str
      })

ucscParsePairs <- function(str)
{  
  str <- sub("^[[:alpha:]]*[[:blank:]]", "", str)
  split <- as.character(read.table(sep = "=", comment.char = "", as.is = TRUE,
                                   strip.white = TRUE, text = str))
  vals <- character(0)
  if (length(split)) {
    mixed <- tail(head(split, -1), -1)
    tags <- head(split, 1)
    vals <- tail(split, 1)
    if (length(mixed)) {
      tags <- c(tags, sub(".*[[:space:]]([[:alnum:]]*)$", "\\1", mixed))
      vals <- c(sub("(.*)[[:space:]][[:alnum:]]*$", "\\1", mixed), vals)
    }
    names(vals) <- tags
  }
  vals
}

# from a text line
setAs("character", "TrackLine",
      function(from)
      {
        line <- new("TrackLine")
        vals <- ucscParsePairs(from)
        if (!is.na(vals["name"]))
          line@name <- vals[["name"]]
        if (!is.na(vals["description"]))
          line@description <- vals[["description"]]
        if (!is.na(vals["visibility"]))
          line@visibility <- vals[["visibility"]]
        if (!is.na(vals["color"]))
          line@color <- as.integer(strsplit(vals[["color"]], ",")[[1]])
        if (!is.na(vals["priority"]))
          line@priority <- as.numeric(vals[["priority"]])
        line
      })

setAs("character", "BasicTrackLine",
      function(from)
      {
        line <- new("BasicTrackLine", as(from, "TrackLine"))
        vals <- ucscParsePairs(from)
        if (!is.na(vals["itemRgb"]))
          line@itemRgb <- tolower(vals[["itemRgb"]]) == "on"
        if (!is.na(vals["useScore"]))
          line@useScore <- vals[["useScore"]] == "1"
        if (!is.na(vals["group"]))
          line@group <- vals[["group"]]
        if (!is.na(vals["db"]))
          line@db <- vals[["db"]]
        if (!is.na(vals["offset"]))
          line@offset <- as.integer(vals[["offset"]])
        if (!is.na(vals["url"]))
          line@url <- vals[["url"]]
        if (!is.na(vals["htmlUrl"]))
          line@htmlUrl <- vals[["htmlUrl"]]
        if (!is.na(vals["colorByStrand"])) {
          colorToken <- strsplit(strsplit(vals[["colorByStrand"]], " ")[[1]],
                                 ",")
          line@colorByStrand <- matrix(as.integer(unlist(colorToken)), nrow = 3)
        }
        line
      })


setClass("GraphTrackLine",
         representation(altColor = "integer", autoScale = "logical",
                        alwaysZero = "logical",
                        gridDefault = "logical", maxHeightPixels = "integer",
                        graphType = "character", viewLimits = "numeric",
                        yLineMark = "numeric", yLineOnOff = "logical",
                        windowingFunction = "character",
                        smoothingWindow = "numeric", type = "character"),
         contains = "TrackLine")

setAs("GraphTrackLine", "character",
      function(from)
      {
        str <- as(as(from, "TrackLine"), "character")
        type <- if (from@type == "wig") "wiggle_0" else "bedGraph"
        str <- paste(str, " type=", type, sep = "")
        color <- from@altColor
        if (length(color))
          str <- paste(str, " altColor=", paste(color, collapse=","), sep="")
        autoScale <- from@autoScale
        onoff <- function(x) if (x) "on" else "off"
        if (length(autoScale))
          str <- paste(str, " autoScale=", onoff(autoScale), sep = "")
        alwaysZero <- from@alwaysZero
        if (length(alwaysZero))
          str <- paste(str, " alwaysZero=", onoff(alwaysZero), sep = "")
        gridDefault <- from@gridDefault
        if (length(gridDefault))
          str <- paste(str, " gridDefault=", onoff(gridDefault), sep = "")
        maxHeightPixels <- from@maxHeightPixels
        if (length(maxHeightPixels))
          str <- paste(str, " maxHeightPixels=",
                       paste(maxHeightPixels, collapse=":"), sep = "")
        graphType <- from@graphType
        if (length(graphType))
          str <- paste(str, " graphType=", graphType, sep = "")
        viewLimits <- from@viewLimits
        if (length(viewLimits))
          str <- paste(str, " viewLimits=", paste(viewLimits, collapse = ":"),
                       sep = "")
        yLineMark <- from@yLineMark
        if (length(yLineMark))
          str <- paste(str, " yLineMark=", yLineMark, sep = "")
        yLineOnOff <- from@yLineOnOff
        if (length(yLineOnOff))
          str <- paste(str, " yLineOnOff=", onoff(yLineOnOff), sep = "")
        windowingFunction <- from@windowingFunction
        if (length(windowingFunction))
          str <- paste(str, " windowingFunction=", windowingFunction, sep = "")
        smoothingWindow <- from@smoothingWindow
        if (length(smoothingWindow))
          str <- paste(str, " smoothingWindow=", smoothingWindow, sep = "")
        str
      })

setAs("character", "GraphTrackLine",
      function(from)
      {
        line <- new("GraphTrackLine", as(from, "TrackLine"))
        vals <- ucscParsePairs(from)
        type <- vals[["type"]]
        if (!(type %in% c("wiggle_0", "bedGraph")))
          stop("Unknown graph track type: ", type)
        line@type <- if (type == "wiggle_0") "wig" else "bedGraph"
        if (!is.na(vals["altColor"]))
          line@altColor <- as.integer(strsplit(vals[["altColor"]], ",")[[1]])
        if (!is.na(vals["autoScale"]))
          line@autoScale <- tolower(vals[["autoScale"]]) == "on"
        if (!is.na(vals["alwaysZero"]))
          line@alwaysZero <- tolower(vals[["alwaysZero"]]) == "on"
        if (!is.na(vals["gridDefault"]))
          line@gridDefault <- tolower(vals[["gridDefault"]]) == "on"
        if (!is.na(vals["maxHeightPixels"]))
          line@maxHeightPixels <-
            as.integer(strsplit(vals[["maxHeightPixels"]], ":")[[1]])
        if (!is.na(vals["graphType"]))
          line@graphType <- vals[["graphType"]]
        if (!is.na(vals["viewLimits"]))
          line@viewLimits <-
            as.numeric(strsplit(vals[["viewLimits"]], ":")[[1]])
        if (!is.na(vals["yLineMark"]))
          line@yLineMark <- as.numeric(vals[["yLineMark"]])
        if (!is.na(vals["yLineOnOff"]))
          line@yLineOnOff <- tolower(vals[["yLineOnOff"]]) == "on"
        if (!is.na(vals["windowingFunction"]))
          line@windowingFunction <- vals[["windowingFunction"]]
        if (!is.na(vals["smoothingWindow"]))
          line@smoothingWindow <- as.numeric(vals[["smoothingWindow"]])
        line
      })

setAs("BasicTrackLine", "GraphTrackLine",
      function(from) new("GraphTrackLine", from))

setAs("GraphTrackLine", "BasicTrackLine",
      function(from) new("BasicTrackLine", from))

setClass("UCSCData",
         representation(trackLine = "TrackLine"),
         prototype(trackLine = new("BasicTrackLine")),
         "GRanges")

UCSCData <- function(ranges, trackLine = NULL) {
  ucsc <- as(ranges, "UCSCData")
  ucsc@trackLine <- trackLine
  ucsc
}

setMethod("show", "UCSCData",
          function(object)
          {
            if (!is.null(object@trackLine@name))
              cat("UCSC track '", object@trackLine@name, "'\n", sep = "")
            callNextMethod()
          })

chooseGraphType <- function(from) {
    if (is(from, "GPos")) {
        return(if (is(from, "StitchedGPos")) "bedGraph" else "wig")
    }
  r <- ranges(from)
  type <- "bedGraph"
  ## decide whether compression is a good idea
  steps <- diff(sort(start(r)))
  if (length(unique(width(r))) == 1L && # all spans must be the same for WIG
      (length(unique(steps)) == 1L || # fixed-step makes sense
       ((3L * length(unique(width(r)))) < length(r) && # makes sense wrt size
        mean(steps) < 100))) # dense enough for UCSC efficiency
    type <- "wig"
  type
}

setAs("GRanges", "UCSCData", function(from) {
  line <- metadata(from)$trackLine
  if (is.null(line)) {
    if (is.numeric(score(from))) { # have numbers, let's plot them
      type <- chooseGraphType(from)
      line <- new("GraphTrackLine", type = type)
    } else {
      line <- new("BasicTrackLine")
      db <- unique(genome(from))
      if (length(db) == 1 && !is.na(db))
        line@db <- db
    }
  } else {
    metadata(from)$trackLine <- NULL
  }
  new("UCSCData", as(from, "GRanges"), trackLine = line)
})

## We want 'as(UCSCData, "GRanges", strict=FALSE)' to do the right thing (i.e.
## be a no-op) but as() won't do that if a coerce,UCSCData,GRanges method
## exists (this is a serious flaw in as() current design/implementation).
## The workaround is to support the 'strict=FALSE' case at the level of
## the coerce() method but setAs() doesn't let us do that so we use
## setMethod("coerce", ...) to define the method.
setMethod("coerce", c("UCSCData", "GRanges"),
  function(from, to="GRanges", strict=TRUE) {
  if (strict) {
    gr <- new("GRanges")
    for (what in slotNames(gr))
      slot(gr, what) <- slot(from, what)
    metadata(gr)$trackLine <- from@trackLine
    gr
  } else from
})

splitUCSCData <- function(x, f, drop=FALSE, ...) {
  GRangesList(
    lapply(split(seq_along(x), f, drop=drop, ...),
           function(i) x[i]),
    compress=FALSE
  )
}

setMethod("split", "UCSCData", splitUCSCData)
setMethod("split", c("UCSCData", "Vector"), splitUCSCData)

setClass("UCSCFile", contains = "RTLFile")

UCSCFile <- function(resource) {
  new("UCSCFile", resource = resource)
}

## the 'ucsc' format is a meta format with a track line followed by
## features formatted as 'wig', 'bed', 'bed15', 'bedGraph', 'gff', or
## really any text track format.

setGeneric("export.ucsc",
           function(object, con, ...) standardGeneric("export.ucsc"))

setMethod("export.ucsc", c("ANY", "RTLFile"),
          function(object, con, subformat = "auto", ...)
          {
            if (subformat == "auto" && !is(con, "UCSCFile"))
              subformat <- fileFormat(con)
            export(object, UCSCFile(resource(con)), subformat=subformat, ...)
          })

setMethod("export.ucsc", c("ANY", "ANY"),
          function(object, con, ...)
          {
            export(object, con, "ucsc", ...)
          })

.export_SimpleGRangesList_RTLFile <- function(object, con, format, ...) {
  export(object, UCSCFile(resource(con)), subformat = fileFormat(con), ...)
}

setMethod("export", c("GRangesList", "UCSCFile"),
          function(object, con, format, append = FALSE, index = FALSE, ...)
          {
            if (isTRUE(index) && length(object) > 1)
              stop("Cannot index multiple tracks in a single file")
            trackNames <- names(object)
            if (is.null(trackNames))
              trackNames <- paste("R Track", seq_len(length(object)))
            ucsc <- unlist(lapply(object, is, "UCSCData"))
            lines <- unlist(lapply(object[ucsc], slot, "trackLine"))
            trackNames[ucsc] <- as.character(sapply(lines, slot, "name"))
            tracks <- vector("list", length(object))
            for (i in seq_len(length(object))) {
              tracks[[i]] <- export(object[[i]], con, name = trackNames[i],
                                    append = append, index = index, ...)
              append <- TRUE
            }
            RTLFileList(tracks)
          })

trackLineClass <- function(subformat)
{
  subformat <- tolower(subformat)
  if (subformat == "wig" || subformat == "bedgraph")
    "GraphTrackLine"
  else if (subformat == "bed15")
    "Bed15TrackLine"
  else "BasicTrackLine"
}

setMethod("fileFormat", "TrackLine", function(x) "bed")
setMethod("fileFormat", "GraphTrackLine", function(x) x@type)

setMethod("bestFileFormat", c("UCSCData", "ANY"), function(x, dest) {
  fileFormat(x@trackLine)
})

setMethod("export", c("ANY", "UCSCFile"),
          function(object, con, format, ...)
          {
            cl <- class(object)
            track <- try(as(object, "GRanges"), silent = TRUE)
            if (class(track) == "try-error") {
              track <- try(as(object, "SimpleGRangesList"), silent = TRUE)
              if (is(track, "try-error"))
                stop("cannot export object of class '", cl, "': ", track)
            }
            object <- track
            callGeneric()
          })

setMethod("export", c("GenomicRanges", "UCSCFile"),
          function(object, con, format, ...)
          {
            object <- as(object, "UCSCData")
            callGeneric()
           })

setMethod("export", c("UCSCData", "UCSCFile"),
          function(object, con, format, subformat = "auto", append = FALSE,
                   index = FALSE, ...)
          {
            auto <- FALSE
            if (subformat == "auto") {
              auto <- TRUE
              subformat <- bestFileFormat(object, con)
            }
            graphFormat <- tolower(subformat) %in% c("wig", "bedgraph")
            if (graphFormat) {
              strand <- as.character(strand(object))
              strand[is.na(strand)] <- "NA"
              isStrandDisjoint <- function(track) {
                all(tapply(ranges(track), seqnames(track), function(r) {
                  isDisjoint(r) && all(width(r) > 0)
                }), na.rm=TRUE)
              }
              if (!all(unlist(lapply(split(object, strand), isStrandDisjoint))))
              {
                if (auto) {
                  subformat <- "bed"
                  graphFormat <- FALSE
                }
                else stop("Track not compatible with WIG/bedGraph: ",
                          "Overlapping features must be on separate strands",
                          " and every feature width must be positive")
              }
            }
            lineClass <- trackLineClass(subformat)
            if (!is(object@trackLine, lineClass))
              object@trackLine <- as(object@trackLine, lineClass)
            if (is(object@trackLine, "GraphTrackLine"))
              object@trackLine@type <- subformat
            args <- list(...)
            lineArgs <- names(args) %in% slotNames(lineClass)
            for (argName in names(args)[lineArgs])
              slot(object@trackLine, argName) <- args[[argName]]
            if (is(object@trackLine, "BasicTrackLine") &&
                length(object@trackLine@offset))
              ranges(object) <- shift(ranges(object), -object@trackLine@offset)
            trackLine <- NULL
            if (graphFormat) {
              strand <- as.character(strand(object))
              strand[is.na(strand)] <- "NA"
              if (!all(strand[1] == strand)) {
                nameMap <- c("+" = "p", "-" = "m", "NA" = "NA")
                strand <- factor(strand)
                name <- paste(object@trackLine@name, nameMap[levels(strand)])
                tracks <- split(object, strand)
                export(tracks, con, subformat, append,
                       trackNames = name, ...)
                return()
              }
            } else if (subformat == "bed15") {
              if (is.null(object@trackLine@expNames))
                object@trackLine@expNames <- colnames(object)
              trackLine <- object@trackLine
            }
            file <- con
            m <- manager()
            con <- connection(m, con, if (append) "a" else "w")
            cat(as(object@trackLine, "character"), "\n", file=con, sep = "")
            do.call(export, c(list(as(object, "GRanges"), con, subformat),
                              args[!lineArgs], trackLine = trackLine))
            release(m, con)
            if (index)
              indexTrack(FileForFormat(resource(file), subformat), skip = 1L)
            else invisible(file)
          })

setGeneric("import.ucsc", function(con, ...) standardGeneric("import.ucsc"))

setMethod("import.ucsc", "ANY",
          function(con, ...)
          {
            import(con, "ucsc", ...)
          })

setMethod("import.ucsc", "RTLFile",
          function(con, subformat = "auto", ...)
          {
            if (!is(con, "UCSCFile")) {
              format <- fileFormat(con)
              if (subformat != "auto" && format != subformat)
                stop("Attempt to import '", class(con), "' as ", subformat)
              subformat <- format
            }
            import.ucsc(resource(con), subformat = subformat, ...)
          })

parseFormatFromTrackLine <- function(x) {
  if (!grepl("type=", x))
    NULL
  else {
    type <- sub(".*type=\"(.*?)\".*", "\\1", x)
    if (type == "array")
      "bed15"
    else if (type == "wiggle_0")
      "wig"
    else type
  }
}

setMethod("import", "UCSCFile",
          function(con, format, text, subformat = "auto", drop = FALSE,
                   genome = NA, ...)
          {
            lines <- readLines(resource(con), warn = FALSE)
            tracks <- grep("^track", lines)
            trackLines <- lines[tracks]
            starts <- tracks + 1L
            ends <- c(tail(tracks, -1) - 1L, length(lines))
            makeTrackSet <- function(i)
            {
              if (subformat == "auto") {
                subformat <- parseFormatFromTrackLine(trackLines[i])
                if (is.null(subformat)) {
                  p <- resourceDescription(con)
                  subformat <- file_ext(p)
                }
              }
              line <- as(trackLines[i], trackLineClass(subformat))
              if (starts[i] <= ends[i]) {
                text <- window(lines, starts[i], ends[i])
              } else {
                text <- character()
              }
              if (is.na(genome) && is(line, "BasicTrackLine") &&
                  length(line@db))
                genome <- line@db
              if (subformat == "bed15") { # need to pass track line
                ucsc <- import(format = "bed15", text = text,
                               trackLine = line,
                               genome = genome, ...)
              } else {
                ucsc <- import(format = subformat, text = text,
                               genome = genome, ...)
              }
              if (is(line, "BasicTrackLine") && length(line@offset))
                ranges(ucsc) <- shift(ranges(ucsc), line@offset)
              ucsc <- as(ucsc, "UCSCData", FALSE)
              ucsc@trackLine <- line
              ucsc
            }
            tsets <- lapply(seq_along(trackLines), makeTrackSet)
            trackNames <- sapply(tsets, function(x) x@trackLine@name)
            if (!any(is.na(trackNames)))
              names(tsets) <- trackNames
            if (drop && length(tsets) == 1)
              return(tsets[[1]])
            GRangesList(tsets, compress=FALSE)
          })



setMethod("login", "UCSCSession", function(x, username, password) {
  ucscPost(x, "hgLogin", list(hgLogin.do.displayLogin = "Login",
                              hgLogin_userName = username,
                              hgLogin_password = password))
  
})

setMethod("saveView", "UCSCView", function(x, name, username, password,
                                           share = TRUE)
          {
            if (!missing(username))
              login(browserSession(x), username, password)
            ucscPost(x, "hgSession",
                     list(hgS_newSessionName = name,
                          hgS_newSessionShare = if (share) "on" else "off"))
          })

setMethod("restoreView", "UCSCSession",
          function(x, name, username, password) {
            if (!missing(username))
              login(x, username, password)
            ucscPost(x, "hgSession",
                     setNames(list("use"), paste0("hgS_load_", name)))
          })

############ INTERNAL API ############

## every cgi variable is stored in the 'cart'
setClass("ucscCart", contains = "character")

setGeneric("ucscCart", function(object, ...) standardGeneric("ucscCart"))

setMethod("ucscCart", "UCSCSession",
          function(object, form = ucscForm(activeView(object)))
          {
            node <- ucscGet(object, "cart", form)
            raw <- xmlValue(getNodeSet(node, "//pre/text()")[[1]])
            lines <- strsplit(raw, "\n")[[1]]
            fields <- strsplit(lines, " ")
            pairs <- fields[sapply(fields, length) == 2]
            mat <- matrix(unlist(pairs), nrow = 2)
            vals <- mat[2,]
            names(vals) <- mat[1,]
            new("ucscCart", vals)
          })
setMethod("ucscCart", "UCSCView",
          function(object)
          {
            ucscCart(object@session, ucscForm(object))
          })

setMethod("genome", "ucscCart", function(x) x[["db"]])

setMethod("range", "ucscCart",
          function(x, ..., na.rm)
          {
            pos <- x["position"]
            posSplit <- strsplit(pos, ":")[[1]]
            range <- as.numeric(gsub(",", "", strsplit(posSplit[2], "-")[[1]]))
            GRangesForUCSCGenome(x[["db"]], posSplit[1],
                                 IRanges(range[1], range[2]))
          })

### track information

setClass("ucscTracks",
         representation(ids = "character", modes = "character"))

setGeneric("ucscTracks", function(object, ...) standardGeneric("ucscTracks"))

setMethod("ucscTracks", "UCSCSession",
          function(object, form = list())
          {
            tracks <- ucscGet(object, "tracks", form)
            nodes <- getNodeSet(tracks, "//select/option[@selected]/text()")
            trackModes <- sapply(nodes, xmlValue)
            nodes <- getNodeSet(tracks, "//select/@name")
            trackIds <- unlist(nodes)
            ##trackIds <- sapply(nodes, xmlValue)
            nodes <- getNodeSet(tracks, "//select/../a[not(@class)]/text()")
            nms <- sapply(nodes, xmlValue)
            names(trackIds) <- sub("^ ", "", nms)
            new("ucscTracks", ids = trackIds, modes = trackModes)
          })

setMethod("ucscTracks", "UCSCView",
          function(object)
          {
            ucscTracks(object@session, ucscForm(object))
          })

setMethod("ucscTrackModes", "ucscTracks",
          function(object)
          {
            modes <- object@modes
            labels <- names(object@ids)
            names(modes) <- object@ids
            ucscTrackModes(modes, labels)
          })

## List available UCSC genomes
.getOrganism <- function(db){
  .ucsc <- "http://genome.ucsc.edu/cgi-bin"
  .tryQuery <- function(url, query)
    tryCatch({
      htmlTreeParse(url, useInternalNodes=TRUE)[[query]]
    }, error=function(err) {
      warning(conditionMessage(err))
      NA_character_
    })
  urls <- sprintf("%s/hgGateway?db=%s", .ucsc, db)
  sapply(urls, .tryQuery, "string(//div[@id='sectTtl']/i)")
}

ucscGenomes <- function(organism=FALSE) {
  url <- "http://genome.ucsc.edu/FAQ/FAQreleases"
  df <- readHTMLTable(url)[[1L]][c(2,1,3,4,5)]
  .cleanTableCells <- function(x)
  {
    x <- sub("^ *", "", x)
    ## Some empty cells in the table of UCSC genome releases seem to contain
    ## invisible junk. This junk seems to vary from one platform to the other
    ## (not clear why, maybe some sort of local issue?).
    ## There must be a simplest way to get rid of this junk...
    ## TODO: Test this on Windows!
    is_empty_cell <- x %in% c("<c2><a0>", "\xc2\xa0", "\xc3\x82\xc2\xa0")
    x[is_empty_cell] <- ""
    x
  }
  COLS <- c("UCSC VERSION", "SPECIES", "RELEASE DATE", "RELEASE NAME", "STATUS")
  if (!identical(names(df), COLS))
    stop("table of UCSC genome releases (found at ", url, "#release1) ",
         "doesn't have expected columns ", paste(COLS, collapse=", ")) 
  ## readHTMLTable is returning one NA row for some reason
  names(df) <- c("db","species","date","name","status")
  df <- df[!apply(df, 1, function(x) all(is.na(x))),]
  df <- df[df$db != "" , ]
  not_empty <- df$species != ""
  df$species <- rep.int(df$species[not_empty], diff(which(c(not_empty, TRUE))))
  df <- df[df$status == "Available", -5L]
  if (organism) {
    df$organism <- NA_character_
    org <- lapply(as.character(df$db), function(xx) 
                  unique(suppressWarnings(mapGenomeBuilds(xx)$organism)))
    df$organism[!sapply(org, is.null)] <- unlist(org) 
  }
  rownames(df) <- NULL
  df
}

# form creation

setGeneric("ucscForm", function(object, ...) standardGeneric("ucscForm"))

setMethod("ucscForm", "IntegerRangesList",
          function(object)
          {
            form <- list()
            genome <- singleGenome(genome(object))
            if (!is.na(genome))
              form <- c(form, db = genome)
            chrom <- space(object)
            if (!is.null(chrom)) {
              if (!length(chrom))
                chrom <- levels(chrom)[1]
              scipen <- getOption("scipen")
              options(scipen = 100) # prevent use of scientific notation
              on.exit(options(scipen = scipen))
              position <- chrom
              if (length(unlist(start(object))))
                position <- paste(chrom, ":",
                                  unlist(start(object)), "-",
                                  unlist(end(object)), sep = "")
              form <- c(form, position = position)
            }
            form
          })
setMethod("ucscForm", "GRanges",
          function(object)
          {
            scipen <- getOption("scipen")
            options(scipen = 100) # prevent use of scientific notation
            on.exit(options(scipen = scipen))
            form <- list()
            genome <- singleGenome(genome(object))
            if (!is.na(genome))
              form <- c(form, db = genome)
            if (length(object) > 0L) {
              object <- object[1]
              c(form, position = paste(seqnames(object), ":",
                        unlist(start(object)), "-",
                        unlist(end(object)), sep = ""))
            } else form
          })

setMethod("ucscForm", "UCSCTrackModes",
          function(object)
          {
            as.list(object)
          })
setMethod("ucscForm", "UCSCView",
          function(object)
          {
            if (length(object@hgsid))
              list(hgsid = as.character(object@hgsid))
            else list()
          })
setOldClass("FileUploadInfo")
setMethod("ucscForm", "FileUploadInfo",
          function(object, genome = NA_character_, ...)
          {
            form <- list(Submit = "Submit", hgt.customFile = object)
            if (!is.na(genome))
              form <- c(form, db = genome)
            form
          })
setMethod("ucscForm", "SimpleGRangesList",
          function(object, format, ...)
          {
            lines <- export(object, format = "ucsc", subformat = format, ...)
            text <- paste(paste(lines, collapse = "\n"), "\n", sep = "")
            filename <- paste("track", format, sep = ".")
            upload <- fileUpload(filename, text, "text/plain")
            genome <- singleGenome(genome(object))
            ucscForm(upload, genome)
          })
setMethod("ucscForm", "RTLFile",
          function(object, genome, ...)
          {
            upload <- fileUpload(path(object), "text/plain")
            ucscForm(upload, genome)
          })

setMethod("ucscForm", "UCSCTableQuery",
          function(object, tracks = FALSE) {
            ## range (ie genome) is required
            range <- object@range
            table <- object@table
            form <- list()
            if (!spansGenome(range) && length(range) == 1) {
              form <- c(form, ucscForm(range))
            }
            if (is.null(object@track) && !tracks) {
              form <- c(form, list(hgta_group = "allTables"))
              if (is.null(table))
                table <- "chromInfo"
            }              
            else
              form <- c(form, list(hgta_group = "allTracks",
                                   hgta_track = object@track))
            if (spansGenome(range))
              regionType <- "genome"
            else if (length(range) == 1)
              regionType <- "range"
            else regionType <- "userRegions"
            form <- c(form, hgta_regionType = regionType,
                      hgta_table = table)
            if (!is.null(object@outputType)) {
              form <- c(form, hgta_outputType = object@outputType)
            }
            form$hgta_compressType <- "none" # TODO: support gzip
            form
          })

setMethod("ucscForm", "NULL", function(object) list())

# Transforming to a cookie string

setGeneric("ucscCookie", function(object, ...) standardGeneric("ucscCookie"))
setMethod("ucscCookie", "UCSCSession",
          function(object)
          {
            object@hguid
          })

# HTTP wrappers

# URL constants for UCSC
ucscURLTable <- c(gateway = "hgGateway", tracks = "hgTracks",
                  custom = "hgCustom", tables = "hgTables",
                  cart = "cartDump")

ucscURL <-
  function(object, key, force=TRUE)
  {
    path <- ucscURLTable[key]
    if (is.na(path))
        stop("Key '", key, "' does not match a known URL")
    if (force && key == "gateway") {
        path <- paste0(path, '?redirect="manual"')
    }
    paste(object@url, path, sep="")
  }

# convenience wrappers for _initialized_ sessions
ucscShow <- function(object, key, .form = list(), ...)
  httpShow(ucscURL(object, key), .form, ...)
ucscPost <- function(object, key, .form = list(), ...)
  httpPost(ucscURL(object, key), .form, ..., cookie = ucscCookie(object))
ucscGet <- function(object, key, .form = list(), ...)
  httpGet(ucscURL(object, key), .form, ..., cookie = ucscCookie(object))

Try the rtracklayer package in your browser

Any scripts or data that you put into this service are public.

rtracklayer documentation built on Nov. 8, 2020, 6:50 p.m.