R/validate_request.R

#' validate an API request by looking up its token
#'
#' @param json named list with at least these items:
#' \itemize{
#' \item authToken authorization token or authorization ticket
#' \item projectID (optional) integer projectID(s)
#' }
#' An authorization ticket is the cookie generated by Apache's mod-auth-tkt.
#' It is distinguished by including at least one '!' character, which tokens
#' never contain.  If \code{authToken} is deemed to be a ticket, then
#' this function looks in the parent.frame() for an environment or
#' list named `env`, and looks up the item `HTTP_X_FORWARDED_FOR` there.
#' This must give the client IP address as a dotted-quad character scalar;
#'     e.g. "131.162.131.200" Default: NULL
#' That IP address must be the same one from which the request to generate
#' a ticket came.
#'
#' @param needProjectID logical; if TRUE, a projectID to which the user
#' has permission must be in \code{json}; default:  TRUE
#'
#' @param needAdmin logical; if TRUE, the user must have userType="administrator"
#' in order to use the entry point; default:  FALSE
#'
#' @return  If the request was valid, a list with these items:
#' \itemize{
#' \item userID integer user ID
#' \item projects integer vector of *all* project IDs user has permission to
#' \item projectID the projectID(s) specified in the request (and it is guaranteed the user has permission to them),
#' present only if the user specified it.
#' \item userType character scalar; one of "administrator", "contributor"
#' \item isAdmin logical scalar; TRUE if user is an administrator.
#' }
#'
#' If the request was not valid, a value of class "error" and suitable
#' for return by a Rook app, which contains an appropriate error
#' message.  This value should be immediately returned by the caller.
#'
#' So typical usage is like:
#' \code{
#'    auth = validate_request(json, needProjectID=FALSE)
#'    if (inherits(auth, "error")) return(auth)
#'    projectID = auth$projectID
#' }
#'
#'
#' @note this function is meant for use inside Rook servers, such as \link{\code{dataServer}}
#' and \link{\code{statusServer}} in this package.
#'
#' @author John Brzustowski \email{jbrzusto@@REMOVE_THIS_PART_fastmail.fm}

validate_request = function(json, needProjectID=TRUE, needAdmin=FALSE) {

    msg = NULL

    openMotusDB() ## ensure connection is still valid after a possibly long time between requests

    authToken = safe_arg(json, authToken, char)
    if (is.null(authToken))
        return(error_from_app("this API call requires authentication"))

    projectID = safe_arg(json, projectID, int, scalar=FALSE)

    auth = NULL ## start with no authorization

    if (! grepl('!', authToken, fixed=TRUE)) {
        ## it's a token
        now = as.numeric(Sys.time())
        auth = AuthDB("
select
   userID,
   projects,
   expiry,
   userType
from
   auth
where
   token=:token",
token = authToken)
        if (! isTRUE(nrow(auth) > 0)) {
            ## authToken invalid
            msg = "token invalid"
        } else if (all(auth$expiry < now)) {
            ## authToken expired
            msg = "token expired"
        }
    } else {
        ## it's a ticket, with format described by /usr/share/doc/libapache2-mod-auth-tkt/README.gz:
        ##
        ##         The TKTAuthCookieName cookie is constructed using following algorithm:
        ##
        ##      ('+' is concatenation operation)
        ##
        ##      cookie := digest + hextimestamp + user_id + '!' + user_data
        ##
        ##      or if using tokens:
        ##
        ##      cookie := digest + hextimestamp + user_id + '!' + token_list + '!' + user_data
        ##
        ##      digest := MD5(digest0 + key)
        ##
        ##      digest0 := MD5(iptstamp + key + user_id + '\0' + token_list + '\0' + user_data)
        ##
        ##      iptstamp is a 8 bytes long byte array, bytes 0-3 are filled with
        ##      client's IP address as a binary number in network byte order, bytes
        ##      4-7 are filled with timestamp as a binary number in network byte
        ##      order.
        ##
        ##      hextimestamp is 8 character long hexadecimal number expressing
        ##      timestamp used in iptstamp.
        ##
        ##      token_list is an optional comma-separated list of access tokens
        ##      for this user. This list is checked if TKTAuthToken is set for a
        ##      particular area.
        ##
        ##      user_data is optional
        ##
        ##
        parts = strsplit(authToken, "!", fixed=TRUE)[[1]]
        if (length(parts) == 3) {
            token_list = parts[2]
            user_data = parts[3]
        } else {
            token_list = ""
            user_data = parts[2]
        }
        ticket_digest = substring(parts[1], 1, 32)

        ## Another nice R edge case; how do you convert an 8-digit hex
        ## string to a signed integer?  Nothing works very well
        ## because e.g. strtoi() 'overflows' after 0x7fffffff, thereby
        ## removing half of its potential domain.  Oh wait, I
        ## can do strtoi("-0x1234") as if anyone ever uses that...
        ## Can we not assume that anyone using hex is aware of integer
        ## representations and will want "0xffffffff" to map
        ## to -1L ???

        hextimestamp = as.numeric(paste0("0x", substring(parts[1], 33, 40)))
        if (hextimestamp > 2147483647)
            hextimestamp = hextimestamp - 4294967296

        ## there might have been more than one HTTP_X_FORWARDED_FOR
        ## header, in which case we want the IP address from the last
        ## one, which should be the IP address of the ultimate client;
        ## i.e. the one that was used when generating the auth ticket.

        env = parent.frame()$env
        remoteIP = strsplit(env$HTTP_X_FORWARDED_FOR, ", ", fixed=TRUE)[[1]][1]
        iptstamp = c(as.raw(as.integer(strsplit(remoteIP, ".", fixed=TRUE)[[1]])), rev(packBits(intToBits(hextimestamp))))
        user_id = substring(parts[1], 41)

        ## This bit mimics what happens in login.php.  In the first case, we're using a byte sequence.

        digest0 = digest::digest(c(iptstamp, MOTUS_SECRETS$mod_auth_tkt, charToRaw(user_id), as.raw(0), charToRaw(token_list), as.raw(0), charToRaw(user_data)), algo="md5", serialize=FALSE, ascii=TRUE)

        ## But the second time, we use the ascii representation of the
        ## digest from above, rather than its raw bytes.  Weird, but
        ## matches what happens in apache's mod-auth-tkt so that's
        ## what we have to do.

        digest = digest::digest(paste0(digest0, rawToChar(MOTUS_SECRETS$mod_auth_tkt)), serialize=FALSE, algo="md5")

        if (ticket_digest != digest) {
            msg = "invalid authorization ticket"
        } else {
            auth = list(userID=as.integer(user_id), projects=token_list, userType=user_data)
        }
    }
    isAdmin = isTRUE(auth$userType == "administrator")
    if (is.null(msg)) {
        rv = list(userID=auth$userID, projects = scan(text=auth$projects, sep=",", quiet=TRUE), projectID=projectID, userType=auth$userType, isAdmin=isAdmin)
        if (isAdmin)
            rv$projects = c(-1, rv$projects)  ## add the "unknown" sentinel for admin users
        if (! (isAdmin || all(projectID %in% rv$projects))) {
            ## user not authorized for project
            msg = "not authorized for project"
        }
        if (needProjectID && length(projectID) == 0) {
            ## project ID required but not specified
            msg = "missing required projectID"
        }
        if (needAdmin && ! isAdmin) {
            ## user not authorized for call
            msg = "not authorized for this API call"
        }
    }
    if (! is.null(msg)) {
        return(error_from_app(msg))
    }
    return(rv)
}
jbrzusto/motus-R-package documentation built on May 18, 2019, 7:03 p.m.