R/functions.R

Defines functions makeTable getZoneUTM utmTransform findPads collatePAD getCOMs

Documented in collatePAD findPads getCOMs getZoneUTM makeTable utmTransform

#' Creating a Collated WHT data frame
#'
#' This function creates a new data frame that will have containers for all of the outputs generated
#' by the collateWHT function.
#'
#' @param WHT a data fram containing columns xs, ys, xb, and yb. All values should be in lat/long
#' @return a data frame
#' @author Adam Cagle
#' @details
#' Latitude and longitude are assumed to be WGS84. WHT can contain extra columns, but columns named
#' xs (surface longitude), ys (surface latitude), xb (bottomhole longitude), and yb (bottomhole) latitude
#' are requried. Extra colums will be prserved and passed back to the user in the output.
makeTable <- function(WHT){
  if(!all(c("xs", "ys", "xb", "yb") %in% names(WHT))){
    stop("columns named xs, ys, xb, and yb are required. Check the names of your columns.")
  }else if(any(is.na(c(WHT$xs, WHT$ys, WHT$xb, WHT$yb)))){
    stop("Your data has missing values. All wells must have xs, ys, xb, and yb values.")
  }else if(any(c(WHT$xs, WHT$ys, WHT$xb, WHT$yb) == 0)){
    warning("You have lat and/or long values equal to zero which may indicate missing values")
  }

  rowCount <- nrow(WHT)
  cWHT <- cbind(data.frame(cWHT_ID = seq(1, rowCount),
                           pad = rep(NA, rowCount),
                           branch = rep(NA, rowCount),
                           stem = rep(NA, rowCount),
                           dLength = rep(NA, rowCount),
                           oLength = rep(NA, rowCount),
                           dAzimuth = rep(NA, rowCount),
                           oAzimuth = rep(NA, rowCount),
                           spacing = rep(NA, rowCount),
                           xsBar = rep(NA, rowCount),
                           ysBar = rep(NA, rowCount),
                           xs = WHT$xs,
                           ys = WHT$ys,
                           xo = rep(NA, rowCount),
                           yo = rep(NA, rowCount),
                           xb = WHT$xb,
                           yb = WHT$yb,
                           bo = rep(NA, rowCount),
                           zoneUTM = rep(NA, rowCount)),
                WHT[ ,!(names(WHT) %in% c("xs", "ys", "xb", "yb"))])

  cWHT$dAzimuth <- bearing(cbind(cWHT$xs, cWHT$ys), cbind(cWHT$xb, cWHT$yb))
  return(cWHT)
}

#' Finding the UTM Zone Number
#'
#' function to calculate the UTM zone wich contains the longitude provided
#'
#' @param longitude a vector of length n
#' @return a vector of length n with UTM Zone numbers
#' @author Adam Cagle
#' @export
getZoneUTM <- function(longitude){
  zone <- (floor((longitude+180)/6)+1) %% 60
  return(zone)
}

#' Transforming Coordinates To and From lat-long and x-y
#'
#' function to transform to and from geographic lat-long to UTM x-y
#' @param cWHT data frame as generated by the makeTable function
#' @param from string specifying CRS of coordinates in cWHT
#' @param to string specifying CRS of coordinates desired in output
#' @return a data frame
#' @author Adam Cagle
#' @details
#' from and two must be different from one another and can be either "lat-long" or "x-y". The
#' default is from lat-long to x-y.
#' @seealso \code{spTransform}
utmTransform <- function(cWHT, from = "lat-long", to = "x-y"){

  if(!(((from == "lat-long") && (to == "x-y")) | ((from == "x-y") && (to == "lat-long")))){
    stop("can only project from lat-long to x-y or x-y to lat-long")
  }else if(from == "lat-long"){
    cWHT$zoneUTM <- getZoneUTM(mean(c(cWHT$xs, cWHT$xb)))
    originalCRS <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
    newCRS <- CRS(paste("+proj=utm +zone=",
                        getZoneUTM(mean(c(cWHT$xs, cWHT$xb))),
                        " +datum=WGS84 +units=us-ft +no_defs", sep = ""))
  }else if(from == "x-y"){
    originalCRS <- CRS(paste("+proj=utm +zone=",
                             cWHT$zoneUTM[1],
                             " +datum=WGS84 +units=us-ft +no_defs", sep = ""))
    newCRS <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
  }

  cordsS <- data.frame(x = cWHT$xs, y = cWHT$ys)
  cordsO <- data.frame(x = cWHT$xo, y = cWHT$yo)
  cordsB <- data.frame(x = cWHT$xb, y = cWHT$yb)
  cordsBar <- data.frame(xs = cWHT$xsBar, ys = cWHT$ysBar)

  hasCordsO <- !all(is.na(cordsO))
  hasCordsBar <- !all(is.na(cordsBar))

  spPointsS <- SpatialPoints(cordsS, originalCRS)
  spPointsB <- SpatialPoints(cordsB, originalCRS)
  if(hasCordsO){
    spPointsO <- SpatialPoints(cordsO, originalCRS)
  }
  if(hasCordsBar){
    spPointsBar <- SpatialPoints(cordsBar, originalCRS)
  }

  pointsS <- data.frame(coordinates(spTransform(spPointsS, newCRS)))
  names(pointsS) <- c("x", "y")
  pointsB <- data.frame(coordinates(spTransform(spPointsB, newCRS)))
  names(pointsS) <- c("x", "y")
  if(hasCordsO){
    pointsO <- data.frame(coordinates(spTransform(spPointsO, newCRS)))
    names(pointsO) <- c("x", "y")
  }else{
    pointsO <- cordsO
  }
  if(hasCordsBar){
    pointsBar <- data.frame(coordinates(spTransform(spPointsBar, newCRS)))
    names(pointsBar) <- c("xs", "ys")
  }else{
    pointsBar <- cordsBar
  }

  trnCWHT <- cWHT
  trnCWHT$xs <- pointsS$x
  trnCWHT$ys <- pointsS$y
  trnCWHT$xo <- pointsO$x
  trnCWHT$yo <- pointsO$y
  trnCWHT$xb <- pointsB$x
  trnCWHT$yb <- pointsB$y
  trnCWHT$xsBar <- pointsBar$xs
  trnCWHT$ysBar <- pointsBar$ys

  return(trnCWHT)
}

#' Grouping Wells Into PADs
#'
#' This function uses clustering to find surface locations within a user specified threshold
#' distance, asigns these groups of wells to a PAD, and gives the pad an ID.
#' @param cWHTutm a data frame as generated by by utmTransform function (UTM Coordinates)
#' @return a data frame
#' @author Adam Cagle
findPads <- function(cWHTutm, dist = 100){
  # adapted from stack overflow question
  # https://gis.stackexchange.com/questions/64392/find-clusters-of-points-based-distance-rule

  x <- cWHTutm$xs
  y <- cWHTutm$ys
  xy <- SpatialPointsDataFrame(matrix(c(x,y), ncol=2),
                               cWHTutm,
                               proj4string=CRS(paste("+proj=utm +zone=",
                                                     cWHTutm$zoneUTM[1],
                                                     " +datum=WGS84 +units=us-ft +no_defs", sep = "")))

  chc <- hclust(dist(data.frame(rownames=rownames(xy@data), x=coordinates(xy)[,1],
                                y=coordinates(xy)[,2])), method="complete")

  # Distance with threshold
  chcDist <- cutree(chc, h=dist)

  cWHTutm$pad <- chcDist
  return(cWHTutm)
}

#' Collating Well Locations in a Well PAD
#'
#' function to collate the well locations of a single PAD
#' @param X a data frame as generated by findPads function but with rows limited to a single PAD
#' @return a data frame
#' @author Adam Cagle
collatePAD <- function(X){

  # function to calculate the COM's of surfacne and bottom-hole coordinates
  COM <- lapply(data.frame(xs = X$xs, ys = X$ys, xb = X$xb, yb = X$yb), mean)

  # calculating the slope of the central axis which is formed by the line through the COM of the
  # surface location and the bottom-hole locations
  slope <- (COM$yb-COM$ys)/(COM$xb-COM$xs)

  # determining if the slope of the central axis is a case that requires special treatment, i.e.,
  # horizontal or vertical
  getSlopeType <- function(slope){
    if(is.nan(slope)){
      return("surface=bottomhole")
    }else if(slope == Inf){
      return("vertical")
    }else if(slope == 0){
      return("horizontal")
    }else{
      return("ordinary")
    }
  }

  slopeType <- getSlopeType(slope)

  # formula for calculating the points of orthoganality, i.e., the points that will be drawn on an
  # axis passing through the center of mass of the surface locations on an axis perpendicular to the
  # central axis and forming a right angle with the bottom hole locations
  getNodes <- function(X){

    if(slopeType == "ordinary"){
      for (i in 1:nrow(X)) {
        xb = X$xb[i]
        yb = X$yb[i]
        xs = X$xs[i]
        ys = X$ys[i]

        X$xo[i] <- (slope*xb + (slope^-1)*COM$xs + COM$ys - yb)/(slope + (slope^-1))
        X$yo[i] <- slope*X$xo[i] + yb - slope*xb
        X$bo[i] <- yb - slope*xb
      }
    }else if(slopeType == "horizontal"){
      for (i in 1:nrow(X)) {
        xb = X$xb[i]
        yb = X$yb[i]
        xs = X$xs[i]
        ys = X$ys[i]

        X$xo[i] <- COM$xs
        X$yo[i] <- yb
        X$bo[i] <- yb
      }
    }else if(slopeType == "vertical"){
      for (i in 1:nrow(X)) {
        xb = X$xb[i]
        yb = X$yb[i]
        xs = X$xs[i]
        ys = X$ys[i]

        X$xo[i] <- xb
        X$yo[i] <- COM$ys
        X$bo[i] <- NA
      }
    }else if(slopeType == "surface=bottomhole"){
      for (i in 1:nrow(X)){
        xb = X$xb[i]
        yb = X$yb[i]
        xs = X$xs[i]
        ys = X$ys[i]

        X$xo[i] <- xs
        X$yo[i] <- ys
        X$bo[i] <- NA
      }
    }
    return(X)
  }

  X <- getNodes(X)

  getStems <- function(X){
    X <- X[order(X$yo,X$xo, decreasing = c(TRUE, FALSE)),]
    X$stem <- 1:nrow(X)
    return(X)
  }

  X <- getStems(X)

  # formula for calculating the distance between wells based on their parallelized trajectories
  getSpacing <- function(X){
    wellCount <- nrow(X)
    if(wellCount < 2){
      X$spacing <- NA
      return(X)
    }else{
      distance <- rep(NA, wellCount - 1)
      for (i in 1:(wellCount - 1)){
        if(slopeType == "ordinary"){
          # distance[i] <- abs(X$bo[i] - X$bo[i+1])/sqrt(1 + slope^2)
          distance[i] <- sqrt((X$xo[i]-X$xo[i+1])^2+(X$yo[i]-X$yo[i+1])^2)
        }else if(slopeType == "vertical"){
          distance[i] <- abs(X$xo[i] - X$xo[i+1])
        }else if(slopeType == "horizontal"){
          distance[i] <- abs(X$yo[i] - X$yo[i+1])
        }
      }

      if(wellCount == 2){
        X$spacing[1] <- distance[1]
        X$spacing[2] <- distance[1]
      }else if(wellCount > 2){
        for(j in 1){
          X$spacing[j] <- distance[j]
        }
        for(j in 2:(wellCount -1)){
          X$spacing[j] <- 0.5*distance[j-1] + 0.5*distance[j]
        }
        for(j in wellCount){
          X$spacing[j] <- distance[j-1]
        }
      }
    }
    return(X)
  }

  X <- getSpacing(X)

  getBranches <- function(X) {

    # function to test whether or not all bottom hole locations are on the same side of the orthogonal
    # axis. If they are, then the PAD is a "single" pitchfork. If not, then the PAD is a double
    # pitchfork.
    allSameSide <- function(X){

      nWells <- nrow(X)

      if(nWells == 1){
        return(TRUE)
      }else{
        Ax <- X$xo[1]
        Bx <- X$xo[2]
        Ay <- X$yo[1]
        By <- X$yo[2]


        test <- function(Ax, Ay, Bx, By, Cx, Cy){

          value <- (Bx - Ax) * (Cy - Ay) - (By - Ay) * (Cx - Ax)
          if(value > 0){
            side <- "S1"
          }else if(value < 0){
            side <- "S2"
          }else{
            side <- "ON LINE"
          }
          return(side)
        }

        sides <- rep(NA, nWells)

        for (i in 1:nWells) {
          Cx <- X$xb[i]
          Cy <- X$yb[i]
          sides[i] <- test(Ax, Ay, Bx, By, Cx, Cy)
        }
        return(length(unique(sides)) == 1)
      }
    }

    if(allSameSide(X)){
      # PATCH: prevent branch numbers from being reset to 1 when running on second itteration.
      # Will fix this when I add logic to work on branches recursively rather than running through
      # all calculations a second time. originally this was X$branch = 1 without a conditional is.na
      # test.
      if(all(is.na(X$branch))){X$branch = 1}
    }else{

      theta <- function(X){
        nWells <- nrow(X)
        theta <- rep(NA, nWells)
        surfaceLocation <- c(COM$xs, COM$ys)

        angle <- function(s, b){
          traj <- b - s
          (atan2(0,1) - atan2(traj[2],traj[1]))*180/pi
        }

        for (i in 1:nWells) {
          b <- c(X$xb[i], X$yb[i])
          theta[i] <- angle(surfaceLocation, b)
        }

        return(theta)
      }

      if(nrow(X) == 2){
        X$branch <- c(1,2)
      }else{
        X$branch <- kmeans(data.frame(xb = X$xb, yb = X$yb, angle = theta(X)), 2)$cluster
      }
    }
    return(X)
  }

  X <- getBranches(X)

  return(X)
}

#' Finding Surface Location COMs For Each PAD/Branch
#'
#' function to populate entire data frame with the final surface COM's grouped by pad and branch.
#' @param X a data frame as generated by the collatePAD function (see details).
#' @return a data frame
#' @author Adam Cagle
#' @details Before using getCOMs, the collatePAD function needs to be applied looping through the
#' data frame generated by findPads two times. The first itteration should be over "pad", and the
#' second itteration should be over pad and branch. Branches are generated when it's found that the
#' PAD is of the "dual pitchfork" variety.
getCOMs <- function(cWHTutm){
  cords <- function(X){
    COMs <- list(xsBar = mean(X$xs), ysBar = mean(X$ys))
    X$xsBar <- rep(COMs$xsBar, nrow(X))
    X$ysBar <- rep(COMs$ysBar, nrow(X))
    return(X)
  }
  cWHTutm <- ddply(cWHTutm, .(pad, branch), cords)
  return(cWHTutm)
}

#' Finding The Distances Along Well Trajectories
#'
#' function to calculate oLength and dLength, the length along the collated latteral and the direct
#' patch from the surface to bottomhome locations respectively.
#' @param X a data frame as generated by the getCOMs function.
#' @return a data frame which includes oLength and dLength in feet.
#' @author Adam Cagle
getDistances <- function(cWHT){
  cWHT$dLength <- distGeo(cbind(cWHT$xs, cWHT$ys), cbind(cWHT$xb, cWHT$yb))*3.28084
  cWHT$oLength <- distGeo(cbind(cWHT$xo, cWHT$yo), cbind(cWHT$xb, cWHT$yb))*3.28084
  return(cWHT)
}

#' Finding The Azimuth Along the collated Well Trajectory
#'
#' function to calculate oAximuth, the azimuth along the collated latteral.
#' @param X a data frame as generated by the getDistances function.
#' @return a data frame which includes oLength and dLength in feet.
#' @author Adam Cagle
getAzimuth <- function(cWHT){
  cWHT$oAzimuth <- bearing(cbind(cWHT$xo, cWHT$yo), cbind(cWHT$xb, cWHT$yb))
  return(cWHT)
}

#' Collating Well Trajectories
#'
#' function to generate collated well trajectories from well surface and bottomhole locations
#' @param WHT a data fram containing columns xs, ys, xb, and yb. All values should be in lat/long
#' @param dist a number specifying the threshold distance for surface locations to be grouped in
#' PADs. Units are in feet.
#' @return a data frame
#' @author Adam Cagle
#' @details
#' Latitude and longitude are assumed to be WGS84. WHT can contain extra columns, but columns named
#' xs (surface longitude), ys (surface latitude), xb (bottomhole longitude), and yb (bottomhole) latitude
#' are requried. Extra colums will be prserved and passed back to the user in the output.
#' @export
collateWHT <- function(WHT, dist = 250){
  cWHT <- makeTable(WHT)
  cWHTutm <- utmTransform(cWHT)
  cWHTutm <- findPads(cWHTutm, dist)
  cWHTutm <- ddply(cWHTutm, .(pad), collatePAD)
  cWHTutm <- ddply(cWHTutm, .(pad, branch), collatePAD)
  cWHTutm <- getCOMs(cWHTutm)
  cWHT <- utmTransform(cWHT = cWHTutm, from = "x-y", to = "lat-long")
  cWHT <- getDistances(cWHT)
  cWHT <- getAzimuth(cWHT)
  return(cWHT)
}

#' Mapping Collated Well Trajectories
#' function to plot collated well trajectories on an interactive map.
#' @param cWHT a data frame as generated by the collateWHT function.
#' @return a leflet map object
#' @author Adam Cagle
#' @export
mapIt <- function(cWHT){
  m <- leaflet() %>%
    addProviderTiles("CartoDB.Positron") %>%
    addCircles(lng = cWHT$xs, lat = cWHT$ys, color = "red")

  for(i in 1:nrow(cWHT)){
    m <- addPolylines(m, lng = c(cWHT$xsBar[i], cWHT$xo[i], cWHT$xb[i]),
                      lat = c(cWHT$ysBar[i], cWHT$yo[i], cWHT$yb[i]))
  }
  print(m)
  return(m)
}
R-S-C/wellpad documentation built on Oct. 30, 2019, 10:45 p.m.