# Some useful keyboard shortcuts for package authoring:
#
# Install Package: Ctrl + Shift + B
# Check Package: Ctrl + Shift + E
# Test Package: Ctrl + Shift + T
# Build documentation: Ctrl + Shift + D...not working
# Build vignette: Ctrl + Shift + K
#
# library(devtools)
# setwd("G:/R_Stuff/LidarIndexR")
#
# To build documentation:
# devtools::document()
# Utility functions
# ---------- unsignedFourByteIntToDouble
#
#' LidarIndexR -- Utility function to help read unsigned long data types
#'
#' This is a helper function used in the LidarIndexR package to help read
#' \code{unsigned long} data types. It takes a 4-byte block and interprets
#' it as and \code{unsigned long} value. The function is used mainly to read
#' value from LAS file headers.
#'
#' @param i A 4-byte block of data.
#' @return An (invisible) double value.
unsignedFourByteIntToDouble <- function(i) {
d <- as.numeric(i)
d[d<0] <- d[d<0] + 2^32
invisible(d)
}
# ---------- makeFileSize
#
#' LidarIndexR -- Utility function to convert file size string to actual
#' file size.
#'
#' This is a helper function used in the LidarIndexR package to help read
#' file sizes from http or https directory listings. It looks for a trailing
#' character to determine the multipliuer for the numeric portion of the size
#' value. In the case of directories, the \code{sizeString} will be "-". This
#' value is explicitly recognized.
#'
#' @param sizeString A string containing the file size or "-" for directories.
#' @return A (invisible) double value for files and a string for directories.
makeFileSize <- function(sizeString) {
if (sizeString == "-")
fileSize <- "<DIR>"
else {
# have strings with K, M, G, T indicating multiplier for leading numeric portion of string
# get last character
flag <- stringr::str_extract(sizeString, "[A-Z]{1}")
# get all but last character
if (is.na(flag))
fileSize <- as.numeric(substr(sizeString, 1, nchar(sizeString)))
else
fileSize <- as.numeric(substr(sizeString, 1, nchar(sizeString) - 1))
if (!is.na(fileSize) && !is.na(flag)) {
if (flag == "K")
fileSize <- fileSize * 1024.0
if (flag == "M")
fileSize <- fileSize * 1024.0 * 1024.0
if (flag == "G")
fileSize <- fileSize * 1024.0 * 1024.0 * 1024.0
if (flag == "T")
fileSize <- fileSize * 1024.0 * 1024.0 * 1024.0 * 1024.0
} else {
if (!is.na(flag))
fileSize <- 0
}
if (is.na(fileSize))
fileSize <- 0
}
invisible(fileSize)
}
# ---------- DirList_http
#
#' LidarIndexR -- Retrieve a directory listing from a remote http(s) host
#'
#' Retrieve a http or https directory listing from a remote host as either
#' a simple list of file/folder names or a data frame with file/folder information.
#'
#' @param URL URL for a folder on a remote host. Trailing slash is optional.
#' @param fileType Any valid string for the pattern parameter in \code{grep()}.
#' "$" will be appended to the string to search for file/folder names ending
#' with values in \code{fileType}.
#' @param namesOnly Boolean indicating you want only file names (TRUE) or all
#' directory information (FALSE).
#' @param excludeDirectories Boolean indicating you want to exclude folders
#' from the list of returned files/folders.
#' @param directoryOnly Boolean indicating you only want folder names included
#' in the list of returned files/folders.
#' @return A list of file names or a data frame with file names and attribute
#' information. The return value depends on \code{namesOnly}.
#' @examples
#' \dontrun{
#' DirList_http(pasteo("https://rockyweb.usgs.gov/vdelivery/Datasets/Staged/",
#' "Elevation/LPC/Projects/AK_BrooksCamp_2012/laz/"))
#' }
#' @export
DirList_http <- function (
URL,
fileType = NULL,
namesOnly = FALSE,
excludeDirectories = TRUE,
directoryOnly = FALSE
) {
# make sure URL ends with "/"
if (!endsWith(URL, "/"))
URL <- paste0(URL, "/")
dir_read <- readLines(URL)
parsed_dir <- XML::htmlParse(dir_read)
# set up data frame
df <- data.frame("line" = dir_read, isXML = FALSE, isParent = FALSE)
# see if lines are XML
df$isXML <- as.integer(sapply(dir_read, function(x) {XML::isXMLString(x)}))
# drop non-XML lines
df <- df[df$isXML == 1, ]
# see if line has parent directory...look for " - "
df$isParent <- as.integer(sapply(df$line, function(x) {!is.na(stringr::str_extract(x, "[ ]{2}-[ ]{2}"))}))
# drop lines up to and including parent line
parentRow <- match(1, df$isParent)
if (parentRow > 0)
df <- df[(parentRow + 1):nrow(df), ]
# 6/27/2024...USGS index format has changed to add a footer. We need to keep all
# lines that have img tags
# see if lines has "img src="
df$isImg <- as.integer(sapply(df$line, function(x) {!is.na(stringr::str_extract(x, "img src="))}))
# drop non-img lines
df <- df[df$isImg == 1, ]
## drop last row...should be <hr></pre>
##df <- df[1:(nrow(df) -1), ]
# make sure we still have rows...not an empty directory
if (nrow(df) > 0) {
# extract file name
df$Name <- XML::getHTMLLinks(df$line)
# remove slashes in filename
df$Name <- stringr::str_remove_all(df$Name, "/")
df$Name <- stringr::str_remove_all(df$Name, "\\\\")
# extract date
df$Date <- stringr::str_extract(df$line, "[0-9]{4}-[0-9]{2}-[0-9]{2}")
# extract time
df$Time <- stringr::str_extract(df$line, "[0-9]{2}:[0-9]{2}")
# check for directory...look for " - "
df$isFolder <- as.integer(sapply(df$line, function(x) {!is.na(stringr::str_extract(x, "[ ]{2}-[ ]{2}"))}))
t <- regexpr("[0-9]{2}:[0-9]{2}", df$line)
attributes(t) <- NULL
# get file size...assumes time is 5 characters and file size is no more than 8 characters
df$Attribute <- trimws(
substr(df$line,
t + 6,
t + 14
)
)
df$Attribute <- sapply(df$Attribute, makeFileSize)
# clean up
df <- df[, c("Date", "Time", "Attribute", "Name")]
# sort on attribute...<DIR> or size
df <- df[order(df$Attribute), ]
} else {
df <- NULL
}
# we now have the directory listing...apply the options
if (!is.null(df)) {
if (!is.null(fileType)) {
nameFlags <- grepl(paste0(fileType, "$"), df$Name, ignore.case=TRUE)
df <- df[nameFlags, ]
}
if (excludeDirectories) {
df <- df[df$Attribute != "<DIR>", ]
}
if (directoryOnly) {
df <- df[df$Attribute == "<DIR>", ]
}
if (namesOnly) {
df <- df$Name
}
}
return(df)
}
# ---------- DirList
#
#' LidarIndexR -- Retrieve a directory listing from a remote host
#'
#' Retrieve a directory listing from a remote host as either a simple list
#' of file/folder names or a data frame with file/folder information.
#'
#' @param URL URL for a folder on a remote host. Trailing slash is optional.
#' @param fileType Any valid string for the pattern parameter in \code{grep()}.
#' "$" will be appended to the string to search for file/folder names ending
#' with values in \code{fileType}.
#' @param namesOnly Boolean indicating you want only file names (TRUE) or all
#' directory information (FALSE).
#' @param excludeDirectories Boolean indicating you want to exclude folders
#' from the list of returned files/folders.
#' @param directoryOnly Boolean indicating you only want folder names included
#' in the list of returned files/folders.
#' @param dirFormat String indicating the expected directory format for the
#' \code{URL}. Valid values are "dir" if the URL returns the Date, Time,
#' Attribute/Size, and file name; "ls" if the URL returns directory
#' information similar to the UNIX \code{ls -al} command; or "" if you
#' want the function to try to determine the directory format. This is
#' ignored if the URL uses http or https schemes.
#' @param ... Arguments passed to \code{RCurl::getURL()}. This is ignored
#' if the URL uses http or https schemes.
#' @return A list of file names or a data frame with file names and attribute
#' information. The return value depends on \code{namesOnly}.
#' @examples
#' \dontrun{
#' DirList("ftp://rockyftp.cr.usgs.gov/vdelivery/Datasets/Staged/")
#' DirList(paste0("ftp://rockyftp.cr.usgs.gov/vdelivery/Datasets/",
#' "Staged/Elevation/LPC/Projects/AK_BrooksCamp_2012/laz/"),
#' "\\.las|\\.laz")
#' }
#' @export
DirList <- function (
URL,
fileType = NULL,
namesOnly = FALSE,
excludeDirectories = TRUE,
directoryOnly = FALSE,
dirFormat = "dir", # "ls" or "dir"
...
) {
# make sure URL ends with "/"
if (!endsWith(URL, "/"))
URL <- paste0(URL, "/")
# if URL is http or https, return list from DirList_http
if (urltools::scheme(URL) == "http" || urltools::scheme(URL) == "https") {
return(DirList_http(URL,
fileType = fileType,
namesOnly = namesOnly,
excludeDirectories = excludeDirectories,
directoryOnly = directoryOnly
)
)
}
# get folder listing and parse into individual files
# create an empty character vector on error
filenames <- tryCatch(RCurl::getURL(URL, ftp.use.epsv = FALSE, dirlistonly = namesOnly, ...), error = function(e) {character(0)})
if (length(filenames)) {
filenames <- strsplit(filenames, "\r\n")
filenames <- unlist(filenames)
if (!is.null(fileType))
filenames <- grep(paste0(fileType, "$"), filenames, ignore.case=TRUE, value=TRUE)
# create a dataframe and sort on the attribute (size for files)
if (!namesOnly) {
if (dirFormat == "") {
# look at first 10 characters of 1st string
if (grepl("/", filenames[1], fixed = TRUE))
dirFormat <- "dir"
else
dirFormat <- "ls"
}
# parse information
if (dirFormat == "ls") {
# add a blank filename entry
filenames[length(filenames) + 1] <- ""
df <- readr::read_table2(filenames, col_names = c("Permissions", "Links", "Owner", "Group", "Attribute", "Month", "Day", "Year", "Name"), n_max = length(filenames) - 1)
} else {
df <- readr::read_fwf(filenames, readr::fwf_widths(c(8, 9, 21, NA), c("Date", "Time", "Attribute", "Name")))
}
# df <- read.table(text = filenames)
# colnames(df) <- c("Date", "Time", "Attribute", "Name")
df <- df[order(df$Attribute),]
if (dirFormat != "dir") {
if (directoryOnly) {
df <- df[startsWith(df$Permissions, "d"), ]
} else if (excludeDirectories) {
df <- df[!startsWith(df$Permissions, "d"), ]
}
} else {
if (directoryOnly) {
df <- df[df$Attribute == "<DIR>", ]
} else if (excludeDirectories) {
df <- df[df$Attribute != "<DIR>", ]
}
}
return(df)
}
}
return(filenames)
}
# ---------- DirListByType
#
#' LidarIndexR -- Retrieve a directory listing from a remote host
#'
#' Retrieve a directory listing from a remote host as a simple list
#' of file names.
#'
#' DirListByType is an alias for \code{DirList(URL, fileType = fileType, namesOnly = TRUE, ...)}
#'
#' @param URL URL for a folder on a remote host. Trailing slash is optional.
#' @param fileType Any valid string for the pattern parameter in \code{grep()}.
#' "$" will be appended to the string to search for file/folder names ending
#' with values in \code{fileType}.
#' @param ... Arguments passed to \code{RCurl::getURL()}. This is ignored
#' if the URL uses http or https schemes.
#' @return A list of file names.
#' @examples
#' \dontrun{
#' DirListByType(paste0("ftp://rockyftp.cr.usgs.gov/vdelivery/Datasets/",
#' "Staged/Elevation/LPC/Projects/AK_BrooksCamp_2012/laz/"),
#' "\\.las|\\.laz")
#' }
#' @export
DirListByType <- function (
URL,
fileType,
...
) {
# make sure URL ends with "/"
if (!endsWith(URL, "/"))
URL <- paste0(URL, "/")
return(DirList(URL, fileType = fileType, namesOnly = TRUE, ...))
# # get folder listing and parse into individual files
# # create an empty character vector on error
# filenames <- tryCatch(RCurl::getURL(URL, ftp.use.epsv = FALSE, dirlistonly = TRUE, ...), error = function(e) {character(0)})
# if (length(filenames)) {
# filenames <- strsplit(filenames, "\r\n")
# filenames <- unlist(filenames)
#
# # find files ending with the desired type (extension)
# filenames <- grep(paste0(fileType, "$"), filenames, ignore.case=TRUE, value=TRUE)
# }
}
# ---------- DirListByName
#
#' LidarIndexR -- Retrieve a directory listing from a remote host of all files
#' with a specific name (various extensions)
#'
#' Retrieve a simple list of all files with the specified name but various
#' extensions.
#'
#' @param URL URL for a folder on a remote host. Trailing slash is optional.
#' @param fileName A string containing the name of a single file. All files
#' with the same name (but different extensions) will be returned.
#' @param ... Arguments passed to \code{RCurl::getURL()}.
#' @return A list of file names.
#' @examples
#' \dontrun{
#' DirListByName(paste0("ftp://rockyftp.cr.usgs.gov/vdelivery/Datasets/",
#' "Staged/Hydrography/NHD/HU4/HighResolution/Shape/"),
#' "NHD_H_0101_HU4_Shape.jpg")
#' }
#' @export
DirListByName <- function (
URL,
fileName,
...
) {
# make sure URL ends with "/"
if (!endsWith(URL, "/"))
URL <- paste0(URL, "/")
fileName <- sub(pattern = "(.*)\\..*$", replacement = "\\1", basename(fileName))
# get list of file names in URL
filenames <- DirList(URL, namesOnly = TRUE, excludeDirectories = TRUE, ...)
# get folder listing and parse into individual files
# create an empty character vector on error
# filenames <- tryCatch(RCurl::getURL(URL, ftp.use.epsv = FALSE, dirlistonly = TRUE, ...), error = function(e) {character(0)})
if (length(filenames)) {
# filenames <- strsplit(filenames, "\r\n")
# filenames <- unlist(filenames)
# find files matching the name...various extensions
filenames <- grep(paste0("^", fileName), filenames, ignore.case=TRUE, value=TRUE)
}
return(filenames)
}
# ---------- ReadRemoteLASProjection
#
#' LidarIndexR -- Read LAS/LAZ header and extract CRS information
#'
#' Download the header for a LAS/LAZ file and read CRS information from the header.
#' Only the file header is downloaded (including VLRs) so you don't have to worry
#' about the size of the LAS/LAZ file.
#'
#' @param URL URL for a LAS/LAZ file on a remote host.
#' @param tempFolder A valid folder name where the LAS/LAZ file header can be
#' temporarily stored. The header file is deleted after use.
#' @param quiet Boolean to control display of status information. If TRUE,
#' information is *not* displayed. Otherwise, status information is displayed.
#' @return A string (invisible) containing the CRS information retrieved from the LAS/LAZ
#' header.
#' @examples
#' \dontrun{
#' ReadRemoteLASProjection()
#' }
#' @export
ReadRemoteLASProjection <- function (
URL,
tempFolder,
quiet = FALSE
) {
# open a connection and read only enough information to get the header size. Then read the entire
# header and save to local file and use lidR::readLASheader to read the header and return
# the projection information for the file
#
# NOTE: we will get a larger offset to the point data due to the VLR for LAZ compression. LAZ readers
# reduce the offset to compensate for this VLR (effectively ignore that it exists)
OffsettoPointData <- 0
crs <- ""
con <- tryCatch(url(URL, open = "rb"), error = function(e) {NA})
if (is.object(con)) {
bs <- readBin(con, "raw", 120)
close(con)
} else {
return(crs)
}
# use raw vector as input stream and parse header values
con = rawConnection(bs, open = "rb")
Signaturebytes <- readBin(con, "raw", n = 4, size = 1, endian = "little")
Signature <- readBin(Signaturebytes, "character", size = 4, endian = "little")
if (Signature == "LASF") {
readBin(con, "raw", 92) # skip bytes
OffsettoPointData <- readBin(con, "integer", n = 1, size = 4)
OffsettoPointData <- unsignedFourByteIntToDouble(OffsettoPointData)
}
close(con)
if (Signature == "LASF" && OffsettoPointData > 0) {
# read the entire header and write to local file
con <- url(URL, open = "rb")
bs <- readBin(con, "raw", OffsettoPointData)
close(con)
writeBin(bs, con = file.path(tempFolder, "__temp__.las"), useBytes = TRUE)
if (file.exists(file.path(tempFolder, "__temp__.las"))) {
# read header
t <- tryCatch(lidR::readLASheader(file.path(tempFolder, "__temp__.las")), error = function(e) {NA})
if (is.object(t)) {
crs <- lidR::st_crs(t)
crs <- crs$wkt
}
# delete temp file
file.remove(file.path(tempFolder, "__temp__.las"))
} else {
if (!quiet) message("Temp file not created: ", file.path(tempFolder, "__temp__.las"))
}
}
if (!quiet) message(basename(url), ": ", crs)
return(invisible(crs))
}
# ---------- ReadRemoteLASHeader
#
#' LidarIndexR -- Read the header for a remote LAS/LAZ file
#'
#' Reads the header of a LAS/LAZ file and returns header information in a
#' data frame. Only the header is read.
#'
#' @param URL URL for a LAS/LAZ file on a remote host.
#' @param quiet Boolean to control display of status information. If TRUE,
#' information is *not* displayed. Otherwise, status information is displayed.
#' @return A data frame containing header information.
#' @examples
#' \dontrun{
#' ReadRemoteLASHeader()
#' }
#' @export
ReadRemoteLASHeader <- function(
URL,
quiet = FALSE
) {
# open connection and read 375 bytes...header for V1.4 is 375 bytes
# hopefully this is faster than reading individual values via ftp...but i didn't test
# if (!quiet)
# message("reading header for ", basename(URL))
con <- url(URL, open = "rb")
bs <- readBin(con, "raw", 375)
close(con)
# use raw vector as input stream and parse header values
con = rawConnection(bs, open = "rb")
Signaturebytes <- readBin(con, "raw", n = 4, size = 1, endian = "little")
Signature <- readBin(Signaturebytes, "character", size = 4, endian = "little")
if (Signature == "LASF") {
readBin(con, "raw", 4) # skip bytes
readBin(con, "raw", 16) # skip bytes
VersionMajor <- readBin(con, "integer", size = 1, n = 1, signed = FALSE)
VersionMinor <- readBin(con, "integer", size = 1, n = 1, signed = FALSE)
readBin(con, "raw", 64) # skip bytes
DayOfYear <- readBin(con, "int", n = 1, size = 2, signed = FALSE)
Year <- readBin(con, "integer", n = 1, size = 2, signed = FALSE)
HeaderSize <- readBin(con, "integer", n = 1, size = 2, signed = FALSE)
readBin(con, "raw", 4) # skip bytes
VLRCount <- readBin(con, "integer", n = 1, size = 4)
VLRCount <- unsignedFourByteIntToDouble(VLRCount)
PointRecordFormat <- readBin(con, "integer", n = 1, size = 1, signed = FALSE)
if (PointRecordFormat > 127) PointRecordFormat <- (PointRecordFormat - 128)
PointRecordLength <- readBin(con, "int", 1, size = 2, signed = FALSE)
PointCount <- readBin(con, "integer", 1, size = 4)
PointCount <- unsignedFourByteIntToDouble(PointCount)
readBin(con, "raw", 68) # skip bytes
MaxX <- readBin(con, "numeric", 1, size = 8)
MinX <- readBin(con, "numeric", 1, size = 8)
MaxY <- readBin(con, "numeric", 1, size = 8)
MinY <- readBin(con, "numeric", 1, size = 8)
MaxZ <- readBin(con, "numeric", 1, size = 8)
MinZ <- readBin(con, "numeric", 1, size = 8)
if (VersionMajor == 1 && VersionMinor > 3) {
readBin(con, "raw", 20) # skip bytes
PointCount <- readBin(con, "integer", 1, size = 8)
}
if (!quiet)
message("Read extent of ", basename(URL))
} else {
VersionMajor <- NA
VersionMinor <- NA
DayOfYear <- NA
Year <- NA
HeaderSize <- NA
VLRCount <- NA
PointRecordFormat <- NA
PointRecordLength <- NA
PointCount <- NA
MaxX <- NA
MinX <- NA
MaxY <- NA
MinY <- NA
MaxZ <- NA
MinZ <- NA
if (!quiet)
message("Failed to read extent of ", basename(URL))
}
close(con)
# build data frame to return
df <- data.frame(
"URL" = URL,
"FileName" = basename(URL),
"LASVersion" = VersionMajor + VersionMinor / 10,
"FileDayOfYear" = DayOfYear,
"FileYear" = Year,
"HeaderSize" = HeaderSize,
"VLRCount" = VLRCount,
"PointRecordFormat" = PointRecordFormat,
"PointRecordLength" = PointRecordLength,
"PointCount" = PointCount,
"MinX" = MinX,
"MinY" = MinY,
"MinZ" = MinZ,
"MaxX" = MaxX,
"MaxY" = MaxY,
"MaxZ" = MaxZ
)
return(df)
}
# ---------- FetchAndExtractCRSFromPoints
#
#' LidarIndexR -- Retrieve CRS information from a LAS/LAZ point file
#'
#' Reads CRS information from LAS/LAZ files on a remote host. This is done by
#' reading only the file header \code{headerOnly = TRUE} or retrieving the entire
#' file and reading the header \code{headerOnly = FALSE}.
#'
#' @param baseURL URL for a folder on a remote host. Trailing slash should *not*
#' be included.
#' @param folderName Folder name on the \code{baseURL} containing LAS/LAZ
#' files.
#' @param tempFolder A valid folder name where the LAS/LAZ file header can be
#' temporarily stored. The header file is deleted after use.
#' @param headerOnly Boolean. If TRUE, only the file header is read. If FALSE,
#' the entire LAS/LAZ file is retrieved and then the header is read to
#' extract CRS information.
#' @param fetchnew Boolean. If TRUE, the file is always retrieved. If FALSE,
#' the file is only retrieved if it does not already exist in the \code{tempFolder}.
#' @param quiet Boolean to control display of status information. If TRUE,
#' information is *not* displayed. Otherwise, status information is displayed.
#' @param ... Arguments passed to \code{RCurl::getURL()}.
#' @return String (invisible) containing a valid input value for \code{st_crs()}.
#' @examples
#' \dontrun{
#' FetchAndExtractCRSFromPoints()
#' }
#' @export
FetchAndExtractCRSFromPoints <- function (
baseURL,
folderName,
tempFolder,
headerOnly = TRUE,
fetchnew = FALSE,
quiet = FALSE,
...
) {
crs <- ""
t <- paste0(baseURL, "/", folderName, "/")
# build directory list...first file will be smallest
fileList <- DirList(t, fileType = "\\.las|\\.laz")
# download first file
if (length(fileList)) {
if (!quiet) message(folderName, "/", fileList$Name[1], " -- ")
if (headerOnly) {
crs <- ReadRemoteLASProjection(paste0(t, fileList$Name[1]), tempFolder, quiet = TRUE)
} else {
if (!file.exists(file.path(tempFolder, fileList$Name[1])) || fetchnew) {
utils::download.file(paste0(t, fileList$Name[1]), file.path(tempFolder, fileList$Name[1]), quiet = quiet, mode = "wb", ...)
}
# load file and extract CRS (if any)
las <- tryCatch(lidR::readLASheader(file.path(tempFolder, fileList$Name[1])), error = function(e) {NA})
if (is.object(las)) {
crs <- lidR::st_crs(las)
crs <- crs$wkt
} else {
crs <- ""
}
}
}
if (!quiet) {
message(basename(baseURL), ": ", crs)
}
return(invisible(crs))
}
# function to download a shapefile, load it with st_read and
# get the crs. Return is a CRS in a proj4string
# ---------- FetchAndExtractCRSFromIndex
#
#' LidarIndexR -- Retrieve a shapefile (all files) and read CRS information
#'
#' Retrieves all files related to a shapefile (same name as \code{fileName} but
#' different extensions) and then reads CRS information from the projection
#' file.
#'
#' @param baseURL URL for a folder on a remote host. Trailing slash should *not*
#' be included.
#' @param folderName Folder name on the \code{baseURL} containing the shapefile.
#' @param fileName A string containing the name of a single file. All
#' files with the same name (but different extensions) will be retrieved. The
#' projection file is then read to provide CRS information.
#' @param tempFolder A valid folder name where the files can be
#' temporarily stored.
#' @param fetchnew Boolean. If TRUE, the files are always retrieved. If FALSE,
#' the files are only retrieved if they do not already exist in the \code{tempFolder}.
#' @param quiet Boolean to control display of status information. If TRUE,
#' information is *not* displayed. Otherwise, status information is displayed.
#' @param ... Arguments passed to \code{RCurl::getURL()}.
#' @return String (invisible) containing a valid input value for \code{st_crs()}.
#' @examples
#' \dontrun{
#' FetchAndExtractCRSFromIndex("ftp://rockyftp.cr.usgs.gov/vdelivery/Datasets/",
#' "Staged/Hydrography/NHD/HU4/HighResolution/Shape/",
#' "NHD_H_0101_HU4_Shape.shp",
#' ".")
#' }
#' @export
FetchAndExtractCRSFromIndex <- function (
baseURL,
folderName,
fileName,
tempFolder,
fetchnew = FALSE,
quiet = FALSE,
...
) {
crs <- ""
t <- paste0(baseURL, "/", folderName, "/")
# this returns a list of files with the same name but different extensions
filenames <- DirListByName(t, fileName)
# get all the files associated with the index shapefile
if (length(filenames)) {
for (filename in filenames) {
if (!file.exists(paste0(tempFolder, "/", filename)) || fetchnew) {
utils::download.file(paste0(t, filename), paste0(tempFolder, "/", filename), mode = "wb")
}
}
# load index file and extract CRS (if any)
index <- tryCatch(sf::st_read(file.path(tempFolder, fileName)), error = function(e) {NA})
if (is.object(index)) {
crs <- lidR::st_crs(index)
crs <- crs$wkt
# crs <- raster::projection(index)
} else {
crs <- ""
}
}
if (!quiet) {
message(basename(baseURL), ": ", crs)
}
return(invisible(crs))
}
# function to download .prj file associated with the .shp file passed in fileName
# Use the string read form the .prj file to get the crs. Return is a CRS in proj4string
# ---------- FetchAndExtractCRSFromPrj
#
#' LidarIndexR -- Retrieve a projection file and read CRS information
#'
#' Retrieves only the projection file related to a shapefile (same name as
#' \code{fileName} but with .prj extensions) and then reads CRS information
#' from the projection file.
#'
#' @param baseURL URL for a folder on a remote host. Trailing slash should *not*
#' be included.
#' @param folderName Folder name on the \code{baseURL} containing the shapefile.
#' @param fileName A string containing the name of a single file. The projection
#' file with the same name (but extension of .prj) will be retrieved. The
#' projection file is then read to provide CRS information.
#' @param tempFolder A valid folder name where the projection file can be
#' temporarily stored.
#' @param fetchnew Boolean. If TRUE, the file is always retrieved. If FALSE,
#' the file is only retrieved if it does not already exist in the \code{tempFolder}.
#' @param quiet Boolean to control display of status information. If TRUE,
#' information is *not* displayed. Otherwise, status information is displayed.
#' @param ... Arguments passed to \code{RCurl::getURL()}.
#' @return String (invisible) containing a valid input value for \code{st_crs()}.
#' @examples
#' \dontrun{
#' FetchAndExtractCRSFromPrj("ftp://rockyftp.cr.usgs.gov/vdelivery/Datasets/",
#' "Staged/Hydrography/NHD/HU4/HighResolution/Shape/",
#' "NHD_H_0101_HU4_Shape.prj",
#' ".")
#' }
#' @export
FetchAndExtractCRSFromPrj <- function (
baseURL,
folderName,
fileName,
tempFolder,
fetchnew = FALSE,
quiet = FALSE,
...
) {
crs <- ""
t <- paste0(baseURL, "/", folderName, "/")
# this returns a list of files with the same name but different extensions
filenames <- DirListByName(t, fileName)
# we just need the .prj file
filenames <- grep(paste0(".prj", "$"), filenames, ignore.case=TRUE, value=TRUE)
# get all the files associated with the index shapefile
if (length(filenames)) {
for (f in filenames) {
if (!file.exists(paste0(tempFolder, "/", f)) || fetchnew) {
utils::download.file(paste0(t, f), paste0(tempFolder, "/", f), mode = "wb")
}
}
f <- file(file.path(tempFolder, fileName), open = "rt")
# cat("Opening:", file.path(tempFolder, fileName), "\n")
if (isOpen(f)) {
l <- readLines(f, n = -1, warn = FALSE)
close(f)
# cat(l, "\n")
crs <- sp::CRS(l)@projargs
} else {
crs <- ""
}
}
if (!quiet) {
message(basename(baseURL), ": ", crs)
}
return(invisible(crs))
}
# ---------- BuildIndexFromPoints
#
#' LidarIndexR -- Create spatial index by reading LAS/LAZ file headers
#'
#' Longer description
#'
#' @param baseURL URL for a folder on a remote host. Trailing slash should *not*
#' be included.
#' @param folderName Folder name on the \code{baseURL} containing LAS/LAZ files.
#' @param outputFile Full path and filename on the local file system for the index
#' file.
#' @param projString A valid projection string that can be used with the \code{crs}
#' parameter in \code{st_sf}. \code{projString} should represent the projection
#' pf the point data. If using EPSG codes, do not enclose the EPSG number in quotes.
#' @param outputCRS A valid projection string that can be used with the \code{crs}
#' parameter in \code{st_transform} to reproject the index.
#' @param fileType Any valid string for the pattern parameter in \code{grep()}.
#' "$" will be appended to the string to search for file/folder names ending
#' with values in \code{fileType}.
#' @param dirFormat String indicating the expected directory format for the
#' \code{URL}. Valid values are "dir" if the URL returns the Date, Time,
#' Attribute/Size, and file name; "ls" if the URL returns directory
#' information similar to the UNIX \code{ls -al} command; or "" if you
#' want the function to try to determine the directory format.
#' @param dimensionThreshold Size threshold used to omit files from the index.
#' This is intended to help omit invalid LAS.LAZ files from the index. If
#' the height or width of the point tile exceeds the threshold, the tile
#' will be omitted.
#' @param appendInfo Data frame (single row) with values to append to each
#' feature in the return \code{sf} object. If NULL, no information is appended.
#' If \code{appendInfo} is \code{sf} object, the geometry will be removed
#' before appending to each feature.
#' @param rebuild Boolean. If TRUE, the index is always created. If FALSE,
#' the index is only created if it does not already exist.
#' @param quiet Boolean to control display of status information. If TRUE,
#' information is *not* displayed. Otherwise, status information is displayed.
#' @return Boolean indicating success (TRUE) or failure (FALSE).
#' @examples
#' \dontrun{
#' BuildIndexFromPoints()
#' }
#' @export
BuildIndexFromPoints <- function (
baseURL,
folderName,
outputFile,
projString = NULL,
outputCRS = NULL,
fileType = "\\.las|\\.laz",
dirFormat = "ls",
dimensionThreshold = 50000,
appendInfo = NULL,
rebuild = FALSE,
quiet = FALSE
) {
if (file.exists(outputFile) && !rebuild) {
if (!quiet) message("Index already exist...skipping: ", basename(outputFile))
return(TRUE);
}
folderURL <- paste0(baseURL, "/", folderName, "/")
# get list of .laz & .las files...full directory info
flist <- DirList(folderURL, "\\.las|\\.laz", dirFormat = dirFormat)
if (nrow(flist) > 0) {
if (!quiet) message("Building index: ", basename(outputFile))
# prepend folder path to file names
fileURLs <- paste0(folderURL, flist$Name)
# read headers
t <- lapply(fileURLs, ReadRemoteLASHeader) # returns a list of dataframes
# convert to a simple dataframe
t_df <- do.call("rbind", t)
# drop any rows with NA values...bad LAS file...only check min/max XYZ values
t_df <- t_df[stats::complete.cases(t_df[, 11:16]), ]
# drop rows where width or height is >dimensionThreshold units
t_df <- t_df[((t_df$MaxX - t_df$MinX) < dimensionThreshold & (t_df$MaxY - t_df$MinY) < dimensionThreshold), ]
# create sf set of tile polygons
if (nrow(t_df) > 0) {
lst <- lapply(1:nrow(t_df), function(x) {
# create a matrix of coordinates that also 'close' the polygon
res <- matrix(c(t_df[x, 'MinX'], t_df[x, 'MinY'],
t_df[x, 'MinX'], t_df[x, 'MaxY'],
t_df[x, 'MaxX'], t_df[x, 'MaxY'],
t_df[x, 'MaxX'], t_df[x, 'MinY'],
t_df[x, 'MinX'], t_df[x, 'MinY']) ## need to close the polygon
, ncol =2, byrow = T
)
# create polygon objects
sf::st_polygon(list(res))
}
)
tiles_sf <- sf::st_sf(t_df, sf::st_sfc(lst), crs = projString)
if (!is.null(projString)) {
tiles_sf <- sf::st_set_crs(tiles_sf, projString)
}
# reproject to outputCRS
if (!is.null(projString) && !is.null(outputCRS)) {
tiles_sf <- sf::st_transform(tiles_sf, crs = outputCRS)
}
if (!is.null(appendInfo)) {
# see if appendInfo is sf object...if so drop the geometry
if (inherits(appendInfo, "sf"))
appendInfo$geometry <- NULL
# duplicate rows
info <- do.call("rbind", replicate(nrow(tiles_sf), appendInfo, simplify = FALSE))
# cbind to tile data frame
tiles_sf <- cbind(tiles_sf, info)
}
# write output
sf::st_write(tiles_sf, outputFile, delete_dsn = TRUE, quiet = TRUE)
return(TRUE)
} else {
if (!quiet) message(" ***No LAS polygons")
}
} else {
if (!quiet) message(" ***No LAS files")
}
return(FALSE)
}
# ---------- BuildIndexFromUSGSProjectIndexItem
#
#' LidarIndexR -- Create spatial index by reading LAS/LAZ file headers
#' associated with a USGS project index item.
#'
#' Longer description
#'
#' @param projectItem Data frame of sf object with information for a single USGS
#' lidar project as found in the USGS FESM index.
#' @param folderName Folder name on the \code{baseURL} containing LAS/LAZ files.
#' @param outputFile Full path and file name on the local file system for the index
#' file.
#' @param projString A valid projection string that can be used with the \code{crs}
#' parameter in \code{st_sf}. \code{projString} should represent the projection
#' of the point data. If using EPSG codes, do not enclose the EPSG number in quotes.
#' @param outputCRS A valid projection string that can be used with the \code{crs}
#' parameter in \code{st_transform} to reproject the index.
#' @param fileType Any valid string for the pattern parameter in \code{grep()}.
#' "$" will be appended to the string to search for file/folder names ending
#' with values in \code{fileType}.
#' @param dirFormat String indicating the expected directory format for the
#' \code{URL}. Valid values are "dir" if the URL returns the Date, Time,
#' Attribute/Size, and file name; "ls" if the URL returns directory
#' information similar to the UNIX \code{ls -al} command; or "" if you
#' want the function to try to determine the directory format.
#' @param appendCols List of column numbers or column names for information from the
#' \code{projectItem} to append to individual tile records in the index. If
#' column names conflict with existing column names, they will be adjusted
#' to remove the conflict. if NULL, no column information is appended.
#' @param dimensionThreshold Size threshold used to omit files from the index.
#' This is intended to help omit invalid LAS.LAZ files from the index. If
#' the height or width of the point tile exceeds the threshold, the tile
#' will ne omitted.
#' @param rebuild Boolean. If TRUE, the index is always created. If FALSE,
#' the index is only created if it does not already exist.
#' @param quiet Boolean to control display of status information. If TRUE,
#' information is *not* displayed. Otherwise, status information is displayed.
#' @return Boolean indicating success (TRUE) or failure (FALSE).
#' @examples
#' \dontrun{
#' BuildIndexFromUSGSProjectIndexItem()
#' }
#' @export
BuildIndexFromUSGSProjectIndexItem <- function (
projectItem,
folderName,
outputFile,
projString = NULL,
outputCRS = NULL,
fileType = "\\.las|\\.laz",
dirFormat = "ls",
appendCols = c("workunit_id",
"ql",
"collect_start",
"collect_end",
"p_method",
"horiz_crs",
"vert_crs"),
dimensionThreshold = 50000,
rebuild = FALSE,
quiet = FALSE
) {
if (file.exists(outputFile) && !rebuild) {
if(!quiet) message("Index already exists...skipping: ", basename(outputFile))
return(TRUE);
}
URL <- projectItem$lpc_link
if (!endsWith(URL, "/"))
URL <- paste0(URL, "/")
folderURL <- paste0(URL, folderName, "/")
# cat("Folder:", folderURL, "\n")
# get list of .laz & .las files...full directory info
flist <- DirList(folderURL, "\\.las|\\.laz", dirFormat = dirFormat)
if (nrow(flist) > 0) {
if (!quiet) message("Building index: ", basename(outputFile))
# prepend folder path to file names
fileURLs <- paste0(folderURL, flist$Name)
# read headers
t <- lapply(fileURLs, ReadRemoteLASHeader) # returns a list of dataframes
# convert to a simple dataframe
t_df <- do.call("rbind", t)
# drop any rows with NA values...bad LAS file...only check min/max XYZ values
t_df <- t_df[stats::complete.cases(t_df[, 11:16]), ]
# drop rows where width or height is >dimensionThreshold units
t_df <- t_df[((t_df$MaxX - t_df$MinX) < dimensionThreshold & (t_df$MaxY - t_df$MinY) < dimensionThreshold), ]
# create sf set of tile polygons
if (nrow(t_df) > 0) {
lst <- lapply(1:nrow(t_df), function(x) {
# create a matrix of coordinates that also 'close' the polygon
res <- matrix(c(t_df[x, 'MinX'], t_df[x, 'MinY'],
t_df[x, 'MinX'], t_df[x, 'MaxY'],
t_df[x, 'MaxX'], t_df[x, 'MaxY'],
t_df[x, 'MaxX'], t_df[x, 'MinY'],
t_df[x, 'MinX'], t_df[x, 'MinY']) ## need to close the polygon
, ncol =2, byrow = T
)
# create polygon objects
sf::st_polygon(list(res))
}
)
if (is.null(projString)) {
prs <- strtoi(projectItem$horiz_crs)
} else {
prs <- projString
}
tiles_sf <- sf::st_sf(t_df, sf::st_sfc(lst), crs = prs)
tiles_sf <- sf::st_set_crs(tiles_sf, prs)
# reproject to outputCRS
if (!is.na(lidR::st_crs(tiles_sf)) && !is.null(outputCRS)) {
# if (!is.na(raster::projection(tiles_sf)) && !is.null(outputCRS)) {
tiles_sf <- sf::st_transform(tiles_sf, crs = outputCRS)
}
# see if we have a list of columns to add to the tiles
if (!is.null(appendCols)) {
# build data frame with requested columns repeated over rows
info <- data.frame(projectItem[, appendCols])
# drop geometry
info$geometry <- NULL
# duplicate rows
info <- do.call("rbind", replicate(nrow(tiles_sf), info, simplify = FALSE))
# cbind to tile data frame
tiles_sf <- cbind(tiles_sf, info)
}
# else {
# # add fields from the project information
# tiles_sf$workunit_id <- projectItem$workunit_id
# tiles_sf$ql <- projectItem$ql
# tiles_sf$collect_start <- projectItem$collect_start
# tiles_sf$collect_end <- projectItem$collect_end
# tiles_sf$p_method <- projectItem$p_method
# tiles_sf$horiz_crs <- projectItem$horiz_crs
# tiles_sf$vert_crs <- projectItem$vert_crs
#
# # rearrange
# tiles_sf <- tiles_sf[, c(1:2, 17:23, 3:16, 24)]
# }
# write output
sf::st_write(tiles_sf, outputFile, delete_dsn = TRUE, quiet = TRUE)
return(TRUE)
} else {
if (!quiet) message(" ***No LAS polygons")
}
} else {
if (!quiet) message(" ***No LAS files")
}
return(FALSE)
}
# ---------- BuildProjectPolygonFromIndex
#
#' LidarIndexR -- Create polygon(s) representing the area covered by a tile index
#'
#' Creates a bounding polygon(s) given a set of point tile polygons. This is useful to
#' create project polygons to display the areas covered by a lidar project.
#'
#' @param indexFile Full path and filename on the local file system for the tile index
#' file.
#' @param projectIdentifier Character string containing an identifier for the project
#' area. This will be added to each feature in the return \code{sf} object.
#' @param nx Integer specifying the number of cells (width) of the raster used
#' to merge polygons.
#' @param ny Integer specifying the number of cells (height) of the raster used
#' to merge polygons.
#' @param layer Layer name in \code{indexFile} to use for the polygon creation.
#' @param outputCRS A valid projection string that can be used with the \code{crs}
#' parameter in \code{st_transform} to reproject the index. If using EPSG codes,
#' do not enclose the EPSG number in quotes.
#' @param appendInfo Data frame (single row) with values to append to each feature
#' in the return \code{sf} object. If NULL, no information is appended.
#' If \code{appendInfo} is \code{sf} object, the geometry will be removed
#' before appending to each feature.
#' @param outputFile Full path and file name on the local file system for the index
#' file.
#' @param rebuild Boolean. If TRUE, the index is always created. If FALSE,
#' the index is only created if it does not already exist.
#' @param quiet Boolean to control display of status information. If TRUE,
#' information is *not* displayed. Otherwise, status information is displayed.
#' @return An \code{sf} object containing the polygon(s) for the project area.
#' @examples
#' \dontrun{
#' BuildProjectPolygonFromIndex()
#' }
#' @export
BuildProjectPolygonFromIndex <- function (
indexFile,
projectIdentifier,
nx = 512,
ny = 512,
layer = "",
outputCRS = NULL,
appendInfo = NULL,
outputFile = NULL,
rebuild = FALSE,
quiet = TRUE
) {
if (!rebuild && !is.null(outputFile)) {
# see if output file exists
if (file.exists(outputFile)) {
if (!quiet) message("Project polygon file already exists...skipping: ", basename(outputFile))
return(TRUE);
}
}
if (layer == "")
t_sf <- sf::st_read(indexFile, quiet = TRUE)
else
t_sf <- sf::st_read(indexFile, layer = layer, quiet = TRUE)
if (is.object(t_sf)) {
t_sf$PID <- 1
# rasterize
r_poly <- stars::st_rasterize(t_sf[, "PID"], nx = nx, ny = ny)
# convert back to polygons...merge tiles
t_rp <- sf::st_as_sf(r_poly, as_points = FALSE, merge = TRUE)
sf::st_make_valid(t_rp)
if (!is.null(outputCRS)) {
t_rp <- sf::st_transform(t_rp, crs = outputCRS)
}
t_rp$PID <- projectIdentifier
if (!is.null(appendInfo)) {
# see if appendInfo is sf object...if so drop the geometry
if (inherits(appendInfo, "sf"))
appendInfo$geometry <- NULL
# duplicate rows
info <- do.call("rbind", replicate(nrow(t_rp), appendInfo, simplify = FALSE))
# cbind to tile data frame
t_rp <- cbind(t_rp, info)
}
if (!quiet) message("Done with: ", indexFile)
if (!is.null(outputFile)) {
sf::st_write(t_rp, outputFile, projectIdentifier, delete_dsn = TRUE, quiet = TRUE)
}
return(t_rp)
} else {
if (!quiet) message("Problems reading index file: ", indexFile)
}
return(NULL)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.