library(rvest) library(xml2) library(dplyr) library(readr) library(purrr)
Go to https://pro.arcgis.com/en/pro-app/latest/tool-reference/main/arcgis-pro-tool-reference.htm
and copy the inner HTML of `accordion-content`.
I can't scrape this directly because there seems to be JavaScript involved.
toolbox_html <- ' <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/3d-analyst">3D Analyst toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/analysis">Analysis toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/aviation">Aviation toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/business-analyst">Business Analyst toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/cartography">Cartography toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/conversion">Conversion toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/crime-analysis">Crime Analysis and Safety toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/data-interoperability">Data Interoperability toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/data-management">Data Management toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/data-reviewer">Data Reviewer toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/defense">Defense toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/editing">Editing toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/feature-analysis">Feature Analysis toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/geoanalytics-desktop">GeoAnalytics Desktop toolbox</a> <a class="side-nav-link 
icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/big-data-analytics">GeoAnalytics Server toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/geocoding">Geocoding toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/geostatistical-analyst">Geostatistical Analyst toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/image-analyst">Image Analyst toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/indoors">Indoors toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/intelligence">Intelligence toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/linear-referencing">Linear Referencing toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/location-referencing">Location Referencing toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/maritime">Maritime toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/multidimension">Multidimension toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/network-analyst">Network Analyst toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/network-diagram">Network Diagram toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/parcel">Parcels toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/raster-analysis">Raster Analysis toolbox</a> <a class="side-nav-link icon-ui-right" 
data-collapsed="true" href="/en/pro-app/2.7/tool-reference/ready-to-use">Ready To Use toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/server">Server toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/space-time-pattern-mining">Space Time Pattern Mining toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/spatial-analyst">Spatial Analyst toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/spatial-statistics">Spatial Statistics toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/territory-design">Territory Design toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/topographic-production">Topographic Production toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/trace-network">Trace Network toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/utility-networks">Utility Network toolbox</a> <a class="side-nav-link icon-ui-right" data-collapsed="true" href="/en/pro-app/2.7/tool-reference/workflow-manager">Workflow Manager toolbox</a> '
Create a dataframe with Toolbox name and URL
# Parse the pasted navigation HTML and collect, for every <a> element,
# its link text (toolbox name) and its href (relative URL).
toolbox_links <- html_nodes(read_html(toolbox_html), "a")

toolboxes <- tibble(
  toolbox = html_text(toolbox_links),
  url = html_attr(toolbox_links, "href")
)

toolboxes
Adjust the URL
# Derive the URL of each toolbox's overview page (`url_adjusted`) from the
# relative link in `url`. Only base R string helpers are used here, so this
# step does not depend on stringr being attached.
toolboxes <- toolboxes %>%
  mutate(
    # Last path segment of the relative link, e.g. "3d-analyst"
    last = basename(url),
    # There are some cases where the pattern is a bit different
    last = case_when(
      last == "parcel" ~ "parcels",
      last == "utility-networks" ~ "utility-network",
      last == "crime-analysis" ~ "crime-analysis-and-safety",
      last == "defense" ~ "defense-tools",
      last == "feature-analysis" ~ "standard-feature-analysis",
      TRUE ~ last
    ),
    url_adjusted = case_when(
      # the pattern is still different for one toolbox
      last == "workflow-manager" ~ paste0(url, "/overview-of-wmx-tools.htm"),
      TRUE ~ paste0(url, "/an-overview-of-the-", last, "-toolbox.htm")
    ),
    # Swap the pinned "2.7" path segment for "latest". fixed = TRUE matches
    # the text literally, so the "." is not treated as a regex wildcard.
    url_adjusted = sub("/2.7/", "/latest/", url_adjusted, fixed = TRUE),
    url_adjusted = paste0("https://pro.arcgis.com", url_adjusted)
  ) %>%
  select(-last)

toolboxes
Get the toolbox description (only the first `<p>` element is scraped from
each page, which might also just hold licensing information).
# Returns the text of the first <p> element found on the page at `url`.
get_toolboxdesc <- function(url) {
  url %>%
    xml2::read_html() %>%
    html_node("p") %>%
    html_text()
}

# Fetch one description per toolbox. A page that cannot be read is reported
# on the console and yields a missing description instead of aborting the run.
toolbox_desc <- toolboxes %>%
  pmap_dfr(function(toolbox, url, url_adjusted) {
    # Wait half a second before each request
    Sys.sleep(0.5)
    desc <- tryCatch(
      get_toolboxdesc(url_adjusted),
      error = function(e) {
        print(paste("Toolbox", toolbox, "is causing an error"))
        # Typed NA keeps `description` a character column in every row
        NA_character_
      }
    )
    tibble(toolbox = toolbox, description = desc)
  })

# Attach the descriptions and strip surrounding whitespace. Base trimws() is
# used because stringr is not attached by this script; "[\\h\\v]" covers all
# horizontal and vertical whitespace.
toolboxes <- left_join(toolboxes, toolbox_desc, by = "toolbox") %>%
  mutate(description = trimws(description, whitespace = "[\\h\\v]"))

write_csv(toolboxes, "auxiliary/toolboxes.csv")
# This function takes the URL of a toolbox and returns a table of all
# toolsets from this toolbox.
#
# url: URL of the toolbox overview page.
#
# Returns the first ".tablexyz" table of the page with its first column
# renamed to `toolset`, joined with the link target (`url`) of every <a>
# element inside that table whose text equals the toolset name.
grab_toolsets <- function(url) {
  tab <- url %>%
    xml2::read_html() %>%
    html_node(".tablexyz")

  toolset_table <- html_table(tab) %>%
    rename("toolset" = 1)

  link_nodes <- html_nodes(tab, "a")
  toolset_links <- tibble(
    url = html_attr(link_nodes, "href"),
    text = html_text(link_nodes)
  )

  # The first column was renamed to `toolset` above, so that is the join key
  left_join(toolset_table, toolset_links, by = c("toolset" = "text"))
}

# Scrape the toolset table of every toolbox. A toolbox whose page cannot be
# processed still contributes one (otherwise empty) row, so it stays visible
# in the checks further down.
toolsets <- toolboxes %>%
  select(toolbox, url_adjusted) %>%
  pmap_dfr(function(toolbox, url_adjusted) {
    # Wait half a second before each request
    Sys.sleep(0.5)
    mytable <- tryCatch(
      grab_toolsets(url_adjusted),
      error = function(e) {
        print(paste("toolbox", toolbox, "is causing an error"))
        tibble(.rows = 1)
      }
    )
    mutate(mytable, toolbox = toolbox)
  })

# Turns a relative toolset link into an absolute URL that points at the
# "latest" documentation. Missing links stay NA instead of being pasted into
# a bogus "...comNA" address.
toolset_url_adjuster <- function(url) {
  ifelse(
    is.na(url),
    NA_character_,
    paste0("https://pro.arcgis.com", sub("/2.7/", "/latest/", url, fixed = TRUE))
  )
}

toolsets <- toolsets %>%
  mutate(
    url_adjusted = toolset_url_adjuster(url)
  ) %>%
  mutate(
    # Two toolsets need a hand-written overview URL
    url_adjusted = case_when(
      toolbox == "Defense toolbox" & toolset == "Gridded Reference Graphic" ~ "https://pro.arcgis.com/en/pro-app/latest/tool-reference/defense/an-overview-of-the-gridded-reference-graphic-toolset.htm",
      toolbox == "GeoAnalytics Desktop toolbox" & toolset == "Analyze Patterns" ~ "https://pro.arcgis.com/en/pro-app/latest/tool-reference/geoanalytics-desktop/an-overview-of-the-analyze-patterns-toolset.htm",
      TRUE ~ url_adjusted
    )
  )

# Any empty URLs?
toolsets %>% filter(is.na(url))

# did every toolbox get a row (as expected)?
length(unique(toolsets$toolbox)) == nrow(toolboxes)
# if so, is any toolset NA? I hope not
any(is.na(toolsets$toolset))

toolsets <- rename(toolsets, description = Description)
write_csv(toolsets, "auxiliary/toolsets.csv")
toolsets <- read_csv("auxiliary/toolsets.csv")

# This function grabs all tools from a given toolset URL.
#
# input_url:     URL of the toolset overview page.
# input_toolbox: toolbox name, written to the `toolbox` column of the result.
# input_toolset: toolset name, written to the `toolset` column of the result.
#
# Every ".tablexyz" table on the page is handled according to the name of its
# first column:
#   * a tool table gets the columns `tool`, `description` and `href`;
#   * a table of (sub-)toolsets gets `sub_toolset`, `toolset_description`
#     and `href`;
#   * any other table contributes a single placeholder row.
# The per-table results are row-bound into one data frame.
grab_tools <- function(input_url, input_toolbox, input_toolset) {
  table_nodes <- input_url %>%
    read_html() %>%
    html_nodes(".tablexyz")

  my_tables <- html_table(table_nodes)

  # Link text -> href lookup built from all links inside the tables
  a_nodes <- html_nodes(table_nodes, "a")
  url_table <- tibble(tool = html_text(a_nodes), href = html_attr(a_nodes, "href"))

  df <- map_dfr(my_tables, function(my_table) {
    first_col <- colnames(my_table)[1]
    if (first_col %in% c("Tool", "Tool name", "Geoprocessing tools", "tool")) {
      colnames(my_table)[1] <- "tool"
      colnames(my_table)[2] <- "description"
      left_join(my_table, url_table, by = "tool")
    } else if (first_col %in% c("Toolsets", "Toolset")) {
      colnames(my_table)[1] <- "sub_toolset"
      colnames(my_table)[2] <- "toolset_description"
      # The first column is called `sub_toolset` now, so join on that name
      left_join(my_table, url_table, by = c("sub_toolset" = "tool"))
    } else {
      tibble(.rows = 1)
    }
  })

  df %>%
    mutate(
      toolset = input_toolset,
      toolbox = input_toolbox
    )
}

toolsets <- mutate(toolsets, row = row_number())
maxi <- max(toolsets$row)

# Scrape every toolset page. The result is a list with one element per
# toolset: a data frame on success, the string "ERROR" on failure.
tools <- toolsets %>%
  select(toolbox, toolset, url_adjusted, row) %>%
  pmap(function(toolbox, toolset, url_adjusted, row) {
    # Sys.sleep(0.5)
    print(paste(row, toolbox, toolset, round(row / maxi, 2), " (", url_adjusted, ")", sep = " "))
    tryCatch(
      grab_tools(url_adjusted, toolbox, toolset),
      error = function(e) {
        print(paste("ERROR"))
      }
    )
  })

# A toolset counts as scraped when it produced a data frame with rows
success <- map_lgl(tools, ~ is.data.frame(.x) && nrow(.x) > 0)

# Toolboxes containing toolsets that were not scraped successfully
# (the original author noted 14 such special cases)
toolsets %>%
  filter(!success) %>%
  group_by(toolbox) %>%
  summarise()

tools_df <- bind_rows(tools[success])

# Rows with at least one missing value, for manual inspection. View() is only
# opened in an interactive session so a batch run still reaches the write.
tools_incomplete <- tools_df %>% filter_all(any_vars(is.na(.)))
if (interactive()) {
  View(tools_incomplete)
}

# Write the combined data frame (not the raw list of per-toolset results)
write_csv(tools_df, "auxiliary/tools.csv")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.