#' Run rosstat shiny application for interactive mapping
#'
#' @return
#' @export
#'
#' @examples
#' rosstat::run_app()
run_app <- function() {
app_dir <- system.file("shiny", "rosstat", package = "rosstat")
if (app_dir == "") {
stop("Could not find application directory. Try re-installing `rosstat`.", call. = FALSE)
}
shiny::runApp(app_dir, display.mode = "normal")
}
#' Read Rosstat table from docx file
#'
#' @param first first file (required)
#' @param second second file (optional)
#' @param max_dist maximum misspellings for fuzzy join
#'
#' @return tidy data.frame
#' @export
#'
#' @examples
#' \dontrun{
#' read_rosstat('12-03.docx')
#' }
read_rosstat <- function(first, second, max_dist = 2){
if(missing(second)) {
tbls = docxtractr::read_docx(first) %>%
docxtractr::docx_extract_all_tbls()
df = tbls[[1]]
# Count the number of rows in table header -- N
tab = tbls[[1]]
n = ncol(tab)
N = 1
for (v in 1:nrow(tab)) {
if (tab[v,1] != "") {
N = v-1
break
}
}
# Attach remaining tables from the bottom
k = 2
while (k <= length(tbls)){
tab_new = tbls[[k]][-(1:N), ]
tab = tab %>% dplyr::bind_rows(tab_new)
k = k + 1
}
tab = setNames(tab, c('reg', 2:n))
# Convert rosstat table to tidy format ------------------------------------
resource = vector(mode = "integer", length = N) # empty cells that should be filled
d = vector(mode = "integer", length = N) # empty cells that should NOT be filled
g = vector(mode = "integer", length = N-1) # a difference — is used to define header row type
nvals = vector(mode = "integer", length = N)
# Calculate number of empty cells (excluding NA) and to-be-filled cells
for (r in 1:N) {
row = tab %>%
slice(r) %>%
unlist(use.names=FALSE)
d[r] = length(row[row == ""]) - length(row[is.na(row)])
nvals[r] = n - length(row[row == ""])
resource[r] = (n - d[r]) / nvals[r]
}
# Calculate header row type
for (r in 1:N-1) {
g[r] = d[r+1]-d[r]
}
# For every row beginning from one before last, for every column
for (i in (N-1):1) {
m = 1
for (j in 1:(n-1)) {
# If empty cell then do nothing
if ((tab[i, j] == "") | (tab[i+1, j] == "") | (tab[i+1, j+1] == "")) next
# If g > 0 (e.g. in the next row there are more empty non-filled cells then in current)
# then duplicate the value until filled cell is reached
if (g[i]>0) {
for (k in n:(j+1)) {
tab[i,k] = tab[i,k-1]
}
} else { # if g <= 0, then spread remaining values
if (m < resource[i]){
for (k in n:(j+1)) {
tab[i,k] = tab[i,k-1]
}
m = m+1
} else {
m = 1
}
}
}
}
# join ISO identifiers and filter only regions
tab = tab %>%
fuzzyjoin::stringdist_left_join(rosstat_regions[, c('name_local', 'iso'), drop = TRUE],
by = c('reg' = 'name_local'), max_dist = max_dist) %>%
dplyr::select(-name_local)
# reconstruct a long form of a table
values = tab %>%
dplyr::slice(N+1:n()) %>%
magrittr::set_names(c('reg', 2:n, 'iso')) %>%
tidyr::gather(var, value, 2:n) %>%
dplyr::mutate(var = as.numeric(var),
value = as.numeric(value))
classifier = tab %>%
select(2:n) %>%
slice(1:N) %>%
rownames_to_column() %>%
gather(var, value, -rowname) %>%
mutate(var = as.integer(var)) %>%
spread(rowname, value) %>%
mutate(pathString =
dplyr::select(., -var) %>%
apply(1, function(X){
paste(X, collapse = '/')
})
)
return(list(values = values, vars = classifier))
}
}
#' Transform
#'
#' @param tree data.tree object
#' @param number number of the variable
#' @param path full path to a variable
#'
#' @return sf (simple features) object
#' @export
#'
#' @examples
to_sf = function(df, wide = TRUE){
df_wide = df
if(wide)
df_wide = df %>%
dplyr::group_by(var) %>%
dplyr::mutate(id = dplyr::row_number()) %>%
tidyr::spread(var, value, sep = '')
rosstat::rosstat_regions %>%
dplyr::full_join(df_wide, by = 'iso')
}
#' Generate list of variables for plotting
#'
#' @param input tidy data frame resulting from read_rosstat function
#'
#' @return list of variables
#' @export
list_rosstat = function(input){
L = length(input)
level = vector(mode = 'list')
for (i in 1:L){
nms = names(input)
if (is.list(input[[i]])){
K = length(input[[i]])
for (j in 1:K){
if(is.list(input[[i]][[j]])){
level[[i]] = list_rosstat(input[[i]])
names(level)[i] = nms[i]
} else {
level[[i]] = nms[i]
names(level)[i] = nms[i]
break
}
}
} else {
level[[i]] = nms[i]
}
}
return(level)
}
get_selected = function(tree, format=c("names", "slices")){
format = match.arg(format, c("names", "slices"), FALSE)
switch(format,
"names"=get_selected_names(tree),
"slices"=get_selected_slices(tree))
}
get_selected_names = function(tree, ancestry=NULL, vec=list()){
if (is.list(tree)){
for (i in 1:length(tree)){
anc = c(ancestry, names(tree)[i])
vec = get_selected_names(tree[[i]], anc, vec)
}
}
a = attr(tree, "stselected", TRUE)
if (!is.null(a) && a == TRUE){
# Get the element name
el = tail(ancestry,n=1)
vec[length(vec)+1] = el
attr(vec[[length(vec)]], "ancestry") = head(ancestry, n=length(ancestry)-1)
}
return(vec)
}
get_selected_slices = function(tree, ancestry=NULL, vec=list()){
if (is.list(tree)){
for (i in 1:length(tree)){
anc = c(ancestry, names(tree)[i])
vec = get_selected_slices(tree[[i]], anc, vec)
}
}
a = attr(tree, "stselected", TRUE)
if (!is.null(a) && a == TRUE){
# Get the element name
ancList = 0
for (i in length(ancestry):1){
nl = list()
nl[ancestry[i]] = list(ancList)
ancList = nl
}
vec[length(vec)+1] = list(ancList)
}
return(vec)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.