R/functions.R

Defines functions run_app read_rosstat to_sf list_rosstat get_selected get_selected_names get_selected_slices

Documented in list_rosstat read_rosstat run_app to_sf

#' Run rosstat shiny application for interactive mapping
#'
#' @return
#' @export
#'
#' @examples
#' rosstat::run_app()
run_app <- function() {
  app_dir <- system.file("shiny", "rosstat", package = "rosstat")
  if (app_dir == "") {
    stop("Could not find application directory. Try re-installing `rosstat`.", call. = FALSE)
  }

  shiny::runApp(app_dir, display.mode = "normal")
}

#' Read Rosstat table from docx file
#'
#' @param first first file (required)
#' @param second second file (optional)
#' @param max_dist maximum misspellings for fuzzy join
#'
#' @return tidy data.frame
#' @export
#'
#' @examples
#' \dontrun{
#' read_rosstat('12-03.docx')
#' }
read_rosstat <- function(first, second, max_dist = 2){

  if(missing(second)) {

    tbls = docxtractr::read_docx(first) %>%
      docxtractr::docx_extract_all_tbls()

    df = tbls[[1]]

    # Count the number of rows in table header -- N

    tab = tbls[[1]]
    n = ncol(tab)


    N = 1
    for (v in 1:nrow(tab)) {
      if (tab[v,1] != "") {
        N = v-1
        break
      }
    }

    # Attach remaining tables from the bottom
    k = 2
    while (k <= length(tbls)){
      tab_new = tbls[[k]][-(1:N), ]
      tab = tab %>% dplyr::bind_rows(tab_new)
      k = k + 1
    }

    tab = setNames(tab, c('reg', 2:n))

    # Convert rosstat table to tidy format ------------------------------------

    resource = vector(mode = "integer", length = N) # empty cells that should be filled
    d = vector(mode = "integer", length = N) # empty cells that should NOT be filled
    g = vector(mode = "integer", length = N-1) # a difference — is used to define header row type
    nvals = vector(mode = "integer", length = N)

    # Calculate number of empty cells (excluding NA) and to-be-filled cells

    for (r in 1:N) {
      row = tab %>%
        slice(r) %>%
        unlist(use.names=FALSE)
      d[r] = length(row[row == ""]) - length(row[is.na(row)])
      nvals[r] = n - length(row[row == ""])
      resource[r] = (n - d[r]) / nvals[r]
    }

    # Calculate header row type

    for (r in 1:N-1) {
      g[r] = d[r+1]-d[r]
    }

    # For every row beginning from one before last, for every column
    for (i in (N-1):1) {
      m = 1
      for (j in 1:(n-1)) {

        # If empty cell then do nothing

        if ((tab[i, j] == "") | (tab[i+1, j] == "") | (tab[i+1, j+1] == "")) next

        # If g > 0 (e.g. in the next row there are more empty non-filled cells then in current)
        # then duplicate the value until filled cell is reached

        if (g[i]>0) {
          for (k in n:(j+1)) {
            tab[i,k] = tab[i,k-1]
          }
        } else {         # if g <= 0, then spread remaining values
          if (m < resource[i]){
            for (k in n:(j+1)) {
              tab[i,k] = tab[i,k-1]
            }
            m = m+1

          } else {
            m = 1
          }
        }
      }
    }

    # join ISO identifiers and filter only regions
    tab = tab %>%
      fuzzyjoin::stringdist_left_join(rosstat_regions[, c('name_local', 'iso'), drop = TRUE],
                                      by = c('reg' = 'name_local'), max_dist = max_dist) %>%
      dplyr::select(-name_local)

    # reconstruct a long form of a table

    values = tab %>%
      dplyr::slice(N+1:n()) %>%
      magrittr::set_names(c('reg', 2:n, 'iso')) %>%
      tidyr::gather(var, value, 2:n) %>%
      dplyr::mutate(var = as.numeric(var),
                    value = as.numeric(value))

    classifier = tab %>%
      select(2:n) %>%
      slice(1:N) %>%
      rownames_to_column() %>%
      gather(var, value, -rowname) %>%
      mutate(var = as.integer(var)) %>%
      spread(rowname, value) %>%
      mutate(pathString =
               dplyr::select(., -var) %>%
               apply(1, function(X){
                 paste(X, collapse = '/')
               })
      )

    return(list(values = values, vars = classifier))
  }
}

#' Transform
#'
#' @param tree data.tree object
#' @param number number of the variable
#' @param path full path to a variable
#'
#' @return sf (simple features) object
#' @export
#'
#' @examples
to_sf = function(df, wide = TRUE){
  df_wide = df

  if(wide)
    df_wide = df %>%
      dplyr::group_by(var) %>%
      dplyr::mutate(id = dplyr::row_number()) %>%
      tidyr::spread(var, value, sep = '')

  rosstat::rosstat_regions %>%
    dplyr::full_join(df_wide, by = 'iso')
}

#' Generate list of variables for plotting
#'
#' @param input tidy data frame resulting from read_rosstat function
#'
#' @return list of variables
#' @export
list_rosstat = function(input){

  L = length(input)

  level = vector(mode = 'list')

  for (i in 1:L){
    nms = names(input)

    if (is.list(input[[i]])){

      K = length(input[[i]])

      for (j in 1:K){
        if(is.list(input[[i]][[j]])){
          level[[i]] = list_rosstat(input[[i]])
          names(level)[i] = nms[i]
        } else {
          level[[i]] = nms[i]
          names(level)[i] = nms[i]
          break
        }
      }

    } else {
      level[[i]] = nms[i]
    }
  }

  return(level)

}

get_selected = function(tree, format=c("names", "slices")){
  format = match.arg(format, c("names", "slices"), FALSE)
  switch(format,
         "names"=get_selected_names(tree),
         "slices"=get_selected_slices(tree))
}

get_selected_names = function(tree, ancestry=NULL, vec=list()){
  if (is.list(tree)){
    for (i in 1:length(tree)){
      anc = c(ancestry, names(tree)[i])
      vec = get_selected_names(tree[[i]], anc, vec)
    }
  }
  a = attr(tree, "stselected", TRUE)
  if (!is.null(a) && a == TRUE){
    # Get the element name
    el = tail(ancestry,n=1)
    vec[length(vec)+1] = el
    attr(vec[[length(vec)]], "ancestry") = head(ancestry, n=length(ancestry)-1)
  }
  return(vec)
}

get_selected_slices = function(tree, ancestry=NULL, vec=list()){
  if (is.list(tree)){
    for (i in 1:length(tree)){
      anc = c(ancestry, names(tree)[i])
      vec = get_selected_slices(tree[[i]], anc, vec)
    }
  }
  a = attr(tree, "stselected", TRUE)
  if (!is.null(a) && a == TRUE){
    # Get the element name
    ancList = 0
    for (i in length(ancestry):1){
      nl = list()
      nl[ancestry[i]] = list(ancList)
      ancList = nl
    }
    vec[length(vec)+1] = list(ancList)
  }
  return(vec)
}
tsamsonov/rosstat documentation built on May 28, 2019, 4:32 a.m.