#' @title Export ggplot with city time series to pdf file
#' @description The function helps to create pdf with ggplot for visual checking the quality of city data
#' @param dataset Dataframe with mandatory fields : year (integer), location (integer), indicator (c-code in character), industry (character) or okved (character)
#' @param folder Path in string for saving pdf-document
#' @param filename filename, like "all2",
#' @param begin First year for charts plot
#' @param end Last year for charts plot
#' @param lag The numbers of intervals which splite time period between "begin" & "end" years to render on x-axix
#' @param oktmo_style Boolean, TRUE (by default) if title should include oktmo instead of city name. FALSE if city name on title preferable
#' @param scn_vec Vector with column's names needed to be displayed on chart. e.g. c ("fact", "dyn_calc')
#' @param kpi_vec Vector of C-codes in character like c("C019", "C475") or "All" in cases ploting all indicators on 1 per plot
#' @param ind_vec Vector with the code of industries & okved like c("1", "5") or "All" in cases ploting all industies in 1 pdf section."AllCity" by default if no industry column
#' @usage time_series_pdf(dataset, folder = getwd(), filename = NULL, begin = 2010, end = 2020, lag = 5, oktmo_style = FALSE, scn_vec = NULL, kpi_vec = "All", ind_vec = "All")
#' @return Pdf-file with ggplots in folder
#' @importFrom dplyr %>%
#' @importFrom DBI dbCanConnect
#' @importFrom RPostgres Postgres
#' @importFrom ggplot2 ggplot
#' @importFrom dplyr left_join
#' @importFrom dplyr distinct
#' @importFrom dplyr case_when
#' @importFrom ggplot2 geom_line
#' @importFrom ggplot2 geom_path
#' @importFrom ggplot2 scale_y_continuous
#' @importFrom ggplot2 scale_x_continuous
#' @importFrom ggplot2 geom_ribbon
#' @importFrom ggplot2 geom_point
#' @importFrom ggplot2 labs
#' @importFrom ggplot2 theme
#' @importFrom ggforce facet_wrap_paginate
#' @importFrom ggplot2 ggproto
#' @importFrom ggforce n_pages
#' @export
#' @examples
#'
#' path <- 'https://www.dropbox.com/s/x0izu2v5b1jrd7d/sd_sample.csv?dl=1'
#' sd <- read.table(path, header = T, sep = ";")
#' time_series_pdf(dataset = sd, folder = getwd(), begin = 2000, end = 2030, lag = 1, oktmo_style = F, kpi_vec = c ("C475", "C499"), ind_vec = "All")
time_series_pdf <- function (dataset, # Пакет данных для вывода dataset <- sd
folder = getwd(),
filename = NULL,
begin = 2010,
end = 2020,
lag = 5,
oktmo_style = FALSE,
scn_vec = NULL, # c ("fact", "dyn_calc')
kpi_vec = "All", # kpi_vec <- c ("C475", "C499")
ind_vec = "AllCity") { # ind_vec <- c ("4", "6") ind_vec <- "All" (не всего, но все отрасли)
require(ggforce)
{
sprav <- read_all_prosha()$dwh_prod
sprav$okved <- sprav$okved %>% as_tibble() %>% mutate (industry_geoveb = as.character(industry_geoveb),
industry_geoveb = ifelse(okved == "всего", "AllCity", industry_geoveb))
} # 1. Считывание и обработка справочников test <- sprav$okved
{
# dataset %>% slice_sample(n= 1) %>% paste0(collapse = "','")
column_names <- names(dataset) #%>% paste0(collapse = ",")
dataset1 <- dataset # Убрать и оставить только dataset - для уменьшения памяти
} # 2. Забираем данные
{# Обрабатываем все что связано с полем year
if (!all(c("year", "location", "indicator") %in% column_names)) stop("No mandatory fields")
if (begin > end || begin < min (distinct(dataset,year)) || end > max (distinct(dataset,year))) stop ("Wrong time frame")
if (lag > (end-begin)) stop("To wide time lag")
dataset1 <- dataset1 %>% filter (year >= begin, year <= end) # Уберем лишние годы
# Обрабатываем все что связано с полем location
dataset1 <- dataset1 %>% mutate(location = as.character(location))
# Обрабатываем все что связано с полем indicator
if (!"All" %in% kpi_vec & nrow(distinct(dataset,indicator) %>% filter(indicator %in% kpi_vec)) < length(kpi_vec) ) stop ("Kpi vector out of dataset's range")
if (!all(kpi_vec %in% 'All')) dataset1 <- dataset1 %>% filter (indicator %in% kpi_vec) # Уберем лишние вектора
# Обрабатываем все что связано с полем industry or okved
if (any(column_names %in% "okved")) {
dataset1 <- dataset1 %>% left_join(sprav$okved %>% select (okved, ind_name = long_name ) %>% distinct(okved , ind_name), by = c("okved" = "okved"))}
else {
if (any(column_names %in% "industry")) dataset1 <- dataset1 %>% left_join(sprav$okved %>% select (industry_geoveb, ind_name = industry_name_geoveb) %>% distinct(industry_geoveb, ind_name), by = c("industry" = "industry_geoveb"))
else{
dataset1 <- dataset1 %>% mutate (industry = "AllCity", ind_name = "AllCity")
}}
if (any(column_names %in% "okved")) dataset1 <- dataset1 %>% rename ("industry" = "okved") # для выгрузок в которых вместо индустрий находятся оквэды переходим на единое именование колонки industry
# if ((!"All" %in% ind_vec & !"всего" %in% ind_vec & (any(column_names %in% "okved")) ) & nrow(distinct(dataset,okved) %>% filter(okved %in% ind_vec)) < length(ind_vec) ) stop ("OKVED vector out of dataset's range")
# if ((!"All" %in% ind_vec & !"всего" %in% ind_vec & (any(column_names %in% "industry")) ) & nrow(distinct(dataset,industry) %>% filter(industry %in% ind_vec)) < length(ind_vec) ) stop ("Industry vector out of dataset's range")
if (ind_vec == "AllCity" & any(column_names %in% "industry") ) dataset1 <- dataset1 %>% filter (industry == "AllCity")
if (ind_vec == "AllCity" & any(column_names %in% "okved" ) ) dataset1 <- dataset1 %>% filter (okved == "AllCity")
if (ind_vec == "all" & any(column_names %in% "industry") ) dataset1 <- dataset1 %>% filter (industry != "AllCity")
if (ind_vec == "all" & any(column_names %in% "okved" ) ) dataset1 <- dataset1 %>% filter (okved != "AllCity")
if (ind_vec != "AllCity" & ind_vec != "All") dataset1 <- dataset1 %>% filter (industry %in% ind_vec)
} # 3. Проверяем содержимое датасета
{
dataset1 <- dataset1 %>%
oktmo_restore("location") %>%
left_join(sprav$indicator %>% as_tibble() %>% select (code , kpi_name = long_name, unit_id), by = c("indicator" = "code" )) %>%
left_join(sprav$location %>% as_tibble() %>% select (oktmo, loc_name = long_name ), by = c("location" = "oktmo")) %>%
left_join(sprav$unit %>% as_tibble() %>% select (id, unit = unique_name ), by = c("unit_id" = "id")) %>%
mutate (ind_name = ifelse ( any(column_names %in% "industry" || column_names %in% "okved"), "AllCity", str_remove(ind_name, "Деятельность ")),
ind_name = ifelse (industry == "AllCity", "AllCity", str_sub(ind_name, end = 20L)),
# ind_name = paste0(industry, ".", ind_name),
kpi_name = str_remove(kpi_name, "-ПОКАЗАТЕЛЬ ОТМЕНЕН- "),
loc_name = str_replace(loc_name, "агломерация", "аг."),
loc_name = str_replace(loc_name, "район", "р."),
loc_name = str_sub (loc_name, end = 20L),
year = as.numeric(year),
ind_name = as.factor (ind_name),
kpi_name = as.factor (kpi_name) #,city_title = as.factor(city_title)
)
if(oktmo_style) dataset1 <- dataset1 %>% mutate (city_title = as.factor(location)) else dataset1 <- dataset1 %>% mutate (city_title = as.factor(loc_name))
dataset1 <- dataset1 %>% mutate (facet_ind = paste0(ind_name, ": ", city_title))
} # 4. Пристыковываем имена полей из справочников
{
ggplot_prep <- function(dataset2, kpi_set,
begin = 2010,
end = 2020,
lag = 5){ #dataset2 <- dataset1 kpi_set <-"C013" c ("C009", "C011")
kpi_title <- dataset2 %>% filter(indicator %in% kpi_set) %>%
distinct(indicator , kpi_name, unit) %>%
mutate (n = paste0(indicator, ", ", kpi_name, ", ", unit)) %>% select(n) %>% deframe() %>% paste0(collapse = "\n")
dataset2 <- dataset2 %>% filter(indicator %in% kpi_set)
if (all(distinct(dataset2,industry) == "AllCity")){
dataset2 <- dataset2 %>% mutate (facet_ind = paste0(city_title))
page_title <- kpi_title
page_subtitle <- "По городу в целом"
} else {
dataset2 <- dataset2 %>% mutate (facet_ind = paste0(ind_name, ": ", city_title))
page_title <- "Показатели по отрасли в каждом городе"
page_subtitle <- kpi_title} # выбор фасета в зависимости от датасета
version <- ifelse(any(names(dataset) %in% "version"), distinct(dataset, version)[1], "undf")
f <- ggplot(dataset2)
{
if ("value" %in% names(dataset2)) {f <- f + geom_line (aes(x = year, y = value , color = kpi_name) , size = .5)}
if (("dyn" %in% names(dataset2) & is.null(scn_vec)) || "dyn" %in% scn_vec) {f <- f + geom_line (aes(x = year, y = dyn , color = "Динамическая") , size = .5)}
if (("dyn_calc" %in% names(dataset2) & is.null(scn_vec)) || "dyn_calc" %in% scn_vec) {f <- f + geom_line (aes(x = year, y = dyn_calc , color = "Динамическая") , size = .5)}
if (("dyn_use" %in% names(dataset2) & is.null(scn_vec)) || "dyn_use" %in% scn_vec) {f <- f + geom_line (aes(x = year, y = dyn_use , color = "Дин. с фактами") , size = .5)}
if (("calc" %in% names(dataset2) & is.null(scn_vec)) || "calc" %in% scn_vec) {f <- f + geom_line (aes(x = year, y = calc , color = "Регрессионный") , size = .5)}
if (("sk_calc" %in% names(dataset2) & is.null(scn_vec)) || "sk_calc" %in% scn_vec) {f <- f + geom_line (aes(x = year, y = sk_calc , color = "Регрессионный каркас") , size = .5, linetype = "dashed")}
if (("sk_use" %in% names(dataset2) & is.null(scn_vec)) || "sk_use" %in% scn_vec) {f <- f + geom_line (aes(x = year, y = sk_use , color = "Регр. каркас с фактами ") , size = .5, linetype = "dashed")}
if (("fact" %in% names(dataset2) & is.null(scn_vec)) || "fact" %in% scn_vec) {f <- f + geom_point (aes(x = year, y = fact , color = "Факты" ) , size = 1 )}
if (("d_fact" %in% names(dataset2) & is.null(scn_vec)) || "d_fact" %in% scn_vec) {f <- f + geom_point (aes(x = year, y = fact , color = "D-факты" ) , size = 1 )}
if (("low" %in% names(dataset2) & is.null(scn_vec)) || "low" %in% scn_vec) {f <- f + geom_ribbon(aes(x = year, ymin = low , ymax = high ), color = "grey70", alpha = .3)}
if (("dyn_high" %in% names(dataset2) & is.null(scn_vec)) || "dyn_high" %in% scn_vec) {f <- f + geom_ribbon(aes(x = year, ymin = dyn_low, ymax = dyn_high ), color = "grey70", alpha = .3)}
} # Выбор сценария выводмого значения
{f <- f +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(breaks = seq(from = begin, to = end, by = lag) ) +
theme(plot.title = element_text (face = "bold", size = 14, hjust = 0.5),
plot.subtitle = element_text ( size = 8 , hjust = 0.5),
strip.text = element_text(size = 8),
axis.title.y = element_blank(), axis.title.x = element_blank(),
legend.position = "bottom", legend.text = element_text (size = 8), legend.title = element_text (size = 8),
axis.text.y = element_text (size = 5), axis.text.x = element_text (size = 5)) +
labs(title = page_title ,
subtitle = page_subtitle,
color = "Легенда: ", x = "",
caption = paste0("Версия ", version ,", от ", Sys.time()))
} # Оформление
if(nrow(distinct(dataset2, location)) > 1){
f <- f + ggforce:::facet_wrap_paginate(~ facet_ind, ncol = 6, nrow = 6, page = 1, scales = "free", labeller = labeller(facet_ind = label_wrap_gen(width = 30)))} else{
f <- f + ggforce:::facet_wrap_paginate(~ facet_ind, ncol = 1, nrow = , page = 1, scales = "free", labeller = labeller(facet_ind = label_wrap_gen(width = 30)))}
return(f)
} # Функция подготовки основного графика
ggplot_add_page <- function(dataset , kpi_set, begin, end, lag){
f <- ggplot_prep(dataset, kpi_set, begin = begin, end = end, lag = lag)
if(nrow(distinct(dataset %>% filter(indicator %in% kpi_set), location)) > 1){
num <- ggforce:::n_pages(f)
for(i in 1:num){
print(f +
ggforce:::facet_wrap_paginate(~ facet_ind, ncol = 6, nrow = 6, page = i,
scales = "free",
labeller = labeller(facet_ind = label_wrap_gen(width = 30))) )
cat(i, " page from ", num, " pages \r\n") }
} else f
# cat(paste0("Документ сохранен в папке ", folder, "\n под именем "))
} # Функция печати на отдельную страницу
} # 6. Функция создания ggplot для
{
ver <- ifelse(any(names(dataset1) %in% "version"), distinct(dataset1, version)[1], "undf")
name_pdf <- case_when(
ind_vec == "AllCity" ~ paste0("All_", "v.", ver, "_", ".pdf"),
TRUE ~ paste0("Industry_", paste0(ind_vec, collapse="_"), ".KPI_", paste0(kpi_vec, collapse="_"), "_v.", ver, "_", ".pdf"))
if(!is.null(filename)) name_pdf <- paste0(filename, ".pdf")
cairo_pdf(paste0(folder, "/",name_pdf), h = 210/25.4, w = 297/25.4, onefile = TRUE, fallback_resolution = 120)
# pdf(name_pdf, height = 210/25.4, width = 297/25.4, onefile = TRUE, encoding="MacRoman")
kpis <- unique(dataset1$indicator, 2) #%>% head(2) %>% print()
jmax <- length(kpis)
num_cities <- unique(dataset1$location, 2) %>% length() #%>% print()
num_indus <- unique(dataset1$industry, 2) %>% length() #%>% print()
if(ind_vec == "AllCity" || jmax < 4){ # Когда нужен 1 показатель на график
cat(paste0("Ориентировочная скорость вывода ", Sys.time() + num_cities*jmax*num_indus * 0.254386, " \r\n" ))
for (j in 1:jmax){
cat (paste0("Показатель: ", kpis[j], ". ", j , "-й из ", jmax, " показателей \r\n"))
ggplot_add_page (dataset1, kpis[j], begin = begin, end = end, lag = lag)
}} else { ggplot_add_page (dataset1, kpi_vec, begin = begin, end = end, lag = lag)} # Когда нужны несколько показателей на графике
dev.off()
} # 7. Печатаем страницы
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.