knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library(tidyverse) library(ggplot2) library(plotly) library(stats) library(lubridate) library(ggplot2) library(plotly) library(stats) library(lubridate) library(sqldf) library(forecast) graves <- readRDS("../data/graves.rds") graves <- graves %>% distinct()
date_birth_n <- graves %>% group_by(g_date_birth) %>% count() date_birth_n %>% arrange(desc(n)) %>% head(n=10)
cementy <- openPoznan::cemeteries(coords = F) graves_name <- left_join(graves, cementy, by=c("cm_id"="ID")) graves_name <- graves_name %>% distinct() ggplot(graves_name, aes(as.factor(Cemetery_Name))) + geom_bar(aes(y = (..count..)/sum(..count..))) + geom_text(aes(label = scales::percent((..count..)/sum(..count..)), y= (..count..)/sum(..count..)), size=3, stat= "count", hjust = -.5) + xlab("Cementary name") + ylab("Percent") + scale_y_continuous(labels=scales::percent) + theme_bw()+ coord_flip()
grave_size <- graves %>% group_by(paid,g_size) %>% count() %>% filter(g_size != "", !is.na(g_size)) ggplot(grave_size, aes(x=as.numeric(g_size), y=n)) + geom_col() + facet_wrap(~ paid) + xlab("Grave size") + ylab("number")
# oczyszczanie danych z błędnych danych birth <- graves %>% filter(g_date_birth != "0001-01-01", 1) %>% filter(g_date_birth != "1900-01-01", 1) %>% select(g_date_birth) %>% arrange(g_date_birth) %>% count(g_date_birth) %>% mutate(g_date_birth=ymd(g_date_birth), # zamiana na datę w formacie ymd z pakietu lubridate g_year_birth=year(g_date_birth)) %>% distinct() plot_ly(data = birth, x = ~g_date_birth, y = ~n, type = "bar") %>% layout(xaxis = list(range = c("1800","2018"))) death <- graves %>%filter(g_date_death != "0001-01-01", 1) %>% filter(g_date_death != "1900-01-01", 1) %>% filter(g_date_death != "1945-01-01", 1) %>% select(g_date_death) %>% count(g_date_death) %>% arrange(g_date_death) %>% mutate(g_date_death=ymd(g_date_death), # zamiana na datę w formacie ymd z pakietu lubridate g_year_birth=year(g_date_death)) %>% distinct() plot_ly(data = death, x = ~g_date_death, y = ~n, type = "bar") %>% layout(xaxis = list(range = c("1800","2018"))) burial <- graves %>% filter(g_date_burial != "0001-01-01", 1) %>% select(g_date_burial) %>% arrange(g_date_burial) %>% count(g_date_burial) %>% mutate(g_date_burial=ymd(g_date_burial), # zamiana na datę w formacie ymd z pakietu lubridate g_year_birth=year(g_date_burial)) %>% distinct() plot_ly(data = burial, x = ~g_date_burial, y = ~n, type = "bar") %>% layout(xaxis = list(range = c("1800","2018"))) # analiza szeregów czasowych
graves_birth <- graves %>% filter(g_date_birth != "0001-01-01", 1) %>% mutate(g_date_birth=ymd(g_date_birth), # zamiana na datę w formacie ymd z pakietu lubridate g_year_birth=year(g_date_birth)) # wydobycie roku graves_birth_count <- graves_birth %>% group_by(g_year_birth) %>% count() # w roku urodzenia wystęuje wartość 1 aż 16 razy - po sprawdzeniu zbioru graves_birth okazuje się, że jedna osoba występuje tam 16 razy - czyli w zbiorze występują duplikaty graves_birth_distinct <- graves_birth %>% distinct() graves_birth_count_distinct <- graves_birth_distinct %>% group_by(g_year_birth) %>% count() # usuwamy nieprawidłowe wartości graves_birth_distinct <- graves_birth_distinct %>% filter(g_year_birth %in% c(1000:2018)) # lata urodzenia w przedziale od 1000 do 2018 ggplot(graves_birth_distinct, aes(g_year_birth)) + geom_bar()
ggplot(graves_birth_distinct, aes(g_year_birth)) + geom_bar() + xlim(1850,2018)
# usunięcie daty 1900-01-01, która też jest brakiem danych graves_birth_distinct %>% distinct() %>% filter(g_date_birth != "1900-01-01") %>% ggplot(aes(g_year_birth)) + geom_bar() + xlim(1850,2018)
# pojedyncze roczniki graves_birth_distinct %>% distinct() %>% filter(g_date_birth != c("1900-01-01"),1) %>% ggplot(aes(g_year_birth)) + geom_histogram(binwidth = 1) + xlim(1850,2018)
graves_death <- graves %>% filter(g_date_death != "0001-01-01", 1) %>% mutate(g_date_death=ymd(g_date_death), g_year_death=year(g_date_death)) graves_death_counr <- graves_death %>% group_by(g_year_death) %>% count() graves_death_distinct <- graves_death %>% distinct() graves_death_count_distinct <- graves_death_distinct %>% group_by(g_year_death) %>% count() graves_death_distinct <- graves_death_distinct %>% filter(g_year_death %in% c(100:2018)) ggplot(graves_death_distinct, aes(g_year_death)) + geom_bar()
graves_death_distinct %>% distinct() %>% filter(g_date_death != "1900-01-01") %>% ggplot(aes(g_year_death)) + geom_bar() + xlim(1850,2018)
# pojedyncze roczniki graves_death_distinct %>% distinct() %>% filter(g_date_death != "1900-01-01") %>% ggplot(aes(g_year_death)) + geom_histogram(binwidth = 1) + xlim(1900,2018)
graves_burial <- graves %>% filter(g_date_burial != "0001-01-01", 1) %>% mutate(g_date_burial=ymd(g_date_burial), g_year_burial=year(g_date_burial)) graves_burial_count <- graves_burial %>% group_by(g_date_burial) %>% count() graves_burial_distinct <- graves_burial %>% distinct() graves_burial_count_distinct <- graves_burial_distinct %>% group_by(g_year_burial) %>% count() graves_burial_distinct <- graves_burial_distinct %>% filter(g_year_burial %in% c(1100:2018)) ggplot(graves_burial_distinct, aes(g_date_burial)) + ylim(0,100)+ geom_bar()
graves_burial_distinct %>% distinct() %>% filter(g_date_burial != c("1900-01-01","1945-01-01")) %>% ggplot(aes(g_year_burial)) + geom_bar() + xlim(1850,2018)
graves_burial_distinct %>% distinct() %>% filter(g_date_burial != "1900-01-01") %>% ggplot(aes(g_year_burial)) + geom_histogram(binwidth = 1) + xlim(1850,2018)
graves_birth_count_distinct %>% ggplot(aes(x=g_year_birth), color = "red")+ geom_col(data = graves_birth_count_distinct, aes(g_year_birth,y=n), color = "red")+ geom_col(data = graves_death_count_distinct, aes(g_year_death,y=n), color = "blue")+ geom_col(data = graves_burial_count_distinct, aes(g_year_burial,y=n), color = "green")+ ylim(0,2000)+ xlim(1750,2018)
datatime <- data2 %>% mutate(year = year(g_date_death)) %>% ggplot(aes(x = year, y = n)) + geom_col()+ xlim(1910,1921) datatime2 <- data2 %>% mutate(year = year(g_date_death)) %>% ggplot(aes(x = year, y = n)) + geom_col()+ xlim(1920,1931) datatime3 <- data2 %>% mutate(year = year(g_date_death)) %>% ggplot(aes(x = year, y = n)) + geom_col()+ xlim(1930,1941) datatime datatime2 datatime3
# miłostowo birth_cm_2 <- graves_birth %>% select(c(cm_id,g_year_birth)) %>% count(g_year_birth,cm_id) birth_clean_cm_2 <- birth_cm_2 %>% filter(cm_id == 2) %>% filter(g_year_birth != "1900") %>% arrange(g_year_birth) %>% filter(g_year_birth >= "1900", g_year_birth <= "2016") plot_ly(data = birth_clean_cm_2, x = ~g_year_birth, y = ~n, type = "scatter", mode = "lines+markers") # junikowo birth_cm_1 <- graves_birth %>% select(c(cm_id,g_year_birth)) %>% count(g_year_birth,cm_id) birth_clean_cm_1 <- birth_cm_1 %>% filter(cm_id == 1) %>% filter(g_year_birth != "1900") %>% arrange(g_year_birth) %>% filter(g_year_birth >= "1900", g_year_birth <= "2016") plot_ly(data = birth_clean_cm_1, x = ~g_year_birth, y = ~n, type = "scatter", mode = "lines+markers") # jeżycki zabytkowy birth_cm_4 <- graves_birth %>% select(c(cm_id,g_year_birth)) %>% count(g_year_birth,cm_id) birth_clean_cm_4 <- birth_cm_4 %>% filter(cm_id == 4) %>% filter(g_year_birth != "1900") %>% arrange(g_year_birth) %>% filter(g_year_birth >= "1900", g_year_birth <= "2016") plot_ly(data = birth_clean_cm_4, x = ~g_year_birth, y = ~n, type = "scatter", mode = "lines+markers") # górczyn birth_cm_6 <- graves_birth %>% select(c(cm_id,g_year_birth)) %>% count(g_year_birth,cm_id) birth_clean_cm_6 <- birth_cm_6 %>% filter(cm_id == 6) %>% filter(g_year_birth != "1900") %>% arrange(g_year_birth) %>% filter(g_year_birth >= "1900", g_year_birth <= "2016") plot_ly(data = birth_clean_cm_6, x = ~g_year_birth, y = ~n, type = "scatter", mode = "lines+markers") All_birth <-graves_birth %>% select(c(cm_id,g_year_birth)) %>% count(g_year_birth,cm_id) #all All_birth_clean <- birth_cm_2 %>% filter(cm_id == c("1","2","4","6")) %>% filter(g_year_birth != "1900") %>% arrange(g_year_birth) %>% filter(g_year_birth >= "1900", g_year_birth <= "2016") ggplot(All_birth_clean, aes(x=as.numeric(g_year_birth), y=n)) + geom_line() + facet_wrap(~ g_year_birth) + xlab("") + ylab("")
ggplot(data = birth_clean_cm_2, aes(x=g_year_birth,y=n), color = "red")+ geom_col(data = birth_clean_cm_2, aes(g_year_birth,y=n), color = "red")+ geom_col(data = birth_clean_cm_4, aes(g_year_birth,y=n), color = "blue")+ geom_col(data = birth_clean_cm_6, aes(g_year_birth,y=n), color = "green")+ geom_col(data = birth_clean_cm_1, aes(g_year_birth,y=n), color = "yellow")+ facet_wrap(~n)+ ylim(0,2000)+ xlim(1900,2016)
choice_cm_4 <- graves_death %>% select(c(cm_id,g_year_death)) %>% count(g_year_death,cm_id) clean_cm_4 <- choice_cm_4 %>% filter(cm_id == 4) %>% arrange(g_year_death) %>% filter(g_year_death >= "1999", g_year_death <= "2016") plot_ly(data = clean_cm_4, x = ~g_year_death, y = ~n, type = "scatter", mode = "lines+markers") plot(clean_cm_4$n, main = "zgony") boxplot(clean_cm_4$n) mean_4 <- mean(clean_cm_4$n) stat_4 <- t(summary(clean_cm_4$n)) sd_4 <- sd(clean_cm_4$n) sd_4 var_4 <- var(clean_cm_4$n) var_4 statystic_4 <- cbind(sd_4,var_4,stat_4) statystic_4 acf(clean_cm_4$n) pacf(clean_cm_4$n) frequency(clean_cm_4$n) library(forecast) graves.prediction_4 <- ets(clean_cm_4$n) graves.prediction.forecast_4 <- forecast(graves.prediction_4) plot(graves.prediction_4) plot(graves.prediction.forecast_4) log <- (log(clean_cm_4$n)) plot(log) plot(clean_cm_4$n)
choice_cm_6 <- graves_death %>% select(c(cm_id,g_year_death)) %>% count(g_year_death,cm_id) clean_cm_6 <- choice_cm_6 %>% filter(cm_id == 6) %>% arrange(g_year_death) %>% filter(g_year_death >= "1999", g_year_death <= "2016") plot_ly(data = clean_cm_6, x = ~g_year_death, y = ~n, type = "scatter", mode = "lines+markers") plot(clean_cm_6$n, main = "zgony") boxplot(clean_cm_6$n) mean_6 <- mean(clean_cm_6$n) stat_6 <- t(summary(clean_cm_6$n)) sd_6 <- sd(clean_cm_6$n) sd_6 var_6 <- var(clean_cm_6$n) var_6 statystic_6 <- cbind(sd_6,var_6,stat_6) statystic_6 acf(clean_cm_6$n) pacf(clean_cm_6$n) frequency(clean_cm_6$n) graves.prediction_6 <- ets(clean_cm_6$n) graves.prediction.forecast_6 <- forecast(graves.prediction_6) plot(graves.prediction_6) plot(graves.prediction.forecast_6) log_6 <- (log(clean_cm_6$n)) plot(log) plot(clean_cm_6$n)
choice_cm_2 <- graves_birth %>% select(c(cm_id,g_year_birth)) %>% count(g_year_birth,cm_id) clean_cm_2 <- choice_cm_2 %>% filter(cm_id == 2) %>% arrange(g_year_birth) %>% filter(g_year_birth >= "1920", g_year_birth <= "1950") plot_ly(data = clean_cm_2, x = ~g_year_birth, y = ~n, type = "scatter", mode = "lines+markers")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.