sti <- function(cases, pop){
# cases is a vector of daily cases, pop the population of the group
sti <- stats::filter(cases, rep(1, 7), method="convolution", sides=1)
sti[1:6] <- cases[1:6]
return(sti / pop * 1e5)
}
#'Time series of COVID-19 cases and deaths
#'
#'\code{get_time_series_for()} is used to create a tibble of
#'COVID-19 cases and COVID-19 related deaths for a region and a certain
#'age group. The time period can be defined by the user.
#'
#'@param ages A vector of numbers specifying the desired age groups. The available age groups are "A00-A04","A05-A14","A15-A34","A35-A59","A60-A79" and "A80+".
#'The numbers in \code{ages} are automatically assigned to the belonging age group and afterwards, the cases and deaths of of these age groups are added up.
#'@param regions A vector that either consists of strings (the names of German districts or the names of German states) or district ID's.
#'If this vector has more than one entry, the tibble contains the summed cases/deaths of these regions.
#'@param from A date that specifies the beginning of the time series
#'@param to A date that specifies the end of the time series
#'
#'@return A tibble that contains the number of cases and deaths for the desired regions during the defined period of time.
#'The default version without any arguments returns a tibble that contains all the cases/deaths of Germany between 2020-01-01 and today.
#'@examples get_time_series_for(ages=c(3,55),regions="Sachsen",from="2021-01-05",to="2021-02-06")
#'
#'get_time_series_for(ages=32,regions=c(8221,8222))
#'
#'\dontrun{get_time_series_for(ages=c(12,42),from="2020-05-02",to="2020-05-01"}
#'##"from" must always be an earlier date than "to"
#'
#'@family COVID-19 cases and deaths
#'@export
get_time_series_for <- function(ages="all", regions="Germany", from="2020-01-01", to=Sys.Date()){
# regions can be either Landkreise, Bundesländer or just Germany
# ages should be a number or a numeric vector (eg c(10, 76, 42))
data <- filter_data_by(ages, regions, from, to)
# create time series
days_series <- seq(as.Date(from), as.Date(to), by="days")
dplyr::tibble(date=days_series) %>%
dplyr::left_join(data, by=c("date"="Meldedatum")) %>%
dplyr::group_by(date) %>%
dplyr::summarise(cases=sum(AnzahlFall), deaths=sum(AnzahlTodesfall)) %>%
# the days for which we have no infection data for are days with 0 infections
dplyr::mutate(cases=tidyr::replace_na(cases, 0), deaths=tidyr::replace_na(deaths, 0)) -> time_series
return(time_series)
}
#'Time series of 7-day-incidence
#'
#'\code{get_sti_series_for()} is used to create a tibble of
#'the 7-day-incidence of a region and a certain
#'age group. The time period can be defined by the user.
#'
#'@param ages A vector of numbers specifying the desired age groups. The available age groups are "A00-A04","A05-A14","A15-A34","A35-A59","A60-A79" and "A80+".
#'The numbers in \code{ages} are automatically assigned to the belonging age group and afterwards, the cases and deaths of of these age groups are added up.
#'@param regions A vector that either consists of strings (either the names of German districts or the names of German states) or district ID's.
#'If this vector has more than one entry, the tibble contains the 7-day-incidence of these regions together.
#'@param from A date that specifies the beginning of the time series
#'@param to A date that specifies the end of the time series
#'@param return_deaths A boolean. If one sets \code{return_deaths=T}, another column is added to the tibble that contains the 7-day-death-average weighted by
#'the population. (Basically, this is the something like a 7-day-death-incidence.)
#'
#'@section Warning:
#'When specifying region \strong{and} agegroup, the incidence will not be accurate because
#'there is no population data for the age groups in each district and it will be estimated
#'by the age distribution of Germany.
#'Therefore, it is recommended to specify only one or the other
#'
#'@return A tibble that contains the 7-day-incidence (or additionally, the 7-day-death-average) for the desired regions during the defined period of time.
#'The default version without any arguments returns a tibble that contains the 7-day-incidence of Germany between 2020-01-01 and today.
#'@examples get_sti_series_for(ages=c(3,55),regions="Sachsen",from="2021-01-05",to="2021-02-06")
#'
#'get_sti_series_for(ages=32,regions=c(8221,8222))
#'
#'\dontrun{get_time_series_for(ages=c(12,42),from="2020-05-02",to="2020-05-01"}
#'#"from" must always be an earlier date than "to"
#'
#' @family 7-day-incidence
#' @export
get_sti_series_for <- function(ages="all", regions="Germany", from="2020-01-01", to=Sys.Date(),
return_deaths=F){
# careful! when specifying region *and* agegroup, the incidence will not be accurate because
# there is no population data for the age groups in each Landkreis and it will be estimated
# by the age distribution in all of Germany
# therefore, it is recommended to specify only one or the other
ids <- is.numeric(regions)
stopifnot("invalid age"=(ages=="all" || suppressWarnings(!is.na(as.numeric(ages)))))
stopifnot("from must be before to"=as.Date(from)<as.Date(to))
# calculate the population of the specified group
rev.env$population_lk_data %>% `[[`("Bevölkerung") %>% sum() -> total_pop
spec_pop_percentage <- 1
if(!all(ages=="all")){
age_labels <- rep("", length(ages))
for(i in 1:length(ages)){
age_labels[i] <- get_age_label_from_number(ages[i])
}
rev.env$population_age_data %>%
dplyr::filter(Altersgruppe %in% age_labels) %>% `[[`("Bevölkerung") %>% sum() -> spec_pop
spec_pop_percentage <- spec_pop / total_pop
}
time_series <- get_time_series_for(ages, regions, from, to)
days_series <- seq(as.Date(from), as.Date(to), by="days")
# filter the regions (not robust at the moment)
rki_data %>% dplyr::select(Bundesland) %>% unique() %>%
dplyr::filter(!Bundesland %in% c("Berlin", "Bremen", "Hamburg")) %>%
`[[`("Bundesland") %>% tolower() -> bundesländer
if(all(tolower(regions)=="germany")){
final_pop <- spec_pop_percentage * total_pop
}
else if(all(tolower(regions) %in% bundesländer)){
rev.env$population_lk_data %>%
dplyr::filter(tolower(Bundesland) %in% tolower(regions)) %>%
`[[`("Bevölkerung") %>% sum() ->
region_pop
final_pop <- region_pop * spec_pop_percentage
}
else{
if(!ids){
for(i in 1:length(regions)) regions[i] <- get_lk_id_from_string(regions[i])
}
rev.env$population_lk_data %>% dplyr::filter(IdLandkreis %in% regions) %>%
`[[`("Bevölkerung") %>% sum() -> region_pop
final_pop <- region_pop * spec_pop_percentage
}
if(return_deaths){
sti_series <- sti(time_series[["deaths"]], final_pop)
}
else{
sti_series <- sti(time_series[["cases"]], final_pop)
}
return(dplyr::tibble(date=days_series, sti=sti_series))
}
# A faster sti function, it is however less adaptable
sti_id <- function(id){
# Getting population
pop <- as.integer(dplyr::filter(rev.env$population_lk_data,IdLandkreis==id)[5])
#Getting each case
rki_id <- rki_data[which(rki_data$IdLandkreis == id),]
dplyr::left_join(tibble::tibble(date=rev.env$days_since_2020),rki_id, by=c("date"="Meldedatum")) %>%
dplyr::group_by(date) %>%
dplyr::summarise(cases=sum(AnzahlFall), deaths=sum(AnzahlTodesfall), recoveries=sum(AnzahlGenesen)) %>%
# the days we have no infection data for are days with 0 infections
dplyr::mutate(cases=tidyr::replace_na(cases, 0), deaths=tidyr::replace_na(deaths, 0),
recoveries=tidyr::replace_na(recoveries, 0)) ->
id_time_series
# Calculating the rolling average
n <- length(id_time_series$date)
sti <- rep(0, n)
for(i in 1:n){
for(j in max(1, i-6):i) sti[i] <- sti[i] + id_time_series$cases[j]
}
sti <- sti / pop * 1e5
return(tibble::tibble(id_time_series$date,sti, .name_repair = ~ c("date", "sti")))
}
#Eine weitere "schnelle sti"
get_sti_series_simple <- function(lk_id){
rev.env$population_lk_data %>% dplyr::filter(IdLandkreis %in% lk_id) %>%
unique() -> data
data %>% `[[`("Landkreis") -> lk_name
data %>% `[[`("Bevölkerung") -> population
ts <- get_time_series_for(regions=lk_name)
cases_ts <- ts[["cases"]]
sti(cases_ts, population) %>% return()
}
#'Creating Time Series of Cases, Deaths or Incidence
#'
#'\code{get_data_for} is used to create a table of the absolute number of cases, of deaths or of
#'the 7-day-incidence of a region and a certain
#'age group. The time period can be defined by the user.
#'
#'@param regions A vector that either consists of strings (the names of German districts or the names of German states) or district ID's, or just "Germany" as a whole
#'If this vector has more than one entry, the tibble contains the values of these regions separately.
#'@param ages A vector of numbers specifying the desired age groups. The available age groups are "A00-A04","A05-A14","A15-A34","A35-A59","A60-A79" and "A80+".
#'The numbers in \code{ages} are automatically assigned to the belonging age group and afterwards, the cases and deaths of of these age groups are added up.
#'@param from A date that specifies the beginning of the time series
#'@param to A date that specifies the end of the time series
#'@param type A string specifying which time series is returned. Can be "sti" for the incidence or just "cases" / "deaths"
#'
#'@section Warning:
#'When specifying region \strong{and} agegroup, the incidence will not be accurate because
#'there is no population data for the age groups in each district and it will be estimated
#'by the age distribution of Germany.
#'Therefore, it is recommended to specify only one or the other
#'
#'@return A tibble that contains the wanted value for the desired regions during the defined period of time.
#'@examples get_data_for(ages=c(3,55),regions="Sachsen",from="2021-01-05",to="2021-02-06")
#'
#'get_data_for(ages=32,regions=c(8221, "Hessen", "Germany"), type="sti")
#'
#'\dontrun{get_time_series_for(ages=c(12,42),from="2020-05-02",to="2020-05-01"}
#'#"from" must always be an earlier date than "to"
#'
#' @family time series of cases, deaths or incidence
#' @export
get_data_for <- function(regions, ages="all", from="2020-01-01", to=Sys.Date(), type="cases"){
# type can be "cases", "sti", "deaths"
stopifnot("invalid type!"=type %in% c("cases", "sti", "deaths"))
regions_user <- regions
rki_data %>% dplyr::select(Bundesland) %>% unique() %>%
dplyr::filter(!Bundesland %in% c("Bremen", "Hamburg")) %>%
`[[`("Bundesland") %>% tolower() -> bundesländer
for(i in 1:length(regions)){
if(tolower(regions[i]) %in% bundesländer){
rev.env$population_lk_data %>%
dplyr::filter(tolower(Bundesland) == tolower(regions[i])) %>%
dplyr::select(Bundesland) %>%
unique() %>%
`[[`(1) ->
regions[i]
}
else if(tolower(regions[i])=="germany"){
regions[i] <- "Germany"
}
else{
regions[i] <- get_lk_id_from_string(regions[i])
}
}
regions_labels <- tibble::tibble(region_name=regions_user, region=regions)
data <- tibble::tibble(date=character(), value=numeric(), region=character())
for(reg in regions){
if(type=="sti"){
get_sti_series_for(regions=reg, ages=ages, from=from, to=to) %>%
dplyr::mutate(date=as.character(date), region=reg, value=sti) %>%
dplyr::select(-sti) %>%
tibble::add_row(data, .) ->
data
}
else if(type=="cases"){
get_time_series_for(regions=reg, ages=ages, from=from, to=to) %>%
dplyr::mutate(date=as.character(date), region=reg, value=cases) %>%
dplyr::select(-cases, -deaths) %>%
tibble::add_row(data, .) ->
data
}
else if(type=="deaths"){
get_time_series_for(regions=reg, ages=ages, from=from, to=to) %>%
dplyr::mutate(date=as.character(date), region=reg, value=deaths) %>%
dplyr::select(-cases, -deaths) %>%
tibble::add_row(data, .) ->
data
}
}
data %>%
dplyr::left_join(regions_labels, by="region") %>%
dplyr::select(-region) %>%
dplyr::mutate(date=as.Date(date)) %>%
return()
}
#'Plotting a Time Series of Cases, Deaths or Incidence
#'
#'\code{plot_data_for} is used to plot the absolute number of cases, of deaths or of
#'the 7-day-incidence of a region and a certain
#'age group. The time period can be defined by the user.
#'The function will plot the returns of \code{get_data_for}
#'
#'@param smoothing A positive integer that defines the window size of the moving average. Thus, the plot will be smoother
#'@param regions A vector that either consists of strings (the names of German districts or the names of German states) or district ID's, or just "Germany" as a whole
#'If this vector has more than one entry, the tibble contains the values of these regions separately.
#'@param ages A vector of numbers specifying the desired age groups. The available age groups are "A00-A04","A05-A14","A15-A34","A35-A59","A60-A79" and "A80+".
#'The numbers in \code{ages} are automatically assigned to the belonging age group and afterwards, the cases and deaths of of these age groups are added up.
#'@param from A date that specifies the beginning of the time series
#'@param to A date that specifies the end of the time series
#'@param type A string specifying which time series is returned. Can be "sti" for the incidence or just "cases" / "deaths"
#'
#'@section Warning:
#'When specifying region \strong{and} agegroup, the incidence will not be accurate because
#'there is no population data for the age groups in each district and it will be estimated
#'by the age distribution of Germany.
#'Therefore, it is recommended to specify only one or the other
#'
#'@return A plot that displays the wanted value for the desired regions during the defined period of time.
#'@examples plot_data_for(ages=c(3,55),regions="Sachsen",from="2021-01-05",to="2021-02-06")
#'
#'plot_data_for(ages=32,regions=c(8221, "Hessen", "Germany"), type="sti")
#'
#'\dontrun{plot_time_series_for(ages=c(12,42),from="2020-05-02",to="2020-05-01"}
#'#"from" must always be an earlier date than "to"
#'
#' @family time series of cases, deaths or incidence
#' @export
plot_data_for <- function(regions, ages="all", from="2020-01-01", to=Sys.Date(), type="cases", smoothing=0){
smoothing <- as.integer(smoothing)
stopifnot("invalid smoothing"=smoothing>=0)
data <- get_data_for(regions, ages, from, to, type)
data %>%
dplyr::group_by(region_name) %>%
dplyr::mutate(value=slider::slide_dbl(value,mean,.before=smoothing,.after=smoothing)) %>%
dplyr::ungroup()->data
data %>% dplyr::mutate(date=as.Date(date)) %>%
ggplot2::ggplot(ggplot2::aes(x=date, y=value, color=region_name, group=region_name)) %>%
`+`(ggplot2::geom_line()) %>% return()
}
#' Plotting a Comparison of Age Groups over the Pandemic
#'
#'\code{plot_for_agegroups} is used to generate a plot that can compare the case/death numbers
#'and incidence age groups. When comparing the cases or deaths, it returns a plot
#'which shows the share of cases/deaths for each age group. When plotting cases or deaths,
#' the plot will get smoothened automatically by the ggstream library
#' @param type A string which can be "cases" or "deaths".
#'@examples plot_for_agegroups(type="deaths")
#'
#' @family comparison age groups
#' @export
plot_for_agegroups <- function(type="cases"){
# type can be cases or deaths
stopifnot(type %in% c("cases", "deaths"))
days_since_2020 <- rev.env$days_since_2020
rki_data %>% dplyr::select(Altersgruppe) %>% unique() %>% `[[`(1) -> Altersgruppe
tidyr::crossing(Altersgruppe, days_since_2020) -> series1
options(dplyr.summarise.inform = FALSE)
series1 %>%
dplyr::mutate(date=days_since_2020) %>%
dplyr::select(-days_since_2020) %>%
dplyr::left_join(filter_data_by(), by=c("date"="Meldedatum", "Altersgruppe")) %>%
dplyr::group_by(date, Altersgruppe) %>%
dplyr::summarise(cases=sum(AnzahlFall), deaths=sum(AnzahlTodesfall)) %>%
# the days for which we have no infection data for are days with 0 infections
dplyr::mutate(cases=tidyr::replace_na(cases, 0), deaths=tidyr::replace_na(deaths, 0)) -> data
if(type=="cases"){
data %>% ggplot2::ggplot(ggplot2::aes(x=date, y=cases, fill=Altersgruppe)) %>%
`+`(ggstream::geom_stream(type="ridge")) %>% print()
}
if(type=="deaths"){
data %>% ggplot(ggplot2::aes(x=date, y=deaths, fill=Altersgruppe)) %>%
`+`(ggstream::geom_stream(type="ridge")) %>% print()
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.