corona: Corona Data Import

Documented in create_day_sequence filter_min_days pivot_longer_by_date preprocess_corona_data rename_corona_data specify_countries specify_number specify_statistic

#' Pivot date columns into a single date column
#'
#' Gathers every column whose name matches `date_format` into long format:
#' the former column names end up in a new `date` column and the cell contents
#' in a `value` column (the default of `tidyr::pivot_longer()`). The `date`
#' column is parsed with `lubridate::mdy()`, so the column names have to be
#' written as month/day/year.
#'
#' @param data A dataframe with one column per date
#' @param date_format A regular expression to detect the date columns (the default matches three slash-separated numbers such as `1/22/20`); used with dplyr's `matches()` to select the date columns
#'
#' @return A dataframe in a long format (containing a new `date` column of class `Date`)
#'
pivot_longer_by_date <- function(data, date_format = "\\d+\\/\\d+\\/\\d+") {
  data %>%
    tidyr::pivot_longer(cols = dplyr::matches(date_format), names_to = "date") %>%
    # The former column names are month/day/year strings, hence mdy()
    dplyr::mutate(date = lubridate::mdy(.data$date))
}

#' Rename state and country column
#'
#' Replace the clumsy column names `Country/Region` and `Province/State`
#' with the accessible names `country` and `state`.
#'
#' @param data A dataframe with the column names `Country/Region` and `Province/State`
#'
#' @return The input dataframe with the two columns renamed to `country` and `state`
#'
rename_corona_data <- function(data) {
  data %>%
    # Columns are named by string: the `.data` pronoun is deprecated in
    # tidyselect contexts such as rename()
    dplyr::rename(country = "Country/Region", state = "Province/State")
}

#' Add day column to dataframe (grouped for each country and state)
#'
#' Keeps only the rows with at least `number_of_cases` in the `value` column
#' and numbers the remaining rows of every country/state group in date order,
#' so that day = 1 corresponds to the first retained date of the group.
#'
#' @param data A dataframe with the columns `country`, `state`, `date` and `value` (suits the dataframe generated by \code{read_corona()})
#' @param number_of_cases Default value: 100, Integer, indicating the minimum number of cases (infections/ deaths/ recovered) a row must have to be kept
#'
#' @return A dataframe grouped by country and state, sorted by country, state and date, with an additional `day` column (incremental integers starting at 1 within each group)
#'
#' @export
#'
#' @examples \dontrun{
#' data <- read_corona()
#' data %>% create_day_sequence()
#' data %>% create_day_sequence(number_of_cases = 500)
#' }
#'
create_day_sequence <- function(data, number_of_cases = 100) {
  data %>%
    dplyr::group_by(.data$country, .data$state) %>%
    # Use the threshold argument instead of a hard-coded 100
    dplyr::filter(.data$value >= number_of_cases) %>%
    dplyr::arrange(.data$country, .data$state, .data$date) %>%
    # The data is still grouped, so the numbering restarts for each group
    dplyr::mutate(day = dplyr::row_number())
}

#' Filter dataframe for at least n days per group
#'
#' The function looks at the largest value of the `day` column in each group
#' (e.g. country) and drops the groups where it is below `min_n_days`.
#' It is recommended to first group the dataframe by the strata for which the
#' number of days has to be evaluated (e.g. at least 10 days in a country);
#' on an ungrouped dataframe the whole dataframe is treated as one group.
#'
#' @param data Dataframe (containing a day column)
#' @param min_n_days Integer How many days must be available at least for each group
#'
#' @return A dataframe with the same columns (and grouping) as `data`, containing only the groups that reach `min_n_days`
filter_min_days <- function(data, min_n_days = 5) {
  data %>%
    # Filter on the per-group maximum directly; no temporary helper column is
    # created, so an existing `n_days` column in `data` is left untouched
    dplyr::filter(base::max(.data$day) >= min_n_days)
}

#' Specify the statistic to analyse
#'
#' This function specifies the statistic to use in a
#' subsequent analysis. At the moment, three statistics are available:
#'
#' 1. infections
#' 2. deaths
#' 3. recoveries
#'
#' The chosen column is renamed to `statistic`; all columns other than
#' country, Lat, Long, date and statistic are dropped.
#'
#' @param corona_data A dataframe (tibble) imported with `read_corona`
#' @param statistic A character string naming the column of `corona_data` to use as statistic
#'
#' @return A data.frame (tibble) with columns country, Lat, Long, date, statistic
#'
#' @examples
#' \dontrun{
#' data <- read_corona()
#' data %>%
#'  specify_statistic("infections")
#' }
#'
specify_statistic <- function(corona_data, statistic) {
  corona_data %>%
    # all_of() makes explicit that `statistic` is a variable holding a column
    # name, not a column that is itself called "statistic"
    dplyr::rename(statistic = dplyr::all_of(statistic)) %>%
    dplyr::select("country", "Lat", "Long", "date", "statistic")
}


#' Restrict the data to a set of countries
#'
#' Keeps the rows whose `country` value is one of `countries` and groups the
#' result by country.
#'
#' @param corona_data A dataframe (tibble) imported with `read_corona`
#' @param countries A character vector containing the country names to include
#'
#' @return A data.frame (tibble), grouped by country, with the data of the included countries
#'
#' @examples
#' \dontrun{
#' data <- read_corona()
#' data %>%
#'  specify_countries(c("Italy", "Germany"))
#' }
specify_countries <- function(corona_data, countries) {
  selected <- dplyr::filter(corona_data, .data$country %in% countries)
  dplyr::group_by(selected, .data$country)
}

#' Specify the minimum number of cases to include
#'
#' Keeps the rows whose `statistic` value is at least `n`.
#'
#' @param corona_data A dataframe (tibble) with a `statistic` column
#' @param n Integer, minimum value of `statistic` a row must have to be included
#'
#' @return A data.frame (tibble) containing only the rows with `statistic >= n`
#'
#' @examples
#' \dontrun{
#' data <- read_corona()
#' data %>%
#'  specify_number(n = 100)
#' }
specify_number <- function(corona_data, n) {
  dplyr::filter(corona_data, .data$statistic >= n)
}


#' Preprocess the corona data for analysis
#'
#' Chains the preprocessing steps: select the statistic, restrict the data to
#' the requested countries, keep the rows with at least `n` cases and number
#' the remaining rows in date order. `specify_countries()` groups the data by
#' country, so the `day` counter restarts at 1 for every country.
#'
#' @param corona_data A dataframe (tibble) imported with `read_corona`
#' @param statistic A character string naming the statistic column to analyse (e.g. "infections" or "deaths")
#' @param countries A character vector containing the country names to include
#' @param n Integer, minimum number of cases a day must have to be included
#'
#' @return A dataframe (tibble), grouped by country and sorted by date, with an additional integer `day` column
#' @export
#'
#' @examples
#' \dontrun{
#' data <- read_corona()
#' data %>%
#'  preprocess_corona_data(
#'    statistic = "infections", # Focus on infections
#'    countries = "Italy", # Focus on Italy
#'    n = 100) # Include days with at least 100 infections
#'  }
#'
#'
preprocess_corona_data <- function(corona_data, statistic, countries, n) {

  corona_data %>%
    # 1. Specify statistic (e.g. infections)
    specify_statistic(statistic = statistic) %>%
    # 2. Specify countries (Germany, Italy, etc.)
    specify_countries(countries = countries) %>%
    # 3. Specify minimum number of cases
    specify_number(n = n) %>%
    # 4. Create an integer sequence for the days since the n th case.
    #    The .data pronoun pins `date` to the column, so it can never
    #    silently resolve to the base::date() function.
    dplyr::arrange(.data$date) %>%
    dplyr::mutate(day = seq_along(.data$date))

}

jnshsrs/corona documentation built on April 9, 2020, 11:10 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

jnshsrs/corona
Corona Data Import

R/preprocess-data.R
In jnshsrs/corona: Corona Data Import

Defines functions pivot_longer_by_date rename_corona_data create_day_sequence filter_min_days specify_statistic specify_countries specify_number preprocess_corona_data

Documented in create_day_sequence filter_min_days pivot_longer_by_date preprocess_corona_data rename_corona_data specify_countries specify_number specify_statistic

R Package Documentation

Browse R Packages

We want your feedback!

jnshsrs/corona Corona Data Import

R/preprocess-data.R In jnshsrs/corona: Corona Data Import

Defines functions pivot_longer_by_date rename_corona_data create_day_sequence filter_min_days specify_statistic specify_countries specify_number preprocess_corona_data

Documented in create_day_sequence filter_min_days pivot_longer_by_date preprocess_corona_data rename_corona_data specify_countries specify_number specify_statistic

R Package Documentation

Browse R Packages

We want your feedback!

jnshsrs/corona
Corona Data Import

R/preprocess-data.R
In jnshsrs/corona: Corona Data Import