# Chunk defaults for this vignette: source and output are collapsed into a
# single block and every output line is prefixed with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library(shiny.CSVImport)
library(knitr)
library(jsonlite)

Date Formats

# Raw JSON text: one object whose keys are locale identifiers (e.g. "en-US")
# and whose values are that locale's short date pattern, written in the
# "dd.MM.yyyy"-style notation (d = day, M = month, y = year).
# Some patterns contain blanks or a trailing dot (e.g. "d. M. yyyy",
# "yyyy. MM. dd.", "d.M.yyyy."); they are kept exactly as listed.
.DateFormats <- '{
    "af-ZA": "yyyy/MM/dd",
    "am-ET": "d/M/yyyy",
    "ar-AE": "dd/MM/yyyy",
    "ar-BH": "dd/MM/yyyy",
    "ar-DZ": "dd-MM-yyyy",
    "ar-EG": "dd/MM/yyyy",
    "ar-IQ": "dd/MM/yyyy",
    "ar-JO": "dd/MM/yyyy",
    "ar-KW": "dd/MM/yyyy",
    "ar-LB": "dd/MM/yyyy",
    "ar-LY": "dd/MM/yyyy",
    "ar-MA": "dd-MM-yyyy",
    "ar-OM": "dd/MM/yyyy",
    "ar-QA": "dd/MM/yyyy",
    "ar-SA": "dd/MM/yy",
    "ar-SY": "dd/MM/yyyy",
    "ar-TN": "dd-MM-yyyy",
    "ar-YE": "dd/MM/yyyy",
    "arn-CL": "dd-MM-yyyy",
    "as-IN": "dd-MM-yyyy",
    "az-Cyrl-AZ": "dd.MM.yyyy",
    "az-Latn-AZ": "dd.MM.yyyy",
    "ba-RU": "dd.MM.yy",
    "be-BY": "dd.MM.yyyy",
    "bg-BG": "dd.M.yyyy",
    "bn-BD": "dd-MM-yy",
    "bn-IN": "dd-MM-yy",
    "bo-CN": "yyyy/M/d",
    "br-FR": "dd/MM/yyyy",
    "bs-Cyrl-BA": "d.M.yyyy",
    "bs-Latn-BA": "d.M.yyyy",
    "ca-ES": "dd/MM/yyyy",
    "co-FR": "dd/MM/yyyy",
    "cs-CZ": "d.M.yyyy",
    "cy-GB": "dd/MM/yyyy",
    "da-DK": "dd-MM-yyyy",
    "de-AT": "dd.MM.yyyy",
    "de-CH": "dd.MM.yyyy",
    "de-DE": "dd.MM.yyyy",
    "de-LI": "dd.MM.yyyy",
    "de-LU": "dd.MM.yyyy",
    "dsb-DE": "d. M. yyyy",
    "dv-MV": "dd/MM/yy",
    "el-GR": "d/M/yyyy",
    "en-029": "MM/dd/yyyy",
    "en-AU": "d/MM/yyyy",
    "en-BZ": "dd/MM/yyyy",
    "en-CA": "dd/MM/yyyy",
    "en-GB": "dd/MM/yyyy",
    "en-IE": "dd/MM/yyyy",
    "en-IN": "dd-MM-yyyy",
    "en-JM": "dd/MM/yyyy",
    "en-MY": "d/M/yyyy",
    "en-NZ": "d/MM/yyyy",
    "en-PH": "M/d/yyyy",
    "en-SG": "d/M/yyyy",
    "en-TT": "dd/MM/yyyy",
    "en-US": "M/d/yyyy",
    "en-ZA": "yyyy/MM/dd",
    "en-ZW": "M/d/yyyy",
    "es-AR": "dd/MM/yyyy",
    "es-BO": "dd/MM/yyyy",
    "es-CL": "dd-MM-yyyy",
    "es-CO": "dd/MM/yyyy",
    "es-CR": "dd/MM/yyyy",
    "es-DO": "dd/MM/yyyy",
    "es-EC": "dd/MM/yyyy",
    "es-ES": "dd/MM/yyyy",
    "es-GT": "dd/MM/yyyy",
    "es-HN": "dd/MM/yyyy",
    "es-MX": "dd/MM/yyyy",
    "es-NI": "dd/MM/yyyy",
    "es-PA": "MM/dd/yyyy",
    "es-PE": "dd/MM/yyyy",
    "es-PR": "dd/MM/yyyy",
    "es-PY": "dd/MM/yyyy",
    "es-SV": "dd/MM/yyyy",
    "es-US": "M/d/yyyy",
    "es-UY": "dd/MM/yyyy",
    "es-VE": "dd/MM/yyyy",
    "et-EE": "d.MM.yyyy",
    "eu-ES": "yyyy/MM/dd",
    "fa-IR": "MM/dd/yyyy",
    "fi-FI": "d.M.yyyy",
    "fil-PH": "M/d/yyyy",
    "fo-FO": "dd-MM-yyyy",
    "fr-BE": "d/MM/yyyy",
    "fr-CA": "yyyy-MM-dd",
    "fr-CH": "dd.MM.yyyy",
    "fr-FR": "dd/MM/yyyy",
    "fr-LU": "dd/MM/yyyy",
    "fr-MC": "dd/MM/yyyy",
    "fy-NL": "d-M-yyyy",
    "ga-IE": "dd/MM/yyyy",
    "gd-GB": "dd/MM/yyyy",
    "gl-ES": "dd/MM/yy",
    "gsw-FR": "dd/MM/yyyy",
    "gu-IN": "dd-MM-yy",
    "ha-Latn-NG": "d/M/yyyy",
    "he-IL": "dd/MM/yyyy",
    "hi-IN": "dd-MM-yyyy",
    "hr-BA": "d.M.yyyy.",
    "hr-HR": "d.M.yyyy",
    "hsb-DE": "d. M. yyyy",
    "hu-HU": "yyyy. MM. dd.",
    "hy-AM": "dd.MM.yyyy",
    "id-ID": "dd/MM/yyyy",
    "ig-NG": "d/M/yyyy",
    "ii-CN": "yyyy/M/d",
    "is-IS": "d.M.yyyy",
    "it-CH": "dd.MM.yyyy",
    "it-IT": "dd/MM/yyyy",
    "iu-Cans-CA": "d/M/yyyy",
    "iu-Latn-CA": "d/MM/yyyy",
    "ja-JP": "yyyy/MM/dd",
    "ka-GE": "dd.MM.yyyy",
    "kk-KZ": "dd.MM.yyyy",
    "kl-GL": "dd-MM-yyyy",
    "km-KH": "yyyy-MM-dd",
    "kn-IN": "dd-MM-yy",
    "ko-KR": "yyyy-MM-dd",
    "kok-IN": "dd-MM-yyyy",
    "ky-KG": "dd.MM.yy",
    "lb-LU": "dd/MM/yyyy",
    "lo-LA": "dd/MM/yyyy",
    "lt-LT": "yyyy.MM.dd",
    "lv-LV": "yyyy.MM.dd.",
    "mi-NZ": "dd/MM/yyyy",
    "mk-MK": "dd.MM.yyyy",
    "ml-IN": "dd-MM-yy",
    "mn-MN": "yy.MM.dd",
    "mn-Mong-CN": "yyyy/M/d",
    "moh-CA": "M/d/yyyy",
    "mr-IN": "dd-MM-yyyy",
    "ms-BN": "dd/MM/yyyy",
    "ms-MY": "dd/MM/yyyy",
    "mt-MT": "dd/MM/yyyy",
    "nb-NO": "dd.MM.yyyy",
    "ne-NP": "M/d/yyyy",
    "nl-BE": "d/MM/yyyy",
    "nl-NL": "d-M-yyyy",
    "nn-NO": "dd.MM.yyyy",
    "nso-ZA": "yyyy/MM/dd",
    "oc-FR": "dd/MM/yyyy",
    "or-IN": "dd-MM-yy",
    "pa-IN": "dd-MM-yy",
    "pl-PL": "yyyy-MM-dd",
    "prs-AF": "dd/MM/yy",
    "ps-AF": "dd/MM/yy",
    "pt-BR": "d/M/yyyy",
    "pt-PT": "dd-MM-yyyy",
    "qut-GT": "dd/MM/yyyy",
    "quz-BO": "dd/MM/yyyy",
    "quz-EC": "dd/MM/yyyy",
    "quz-PE": "dd/MM/yyyy",
    "rm-CH": "dd/MM/yyyy",
    "ro-RO": "dd.MM.yyyy",
    "ru-RU": "dd.MM.yyyy",
    "rw-RW": "M/d/yyyy",
    "sa-IN": "dd-MM-yyyy",
    "sah-RU": "MM.dd.yyyy",
    "se-FI": "d.M.yyyy",
    "se-NO": "dd.MM.yyyy",
    "se-SE": "yyyy-MM-dd",
    "si-LK": "yyyy-MM-dd",
    "sk-SK": "d. M. yyyy",
    "sl-SI": "d.M.yyyy",
    "sma-NO": "dd.MM.yyyy",
    "sma-SE": "yyyy-MM-dd",
    "smj-NO": "dd.MM.yyyy",
    "smj-SE": "yyyy-MM-dd",
    "smn-FI": "d.M.yyyy",
    "sms-FI": "d.M.yyyy",
    "sq-AL": "yyyy-MM-dd",
    "sr-Cyrl-BA": "d.M.yyyy",
    "sr-Cyrl-CS": "d.M.yyyy",
    "sr-Cyrl-ME": "d.M.yyyy",
    "sr-Cyrl-RS": "d.M.yyyy",
    "sr-Latn-BA": "d.M.yyyy",
    "sr-Latn-CS": "d.M.yyyy",
    "sr-Latn-ME": "d.M.yyyy",
    "sr-Latn-RS": "d.M.yyyy",
    "sv-FI": "d.M.yyyy",
    "sv-SE": "yyyy-MM-dd",
    "sw-KE": "M/d/yyyy",
    "syr-SY": "dd/MM/yyyy",
    "ta-IN": "dd-MM-yyyy",
    "te-IN": "dd-MM-yy",
    "tg-Cyrl-TJ": "dd.MM.yy",
    "th-TH": "d/M/yyyy",
    "tk-TM": "dd.MM.yy",
    "tn-ZA": "yyyy/MM/dd",
    "tr-TR": "dd.MM.yyyy",
    "tt-RU": "dd.MM.yyyy",
    "tzm-Latn-DZ": "dd-MM-yyyy",
    "ug-CN": "yyyy-M-d",
    "uk-UA": "dd.MM.yyyy",
    "ur-PK": "dd/MM/yyyy",
    "uz-Cyrl-UZ": "dd.MM.yyyy",
    "uz-Latn-UZ": "dd/MM yyyy",
    "vi-VN": "dd/MM/yyyy",
    "wo-SN": "dd/MM/yyyy",
    "xh-ZA": "yyyy/MM/dd",
    "yo-NG": "d/M/yyyy",
    "zh-CN": "yyyy/M/d",
    "zh-HK": "d/M/yyyy",
    "zh-MO": "d/M/yyyy",
    "zh-SG": "d/M/yyyy",
    "zh-TW": "yyyy/M/d",
    "zu-ZA": "yyyy/MM/dd"
  }'

# Parse the JSON text into an R object with one element per locale key.
DateFormats <- jsonlite::fromJSON(.DateFormats)

The list of date formats has been taken from StackOverflow. It has `r length(DateFormats)` entries in total. The number of unique formats is `r length(unique(unlist(DateFormats)))`. The first entries look like this:

# Show the first six locale/pattern pairs as a table.
knitr::kable(head(as.data.frame(unlist(DateFormats)), n = 6L))

The unique date formats are:

# Count how many locales use each date pattern.
Frequencies <- as.data.frame(table(unlist(DateFormats)))
colnames(Frequencies) <- c("Format", "Frequency")
# Most frequently used patterns first.
Frequencies <- Frequencies[order(Frequencies[["Frequency"]], decreasing = TRUE), ]
kable(Frequencies)

Based on these data I chose the following formats as defaults. All formats with 10 or more occurrences are selected without question. I added two more that are close variants of formats already on the list (the same layout with a two-digit year). The standard format specified by ISO 8601 (yyyy-MM-dd) is among the top 10, too.

# Default date formats offered to the user.
# Start with the six most frequent patterns; `Frequencies` is expected to be
# sorted by descending frequency. head() returns fewer rows instead of NA rows
# should the table ever hold fewer than six patterns.
SelectedDates <- head(Frequencies, 6)
# Add two two-digit-year variants by hand.
SelectedDates <- rbind(SelectedDates, Frequencies[Frequencies[["Format"]] == "dd/MM/yy", ])
SelectedDates <- rbind(SelectedDates, Frequencies[Frequencies[["Format"]] == "dd.MM.yy", ])

# Translate the "dd.MM.yyyy"-style patterns into strptime() notation.
# Day and month tokens occur with one or two letters ("d"/"dd", "M"/"MM");
# both widths must be replaced, otherwise a pattern such as "d.M.yyyy" keeps
# a literal "M" and is not a usable R format. "yyyy" is replaced before "yy"
# so that a four-digit year does not end up as "%y%y".
# Consequence: padded and unpadded patterns (e.g. "dd.MM.yyyy" and
# "d.M.yyyy") map to the same R format string.
RFormat <- as.character(SelectedDates[["Format"]])
RFormat <- gsub("d{1,2}", "%d", RFormat)
RFormat <- gsub("M{1,2}", "%m", RFormat)
RFormat <- gsub("yyyy", "%Y", RFormat)
RFormat <- gsub("yy", "%y", RFormat)
SelectedDates[["R-Format"]] <- RFormat

# Share of all locales whose pattern is among the selected defaults.
DateCoverage <- sum(SelectedDates[["Frequency"]]) / sum(Frequencies[["Frequency"]])

kable(SelectedDates)

The total coverage of the default formats is `r format(DateCoverage, digits = 2)`.

# Sort the defaults by their R format string, then emit them as a named
# character vector (names = display pattern, values = R format) in a form
# that can be pasted into source code.
SelectedDates <- SelectedDates[order(SelectedDates[["R-Format"]]), ]
Choices <- stats::setNames(SelectedDates[["R-Format"]], SelectedDates[["Format"]])
dput(Choices)

Time

Time formats seem simpler. The standard ISO 8601 format is HH:mm:ss. In most English-speaking regions the 12-hour notation is used instead of the 24-hour notation.


Formatting Strings

For better usability it makes more sense to use a "dd.MM.yyyy"-style format syntax instead of R's POSIX notation.

| Format | POSIX Format | Description | Examples | |:----------- |:----------- |:------------------------|:--------------------------------| | d | %e | Day of the month, 1-31 | 2009-06-01T13:45:30 -> 1
2009-06-15T13:45:30 -> 15 | | dd | %d | Day of the month, 01-31 | 2009-06-01T13:45:30 -> 01
2009-06-15T13:45:30 -> 15 | | ddd | %a | Abbreviated name of the day of the week | 2009-06-15T13:45:30 -> Mon (en-US)
2009-06-15T13:45:30 -> Пн (ru-RU)
2009-06-15T13:45:30 -> lun. (fr-FR) | | dddd | %A | Full name of the day of the week | 2009-06-15T13:45:30 -> Monday (en-US)
2009-06-15T13:45:30 -> понедельник (ru-RU)
2009-06-15T13:45:30 -> lundi (fr-FR) | | | %u | Weekday as a decimal number (1--7, Monday is 1) | | | | %w | Weekday as decimal number (0--6, Sunday is 0) | | | f | - | Tenths of a second | 2009-06-15T13:45:30.6170000 -> 6
2009-06-15T13:45:30.05 -> 0 | | ff | - | Hundredths of a second | 2009-06-15T13:45:30.6170000 -> 61
2009-06-15T13:45:30.0050000 -> 00 | | fff | - | Milliseconds | 6/15/2009 13:45:30.617 -> 617
6/15/2009 13:45:30.0005 -> 000 | | ffff | - | Ten thousandths of a second | 2009-06-15T13:45:30.6175000 -> 6175
2009-06-15T13:45:30.0000500 -> 0000 | | fffffff | - | Ten millionths of a second | 2009-06-15T13:45:30.6175425 -> 6175425
2009-06-15T13:45:30.0001150 -> 0001150 | | F | - | Tenths of a second (if non-zero) | 2009-06-15T13:45:30.6170000 -> 6
2009-06-15T13:45:30.0500000 -> (no output) | | FF | - | Hundredths of a second (if non-zero) | 2009-06-15T13:45:30.6170000 -> 61
2009-06-15T13:45:30.0050000 -> (no output) | | FFF | - | Milliseconds (if non-zero) | 2009-06-15T13:45:30.6170000 -> 617
2009-06-15T13:45:30.0005000 -> (no output) | | FFFF | - | Ten thousandths of a second (if non-zero) | 2009-06-15T13:45:30.5275000 -> 5275
2009-06-15T13:45:30.0000500 -> (no output) | | FFFFF | - | Hundred thousandths of a second (if non-zero) | 2009-06-15T13:45:30.6175400 -> 61754
2009-06-15T13:45:30.0000050 -> (no output) | | FFFFFF | - | Millionths of a second (if non-zero) | 2009-06-15T13:45:30.6175420 -> 617542
2009-06-15T13:45:30.0000005 -> (no output) | | FFFFFFF | - | Ten millionths of a second (if non-zero) | 2009-06-15T13:45:30.6175425 -> 6175425
2009-06-15T13:45:30.0001150 -> 000115 | | ss.f[f[f[f]]]| %OSn | Seconds and fractions. n specifies 10E-n | | | g, gg | | Period or era | 2009-06-15T13:45:30.6170000 -> A.D. | | h | %l * | Hour, on a 12-hour clock 1-12 | 2009-06-15T01:45:30 -> 1 | | hh | %I | Hour, on a 12-hour clock 01-12 | 2009-06-15T01:45:30 -> 01 | | H | %k * | Hour, on a 24-hour clock 0-23 | 2009-06-15T01:45:30 -> 1
2009-06-15T13:45:30 -> 13 | | HH | %H | Hour, on a 24-hour clock 00-23 | 2009-06-15T01:45:30 -> 01
2009-06-15T13:45:30 -> 13 | | K | | Time zone information | 2009-06-15T13:45:30, Kind Unspecified ->
2009-06-15T13:45:30, Kind Utc -> Z
2009-06-15T13:45:30, Kind Local -> -07:00 (depends on local computer settings)
With DateTimeOffset values:
2009-06-15T01:45:30-07:00 --> -07:00
2009-06-15T08:45:30+00:00 --> +00:00 | | m | | Minute, 0-59 | 2009-06-15T13:29:30 -> 29 | | mm | %M | Minute, 00-59 | 2009-06-15T01:45:30 -> 45 | | M | | Month, 1-12 | 2009-06-15T13:45:30 -> 6 | | MM | %m | Month, 01-12 | 2009-06-15T13:45:30 -> 06 | | MMM | %b | Abbreviated name of month | 2009-06-15T13:45:30 -> Jun (en-US)
2009-06-15T13:45:30 -> juin (fr-FR)
2009-06-15T13:45:30 -> Jun (zu-ZA) | | MMMM | %B | Full name of month | 2009-06-15T13:45:30 -> June (en-US)
2009-06-15T13:45:30 -> juni (da-DK)
2009-06-15T13:45:30 -> uJuni (zu-ZA)| | s | %S (input only)| Second, 0-59 | 2012-06-15T13:45:09 -> 9 | | ss | %S | Second, 00-59 | 2012-06-15T13:45:09 -> 09 | | t | | First character of the AM/PM designator | 2009-06-15T13:45:30 -> P (en-US)
2009-06-15T13:45:30 -> 午 (ja-JP)
2009-06-15T13:45:30 -> (fr-FR) | | tt | %p | AM/PM designator | 2009-06-15T13:45:30 -> PM (en-US)
2009-06-15T13:45:30 -> 午後 (ja-JP)
2009-06-15T13:45:30 -> (fr-FR) | | y | %y (input only) | Year, 0-99 | 0001-01-01T00:00:00 -> 1
0900-01-01T00:00:00 -> 0
1900-01-01T00:00:00 -> 0
2009-06-15T13:45:30 -> 9
2019-06-15T13:45:30 -> 19 | | yy | %y | Year, 00-99 | 0001-01-01T00:00:00 -> 01
0900-01-01T00:00:00 -> 00
1900-01-01T00:00:00 -> 00
2019-06-15T13:45:30 -> 19 | | yyy | n/a | Year, with a minimum of three digits | 0001-01-01T00:00:00 -> 001
0900-01-01T00:00:00 -> 900
1900-01-01T00:00:00 -> 1900
2009-06-15T13:45:30 -> 2009 | | yyyy | %Y | Year as a four-digit number | 0900-01-01T00:00:00 -> 0900
1900-01-01T00:00:00 -> 1900
2009-06-15T13:45:30 -> 2009 | | yyyyy | | Year as a five-digit number | 0001-01-01T00:00:00 -> 00001
2009-06-15T13:45:30 -> 02009 | | z | | Hours offset from UTC, without leading zeros | 2009-06-15T13:45:30-07:00 -> -7 | | zz | | Hours offset from UTC, with leading zero for a single-digit value | 009-06-15T13:45:30-07:00 -> -07 | | zzz | | Hours and minutes offset from UTC | 2009-06-15T13:45:30-07:00 -> -07:00 | | | %z | Signed offset in hours and minutes from UTC, so -0800 is 8 hours behind UTC. | | | : | | Time separator | 2009-06-15T13:45:30 -> : (en-US) | | | | | 2009-06-15T13:45:30 -> . (it-IT) | | | | | 2009-06-15T13:45:30 -> : (ja-JP) | | / | | Date separator | 2009-06-15T13:45:30 -> / (en-US) | | | | | 2009-06-15T13:45:30 -> - (ar-DZ) | | | | | 2009-06-15T13:45:30 -> . (tr-TR) | | "string" | | Literal string delimiter | 2009-06-15T13:45:30 ("arr:" h:m t) -> arr: 1:45 P | | | | | | | 'string' | | | 2009-06-15T13:45:30 ('arr:' h:m t) -> arr: 1:45 P | | \% | | Defines the following character as a custom format specifier | 2009-06-15T13:45:30 (%h) -> 1 | | \ | | Escape character | 2009-06-15T13:45:30 (h \h) -> 1 h | | Any other character | | The character is copied to the result string unchanged | 2009-06-15T01:45:30 (arr hh:mm t) -> arr 01:45 A | * Not in the POSIX standards and less widely implemented

POSIX Combination Formats

| Format | POSIX Format | Description | Examples | |-------------|:-------------|:---------------------------------------|:--------------------------------| | - | %c | Date and time. Locale-specific on output, "%a %b %e %H:%M:%S %Y" on input. | | | - | %D | Date format such as %m/%d/%y: the C99 standard says it should be that exact format (but not all OSes comply). | | | yyyy-mm-dd | %F | %Y-%m-%d | | | hh:mm:ss tt | %r | For output, the 12-hour clock time (using the locale's AM or PM): only defined in some locales, and on some OSes misleading in locales which do not define an AM/PM indicator. For input, equivalent to %I:%M:%S %p. | | | HH:mm | %R | %H:%M | | | HH:mm:ss | %T | %H:%M:%S | | | yy/mm/dd | %x | Date. | | | | | Locale-specific on output, "%y/%m/%d" on input. | | | (HH:mm:ss) | %X | Time. Locale-specific on output, "%H:%M:%S" on input. | | | - | %+ | (Output only) Similar to %c, often "%a %b %e %H:%M:%S %Z %Y". May depend on locale. | |

POSIX only

| Format | POSIX Format | Description | Examples | |-------------|:-------------|:---------------------------------------|:--------------------------------| | | %g | last two digits of the week-based year (see %V) | | | | %G | week-based year (see %V) as a decimal number | | | | %j | Day of year as decimal number (001--366): For input, 366 is only valid in a leap year.| | | - | %n | Newline on output, arbitrary white space on input. | | | | %t | Tab on output, arbitrary white space on input. | | | | %U | Week of the year as decimal number (00--53) using Sunday as the first day 1 of the week | | | | %W | Week of the year as decimal number (00--53) using Monday as the first day of week | | | | %s * | | |

* Not in the POSIX standards and less widely implemented



SigurdJanson/shinyCSVImpoMod documentation built on Jan. 5, 2023, 3:57 a.m.